def _parameter_control_dependencies(self, is_init):
        assertions = []

        logits = self._logits
        probs = self._probs
        param, name = (probs, 'probs') if logits is None else (logits,
                                                               'logits')

        # In init, we can always build shape and dtype checks because
        # we assume shape doesn't change for Variable backed args.
        if is_init:
            if not dtype_util.is_floating(param.dtype):
                raise TypeError(
                    'Argument `{}` must have floating type.'.format(name))

            msg = 'Argument `{}` must have rank at least 1.'.format(name)
            shape_static = tensorshape_util.dims(param.shape)
            if shape_static is not None:
                if len(shape_static) < 1:
                    raise ValueError(msg)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_rank_at_least(param, 1, message=msg))
                with tf.control_dependencies(assertions):
                    param = tf.identity(param)

            msg1 = 'Argument `{}` must have final dimension >= 1.'.format(name)
            msg2 = 'Argument `{}` must have final dimension <= {}.'.format(
                name, dtype_util.max(tf.int32))
            event_size = shape_static[-1] if shape_static is not None else None
            if event_size is not None:
                if event_size < 1:
                    raise ValueError(msg1)
                if event_size > dtype_util.max(tf.int32):
                    raise ValueError(msg2)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_greater_equal(tf.shape(param)[-1],
                                                     1,
                                                     message=msg1))
                # NOTE: For now, we leave out a runtime assertion that
                # `tf.shape(param)[-1] <= tf.int32.max`.  An earlier `tf.shape` call
                # will fail before we get to this point.

        if not self.validate_args:
            assert not assertions  # Should never happen.
            return []

        if probs is not None:
            probs = param  # reuse tensor conversion from above
            if is_init != tensor_util.is_ref(probs):
                probs = tf.convert_to_tensor(probs)
                one = tf.ones([], dtype=probs.dtype)
                assertions.extend([
                    assert_util.assert_non_negative(probs),
                    assert_util.assert_less_equal(probs, one),
                    assert_util.assert_near(
                        tf.reduce_sum(probs, axis=-1),
                        one,
                        message='Argument `probs` must sum to 1.'),
                ])

        return assertions
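The checks above surface to users through `validate_args`. A minimal sketch, assuming this helper backs the parameter validation of the public `tfp.distributions.Categorical` API (an assumption; the snippet is illustrative only):

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# Valid probabilities pass the non-negativity, <= 1, and sum-to-1 assertions.
ok = tfd.Categorical(probs=[0.2, 0.3, 0.5], validate_args=True)
print(ok.sample(seed=42))

# Probabilities that do not sum to 1 trip the `assert_near` check above.
try:
  bad = tfd.Categorical(probs=[0.5, 0.6], validate_args=True)
  bad.sample(seed=42)
except tf.errors.InvalidArgumentError:
  print('`probs` must sum to 1.')
```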
Example #2
 def _prob(self, x):
   loc = tf.convert_to_tensor(self.loc)
   # Enforces dtype of probability to be float, when self.dtype is not.
   prob_dtype = self.dtype if dtype_util.is_floating(
       self.dtype) else tf.float32
   return tf.cast(tf.abs(x - loc) <= self._slack(loc), dtype=prob_dtype)
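A short usage sketch, assuming this `_prob` belongs to `tfp.distributions.Deterministic`, whose `atol`/`rtol` arguments define the slack used above (an assumption based on the `loc`/`_slack` names):

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

dist = tfd.Deterministic(loc=2., atol=0.1)
print(dist.prob(2.05))  # => 1.0, within the slack around `loc`.
print(dist.prob(2.5))   # => 0.0, outside the slack.
```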
Example #3
def sample_halton_sequence(dim,
                           num_results=None,
                           sequence_indices=None,
                           dtype=tf.float32,
                           randomized=True,
                           seed=None,
                           name=None):
    r"""Returns a sample from the `dim` dimensional Halton sequence.

  Warning: The sequence elements take values only between 0 and 1. Care must be
  taken to appropriately transform the domain of a function if it differs from
  the unit cube before evaluating integrals using Halton samples. It is also
  important to remember that quasi-random numbers without randomization are not
  a replacement for pseudo-random numbers in every context. Quasi random numbers
  are completely deterministic and typically have significant negative
  autocorrelation unless randomization is used.

  Computes the members of the low discrepancy Halton sequence in dimension
  `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in
  `dim` dimensions. Currently, only dimensions up to 1000 are supported. The
  prime base for the k-th axes is the k-th prime starting from 2. For example,
  if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first
  element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more
  complete description of the Halton sequences see
  [here](https://en.wikipedia.org/wiki/Halton_sequence). For low discrepancy
  sequences and their applications see
  [here](https://en.wikipedia.org/wiki/Low-discrepancy_sequence).

  If `randomized` is true, this function produces a scrambled version of the
  Halton sequence introduced by [Owen (2017)][1]. For the advantages of
  randomization of low discrepancy sequences see [here](
  https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo).

  The number of samples produced is controlled by the `num_results` and
  `sequence_indices` parameters. The user must supply either `num_results` or
  `sequence_indices` but not both.
  The former is the number of samples to produce starting from the first
  element. If `sequence_indices` is given instead, the specified elements of
  the sequence are generated. For example, sequence_indices=tf.range(10) is
  equivalent to specifying num_results=10.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp

  # Produce the first 1000 members of the Halton sequence in 3 dimensions.
  num_results = 1000
  dim = 3
  sample = tfp.mcmc.sample_halton_sequence(
    dim,
    num_results=num_results,
    seed=127)

  # Evaluate the integral of x_1 * x_2^2 * x_3^3  over the three dimensional
  # hypercube.
  powers = tf.range(1.0, limit=dim + 1)
  integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1))
  true_value = 1.0 / tf.reduce_prod(powers + 1.0)
  with tf.Session() as session:
    values = session.run((integral, true_value))

  # Produces a relative absolute error of 1.7%.
  print ("Estimated: %f, True Value: %f" % values)

  # Now skip the first 1000 samples and recompute the integral with the next
  # thousand samples. The sequence_indices argument can be used to do this.


  sequence_indices = tf.range(start=1000, limit=1000 + num_results,
                              dtype=tf.int32)
  sample_leaped = tfp.mcmc.sample_halton_sequence(
      dim,
      sequence_indices=sequence_indices,
      seed=111217)

  integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers,
                                                  axis=-1))
  with tf.Session() as session:
    values = session.run((integral_leaped, true_value))
  # Now produces a relative absolute error of 0.05%.
  print ("Leaped Estimated: %f, True Value: %f" % values)
  ```

  Args:
    dim: Positive Python `int` representing each sample's `event_size`. Must
      not be greater than 1000.
    num_results: (Optional) Positive scalar `Tensor` of dtype int32. The number
      of samples to generate. Either this parameter or sequence_indices must
      be specified but not both. If this parameter is None, then the behaviour
      is determined by the `sequence_indices`.
      Default value: `None`.
    sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The
      elements of the sequence to compute specified by their position in the
      sequence. The entries index into the Halton sequence starting with 0 and
      hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will
      produce the first, sixth and seventh elements of the sequence. If this
      parameter is None, then the `num_results` parameter must be specified
      which gives the number of desired samples starting from the first sample.
      Default value: `None`.
    dtype: (Optional) The dtype of the sample. One of: `float16`, `float32` or
      `float64`.
      Default value: `tf.float32`.
    randomized: (Optional) bool indicating whether to produce a randomized
      Halton sequence. If True, applies the randomization described in
      [Owen (2017)][1].
      Default value: `True`.
    seed: (Optional) Seed for reproducible sampling. Only used if `randomized`
      is True. If not supplied and `randomized` is True, no seed is set.
      Default value: `None`.
    name:  (Optional) Python `str` describing ops managed by this function. If
      not supplied the name of this function is used.
      Default value: "sample_halton_sequence".

  Returns:
    halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype
      and `shape` `[num_results, dim]` if `num_results` was specified or shape
      `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices`
      were specified.

  Raises:
    ValueError: if both `sequence_indices` and `num_results` were specified or
      if dimension `dim` is less than 1 or greater than 1000.

  #### References

  [1]: Art B. Owen. A randomized Halton algorithm in R. _arXiv preprint
       arXiv:1706.02808_, 2017. https://arxiv.org/abs/1706.02808
  """
    if dim < 1 or dim > _MAX_DIMENSION:
        raise ValueError(
            'Dimension must be between 1 and {}. Supplied {}'.format(
                _MAX_DIMENSION, dim))
    if (num_results is None) == (sequence_indices is None):
        raise ValueError('Either `num_results` or `sequence_indices` must be'
                         ' specified but not both.')

    if not dtype_util.is_floating(dtype):
        raise ValueError('dtype must be of `float`-type')

    with tf.name_scope(name or 'sample_halton_sequence'):
        # Here and in the following, the shape layout is as follows:
        # [sample dimension, event dimension, coefficient dimension].
        # The coefficient dimension is an intermediate axis which will hold the
        # weights of the starting integer when expressed in the (prime) base for
        # an event dimension.
        if num_results is not None:
            num_results = tf.convert_to_tensor(num_results)
        if sequence_indices is not None:
            sequence_indices = tf.convert_to_tensor(sequence_indices)
        indices = _get_indices(num_results, sequence_indices, dtype)
        radixes = tf.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1])

        max_sizes_by_axes = _base_expansion_size(tf.reduce_max(indices),
                                                 radixes)

        max_size = tf.reduce_max(max_sizes_by_axes)

        # The powers of the radixes that we will need. Note that there is a bit
        # of an excess here. Suppose we need the place value coefficients of 7
        # in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits
        # for base 3. However, we can only create rectangular tensors so we
        # store both expansions in a [2, 3] tensor. This leads to the problem that
        # we might end up attempting to raise large numbers to large powers. For
        # example, the base 2 expansion of 1024 has 11 digits. If we were in 10
        # dimensions, then for the 10th prime (29) we would end up computing 29^10
        # even though we don't need it. We avoid this by setting the exponents for
        # each axis to 0 beyond the maximum value needed for that dimension.
        exponents_by_axes = tf.tile([tf.range(max_size)], [dim, 1])

        # The mask is true for those coefficients that are irrelevant.
        weight_mask = exponents_by_axes < max_sizes_by_axes
        capped_exponents = tf.where(weight_mask, exponents_by_axes,
                                    tf.constant(0, exponents_by_axes.dtype))
        weights = radixes**capped_exponents
        # The following computes the base b expansion of the indices. Suppose,
        # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with
        # the vector (1, b, b^2, b^3, ...) will produce
        # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care
        # about. Noting that all a_i < b by definition of place value expansion,
        # we see that taking the elements mod b of the above vector produces the
        # place value expansion coefficients.
        coeffs = tf.math.floordiv(indices, weights)
        coeffs *= tf.cast(weight_mask, dtype)
        coeffs %= radixes
        if not randomized:
            coeffs /= radixes
            return tf.reduce_sum(coeffs / weights, axis=-1)

        shuffle_seed, zero_correction_seed = samplers.split_seed(
            seed, salt='MCMCSampleHaltonSequence')

        coeffs = _randomize(coeffs, radixes, seed=shuffle_seed)
        # Remove the contribution from randomizing the trailing zero for the
        # axes where max_sizes_by_axes < max_size. This will be accounted
        # for separately below (using zero_correction).
        coeffs *= tf.cast(weight_mask, dtype)
        coeffs /= radixes
        base_values = tf.reduce_sum(coeffs / weights, axis=-1)

        # The randomization used in Owen (2017) does not leave 0 invariant. While
        # we have accounted for the randomization of the first `max_sizes_by_axes`
        # coefficients, we still need to correct for the trailing zeros. Luckily,
        # this is equivalent to adding a uniform random value scaled so the first
        # `max_sizes_by_axes` coefficients are zero. The following statements perform
        # this correction.
        zero_correction = samplers.uniform([dim, 1],
                                           seed=zero_correction_seed,
                                           dtype=dtype)
        zero_correction /= radixes**max_sizes_by_axes
        return base_values + tf.reshape(zero_correction, [-1])
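As a sanity check of the docstring claim that the first non-randomized element in 3 dimensions is `[0.5, 0.333, 0.2]`, here is a plain-Python radical-inverse reference, independent of the TF implementation above:

```python
def radical_inverse(index, base):
  """Value of the Halton sequence at `index` for a given prime `base`."""
  result, frac = 0.0, 1.0 / base
  while index > 0:
    result += (index % base) * frac
    index //= base
    frac /= base
  return result

# Index 1 in bases 2, 3, 5 reproduces the documented first element.
print([radical_inverse(1, b) for b in (2, 3, 5)])  # [0.5, 0.333..., 0.2]
```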
Example #4
  def __init__(self,
               outcomes,
               logits=None,
               probs=None,
               rtol=None,
               atol=None,
               validate_args=False,
               allow_nan_stats=True,
               name='FiniteDiscrete'):
    """Construct a finite discrete contribution.

    Args:
      outcomes: A 1-D floating or integer `Tensor`, representing a list of
        possible outcomes in strictly ascending order.
      logits: A floating N-D `Tensor`, `N >= 1`, representing the log
        probabilities of a set of FiniteDiscrete distributions. The first `N -
        1` dimensions index into a batch of independent distributions and the
        last dimension represents a vector of logits for each discrete value.
        Only one of `logits` or `probs` should be passed in.
      probs: A floating  N-D `Tensor`, `N >= 1`, representing the probabilities
        of a set of FiniteDiscrete distributions. The first `N - 1` dimensions
        index into a batch of independent distributions and the last dimension
        represents a vector of probabilities for each discrete value. Only one
        of `logits` or `probs` should be passed in.
      rtol: `Tensor` with same `dtype` as `outcomes`. The relative tolerance for
        floating number comparison. Only effective when `outcomes` is a floating
        `Tensor`. Default is `10 * eps`.
      atol: `Tensor` with same `dtype` as `outcomes`. The absolute tolerance for
        floating number comparison. Only effective when `outcomes` is a floating
        `Tensor`. Default is `10 * eps`.
      validate_args:  Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may render incorrect outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or more
        of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
    parameters = dict(locals())
    with tf.compat.v1.name_scope(
        name, values=[outcomes, logits, probs]) as name:
      self._outcomes = tf.convert_to_tensor(value=outcomes, name='outcomes')
      if validate_args:
        assertions = _maybe_validate_args(self._outcomes, logits, probs,
                                          validate_args)
        with tf.control_dependencies(assertions):
          self._outcomes = tf.identity(self._outcomes)
      if dtype_util.is_floating(self._outcomes.dtype):
        eps = np.finfo(dtype_util.as_numpy_dtype(self._outcomes.dtype)).eps
        self._rtol = 10 * eps if rtol is None else rtol
        self._atol = 10 * eps if atol is None else atol
      else:
        self._rtol = None
        self._atol = None
      self._categorical = categorical.Categorical(
          logits=logits,
          probs=probs,
          dtype=tf.int32,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats)
    super(FiniteDiscrete, self).__init__(
        dtype=self._outcomes.dtype,
        reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[
            self._outcomes, self._categorical.logits, self._categorical.probs
        ],
        name=name)
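A minimal usage sketch, assuming the public `tfp.distributions.FiniteDiscrete` wrapper around this constructor:

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# Distribution over the outcomes {1., 2., 4.} with the given probabilities.
dist = tfd.FiniteDiscrete(outcomes=[1., 2., 4.], probs=[0.2, 0.3, 0.5])
print(dist.prob(2.))    # => 0.3 (matched up to the default atol/rtol slack).
print(dist.mean())      # => 0.2 * 1 + 0.3 * 2 + 0.5 * 4 = 2.8
print(dist.sample(3))   # Three draws from {1., 2., 4.}.
```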
Example #5
  def __init__(self,
               df,
               loc,
               scale,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateStudentTLinearOperator"):
    """Construct Multivariate Student's t-distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `df`, `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` must broadcast with this.

    Additional leading dimensions (if any) will index batches.

    Args:
      df: A positive floating-point `Tensor`. Has shape `[B1, ..., Bb]` where `b
        >= 0`.
      loc: Floating-point `Tensor`. Has shape `[B1, ..., Bb, k]` where `k` is
        the event size.
      scale: Instance of `LinearOperator` with a floating `dtype` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are invalid,
        correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/variance/etc...) is undefined for
        any batch member. If `True`, batch members with valid parameters leading
        to undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      TypeError: if not `scale.dtype.is_floating`.
      ValueError: if not `scale.is_positive_definite`.
    """
    parameters = dict(locals())
    if not dtype_util.is_floating(scale.dtype):
      raise TypeError("`scale` must have floating-point dtype.")
    if validate_args and not scale.is_positive_definite:
      raise ValueError("`scale` must be positive definite.")

    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([df, loc, scale],
                                      preferred_dtype=tf.float32)

      with tf.control_dependencies([
          assert_util.assert_positive(df, message="`df` must be positive.")
      ] if validate_args else []):
        self._df = tf.identity(
            tf.convert_to_tensor(value=df, dtype=dtype), name="df")
      self._loc = tf.convert_to_tensor(value=loc, name="loc", dtype=dtype)
      self._scale = scale

    super(MultivariateStudentTLinearOperator, self).__init__(
        dtype=dtype,
        reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
        parameters=parameters,
        graph_parents=[self._df, self._loc] + self._scale.graph_parents,
        name=name,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats)
    self._parameters = parameters
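A usage sketch, assuming the public `tfp.distributions.MultivariateStudentTLinearOperator` API described by this constructor:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

dist = tfd.MultivariateStudentTLinearOperator(
    df=3.,
    loc=[1., -1.],
    scale=tf.linalg.LinearOperatorLowerTriangular([[1.0, 0.0],
                                                   [0.5, 2.0]]))
print(dist.event_shape)         # [2]
print(dist.sample(5).shape)     # (5, 2)
print(dist.log_prob([0., 0.]))  # Log-density at the origin.
```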
Example #6
def assign_log_moving_mean_exp(log_value,
                               moving_log_mean_exp,
                               zero_debias_count=None,
                               decay=0.99,
                               name=None):
    """Compute the log of the exponentially weighted moving mean of the exp.

  If `log_value` is a draw from a stationary random variable, this function
  approximates `log(E[exp(log_value)])`, i.e., a weighted log-sum-exp. More
  precisely, a `tf.Variable`, `moving_log_mean_exp`, is updated by `log_value`
  using the following identity:

  ```none
  moving_log_mean_exp
  = log(decay * exp(moving_log_mean_exp) + (1 - decay) * exp(log_value))
  = log(exp(moving_log_mean_exp + log(decay)) + exp(log_value + log1p(-decay)))
  = moving_log_mean_exp
    + log(  exp(moving_log_mean_exp - moving_log_mean_exp + log(decay))
          + exp(log_value - moving_log_mean_exp + log1p(-decay)))
  = moving_log_mean_exp
    + log_sum_exp([log(decay), log_value - moving_log_mean_exp + log1p(-decay)])
  ```

  In addition to numerical stability, this formulation is advantageous because
  `moving_log_mean_exp` can be updated in a lock-free manner, i.e., using
  `assign_add`. (Note: the updates are not thread-safe; it's just that the
  update to the tf.Variable is presumed efficient due to being lock-free.)

  Args:
    log_value: `float`-like `Tensor` representing a new (streaming) observation.
      Same shape as `moving_log_mean_exp`.
    moving_log_mean_exp: `float`-like `Variable` representing the log of the
      exponentially weighted moving mean of the exp. Same shape as `log_value`.
    zero_debias_count: `int`-like `tf.Variable` representing the number of times
      this function has been called on streaming input (*not* the number of
      reduced values used in this function's computation). When not `None`, the
      returned value is "zero debiased", i.e., corrected for the presumed
      all-zeros initialization. Note: the `tf.Variable` `moving_log_mean_exp`
      *always* stores the raw (not zero-debiased) calculation, regardless of
      this argument. To obtain a zero-debiased calculation from such a
      `tf.Variable`, see `tfp.stats.moving_mean_variance_zero_debiased`.
      Default value: `None` (i.e., no zero debiasing calculation is made).
    decay: A `float`-like `Tensor` representing the moving mean decay. Typically
      close to `1.`, e.g., `0.99`.
      Default value: `0.99`.
    name: Python `str` prepended to op names created by this function.
      Default value: `None` (i.e., 'assign_log_moving_mean_exp').

  Returns:
    moving_log_mean_exp: A reference to the input `Variable` tensor with the
      `log_value`-updated log of the exponentially weighted moving mean of exp.

  Raises:
    TypeError: if `moving_log_mean_exp` does not have float type `dtype`.
    TypeError: if `moving_log_mean_exp`, `log_value`, `decay` have different
      `base_dtype`.
  """
    if zero_debias_count is not None:
        raise NotImplementedError(
            'Argument `zero_debias_count` is not yet supported. If you require '
            'this feature please create a new issue on '
            '`https://github.com/tensorflow/probability` or email '
            '`[email protected]`.')
    with tf.name_scope(name or 'assign_log_moving_mean_exp'):
        # We want to update the variable in a numerically stable and lock-free way.
        # To do this, observe that variable `x` updated by `v` is:
        # x = log(w exp(x) + (1-w) exp(v))
        #   = log(exp(x + log(w)) + exp(v + log1p(-w)))
        #   = x + log(exp(x - x + log(w)) + exp(v - x + log1p(-w)))
        #   = x + lse([log(w), v - x + log1p(-w)])
        base_dtype = dtype_util.base_dtype(moving_log_mean_exp.dtype)
        if not dtype_util.is_floating(base_dtype):
            raise TypeError(
                'Argument `moving_log_mean_exp` is not float type (saw {}).'.
                format(dtype_util.name(moving_log_mean_exp.dtype)))
        log_value = tf.convert_to_tensor(log_value,
                                         dtype=base_dtype,
                                         name='log_value')
        decay = tf.convert_to_tensor(decay, dtype=base_dtype, name='decay')
        delta = (log_value - moving_log_mean_exp)[tf.newaxis, ...]
        x = tf.concat([
            tf.broadcast_to(
                tf.math.log(decay),
                ps.broadcast_shape(ps.shape(decay), ps.shape(delta))),
            delta + tf.math.log1p(-decay)
        ],
                      axis=0)
        update = tf.reduce_logsumexp(x, axis=0)
        return moving_log_mean_exp.assign_add(update)
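A small numerical check of the docstring identity `new = log(decay * exp(old) + (1 - decay) * exp(log_value))`, assuming the function above is importable as `assign_log_moving_mean_exp`:

```python
import numpy as np
import tensorflow as tf

moving = tf.Variable(0., dtype=tf.float32)
updated = assign_log_moving_mean_exp(
    log_value=1., moving_log_mean_exp=moving, decay=0.9)

# Reference value computed directly from the identity.
expected = np.log(0.9 * np.exp(0.) + 0.1 * np.exp(1.))
print(updated.numpy(), expected)  # Both ~0.1586.
```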
Example #7
def auto_correlation(x,
                     axis=-1,
                     max_lags=None,
                     center=True,
                     normalize=True,
                     name='auto_correlation'):
    """Auto correlation along one axis.

  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
  `RXX` may be defined as  (with `E` expectation and `Conj` complex conjugate)

  ```
  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
  W[n]   := (X[n] - MU) / S,
  MU     := E{ X[0] },
  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
  ```

  This function takes the viewpoint that `x` is (along one axis) a finite
  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
  estimate of `RXX[m]` as follows:

  After extending `x` from length `L` to `inf` by zero padding, the auto
  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as

  ```
  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
  w[n]   := (x[n] - mu) / s,
  mu     := L**-1 sum_n x[n],
  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
  ```

  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so users
  often set `max_lags` small enough so that the entire output is meaningful.

  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide by
  `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto correlation
  contains a slight bias, which goes to zero as `len(x) - m --> infinity`.

  Args:
    x:  `float32` or `complex64` `Tensor`.
    axis:  Python `int`. The axis number along which to compute correlation.
      Other dimensions index different batch members.
    max_lags:  Positive `int` tensor.  The maximum value of `m` to consider (in
      equation above).  If `max_lags >= x.shape[axis]`, we effectively re-set
      `max_lags` to `x.shape[axis] - 1`.
    center:  Python `bool`.  If `False`, do not subtract the mean estimate `mu`
      from `x[n]` when forming `w[n]`.
    normalize:  Python `bool`.  If `False`, do not divide by the variance
      estimate `s**2` when forming `w[n]`.
    name:  `String` name to prepend to created ops.

  Returns:
    `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.

  Raises:
    TypeError:  If `x` is not a supported type.
  """
    # Implementation details:
    # Extend length N / 2 1-D array x to length N by zero padding onto the end.
    # Then, set
    #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
    # It is not hard to see that
    #   F[x]_k Conj(F[x]_k) = F[R]_k, where
    #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
    # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].

    # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
    # based version of estimating RXX.
    # Note that this is a special case of the Wiener-Khinchin Theorem.
    with tf.name_scope(name):
        x = tf.convert_to_tensor(x, name='x')

        # Rotate dimensions of x in order to put axis at the rightmost dim.
        # FFT op requires this.
        rank = prefer_static.rank(x)
        if axis < 0:
            axis = rank + axis
        shift = rank - 1 - axis
        # Suppose x.shape[axis] = T, so there are T 'time' steps.
        #   ==> x_rotated.shape = B + [T],
        # where B is x_rotated's batch shape.
        x_rotated = distribution_util.rotate_transpose(x, shift)

        if center:
            x_rotated -= tf.reduce_mean(x_rotated, axis=-1, keepdims=True)

        # x_len = N / 2 from above explanation.  The length of x along axis.
        # Get a value for x_len that works in all cases.
        x_len = prefer_static.shape(x_rotated)[-1]

        # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
        # the moment is necessary so that all FFT implementations work.
        # Zero pad to the next power of 2 greater than 2 * x_len, which equals
        # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
        x_len_float64 = tf.cast(x_len, np.float64)
        target_length = tf.pow(
            np.float64(2.),
            tf.math.ceil(tf.math.log(x_len_float64 * 2) / np.log(2.)))
        pad_length = tf.cast(target_length - x_len_float64, np.int32)

        # We should have:
        # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
        #                     = B + [T + pad_length]
        x_rotated_pad = distribution_util.pad(x_rotated,
                                              axis=-1,
                                              back=True,
                                              count=pad_length)

        dtype = x.dtype
        if not dtype_util.is_complex(dtype):
            if not dtype_util.is_floating(dtype):
                raise TypeError(
                    'Argument x must have either float or complex dtype'
                    ' found: {}'.format(dtype))
            x_rotated_pad = tf.complex(
                x_rotated_pad,
                dtype_util.as_numpy_dtype(dtype_util.real_dtype(dtype))(0.))

        # Autocorrelation is IFFT of power-spectral density (up to some scaling).
        fft_x_rotated_pad = tf.signal.fft(x_rotated_pad)
        spectral_density = fft_x_rotated_pad * tf.math.conj(fft_x_rotated_pad)
        # shifted_product is R[m] from above detailed explanation.
        # It is the inner product sum_n X[n] * Conj(X[n - m]).
        shifted_product = tf.signal.ifft(spectral_density)

        # Cast back to real-valued if x was real to begin with.
        shifted_product = tf.cast(shifted_product, dtype)

        # Figure out if we can deduce the final static shape, and set max_lags.
        # Use x_rotated as a reference, because it has the time dimension in the far
        # right, and was created before we performed all sorts of crazy shape
        # manipulations.
        know_static_shape = True
        if not tensorshape_util.is_fully_defined(x_rotated.shape):
            know_static_shape = False
        if max_lags is None:
            max_lags = x_len - 1
        else:
            max_lags = tf.convert_to_tensor(max_lags, name='max_lags')
            max_lags_ = tf.get_static_value(max_lags)
            if max_lags_ is None or not know_static_shape:
                know_static_shape = False
                max_lags = tf.minimum(x_len - 1, max_lags)
            else:
                max_lags = min(x_len - 1, max_lags_)

        # Chop off the padding.
        # We allow users to provide a huge max_lags, but cut it off here.
        # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
        shifted_product_chopped = shifted_product[..., :max_lags + 1]

        # If possible, set shape.
        if know_static_shape:
            chopped_shape = tensorshape_util.as_list(x_rotated.shape)
            chopped_shape[-1] = min(x_len, max_lags + 1)
            shifted_product_chopped.set_shape(chopped_shape)

        # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
        # other terms were zeros arising only due to zero padding.
        # `denominator = (N / 2 - m)` (defined below) is the proper term to
        # divide by to make this an unbiased estimate of the expectation
        # E[X[n] Conj(X[n - m])].
        x_len = tf.cast(x_len, dtype_util.real_dtype(dtype))
        max_lags = tf.cast(max_lags, dtype_util.real_dtype(dtype))
        denominator = x_len - tf.range(0., max_lags + 1.)
        denominator = tf.cast(denominator, dtype)
        shifted_product_rotated = shifted_product_chopped / denominator

        if normalize:
            shifted_product_rotated /= shifted_product_rotated[..., :1]

        # Transpose dimensions back to those of x.
        return distribution_util.rotate_transpose(shifted_product_rotated,
                                                  -shift)
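A usage sketch, assuming this function is exposed as `tfp.stats.auto_correlation`:

```python
import tensorflow as tf
import tensorflow_probability as tfp

x = tf.random.normal([1000])  # Approximately white noise.
rxx = tfp.stats.auto_correlation(x, max_lags=5)
print(rxx.shape)  # (6,) -- lags 0 through max_lags.
print(rxx[0])     # Exactly 1.0 when normalize=True (the default).
# Lags 1..5 should hover near 0 for uncorrelated data.
```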
Example #8
File: lkj.py  Project: wenfahu/probability
    def _sample_n(self, num_samples, seed=None, name=None):
        """Returns a Tensor of samples from an LKJ distribution.

    Args:
      num_samples: Python `int`. The number of samples to draw.
      seed: Python integer seed for RNG
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      samples: A Tensor of correlation matrices with shape `[n, B, D, D]`,
        where `B` is the shape of the `concentration` parameter, and `D`
        is the `dimension`.

    Raises:
      ValueError: If `dimension` is negative.
    """
        if self.dimension < 0:
            raise ValueError(
                'Cannot sample negative-dimension correlation matrices.')
        # Notation below: B is the batch shape, i.e., tf.shape(concentration)
        seed = seed_stream.SeedStream(seed, 'sample_lkj')
        with tf.name_scope(name or 'sample_lkj'):
            if not dtype_util.is_floating(self.concentration.dtype):
                raise TypeError(
                    'The concentration argument should have floating type, not '
                    '{}'.format(dtype_util.name(self.concentration.dtype)))

            concentration = _replicate(num_samples, self.concentration)
            concentration_shape = tf.shape(input=concentration)
            if self.dimension <= 1:
                # For any dimension <= 1, there is only one possible correlation matrix.
                shape = tf.concat(
                    [concentration_shape, [self.dimension, self.dimension]],
                    axis=0)
                return tf.ones(shape=shape, dtype=self.concentration.dtype)
            beta_conc = concentration + (self.dimension - 2.) / 2.
            beta_dist = beta.Beta(concentration1=beta_conc,
                                  concentration0=beta_conc)

            # Note that the sampler below deviates from [1], by doing the sampling in
            # cholesky space. This does not change the fundamental logic of the
            # sampler, but does speed up the sampling.

            # This is the correlation coefficient between the first two dimensions.
            # This is also `r` in reference [1].
            corr12 = 2. * beta_dist.sample(seed=seed()) - 1.

            # Below we construct the Cholesky of the initial 2x2 correlation matrix,
            # which is of the form:
            # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
            # first two dimensions.
            # This is the top-left corner of the cholesky of the final sample.
            first_row = tf.concat([
                tf.ones_like(corr12)[..., tf.newaxis],
                tf.zeros_like(corr12)[..., tf.newaxis]
            ],
                                  axis=-1)
            second_row = tf.concat([
                corr12[..., tf.newaxis],
                tf.sqrt(1 - corr12**2)[..., tf.newaxis]
            ],
                                   axis=-1)

            chol_result = tf.concat([
                first_row[..., tf.newaxis, :], second_row[..., tf.newaxis, :]
            ],
                                    axis=-2)

            for n in range(2, self.dimension):
                # Loop invariant: on entry, result has shape B + [n, n]
                beta_conc -= 0.5
                # norm is y in reference [1].
                norm = beta.Beta(concentration1=n / 2.,
                                 concentration0=beta_conc).sample(seed=seed())
                # distance shape: B + [1] for broadcast
                distance = tf.sqrt(norm)[..., tf.newaxis]
                # direction is u in reference [1].
                # direction shape: B + [n]
                direction = _uniform_unit_norm(n, concentration_shape,
                                               self.concentration.dtype, seed)
                # raw_correlation is w in reference [1].
                raw_correlation = distance * direction  # shape: B + [n]

                # This is the next row in the cholesky of the result,
                # which differs from the construction in reference [1].
                # In the reference, the new row `z` = chol_result @ raw_correlation^T
                # = C @ raw_correlation^T (where as short hand we use C = chol_result).
                # We prove that the below equation is the right row to add to the
                # cholesky, by showing equality with reference [1].
                # Let S be the sample constructed so far, and let `z` be as in
                # reference [1]. Then at this iteration, the new sample S' will be
                # [[S z^T]
                #  [z 1]]
                # In our case we have the cholesky decomposition factor C, so
                # we want our new row x (same size as z) to satisfy:
                #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
                #   [z 1]] =  [x k]]    [0     k]]  =       [xC^T   xx^T + k**2]]
                # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
                # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
                # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
                # distance**2).
                new_row = tf.concat(
                    [raw_correlation,
                     tf.sqrt(1. - norm[..., tf.newaxis])],
                    axis=-1)

                # Finally add this new row, by growing the cholesky of the result.
                chol_result = tf.concat([
                    chol_result,
                    tf.zeros_like(chol_result[..., 0][..., tf.newaxis])
                ],
                                        axis=-1)

                chol_result = tf.concat(
                    [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

            if self.input_output_cholesky:
                return chol_result

            result = tf.matmul(chol_result, chol_result, transpose_b=True)
            # The diagonal for a correlation matrix should always be ones. Due to
            # numerical instability the matmul might not achieve that, so manually set
            # these to ones.
            result = tf.linalg.set_diag(
                result,
                tf.ones(shape=tf.shape(input=result)[:-1], dtype=result.dtype))
            # This sampling algorithm can produce near-PSD matrices on which standard
            # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
            # fail. Specifically, as documented in b/116828694, around 2% of trials
            # of 900,000 5x5 matrices (distributed according to 9 different
            # concentration parameter values) contained at least one matrix on which
            # the Cholesky decomposition failed.
            return result
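A usage sketch through the public wrapper, assuming this `_sample_n` backs `tfp.distributions.LKJ`:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

lkj = tfd.LKJ(dimension=3, concentration=1.5)
samples = lkj.sample(2, seed=42)
print(samples.shape)                 # (2, 3, 3)
print(tf.linalg.diag_part(samples))  # Unit diagonals, as set explicitly above.
```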
Example #9
    def __init__(self,
                 df,
                 scale_operator,
                 input_output_cholesky=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name=None):
        """Construct Wishart distributions.

    Args:
      df: `float` or `double` tensor, the degrees of freedom of the
        distribution(s). `df` must be greater than or equal to `k`.
      scale_operator: `float` or `double` instance of `LinearOperator`.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if scale is not floating-type
      TypeError: if scale.dtype != df.dtype
      ValueError: if df < k, where scale operator event shape is
        `(k, k)`
    """
        parameters = dict(locals())
        self._input_output_cholesky = input_output_cholesky
        with tf.name_scope(name) as name:
            with tf.name_scope("init"):
                if not dtype_util.is_floating(scale_operator.dtype):
                    raise TypeError(
                        "scale_operator.dtype=%s is not a floating-point type"
                        % scale_operator.dtype)
                if not scale_operator.is_square:
                    raise ValueError("scale_operator must be square.")

                self._scale_operator = scale_operator
                self._df = tf.convert_to_tensor(value=df,
                                                dtype=scale_operator.dtype,
                                                name="df")
                tf.debugging.assert_same_float_dtype(
                    [self._df, self._scale_operator])
                if tf.compat.dimension_value(
                        self._scale_operator.shape[-1]) is None:
                    self._dimension = tf.cast(
                        self._scale_operator.domain_dimension_tensor(),
                        dtype=self._scale_operator.dtype,
                        name="dimension")
                else:
                    self._dimension = tf.convert_to_tensor(
                        value=tf.compat.dimension_value(
                            self._scale_operator.shape[-1]),
                        dtype=self._scale_operator.dtype,
                        name="dimension")
                df_val = tf.get_static_value(self._df)
                dim_val = tf.get_static_value(self._dimension)
                if df_val is not None and dim_val is not None:
                    df_val = np.asarray(df_val)
                    if not df_val.shape:
                        df_val = [df_val]
                    if np.any(df_val < dim_val):
                        raise ValueError(
                            "Degrees of freedom (df = %s) cannot be less than "
                            "dimension of scale matrix (scale.dimension = %s)"
                            % (df_val, dim_val))
                elif validate_args:
                    assertions = assert_util.assert_less_equal(
                        self._dimension,
                        self._df,
                        message=("Degrees of freedom (df = %s) cannot be "
                                 "less than dimension of scale matrix "
                                 "(scale.dimension = %s)" %
                                 (self._df, self._dimension)))
                    self._df = distribution_util.with_dependencies(
                        [assertions], self._df)
        super(_WishartLinearOperator, self).__init__(
            dtype=self._scale_operator.dtype,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
            parameters=parameters,
            graph_parents=([self._df, self._dimension] +
                           self._scale_operator.graph_parents),
            name=name)
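A construction sketch for the class above. `_WishartLinearOperator` is a private base class, so treat this as illustrative only; the direct call assumes the signature shown in this constructor:

```python
import tensorflow as tf

scale = tf.linalg.LinearOperatorLowerTriangular([[2.0, 0.0],
                                                 [0.5, 1.0]])
wishart = _WishartLinearOperator(df=4., scale_operator=scale, name='Wishart')
samples = wishart.sample(3)  # Three 2x2 positive-definite draws.
print(samples.shape)         # (3, 2, 2)
print(wishart.log_prob(tf.eye(2)))
```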
Example #10
def find_bins(x,
              edges,
              extend_lower_interval=False,
              extend_upper_interval=False,
              dtype=None,
              name=None):
    """Bin values into discrete intervals.

  Given `edges = [c0, ..., cK]`, defining intervals
  `I0 = [c0, c1)`, `I1 = [c1, c2)`, ..., `I_{K-1} = [c_{K-1}, cK]`,
  this function returns `bins`, such that:
  `edges[bins[i]] <= x[i] < edges[bins[i] + 1]`.

  Args:
    x:  Numeric `N-D` `Tensor` with `N > 0`.
    edges:  `Tensor` of same `dtype` as `x`.  The first dimension indexes edges
      of intervals.  Must either be `1-D` or have
      `x.shape[1:] == edges.shape[1:]`.  If `rank(edges) > 1`, `edges[k]`
      designates a shape `edges.shape[1:]` `Tensor` of bin edges for the
      corresponding dimensions of `x`.
    extend_lower_interval:  Python `bool`.  If `True`, extend the lowest
      interval `I0` to `(-inf, c1]`.
    extend_upper_interval:  Python `bool`.  If `True`, extend the upper
      interval `I_{K-1}` to `[c_{K-1}, +inf)`.
    dtype: The output type (`int32` or `int64`). Default value: `x.dtype`.
      This affects the output values when `x` is below/above the intervals,
      which will be `-1`/`K` for `int` types and `NaN` for `float`s.
      At indices where `x` is `NaN`, the output values will be `0` for `int`
      types and `NaN` for floats.
    name:  A Python string name to prepend to created ops. Default: 'find_bins'

  Returns:
    bins: `Tensor` with same `shape` as `x` and `dtype`.
      Has whole number values.  `bins[i] = k` means the `x[i]` falls into the
      `kth` bin, ie, `edges[bins[i]] <= x[i] < edges[bins[i] + 1]`.

  Raises:
    ValueError:  If `edges.shape[0]` is determined to be less than 2.

  #### Examples

  Cut a `1-D` array

  ```python
  x = [0., 5., 6., 10., 20.]
  edges = [0., 5., 10.]
  tfp.stats.find_bins(x, edges)
  ==> [0., 0., 1., 1., np.nan]
  ```

  Cut `x` into its deciles

  ```python
  x = tf.random_uniform(shape=(100, 200))
  decile_edges = tfp.stats.quantiles(x, num_quantiles=10)
  bins = tfp.stats.find_bins(x, edges=decile_edges)
  bins.shape
  ==> (100, 200)
  tf.reduce_mean(bins == 0.)
  ==> approximately 0.1
  tf.reduce_mean(bins == 1.)
  ==> approximately 0.1
  ```

  """
    # TFP users may be surprised to see the "action" in the leftmost dim of
    # edges, rather than the rightmost (event) dim.  Why?
    # 1. Most likely you created edges by getting quantiles over samples, and
    #    quantile/percentile return these edges in the leftmost (sample) dim.
    # 2. Say you have event_shape = [5], then we expect the bin will be different
    #    for all 5 events, so the index of the bin should not be in the event dim.
    with tf.name_scope(name or 'find_bins'):
        in_type = dtype_util.common_dtype([x, edges], dtype_hint=tf.float32)
        edges = tf.convert_to_tensor(edges, name='edges', dtype=in_type)
        x = tf.convert_to_tensor(x, name='x', dtype=in_type)

        if (tf.compat.dimension_value(edges.shape[0]) is not None
                and tf.compat.dimension_value(edges.shape[0]) < 2):
            raise ValueError(
                'First dimension of `edges` must have length > 1 to index 1 or '
                'more bin. Found: {}'.format(edges.shape))

        flattening_x = (tensorshape_util.rank(edges.shape) == 1
                        and tensorshape_util.rank(x.shape) > 1)

        if flattening_x:
            x_orig_shape = tf.shape(x)
            x = tf.reshape(x, [-1])

        if dtype is None:
            dtype = in_type
        dtype = tf.as_dtype(dtype)

        # Move first dims into the rightmost.
        x_permed = distribution_util.rotate_transpose(x, shift=-1)
        edges_permed = distribution_util.rotate_transpose(edges, shift=-1)

        # If...
        #   x_permed = [0, 1, 6., 10]
        #   edges = [0, 5, 10.]
        #   ==> almost_output = [0, 1, 2, 2]
        searchsorted_type = dtype if dtype in [tf.int32, tf.int64] else None
        almost_output_permed = tf.searchsorted(sorted_sequence=edges_permed,
                                               values=x_permed,
                                               side='right',
                                               out_type=searchsorted_type)
        # Move the rightmost dims back to the leftmost.
        almost_output = tf.cast(
            distribution_util.rotate_transpose(almost_output_permed, shift=1),
            dtype)

        # In above example, we want [0, 0, 1, 1], so correct this here.
        bins = tf.clip_by_value(almost_output - 1, tf.cast(0, dtype),
                                tf.cast(tf.shape(edges)[0] - 2, dtype))

        if not extend_lower_interval:
            low_fill = np.nan if dtype_util.is_floating(dtype) else -1
            bins = tf.where(x < tf.expand_dims(edges[0], 0),
                            tf.cast(low_fill, dtype), bins)

        if not extend_upper_interval:
            up_fill = (np.nan if dtype_util.is_floating(dtype) else
                       tf.shape(edges)[0] - 1)
            bins = tf.where(x > tf.expand_dims(edges[-1], 0),
                            tf.cast(up_fill, dtype), bins)

        if flattening_x:
            bins = tf.reshape(bins, x_orig_shape)

        return bins
Example #11
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 experimental_use_kahan_sum=False,
                 name='MultivariateNormalLinearOperator'):
        """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      experimental_use_kahan_sum: Python `bool`. When `True`, we use Kahan
        summation to aggregate independent underlying log_prob values. For best
        results, Kahan summation should also be applied when computing the
        log-determinant of the `LinearOperator` representing the scale matrix.
        Kahan summation improves on the precision of a naive float32 sum.
        This can be noticeable in particular for large dimensions in float32.
        See CPU caveat on `tfp.math.reduce_kahan_sum`.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
        parameters = dict(locals())
        self._experimental_use_kahan_sum = experimental_use_kahan_sum
        if scale is None:
            raise ValueError('Missing required `scale` parameter.')
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                '`scale` parameter must have floating-point dtype.')

        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([loc, scale],
                                            dtype_hint=tf.float32)
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            loc = tensor_util.convert_nonref_to_tensor(loc,
                                                       dtype=dtype,
                                                       name='loc')
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)
        self._loc = loc
        self._scale = scale

        bijector = scale_matvec_linear_operator.ScaleMatvecLinearOperator(
            scale, validate_args=validate_args)
        if loc is not None:
            bijector = shift_bijector.Shift(
                shift=loc, validate_args=validate_args)(bijector)
        super(MultivariateNormalLinearOperator, self).__init__(
            # TODO(b/137665504): Use batch-adding meta-distribution to set the batch
            # shape instead of tf.zeros.
            # We use `Sample` instead of `Independent` because `Independent`
            # requires concatenating `batch_shape` and `event_shape`, which loses
            # static `batch_shape` information when `event_shape` is not statically
            # known.
            distribution=sample.Sample(
                normal.Normal(loc=tf.zeros(batch_shape, dtype=dtype),
                              scale=tf.ones([], dtype=dtype)),
                event_shape,
                experimental_use_kahan_sum=experimental_use_kahan_sum),
            bijector=bijector,
            validate_args=validate_args,
            name=name)
        self._parameters = parameters
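A usage sketch, assuming the public `tfp.distributions.MultivariateNormalLinearOperator` API described by this constructor:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

chol = tf.linalg.LinearOperatorLowerTriangular([[1.0, 0.0],
                                                [0.4, 2.0]])
mvn = tfd.MultivariateNormalLinearOperator(loc=[1., -1.], scale=chol)
print(mvn.covariance())     # == chol @ chol^T
print(mvn.sample(4).shape)  # (4, 2)
```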
Example #12
def sample_lkj(
    num_samples,
    dimension,
    concentration,
    cholesky_space=False,
    seed=None,
    name=None):
  """Returns a Tensor of samples from an LKJ distribution.

  Args:
    num_samples: Python `int`. The number of samples to draw.
    dimension: Python `int`. The dimension of correlation matrices.
    concentration: `Tensor` representing the concentration of the LKJ
      distribution.
    cholesky_space: Python `bool`. Whether to take samples from LKJ or
      Chol(LKJ).
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    samples: A Tensor of correlation matrices (or Cholesky factors of
      correlation matrices if `cholesky_space = True`) with shape
      `[n] + B + [D, D]`, where `B` is the shape of the `concentration`
      parameter, and `D` is the `dimension`.

  Raises:
    ValueError: If `dimension` is negative.
  """
  if dimension < 0:
    raise ValueError(
        'Cannot sample negative-dimension correlation matrices.')
  # Notation below: B is the batch shape, i.e., tf.shape(concentration)

  with tf.name_scope(name or 'sample_lkj'):
    concentration = tf.convert_to_tensor(concentration)
    if not dtype_util.is_floating(concentration.dtype):
      raise TypeError(
          'The concentration argument should have floating type, not '
          '{}'.format(dtype_util.name(concentration.dtype)))

    batch_shape = ps.concat([[num_samples], ps.shape(concentration)], axis=0)
    dtype = concentration.dtype
    if dimension <= 1:
      # For any dimension <= 1, there is only one possible correlation matrix.
      shape = ps.concat([batch_shape, [dimension, dimension]], axis=0)
      return tf.ones(shape=shape, dtype=dtype)

    # We need 1 seed for beta and 1 seed for tril_spherical_uniform.
    beta_seed, tril_spherical_uniform_seed = samplers.split_seed(
        seed, n=2, salt='sample_lkj')

    # Note that the sampler below deviates from [1], by doing the sampling in
    # cholesky space. This does not change the fundamental logic of the
    # sampler, but does speed up the sampling.
    # In addition, we also vectorize the computation to make the sampler
    # more feasible to use in problems where `dimension` is large.

    beta_conc = concentration + (dimension - 2.) / 2.
    dimension_range = np.arange(
        1., dimension, dtype=dtype_util.as_numpy_dtype(dtype))
    beta_conc1 = dimension_range / 2.
    beta_conc0 = beta_conc[..., tf.newaxis] - (dimension_range - 1) / 2.
    beta_dist = beta.Beta(concentration1=beta_conc1, concentration0=beta_conc0)
    # norm is y in reference [1].
    norm = beta_dist.sample(sample_shape=[num_samples], seed=beta_seed)
    # distance shape: B + [dimension - 1, 1] for broadcast
    distance = tf.sqrt(norm)[..., tf.newaxis]

    # direction is u in reference [1].
    # direction follows the spherical uniform distribution and will be stored
    # in a lower triangular matrix, hence it will have shape:
    # B + [dimension - 1, dimension - 1]
    direction = _tril_spherical_uniform(dimension - 1, batch_shape, dtype,
                                        tril_spherical_uniform_seed)

    # raw_correlation is w in reference [1].
    # shape: B + [dimension - 1, dimension - 1]
    raw_correlation = distance * direction

    # This is the rows in the cholesky of the result,
    # which differs from the construction in reference [1].
    # In the reference, the new row `z` = chol_result @ raw_correlation^T
    # = C @ raw_correlation^T (where, as shorthand, we use C = chol_result).
    # We prove that the below equation is the right row to add to the
    # cholesky, by showing equality with reference [1].
    # Let S be the sample constructed so far, and let `z` be as in
    # reference [1]. Then at this iteration, the new sample S' will be
    # [[S z^T]
    #  [z 1]]
    # In our case we have the cholesky decomposition factor C, so
    # we want our new row x (same size as z) to satisfy:
    #  [[S z^T]  [[C 0]    [[C^T  x^T]         [[CC^T  Cx^T]
    #   [z 1]] =  [x k]]    [0     k]]  =       [xC^T   xx^T + k**2]]
    # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
    # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
    # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
    # distance**2).
    paddings_prepend = [[0, 0]] * len(batch_shape)
    diag = tf.pad(
        tf.sqrt(1. - norm), paddings_prepend + [[1, 0]], constant_values=1.)
    chol_result = tf.pad(
        raw_correlation,
        paddings_prepend + [[1, 0], [0, 1]],
        constant_values=0.)
    chol_result = tf.linalg.set_diag(chol_result, diag)

    if cholesky_space:
      return chol_result

    result = tf.matmul(chol_result, chol_result, transpose_b=True)
    # The diagonal for a correlation matrix should always be ones. Due to
    # numerical instability the matmul might not achieve that, so manually set
    # these to ones.
    result = tf.linalg.set_diag(
        result, tf.ones(shape=ps.shape(result)[:-1], dtype=result.dtype))
    # This sampling algorithm can produce near-PSD matrices on which standard
    # algorithms such as `tf.linalg.cholesky` or
    # `tf.linalg.self_adjoint_eigvals` fail. Specifically, as documented in
    # b/116828694, around 2% of trials of 900,000 5x5 matrices (distributed
    # according to 9 different concentration parameter values) contained at
    # least one matrix on which the Cholesky decomposition failed.
    return result
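
# A hedged usage sketch: the public tfd.LKJ distribution draws its samples via
# sample_lkj above; shapes follow the Returns note ([n] + B + [D, D]).
import tensorflow_probability as tfp
tfd = tfp.distributions

lkj = tfd.LKJ(dimension=3, concentration=1.5)
samples = lkj.sample(2, seed=42)
print(samples.shape)  # (2, 3, 3); symmetric matrices with unit diagonal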
Example #13
    def __init__(self,
                 rate=None,
                 log_rate=None,
                 interpolate_nondiscrete=True,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Poisson"):
        """Initialize a batch of Poisson distributions.

    Args:
      rate: Floating point tensor, the rate parameter. `rate` must be positive.
        Must specify exactly one of `rate` and `log_rate`.
      log_rate: Floating point tensor, the log of the rate parameter.
        Must specify exactly one of `rate` and `log_rate`.
      interpolate_nondiscrete: Python `bool`. When `False`,
        `log_prob` returns `-inf` (and `prob` returns `0`) for non-integer
        inputs. When `True`, `log_prob` evaluates the continuous function
        `k * log_rate - lgamma(k+1) - rate`, which matches the Poisson pmf
        at integer arguments `k` (note that this function is not itself
        a normalized probability log-density).
        Default value: `True`.
      validate_args: Python `bool`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if none or both of `rate`, `log_rate` are specified.
      TypeError: if `rate` is not a float-type.
      TypeError: if `log_rate` is not a float-type.
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            if (rate is None) == (log_rate is None):
                raise ValueError(
                    "Must specify exactly one of `rate` and `log_rate`.")
            elif log_rate is None:
                rate = tf.convert_to_tensor(value=rate,
                                            name="rate",
                                            dtype=dtype_util.common_dtype(
                                                [rate],
                                                preferred_dtype=tf.float32))
                if not dtype_util.is_floating(rate.dtype):
                    raise TypeError(
                        "rate.dtype ({}) is not a float-type.".format(
                            dtype_util.name(rate.dtype)))
                with tf.control_dependencies(
                    [assert_util.assert_positive(rate
                                                 )] if validate_args else []):
                    self._rate = tf.identity(rate, name="rate")
                    self._log_rate = tf.math.log(rate, name="log_rate")
            else:
                log_rate = tf.convert_to_tensor(value=log_rate,
                                                name="log_rate",
                                                dtype=dtype_util.common_dtype(
                                                    [log_rate], tf.float32))
                if not dtype_util.is_floating(log_rate.dtype):
                    raise TypeError(
                        "log_rate.dtype ({}) is not a float-type.".format(
                            dtype_util.name(log_rate.dtype)))
                self._rate = tf.exp(log_rate, name="rate")
                self._log_rate = tf.convert_to_tensor(value=log_rate,
                                                      name="log_rate")

        self._interpolate_nondiscrete = interpolate_nondiscrete
        super(Poisson, self).__init__(
            dtype=self._rate.dtype,
            reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            graph_parents=[self._rate],
            name=name)
Example #14
  def __init__(self,
               loc,
               atol=None,
               rtol=None,
               is_vector=False,
               validate_args=False,
               allow_nan_stats=True,
               parameters=None,
               name='_BaseDeterministic'):
    """Initialize a batch of `_BaseDeterministic` distributions.

    The `atol` and `rtol` parameters allow for some slack in `pmf`, `cdf`
    computations, e.g. due to floating-point error.

    ```
    pmf(x; loc)
      = 1, if Abs(x - loc) <= atol + rtol * Abs(loc),
      = 0, otherwise.
    ```

    Args:
      loc: Numeric `Tensor`.  The point (or batch of points) on which this
        distribution is supported.
      atol:  Non-negative `Tensor` of same `dtype` as `loc` and broadcastable
        shape.  The absolute tolerance for comparing closeness to `loc`.
        Default is `0`.
      rtol:  Non-negative `Tensor` of same `dtype` as `loc` and broadcastable
        shape.  The relative tolerance for comparing closeness to `loc`.
        Default is `0`.
      is_vector:  Python `bool`.  If `True`, this is for `VectorDeterministic`,
        else `Deterministic`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      parameters: Dict of locals to facilitate copy construction.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError:  If `loc` is a scalar.
    """
    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, atol, rtol], dtype_hint=tf.float32)
      self._loc = tensor_util.convert_nonref_to_tensor(
          loc, dtype_hint=dtype, name='loc')
      self._atol = tensor_util.convert_nonref_to_tensor(
          0 if atol is None else atol, dtype=dtype, name='atol')
      self._rtol = tensor_util.convert_nonref_to_tensor(
          0 if rtol is None else rtol, dtype=dtype, name='rtol')
      self._is_vector = is_vector

      super(_BaseDeterministic, self).__init__(
          dtype=self._loc.dtype,
          reparameterization_type=(
              reparameterization.FULLY_REPARAMETERIZED
              if dtype_util.is_floating(self._loc.dtype)
              else reparameterization.NOT_REPARAMETERIZED),
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          parameters=parameters,
          name=name)
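
# A hedged usage sketch via the public tfd.Deterministic subclass of this base:
# atol/rtol widen the pmf's support around `loc`, per the formula quoted above.
import tensorflow_probability as tfp
tfd = tfp.distributions

d = tfd.Deterministic(loc=0.3, atol=0.05)
print(d.prob(0.33))  # 1.0 -- |0.33 - 0.3| <= atol
print(d.prob(0.40))  # 0.0 -- outside the slack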
Example #15
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='VectorExponentialLinearOperator'):
        """Construct Vector Exponential distribution supported on a subset of `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
        parameters = dict(locals())
        if loc is None:
            loc = 0.0  # Implicit value for backwards compatibility.
        if scale is None:
            raise ValueError('Missing required `scale` parameter.')
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                '`scale` parameter must have floating-point dtype.')

        with tf.name_scope(name) as name:
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            # TODO(b/190433277): Verify GradientTape safety and use
            # `convert_nonref_to_tensor` on `loc`.
            loc = loc if loc is None else tf.convert_to_tensor(
                loc, name='loc', dtype=scale.dtype)
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)
            self._loc = loc
            self._scale = scale
            super(VectorExponentialLinearOperator, self).__init__(
                # TODO(b/137665504): Use batch-adding meta-distribution to set the
                # batch shape instead of tf.ones.
                # We use `Sample` instead of `Independent` because `Independent`
                # requires concatenating `batch_shape` and `event_shape`, which loses
                # static `batch_shape` information when `event_shape` is not
                # statically known.
                distribution=sample.Sample(
                    exponential.Exponential(rate=tf.ones(batch_shape,
                                                         dtype=scale.dtype),
                                            allow_nan_stats=allow_nan_stats),
                    event_shape),
                bijector=shift_bijector.Shift(shift=loc)(
                    scale_matvec_linear_operator.ScaleMatvecLinearOperator(
                        scale=scale, validate_args=validate_args)),
                validate_args=validate_args,
                name=name)
            self._parameters = parameters
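
# A hedged usage sketch, assuming this class is exposed as
# tfd.VectorExponentialLinearOperator: a 2-d vector exponential whose covariance
# is scale @ scale^T for the diagonal operator below.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

vex = tfd.VectorExponentialLinearOperator(
    loc=[0., 0.],
    scale=tf.linalg.LinearOperatorDiag([1., 2.]))
print(vex.sample(3, seed=42).shape)  # (3, 2)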
Example #16
@tf.custom_gradient
def sqrt_with_finite_grads(x, name=None):
    """A sqrt function whose gradient at zero is very large but finite.

  Args:
    x: a `Tensor` whose sqrt is to be computed.
    name: a Python `str` prefixed to all ops created by this function.
      Default `None` (i.e., "sqrt_with_finite_grads").

  Returns:
    sqrt: the square root of `x`, with an overridden gradient at zero
    grad: a gradient function, which is the same as sqrt's gradient everywhere
      except at zero, where it is given a large finite value, instead of `inf`.

  Raises:
    TypeError: if `tf.convert_to_tensor(x)` is not a `float` type.

  Often in kernel functions, we need to compute the L2 norm of the difference
  between two vectors, `x` and `y`: `sqrt(sum_i((x_i - y_i) ** 2))`. In the
  case where `x` and `y` are identical, e.g., on the diagonal of a kernel
  matrix, we get `NaN`s when we take gradients with respect to the inputs. To
  see this, consider the forward pass:

    ```
    [x_1 ... x_N]  -->  [x_1 ** 2 ... x_N ** 2]  -->
        (x_1 ** 2 + ... + x_N ** 2)  -->  sqrt((x_1 ** 2 + ... + x_N ** 2))
    ```

  When we backprop through this forward pass, the `sqrt` yields an `inf` because
  `grad_z(sqrt(z)) = 1 / (2 * sqrt(z))`. Continuing the backprop to the left, at
  the `x ** 2` term, we pick up a `2 * x`, and when `x` is zero, we get
  `0 * inf`, which is `NaN`.

  We'd like to avoid these `NaN`s, since they infect the rest of the connected
  computation graph. Practically, when two inputs to a kernel function are
  equal, we are in one of two scenarios:
    1. We are actually computing k(x, x), in which case norm(x - x) is
       identically zero, independent of x. In this case, we'd like the
       gradient to reflect this independence: it should be zero.
    2. We are computing k(x, y), and x just *happens* to have the same value
       as y. The gradient at such inputs is in fact ill-defined (there is a
       cusp in the sqrt((x - y) ** 2) surface along the line x = y). There are,
       however, an infinite number of sub-gradients, all of which are valid at
       all such inputs. By symmetry, there is exactly one which is "special":
       zero, and we elect to use that value here. In practice, having two
       identical inputs to a kernel matrix is probably a pathological
       situation to be avoided, but that is better resolved at a higher level
       than this.

  To avoid the infinite gradient at zero, we use tf.custom_gradient to redefine
  the gradient at zero. We assign it to be a very large value, specifically
  the sqrt of the max value of the floating point dtype of the input. We use
  the sqrt (as opposed to just using the max floating point value) to avoid
  potential overflow when combining this value with others downstream.
  """
    with tf.name_scope(name or 'sqrt_with_finite_grads'):
        x = tf.convert_to_tensor(value=x, name='x')
        if not dtype_util.is_floating(x.dtype):
            raise TypeError('Input `x` must be floating type.')

        def grad(grad_ys):
            large_float_like_x = np.sqrt(
                np.finfo(dtype_util.as_numpy_dtype(x.dtype)).max)
            safe_grads = tf.where(tf.equal(x, 0), large_float_like_x,
                                  0.5 * tf.math.rsqrt(x))
            return grad_ys * safe_grads

        return tf.sqrt(x), grad
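
# A small sketch (not library code) of the NaN the docstring above describes:
# backprop through sqrt(sum((x - y)**2)) at a point where x == y.
import tensorflow as tf

x = tf.Variable([1., 2.])
y = tf.constant([1., 2.])
with tf.GradientTape() as tape:
    d = tf.sqrt(tf.reduce_sum(tf.square(x - y)))
print(tape.gradient(d, x))  # [nan nan]: 0 * inf in the chain rule at zero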
Example #17
    def __init__(self,
                 loc=None,
                 scale=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="VectorExponentialLinearOperator"):
        """Construct Vector Exponential distribution supported on a subset of `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and `scale`
    arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `scale`. The last dimension of `loc` (if provided) must broadcast with this.

    Recall that `covariance = scale @ scale.T`.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape
        `[B1, ..., Bb, k, k]`.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `scale` is unspecified.
      TypeError: if not `scale.dtype.is_floating`
    """
        parameters = dict(locals())
        if scale is None:
            raise ValueError("Missing required `scale` parameter.")
        if not dtype_util.is_floating(scale.dtype):
            raise TypeError(
                "`scale` parameter must have floating-point dtype.")

        with tf.name_scope(name) as name:
            # Since expand_dims doesn't preserve constant-ness, we obtain the
            # non-dynamic value if possible.
            loc = loc if loc is None else tf.convert_to_tensor(
                value=loc, name="loc", dtype=scale.dtype)
            batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
                loc, scale)

            super(VectorExponentialLinearOperator, self).__init__(
                distribution=exponential.Exponential(
                    rate=tf.ones([], dtype=scale.dtype),
                    allow_nan_stats=allow_nan_stats),
                bijector=affine_linear_operator_bijector.AffineLinearOperator(
                    shift=loc, scale=scale, validate_args=validate_args),
                batch_shape=batch_shape,
                event_shape=event_shape,
                validate_args=validate_args,
                name=name)
            self._parameters = parameters
Example #18
    def __init__(self,
                 power,
                 dtype=tf.int32,
                 interpolate_nondiscrete=True,
                 sample_maximum_iterations=100,
                 validate_args=False,
                 allow_nan_stats=False,
                 name='Zipf'):
        """Initialize a batch of Zipf distributions.

    Args:
      power: `Float` like `Tensor` representing the power parameter. Must be
        strictly greater than `1`.
      dtype: The `dtype` of `Tensor` returned by `sample`.
        Default value: `tf.int32`.
      interpolate_nondiscrete: Python `bool`. When `False`, `log_prob` returns
        `-inf` (and `prob` returns `0`) for non-integer inputs. When `True`,
        `log_prob` evaluates the continuous function `-power log(k) -
        log(zeta(power))` , which matches the Zipf pmf at integer arguments `k`
        (note that this function is not itself a normalized probability
        log-density).
        Default value: `True`.
      sample_maximum_iterations: Maximum number of allowable iterations in
        `sample`. When `validate_args=True`, samples which fail to
        reach convergence (subject to this cap) are masked out with
        `self.dtype.min` or `nan` depending on `self.dtype.is_integer`.
        Default value: `100`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `False`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or more
        of the statistic's batch members are undefined.
        Default value: `False`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: `'Zipf'`.

    Raises:
      TypeError: if `power` is not `float` like.
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            self._power = tensor_util.convert_nonref_to_tensor(
                power,
                name='power',
                dtype=dtype_util.common_dtype([power], dtype_hint=tf.float32))
            if (not dtype_util.is_floating(self._power.dtype)
                    or dtype_util.base_equal(self._power.dtype, tf.float16)):
                raise TypeError(
                    'power.dtype ({}) is not a supported `float` type.'.format(
                        dtype_util.name(self._power.dtype)))
            self._interpolate_nondiscrete = interpolate_nondiscrete
            self._sample_maximum_iterations = sample_maximum_iterations
            super(Zipf, self).__init__(
                dtype=dtype,
                reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                name=name)
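
# A hedged usage sketch via the public tfd.Zipf wrapper of this constructor:
# pmf(k) = k**(-power) / zeta(power) for integer k >= 1.
import tensorflow_probability as tfp
tfd = tfp.distributions

zipf = tfd.Zipf(power=2.)
print(zipf.prob(1.))           # ~0.608, i.e. 1 / zeta(2)
print(zipf.sample(3, seed=42))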
Example #19
    def _sample_n(self, n, seed):
        components_seed, mix_seed = samplers.split_seed(
            seed, salt='MixtureSameFamily')
        try:
            seed_stream = SeedStream(seed, salt='MixtureSameFamily')
        except TypeError as e:  # Can happen for Tensor seeds.
            seed_stream = None
            seed_stream_err = e
        try:
            x = self.components_distribution.sample(  # [n, B, k, E]
                n, seed=components_seed)
            if seed_stream is not None:
                seed_stream()  # Advance even if unused.
        except TypeError as e:
            if ('Expected int for argument' not in str(e)
                    and TENSOR_SEED_MSG_PREFIX not in str(e)):
                raise
            if seed_stream is None:
                raise seed_stream_err
            msg = (
                'Falling back to stateful sampling for `components_distribution` '
                '{} of type `{}`. Please update to use `tf.random.stateless_*` '
                'RNGs. This fallback may be removed after 20-Aug-2020. {}')
            warnings.warn(
                msg.format(self.components_distribution.name,
                           type(self.components_distribution), str(e)))
            x = self.components_distribution.sample(  # [n, B, k, E]
                n, seed=seed_stream())

        event_shape = None
        event_ndims = tensorshape_util.rank(self.event_shape)
        if event_ndims is None:
            event_shape = self.components_distribution.event_shape_tensor()
            event_ndims = ps.rank_from_shape(event_shape)
        event_ndims_static = tf.get_static_value(event_ndims)

        num_components = None
        if event_ndims_static is not None:
            num_components = tf.compat.dimension_value(
                x.shape[-1 - event_ndims_static])
        # We could also check if num_components can be computed statically from
        # self.mixture_distribution's logits or probs.
        if num_components is None:
            num_components = tf.shape(x)[-1 - event_ndims]

        # TODO(jvdillon): Consider using tf.gather (by way of index unrolling).
        npdt = dtype_util.as_numpy_dtype(x.dtype)
        try:
            mix_sample = self.mixture_distribution.sample(
                n, seed=mix_seed)  # [n, B] or [n]
        except TypeError as e:
            if ('Expected int for argument' not in str(e)
                    and TENSOR_SEED_MSG_PREFIX not in str(e)):
                raise
            if seed_stream is None:
                raise seed_stream_err
            msg = (
                'Falling back to stateful sampling for `mixture_distribution` '
                '{} of type `{}`. Please update to use `tf.random.stateless_*` '
                'RNGs. This fallback may be removed after 20-Aug-2020. ({})')
            warnings.warn(
                msg.format(self.mixture_distribution.name,
                           type(self.mixture_distribution), str(e)))
            mix_sample = self.mixture_distribution.sample(
                n, seed=seed_stream())  # [n, B] or [n]
        mask = tf.one_hot(
            indices=mix_sample,  # [n, B] or [n]
            depth=num_components,
            on_value=npdt(1),
            off_value=npdt(0))  # [n, B, k] or [n, k]

        # Pad `mask` to [n, B, k, [1]*e] or [n, [1]*b, k, [1]*e] .
        batch_ndims = ps.rank(x) - event_ndims - 1
        mask_batch_ndims = ps.rank(mask) - 1
        pad_ndims = batch_ndims - mask_batch_ndims
        mask_shape = ps.shape(mask)
        target_shape = ps.concat([
            mask_shape[:-1],
            ps.ones([pad_ndims], dtype=tf.int32),
            mask_shape[-1:],
            ps.ones([event_ndims], dtype=tf.int32),
        ],
                                 axis=0)
        mask = tf.reshape(mask, shape=target_shape)

        if dtype_util.is_floating(x.dtype) or dtype_util.is_complex(x.dtype):
            masked = tf.math.multiply_no_nan(x, mask)
        else:
            masked = x * mask
        ret = tf.reduce_sum(masked, axis=-1 - event_ndims)  # [n, B, E]

        if self._reparameterize:
            if event_shape is None:
                event_shape = self.components_distribution.event_shape_tensor()
            ret = self._reparameterize_sample(ret, event_shape=event_shape)

        return ret
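
# A hedged sketch of the sampling path above via the public tfd.MixtureSameFamily:
# components are drawn as [n, B, k, E], one-hot masked by the mixture draw, and
# reduced over the components axis.
import tensorflow_probability as tfp
tfd = tfp.distributions

gm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
    components_distribution=tfd.Normal(loc=[-1., 1.], scale=[0.1, 0.5]))
print(gm.sample(4, seed=42).shape)  # (4,): scalar events, components reduced away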
Example #20
def moving_mean_variance_zero_debiased(moving_mean,
                                       moving_variance=None,
                                       zero_debias_count=None,
                                       decay=0.99,
                                       name=None):
    """Compute zero debiased versions of `moving_mean` and `moving_variance`.

  Since `moving_*` variables initialized with `0`s will be biased (toward `0`),
  this function rescales the `moving_mean` and `moving_variance` by the factor
  `1 - decay**zero_debias_count`, i.e., such that the `moving_mean` is unbiased.
  For more details, see [Kingma (2014)][1].

  Args:
    moving_mean: `float`-like `tf.Variable` representing the exponentially
      weighted moving mean. Same shape as `moving_variance` and `value`. This
      function presumes the `tf.Variable` was created with all zero initial
      value(s).
    moving_variance: `float`-like `tf.Variable` representing the exponentially
      weighted moving variance. Same shape as `moving_mean` and `value`.  This
      function presumes the `tf.Variable` was created with all zero initial
      value(s).
      Default value: `None` (i.e., no moving variance is computed).
    zero_debias_count: `int`-like `tf.Variable` representing the number of times
      this function has been called on streaming input (*not* the number of
      reduced values used in this function's computation). When not `None`, the
      returned values for `moving_mean` and `moving_variance` are
      "zero debiased", i.e., corrected for their presumed all zeros
      initialization. Note: the `tf.Variable`s `moving_mean` and
      `moving_variance` *always* store the biased calculation, regardless of
      setting this argument. To obtain unbiased calculations from these
      `tf.Variable`s, see `tfp.stats.moving_mean_variance_zero_debiased`.
      Default value: `None` (i.e., no zero debiasing calculation is made).
    decay: A `float`-like `Tensor` representing the moving mean decay. Typically
      close to `1.`, e.g., `0.99`.
      Default value: `0.99`.
    name: Python `str` prepended to op names created by this function.
      Default value: `None` (i.e., 'moving_mean_variance_zero_debiased').

  Returns:
    moving_mean: The zero debiased exponentially weighted moving mean.
    moving_variance: The zero debiased exponentially weighted moving variance.

  Raises:
    TypeError: if `moving_mean` does not have float type `dtype`.
    TypeError: if `moving_mean`, `moving_variance`, `decay` have different
      `base_dtype`.

  #### References

  [1]: Diederik P. Kingma, Jimmy Ba. Adam: A Method for Stochastic Optimization.
        _arXiv preprint arXiv:1412.6980_, 2014.
       https://arxiv.org/abs/1412.6980
  """
    with tf.name_scope(name or 'moving_mean_variance_zero_debiased'):
        if zero_debias_count is None:
            raise ValueError(
                'Argument `zero_debias_count` must be specified.')
        base_dtype = dtype_util.base_dtype(moving_mean.dtype)
        if not dtype_util.is_floating(base_dtype):
            raise TypeError(
                'Argument `moving_mean` is not float type (saw {}).'.format(
                    dtype_util.name(moving_mean.dtype)))
        t = tf.cast(zero_debias_count, dtype=base_dtype)
        # Could have used:
        #   bias_correction = -tf.math.expm1(t * tf.math.log(decay))
        # however since we expect decay to be nearly 1, we don't expect this to bear
        # a significant improvement, yet would incur higher computational cost.
        t = tf.where(t > 0., t, tf.constant(np.inf, base_dtype))
        bias_correction = 1. - decay**t
        unbiased_mean = moving_mean / bias_correction
        if moving_variance is None:
            return unbiased_mean
        if base_dtype != dtype_util.base_dtype(moving_variance.dtype):
            raise TypeError(
                'Arguments `moving_mean` and `moving_variance` do not '
                'have same base `dtype` (saw {}, {}).'.format(
                    dtype_util.name(moving_mean.dtype),
                    dtype_util.name(moving_variance.dtype)))
        unbiased_variance = moving_variance / bias_correction
        return unbiased_mean, unbiased_variance
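
# A small numeric sketch (not library code) of the debias factor described above:
# a zero-initialized moving average after t updates is scaled down by
# 1 - decay**t, which this function divides out.
decay = 0.99
for t in (1, 10, 100):
    print(t, 1. - decay**t)  # 0.01, ~0.096, ~0.634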
Example #21
    def __init__(self,
                 rate=None,
                 log_rate=None,
                 force_probs_to_zero_outside_support=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='Poisson'):
        """Initialize a batch of Poisson distributions.

    Args:
      rate: Floating point tensor, the rate parameter. `rate` must be positive.
        Must specify exactly one of `rate` and `log_rate`.
      log_rate: Floating point tensor, the log of the rate parameter.
        Must specify exactly one of `rate` and `log_rate`.
      force_probs_to_zero_outside_support: Python `bool`. When `True`, negative
        and non-integer values are evaluated "strictly": `log_prob` returns
        `-inf`, `prob` returns `0`, and `cdf` and `sf` correspond.  When
        `False`, the implementation is free to save computation (and TF graph
        size) by evaluating something that matches the Poisson pmf at integer
        values `k` but produces an unrestricted result on other inputs.  In the
        case of Poisson, the `log_prob` formula in this case happens to be the
        continuous function `k * log_rate - lgamma(k+1) - rate`.  Note that this
        function is not itself a normalized probability log-density.
        Default value: `False`.
      validate_args: Python `bool`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if none or both of `rate`, `log_rate` are specified.
      TypeError: if `rate` is not a float-type.
      TypeError: if `log_rate` is not a float-type.
    """
        parameters = dict(locals())
        if (rate is None) == (log_rate is None):
            raise ValueError(
                'Must specify exactly one of `rate` and `log_rate`.')
        with tf.name_scope(name) as name:
            dtype = dtype_util.common_dtype([rate, log_rate],
                                            dtype_hint=tf.float32)
            if not dtype_util.is_floating(dtype):
                raise TypeError(
                    '[log_]rate.dtype ({}) is not a float-type.'.format(
                        dtype_util.name(dtype)))
            self._rate = tensor_util.convert_nonref_to_tensor(rate,
                                                              name='rate',
                                                              dtype=dtype)
            self._log_rate = tensor_util.convert_nonref_to_tensor(
                log_rate, name='log_rate', dtype=dtype)
            self._force_probs_to_zero_outside_support = force_probs_to_zero_outside_support

            super(Poisson, self).__init__(
                dtype=dtype,
                reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                name=name)
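
# A hedged sketch of the `force_probs_to_zero_outside_support` flag documented
# above, assuming the standard public tfd.Poisson API.
import tensorflow_probability as tfp
tfd = tfp.distributions

lenient = tfd.Poisson(rate=2.)
strict = tfd.Poisson(rate=2., force_probs_to_zero_outside_support=True)
print(lenient.log_prob(1.5))  # finite: continuous interpolation of the pmf formula
print(strict.log_prob(1.5))   # -inf: non-integer values are outside the support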
Example #22
def assign_moving_mean_variance(value,
                                moving_mean,
                                moving_variance=None,
                                zero_debias_count=None,
                                decay=0.99,
                                axis=(),
                                name=None):
    """Compute one update to the exponentially weighted moving mean and variance.

  The `value`-updated exponentially weighted moving `moving_mean` and
  `moving_variance` are conceptually given by the following recurrence
  relations ([Welford (1962)][1]):

  ```python
  new_mean = old_mean + (1 - decay) * (value - old_mean)
  new_var  = old_var  + (1 - decay) * (value - old_mean) * (value - new_mean)
  ```

  This function implements the above recurrences in a numerically stable manner
  and also uses the `assign_add` op to allow concurrent lockless updates to the
  supplied variables.

  For additional references see [this John D. Cook blog post](
  https://www.johndcook.com/blog/standard_deviation/)
  (whereas we use `1 - decay = 1 / k`) and
  [Finch (2009; Eq. 143)][2] (whereas we use `1 - decay = alpha`).

  Since variables that are initialized to a `0` value will be `0` biased,
  providing `zero_debias_count` triggers scaling the `moving_mean` and
  `moving_variance` by the factor of `1 - decay ** (zero_debias_count + 1)`.
  For more details, see `tfp.stats.moving_mean_variance_zero_debiased`.

  Args:
    value: `float`-like `Tensor` representing one or more streaming
      observations. When `axis` is non-empty, `value` is reduced (by mean) for
      the updated `moving_mean` and `moving_variance`. Presumed to have same shape
      as `moving_mean` and `moving_variance`.
    moving_mean: `float`-like `tf.Variable` representing the exponentially
      weighted moving mean. Same shape as `moving_variance` and `value`. This
      function presumes the `tf.Variable` was created with all zero initial
      value(s).
    moving_variance: `float`-like `tf.Variable` representing the exponentially
      weighted moving variance. Same shape as `moving_mean` and `value`.  This
      function presumes the `tf.Variable` was created with all zero initial
      value(s).
      Default value: `None` (i.e., no moving variance is computed).
    zero_debias_count: `int`-like `tf.Variable` representing the number of times
      this function has been called on streaming input (*not* the number of
      reduced values used in this function's computation). When not `None`, the
      returned values for `moving_mean` and `moving_variance` are
      "zero debiased", i.e., corrected for their presumed all zeros
      initialization. Note: the `tf.Variable`s `moving_mean` and
      `moving_variance` *always* store the biased calculation, regardless of
      setting this argument. To obtain unbiased calculations from these
      `tf.Variable`s, see `tfp.stats.moving_mean_variance_zero_debiased`.
      Default value: `None` (i.e., no zero debiasing calculation is made).
    decay: A `float`-like `Tensor` representing the moving mean decay. Typically
      close to `1.`, e.g., `0.99`.
      Default value: `0.99`.
    axis: The dimensions to reduce. If `()` (the default) no dimensions are
      reduced. If `None` all dimensions are reduced. Must be in the range
      `[-rank(value), rank(value))`.
      Default value: `()` (i.e., no reduction is made).
    name: Python `str` prepended to op names created by this function.
      Default value: `None` (i.e., 'assign_moving_mean_variance').

  Returns:
    moving_mean: The `value`-updated exponentially weighted moving mean.
      Debiased if `zero_debias_count is not None`.
    moving_variance: The `value`-updated exponentially weighted moving variance.
      Debiased if `zero_debias_count is not None`.

  Raises:
    TypeError: if `moving_mean` does not have float type `dtype`.
    TypeError: if `moving_mean`, `moving_variance`, `value`, `decay` have
      different `base_dtype`.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp
  tfd = tfp.distributions
  d = tfd.MultivariateNormalTriL(
      loc=[-1., 1.],
      scale_tril=tf.linalg.cholesky([[0.75, 0.05],
                                     [0.05, 0.5]]))
  d.mean()
  # ==> [-1.,  1.]
  d.variance()
  # ==> [0.75, 0.5]
  moving_mean = tf.Variable(tf.zeros(2))
  moving_variance = tf.Variable(tf.zeros(2))
  zero_debias_count = tf.Variable(0)
  for _ in range(100):
    m, v = tfp.stats.assign_moving_mean_variance(
      value=d.sample(3),
      moving_mean=moving_mean,
      moving_variance=moving_variance,
      zero_debias_count=zero_debias_count,
      decay=0.99,
      axis=-2)
    print(m.numpy(), v.numpy())
  # ==> [-1.0334632  0.9545268] [0.8126194 0.5118788]
  # ==> [-1.0293456   0.96070296] [0.8115873  0.50947404]
  # ...
  # ==> [-1.025172  0.96351 ] [0.7142789  0.48570773]

  m1, v1 = tfp.stats.moving_mean_variance_zero_debiased(
    moving_mean,
    moving_variance,
    zero_debias_count,
    decay=0.99)
  print(m1.numpy(), v1.numpy())
  # ==> [-1.025172  0.96351 ] [0.7142789  0.48570773]
  assert(all(m == m1))
  assert(all(v == v1))
  ```

  #### References

  [1]  B. P. Welford. Note on a Method for Calculating Corrected Sums of
       Squares and Products. Technometrics, Vol. 4, No. 3 (Aug., 1962), p419-20.
       http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.302.7503&rep=rep1&type=pdf
       http://www.jstor.org/stable/1266577

  [2]: Tony Finch. Incremental calculation of weighted mean and variance.
       _Technical Report_, 2009.
       http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf
  """
    with tf.name_scope(name or 'assign_moving_mean_variance'):
        base_dtype = dtype_util.base_dtype(moving_mean.dtype)
        if not dtype_util.is_floating(base_dtype):
            raise TypeError(
                'Argument `moving_mean` is not float type (saw {}).'.format(
                    dtype_util.name(moving_mean.dtype)))

        value = tf.convert_to_tensor(value, dtype=base_dtype, name='value')
        decay = tf.convert_to_tensor(decay, dtype=base_dtype, name='decay')
        # Force a read of `moving_mean` as  we'll need it twice.
        old_mean = tf.convert_to_tensor(moving_mean,
                                        dtype=base_dtype,
                                        name='old_mean')

        updated_mean = moving_mean.assign_add(
            (1. - decay) * (tf.reduce_mean(value, axis=axis) - old_mean))

        if zero_debias_count is not None:
            t = tf.cast(zero_debias_count.assign_add(1), base_dtype)
            # Could have used:
            #   bias_correction = -tf.math.expm1(t * tf.math.log(decay))
            # however since we expect decay to be nearly 1, we don't expect this to
            # bear a significant improvement, yet would incur higher computational
            # cost.
            bias_correction = 1. - decay**t
            with tf.control_dependencies([updated_mean]):
                updated_mean = updated_mean / bias_correction

        if moving_variance is None:
            return updated_mean

        if base_dtype != dtype_util.base_dtype(moving_variance.dtype):
            raise TypeError(
                'Arguments `moving_mean` and `moving_variance` do not '
                'have same base `dtype` (saw {}, {}).'.format(
                    dtype_util.name(moving_mean.dtype),
                    dtype_util.name(moving_variance.dtype)))

        if zero_debias_count is not None:
            old_t = tf.where(t > 1., t - 1., tf.constant(np.inf, base_dtype))
            old_bias_correction = 1. - decay**old_t
            old_mean = old_mean / old_bias_correction

        mean_sq_diff = tf.reduce_mean(tf.math.squared_difference(
            value, old_mean),
                                      axis=axis)
        updated_variance = moving_variance.assign_add(
            (1. - decay) * (decay * mean_sq_diff - moving_variance))

        if zero_debias_count is not None:
            with tf.control_dependencies([updated_variance]):
                updated_variance = updated_variance / bias_correction

        return updated_mean, updated_variance
Example #23
  def __init__(self,
               loc=None,
               cov_operator=None,
               validate_args=False,
               allow_nan_stats=True,
               name='MultivariateNormalLowRankUpdateLinearOperatorCovariance'):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `cov_operator` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `cov_operator`. The last dimension of `loc` (if provided) must
    broadcast with this.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      cov_operator: Instance of `LinearOperatorLowRankUpdate` with same
        `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`.  Must have structure
        `A + UU^T` or `A + UDU^T`, where `A` and `D` (if provided) are
        self-adjoint and positive definite.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are invalid,
        correct behavior is not guaranteed.
      allow_nan_stats: Python `bool`, default `True`. If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.

    Raises:
      ValueError: if `cov_operator` is unspecified.
      ValueError: if `cov_operator` does not specify the self-adjoint
        positive definite conditions explained above.
      TypeError: if not `cov_operator.dtype.is_floating`
    """
    parameters = dict(locals())
    if cov_operator is None:
      raise ValueError('Missing required `cov_operator` parameter.')
    if not dtype_util.is_floating(cov_operator.dtype):
      raise TypeError(
          '`cov_operator` parameter must have floating-point dtype.')
    if not isinstance(cov_operator,
                      tf.linalg.LinearOperatorLowRankUpdate):
      raise TypeError(
          '`cov_operator` must be a LinearOperatorLowRankUpdate. '
          'Found {}'.format(type(cov_operator)))

    if cov_operator.u is not cov_operator.v:
      raise ValueError('The `U` and `V` (typically low rank) matrices of '
                       '`cov_operator` must be the same, but were not.')

    # For cov_operator, raise if the user explicitly set these to False,
    # or if False was inferred by the LinearOperator. The default value is None,
    # which will not trigger these raises.
    # pylint: disable=g-bool-id-comparison
    if cov_operator.is_self_adjoint is False:
      raise ValueError('`cov_operator` must be self-adjoint.')
    if cov_operator.is_positive_definite is False:
      raise ValueError('`cov_operator` must be positive definite.')
    # pylint: enable=g-bool-id-comparison

    # For the base_operator, we require the user to explicitly set
    # is_self_adjoint and is_positive_definite.
    if not cov_operator.base_operator.is_self_adjoint:
      raise ValueError(
          'The `base_operator` of `cov_operator` must be self-adjoint. '
          'You may have to set the `is_self_adjoint` initialization hint.')
    if not cov_operator.base_operator.is_positive_definite:
      raise ValueError(
          'The `base_operator` of `cov_operator` must be positive '
          'definite. You may have to set the `is_positive_definite` '
          'initialization hint.')

    with tf.name_scope(name) as name:
      dtype = dtype_util.common_dtype([loc, cov_operator],
                                      dtype_hint=tf.float32)
      if loc is not None:
        loc = tensor_util.convert_nonref_to_tensor(loc, dtype=dtype, name='loc')

      # Get dynamic shapes (for self.*shape_tensor methods).
      # shapes_from_loc_and_scale tries to return TensorShapes, but may return
      # tensors. So we can only use it for the *shape_tensor methods.
      # It is useful though, since it does lots of shape checks, and is a
      # well-tested function.
      batch_shape, event_shape = distribution_util.shapes_from_loc_and_scale(
          loc, cov_operator)
      self._batch_shape_tensor_value = tf.convert_to_tensor(
          batch_shape, name='batch_shape')
      self._event_shape_tensor_value = tf.convert_to_tensor(
          event_shape, name='event_shape')

      # Get static shapes (for self.*shape methods).
      self._batch_shape_value = cov_operator.batch_shape
      if loc is not None:
        self._batch_shape_value = tf.broadcast_static_shape(
            self._batch_shape_value, loc.shape[:-1])
      self._event_shape_value = cov_operator.shape[-1:]
      if loc is not None:
        self._event_shape_value = tf.broadcast_static_shape(
            self._event_shape_value, loc.shape[-1:])

    self._loc = loc
    self._cov_operator = cov_operator

    super(MultivariateNormalLowRankUpdateLinearOperatorCovariance,
          self).__init__(
              dtype=dtype,
              reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
              validate_args=validate_args,
              allow_nan_stats=allow_nan_stats,
              parameters=parameters,
              name=name)
    self._parameters = parameters
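
# A hedged usage sketch, assuming the class above is exposed as
# tfd.MultivariateNormalLowRankUpdateLinearOperatorCovariance: covariance
# A + U U^T with a diagonal, explicitly self-adjoint positive-definite base.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

base = tf.linalg.LinearOperatorDiag(
    [1., 2., 3.], is_self_adjoint=True, is_positive_definite=True)
u = tf.constant([[1.0], [0.5], [0.25]])  # rank-1 update, shape [k, r]
cov = tf.linalg.LinearOperatorLowRankUpdate(base, u=u)
mvn = tfd.MultivariateNormalLowRankUpdateLinearOperatorCovariance(
    loc=[0., 0., 0.], cov_operator=cov)
print(mvn.sample(5, seed=42).shape)  # (5, 3)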