Example #1
    def poisson_and_mixture_distributions(self):
        """Returns the Poisson and Mixture distribution parameterized by the quadrature grid and weights."""
        loc = tf.convert_to_tensor(self.loc)
        scale = tf.convert_to_tensor(self.scale)
        quadrature_grid, quadrature_probs = tuple(
            self._quadrature_fn(loc, scale, self.quadrature_size,
                                self.validate_args))
        dt = quadrature_grid.dtype
        if not dtype_util.base_equal(dt, quadrature_probs.dtype):
            raise TypeError(
                'Quadrature grid dtype ({}) does not match quadrature '
                'probs dtype ({}).'.format(
                    dtype_util.name(dt),
                    dtype_util.name(quadrature_probs.dtype)))

        dist = poisson.Poisson(log_rate=quadrature_grid,
                               validate_args=self.validate_args,
                               allow_nan_stats=self.allow_nan_stats)

        mixture_dist = categorical.Categorical(
            logits=tf.math.log(quadrature_probs),
            validate_args=self.validate_args,
            allow_nan_stats=self.allow_nan_stats)
        return dist, mixture_dist
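A hedged usage sketch for the method above (assuming it is called on a `tfd.PoissonLogNormalQuadratureCompound`, where this method lives, and that the two returned pieces are recombined with `tfd.MixtureSameFamily`; the numbers are illustrative):

import tensorflow_probability as tfp
tfd = tfp.distributions

compound = tfd.PoissonLogNormalQuadratureCompound(
    loc=0., scale=1., quadrature_size=8)
poisson_dist, mixture_dist = compound.poisson_and_mixture_distributions()

# Recombine the pieces: one Poisson component per quadrature point, mixed
# with the quadrature weights, approximating the Poisson-LogNormal compound.
approx = tfd.MixtureSameFamily(
    mixture_distribution=mixture_dist,
    components_distribution=poisson_dist)
samples = approx.sample(100)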
Example #2
    def body(i, vecs, cur_sample, seed):
      sample_seed, next_seed = samplers.split_seed(seed)
      # squared norm at each coord across active subspace
      is_active = (i < sample_size)
      coord_prob = tf.reduce_sum(tf.square(vecs), axis=-1)
      coord_logits = tf.where(
          is_active[..., tf.newaxis], tf.math.log(coord_prob), 0.)

      idx = categorical.Categorical(logits=coord_logits).sample(
          seed=sample_seed)
      new_vecs = tf.where(
          (tf.range(n) < sample_size[..., tf.newaxis, tf.newaxis] - i - 1) &
          ~cur_sample[..., tf.newaxis],
          _orthogonal_complement_e_i(
              vecs, i=tf.where(is_active, idx, 0),
              gram_schmidt_iters=max_sample_size - i),
          0.)
      # `tf.range(n)` above may leave `new_vecs` with an unknown static
      # shape, so we re-assert the known shape.
      tensorshape_util.set_shape(new_vecs, vecs.shape)
      vecs = tf.where(is_active[..., tf.newaxis, tf.newaxis], new_vecs, vecs)
      cur_sample = (cur_sample |
                    (tf.equal(tf.range(d), idx[..., tf.newaxis]) &
                     is_active[..., tf.newaxis]))
      return i + 1, vecs, cur_sample, next_seed
Example #3
    def _sample_channels(self,
                         component_logits,
                         locs,
                         scales,
                         coeffs=None,
                         seed=None):
        """Sample a single pixel-iteration and apply channel conditioning.

    Args:
      component_logits: 4D `Tensor` of logits for the Categorical distribution
        over Quantized Logistic mixture components. Dimensions are `[batch_size,
        height, width, num_logistic_mix]`.
      locs: 5D `Tensor` of location parameters for the Quantized Logistic
        mixture components. Dimensions are `[batch_size, height, width,
        num_logistic_mix, num_channels]`.
      scales: 5D `Tensor` of scale parameters for the Quantized Logistic
        mixture components. Dimensions are `[batch_size, height, width,
        num_logistic_mix, num_channels]`.
      coeffs: 5D `Tensor` of coefficients for the linear dependence among color
        channels, or `None` if there is only one channel. Dimensions are
        `[batch_size, height, width, num_logistic_mix, num_coeffs]`, where
        `num_coeffs = num_channels * (num_channels - 1) // 2`.
      seed: PRNG seed; see `tfp.random.sanitize_seed` for details.

    Returns:
      samples: 4D `Tensor` of sampled image data with autoregression among
        channels. Dimensions are `[batch_size, height, width, num_channels]`.
    """
        num_channels = self.event_shape[-1]

        # sample mixture components once for the entire pixel
        component_dist = categorical.Categorical(logits=component_logits)
        mask = tf.one_hot(indices=component_dist.sample(seed=seed),
                          depth=self._num_logistic_mix)
        mask = tf.cast(mask[..., tf.newaxis], self.dtype)

        # apply mixture component mask and separate out RGB parameters
        masked_locs = tf.reduce_sum(locs * mask, axis=-2)
        loc_tensors = tf.split(masked_locs, num_channels, axis=-1)
        masked_scales = tf.reduce_sum(scales * mask, axis=-2)
        scale_tensors = tf.split(masked_scales, num_channels, axis=-1)

        if coeffs is not None:
            num_coeffs = num_channels * (num_channels - 1) // 2
            masked_coeffs = tf.reduce_sum(coeffs * mask, axis=-2)
            coef_tensors = tf.split(masked_coeffs, num_coeffs, axis=-1)

        channel_samples = []
        coef_count = 0
        for i in range(num_channels):
            loc = loc_tensors[i]
            for c in channel_samples:
                loc += c * coef_tensors[coef_count]
                coef_count += 1

            logistic_samp = logistic.Logistic(
                loc=loc, scale=scale_tensors[i]).sample(seed=seed)
            logistic_samp = tf.clip_by_value(logistic_samp, -1., 1.)
            channel_samples.append(logistic_samp)

        return tf.concat(channel_samples, axis=-1)
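For intuition about the channel autoregression above, a small hedged sketch with `num_channels=3` (so `num_coeffs=3`; the coefficient ordering `[c_rg, c_rb, c_gb]` matches the loop's `coef_count` bookkeeping and is an assumption here, as are all the numbers):

import tensorflow as tf

# Hypothetical per-channel parameters after the mixture mask is applied.
loc_r, loc_g, loc_b = tf.constant(0.1), tf.constant(-0.2), tf.constant(0.0)
c_rg, c_rb, c_gb = tf.constant(0.5), tf.constant(0.25), tf.constant(-0.4)

sample_r = tf.constant(0.3)               # stand-in Logistic draw for red
loc_g = loc_g + c_rg * sample_r           # green conditions on red
sample_g = loc_g                          # zero-noise stand-in draw for green
loc_b = loc_b + c_rb * sample_r + c_gb * sample_g  # blue conditions on both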
Example #4
 def testLogPMF(self):
   logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50.
   dist = categorical.Categorical(logits)
   with self.cached_session():
     self.assertAllClose(dist.log_prob([0, 1]).eval(), np.log([0.2, 0.4]))
     self.assertAllClose(dist.log_prob([0.0, 1.0]).eval(), np.log([0.2, 0.4]))
Example #5
    def __init__(self,
                 mix_loc,
                 temperature,
                 distribution,
                 loc=None,
                 scale=None,
                 quadrature_size=8,
                 quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="VectorDiffeomixture"):
        """Constructs the VectorDiffeomixture on `R^d`.

    The vector diffeomixture (VDM) approximates the compound distribution

    ```none
    p(x) = int p(x | z) p(z) dz,
    where z is in the K-simplex, and
    p(x | z) := p(x | loc=sum_k z[k] loc[k], scale=sum_k z[k] scale[k])
    ```

    Args:
      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
        In terms of samples, larger `mix_loc[..., k]` ==>
        `Z` is more likely to put more weight on its `kth` component.
      temperature: `float`-like `Tensor`. Broadcastable with `mix_loc`.
        In terms of samples, smaller `temperature` means one component is more
        likely to dominate.  I.e., smaller `temperature` makes the VDM look more
        like a standard mixture of `K` components.
      distribution: `tfp.distributions.Distribution`-like instance. Distribution
        from which `d` iid samples are used as input to the selected affine
        transformation. Must be a scalar-batch, scalar-event distribution.
        Typically `distribution.reparameterization_type = FULLY_REPARAMETERIZED`
        or it is a function of non-trainable parameters. WARNING: If you
        backprop through a VectorDiffeomixture sample and the `distribution`
        is not `FULLY_REPARAMETERIZED` yet is a function of trainable variables,
        then the gradient will be incorrect!
      loc: Length-`K` list of `float`-type `Tensor`s. The `k`-th element
        represents the `shift` used for the `k`-th affine transformation.  If
        the `k`-th item is `None`, `loc` is implicitly `0`.  When specified,
        must have shape `[B1, ..., Bb, d]` where `b >= 0` and `d` is the event
        size.
      scale: Length-`K` list of `LinearOperator`s. Each should be
        positive-definite and operate on a `d`-dimensional vector space. The
        `k`-th element represents the `scale` used for the `k`-th affine
        transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
        `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices.
      quadrature_size: Python `int` scalar representing number of
        quadrature points.  Larger `quadrature_size` means `q_N(x)` better
        approximates `p(x)`.
      quadrature_fn: Python callable taking `normal_loc`, `normal_scale`,
        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
        representing the SoftmaxNormal grid and corresponding normalized weight.
        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if `not scale or len(scale) < 2`.
      ValueError: if `len(loc) != len(scale)`
      ValueError: if `quadrature_grid_and_probs is not None` and
        `len(quadrature_grid_and_probs[0]) != len(quadrature_grid_and_probs[1])`
      ValueError: if `validate_args` and any not scale.is_positive_definite.
      TypeError: if any scale.dtype != scale[0].dtype.
      TypeError: if any loc.dtype != scale[0].dtype.
      NotImplementedError: if `len(scale) != 2`.
      ValueError: if `not distribution.is_scalar_batch`.
      ValueError: if `not distribution.is_scalar_event`.
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            if not scale or len(scale) < 2:
                raise ValueError(
                    "Must specify list (or list-like object) of scale "
                    "LinearOperators, one for each component with "
                    "num_component >= 2.")

            if loc is None:
                loc = [None] * len(scale)

            if len(loc) != len(scale):
                raise ValueError("loc/scale must be same-length lists "
                                 "(or same-length list-like objects).")

            dtype = dtype_util.base_dtype(scale[0].dtype)

            loc = [
                tf.convert_to_tensor(
                    value=loc_, dtype=dtype, name="loc{}".format(k))
                if loc_ is not None else None for k, loc_ in enumerate(loc)
            ]

            for k, scale_ in enumerate(scale):
                if validate_args and not scale_.is_positive_definite:
                    raise ValueError(
                        "scale[{}].is_positive_definite = {} != True".format(
                            k, scale_.is_positive_definite))
                if dtype_util.base_dtype(scale_.dtype) != dtype:
                    raise TypeError(
                        "dtype mismatch; scale[{}].base_dtype=\"{}\" != \"{}\""
                        .format(k, dtype_util.name(scale_.dtype),
                                dtype_util.name(dtype)))

            self._endpoint_affine = [
                affine_linear_operator_bijector.AffineLinearOperator(  # pylint: disable=g-complex-comprehension
                    shift=loc_,
                    scale=scale_,
                    validate_args=validate_args,
                    name="endpoint_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(zip(loc, scale))
            ]

            # TODO(jvdillon): Remove once we support k-mixtures.
            # We make this assertion here because otherwise `grid` would need to be a
            # vector not a scalar.
            if len(scale) != 2:
                raise NotImplementedError(
                    "Currently only bimixtures are supported; "
                    "len(scale)={} is not 2.".format(len(scale)))

            mix_loc = tf.convert_to_tensor(value=mix_loc,
                                           dtype=dtype,
                                           name="mix_loc")
            temperature = tf.convert_to_tensor(value=temperature,
                                               dtype=dtype,
                                               name="temperature")
            self._grid, probs = tuple(
                quadrature_fn(mix_loc / temperature, 1. / temperature,
                              quadrature_size, validate_args))

            # Note: by creating the logits as `log(prob)` we ensure that
            # `self.mixture_distribution.logits` is equivalent to
            # `math_ops.log(self.mixture_distribution.probs)`.
            self._mixture_distribution = categorical.Categorical(
                logits=tf.math.log(probs),
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)

            asserts = distribution_util.maybe_check_scalar_distribution(
                distribution, dtype, validate_args)
            if asserts:
                self._grid = distribution_util.with_dependencies(
                    asserts, self._grid)
            self._distribution = distribution

            self._interpolated_affine = [
                affine_linear_operator_bijector.AffineLinearOperator(  # pylint: disable=g-complex-comprehension
                    shift=loc_,
                    scale=scale_,
                    validate_args=validate_args,
                    name="interpolated_affine_{}".format(k))
                for k, (loc_, scale_) in enumerate(
                    zip(interpolate_loc(self._grid, loc),
                        interpolate_scale(self._grid, scale)))
            ]

            [
                self._batch_shape_,
                self._batch_shape_tensor_,
                self._event_shape_,
                self._event_shape_tensor_,
            ] = determine_batch_event_shapes(self._grid, self._endpoint_affine)

            super(VectorDiffeomixture, self).__init__(
                dtype=dtype,
                # We hard-code `FULLY_REPARAMETERIZED` because when
                # `validate_args=True` we verify that indeed
                # `distribution.reparameterization_type == FULLY_REPARAMETERIZED`.
                # A distribution which is a function of only non-trainable
                # parameters also implies we can use `FULLY_REPARAMETERIZED`.
                # However, we cannot easily test for that possibility, so we use
                # `validate_args=False` as a "back-door" allowing users to use a
                # non-`FULLY_REPARAMETERIZED` distribution. In such cases IT IS
                # THE USER'S RESPONSIBILITY to verify that the base distribution
                # is a function of non-trainable parameters.
                reparameterization_type=reparameterization.FULLY_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=(
                    distribution._graph_parents  # pylint: disable=protected-access
                    + [loc_ for loc_ in loc if loc_ is not None] +
                    [p for scale_ in scale for p in scale_.graph_parents]),  # pylint: disable=g-complex-comprehension
                name=name)
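A hedged construction sketch for this class, adapted from the class-level docs of the older TFP releases these examples come from (`VectorDiffeomixture` has since been deprecated): a bimixture (`K=2`) on `R^5` between a standard Normal and a shifted, scaled Normal.

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

dims = 5
vdm = tfd.VectorDiffeomixture(
    mix_loc=[[0.], [1.]],
    temperature=[1.],
    distribution=tfd.Normal(loc=0., scale=1.),
    loc=[None,                         # None => zero shift
         np.float32([2.] * dims)],
    scale=[
        tf.linalg.LinearOperatorScaledIdentity(
            num_rows=dims, multiplier=np.float32(1.1),
            is_positive_definite=True),
        tf.linalg.LinearOperatorDiag(
            diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
            is_positive_definite=True),
    ],
    validate_args=True)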
Example #6
    def posterior_marginals(self, observations):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension
        of this tensor gives the steps in a sequence of observations
        from a single sample from the hidden Markov model. The size
        of this dimension should match the `num_steps` parameter
        of the hidden Markov model object. The other dimensions are
        the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.

    Returns:
      A `Categorical` distribution object representing the marginal
      probability of the hidden Markov model being in each state at
      each step. The rightmost dimension of the `Categorical`
      distributions batch will equal the `num_steps` parameter
      providing one marginal distribution for each step. The
      other dimensions are the dimensions corresponding to the
      batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
      have size `num_steps`.
    """

        with tf.compat.v1.name_scope("posterior_marginals",
                                     values=[observations]):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(input=observations)

                with self._observation_shape_preconditions(
                        observation_tensor_shape):
                    observation_batch_shape = observation_tensor_shape[
                        :-1 - self._underlying_event_rank]
                    observation_event_shape = observation_tensor_shape[
                        -1 - self._underlying_event_rank:]

                    batch_shape = tf.broadcast_dynamic_shape(
                        observation_batch_shape, self.batch_shape_tensor())
                    log_init = tf.broadcast_to(
                        self._log_init,
                        tf.concat([batch_shape, [self._num_states]], axis=0))
                    log_transition = self._log_trans

                    observations = tf.broadcast_to(
                        observations,
                        tf.concat([batch_shape, observation_event_shape],
                                  axis=0))
                    observation_rank = tf.rank(observations)
                    underlying_event_rank = self._underlying_event_rank
                    observations = util.move_dimension(
                        observations,
                        observation_rank - underlying_event_rank - 1, 0)
                    observations = tf.expand_dims(
                        observations, observation_rank - underlying_event_rank)
                    observation_log_probs = self._observation_distribution.log_prob(
                        observations)

                    log_adjoint_prob = tf.zeros_like(log_init)

                    def forward_step(log_previous_step, log_prob_observation):
                        return _log_vector_matrix(
                            log_previous_step,
                            log_transition) + log_prob_observation

                    log_prob = log_init + observation_log_probs[0]

                    forward_log_probs = tf.scan(forward_step,
                                                observation_log_probs[1:],
                                                initializer=log_prob,
                                                name="forward_log_probs")

                    forward_log_probs = tf.concat(
                        [[log_prob], forward_log_probs], axis=0)

                    def backward_step(log_previous_step, log_prob_observation):
                        return _log_matrix_vector(
                            log_transition,
                            log_prob_observation + log_previous_step)

                    backward_log_adjoint_probs = tf.scan(
                        backward_step,
                        observation_log_probs[1:],
                        initializer=log_adjoint_prob,
                        reverse=True,
                        name="backward_log_adjoint_probs")

                    total_log_prob = tf.reduce_logsumexp(
                        input_tensor=forward_log_probs[-1], axis=-1)

                    backward_log_adjoint_probs = tf.concat(
                        [backward_log_adjoint_probs, [log_adjoint_prob]],
                        axis=0)

                    log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                    marginal_log_probs = util.move_dimension(
                        log_likelihoods - total_log_prob[..., tf.newaxis], 0,
                        -2)

                    return categorical.Categorical(logits=marginal_log_probs)
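A hedged usage sketch for `posterior_marginals`, mirroring the two-state weather example from the `HiddenMarkovModel` class docs (all numbers are illustrative):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# Two hidden states (cold, hot) with a Normal observation model per state.
hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
    transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]]),
    observation_distribution=tfd.Normal(loc=[0., 15.], scale=[5., 10.]),
    num_steps=7)

temps = tf.constant([-2., 0., 2., 4., 6., 8., 10.])
posterior = hmm.posterior_marginals(temps)  # Categorical with batch shape [7]
posterior.probs_parameter()                 # per-step state probabilities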
Example #7
    def __init__(self,
                 outcomes,
                 logits=None,
                 probs=None,
                 rtol=None,
                 atol=None,
                 validate_args=False,
                 allow_nan_stats=True,
                 name='FiniteDiscrete'):
        """Construct a finite discrete contribution.

    Args:
      outcomes: A 1-D floating or integer `Tensor`, representing a list of
        possible outcomes in strictly ascending order.
      logits: A floating N-D `Tensor`, `N >= 1`, representing the log
        probabilities of a set of FiniteDiscrete distributions. The first `N -
        1` dimensions index into a batch of independent distributions and the
        last dimension represents a vector of logits for each discrete value.
        Only one of `logits` or `probs` should be passed in.
      probs: A floating N-D `Tensor`, `N >= 1`, representing the probabilities
        of a set of FiniteDiscrete distributions. The first `N - 1` dimensions
        index into a batch of independent distributions and the last dimension
        represents a vector of probabilities for each discrete value. Only one
        of `logits` or `probs` should be passed in.
      rtol: `Tensor` with same `dtype` as `outcomes`. The relative tolerance for
        floating number comparison. Only effective when `outcomes` is a floating
        `Tensor`. Default is `10 * eps`.
      atol: `Tensor` with same `dtype` as `outcomes`. The absolute tolerance for
        floating number comparison. Only effective when `outcomes` is a floating
        `Tensor`. Default is `10 * eps`.
      validate_args:  Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may render incorrect outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value '`NaN`' to indicate the
        result is undefined. When `False`, an exception is raised if one or more
        of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    """
        parameters = dict(locals())
        with tf.name_scope(name) as name:
            outcomes_dtype = dtype_util.common_dtype([outcomes],
                                                     dtype_hint=tf.float32)
            self._outcomes = tensor_util.convert_nonref_to_tensor(
                outcomes, dtype_hint=outcomes_dtype, name='outcomes')

            if dtype_util.is_floating(self._outcomes.dtype):
                eps = np.finfo(dtype_util.as_numpy_dtype(outcomes_dtype)).eps
                self._rtol = 10 * eps if rtol is None else rtol
                self._atol = 10 * eps if atol is None else atol
            else:
                self._rtol = None
                self._atol = None

            self._categorical = categorical.Categorical(
                logits=logits,
                probs=probs,
                dtype=tf.int32,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)
        super(FiniteDiscrete, self).__init__(
            dtype=self._outcomes.dtype,
            reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            parameters=parameters,
            name=name)
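A short hedged usage sketch (three outcomes carrying masses 0.2, 0.3, 0.5; the numbers are illustrative):

import tensorflow_probability as tfp
tfd = tfp.distributions

dist = tfd.FiniteDiscrete([1., 2., 4.], probs=[0.2, 0.3, 0.5])
dist.prob(2.)   # ==> 0.3
dist.mean()     # ==> 0.2*1. + 0.3*2. + 0.5*4. = 2.8
dist.sample(5)  # five draws, each in {1., 2., 4.}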
Example #8
 def _sample_n(self, n, seed=None):
   return categorical.Categorical(
       logits=self.categorical_log_probs()).sample(n, seed=seed)
Example #9
 def testPMFWithBatch(self):
     histograms = [[0.2, 0.8], [0.6, 0.4]]
     dist = categorical.Categorical(tf.math.log(histograms) - 50.)
     with self.cached_session():
         self.assertAllClose(dist.prob([0, 1]).eval(), [0.2, 0.4])
Example #10
 def testMode(self):
     histograms = [[[0.2, 0.8], [0.6, 0.4]]]
     dist = categorical.Categorical(tf.math.log(histograms) - 50.)
     self.assertAllEqual([[1, 0]], self.evaluate(dist.mode()))
Example #11
 def testP(self):
     p = [0.2, 0.8]
     dist = categorical.Categorical(probs=p)
     self.assertAllClose(p, self.evaluate(dist.probs))
     self.assertAllEqual([2], dist.logits.shape)
Example #12
 def testEntropyNoBatch(self):
     logits = np.log([0.2, 0.8]) - 50.
     dist = categorical.Categorical(logits)
     self.assertAllClose(-(0.2 * np.log(0.2) + 0.8 * np.log(0.8)),
                         self.evaluate(dist.entropy()))
Example #13
 def testPMFNoBatch(self):
     histograms = [0.2, 0.8]
     dist = categorical.Categorical(tf.math.log(histograms) - 50.)
     self.assertAllClose(0.2, self.evaluate(dist.prob(0)))
Example #14
 def testPMFWithBatch(self):
     histograms = [[0.2, 0.8], [0.6, 0.4]]
     dist = categorical.Categorical(tf.math.log(histograms) - 50.)
     self.assertAllClose([0.2, 0.4], self.evaluate(dist.prob([0, 1])))
Example #15
 def testPMFNoBatch(self):
     histograms = [0.2, 0.8]
     dist = categorical.Categorical(tf.math.log(histograms) - 50.)
     with self.cached_session():
         self.assertAllClose(dist.prob(0).eval(), 0.2)
Example #16
 def _entropy(self):
   return categorical.Categorical(
       logits=self.categorical_log_probs()).entropy()
Example #17
 def testEntropyNoBatch(self):
     logits = np.log([0.2, 0.8]) - 50.
     dist = categorical.Categorical(logits)
     with self.cached_session():
         self.assertAllClose(dist.entropy().eval(),
                             -(0.2 * np.log(0.2) + 0.8 * np.log(0.8)))
Example #18
 def _cdf(self, x):
   return categorical.Categorical(
       logits=self.categorical_log_probs()).cdf(x)
Example #19
def make_categorical(batch_shape, num_classes, dtype=tf.int32):
    logits = tf.random.uniform(
        list(batch_shape) + [num_classes], -10, 10, dtype=tf.float32) - 50.
    return categorical.Categorical(logits, dtype=dtype)
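A brief usage note for the helper above (shape semantics follow `Categorical`'s batch conventions):

# A [2, 3]-batch of 4-class Categorical distributions:
cat = make_categorical(batch_shape=[2, 3], num_classes=4)
# cat.batch_shape == [2, 3]; cat.sample(7) has shape [7, 2, 3] and holds
# int32 class indices in {0, 1, 2, 3}.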
Example #20
    def __init__(self,
                 loc,
                 scale,
                 quadrature_size=8,
                 quadrature_fn=quadrature_scheme_lognormal_quantiles,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="PoissonLogNormalQuadratureCompound"):
        """Constructs the PoissonLogNormalQuadratureCompound`.

    Note: `probs` returned by (optional) `quadrature_fn` are presumed to be
    either a length-`quadrature_size` vector or a batch of vectors in 1-to-1
    correspondence with the returned `grid`. (I.e., broadcasting is only
    partially supported.)

    Args:
      loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
        the LogNormal prior.
      scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
        the LogNormal prior.
      quadrature_size: Python `int` scalar representing the number of quadrature
        points.
      quadrature_fn: Python callable taking `loc`, `scale`,
        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
        representing the LogNormal grid and corresponding normalized weight.
        Default value: `quadrature_scheme_lognormal_quantiles`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if `quadrature_grid` and `quadrature_probs` have different base
        `dtype`.
    """
        parameters = dict(locals())
        with tf.name_scope(name, values=[loc, scale]) as name:
            dtype = dtype_util.common_dtype([loc, scale], tf.float32)
            if loc is not None:
                loc = tf.convert_to_tensor(loc, name="loc", dtype=dtype)
            if scale is not None:
                scale = tf.convert_to_tensor(scale, dtype=dtype, name="scale")
            self._quadrature_grid, self._quadrature_probs = tuple(
                quadrature_fn(loc, scale, quadrature_size, validate_args))

            dt = self._quadrature_grid.dtype
            if dt.base_dtype != self._quadrature_probs.dtype.base_dtype:
                raise TypeError(
                    "Quadrature grid dtype ({}) does not match quadrature "
                    "probs dtype ({}).".format(
                        dt.name, self._quadrature_probs.dtype.name))

            self._distribution = poisson.Poisson(
                log_rate=self._quadrature_grid,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)

            self._mixture_distribution = categorical.Categorical(
                logits=tf.math.log(self._quadrature_probs),
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats)

            self._loc = loc
            self._scale = scale
            self._quadrature_size = quadrature_size

            super(PoissonLogNormalQuadratureCompound, self).__init__(
                dtype=dt,
                reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=[loc, scale],
                name=name)
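A hedged usage sketch for this class (assuming the `tfd.PoissonLogNormalQuadratureCompound` export available in the TFP releases these examples come from; the numbers are illustrative):

import tensorflow_probability as tfp
tfd = tfp.distributions

# A Poisson whose log-rate has a Normal(0, 1) prior, approximated with an
# 8-point quadrature mixture.
pln = tfd.PoissonLogNormalQuadratureCompound(
    loc=0., scale=1., quadrature_size=8, validate_args=True)
counts = pln.sample(3)  # non-negative count-valued draws
pln.log_prob(counts)    # log-density under the quadrature approximation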
Example #21
 def testP(self):
     p = [0.2, 0.8]
     dist = categorical.Categorical(probs=p)
     with self.cached_session():
         self.assertAllClose(p, dist.probs.eval())
         self.assertAllEqual([2], dist.logits.shape)
Example #22
def infer_trajectories(observations,
                       initial_state_prior,
                       transition_fn,
                       observation_fn,
                       num_particles,
                       initial_state_proposal=None,
                       proposal_fn=None,
                       resample_criterion_fn=ess_below_threshold,
                       rejuvenation_kernel_fn=None,
                       num_transitions_per_observation=1,
                       num_steps_state_history_to_pass=None,
                       num_steps_observation_history_to_pass=None,
                       seed=None,
                       name=None):  # pylint: disable=g-doc-args
    """Use particle filtering to sample from the posterior over trajectories.

  ${particle_filter_arg_str}
  Returns:
    trajectories: a (structure of) Tensor(s) matching the latent state, each
      of shape
      `concat([[num_timesteps, num_particles, b1, ..., bN], event_shape])`,
      representing unbiased samples from the posterior distribution
      `p(latent_states | observations)`.
    step_log_marginal_likelihoods: float `Tensor` of shape
      `[num_observation_steps, b1, ..., bN]`,
      giving the natural logarithm of an unbiased estimate of
      `p(observations[t] | observations[:t])` at each timestep `t`. Note that
      (by [Jensen's inequality](
      https://en.wikipedia.org/wiki/Jensen%27s_inequality))
      this is *smaller* in expectation than the true
      `log p(observations[t] | observations[:t])`.

  ${non_markovian_specification_str}

  #### Examples

  **Tracking unknown position and velocity**: Let's consider tracking an object
  moving in a one-dimensional space. We'll define a dynamical system
  by specifying an `initial_state_prior`, a `transition_fn`,
  and `observation_fn`.

  The structure of the latent state space is determined by the prior
  distribution. Here, we'll define a state space that includes the object's
  current position and velocity:

  ```python
  initial_state_prior = tfd.JointDistributionNamed({
      'position': tfd.Normal(loc=0., scale=1.),
      'velocity': tfd.Normal(loc=0., scale=0.1)})
  ```

  The `transition_fn` specifies the evolution of the system. It should
  return a distribution over latent states of the same structure as the prior.
  Here, we'll assume that the position evolves according to the velocity,
  with a small random drift, and the velocity also changes slowly, following
  a random drift:

  ```python
  def transition_fn(_, previous_state):
    return tfd.JointDistributionNamed({
        'position': tfd.Normal(
            loc=previous_state['position'] + previous_state['velocity'],
            scale=0.1),
        'velocity': tfd.Normal(loc=previous_state['velocity'], scale=0.01)})
  ```

  The `observation_fn` specifies the process by which the system is observed
  at each time step. Let's suppose we observe only a noisy version of the
  current position.

  ```python
    def observation_fn(_, state):
      return tfd.Normal(loc=state['position'], scale=0.1)
  ```

  Now let's track our object. Suppose we've been given observations
  corresponding to an initial position of `0.4` and constant velocity of `0.01`:

  ```python
  # Generate simulated observations.
  observed_positions = tfd.Normal(loc=tf.linspace(0.4, 0.8, num=40),
                                  scale=0.1).sample()

  # Run particle filtering to sample plausible trajectories.
  (trajectories,  # {'position': [40, 1000], 'velocity': [40, 1000]}
   lps) = tfp.experimental.mcmc.infer_trajectories(
            observations=observed_positions,
            initial_state_prior=initial_state_prior,
            transition_fn=transition_fn,
            observation_fn=observation_fn,
            num_particles=1000)
  ```

  For all `i`, `trajectories['position'][:, i]` is a sample from the
  posterior over position sequences, given the observations:
  `p(state[0:T] | observations[0:T])`. Often, the sampled trajectories
  will be highly redundant in their earlier timesteps, because most
  of the initial particles have been discarded through resampling
  (this problem is known as 'particle degeneracy'; see section 3.5 of
  [Doucet and Johansen][1]).
  In such cases it may be useful to also consider the series of *filtering*
  distributions `p(state[t] | observations[:t])`, in which each latent state
  is inferred conditioned only on observations up to that point in time; these
  may be computed using `tfp.experimental.mcmc.particle_filter`.

  #### References

  [1] Arnaud Doucet and Adam M. Johansen. A tutorial on particle
      filtering and smoothing: Fifteen years later.
      _Handbook of nonlinear filtering_, 12(656-704), 2009.
      https://www.stats.ox.ac.uk/~doucet/doucet_johansen_tutorialPF2011.pdf

  """
    with tf.name_scope(name or 'infer_trajectories') as name:
        seed = SeedStream(seed, 'infer_trajectories')
        (particles, log_weights, parent_indices,
         step_log_marginal_likelihoods) = particle_filter(
             observations=observations,
             initial_state_prior=initial_state_prior,
             transition_fn=transition_fn,
             observation_fn=observation_fn,
             num_particles=num_particles,
             initial_state_proposal=initial_state_proposal,
             proposal_fn=proposal_fn,
             resample_criterion_fn=resample_criterion_fn,
             rejuvenation_kernel_fn=rejuvenation_kernel_fn,
             num_transitions_per_observation=num_transitions_per_observation,
             num_steps_state_history_to_pass=num_steps_state_history_to_pass,
             num_steps_observation_history_to_pass=(
                 num_steps_observation_history_to_pass),
             seed=seed,
             name=name)
        weighted_trajectories = reconstruct_trajectories(
            particles, parent_indices)

        # Resample all steps of the trajectories using the final weights.
        resample_indices = categorical.Categorical(
            dist_util.move_dimension(log_weights[-1, ...],
                                     source_idx=0,
                                     dest_idx=-1)).sample(num_particles,
                                                          seed=seed)
        trajectories = tf.nest.map_structure(
            lambda x: _batch_gather(x, resample_indices, axis=1),
            weighted_trajectories)

        return trajectories, step_log_marginal_likelihoods
Example #23
 def testMode(self):
     with self.cached_session():
         histograms = [[[0.2, 0.8], [0.6, 0.4]]]
         dist = categorical.Categorical(tf.math.log(histograms) - 50.)
         self.assertAllEqual(dist.mode().eval(), [[1, 0]])
Example #24
    def posterior_marginals(self, observations, mask=None, name=None):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension of this tensor
        gives the steps in a sequence of observations from a single sample from
        the hidden Markov model. The size of this dimension should match the
        `num_steps` parameter of the hidden Markov model object. The other
        dimensions are the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.
      mask: optional bool-type `Tensor` with rightmost dimension matching
        `num_steps` indicating which observations the result of this
        function should be conditioned on. When the mask has value
        `True` the corresponding observations aren't used.
        If `mask` is `None` then all of the observations are used.
        The `mask` dimensions left of the last are broadcast with the
        HMM batch as well as with the observations.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "HiddenMarkovModel".

    Returns:
      posterior_marginal: A `Categorical` distribution object representing the
        marginal probability of the hidden Markov model being in each state at
        each step. The rightmost dimension of the `Categorical` distributions
        batch will equal the `num_steps` parameter providing one marginal
        distribution for each step. The other dimensions are the dimensions
        corresponding to the batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
      have size `num_steps`.
    """

        with tf.name_scope(name or "posterior_marginals"):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(observations)
                mask_tensor_shape = tf.shape(
                    mask) if mask is not None else None

                with self._observation_mask_shape_preconditions(
                        observation_tensor_shape, mask_tensor_shape):
                    observation_log_probs = self._observation_log_probs(
                        observations, mask)
                    log_prob = self._log_init + observation_log_probs[0]
                    log_transition = self._log_trans
                    log_adjoint_prob = tf.zeros_like(log_prob)

                    def _scan_multiple_steps_forwards():
                        def forward_step(log_previous_step,
                                         log_prob_observation):
                            return _log_vector_matrix(
                                log_previous_step,
                                log_transition) + log_prob_observation

                        forward_log_probs = tf.scan(forward_step,
                                                    observation_log_probs[1:],
                                                    initializer=log_prob,
                                                    name="forward_log_probs")
                        return tf.concat([[log_prob], forward_log_probs],
                                         axis=0)

                    forward_log_probs = prefer_static.cond(
                        self._num_steps > 1, _scan_multiple_steps_forwards,
                        lambda: tf.convert_to_tensor([log_prob]))

                    total_log_prob = tf.reduce_logsumexp(forward_log_probs[-1],
                                                         axis=-1)

                    def _scan_multiple_steps_backwards():
                        """Perform `scan` operation when `num_steps` > 1."""
                        def backward_step(log_previous_step,
                                          log_prob_observation):
                            return _log_matrix_vector(
                                log_transition,
                                log_prob_observation + log_previous_step)

                        backward_log_adjoint_probs = tf.scan(
                            backward_step,
                            observation_log_probs[1:],
                            initializer=log_adjoint_prob,
                            reverse=True,
                            name="backward_log_adjoint_probs")

                        return tf.concat(
                            [backward_log_adjoint_probs, [log_adjoint_prob]],
                            axis=0)

                    backward_log_adjoint_probs = prefer_static.cond(
                        self._num_steps > 1, _scan_multiple_steps_backwards,
                        lambda: tf.convert_to_tensor([log_adjoint_prob]))

                    log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                    marginal_log_probs = distribution_util.move_dimension(
                        log_likelihoods - total_log_prob[..., tf.newaxis], 0,
                        -2)

                    return categorical.Categorical(logits=marginal_log_probs)
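A hedged continuation of the HMM sketch after Example #6, showing the `mask` argument (it reuses the hypothetical `hmm` and `temps` defined there; `mask`'s rightmost dimension must match `num_steps`):

# Condition on all observations except the third step.
mask = tf.constant([False, False, True, False, False, False, False])
posterior = hmm.posterior_marginals(temps, mask=mask)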
Example #25
  def testBroadcastWithBatchParamsAndBiggerEvent(self):
    ## The parameters have a single batch dimension, and the event has two.

    # param shape is [3 x 4], where 4 is the number of bins (non-batch dim).
    cat_params_py = [
        [0.2, 0.15, 0.35, 0.3],
        [0.1, 0.05, 0.68, 0.17],
        [0.1, 0.05, 0.68, 0.17]
    ]

    # event shape = [5, 3], both are "batch" dimensions.
    disc_event_py = [
        [0, 1, 2],
        [1, 2, 3],
        [0, 0, 0],
        [1, 1, 1],
        [2, 1, 0]
    ]

    # shape is [3]
    normal_params_py = [
        -10.0,
        120.0,
        50.0
    ]

    # shape is [5, 3]
    real_event_py = [
        [-1.0, 0.0, 1.0],
        [100.0, 101, -50],
        [90, 90, 90],
        [-4, -400, 20.0],
        [0.0, 0.0, 0.0]
    ]

    cat_params_tf = tf.constant(cat_params_py)
    disc_event_tf = tf.constant(disc_event_py)
    cat = categorical.Categorical(probs=cat_params_tf)

    normal_params_tf = tf.constant(normal_params_py)
    real_event_tf = tf.constant(real_event_py)
    norm = normal.Normal(loc=normal_params_tf, scale=1.0)

    # Check that normal and categorical have the same broadcasting behaviour.
    to_run = {
        "cat_prob": cat.prob(disc_event_tf),
        "cat_log_prob": cat.log_prob(disc_event_tf),
        "cat_cdf": cat.cdf(disc_event_tf),
        "cat_log_cdf": cat.log_cdf(disc_event_tf),
        "norm_prob": norm.prob(real_event_tf),
        "norm_log_prob": norm.log_prob(real_event_tf),
        "norm_cdf": norm.cdf(real_event_tf),
        "norm_log_cdf": norm.log_cdf(real_event_tf),
    }

    with self.cached_session() as sess:
      run_result = sess.run(to_run)

    self.assertAllEqual(run_result["cat_prob"].shape,
                        run_result["norm_prob"].shape)
    self.assertAllEqual(run_result["cat_log_prob"].shape,
                        run_result["norm_log_prob"].shape)
    self.assertAllEqual(run_result["cat_cdf"].shape,
                        run_result["norm_cdf"].shape)
    self.assertAllEqual(run_result["cat_log_cdf"].shape,
                        run_result["norm_log_cdf"].shape)