Example #1
        )
        # Get a list of probabilities for each event with shape (batch_size,)
        probabilities = decay_rate(coeffs, candidates) / norm

        # Get a uniform distribution of numbers between 0 and 1 with shape (batch_size,)
        uniforms = uniform_dist.sample(batch_size)

        # Get the row indices of the probabilities tensor (and therefore of the
        #  candidates tensor) that we accept (uniform value < probability value)
        accept_candidates_ids = tf.squeeze(tf.where(tf.less(uniforms, probabilities)), -1)
        # Use indexes to gather candidates we accept
        accept_candidates = tf.gather(candidates, accept_candidates_ids)

        # Append accepted candidates to our events list
        events.append(accept_candidates)
        events_found = events_found + tf.shape(accept_candidates)[0]

    # Concatenate our event tensors and keep only the first events_total rows
    return tf.concat(events, axis=0)[0:events_total, :]

def generate_all(sig_coeffs, back_coeffs, events_total=100000, alpha=0.8, poisson=False):
    # Optionally draw the total number of events from a Poisson distribution.
    if poisson:
        events_poisson = int(np.random.poisson(events_total, 1)[0])
    else:
        events_poisson = events_total
    # Split the total between signal (fraction alpha) and background (fraction 1 - alpha).
    sig_events = generate_signal_mass(sig_coeffs, events_total=int(alpha * events_poisson))
    back_events = generate_background_mass(back_coeffs, events_total=int((1 - alpha) * events_poisson))
    events = tf.concat([sig_events, back_events], axis=0)
    return events
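
`generate_all` above splits a requested number of events between signal and background. A minimal usage sketch, assuming the helper generators it calls (`generate_signal_mass`, `generate_background_mass`) are importable from the same module; the coefficient values below are purely hypothetical:

```python
import numpy as np
import tensorflow as tf

# Hypothetical shape coefficients for the signal and background models.
sig_coeffs = tf.constant([1.0, 0.5, 0.1])
back_coeffs = tf.constant([0.3, 0.2])

# 80% signal / 20% background, with the total drawn from a Poisson.
events = generate_all(sig_coeffs, back_coeffs,
                      events_total=10_000, alpha=0.8, poisson=True)
print(events.shape)  # roughly (10_000, n_features)
```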

def generate_background(coeffs, events_total=20_000, batch_size=100_000):
Example #2
    def __init__(self,
                 initial_distribution,
                 transition_distribution,
                 observation_distribution,
                 num_steps,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="HiddenMarkovModel"):
        """Initialize hidden Markov model.

    Args:
      initial_distribution: A `Categorical`-like instance.
        Determines probability of first hidden state in Markov chain.
        The number of categories must match the number of categories of
        `transition_distribution` as well as both the rightmost batch
        dimension of `transition_distribution` and the rightmost batch
        dimension of `observation_distribution`.
      transition_distribution: A `Categorical`-like instance.
        The rightmost batch dimension indexes the probability distribution
        of each hidden state conditioned on the previous hidden state.
      observation_distribution: A `tfp.distributions.Distribution`-like
        instance.  The rightmost batch dimension indexes the distribution
        of each observation conditioned on the corresponding hidden state.
      num_steps: The number of steps taken in the Markov chain; a Python `int`.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
        Default value: `False`.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
        Default value: `True`.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "HiddenMarkovModel".

    Raises:
      ValueError: if `num_steps` is not at least 1.
      ValueError: if `initial_distribution` does not have scalar `event_shape`.
      ValueError: if `transition_distribution` does not have scalar
        `event_shape`.
      ValueError: if `transition_distribution` and `observation_distribution`
        are fully defined but don't have matching rightmost dimension.
    """

        parameters = dict(locals())

        # pylint: disable=protected-access
        with tf.name_scope(name) as name:
            self._runtime_assertions = []  # pylint: enable=protected-access

            if num_steps < 1:
                raise ValueError(
                    "num_steps ({}) must be at least 1.".format(num_steps))

            self._initial_distribution = initial_distribution
            self._observation_distribution = observation_distribution
            self._transition_distribution = transition_distribution

            if (initial_distribution.event_shape is not None
                    and tensorshape_util.rank(
                        initial_distribution.event_shape) != 0):
                raise ValueError(
                    "`initial_distribution` must have scalar `event_dim`s")
            elif validate_args:
                self._runtime_assertions += [
                    assert_util.assert_equal(
                        tf.shape(input=initial_distribution.event_shape_tensor(
                        ))[0],
                        0,
                        message="`initial_distribution` must have scalar"
                        "`event_dim`s")
                ]

            if (transition_distribution.event_shape is not None
                    and tensorshape_util.rank(
                        transition_distribution.event_shape) != 0):
                raise ValueError(
                    "`transition_distribution` must have scalar `event_dim`s")
            elif validate_args:
                self._runtime_assertions += [
                    assert_util.assert_equal(
                        tf.shape(input=transition_distribution.
                                 event_shape_tensor())[0],
                        0,
                        message="`transition_distribution` must have scalar"
                        "`event_dim`s")
                ]

            if (transition_distribution.batch_shape is not None
                    and tensorshape_util.rank(
                        transition_distribution.batch_shape) == 0):
                raise ValueError(
                    "`transition_distribution` can't have scalar batches")
            elif validate_args:
                self._runtime_assertions += [
                    assert_util.assert_greater(
                        tf.size(input=transition_distribution.
                                batch_shape_tensor()),
                        0,
                        message="`transition_distribution` can't have scalar "
                        "batches")
                ]

            if (observation_distribution.batch_shape is not None
                    and tensorshape_util.rank(
                        observation_distribution.batch_shape) == 0):
                raise ValueError(
                    "`observation_distribution` can't have scalar batches")
            elif validate_args:
                self._runtime_assertions += [
                    assert_util.assert_greater(
                        tf.size(input=observation_distribution.
                                batch_shape_tensor()),
                        0,
                        message="`observation_distribution` can't have scalar "
                        "batches")
                ]

            # Infer number of hidden states and check consistency
            # between transitions and observations
            with tf.control_dependencies(self._runtime_assertions):
                self._num_states = (
                    (transition_distribution.batch_shape
                     and transition_distribution.batch_shape[-1])
                    or transition_distribution.batch_shape_tensor()[-1])

                observation_states = (
                    (observation_distribution.batch_shape
                     and observation_distribution.batch_shape[-1])
                    or observation_distribution.batch_shape_tensor()[-1])

            if (tf.is_tensor(self._num_states)
                    or tf.is_tensor(observation_states)):
                if validate_args:
                    self._runtime_assertions += [
                        assert_util.assert_equal(
                            self._num_states,
                            observation_states,
                            message="`transition_distribution` and "
                            "`observation_distribution` must agree on "
                            "last dimension of batch size")
                    ]
            elif self._num_states != observation_states:
                raise ValueError("`transition_distribution` and "
                                 "`observation_distribution` must agree on "
                                 "last dimension of batch size")

            self._log_init = _extract_log_probs(self._num_states,
                                                initial_distribution)
            self._log_trans = _extract_log_probs(self._num_states,
                                                 transition_distribution)

            self._num_steps = num_steps
            self._num_states = tf.shape(input=self._log_init)[-1]

            self._underlying_event_rank = tf.size(
                input=self._observation_distribution.event_shape_tensor())

            self.static_event_shape = tf.TensorShape([num_steps]).concatenate(
                self._observation_distribution.event_shape)

            with tf.control_dependencies(self._runtime_assertions):
                self.static_batch_shape = tf.broadcast_static_shape(
                    self._initial_distribution.batch_shape,
                    tf.broadcast_static_shape(
                        self._transition_distribution.batch_shape[:-1],
                        self._observation_distribution.batch_shape[:-1]))

            # pylint: disable=protected-access
            super(HiddenMarkovModel, self).__init__(
                dtype=self._observation_distribution.dtype,
                reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=(self._initial_distribution._graph_parents +
                               self._transition_distribution._graph_parents +
                               self._observation_distribution._graph_parents),
                name=name)
            # pylint: enable=protected-access

            self._parameters = parameters
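
As a quick illustration of the shape constraints the constructor checks above, here is a hedged construction sketch (assuming `tensorflow_probability` is installed): the rightmost batch dimensions of the transition and observation distributions must both equal the number of hidden states.

```python
import tensorflow_probability as tfp
tfd = tfp.distributions

# Three hidden states: both rightmost batch dimensions below are 3.
hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.6, 0.3, 0.1]),
    transition_distribution=tfd.Categorical(
        probs=[[0.8, 0.1, 0.1],
               [0.2, 0.7, 0.1],
               [0.1, 0.2, 0.7]]),
    observation_distribution=tfd.Poisson(rate=[1., 5., 10.]),
    num_steps=10,
    validate_args=True)
print(hmm.sample(2).shape)  # (2, 10): sample shape + [num_steps]
```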
Example #3
    def posterior_mode(self, observations, name=None):
        """Compute maximum likelihood sequence of hidden states.

    When this function is provided with a sequence of observations
    `x[0], ..., x[num_steps - 1]`, it returns the sequence of hidden
    states `z[0], ..., z[num_steps - 1]`, drawn from the underlying
    Markov chain, that is most likely to yield those observations.

    It uses the [Viterbi algorithm](
    https://en.wikipedia.org/wiki/Viterbi_algorithm).

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Note: if there isn't a unique most likely sequence then one
    of the equally most likely sequences is chosen.

    Args:
      observations: A tensor representing a batch of observations made on the
        hidden Markov model.  The rightmost dimensions of this tensor correspond
        to the dimensions of the observation distributions of the underlying
        Markov chain.  The next dimension from the right indexes the steps in a
        sequence of observations from a single sample from the hidden Markov
        model.  The size of this dimension should match the `num_steps`
        parameter of the hidden Markov model object.  The other dimensions are
        the dimensions of the batch and these are broadcast with the hidden
        Markov model's parameters.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "posterior_mode".

    Returns:
      posterior_mode: A `Tensor` representing the most likely sequence of hidden
        states. The rightmost dimension of this tensor will equal the
        `num_steps` parameter providing one hidden state for each step. The
        other dimensions are those of the batch.

    Raises:
      ValueError: if the `observations` tensor does not consist of
      sequences of `num_steps` observations.

    #### Examples

    ```python
    tfd = tfp.distributions

    # A simple weather model.

    # Represent a cold day with 0 and a hot day with 1.
    # Suppose the first day of a sequence has a 0.8 chance of being cold.

    initial_distribution = tfd.Categorical(probs=[0.8, 0.2])

    # Suppose a cold day has a 30% chance of being followed by a hot day
    # and a hot day has a 20% chance of being followed by a cold day.

    transition_distribution = tfd.Categorical(probs=[[0.7, 0.3],
                                                     [0.2, 0.8]])

    # Suppose additionally that on each day the temperature is
    # normally distributed with mean and standard deviation 0 and 5 on
    # a cold day and mean and standard deviation 15 and 10 on a hot day.

    observation_distribution = tfd.Normal(loc=[0., 15.], scale=[5., 10.])

    # This gives the hidden Markov model:

    model = tfd.HiddenMarkovModel(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=observation_distribution,
        num_steps=7)

    # Suppose we observe gradually rising temperatures over a week:
    temps = [-2., 0., 2., 4., 6., 8., 10.]

    # We can now compute the most probable sequence of hidden states:

    model.posterior_mode(temps)

    # The result is [0 0 0 0 0 1 1] telling us that the transition
    # from "cold" to "hot" most likely happened between the
    # 5th and 6th days.
    ```
    """

        with tf.name_scope(name or "posterior_mode"):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(input=observations)

                with self._observation_shape_preconditions(
                        observation_tensor_shape):
                    observation_batch_shape = observation_tensor_shape[
                        :-1 - self._underlying_event_rank]
                    observation_event_shape = observation_tensor_shape[
                        -1 - self._underlying_event_rank:]

                    batch_shape = tf.broadcast_dynamic_shape(
                        observation_batch_shape, self.batch_shape_tensor())
                    log_init = tf.broadcast_to(
                        self._log_init,
                        tf.concat([batch_shape, [self._num_states]], axis=0))

                    observations = tf.broadcast_to(
                        observations,
                        tf.concat([batch_shape, observation_event_shape],
                                  axis=0))
                    observation_rank = tf.rank(observations)
                    underlying_event_rank = self._underlying_event_rank
                    observations = distribution_util.move_dimension(
                        observations,
                        observation_rank - underlying_event_rank - 1, 0)

                    # We need to compute the probability of each observation for
                    # each possible state.
                    # This requires inserting an extra index just before the
                    # observation event indices that will be broadcast with the
                    # last batch index in `observation_distribution`.
                    observations = tf.expand_dims(
                        observations, observation_rank - underlying_event_rank)
                    observation_log_probs = self._observation_distribution.log_prob(
                        observations)

                    log_prob = log_init + observation_log_probs[0]

                    if self._num_steps == 1:
                        most_likely_end = tf.argmax(input=log_prob, axis=-1)
                        return most_likely_end[..., tf.newaxis]

                    def forward_step(previous_step_pair, log_prob_observation):
                        log_prob_previous = previous_step_pair[0]
                        log_prob = (log_prob_previous[..., tf.newaxis] +
                                    self._log_trans +
                                    log_prob_observation[..., tf.newaxis, :])
                        most_likely_given_successor = tf.argmax(input=log_prob,
                                                                axis=-2)
                        max_log_p_given_successor = tf.reduce_max(
                            input_tensor=log_prob, axis=-2)
                        return (max_log_p_given_successor,
                                most_likely_given_successor)

                    forward_log_probs, all_most_likely_given_successor = tf.scan(
                        forward_step,
                        observation_log_probs[1:],
                        initializer=(log_prob,
                                     tf.zeros(tf.shape(input=log_init),
                                              dtype=tf.int64)),
                        name="forward_log_probs")

                    most_likely_end = tf.argmax(input=forward_log_probs[-1],
                                                axis=-1)

                    # We require the operation that gives C from A and B where
                    # C[i...j] = A[i...j, B[i...j]]
                    # and A = most_likely_given_successor
                    #     B = most_likely_successor.
                    # tf.gather requires indices of known shape, so instead we
                    # use a reduction with tf.one_hot(B) to pick out elements from A
                    def backward_step(most_likely_successor,
                                      most_likely_given_successor):
                        return tf.reduce_sum(
                            input_tensor=(most_likely_given_successor *
                                          tf.one_hot(most_likely_successor,
                                                     self._num_states,
                                                     dtype=tf.int64)),
                            axis=-1)

                    backward_scan = tf.scan(backward_step,
                                            all_most_likely_given_successor,
                                            most_likely_end,
                                            reverse=True)
                    most_likely_sequences = tf.concat(
                        [backward_scan, [most_likely_end]], axis=0)
                    return distribution_util.move_dimension(
                        most_likely_sequences, 0, -1)
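
The backward pass above replaces `tf.gather` with a one-hot reduction. A standalone sketch of that trick, using toy values unrelated to the HMM:

```python
import tensorflow as tf

# Want C[i] = A[i, B[i]] without tf.gather.
A = tf.constant([[0, 1, 2],
                 [3, 4, 5]], dtype=tf.int64)
B = tf.constant([2, 0], dtype=tf.int64)
C = tf.reduce_sum(A * tf.one_hot(B, 3, dtype=tf.int64), axis=-1)
print(C.numpy())  # [2 3]
```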
Example #4
 def _get_noise_shape(self, inputs):
     input_shape = tf.shape(inputs)
     noise_shape = (input_shape[0], 1, input_shape[2])
     return noise_shape
 def test_gumbel_max_retrieval_fn_has_correct_output_shape(self):
     scores = tf.convert_to_tensor([[1.0, 0.0, 0.0], [100.0, 0.0, 0.0],
                                    [0.0, 0.0, -1.0]])
     gumbel_max_retrieval_fn = retrieval_fns.GumbelMaxRetrievalFn()
     output = gumbel_max_retrieval_fn(scores)
     self.assertAllEqual([3, 1], tf.shape(output))
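
For context, a `_get_noise_shape` of this kind is typically fed to `tf.nn.dropout` so that the same mask is broadcast across the time axis. A small sketch with hypothetical shapes:

```python
import tensorflow as tf

x = tf.random.normal([4, 10, 8])  # (batch, time, features)
noise_shape = (tf.shape(x)[0], 1, tf.shape(x)[2])
# One dropout mask per (batch, feature), reused across all 10 time steps.
y = tf.nn.dropout(x, rate=0.5, noise_shape=noise_shape)
print(y.shape)  # (4, 10, 8)
```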
Example #6
 def _batch_shape_tensor(self):
     return tf.shape(self.concentration)[:-1]
Example #7
 def _batch_shape_tensor(self, low=None, high=None):
   return tf.broadcast_dynamic_shape(
       tf.shape(self.low if low is None else low),
       tf.shape(self.high if high is None else high))
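
This `_batch_shape_tensor` combines the shapes of `low` and `high` by broadcasting. A quick illustration of `tf.broadcast_dynamic_shape` on its own:

```python
import tensorflow as tf

low = tf.zeros([3, 1])
high = tf.ones([1, 4])
print(tf.broadcast_dynamic_shape(tf.shape(low), tf.shape(high)).numpy())  # [3 4]
```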
Example #8
    def posterior_marginals(self,
                            observations,
                            mask=None,
                            name='posterior_marginals'):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension of this tensor
        gives the steps in a sequence of observations from a single sample from
        the hidden Markov model. The size of this dimension should match the
        `num_steps` parameter of the hidden Markov model object. The other
        dimensions are the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.
      mask: optional bool-type `tensor` with rightmost dimension matching
        `num_steps` indicating which observations the result of this
        function should be conditioned on. When the mask has value
        `True` the corresponding observations aren't used.
        If `mask` is `None` then all of the observations are used.
        The `mask` dimensions left of the last are broadcast with the
        HMM batch as well as with the observations.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "posterior_marginals".

    Returns:
      posterior_marginal: A `Categorical` distribution object representing the
        marginal probability of the hidden Markov model being in each state at
        each step. The rightmost dimension of the `Categorical` distribution's
        batch will equal the `num_steps` parameter, providing one marginal
        distribution for each step. The other dimensions are the dimensions
        corresponding to the batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
      have size `num_steps`.
    """

        with self._name_and_control_scope(name):
            observation_tensor_shape = tf.shape(observations)
            observation_distribution = self.observation_distribution
            underlying_event_rank = tf.size(
                observation_distribution.event_shape_tensor())
            mask_tensor_shape = tf.shape(mask) if mask is not None else None
            num_states = self.transition_distribution.batch_shape_tensor()[-1]

            with self._observation_mask_shape_preconditions(
                    observation_tensor_shape, mask_tensor_shape,
                    underlying_event_rank):
                observation_log_probs = self._observation_log_probs(
                    observations, mask)
                log_init = _extract_log_probs(num_states,
                                              self.initial_distribution)
                log_prob = log_init + observation_log_probs[0]
                log_transition = _extract_log_probs(
                    num_states, self.transition_distribution)
                log_adjoint_prob = tf.zeros_like(log_prob)

                def _scan_multiple_steps_forwards():
                    def forward_step(log_previous_step, log_prob_observation):
                        return _log_vector_matrix(
                            log_previous_step,
                            log_transition) + log_prob_observation

                    forward_log_probs = tf.scan(forward_step,
                                                observation_log_probs[1:],
                                                initializer=log_prob,
                                                name='forward_log_probs')
                    return tf.concat([[log_prob], forward_log_probs], axis=0)

                forward_log_probs = prefer_static.cond(
                    self._num_steps > 1, _scan_multiple_steps_forwards,
                    lambda: tf.convert_to_tensor([log_prob]))

                total_log_prob = tf.reduce_logsumexp(forward_log_probs[-1],
                                                     axis=-1)

                def _scan_multiple_steps_backwards():
                    """Perform `scan` operation when `num_steps` > 1."""
                    def backward_step(log_previous_step, log_prob_observation):
                        return _log_matrix_vector(
                            log_transition,
                            log_prob_observation + log_previous_step)

                    backward_log_adjoint_probs = tf.scan(
                        backward_step,
                        observation_log_probs[1:],
                        initializer=log_adjoint_prob,
                        reverse=True,
                        name='backward_log_adjoint_probs')

                    return tf.concat(
                        [backward_log_adjoint_probs, [log_adjoint_prob]],
                        axis=0)

                backward_log_adjoint_probs = prefer_static.cond(
                    self._num_steps > 1, _scan_multiple_steps_backwards,
                    lambda: tf.convert_to_tensor([log_adjoint_prob]))

                log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                marginal_log_probs = distribution_util.move_dimension(
                    log_likelihoods - total_log_prob[..., tf.newaxis], 0, -2)

                return categorical.Categorical(logits=marginal_log_probs)
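
A minimal usage sketch for `posterior_marginals`, assuming `tensorflow_probability` is available and reusing the weather model from the `posterior_mode` docstring shown earlier on this page:

```python
import tensorflow_probability as tfp
tfd = tfp.distributions

model = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
    transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]]),
    observation_distribution=tfd.Normal(loc=[0., 15.], scale=[5., 10.]),
    num_steps=7)

# Marginal P(z[i] | x[0], ..., x[6]) for each day: 7 steps, 2 states.
marginals = model.posterior_marginals([-2., 0., 2., 4., 6., 8., 10.])
print(marginals.probs_parameter().shape)  # (7, 2)
```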
Example #9
    def posterior_mode(self, observations, mask=None, name='posterior_mode'):
        """Compute maximum likelihood sequence of hidden states.

    When this function is provided with a sequence of observations
    `x[0], ..., x[num_steps - 1]`, it returns the sequence of hidden
    states `z[0], ..., z[num_steps - 1]`, drawn from the underlying
    Markov chain, that is most likely to yield those observations.

    It uses the [Viterbi algorithm](
    https://en.wikipedia.org/wiki/Viterbi_algorithm).

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Note: if there isn't a unique most likely sequence then one
    of the equally most likely sequences is chosen.

    Args:
      observations: A tensor representing a batch of observations made on the
        hidden Markov model.  The rightmost dimensions of this tensor correspond
        to the dimensions of the observation distributions of the underlying
        Markov chain.  The next dimension from the right indexes the steps in a
        sequence of observations from a single sample from the hidden Markov
        model.  The size of this dimension should match the `num_steps`
        parameter of the hidden Markov model object.  The other dimensions are
        the dimensions of the batch and these are broadcast with the hidden
        Markov model's parameters.
      mask: optional bool-type `tensor` with rightmost dimension matching
        `num_steps` indicating which observations the result of this
        function should be conditioned on. When the mask has value
        `True` the corresponding observations aren't used.
        If `mask` is `None` then all of the observations are used.
        The `mask` dimensions left of the last are broadcast with the
        HMM batch as well as with the observations.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "posterior_mode".

    Returns:
      posterior_mode: A `Tensor` representing the most likely sequence of hidden
        states. The rightmost dimension of this tensor will equal the
        `num_steps` parameter providing one hidden state for each step. The
        other dimensions are those of the batch.

    Raises:
      ValueError: if the `observations` tensor does not consist of
      sequences of `num_steps` observations.

    #### Examples

    ```python
    tfd = tfp.distributions

    # A simple weather model.

    # Represent a cold day with 0 and a hot day with 1.
    # Suppose the first day of a sequence has a 0.8 chance of being cold.

    initial_distribution = tfd.Categorical(probs=[0.8, 0.2])

    # Suppose a cold day has a 30% chance of being followed by a hot day
    # and a hot day has a 20% chance of being followed by a cold day.

    transition_distribution = tfd.Categorical(probs=[[0.7, 0.3],
                                                     [0.2, 0.8]])

    # Suppose additionally that on each day the temperature is
    # normally distributed with mean and standard deviation 0 and 5 on
    # a cold day and mean and standard deviation 15 and 10 on a hot day.

    observation_distribution = tfd.Normal(loc=[0., 15.], scale=[5., 10.])

    # This gives the hidden Markov model:

    model = tfd.HiddenMarkovModel(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=observation_distribution,
        num_steps=7)

    # Suppose we observe gradually rising temperatures over a week:
    temps = [-2., 0., 2., 4., 6., 8., 10.]

    # We can now compute the most probable sequence of hidden states:

    model.posterior_mode(temps)

    # The result is [0 0 0 0 0 1 1] telling us that the transition
    # from "cold" to "hot" most likely happened between the
    # 5th and 6th days.
    ```
    """

        with self._name_and_control_scope(name):
            observations = tf.convert_to_tensor(observations,
                                                name='observations')
            if mask is not None:
                mask = tf.convert_to_tensor(mask,
                                            name='mask',
                                            dtype_hint=tf.bool)
            num_states = self.transition_distribution.batch_shape_tensor()[-1]
            observation_distribution = self.observation_distribution
            underlying_event_rank = tf.size(
                observation_distribution.event_shape_tensor())
            observation_tensor_shape = tf.shape(observations)
            mask_tensor_shape = tf.shape(mask) if mask is not None else None

            with self._observation_mask_shape_preconditions(
                    observation_tensor_shape, mask_tensor_shape,
                    underlying_event_rank):
                observation_log_probs = self._observation_log_probs(
                    observations, mask)
                log_init = _extract_log_probs(num_states,
                                              self.initial_distribution)
                log_trans = _extract_log_probs(num_states,
                                               self.transition_distribution)
                log_prob = log_init + observation_log_probs[0]

                def _reduce_multiple_steps():
                    """Perform `reduce_max` operation when `num_steps` > 1."""
                    def forward_step(previous_step_pair, log_prob_observation):
                        log_prob_previous = previous_step_pair[0]
                        log_prob = (log_prob_previous[..., tf.newaxis] +
                                    log_trans +
                                    log_prob_observation[..., tf.newaxis, :])
                        most_likely_given_successor = tf.argmax(log_prob,
                                                                axis=-2)
                        max_log_p_given_successor = tf.reduce_max(log_prob,
                                                                  axis=-2)
                        return (max_log_p_given_successor,
                                most_likely_given_successor)

                    forward_log_probs, all_most_likely_given_successor = tf.scan(
                        forward_step,
                        observation_log_probs[1:],
                        initializer=(log_prob,
                                     tf.zeros(tf.shape(log_prob),
                                              dtype=tf.int64)),
                        name='forward_log_probs')

                    most_likely_end = tf.argmax(forward_log_probs[-1], axis=-1)

                    # We require the operation that gives C from A and B where
                    # C[i...j] = A[i...j, B[i...j]]
                    # and A = most_likely_given_successor
                    #     B = most_likely_successor.
                    # tf.gather requires indices of known shape, so instead we
                    # use a reduction with tf.one_hot(B) to pick out elements from A
                    def backward_step(most_likely_successor,
                                      most_likely_given_successor):
                        return tf.reduce_sum((most_likely_given_successor *
                                              tf.one_hot(most_likely_successor,
                                                         num_states,
                                                         dtype=tf.int64)),
                                             axis=-1)

                    backward_scan = tf.scan(backward_step,
                                            all_most_likely_given_successor,
                                            most_likely_end,
                                            reverse=True)
                    most_likely_sequences = tf.concat(
                        [backward_scan, [most_likely_end]], axis=0)
                    return distribution_util.move_dimension(
                        most_likely_sequences, 0, -1)

                return prefer_static.cond(
                    self.num_steps > 1, _reduce_multiple_steps,
                    lambda: tf.argmax(log_prob, axis=-1)[..., tf.newaxis])
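
The `mask` argument lets the Viterbi decoding above ignore selected observations. A hedged sketch, assuming a `model` built exactly as in the docstring example:

```python
temps = [-2., 0., 2., 4., 6., 8., 10.]
# Ignore the third day's reading, e.g. because the sensor was down that day.
mask = [False, False, True, False, False, False, False]
print(model.posterior_mode(temps, mask=mask).numpy())
```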
Example #10
def brier_decomposition(labels, logits, name=None):
    r"""Decompose the Brier score into uncertainty, resolution, and reliability.

  [Proper scoring rules][1] measure the quality of probabilistic predictions;
  any proper scoring rule admits a [unique decomposition][2] as
  `Score = Uncertainty - Resolution + Reliability`, where:

  * `Uncertainty`: a generalized entropy of the average predictive
    distribution; it can be either positive or negative.
  * `Resolution`: a generalized variance of individual predictive
    distributions; it is always non-negative.  Differences in predictions
    reveal information, which is why a larger resolution improves the
    predictive score.
  * `Reliability`: a measure of calibration of predictions against the true
    frequency of events.  It is always non-negative and a lower value here
    indicates better calibration.

  This method estimates the above decomposition for the case of the Brier
  scoring rule for discrete outcomes.  For this, we need to discretize the space
  of probability distributions; we choose a simple partition of the space into
  `nlabels` events: given a distribution `p` over `nlabels` outcomes, the index
  `k` for which `p_k > p_i` for all `i != k` determines the discretization
  outcome; that is, `p in M_k`, where `M_k` is the set of all distributions for
  which `p_k` is the largest value among all probabilities.

  The estimation error of each component is O(k/n), where n is the number
  of instances and k is the number of labels.  There may be an error of this
  order when compared to `brier_score`.

  #### References
  [1]: Tilmann Gneiting, Adrian E. Raftery.
       Strictly Proper Scoring Rules, Prediction, and Estimation.
       Journal of the American Statistical Association, Vol. 102, 2007.
       https://www.stat.washington.edu/raftery/Research/PDF/Gneiting2007jasa.pdf
  [2]: Jochen Broecker.  Reliability, sufficiency, and the decomposition of
       proper scores.
       Quarterly Journal of the Royal Meteorological Society, Vol. 135, 2009.
       https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1002/qj.456

  Args:
    labels: Tensor, (n,), with tf.int32 or tf.int64 elements containing ground
      truth class labels in the range [0, nlabels).
    logits: Tensor, (n, nlabels), with logits for n instances and nlabels.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    uncertainty: Tensor, scalar, the uncertainty component of the
      decomposition.
    resolution: Tensor, scalar, the resolution component of the decomposition.
    reliability: Tensor, scalar, the reliability component of the
      decomposition.
  """
    with tf.name_scope(name or 'brier_decomposition'):
        labels = tf.convert_to_tensor(labels)
        logits = tf.convert_to_tensor(logits)
        num_classes = logits.shape[-1]

        # Compute pbar, the average distribution
        pred_class = tf.argmax(logits, axis=-1, output_type=labels.dtype)

        if logits.shape.rank > 2:
            flatten, unflatten = _make_flatten_unflatten_fns(logits.shape[:-2])

            def fn_to_map(args):
                yhat, y = args
                return tf.math.confusion_matrix(yhat,
                                                y,
                                                num_classes=num_classes,
                                                dtype=logits.dtype)

            confusion_matrix = tf.map_fn(
                fn_to_map,
                [flatten(pred_class), flatten(labels)],
                dtype=logits.dtype)
            confusion_matrix = unflatten(confusion_matrix)
        else:
            confusion_matrix = tf.math.confusion_matrix(
                pred_class,
                labels,
                num_classes=num_classes,
                dtype=logits.dtype)

        dist_weights = tf.reduce_sum(confusion_matrix, axis=-1)
        dist_weights /= tf.reduce_sum(dist_weights, axis=-1, keepdims=True)
        pbar = tf.reduce_sum(confusion_matrix, axis=-2)
        pbar /= tf.reduce_sum(pbar, axis=-1, keepdims=True)

        eps = np.finfo(dtype_util.as_numpy_dtype(confusion_matrix.dtype)).eps
        # dist_mean[k,:] contains the empirical distribution for the set M_k
        # Some outcomes may not realize, corresponding to dist_weights[k] = 0
        dist_mean = confusion_matrix / (
            eps + tf.reduce_sum(confusion_matrix, axis=-1, keepdims=True))

        # Uncertainty: quadratic entropy of the average label distribution
        uncertainty = -tf.reduce_sum(tf.square(pbar), axis=-1)

        # Resolution: expected quadratic divergence of predictive to mean
        resolution = tf.square(tf.expand_dims(pbar, -1) - dist_mean)
        resolution = tf.reduce_sum(dist_weights *
                                   tf.reduce_sum(resolution, axis=-1),
                                   axis=-1)

        # Reliability: expected quadratic divergence of predictive to true
        if logits.shape.rank > 2:
            # TODO(b/139094519): Avoid using tf.map_fn here.
            prob_true = tf.map_fn(
                lambda args: tf.gather(args[0], args[1]),
                [flatten(dist_mean), flatten(pred_class)],
                dtype=dist_mean.dtype)
            prob_true = unflatten(prob_true)
        else:
            prob_true = tf.gather(dist_mean, pred_class, axis=0)
        log_prob_true = tf.math.log(prob_true)

        log_prob_pred = logits - tf.math.reduce_logsumexp(
            logits, axis=-1, keepdims=True)

        log_reliability = _reduce_log_l2_exp(log_prob_pred,
                                             log_prob_true,
                                             axis=-1)
        log_reliability = tf.math.reduce_logsumexp(
            log_reliability,
            axis=-1,
        )

        num_samples = tf.cast(tf.shape(logits)[-2], logits.dtype)
        reliability = tf.exp(log_reliability - tf.math.log(num_samples))

        return uncertainty, resolution, reliability
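
A small usage sketch for `brier_decomposition`, with made-up labels and logits:

```python
import tensorflow as tf

labels = tf.constant([0, 1, 2, 1], dtype=tf.int32)
logits = tf.constant([[2.0, 0.0, 0.0],
                      [0.0, 1.5, 0.0],
                      [0.0, 0.0, 1.0],
                      [1.0, 1.0, 1.0]])
uncertainty, resolution, reliability = brier_decomposition(labels, logits)
# Up to O(k/n) estimation error:
#   Brier score ≈ uncertainty - resolution + reliability.
```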
Example #11
    def _observation_log_probs(self, observations, mask):
        """Compute and shape tensor of log probs associated with observations.."""

        # Let E be the underlying event shape
        #     M the number of steps in the HMM
        #     N the number of states of the HMM
        #
        # Then the incoming observations have shape
        #
        # observations : batch_o [M] E
        #
        # and the mask (if present) has shape
        #
        # mask : batch_m [M]
        #
        # Let this HMM distribution have batch shape batch_d
        # We need to broadcast all three of these batch shapes together
        # into the shape batch.
        #
        # We need to move the step dimension to the first dimension to make
        # them suitable for folding or scanning over.
        #
        # When we call `log_prob` for our observations we need to
        # do this for each state the observation could correspond to.
        # We do this by expanding the dimensions by 1 so we end up with:
        #
        # observations : [M] batch [1] [E]
        #
        # After calling `log_prob` we get
        #
        # observation_log_probs : [M] batch [N]
        #
        # We wish to use `mask` to select from this so we also
        # reshape and broadcast it up to shape
        #
        # mask : [M] batch [N]

        observation_distribution = self.observation_distribution
        underlying_event_rank = tf.size(
            observation_distribution.event_shape_tensor())
        observation_tensor_shape = tf.shape(observations)
        observation_batch_shape = observation_tensor_shape[
            :-1 - underlying_event_rank]
        observation_event_shape = observation_tensor_shape[
            -1 - underlying_event_rank:]

        if mask is not None:
            mask_tensor_shape = tf.shape(mask)
            mask_batch_shape = mask_tensor_shape[:-1]

        batch_shape = tf.broadcast_dynamic_shape(observation_batch_shape,
                                                 self.batch_shape_tensor())

        if mask is not None:
            batch_shape = tf.broadcast_dynamic_shape(batch_shape,
                                                     mask_batch_shape)
        observations = tf.broadcast_to(
            observations,
            tf.concat([batch_shape, observation_event_shape], axis=0))
        observation_rank = tf.rank(observations)
        observations = distribution_util.move_dimension(
            observations, observation_rank - underlying_event_rank - 1, 0)
        observations = tf.expand_dims(observations,
                                      observation_rank - underlying_event_rank)
        observation_log_probs = observation_distribution.log_prob(observations)

        if mask is not None:
            mask = tf.broadcast_to(
                mask, tf.concat([batch_shape, [self._num_steps]], axis=0))
            mask = distribution_util.move_dimension(mask, -1, 0)
            observation_log_probs = tf.where(
                mask[..., tf.newaxis], tf.zeros_like(observation_log_probs),
                observation_log_probs)

        return observation_log_probs
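
The key step above is inserting a state axis before calling `log_prob`, so that a scalar-event observation broadcasts against the per-state batch of the observation distribution. A tiny sketch of that broadcast:

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

obs_dist = tfd.Normal(loc=[0., 15.], scale=[5., 10.])  # one Normal per state (N = 2)
observations = tf.constant([-2., 0., 2.])               # M = 3 steps, scalar events
# [M, 1] broadcast against the state axis gives log probs of shape [M, N] = [3, 2].
observation_log_probs = obs_dist.log_prob(observations[:, tf.newaxis])
print(observation_log_probs.shape)  # (3, 2)
```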
Example #12
def _add_bias(features):
  return tf.concat([features, tf.ones([tf.shape(features)[0], 1])], axis=-1)
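
`_add_bias` appends a constant column of ones to the feature matrix; for example:

```python
import tensorflow as tf

features = tf.constant([[1., 2.],
                        [3., 4.]])
print(_add_bias(features).numpy())
# [[1. 2. 1.]
#  [3. 4. 1.]]
```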
Example #13
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
    """Squeeze or expand last dimension if needed.

    1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
    (using `remove_squeezable_dimensions`).
    2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
    from the new rank of `y_pred`.
    If `sample_weight` is scalar, it is kept scalar.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
      y_true: Optional label `Tensor` whose dimensions match `y_pred`.
      sample_weight: Optional weight scalar or `Tensor` whose dimensions match
        `y_pred`.

    Returns:
      Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
      the last dimension squeezed,
      `sample_weight` could be extended by one dimension.
      If `sample_weight` is None, (y_pred, y_true) is returned.
    """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:

        # If sparse (integer) labels are provided as `y_true`, the last dimension
        # of `y_pred` may be > 1. E.g.: y_true = [0, 1, 2] (shape=(3,)),
        # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3)).
        # In this case, we should not try to remove the squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = tf.rank(y_pred) - tf.rank(y_true)
            squeeze_dims = lambda: remove_squeezable_dimensions(  # pylint: disable=g-long-lambda
                y_true, y_pred)
            is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1])
            maybe_squeeze_dims = (
                lambda: tf.cond(  # pylint: disable=g-long-lambda
                    is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred)))
            y_true, y_pred = tf.cond(tf.equal(1, rank_diff),
                                     maybe_squeeze_dims, squeeze_dims)

    if sample_weight is None:
        return y_pred, y_true

    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = tf.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = tf.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = tf.rank(sample_weight)
    rank_diff = weights_rank_tensor - tf.rank(y_pred)
    maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        expand_weights = lambda: tf.expand_dims(sample_weight, [-1])
        return tf.cond(tf.equal(rank_diff, -1), expand_weights,
                       lambda: sample_weight)

    def _maybe_adjust_weights():
        return tf.cond(tf.equal(rank_diff, 1), maybe_squeeze_weights,
                       _maybe_expand_weights)

    # squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = tf.cond(
        tf.equal(weights_rank_tensor, 0),
        lambda: sample_weight,
        _maybe_adjust_weights,
    )
    return y_pred, y_true, sample_weight
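
A hedged usage sketch for `squeeze_or_expand_dimensions` (the function relies on a `remove_squeezable_dimensions` helper not shown here); the shapes below are hypothetical:

```python
import tensorflow as tf

y_pred = tf.constant([0.9, 0.2])              # shape (2,)
y_true = tf.constant([[1.], [0.]])            # shape (2, 1)
sample_weight = tf.constant([[0.5], [1.0]])   # shape (2, 1)

y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
    y_pred, y_true, sample_weight)
# y_true and sample_weight are squeezed to shape (2,) to match y_pred's rank.
```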
Example #14
def pad_batch_dimension_for_multiple_chains(
    observed_time_series, model, chain_batch_shape):
  """"Expand the observed time series with extra batch dimension(s)."""
  # Running with multiple chains introduces an extra batch dimension. In
  # general we also need to pad the observed time series with a matching batch
  # dimension.
  #
  # For example, suppose our model has batch shape [3, 4] and
  # the observed time series has shape `concat([[5], [3, 4], [100])`,
  # corresponding to `sample_shape`, `batch_shape`, and `num_timesteps`
  # respectively. The model will produce distributions with batch shape
  # `concat([chain_batch_shape, [3, 4]])`, so we pad `observed_time_series` to
  # have matching shape `[5, 1, 3, 4, 100]`, where the added `1` dimension
  # between the sample and batch shapes will broadcast to `chain_batch_shape`.

  [  # Extract mask and guarantee `event_ndims=2`.
      observed_time_series,
      is_missing
  ] = canonicalize_observed_time_series_with_mask(observed_time_series)

  event_ndims = 2  # event_shape = [num_timesteps, observation_size=1]

  model_batch_ndims = (
      model.batch_shape.ndims if model.batch_shape.ndims is not None else
      tf.shape(input=model.batch_shape_tensor())[0])

  # Compute ndims from chain_batch_shape.
  chain_batch_shape = tf.convert_to_tensor(
      value=chain_batch_shape, name='chain_batch_shape', dtype=tf.int32)
  if not chain_batch_shape.shape.is_fully_defined():
    raise ValueError('Batch shape must have static rank. (given: {})'.format(
        chain_batch_shape))
  if chain_batch_shape.shape.ndims == 0:  # expand int `k` to `[k]`.
    chain_batch_shape = chain_batch_shape[tf.newaxis]
  chain_batch_ndims = tf.compat.dimension_value(chain_batch_shape.shape[0])

  def do_padding(observed_time_series_tensor):
    current_sample_shape = tf.shape(
        input=observed_time_series_tensor)[:-(model_batch_ndims + event_ndims)]
    current_batch_and_event_shape = tf.shape(
        input=observed_time_series_tensor)[-(model_batch_ndims + event_ndims):]
    return tf.reshape(
        tensor=observed_time_series_tensor,
        shape=tf.concat([
            current_sample_shape,
            tf.ones([chain_batch_ndims], dtype=tf.int32),
            current_batch_and_event_shape], axis=0))

  # Padding is only needed if the observed time series has sample shape.
  observed_time_series = prefer_static.cond(
      (dist_util.prefer_static_rank(observed_time_series) >
       model_batch_ndims + event_ndims),
      lambda: do_padding(observed_time_series),
      lambda: observed_time_series)

  if is_missing is not None:
    is_missing = prefer_static.cond(
        (dist_util.prefer_static_rank(is_missing) >
         model_batch_ndims + event_ndims),
        lambda: do_padding(is_missing),
        lambda: is_missing)
    return missing_values_util.MaskedTimeSeries(observed_time_series,
                                                is_missing=is_missing)

  return observed_time_series
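
The reshape performed by `do_padding` just inserts length-1 chain dimensions between the sample and batch shapes. A standalone sketch using the shapes from the comment above:

```python
import tensorflow as tf

# Sample shape [5], model batch shape [3, 4], 100 timesteps, observation size 1.
observed = tf.zeros([5, 3, 4, 100, 1])
chain_batch_ndims = 1
model_batch_ndims = 2
event_ndims = 2

sample_shape = tf.shape(observed)[:-(model_batch_ndims + event_ndims)]
batch_and_event_shape = tf.shape(observed)[-(model_batch_ndims + event_ndims):]
padded = tf.reshape(
    observed,
    tf.concat([sample_shape,
               tf.ones([chain_batch_ndims], dtype=tf.int32),
               batch_and_event_shape], axis=0))
print(padded.shape)  # (5, 1, 3, 4, 100, 1)
```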
Example #15
 def _event_shape_tensor(self):
   # NOTE: In TF1, tf.shape(x) can call `tf.convert_to_tensor(x)` **twice**,
   # so we pre-emptively convert-to-tensor.
   concentration = tf.convert_to_tensor(self.concentration)
   return tf.shape(concentration)[-1:]
Example #16
 def _event_shape_tensor(self, loc=None):
   return tf.shape(self.loc if loc is None else loc)[-2:]
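
These `_event_shape_tensor` helpers simply read the event dimensions off a parameter's shape. For instance, for a Dirichlet the event shape is the last dimension of `concentration`:

```python
import tensorflow_probability as tfp
tfd = tfp.distributions

dirichlet = tfd.Dirichlet(concentration=[1., 1., 1.])
print(dirichlet.event_shape_tensor().numpy())  # [3], i.e. tf.shape(concentration)[-1:]
```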
Example #17
 def f(x):
     shape = tf.shape(x)
     return tf.reshape(
         x,
         tf.concat([[2, (shape[0] * shape[1]) // 2], shape[2:]],
                   axis=0))
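
`f` folds the first two axes into a leading dimension of size 2 (assuming `f` is in scope); for example:

```python
import tensorflow as tf

x = tf.zeros([4, 6, 8])
print(f(x).shape)  # (2, 12, 8): the 4 * 6 leading elements regrouped as 2 * 12.
```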
def _generate_detections_per_image(boxes,
                                   scores,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
  """Generate the final detections per image given the model outputs.

  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which stacks
      box predictions from all feature levels. N is the total number of anchors
      across all levels.
    scores: a tensor with shape [N, num_classes], which stacks class
      probabilities from all feature levels. N is the total number of anchors
      across all levels, and num_classes is the number of classes predicted by
      the model. Note that these are the raw scores.
    max_total_size: a scalar representing maximum number of boxes retained over
      all classes.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    score_threshold: a float representing the threshold for deciding when to
      remove boxes based on score.
    pre_nms_num_boxes: an int number of top candidate detections per class
      before NMS.

  Returns:
    nms_boxes: `float` Tensor of shape [max_total_size, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [max_total_size] representing sorted
      confidence scores for detected boxes. The values are between [0, 1].
    nms_classes: `int` Tensor of shape [max_total_size] representing classes for
      detected boxes.
    valid_detections: `int` Tensor of shape [1]; only the top `valid_detections`
      boxes are valid detections.
  """
  nmsed_boxes = []
  nmsed_scores = []
  nmsed_classes = []
  num_classes_for_box = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]
  for i in range(num_classes):
    boxes_i = boxes[:, min(num_classes_for_box - 1, i)]
    scores_i = scores[:, i]

    # Obtains pre_nms_num_boxes before running NMS.
    scores_i, indices = tf.nn.top_k(
        scores_i, k=tf.minimum(tf.shape(input=scores_i)[-1], pre_nms_num_boxes))
    boxes_i = tf.gather(boxes_i, indices)

    (nmsed_indices_i,
     nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
         tf.cast(boxes_i, tf.float32),
         tf.cast(scores_i, tf.float32),
         max_total_size,
         iou_threshold=nms_iou_threshold,
         score_threshold=score_threshold,
         pad_to_max_output_size=True,
         name='nms_detections_' + str(i))
    nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
    nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
    # Sets scores of invalid boxes to -1.
    nmsed_scores_i = tf.where(
        tf.less(tf.range(max_total_size), [nmsed_num_valid_i]), nmsed_scores_i,
        -tf.ones_like(nmsed_scores_i))
    nmsed_classes_i = tf.fill([max_total_size], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
    nmsed_classes.append(nmsed_classes_i)

  # Concatenates results from all classes and sorts them.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
  nmsed_scores = tf.concat(nmsed_scores, axis=0)
  nmsed_classes = tf.concat(nmsed_classes, axis=0)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores, k=max_total_size, sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices)
  nmsed_classes = tf.gather(nmsed_classes, indices)
  valid_detections = tf.reduce_sum(
      input_tensor=tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
Example #19
 def _event_shape_tensor(self):
     return tf.shape(self.concentration)[-1:]
  def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape):
    """Generate final detections.

    Args:
      box_outputs: a tensor of shape [batch_size, K, num_classes * 4]
        representing the class-specific box coordinates relative to anchors.
      class_outputs: a tensor of shape [batch_size, K, num_classes]
        representing the class logits before applying the score activation.
      anchor_boxes: a tensor of shape [batch_size, K, 4] representing the
        corresponding anchor boxes w.r.t. `box_outputs`.
      image_shape: a tensor of shape [batch_size, 2] storing the image height
        and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.

    Returns:
      nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
        representing top detected boxes in [y1, x1, y2, x2].
      nms_scores: `float` Tensor of shape [batch_size, max_total_size]
        representing sorted confidence scores for detected boxes. The values are
        between [0, 1].
      nms_classes: `int` Tensor of shape [batch_size, max_total_size]
        representing classes for detected boxes.
      valid_detections: `int` Tensor of shape [batch_size]; only the top
        `valid_detections` boxes are valid detections.
    """
    class_outputs = tf.nn.softmax(class_outputs, axis=-1)

    # Removes the background class.
    class_outputs_shape = tf.shape(class_outputs)
    batch_size = class_outputs_shape[0]
    num_locations = class_outputs_shape[1]
    num_classes = class_outputs_shape[-1]
    num_detections = num_locations * (num_classes - 1)

    class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])
    box_outputs = tf.reshape(
        box_outputs,
        tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
    box_outputs = tf.slice(
        box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
    anchor_boxes = tf.tile(
        tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
    box_outputs = tf.reshape(
        box_outputs,
        tf.stack([batch_size, num_detections, 4], axis=-1))
    anchor_boxes = tf.reshape(
        anchor_boxes,
        tf.stack([batch_size, num_detections, 4], axis=-1))

    # Box decoding.
    decoded_boxes = box_utils.decode_boxes(
        box_outputs, anchor_boxes, weights=[10.0, 10.0, 5.0, 5.0])

    # Box clipping
    decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)

    decoded_boxes = tf.reshape(
        decoded_boxes,
        tf.stack([batch_size, num_locations, num_classes - 1, 4], axis=-1))

    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
        self._generate_detections(decoded_boxes, class_outputs))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
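A small, self-contained sketch of the background-removal step described in the docstring: softmax over the class logits, then slice off class 0 (the background). The tensor sizes here are made up for illustration:

```python
import tensorflow as tf

batch_size, num_locations, num_classes = 2, 5, 4
class_logits = tf.random.normal([batch_size, num_locations, num_classes])

class_probs = tf.nn.softmax(class_logits, axis=-1)
# Drop the background column (index 0); detected classes are later offset by +1.
foreground_probs = tf.slice(class_probs, [0, 0, 1], [-1, -1, -1])
print(foreground_probs.shape)  # (2, 5, 3)
```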
Example No. 21
    def _get_noise_shape(self, inputs):
        input_shape = tf.shape(inputs)
        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], 1, 1, 1)
        elif self.data_format == 'channels_last':
            return (input_shape[0], 1, 1, 1, input_shape[4])
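For context, a broadcastable noise shape like the one returned above is what makes the dropout "spatial": whole channels are kept or dropped together. A minimal sketch, assuming `channels_last` 3-D inputs with made-up sizes:

```python
import tensorflow as tf

inputs = tf.random.normal([2, 4, 4, 4, 8])  # [batch, depth, height, width, channels]
# Broadcasting the mask over the spatial axes zeroes entire channels at once.
dropped = tf.nn.dropout(inputs, rate=0.5, noise_shape=[2, 1, 1, 1, 8])
print(dropped.shape)  # (2, 4, 4, 4, 8)
```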
Example No. 22
    def _get_tensor_shape(self, tensor):
        if self.use_static_shape:
            # If input shapes are static, result shapes should be too.
            return tensorshape_util.as_list(tensor.shape)
        else:
            return self.evaluate(tf.shape(tensor))
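The static/dynamic distinction this helper abstracts over can be seen directly in eager mode; a minimal sketch:

```python
import tensorflow as tf

x = tf.zeros([3, 4])
print(x.shape.as_list())    # [3, 4] -- static shape, known at trace time
print(tf.shape(x).numpy())  # [3 4]  -- dynamic shape, an evaluated tensor
```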
Example No. 23
    def _sample_control_dependencies(self, x):
        """Helper which validates sample arg, e.g., input to `log_prob`."""
        x_ndims = (tf.rank(x) if tensorshape_util.rank(x.shape) is None else
                   tensorshape_util.rank(x.shape))
        event_ndims = (tf.size(self.event_shape_tensor())
                       if tensorshape_util.rank(self.event_shape) is None else
                       tensorshape_util.rank(self.event_shape))
        batch_ndims = (tf.size(self._batch_shape_unexpanded)
                       if tensorshape_util.rank(self.batch_shape) is None else
                       tensorshape_util.rank(self.batch_shape))
        expected_batch_event_ndims = batch_ndims + event_ndims

        if (isinstance(x_ndims, int)
                and isinstance(expected_batch_event_ndims, int)):
            if x_ndims < expected_batch_event_ndims:
                raise NotImplementedError(
                    'Broadcasting is not supported; too few batch and event dims '
                    '(expected at least {}, saw {}).'.format(
                        expected_batch_event_ndims, x_ndims))
            ndims_assertion = []
        elif self.validate_args:
            ndims_assertion = [
                assert_util.assert_greater_equal(
                    x_ndims,
                    expected_batch_event_ndims,
                    message=('Broadcasting is not supported; too few '
                             'batch and event dims.'),
                    name='assert_batch_and_event_ndims_large_enough'),
            ]

        if (tensorshape_util.is_fully_defined(self.batch_shape)
                and tensorshape_util.is_fully_defined(self.event_shape)):
            expected_batch_event_shape = np.int32(
                tensorshape_util.concatenate(self.batch_shape,
                                             self.event_shape))
        else:
            expected_batch_event_shape = tf.concat([
                self.batch_shape_tensor(),
                self.event_shape_tensor(),
            ],
                                                   axis=0)

        sample_ndims = x_ndims - expected_batch_event_ndims
        if isinstance(sample_ndims, int):
            sample_ndims = max(sample_ndims, 0)
        if (isinstance(sample_ndims, int)
                and tensorshape_util.is_fully_defined(x.shape[sample_ndims:])):
            actual_batch_event_shape = np.int32(x.shape[sample_ndims:])
        else:
            sample_ndims = tf.maximum(sample_ndims, 0)
            actual_batch_event_shape = tf.shape(x)[sample_ndims:]

        assertions = []
        if (isinstance(expected_batch_event_shape, np.ndarray)
                and isinstance(actual_batch_event_shape, np.ndarray)):
            if any(expected_batch_event_shape != actual_batch_event_shape):
                raise NotImplementedError('Broadcasting is not supported; '
                                          'unexpected batch and event shape '
                                          '(expected {}, saw {}).'.format(
                                              expected_batch_event_shape,
                                              actual_batch_event_shape))
            # We need to set the final runtime-assertions to `ndims_assertion` since
            # it's possible this assertion was created. We could add a condition to
            # only do so if `self.validate_args == True`; however, this is redundant
            # as `ndims_assertion` already encodes this information.
            assertions.extend(ndims_assertion)
        elif self.validate_args:
            # We need to make the `ndims_assertion` a control dep because otherwise
            # TF itself might raise an exception owing to this assertion being
            # ill-defined, i.e., one cannot compare Tensors of different rank.
            with tf.control_dependencies(ndims_assertion):
                shape_assertion = assert_util.assert_equal(
                    expected_batch_event_shape,
                    actual_batch_event_shape,
                    message=('Broadcasting is not supported; '
                             'unexpected batch and event shape.'),
                    name='assert_batch_and_event_shape_same')
            assertions.append(shape_assertion)

        return assertions
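The assertion list returned here is typically consumed by attaching it as control dependencies before the validated tensor is used. A hedged sketch of that pattern (the helper name and the rank check are made up for illustration):

```python
import tensorflow as tf

def checked_square(x, expected_rank=2):
    # In eager mode assert_rank raises immediately on failure and returns
    # None; in graph mode it returns an assertion op we can depend on.
    assertion = tf.debugging.assert_rank(x, expected_rank)
    deps = [assertion] if assertion is not None else []
    with tf.control_dependencies(deps):
        return tf.square(tf.identity(x))

print(checked_square(tf.ones([2, 3])).shape)  # (2, 3)
```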
Example No. 24
    def _parameter_control_dependencies(self, is_init):
        assertions = []

        logits = self._logits
        probs = self._probs
        param, name = (probs, 'probs') if logits is None else (logits,
                                                               'logits')

        # In init, we can always build shape and dtype checks because
        # we assume shape doesn't change for Variable backed args.
        if is_init:
            if not dtype_util.is_floating(param.dtype):
                raise TypeError(
                    'Argument `{}` must have floating type.'.format(name))

            msg = 'Argument `{}` must have rank at least 1.'.format(name)
            shape_static = tensorshape_util.dims(param.shape)
            if shape_static is not None:
                if len(shape_static) < 1:
                    raise ValueError(msg)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_rank_at_least(param, 1, message=msg))
                with tf.control_dependencies(assertions):
                    param = tf.identity(param)

            msg1 = 'Argument `{}` must have final dimension >= 1.'.format(name)
            msg2 = 'Argument `{}` must have final dimension <= {}.'.format(
                name, dtype_util.max(tf.int32))
            event_size = shape_static[-1] if shape_static is not None else None
            if event_size is not None:
                if event_size < 1:
                    raise ValueError(msg1)
                if event_size > dtype_util.max(tf.int32):
                    raise ValueError(msg2)
            elif self.validate_args:
                param = tf.convert_to_tensor(param)
                assertions.append(
                    assert_util.assert_greater_equal(tf.shape(param)[-1],
                                                     1,
                                                     message=msg1))
                # NOTE: For now, we leave out a runtime assertion that
                # `tf.shape(param)[-1] <= tf.int32.max`.  An earlier `tf.shape` call
                # will fail before we get to this point.

        if not self.validate_args:
            assert not assertions  # Should never happen.
            return []

        if probs is not None:
            probs = param  # reuse tensor conversion from above
            if is_init != tensor_util.is_ref(probs):
                probs = tf.convert_to_tensor(probs)
                one = tf.ones([], dtype=probs.dtype)
                assertions.extend([
                    assert_util.assert_non_negative(probs),
                    assert_util.assert_less_equal(probs, one),
                    assert_util.assert_near(
                        tf.reduce_sum(probs, axis=-1),
                        one,
                        message='Argument `probs` must sum to 1.'),
                ])

        return assertions
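For context, these checks are what fire when a `Categorical`-like TensorFlow Probability distribution is built with `validate_args=True`. A hedged sketch of the sum-to-1 assertion in action (assuming `tfp.distributions.Categorical`):

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

# Valid probabilities pass silently.
ok = tfd.Categorical(probs=[0.2, 0.3, 0.5], validate_args=True)
print(ok.sample(3))

# Probabilities that do not sum to 1 trip the assert_near check above.
try:
    tfd.Categorical(probs=[0.2, 0.9], validate_args=True).sample()
except Exception:  # InvalidArgumentError in eager mode
    print('validation failed: probs must sum to 1')
```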
Example No. 25
def sample_annealed_importance_chain(num_steps,
                                     proposal_log_prob_fn,
                                     target_log_prob_fn,
                                     current_state,
                                     make_kernel_fn,
                                     parallel_iterations=10,
                                     seed=None,
                                     name=None):
    """Runs annealed importance sampling (AIS) to estimate normalizing constants.

  This function uses an MCMC transition operator (e.g., Hamiltonian Monte Carlo)
  to sample from a series of distributions that slowly interpolates between
  an initial 'proposal' distribution:

  `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)`

  and the target distribution:

  `exp(target_log_prob_fn(x) - target_log_normalizer)`,

  accumulating importance weights along the way. The product of these
  importance weights gives an unbiased estimate of the ratio of the
  normalizing constants of the initial distribution and the target
  distribution:

  `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`.

  Note: When running in graph mode, `proposal_log_prob_fn` and
  `target_log_prob_fn` are called exactly three times (although this may be
  reduced to two times in the future).

  Args:
    num_steps: Integer number of Markov chain updates to run. More
      iterations means more expense, but smoother annealing between q
      and p, which in turn means exponentially lower variance for the
      normalizing constant estimator.
    proposal_log_prob_fn: Python callable that returns the log density of the
      initial distribution.
    target_log_prob_fn: Python callable which takes an argument like
      `current_state` (or `*current_state` if it's a list) and returns its
      (possibly unnormalized) log-density under the target distribution.
    current_state: `Tensor` or Python `list` of `Tensor`s representing the
      current state(s) of the Markov chain(s). The first `r` dimensions index
      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
    make_kernel_fn: Python `callable` which returns a `TransitionKernel`-like
      object. Must take one argument representing the `TransitionKernel`'s
      `target_log_prob_fn`. The `target_log_prob_fn` argument represents the
      `TransitionKernel`'s target log distribution.  Note:
      `sample_annealed_importance_chain` creates a new `target_log_prob_fn`
      which is an interpolation between the supplied `target_log_prob_fn` and
      `proposal_log_prob_fn`; it is this interpolated function which is used as
      an argument to `make_kernel_fn`.
    parallel_iterations: The number of iterations allowed to run in parallel.
      It must be a positive integer. See `tf.while_loop` for more details.
    seed: Optional, a seed for reproducible sampling.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'sample_annealed_importance_chain').

  Returns:
    next_state: `Tensor` or Python list of `Tensor`s representing the
      state(s) of the Markov chain(s) at the final iteration. Has same shape as
      input `current_state`.
    ais_weights: Tensor with the estimated weight(s). Has shape matching
      `target_log_prob_fn(current_state)`.
    kernel_results: `collections.namedtuple` of internal calculations used to
      advance the chain.

  #### Examples

  ##### Estimate the normalizing constant of a log-gamma distribution.

  ```python
  tfd = tfp.distributions

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 20
  dtype = np.float32

  proposal = tfd.MultivariateNormalDiag(
     loc=tf.zeros([dims], dtype=dtype))

  target = tfd.TransformedDistribution(
    distribution=tfd.Sample(
        tfd.Gamma(concentration=dtype(2), rate=dtype(3)),
        sample_shape=[dims]),
    bijector=tfp.bijectors.Invert(tfp.bijectors.Exp()))

  chains_state, ais_weights, kernels_results = (
      tfp.mcmc.sample_annealed_importance_chain(
          num_steps=1000,
          proposal_log_prob_fn=proposal.log_prob,
          target_log_prob_fn=target.log_prob,
          current_state=proposal.sample(num_chains),
          make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=tlp_fn,
            step_size=0.2,
            num_leapfrog_steps=2)))

  log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights)
                              - np.log(num_chains))
  log_true_normalizer = tf.math.lgamma(2.) - 2. * tf.math.log(3.)
  ```

  ##### Estimate marginal likelihood of a Bayesian regression model.

  ```python
  tfd = tfp.distributions

  def make_prior(dims, dtype):
    return tfd.MultivariateNormalDiag(
        loc=tf.zeros(dims, dtype))

  def make_likelihood(weights, x):
    return tfd.MultivariateNormalDiag(
        loc=tf.tensordot(weights, x, axes=[[0], [-1]]))

  # Run 100 AIS chains in parallel
  num_chains = 100
  dims = 10
  dtype = np.float32

  # Make training data.
  x = np.random.randn(num_chains, dims).astype(dtype)
  true_weights = np.random.randn(dims).astype(dtype)
  y = np.dot(x, true_weights) + np.random.randn(num_chains)

  # Setup model.
  prior = make_prior(dims, dtype)
  def target_log_prob_fn(weights):
    return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y)

  proposal = tfd.MultivariateNormalDiag(
      loc=tf.zeros(dims, dtype))

  weight_samples, ais_weights, kernel_results = (
      tfp.mcmc.sample_annealed_importance_chain(
        num_steps=1000,
        proposal_log_prob_fn=proposal.log_prob,
        target_log_prob_fn=target_log_prob_fn,
        current_state=tf.zeros([num_chains, dims], dtype),
        make_kernel_fn=lambda tlp_fn: tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=tlp_fn,
          step_size=0.1,
          num_leapfrog_steps=2)))
  log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights)
                             - np.log(num_chains))
  ```

  """
    is_seeded = seed is not None
    seed = samplers.sanitize_seed(seed, salt='mcmc.sample_ais_chain')

    with tf.name_scope(name or 'sample_annealed_importance_chain'):
        num_steps = tf.convert_to_tensor(value=num_steps,
                                         dtype=tf.int32,
                                         name='num_steps')
        if mcmc_util.is_list_like(current_state):
            current_state = [
                tf.convert_to_tensor(s, name='current_state')
                for s in current_state
            ]
        else:
            current_state = tf.convert_to_tensor(value=current_state,
                                                 name='current_state')

        def _make_convex_combined_log_prob_fn(iter_):
            def _fn(*args):
                p = tf.identity(proposal_log_prob_fn(*args),
                                name='proposal_log_prob')
                t = tf.identity(target_log_prob_fn(*args),
                                name='target_log_prob')
                dtype = dtype_util.base_dtype(p.dtype)
                beta = tf.cast(iter_ + 1, dtype) / tf.cast(num_steps, dtype)
                return tf.identity(beta * t + (1. - beta) * p,
                                   name='convex_combined_log_prob')

            return _fn

        def _loop_body(iter_, seed, ais_weights, current_state,
                       kernel_results):
            """Closure which implements `tf.while_loop` body."""
            iter_seed, next_seed = (
                samplers.split_seed(seed, salt='ais_chain.seeded_one_step')
                if is_seeded else (seed, seed))

            x = (current_state
                 if mcmc_util.is_list_like(current_state) else [current_state])
            proposal_log_prob = proposal_log_prob_fn(*x)
            target_log_prob = target_log_prob_fn(*x)
            ais_weights += ((target_log_prob - proposal_log_prob) /
                            tf.cast(num_steps, ais_weights.dtype))
            kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_))
            # TODO(b/147676843): Should we warn if the kernel is not calibrated?
            one_step_kwargs = dict(seed=iter_seed) if is_seeded else {}
            next_state, inner_results = kernel.one_step(
                current_state, kernel_results.inner_results, **one_step_kwargs)
            kernel_results = AISResults(
                proposal_log_prob=proposal_log_prob,
                target_log_prob=target_log_prob,
                inner_results=inner_results,
            )
            return [
                iter_ + 1, next_seed, ais_weights, next_state, kernel_results
            ]

        def _bootstrap_results(init_state):
            """Creates first version of `previous_kernel_results`."""
            kernel = make_kernel_fn(_make_convex_combined_log_prob_fn(iter_=0))
            inner_results = kernel.bootstrap_results(init_state)
            mh_results = _find_inner_mh_results(inner_results)

            convex_combined_log_prob = mh_results.accepted_results.target_log_prob
            dtype = dtype_util.as_numpy_dtype(convex_combined_log_prob.dtype)
            shape = tf.shape(convex_combined_log_prob)
            proposal_log_prob = tf.fill(shape,
                                        dtype(np.nan),
                                        name='bootstrap_proposal_log_prob')
            target_log_prob = tf.fill(shape,
                                      dtype(np.nan),
                                      name='bootstrap_target_log_prob')

            return AISResults(
                proposal_log_prob=proposal_log_prob,
                target_log_prob=target_log_prob,
                inner_results=inner_results,
            )

        previous_kernel_results = _bootstrap_results(current_state)
        inner_results = previous_kernel_results.inner_results
        mh_results = _find_inner_mh_results(inner_results)

        ais_weights = tf.zeros(
            shape=tf.broadcast_dynamic_shape(
                tf.shape(mh_results.proposed_results.target_log_prob),
                tf.shape(mh_results.accepted_results.target_log_prob)),
            dtype=mh_results.proposed_results.target_log_prob.dtype)

        [_, _, ais_weights, current_state, kernel_results] = tf.while_loop(
            cond=lambda iter_, *args: iter_ < num_steps,
            body=_loop_body,
            loop_vars=[
                np.int32(0),  # iter_
                seed,
                ais_weights,
                current_state,
                previous_kernel_results,
            ],
            parallel_iterations=parallel_iterations)

        return [current_state, ais_weights, kernel_results]
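A minimal sketch of the annealing schedule implemented by `_make_convex_combined_log_prob_fn` above: step `k` targets the geometric bridge `beta_k * target + (1 - beta_k) * proposal`, with `beta_k = (k + 1) / num_steps`. The log-density values below are made up:

```python
num_steps = 5
proposal_log_prob = -0.5  # illustrative log densities at some fixed point x
target_log_prob = -2.3

for k in range(num_steps):
    beta = (k + 1) / num_steps
    bridged = beta * target_log_prob + (1. - beta) * proposal_log_prob
    print('step %d: beta=%.2f, bridged log prob=%.3f' % (k, beta, bridged))
```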
Example No. 26
    def testAutoVectorization(self, bijector_name, data):

        # TODO(b/150161911): reconcile numeric behavior of eager and graph mode.
        if tf.executing_eagerly():
            return

        bijector, event_dim = self._draw_bijector(
            bijector_name,
            data,
            batch_shape=[],  # Avoid conflict with vmap sample dimension.
            validate_args=False,  # Work around lack of `If` support in vmap.
            allowed_bijectors=(set(bhps.INSTANTIABLE_BIJECTORS) -
                               set(AUTOVECTORIZATION_IS_BROKEN)))
        atol = AUTOVECTORIZATION_ATOL[bijector_name]
        rtol = AUTOVECTORIZATION_RTOL[bijector_name]

        # Forward
        n = 3
        xs = self._draw_domain_tensor(bijector,
                                      data,
                                      event_dim,
                                      sample_shape=[n])
        ys = bijector.forward(xs)
        vectorized_ys = tf.vectorized_map(bijector.forward,
                                          xs,
                                          fallback_to_while_loop=False)
        self.assertAllClose(*self.evaluate((ys, vectorized_ys)),
                            atol=atol,
                            rtol=rtol)

        # FLDJ
        event_ndims = data.draw(
            hps.integers(min_value=bijector.forward_min_event_ndims,
                         max_value=ps.rank_from_shape(xs.shape) - 1))
        fldj_fn = functools.partial(bijector.forward_log_det_jacobian,
                                    event_ndims=event_ndims)
        vectorized_fldj = tf.vectorized_map(fldj_fn,
                                            xs,
                                            fallback_to_while_loop=False)
        fldj = tf.broadcast_to(fldj_fn(xs), tf.shape(vectorized_fldj))
        self.assertAllClose(*self.evaluate((fldj, vectorized_fldj)),
                            atol=atol,
                            rtol=rtol)

        # Inverse
        ys = self._draw_codomain_tensor(bijector,
                                        data,
                                        event_dim,
                                        sample_shape=[n])
        xs = bijector.inverse(ys)
        vectorized_xs = tf.vectorized_map(bijector.inverse,
                                          ys,
                                          fallback_to_while_loop=False)
        self.assertAllClose(*self.evaluate((xs, vectorized_xs)),
                            atol=atol,
                            rtol=rtol)

        # ILDJ
        event_ndims = data.draw(
            hps.integers(min_value=bijector.inverse_min_event_ndims,
                         max_value=ps.rank_from_shape(ys.shape) - 1))
        ildj_fn = functools.partial(bijector.inverse_log_det_jacobian,
                                    event_ndims=event_ndims)
        vectorized_ildj = tf.vectorized_map(ildj_fn,
                                            ys,
                                            fallback_to_while_loop=False)
        ildj = tf.broadcast_to(ildj_fn(ys), tf.shape(vectorized_ildj))
        self.assertAllClose(*self.evaluate((ildj, vectorized_ildj)),
                            atol=atol,
                            rtol=rtol)
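The equivalence asserted throughout this test can be reproduced with any elementwise function; a minimal sketch using `tf.math.softplus` as a stand-in for `bijector.forward`:

```python
import tensorflow as tf

xs = tf.random.normal([3, 4])
direct = tf.math.softplus(xs)
vectorized = tf.vectorized_map(
    tf.math.softplus, xs, fallback_to_while_loop=False)
tf.debugging.assert_near(direct, vectorized)  # vectorization matches the direct call
```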
Example No. 27
    def posterior_marginals(self, observations, name=None):
        """Compute marginal posterior distribution for each state.

    This function computes, for each time step, the marginal
    conditional probability that the hidden Markov model was in
    each possible state given the observations that were made
    at each time step.
    So if the hidden states are `z[0],...,z[num_steps - 1]` and
    the observations are `x[0], ..., x[num_steps - 1]`, then
    this function computes `P(z[i] | x[0], ..., x[num_steps - 1])`
    for all `i` from `0` to `num_steps - 1`.

    This operation is sometimes called smoothing. It uses a form
    of the forward-backward algorithm.

    Note: the behavior of this function is undefined if the
    `observations` argument represents impossible observations
    from the model.

    Args:
      observations: A tensor representing a batch of observations
        made on the hidden Markov model.  The rightmost dimension of this tensor
        gives the steps in a sequence of observations from a single sample from
        the hidden Markov model. The size of this dimension should match the
        `num_steps` parameter of the hidden Markov model object. The other
        dimensions are the dimensions of the batch and these are broadcast with
        the hidden Markov model's parameters.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: "HiddenMarkovModel".

    Returns:
      posterior_marginal: A `Categorical` distribution object representing the
        marginal probability of the hidden Markov model being in each state at
        each step. The rightmost dimension of the `Categorical` distributions
        batch will equal the `num_steps` parameter providing one marginal
        distribution for each step. The other dimensions are the dimensions
        corresponding to the batch of observations.

    Raises:
      ValueError: if rightmost dimension of `observations` does not
      have size `num_steps`.
    """

        with tf.name_scope(name or "posterior_marginals"):
            with tf.control_dependencies(self._runtime_assertions):
                observation_tensor_shape = tf.shape(input=observations)

                with self._observation_shape_preconditions(
                        observation_tensor_shape):
                    observation_batch_shape = observation_tensor_shape[
                        :-1 - self._underlying_event_rank]
                    observation_event_shape = observation_tensor_shape[
                        -1 - self._underlying_event_rank:]

                    batch_shape = tf.broadcast_dynamic_shape(
                        observation_batch_shape, self.batch_shape_tensor())
                    log_init = tf.broadcast_to(
                        self._log_init,
                        tf.concat([batch_shape, [self._num_states]], axis=0))
                    log_transition = self._log_trans

                    observations = tf.broadcast_to(
                        observations,
                        tf.concat([batch_shape, observation_event_shape],
                                  axis=0))
                    observation_rank = tf.rank(observations)
                    underlying_event_rank = self._underlying_event_rank
                    observations = distribution_util.move_dimension(
                        observations,
                        observation_rank - underlying_event_rank - 1, 0)
                    observations = tf.expand_dims(
                        observations, observation_rank - underlying_event_rank)
                    observation_log_probs = self._observation_distribution.log_prob(
                        observations)

                    log_adjoint_prob = tf.zeros_like(log_init)

                    def forward_step(log_previous_step, log_prob_observation):
                        return _log_vector_matrix(
                            log_previous_step,
                            log_transition) + log_prob_observation

                    log_prob = log_init + observation_log_probs[0]

                    forward_log_probs = tf.scan(forward_step,
                                                observation_log_probs[1:],
                                                initializer=log_prob,
                                                name="forward_log_probs")

                    forward_log_probs = tf.concat(
                        [[log_prob], forward_log_probs], axis=0)

                    def backward_step(log_previous_step, log_prob_observation):
                        return _log_matrix_vector(
                            log_transition,
                            log_prob_observation + log_previous_step)

                    backward_log_adjoint_probs = tf.scan(
                        backward_step,
                        observation_log_probs[1:],
                        initializer=log_adjoint_prob,
                        reverse=True,
                        name="backward_log_adjoint_probs")

                    total_log_prob = tf.reduce_logsumexp(
                        input_tensor=forward_log_probs[-1], axis=-1)

                    backward_log_adjoint_probs = tf.concat(
                        [backward_log_adjoint_probs, [log_adjoint_prob]],
                        axis=0)

                    log_likelihoods = forward_log_probs + backward_log_adjoint_probs

                    marginal_log_probs = distribution_util.move_dimension(
                        log_likelihoods - total_log_prob[..., tf.newaxis], 0,
                        -2)

                    return categorical.Categorical(logits=marginal_log_probs)
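A usage sketch of the smoothing described above: a two-state hidden Markov model with Gaussian emissions, smoothed over a short observation sequence (the parameters are made up):

```python
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
    transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]]),
    observation_distribution=tfd.Normal(loc=[0., 5.], scale=[1., 1.]),
    num_steps=5)

observations = tf.constant([0.1, 0.3, 4.8, 5.2, 5.0])
posterior = hmm.posterior_marginals(observations)
print(posterior.probs_parameter().shape)  # (5, 2): one marginal per step
```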
    def testVariationalLossShapes(self):
        # 2x2 grid of index points in R^2 and flatten to 4x2
        index_points = np.linspace(-4., 4., 2, dtype=np.float64)
        index_points = np.stack(np.meshgrid(index_points, index_points),
                                axis=-1)
        index_points = np.reshape(index_points, [-1, 2])
        # ==> shape = [4, 2]
        batched_index_points = np.expand_dims(np.stack([index_points] * 6), -3)
        # ==> shape = [6, 1, 4, 2]

        # 3x3 grid of index points in R^2 and flatten to 9x2
        observation_index_points = np.linspace(-4., 4., 3, dtype=np.float64)
        observation_index_points = np.stack(np.meshgrid(
            observation_index_points, observation_index_points),
                                            axis=-1)
        observation_index_points = np.reshape(observation_index_points,
                                              [-1, 2])
        # ==> shape = [9, 2]
        observation_index_points = np.expand_dims(
            np.stack([observation_index_points] * 6), -3)
        # ==> shape = [6, 1, 9, 2]
        observations = np.sin(observation_index_points[..., 0])
        # ==> shape = [6, 1, 9]

        # 9 inducing index points in R^2
        inducing_index_points = np.linspace(-4., 4., 3, dtype=np.float64)
        inducing_index_points = np.stack(np.meshgrid(inducing_index_points,
                                                     inducing_index_points),
                                         axis=-1)
        inducing_index_points = np.reshape(inducing_index_points, [-1, 2])
        # ==> shape = [9, 2]

        variational_inducing_observations_loc = np.zeros([3, 9],
                                                         dtype=np.float64)
        variational_inducing_observations_scale = np.eye(9, dtype=np.float64)

        # Kernel with batch_shape [2, 4, 1, 1]
        amplitude = np.array([1., 2.], np.float64).reshape([2, 1, 1, 1])
        length_scale = np.array([.1, .2, .3, .4],
                                np.float64).reshape([1, 4, 1, 1])

        jitter = np.float64(1e-6)
        observation_noise_variance = np.float64(1e-2)

        if not self.is_static:
            amplitude = tf1.placeholder_with_default(amplitude, shape=None)
            length_scale = tf1.placeholder_with_default(length_scale,
                                                        shape=None)
            batched_index_points = tf1.placeholder_with_default(
                batched_index_points, shape=None)

            observations = tf1.placeholder_with_default(observations,
                                                        shape=None)
            observation_index_points = tf1.placeholder_with_default(
                observation_index_points, shape=None)
            inducing_index_points = tf1.placeholder_with_default(
                inducing_index_points, shape=None)
            variational_inducing_observations_loc = tf1.placeholder_with_default(
                variational_inducing_observations_loc, shape=None)
            variational_inducing_observations_scale = tf1.placeholder_with_default(
                variational_inducing_observations_scale, shape=None)

        kernel = psd_kernels.ExponentiatedQuadratic(amplitude, length_scale)

        vgp = tfd.VariationalGaussianProcess(
            kernel=kernel,
            index_points=batched_index_points,
            inducing_index_points=inducing_index_points,
            variational_inducing_observations_loc=(
                variational_inducing_observations_loc),
            variational_inducing_observations_scale=(
                variational_inducing_observations_scale),
            observation_noise_variance=observation_noise_variance,
            jitter=jitter,
            validate_args=True)

        loss = vgp.variational_loss(
            observations=observations,
            observation_index_points=observation_index_points)
        # Expect a scalar loss.
        self.assertAllClose([], tf.shape(input=loss))
Example No. 29
    def one_step(self, state, kernel_results, seed=None):
        """Takes one Sequential Monte Carlo inference step.

    Args:
      state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
        the current particles with (log) weights. The `log_weights` must be
        a float `Tensor` of shape `[num_particles, b1, ..., bN]`. The
        `particles` may be any structure of `Tensor`s, each of which
        must have shape `concat([log_weights.shape, event_shape])` for some
        `event_shape`, which may vary across components.
      kernel_results: instance of
        `tfp.experimental.mcmc.SequentialMonteCarloResults` representing results
        from a previous step.
      seed: Optional Python integer to seed the random number generator.
        If provided, overrides the class-level seed set in `__init__`.
    Returns:
      state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
        new particles with (log) weights.
      kernel_results: instance of
        `tfp.experimental.mcmc.SequentialMonteCarloResults`.
    """
        with tf.name_scope(self.name):
            with tf.name_scope('one_step'):
                seed = SeedStream(seed if seed else self.seed, 'smc_one_step')

                state = WeightedParticles(*state)  # Canonicalize.
                num_particles = ps.size0(state.log_weights)

                # Propose new particles and update weights for this step, unless it's
                # the initial step, in which case, use the user-provided initial
                # particles and weights.
                proposed_state = self.propose_and_update_log_weights_fn(
                    # Propose state[t] from state[t - 1].
                    ps.maximum(0, kernel_results.steps - 1),
                    state,
                    seed=seed())
                is_initial_step = ps.equal(kernel_results.steps, 0)
                # TODO(davmre): this `where` assumes the state size didn't change.
                state = tf.nest.map_structure(
                    lambda a, b: ps.where(is_initial_step, a, b), state,
                    proposed_state)

                normalized_log_weights = tf.nn.log_softmax(state.log_weights,
                                                           axis=0)
                # Every entry of `log_weights` differs from `normalized_log_weights`
                # by the same normalizing constant. We extract that constant by
                # examining an arbitrary entry.
                incremental_log_marginal_likelihood = (
                    state.log_weights[0] - normalized_log_weights[0])

                do_resample = self.resample_criterion_fn(state)

                # Some batch elements may require resampling and others not, so
                # we first do the resampling for all elements, then select whether to
                # use the resampled values for each batch element according to
                # `do_resample`. If there were no batching, we might prefer to use
                # `tf.cond` to avoid the resampling computation on steps where it's not
                # needed---but we're ultimately interested in adaptive resampling
                # for statistical (not computational) purposes, so this isn't a
                # dealbreaker.
                resampled_particles, resample_indices = weighted_resampling.resample(
                    state.particles,
                    state.log_weights,
                    self.resample_fn,
                    seed=seed)
                uniform_weights = tf.fill(
                    tf.shape(state.log_weights),
                    value=-tf.math.log(
                        tf.cast(num_particles, state.log_weights.dtype)))
                (resampled_particles, resample_indices,
                 log_weights) = tf.nest.map_structure(
                     lambda r, p: ps.where(do_resample, r, p),
                     (resampled_particles, resample_indices, uniform_weights),
                     (state.particles, _dummy_indices_like(resample_indices),
                      normalized_log_weights))

            return (
                WeightedParticles(particles=resampled_particles,
                                  log_weights=log_weights),
                SequentialMonteCarloResults(
                    steps=kernel_results.steps + 1,
                    parent_indices=resample_indices,
                    incremental_log_marginal_likelihood=(
                        incremental_log_marginal_likelihood),
                    accumulated_log_marginal_likelihood=(
                        kernel_results.accumulated_log_marginal_likelihood +
                        incremental_log_marginal_likelihood)))
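A minimal sketch of the normalization identity used above: every entry of `log_weights` differs from its log-softmax by the same constant, namely `logsumexp(log_weights)`, which is taken as the incremental log marginal likelihood. The weights below are made up:

```python
import tensorflow as tf

log_weights = tf.constant([-1.2, -0.3, -2.5])
normalized = tf.nn.log_softmax(log_weights, axis=0)
incremental = log_weights[0] - normalized[0]
print(incremental.numpy())                       # equals ...
print(tf.reduce_logsumexp(log_weights).numpy())  # ... this value
```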
Example No. 30
    def _shape(self, x):
        if self.use_static_shape:
            return tensorshape_util.as_list(x.shape)
        else:
            return self.evaluate(tf.shape(x))