Esempio n. 1
0
def joint_log_prob(field_prior: tfp.distributions.Distribution,
                   matchup_prior: tfp.distributions.Distribution, n_matches,
                   obs_match_counts, obs_match_wins, candidate_field,
                   candidate_matchups):
    """Return the joint log probability of a candidate field and matchups.

    The result is the sum of four terms: the log prior of the candidate
    field, the summed log prior of the candidate matchups, the summed log
    probability of the observed match counts, and the summed log probability
    of the observed match outcomes.

    Args:
        field_prior: prior distribution scored at ``candidate_field``.
        matchup_prior: prior distribution scored at ``candidate_matchups``.
        n_matches: forwarded unchanged to ``generate.rv_match_counts``.
        obs_match_counts: observed match counts. Its leading dimension is
            taken as the matchup matrix size, and it is flattened before
            being scored against the match-count distribution.
        obs_match_wins: observed wins; combined with the counts through
            ``generate.wins_to_outcomes``.
        candidate_field: candidate field to score.
        candidate_matchups: candidate matchup parameters; expanded into a
            matrix by ``generate.build_matchup_matrix``.

    Returns:
        The sum of the four log-probability terms.
    """
    # Prior over the field.
    field_term = field_prior.log_prob(candidate_field)

    # Prior over the matchups. The full matrix obeys M[i,j] = 1 - M[j,i]
    # (hence M[i,i] = 0.5), leaving n*(n-1)/2 independent parameters.
    # NOTE(review): presumably `candidate_matchups` already holds only those
    # free (j > i) entries -- confirm against the caller.
    size = obs_match_counts.shape[0]
    matchup_term = tf.reduce_sum(matchup_prior.log_prob(candidate_matchups))

    # Match counts given the field. The observations are flattened to a
    # vector before being scored against the count distribution.
    counts_vector = tf.reshape(obs_match_counts, [-1])
    counts_dist = generate.rv_match_counts(candidate_field, n_matches)
    counts_term = tf.reduce_sum(counts_dist.log_prob(counts_vector))

    # Outcomes given the match counts and matchups. The observations are
    # wins, so they are first converted to outcomes using the counts.
    observed_outcomes = generate.wins_to_outcomes(obs_match_wins,
                                                  obs_match_counts)
    matchup_matrix = generate.build_matchup_matrix(candidate_matchups, size)
    outcomes_dist = generate.rv_outcomes(obs_match_counts, matchup_matrix)
    outcomes_term = tf.reduce_sum(outcomes_dist.log_prob(observed_outcomes))

    return field_term + matchup_term + counts_term + outcomes_term
Esempio n. 2
0
def compute_cross_entropy_loss(
    sampled_actions: tf.Tensor,
    normalized_weights: tf.Tensor,
    online_action_distribution: tfp.distributions.Distribution,
) -> tf.Tensor:
  """Compute the cross-entropy between the online and reweighted target policy.

  Args:
    sampled_actions: samples used in the Monte Carlo integration in the policy
      loss. Expected shape is [N, B, ...], where N is the number of sampled
      actions and B is the number of sampled states.
    normalized_weights: target policy multiplied by the exponentiated Q values
      and normalized; expected shape is [N, B].
    online_action_distribution: policy to be optimized.

  Returns:
    loss_policy_gradient: the cross-entropy loss that, when differentiated,
      produces the policy gradient.
  """

  # M-step: weight the online policy's log-probability of every sampled
  # action by its normalized weight.
  weighted_log_probs = (
      online_action_distribution.log_prob(sampled_actions) * normalized_weights)

  # Summing over the leading (sampled actions) axis yields one loss per state.
  per_state_loss = -tf.reduce_sum(weighted_log_probs, axis=0)

  # Average over the batch of states.
  return tf.reduce_mean(per_state_loss, axis=0)
Esempio n. 3
0
def estimated_entropy(dist: tfp.distributions.Distribution,
                      seed=None,
                      assume_reparametrization=False,
                      num_samples=1,
                      check_numerics=False):
    """Estimate the entropy of a distribution from samples.

    The unbiased estimator of entropy is -log(p(x)) for an unbiased sample x
    of p. Its gradient, however, is not an unbiased estimator of the gradient
    of the entropy, so a second value is returned whose gradient is. See
    docs/subtleties_of_estimating_entropy.py for detail.

    Args:
        dist (tfp.distributions.Distribution): concerned distribution
        seed (Any): Any Python object convertible to string, supplying the
            initial entropy.
        assume_reparametrization (bool): assume the sample from continuous
            distribution is generated by transforming a fixed distribution
            by a parameterized function. If we can assume this,
            entropy_for_gradient will have lower variance. We make the default
            to be False to be safe.
        num_samples (int): number of random samples used for estimating entropy.
        check_numerics (bool): If true, adds tf.debugging.check_numerics to
            help find NaN / Inf values. For debugging only.
    Returns:
        tuple of (entropy, entropy_for_gradient). entropy_for_gradient is for
        calculating gradient
    """
    samples = dist.sample(sample_shape=(num_samples, ), seed=seed)
    reparametrized = samples.dtype.is_floating and assume_reparametrization
    if not reparametrized:
        # Without the reparametrization assumption the samples are treated
        # as constants when differentiating.
        samples = tf.stop_gradient(samples)

    neg_log_prob = -dist.log_prob(samples)
    if check_numerics:
        neg_log_prob = tf.debugging.check_numerics(neg_log_prob, 'entropy')

    # Average over the leading sample axis added by `sample_shape`.
    entropy = tf.reduce_mean(neg_log_prob, axis=0)
    if reparametrized:
        # The plain estimate is used for the gradient as well.
        return entropy, entropy

    # Surrogate built from the per-sample values (before averaging); only its
    # gradient is meaningful.
    surrogate = -0.5 * tf.math.square(neg_log_prob)
    return entropy, tf.reduce_mean(surrogate, axis=0)
Esempio n. 4
0
    def _local_kls(self,
                   posteriors: tfp.distributions.Distribution) -> tf.Tensor:
        """
        Evaluate the KL divergences [posteriors∥prior] against ``self.prior``.

        :param posteriors: A distribution that represents the approximate posteriors.
        :returns: The result of ``posteriors.kl_divergence(self.prior)``, i.e.
            the KL divergences from the prior for each of the posteriors.
        """
        prior = self.prior
        divergences = posteriors.kl_divergence(prior)
        return divergences
Esempio n. 5
0
    def _convert_to_tensor_fn(
            self, distribution: tfp.distributions.Distribution) -> tf.Tensor:
        """
        Draw samples from the predictive distributions at the input points
        (see :meth:`_make_distribution_fn`) and return them as a tensor.

        When :attr:`num_samples` is set, that many samples are drawn and
        stacked along a new leading axis; otherwise a single sample is drawn.
        Whether the samples are correlated or marginal (uncorrelated) depends
        on :attr:`full_cov` and :attr:`full_output_cov`.
        """
        # Shape legend: N input points, S = self.num_samples,
        # Q = output dimensionality.
        sample_count = self.num_samples
        if sample_count is None:
            drawn = distribution.sample()  # [N, Q], or [Q, N] if full_cov
        else:
            # [S, N, Q], or [S, Q, N] if full_cov
            drawn = distribution.sample((sample_count, ))

        # With full_cov the last two axes are swapped, giving [(S,) N, Q].
        return tf.linalg.adjoint(drawn) if self.full_cov else drawn
    def _distribution_to_tensor(self, d: tfp.distributions.Distribution):
        """Sample actions from ``d`` and gather statistics of the distribution.

        Args:
            d: distribution to sample from and evaluate.

        Returns:
            A 6-tuple ``(actions, log_prob, old_log_prob, mean, stddev,
            entropy)``: a fresh sample from ``d``; the log-probability under
            ``d`` of that sample after ``self._clip_actions``; the
            log-probability under ``d`` of the clipped stored actions read
            from ``self.intermediate_inputs['action'][self.action_index]``;
            and ``d.mean()``, ``d.stddev()`` and ``d.entropy()``.
        """
        clip = self._clip_actions
        fresh_actions = d.sample()
        stored_actions = self.intermediate_inputs['action'][self.action_index]

        # Both action sets are clipped before being scored under `d`.
        fresh_log_prob = d.log_prob(clip(fresh_actions))
        stored_log_prob = d.log_prob(clip(stored_actions))

        statistics = (d.mean(), d.stddev(), d.entropy())
        return (fresh_actions, fresh_log_prob, stored_log_prob) + statistics
Esempio n. 7
0
 def _compute_entropy(dist: tfp.distributions.Distribution, action_spec):
     """Return ``(entropy, entropy_for_gradient)`` for a single distribution.

     ``dist.entropy()`` is used when the distribution implements it; if it
     raises ``NotImplementedError`` both values come from
     ``estimated_entropy`` instead. Every axis from the outer rank (as
     reported by ``_calc_outer_rank``) onwards is then summed out of both
     values.
     """
     try:
         exact = dist.entropy()
     except NotImplementedError:
         # No analytic entropy available: fall back to the sampled estimate.
         entropy, entropy_for_gradient = estimated_entropy(
             dist, seed=seed_stream())
     else:
         entropy = entropy_for_gradient = exact

     first_inner_axis = _calc_outer_rank(dist, action_spec)
     inner_axes = list(range(first_inner_axis, entropy.shape.ndims))

     def _sum_inner(values):
         return tf.reduce_sum(input_tensor=values, axis=inner_axes)

     return _sum_inner(entropy), _sum_inner(entropy_for_gradient)
Esempio n. 8
0
    def _compute_entropy(dist: tfp.distributions.Distribution, action_spec):
        """Return ``(entropy, entropy_for_gradient)`` for a single distribution.

        ``SquashToSpecNormal`` distributions get both values from
        ``estimated_entropy``; every other distribution uses
        ``dist.entropy()`` for both. Every axis from the outer rank (as
        reported by ``_calc_outer_rank``) onwards is then summed out of both
        values.
        """
        if not isinstance(dist, SquashToSpecNormal):
            exact = dist.entropy()
            entropy, entropy_for_gradient = exact, exact
        else:
            entropy, entropy_for_gradient = estimated_entropy(
                dist, seed=seed_stream())

        first_inner_axis = _calc_outer_rank(dist, action_spec)
        inner_axes = list(range(first_inner_axis, entropy.shape.ndims))

        def _sum_inner(values):
            return tf.reduce_sum(input_tensor=values, axis=inner_axes)

        return _sum_inner(entropy), _sum_inner(entropy_for_gradient)
Esempio n. 9
0
def _assert_kolmogorov_smirnov_95(
    # fmt: off
    samples: tf.Tensor,  # [..., S]
    distribution: tfp.distributions.Distribution
    # fmt: on
) -> None:
    """
    Assert that ``samples`` pass a two-sided Kolmogorov-Smirnov test against
    ``distribution`` at the 95% level.

    The statistic is the largest absolute gap between the empirical CDF of the
    samples (taken along the last axis) and ``distribution.cdf``. Because the
    empirical CDF is a step function, the gap at each sorted sample is checked
    against both the value just after the step (``i / S``) and the value just
    before it (``(i - 1) / S``); checking only one side can understate the
    statistic by up to ``1 / S``.

    :param samples: Samples to test, with shape ``[..., S]``.
    :param distribution: Reference distribution; must have a scalar event
        shape.
    :raises AssertionError: If the event shape is not scalar, or the statistic
        is not below the asymptotic 95% critical value ``1.36 / sqrt(S)``.
    """
    assert distribution.event_shape == ()
    tf.debugging.assert_shapes([(samples, [..., "S"])])

    sample_size = samples.shape[-1]
    samples_sorted = tf.sort(samples, axis=-1)  # [..., S]
    expected_cdf = distribution.cdf(samples_sorted)  # [..., S]

    # Empirical CDF just after / just before the jump at each sorted sample.
    edf_after = tf.range(1.0, sample_size + 1, dtype=samples.dtype) / sample_size  # [S]
    edf_before = tf.range(0.0, sample_size, dtype=samples.dtype) / sample_size  # [S]

    # One-sided deviations; the two-sided statistic is the larger of the two.
    d_plus = tf.reduce_max(edf_after - expected_cdf)
    d_minus = tf.reduce_max(expected_cdf - edf_before)
    ks_statistic = tf.maximum(d_plus, d_minus)

    _95_percent_bound = 1.36 / math.sqrt(sample_size)
    assert ks_statistic < _95_percent_bound, (
        f"Kolmogorov-Smirnov statistic {ks_statistic} is not below the 95% "
        f"bound {_95_percent_bound}"
    )
Esempio n. 10
0
 def log_prob(dist: tfp.distributions.Distribution):
     """Return the log-probability under ``dist`` of the squeezed ``y_true``.

     ``y_true`` is not a parameter; it is resolved from an outer scope.
     ``tf.squeeze`` is called without an axis, so every size-1 dimension of
     ``y_true`` is dropped before scoring.
     """
     squeezed_targets = tf.squeeze(y_true)
     return dist.log_prob(squeezed_targets)
Esempio n. 11
0
 def log_prob(dist: tfp.distributions.Distribution):
     """Return the log-probability of ``y_true`` under ``dist``.

     ``y_true`` is not a parameter; it is resolved from an outer scope.
     """
     targets = y_true
     return dist.log_prob(targets)
Esempio n. 12
0
 def _convert_to_tensor_fn(self, distribution: tfp.distributions.Distribution) -> tf.Tensor:
     """Draw one sample from ``distribution`` and reshape it.

     The target shape is ``self._convert_to_tensor_output_shape``.
     """
     drawn = distribution.sample()
     target_shape = self._convert_to_tensor_output_shape
     return tf.reshape(drawn, target_shape)