Example #1
0
 def f(dist_inputs, values, returns, actions, old_log_probs):
   """Forwards the policy inputs to `rl_layers.ApproximateKLDivergence`.

   `values` and `returns` are part of the signature but are not used.
   `self` is captured from the enclosing scope, which is not shown here.

   Args:
     dist_inputs: Passed through as the first positional argument.
     values: Unused.
     returns: Unused.
     actions: Passed through as the second positional argument.
     old_log_probs: Passed through as the third positional argument.

   Returns:
     Whatever `rl_layers.ApproximateKLDivergence` returns for these inputs.
   """
   # Drop the unused inputs explicitly so the omission reads as deliberate.
   del values, returns
   policy_log_prob = self._policy_dist.log_prob
   return rl_layers.ApproximateKLDivergence(
       dist_inputs, actions, old_log_probs, log_prob_fun=policy_log_prob)
Example #2
0
 def approximate_kl_divergence(self):
   """Builds a `tl.Fn` wrapping `rl_layers.ApproximateKLDivergence`.

   The wrapped function takes `dist_inputs`, `actions` and `old_log_probs`
   and forwards them, together with `self._policy_dist.log_prob` as
   `log_prob_fun`, to `rl_layers.ApproximateKLDivergence`. The wrapper is
   declared with a single output (`n_out=1`).

   Returns:
     The object produced by `tl.Fn` (presumably a layer -- confirm against
     the `tl.Fn` documentation).
   """
   # `self._policy_dist.log_prob` is looked up inside the lambda, i.e. each
   # time the wrapped function is called rather than when this is built.
   return tl.Fn(
       lambda dist_inputs, actions, old_log_probs:
       rl_layers.ApproximateKLDivergence(
           dist_inputs,
           actions,
           old_log_probs,
           log_prob_fun=self._policy_dist.log_prob),
       n_out=1)