def f(dist_inputs, values, returns, actions, old_log_probs):
    """Compute the approximate KL divergence for the given policy inputs.

    `values` and `returns` are accepted as inputs but are not used in the
    computation. `self` is not a parameter here; it is resolved from the
    scope in which this function is defined.

    Args:
        dist_inputs: Forwarded as the first argument to
            `rl_layers.ApproximateKLDivergence`.
        values: Unused.
        returns: Unused.
        actions: Forwarded as the second argument.
        old_log_probs: Forwarded as the third argument.

    Returns:
        Whatever `rl_layers.ApproximateKLDivergence` returns when called with
        `self._policy_dist.log_prob` as its `log_prob_fun`.
    """
    # Explicitly drop the inputs that play no role in the KL estimate.
    del values
    del returns
    approx_kl = rl_layers.ApproximateKLDivergence(
        dist_inputs,
        actions,
        old_log_probs,
        log_prob_fun=self._policy_dist.log_prob,
    )
    return approx_kl
def approximate_kl_divergence(self):
    """Build the approximate KL divergence layer.

    Returns:
        The result of `tl.Fn(..., n_out=1)` wrapping a three-argument
        function `(dist_inputs, actions, old_log_probs)` that calls
        `rl_layers.ApproximateKLDivergence` with `self._policy_dist.log_prob`
        as its `log_prob_fun`.
    """
    # The callable stays a lambda so that the object handed to tl.Fn is the
    # same kind of function (including its name) as before.
    kl_layer = tl.Fn(
        lambda dist_inputs, actions, old_log_probs: (
            rl_layers.ApproximateKLDivergence(
                dist_inputs,
                actions,
                old_log_probs,
                log_prob_fun=self._policy_dist.log_prob,
            )
        ),
        n_out=1,
    )
    return kl_layer