Beispiel #1
0
 def primal_estimate(self, s, p_fn, b_fn, n_samples, action_spec=None):
   """Compute the primal estimate from cross-evaluated samples of two functions.

   Samples are drawn from both `p_fn` and `b_fn`; each set of samples is then
   scored under the *other* function, and the four resulting log-density
   terms are handed to `self._primal_estimate_with_densities`.

   Args:
     s: Input forwarded unchanged to `sample_n` and `get_log_density`
       (presumably a batch of states -- confirm against callers).
     p_fn: Object providing `sample_n(s, n)` and `get_log_density(s, a)`.
       `sample_n` must return a 3-tuple; its last two items are used here as
       the samples and their log-density under `p_fn`.
     b_fn: Same interface as `p_fn`.
     n_samples: Number of samples requested from each function.
     action_spec: Forwarded to `utils.clip_by_eps` when clipping samples.

   Returns:
     Whatever `self._primal_estimate_with_densities` returns for the four
     log-density terms.
   """

   def _clipped(actions):
     # Clipping guards the density evaluation against numerical issues.
     return utils.clip_by_eps(actions, action_spec, CLIP_EPS)

   # Samples from each function with the log-density under their own source.
   _, p_actions, p_actions_logp = p_fn.sample_n(s, n_samples)
   _, b_actions, b_actions_logb = b_fn.sample_n(s, n_samples)

   # Cross terms: each sample set evaluated under the other function.
   p_actions_logb = b_fn.get_log_density(s, _clipped(p_actions))
   b_actions_logp = p_fn.get_log_density(s, _clipped(b_actions))

   return self._primal_estimate_with_densities(
       p_actions_logp, p_actions_logb, b_actions_logp, b_actions_logb)
Beispiel #2
0
 def _build_p_loss(self, batch):
     """Build the `p` loss and a summary of its components for one batch.

     The loss is
       mean(alpha * log_pi(sampled action) - log_pi(batch action))
         + weight_decays[0] * p_weight_norm
     where `alpha` comes from `self._get_alpha_entropy()`, the sampled-action
     log-density is the third output of `self._p_fn`, and the batch action is
     clipped with `utils.clip_by_eps` before its log-density is evaluated.

     Args:
       batch: Mapping that provides the keys 's1' and 'a1' (presumably states
         and the actions recorded for them -- confirm against the data
         pipeline).

     Returns:
       A `(loss, info)` tuple. `info` is an `OrderedDict` holding 'p_loss'
       (the loss without the weight-norm term) and 'p_norm' (the weight norm).
     """
     states = batch['s1']
     batch_actions = batch['a1']
     # Clip the recorded actions before evaluating their log-density.
     batch_actions = utils.clip_by_eps(
         batch_actions, self._action_spec, CLIP_EPS)
     logp_batch_actions = self._get_log_density(states, batch_actions)

     # Only the third output of the network call is needed here.
     _, _, logp_sampled_actions = self._p_fn(states)

     alpha = self._get_alpha_entropy()
     per_sample_loss = alpha * logp_sampled_actions - logp_batch_actions
     p_loss = tf.reduce_mean(per_sample_loss)

     # Weight-norm penalty scaled by the first weight-decay coefficient.
     p_w_norm = self._get_p_weight_norm()
     norm_loss = self._weight_decays[0] * p_w_norm
     loss = p_loss + norm_loss

     # Construct information about current training.
     info = collections.OrderedDict([
         ('p_loss', p_loss),
         ('p_norm', p_w_norm),
     ])
     return loss, info