Example #1
 def gradients(self, optimizer):
     """Compute actor and critic gradients for DDPG-style losses.

     The actor gradient covers the policy variables (``self.p_func_vars``);
     the critic gradient covers the Q-function variables, extended with the
     twin-Q variables when the "twin_q" config flag is set.  When
     "grad_norm_clipping" is configured, gradients are clipped to that norm
     via ``_minimize_and_clip``; otherwise plain
     ``optimizer.compute_gradients`` is used.

     Args:
         optimizer: Optimizer used to compute the (unclipped) gradients.

     Returns:
         List of ``(gradient, variable)`` pairs, actor pairs first, with
         any ``None`` gradients filtered out.
     """
     # Hoist the twin-Q conditional out of the argument lists so it is
     # written once instead of duplicated (and buried) in both branches.
     if self.config["twin_q"]:
         critic_vars = self.q_func_vars + self.twin_q_func_vars
     else:
         critic_vars = self.q_func_vars
     clip_val = self.config["grad_norm_clipping"]
     if clip_val is not None:
         actor_grads_and_vars = _minimize_and_clip(
             optimizer,
             self.loss.actor_loss,
             var_list=self.p_func_vars,
             clip_val=clip_val)
         critic_grads_and_vars = _minimize_and_clip(
             optimizer,
             self.loss.critic_loss,
             var_list=critic_vars,
             clip_val=clip_val)
     else:
         actor_grads_and_vars = optimizer.compute_gradients(
             self.loss.actor_loss, var_list=self.p_func_vars)
         critic_grads_and_vars = optimizer.compute_gradients(
             self.loss.critic_loss, var_list=critic_vars)
     # Drop pairs whose gradient is None (variables untouched by the loss).
     actor_grads_and_vars = [(g, v) for (g, v) in actor_grads_and_vars
                             if g is not None]
     critic_grads_and_vars = [(g, v) for (g, v) in critic_grads_and_vars
                              if g is not None]
     return actor_grads_and_vars + critic_grads_and_vars
Example #2
 def gradients(self, optimizer):
     """Build the combined actor + critic gradient list.

     Actor gradients are taken w.r.t. ``self.p_func_vars``; critic
     gradients w.r.t. ``self.q_func_vars`` (plus ``self.twin_q_func_vars``
     if the "twin_q" config flag is on).  If "grad_norm_clipping" is set,
     ``_minimize_and_clip`` clips each gradient to that norm.

     Args:
         optimizer: Optimizer used to compute the (unclipped) gradients.

     Returns:
         ``(gradient, variable)`` pairs — actor first, then critic — with
         ``None`` gradients removed.
     """
     # Compute the critic variable list once up front; the original
     # duplicated this conditional inside both branches' argument lists.
     critic_vars = (self.q_func_vars + self.twin_q_func_vars
                    if self.config["twin_q"] else self.q_func_vars)
     clip_val = self.config["grad_norm_clipping"]
     if clip_val is not None:
         actor_grads_and_vars = _minimize_and_clip(
             optimizer,
             self.loss.actor_loss,
             var_list=self.p_func_vars,
             clip_val=clip_val)
         critic_grads_and_vars = _minimize_and_clip(
             optimizer,
             self.loss.critic_loss,
             var_list=critic_vars,
             clip_val=clip_val)
     else:
         actor_grads_and_vars = optimizer.compute_gradients(
             self.loss.actor_loss, var_list=self.p_func_vars)
         critic_grads_and_vars = optimizer.compute_gradients(
             self.loss.critic_loss, var_list=critic_vars)
     # Filter out variables that received no gradient from the loss.
     actor_grads_and_vars = [(g, v) for (g, v) in actor_grads_and_vars
                             if g is not None]
     critic_grads_and_vars = [(g, v) for (g, v) in critic_grads_and_vars
                              if g is not None]
     return actor_grads_and_vars + critic_grads_and_vars
Example #3
 def gradients(self, optimizer, loss):
     """Compute actor and critic gradients with separate optimizers.

     NOTE(review): the ``optimizer`` and ``loss`` arguments are ignored
     here — the method uses ``self._actor_optimizer`` /
     ``self._critic_optimizer`` and ``self.actor_loss`` /
     ``self.critic_loss`` instead.  The signature is presumably kept for
     compatibility with the framework's ``gradients()`` hook; confirm
     against the caller before changing it.

     The filtered per-optimizer gradient lists are also cached on
     ``self._actor_grads_and_vars`` / ``self._critic_grads_and_vars`` for
     later use in ``build_apply_op``.

     Returns:
         ``(gradient, variable)`` pairs — actor first, then critic — with
         ``None`` gradients removed.
     """
     # Compute the critic var list once so both branches share it; the
     # original only did this explicitly in the non-clipping branch and
     # duplicated the conditional inline in the clipping branch.
     if self.config["twin_q"]:
         critic_vars = self.q_func_vars + self.twin_q_func_vars
     else:
         critic_vars = self.q_func_vars
     clip_val = self.config["grad_norm_clipping"]
     if clip_val is not None:
         actor_grads_and_vars = _minimize_and_clip(
             self._actor_optimizer,
             self.actor_loss,
             var_list=self.policy_vars,
             clip_val=clip_val)
         critic_grads_and_vars = _minimize_and_clip(
             self._critic_optimizer,
             self.critic_loss,
             var_list=critic_vars,
             clip_val=clip_val)
     else:
         actor_grads_and_vars = self._actor_optimizer.compute_gradients(
             self.actor_loss, var_list=self.policy_vars)
         critic_grads_and_vars = self._critic_optimizer.compute_gradients(
             self.critic_loss, var_list=critic_vars)
     # Save the None-filtered lists for later use in build_apply_op.
     self._actor_grads_and_vars = [(g, v)
                                   for (g, v) in actor_grads_and_vars
                                   if g is not None]
     self._critic_grads_and_vars = [(g, v)
                                    for (g, v) in critic_grads_and_vars
                                    if g is not None]
     return self._actor_grads_and_vars + self._critic_grads_and_vars