def _create_sac_optimizer_ops(self) -> None:
    """
    Builds the Adam optimizers and update operations used by SAC: the
    policy, value, and entropy-coefficient updates, along with the soft
    (Polyak-averaged) and hard target-network update ops.
    """
    # One optimizer per loss so each can be minimized over its own var list.
    policy_opt = self.create_optimizer_op(
        learning_rate=self.learning_rate, name="sac_policy_opt"
    )
    entropy_opt = self.create_optimizer_op(
        learning_rate=self.learning_rate, name="sac_entropy_opt"
    )
    value_opt = self.create_optimizer_op(
        learning_rate=self.learning_rate, name="sac_value_opt"
    )

    # Soft update: target <- (1 - tau) * target + tau * source, pairwise
    # over the value-network variables.
    self.target_update_op = [
        tf.assign(tgt, (1 - self.tau) * tgt + self.tau * src)
        for tgt, src in zip(
            self.target_network.value_vars, self.policy_network.value_vars
        )
    ]

    # Dump the variable groups at debug level to aid graph inspection.
    logger.debug("value_vars")
    self.print_all_vars(self.policy_network.value_vars)
    logger.debug("targvalue_vars")
    self.print_all_vars(self.target_network.value_vars)
    logger.debug("critic_vars")
    self.print_all_vars(self.policy_network.critic_vars)
    logger.debug("q_vars")
    self.print_all_vars(self.policy_network.q_vars)
    logger.debug("policy_vars")
    policy_vars = self.policy.get_trainable_variables()
    self.print_all_vars(policy_vars)

    # Hard copy used to initialize the target network to the source values.
    self.target_init_op = [
        tf.assign(tgt, src)
        for tgt, src in zip(
            self.target_network.value_vars, self.policy_network.value_vars
        )
    ]

    self.update_batch_policy = policy_opt.minimize(
        self.policy_loss, var_list=policy_vars
    )
    # Make sure policy is updated first, then value, then entropy.
    with tf.control_dependencies([self.update_batch_policy]):
        self.update_batch_value = value_opt.minimize(
            self.total_value_loss, var_list=self.policy_network.critic_vars
        )
        # Add entropy coefficient optimization operation
        with tf.control_dependencies([self.update_batch_value]):
            self.update_batch_entropy = entropy_opt.minimize(
                self.entropy_loss, var_list=self.log_ent_coef
            )
def make_beta_update(self) -> None:
    """
    Creates the beta parameter and its updater for GAIL.

    Beta acts as a Lagrange multiplier: it grows whenever the KL loss
    exceeds the mutual-information target and is floored at EPSILON so it
    never becomes non-positive.
    """
    updated_beta = tf.maximum(
        self.beta + self.alpha * (self.kl_loss - self.mutual_information), EPSILON
    )
    # Only adjust beta after the main batch update has run.
    with tf.control_dependencies([self.update_batch]):
        self.update_beta = tf.assign(self.beta, updated_beta)