def update_targets(self, tau=1.0):
    """Performs a soft update of the target network parameters.

    For each weight w_s in the actor/critic networks, and its corresponding
    weight w_t in the target actor/critic networks, a soft update is:

        w_t = (1 - tau) * w_t + tau * w_s

    Args:
      tau: A float scalar in [0, 1].

    Returns:
      An operation that performs a soft update of the target network
      parameters.

    Raises:
      ValueError: If `tau` is not in [0, 1] (NaN is rejected as well).
    """
    # Positive range test on purpose: every ordered comparison with NaN is
    # false, so `tau < 0 or tau > 1` would silently accept NaN.
    if not 0 <= tau <= 1:
        raise ValueError('Input `tau` should be in [0, 1].')

    # Actor: variables under the actor scope are the sources, variables
    # under the target-actor scope are the ones being updated.
    actor_vars = slim.get_trainable_variables(
        utils.join_scope(self._scope, self.ACTOR_NET_SCOPE))
    target_actor_vars = slim.get_trainable_variables(
        utils.join_scope(self._scope, self.TARGET_ACTOR_NET_SCOPE))
    update_actor = utils.soft_variables_update(
        actor_vars, target_actor_vars, tau)

    # NOTE: This updates both critic networks.
    critic_vars = slim.get_trainable_variables(
        utils.join_scope(self._scope, self.CRITIC_NET_SCOPE))
    target_critic_vars = slim.get_trainable_variables(
        utils.join_scope(self._scope, self.TARGET_CRITIC_NET_SCOPE))
    update_critic = utils.soft_variables_update(
        critic_vars, target_critic_vars, tau)

    # Bundle both updates so callers can run them as a single op.
    return tf.group(update_actor, update_critic, name='update_targets')