Example #1
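A DDPG-style actor update in TensorFlow 1.x: the actor is trained to maximize the critic's Q estimate of its own actions, with optional gradient clipping and a soft update for the target actor. The published snippet reused the critic's learning rate and clip norm inside the actor branch; the version below assumes the actor-specific parameters ('ACTOR_LEARNING_RATE', 'actor_clip_norm') were intended.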
    def _set_up_actor_loss(self):
        # Collect any regularization terms registered under the actor's scope.
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                     scope=self.actor.name_scope)

        # Deterministic-policy-gradient objective: maximize the critic's Q
        # estimate of the actor's action, i.e. minimize its negative mean.
        loss = -tf.reduce_mean(self._critic_with_actor_output.q_tensor)
        if len(reg_loss) > 0:
            loss += tf.reduce_sum(reg_loss)

        # NOTE: assumes an actor-specific 'ACTOR_LEARNING_RATE' parameter;
        # the snippet as published reused 'CRITIC_LEARNING_RATE' here.
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.parameters('ACTOR_LEARNING_RATE'))
        grad_var_pair = optimizer.compute_gradients(
            loss=loss, var_list=self.actor.parameters('tf_var_list'))
        grads = [g[0] for g in grad_var_pair]
        # Optionally clip the actor's gradients by norm, using the actor's
        # own clip setting (matching the guard on this branch).
        if self.parameters('actor_clip_norm') is not None:
            grad_var_pair, grads = clip_grad(
                optimizer=optimizer,
                loss=loss,
                var_list=self.actor.parameters('tf_var_list'),
                clip_norm=self.parameters('actor_clip_norm'))
        optimize_op = optimizer.apply_gradients(grad_var_pair)
        # Build soft (Polyak) target-network updates:
        # target <- DECAY * target + (1 - DECAY) * online.
        op = []
        for var, target_var in zip(
                self.actor.parameters('tf_var_list'),
                self.target_actor.parameters('tf_var_list')):
            ref_val = self.parameters('DECAY') * target_var + (
                1.0 - self.parameters('DECAY')) * var
            op.append(tf.assign(target_var, ref_val))

        return loss, optimize_op, op, optimizer, grads
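
The clip_grad helper both methods call is not shown here. A minimal sketch of a compatible implementation, assuming only the call signature used above (recompute the gradients, clip each one by norm, and return the new pairs along with the bare gradients); this is a hypothetical reconstruction, not the project's actual helper:

import tensorflow as tf

def clip_grad(optimizer, loss, var_list, clip_norm):
    # Recompute the gradients for the given loss and variables,
    # then clip each gradient tensor to the requested norm.
    grad_var_pair = optimizer.compute_gradients(loss=loss, var_list=var_list)
    clipped_pairs = [(tf.clip_by_norm(grad, clip_norm), var)
                     for grad, var in grad_var_pair if grad is not None]
    grads = [grad for grad, _ in clipped_pairs]
    return clipped_pairs, grads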
Example #2
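The matching critic update: a mean-squared-error regression of the critic's Q output toward the bootstrapped target predict_q_value, with optional regularization, gradient clipping, and the same soft target-network update.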
    def _setup_critic_loss(self):
        # Collect any regularization terms registered under the critic's scope.
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=self.critic.name_scope)
        # loss = tf.reduce_sum((self.predict_q_value - self.critic.q_tensor) ** 2)
        # Bellman regression: fit the critic's Q output to the bootstrapped
        # target predict_q_value (the arguments are swapped relative to
        # convention, but mean squared error is symmetric).
        loss = tf.losses.mean_squared_error(predictions=self.predict_q_value, labels=self.critic.q_tensor)
        if len(reg_loss) > 0:
            loss += tf.reduce_sum(reg_loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.parameters('CRITIC_LEARNING_RATE'))
        grad_var_pair = optimizer.compute_gradients(loss=loss, var_list=self.critic.parameters('tf_var_list'))
        grads = [g[0] for g in grad_var_pair]
        # Optionally clip the critic's gradients by norm.
        if self.parameters('critic_clip_norm') is not None:
            grad_var_pair, grads = clip_grad(optimizer=optimizer,
                                             loss=loss,
                                             var_list=self.critic.parameters('tf_var_list'),
                                             clip_norm=self.parameters('critic_clip_norm'))
        optimize_op = optimizer.apply_gradients(grad_var_pair)
        # Build the same soft (Polyak) target-network update as for the actor:
        # target <- DECAY * target + (1 - DECAY) * online.
        op = []
        for var, target_var in zip(self.critic.parameters('tf_var_list'),
                                   self.target_critic.parameters('tf_var_list')):
            ref_val = self.parameters('DECAY') * target_var + (1.0 - self.parameters('DECAY')) * var
            op.append(tf.assign(target_var, ref_val))

        return loss, optimize_op, op, optimizer, grads
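
Both methods end by building the same soft ("Polyak") target update, target <- DECAY * target + (1 - DECAY) * online, which makes the target network a slowly moving average of the online network. A self-contained TF1 sketch of that pattern in isolation (the variable names are illustrative only):

import tensorflow as tf

decay = 0.99  # plays the role of the 'DECAY' parameter above
online_var = tf.Variable(1.0)   # stands in for one online-network weight
target_var = tf.Variable(0.0)   # stands in for the matching target weight

# target <- decay * target + (1 - decay) * online
soft_update = tf.assign(target_var,
                        decay * target_var + (1.0 - decay) * online_var)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        sess.run(soft_update)
    print(sess.run(target_var))  # ~0.049: slowly tracks the online value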