Exemple #1
0
 def create_sync_net_op(self):
     """Return one op that runs the actor and critic sync ops together.

     A sync op is built for each network by calling ``sync_networks_op``
     with the master's shared network, this object's network variables and
     ``self.thread_id``. Presumably each op copies the shared weights into
     the local network -- confirm against ``sync_networks_op``.

     Returns:
         The ``tf.group`` of the actor and critic sync ops.
     """
     sync_ops = [
         # Actor first, then critic: same construction order as before.
         sync_networks_op(
             self.master.shared_actor_net,
             self.actor_net.vars,
             self.thread_id,
         ),
         sync_networks_op(
             self.master.shared_critic_net,
             self.critic_net.vars,
             self.thread_id,
         ),
     ]
     return tf.group(*sync_ops)
Exemple #2
0
    def make_trainer(self):
        """Build the op that applies actor and critic gradients to the master.

        One Adam optimizer is created per network, with learning rates read
        from ``self.config["actor_learning_rate"]`` and
        ``self.config["critic_learning_rate"]``. Gradients of
        ``self.actor_net.loss`` / ``self.critic_net.loss`` are taken with
        respect to this object's own network variables, optionally clipped
        element-wise to ``[-gradient_clip_value, gradient_clip_value]`` when
        ``self.clip_gradients`` is truthy, and then applied to the variables
        of the master's shared networks.

        Side effects:
            Sets ``self.actor_sync_net`` and ``self.critic_sync_net`` to the
            ops returned by ``sync_networks_op`` for the respective network.

        Returns:
            A ``tf.group`` op running both apply-gradients ops. Only the
            actor update is given ``global_step``, so the counter advances
            once per combined update.
        """
        actor_optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config["actor_learning_rate"])
        critic_optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config["critic_learning_rate"])

        self.actor_sync_net = sync_networks_op(self.master.shared_actor_net,
                                               self.actor_net.vars,
                                               self.thread_id)
        actor_grads = tf.gradients(self.actor_net.loss, self.actor_net.vars)

        self.critic_sync_net = sync_networks_op(self.master.shared_critic_net,
                                                self.critic_net.vars,
                                                self.thread_id)
        critic_grads = tf.gradients(self.critic_net.loss, self.critic_net.vars)

        if self.clip_gradients:
            # Clipped gradients
            gradient_clip_value = self.config["gradient_clip_value"]

            def clip(grad):
                # tf.gradients returns None for a variable the loss does not
                # depend on. Pass it through unchanged (apply_gradients skips
                # such pairs) rather than handing None to tf.clip_by_value,
                # which would raise while building the graph.
                if grad is None:
                    return None
                return tf.clip_by_value(grad, -gradient_clip_value,
                                        gradient_clip_value)

            processed_actor_grads = [clip(grad) for grad in actor_grads]
            processed_critic_grads = [clip(grad) for grad in critic_grads]
        else:
            # Non-clipped gradients: don't do anything
            processed_actor_grads = actor_grads
            processed_critic_grads = critic_grads

        # Apply gradients to the weights of the master network
        # Only increase global_step counter once per update of the 2 networks
        apply_actor_gradients = actor_optimizer.apply_gradients(
            zip(processed_actor_grads, self.master.shared_actor_net.vars),
            global_step=self.master.global_step)
        apply_critic_gradients = critic_optimizer.apply_gradients(
            zip(processed_critic_grads, self.master.shared_critic_net.vars))

        return tf.group(apply_actor_gradients, apply_critic_gradients)