def create_sync_net_op(self):
    """Build a single op that copies the master's shared weights into this
    worker's local actor and critic networks.

    Returns:
        A grouped TensorFlow op; running it syncs both local networks.
    """
    sync_ops = [
        sync_networks_op(self.master.shared_actor_net,
                         self.actor_net.vars, self.thread_id),
        sync_networks_op(self.master.shared_critic_net,
                         self.critic_net.vars, self.thread_id),
    ]
    return tf.group(*sync_ops)
def make_trainer(self):
    """Construct this worker's training op.

    Builds separate Adam optimizers for the actor and critic, creates ops
    that sync the local networks from the master's shared networks (kept on
    ``self.actor_sync_net`` / ``self.critic_sync_net`` so callers can run
    them), computes gradients of the local losses, optionally clips them
    element-wise, and applies them to the master's shared networks.

    Returns:
        A grouped op that applies both the actor and critic gradient
        updates to the master's shared networks.
    """
    cfg = self.config
    actor_opt = tf.train.AdamOptimizer(
        learning_rate=cfg["actor_learning_rate"])
    critic_opt = tf.train.AdamOptimizer(
        learning_rate=cfg["critic_learning_rate"])

    # Sync ops copy shared (master) weights into this worker's local nets;
    # gradients are taken of the local losses w.r.t. the local variables.
    self.actor_sync_net = sync_networks_op(
        self.master.shared_actor_net, self.actor_net.vars, self.thread_id)
    actor_grads = tf.gradients(self.actor_net.loss, self.actor_net.vars)
    self.critic_sync_net = sync_networks_op(
        self.master.shared_critic_net, self.critic_net.vars, self.thread_id)
    critic_grads = tf.gradients(self.critic_net.loss, self.critic_net.vars)

    if self.clip_gradients:
        # Element-wise clipping to [-clip, clip]; without clipping the raw
        # gradients are applied unchanged.
        clip = cfg["gradient_clip_value"]
        actor_grads = [
            tf.clip_by_value(g, -clip, clip) for g in actor_grads
        ]
        critic_grads = [
            tf.clip_by_value(g, -clip, clip) for g in critic_grads
        ]

    # Apply the locally computed gradients to the master's shared weights.
    # Only the actor update advances global_step, so the counter increases
    # once per combined update of the two networks.
    train_actor = actor_opt.apply_gradients(
        zip(actor_grads, self.master.shared_actor_net.vars),
        global_step=self.master.global_step)
    train_critic = critic_opt.apply_gradients(
        zip(critic_grads, self.master.shared_critic_net.vars))
    return tf.group(train_actor, train_critic)