Beispiel #1
0
 def compute_gradient(self, params):
     """Compute one gradient from a freshly pulled rollout.

     Loads ``params`` into the policy via ``set_weights``, pulls a
     rollout from the queue, runs it through ``process_rollout`` and
     asks the policy for the gradients of the resulting batch.

     Args:
         params: Weights handed to ``self.policy.set_weights``.

     Returns:
         A ``(gradient, info)`` tuple, where ``info`` is a dict with
         this object's ``id`` and ``size`` set to ``len(batch.a)``.
     """
     # The weights must be in place before the rollout is consumed.
     self.policy.set_weights(params)

     processed = process_rollout(
         self.pull_batch_from_queue(), gamma=0.99, lambda_=1.0)
     grads = self.policy.get_gradients(processed)

     # NOTE(review): `.a` presumably holds the batch's actions, so its
     # length is the number of steps in the batch -- confirm.
     return grads, {"id": self.id, "size": len(processed.a)}
Beispiel #2
0
 def compute_gradient(self, params):
     """Compute one gradient from a rollout and log any summary.

     Loads ``params`` into the policy via ``set_weights``, pulls a
     rollout from the queue, runs it through ``process_rollout`` and
     asks the policy for the gradients of the resulting batch. When the
     policy's info contains a ``"summary"`` entry, it is parsed with
     ``tf.Summary.FromString`` and written through
     ``self.summary_writer`` at ``self.policy.local_steps``.

     Args:
         params: Weights handed to ``self.policy.set_weights``.

     Returns:
         A ``(gradient, info)`` tuple, where ``info`` is a dict with
         this object's ``id`` and ``size`` set to ``len(batch.a)``.
         The info returned by the policy is not passed on to the caller.
     """
     # The weights must be in place before the rollout is consumed.
     self.policy.set_weights(params)

     processed = process_rollout(
         self.pull_batch_from_queue(), gamma=0.99, lambda_=1.0)
     grads, policy_info = self.policy.get_gradients(processed)

     if "summary" in policy_info:
         writer = self.summary_writer
         writer.add_summary(
             tf.Summary.FromString(policy_info["summary"]),
             self.policy.local_steps)
         # Flush right away so the summary is written out immediately.
         writer.flush()

     # The policy's info is used only for the summary above; callers
     # receive this smaller dict instead.
     return grads, {"id": self.id, "size": len(processed.a)}