def compute_gradient(self, params):
    """Compute a gradient from one rollout pulled off the queue.

    The given ``params`` are loaded into ``self.policy`` with
    ``set_weights``. A rollout is then obtained from
    ``self.pull_batch_from_queue()``, passed through ``process_rollout``
    (called with ``gamma=0.99`` and ``lambda_=1.0``), and the processed
    batch is handed to ``self.policy.get_gradients``.

    Args:
        params: Weights forwarded unchanged to ``self.policy.set_weights``.

    Returns:
        A ``(gradient, info)`` tuple. ``gradient`` is whatever
        ``get_gradients`` returned; ``info`` is a dict with ``"id"``
        (``self.id``) and ``"size"`` (``len`` of the processed batch's
        ``a`` attribute).
    """
    # Sync the local policy before collecting/processing any experience.
    self.policy.set_weights(params)

    raw_rollout = self.pull_batch_from_queue()
    processed = process_rollout(raw_rollout, gamma=0.99, lambda_=1.0)

    grads = self.policy.get_gradients(processed)
    return grads, {"id": self.id, "size": len(processed.a)}
def compute_gradient(self, params):
    """Compute a gradient from one rollout and record any summary.

    The given ``params`` are loaded into ``self.policy`` with
    ``set_weights``. A rollout is then obtained from
    ``self.pull_batch_from_queue()``, passed through ``process_rollout``
    (called with ``gamma=0.99`` and ``lambda_=1.0``), and the processed
    batch is handed to ``self.policy.get_gradients``, which returns the
    gradient together with an info mapping.

    If that mapping has a ``"summary"`` entry, it is parsed with
    ``tf.Summary.FromString`` and written to ``self.summary_writer`` at
    step ``self.policy.local_steps``, after which the writer is flushed.

    Args:
        params: Weights forwarded unchanged to ``self.policy.set_weights``.

    Returns:
        A ``(gradient, info)`` tuple. ``info`` is a new dict with ``"id"``
        (``self.id``) and ``"size"`` (``len`` of the processed batch's
        ``a`` attribute); the mapping returned by ``get_gradients`` is
        not passed on to the caller.
    """
    # Sync the local policy before collecting/processing any experience.
    self.policy.set_weights(params)

    raw_rollout = self.pull_batch_from_queue()
    processed = process_rollout(raw_rollout, gamma=0.99, lambda_=1.0)

    grads, grad_info = self.policy.get_gradients(processed)

    if "summary" in grad_info:
        # The summary entry is deserialized before being handed to the
        # writer.
        summary_proto = tf.Summary.FromString(grad_info["summary"])
        self.summary_writer.add_summary(
            summary_proto, self.policy.local_steps
        )
        self.summary_writer.flush()

    # The info from get_gradients is only used for the summary above;
    # callers receive a fresh dict describing this worker and batch size.
    return grads, {"id": self.id, "size": len(processed.a)}