import os
import queue

import tensorflow as tf

# create_env, LSTMPolicy, RunnerThread, and process_rollout are assumed to be
# defined elsewhere in this module.


class Runner(object):
    """Actor object to start running simulation on workers.

    The gradient computation is also executed from this object.
    """

    def __init__(self, env_name, actor_id, logdir="/tmp/ray/a3c/", start=True):
        env = create_env(env_name)
        self.id = actor_id
        num_actions = env.action_space.n
        self.policy = LSTMPolicy(env.observation_space.shape, num_actions,
                                 actor_id)
        self.runner = RunnerThread(env, self.policy, 20)
        self.env = env
        self.logdir = logdir
        if start:
            self.start()

    def pull_batch_from_queue(self):
        """Take a rollout from the queue of the thread runner."""
        rollout = self.runner.queue.get(timeout=600.0)
        if isinstance(rollout, BaseException):
            raise rollout
        while not rollout.terminal:
            try:
                part = self.runner.queue.get_nowait()
                if isinstance(part, BaseException):
                    raise part
                rollout.extend(part)
            except queue.Empty:
                break
        return rollout

    def get_completed_rollout_metrics(self):
        """Returns metrics on previously completed rollouts.

        Calling this clears the queue of completed rollout metrics.
        """
        completed = []
        while True:
            try:
                completed.append(self.runner.metrics_queue.get_nowait())
            except queue.Empty:
                break
        return completed

    def start(self):
        """Starts the sampling thread with a per-agent summary writer."""
        summary_writer = tf.summary.FileWriter(
            os.path.join(self.logdir, "agent_%d" % self.id))
        self.summary_writer = summary_writer
        self.runner.start_runner(self.policy.sess, summary_writer)

    def compute_gradient(self, params):
        """Computes a gradient for the given weights from one rollout batch."""
        self.policy.set_weights(params)
        rollout = self.pull_batch_from_queue()
        batch = process_rollout(rollout, gamma=0.99, lambda_=1.0)
        gradient = self.policy.get_gradients(batch)
        info = {"id": self.id, "size": len(batch.a)}
        return gradient, info
class Runner(object):
    """Actor object to start running simulation on workers.

    The gradient computation is also executed from this object.
    """

    def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir):
        env = create_env(env_name)
        self.id = actor_id
        # TODO(rliaw): should change this to be just env.observation_space
        self.policy = policy_cls(env.observation_space.shape,
                                 env.action_space)
        self.runner = RunnerThread(env, self.policy, batch_size)
        self.env = env
        self.logdir = logdir
        self.start()

    def pull_batch_from_queue(self):
        """Take a rollout from the queue of the thread runner."""
        rollout = self.runner.queue.get(timeout=600.0)
        if isinstance(rollout, BaseException):
            raise rollout
        while not rollout.terminal:
            try:
                part = self.runner.queue.get_nowait()
                if isinstance(part, BaseException):
                    raise part
                rollout.extend(part)
            except queue.Empty:
                break
        return rollout

    def get_completed_rollout_metrics(self):
        """Returns metrics on previously completed rollouts.

        Calling this clears the queue of completed rollout metrics.
        """
        completed = []
        while True:
            try:
                completed.append(self.runner.metrics_queue.get_nowait())
            except queue.Empty:
                break
        return completed

    def start(self):
        """Starts the sampling thread with a per-agent summary writer."""
        summary_writer = tf.summary.FileWriter(
            os.path.join(self.logdir, "agent_%d" % self.id))
        self.summary_writer = summary_writer
        self.runner.start_runner(self.policy.sess, summary_writer)

    def compute_gradient(self, params):
        """Computes a gradient for the given weights and logs any summaries."""
        self.policy.set_weights(params)
        rollout = self.pull_batch_from_queue()
        batch = process_rollout(rollout, gamma=0.99, lambda_=1.0)
        gradient, info = self.policy.get_gradients(batch)
        if "summary" in info:
            self.summary_writer.add_summary(
                tf.Summary.FromString(info["summary"]),
                self.policy.local_steps)
            self.summary_writer.flush()
        info = {"id": self.id, "size": len(batch.a)}
        return gradient, info
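For reference, a typical asynchronous driver loop that consumes these Runner actors is sketched below. It assumes Runner is registered as a Ray actor (for example via ray.remote(Runner)) and that the central policy class exposes get_weights() and apply_gradients() counterparts to the set_weights() and get_gradients() calls used above; those names, along with the train() wrapper and max_steps parameter, are illustrative assumptions rather than part of the code shown here.

import ray


def train(num_workers, env_name, policy_cls, max_steps,
          batch_size=20, logdir="/tmp/ray/a3c/"):
    # Central copy of the policy that accumulates the asynchronous updates.
    env = create_env(env_name)
    policy = policy_cls(env.observation_space.shape, env.action_space)

    # Wrap Runner as a Ray actor and start one worker per actor.
    RemoteRunner = ray.remote(Runner)
    agents = [RemoteRunner.remote(env_name, policy_cls, i, batch_size, logdir)
              for i in range(num_workers)]

    parameters = policy.get_weights()
    # Keep one in-flight compute_gradient task per worker.
    gradient_tasks = [agent.compute_gradient.remote(parameters)
                      for agent in agents]

    for _ in range(max_steps):
        # Apply whichever worker's gradient arrives first, then hand that
        # worker the refreshed weights (asynchronous, A3C-style updates).
        [done_id], gradient_tasks = ray.wait(gradient_tasks)
        gradient, info = ray.get(done_id)
        policy.apply_gradients(gradient)
        parameters = policy.get_weights()
        gradient_tasks.append(
            agents[info["id"]].compute_gradient.remote(parameters))
    return policy

Because each worker samples at its own pace, a slow worker never blocks the update step; the driver simply applies gradients in whatever order they complete and returns fresh weights only to the worker that just finished.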