def _run_one_phase(self, is_training):
    """Run one training or evaluation phase and collect its statistics.

    The environment is reset once, then stepped repeatedly until the
    accumulated "steps" statistic reaches the phase budget (the per-agent
    step count for the phase multiplied by the number of agents in the
    environment). A single-line progress report is rewritten on stdout
    after every step.

    Args:
        is_training: When true, the agent is fed every transition and the
            training step budget is used; otherwise the agent is switched
            to eval mode and the evaluation step budget is used.

    Returns:
        A ``(stats, agent_stats)`` tuple of ``Statistics`` objects: the
        first holds environment and timing statistics, the second holds
        whatever the agent reported from ``transitions`` (it stays empty
        for an evaluation phase).
    """
    phase_stats = Statistics()
    learner_stats = Statistics()

    self._agent.eval = not is_training

    if is_training:
        steps_per_agent = self._training_steps
        phase_label = "train"
    else:
        steps_per_agent = self._evaluation_steps
        phase_label = "eval"
    step_budget = steps_per_agent * self._env.n_agents

    self._env.reset()
    while phase_stats.sum("steps") < step_budget:
        step_started = time.time()

        # Snapshot the observations before the environment advances.
        observations = np.copy(self._env.states)
        chosen_actions = self._agent.step(observations)
        rewards, next_observations, dones, env_stats = self._env.step(
            chosen_actions)
        phase_stats.set_all(env_stats)

        transition = (
            observations, chosen_actions, rewards, next_observations, dones)

        if self._traj_buffer is not None:
            self._traj_buffer.push(*transition)

        if is_training:
            agent_started = time.time()
            learner_stats.set_all(self._agent.transitions(*transition))
            phase_stats.set("agent_time", time.time() - agent_started)

        phase_stats.set("step_time", time.time() - step_started)

        # The trailing "\r" makes each report overwrite the previous one.
        progress = "Iteration {} ({}). ".format(self._iteration, phase_label)
        progress += "Steps executed: {} ".format(phase_stats.sum("steps"))
        progress += "Episode length: {} ".format(
            int(phase_stats.avg("steps")))
        progress += "Return: {:.4f} \r".format(phase_stats.avg("rewards"))
        sys.stdout.write(progress)
        sys.stdout.flush()

    # Move past the progress line so later output starts on a fresh line.
    print()
    self._agent.episodes_end()
    return phase_stats, learner_stats
def main(checkpoint, debug=False):
    """Replay episodes with the agent stored in ``checkpoint`` until Ctrl-C.

    The environment is chosen from the checkpoint's file name: the text
    before the first ``-`` is looked up in a table of short aliases and,
    if it is not a known alias, is used as the environment ID directly.
    After every episode a summary line with the per-episode and running
    averages is printed.

    Args:
        checkpoint: Path to the checkpoint file; its base name must start
            with the environment alias or ID followed by ``-``.
        debug: Forwarded unchanged to ``play_episode``.

    Returns:
        None. The loop only ends on ``KeyboardInterrupt`` (returns
        normally) or on an error (the exception propagates). The
        environment is closed in both cases.
    """
    # Create Environment
    # Derive environment ID from the checkpoint filename
    file_prefix = os.path.basename(checkpoint).split('-')[0]
    openai_env_ids = {
        "pole": "CartPole-v1",
        "lunarcont": "LunarLanderContinuous-v2",
        "lunar": "LunarLander-v2",
        "carcont": "MountainCarContinuous-v0",
        "pendulum": "Pendulum-v0",
    }
    env_id = openai_env_ids.get(file_prefix, file_prefix)
    env = create_env(env_id)

    # From here on the environment must be released on every exit path,
    # including failures while loading the agent or playing an episode.
    try:
        # Create agent
        sample_action = sample_action_fn(checkpoint, env.action_space)

        stats = Statistics()
        try:
            while True:
                episode_stats = play_episode(env, sample_action, debug=debug)
                stats.set_all(episode_stats)
                print(
                    ("Episode #{}: {:.2f}; Average Reward: {:.2f}; " +
                     "Episode length: {}; Average episode length: {:.1f}").format(
                        stats.sum("episodes"),
                        episode_stats.avg("rewards"),
                        stats.avg("rewards"),
                        int(episode_stats.avg("steps")),
                        stats.avg("steps")))
        except KeyboardInterrupt:
            # Ctrl-C is the intended way to stop playback; exit quietly.
            return
    finally:
        env.close()