class Evaluation(object):
    """
    The evaluation of an agent interacting with an environment to maximize its expected reward.
    """

    OUTPUT_FOLDER = 'out'
    SAVED_MODELS_FOLDER = 'saved_models'
    METADATA_FILE = 'metadata.{}.json'

    def __init__(self, env, agent, directory=None, num_episodes=1000, training=True,
                 sim_seed=None, recover=None, display_env=True, display_agent=True,
                 display_rewards=True, close_env=True):
        """
        :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
        :param AbstractAgent agent: The agent solving the environment
        :param str directory: Output directory path
        :param int num_episodes: Number of episodes run
        :param training: Whether the agent is being trained or tested
        :param sim_seed: The seed used for the environment/agent randomness source
        :param recover: Recover the agent parameters from a file.
                        - If True, the default latest save will be used.
                        - If a string, it will be used as a path.
        :param display_env: Render the environment, and have a monitor recording its videos
        :param display_agent: Add the agent graphics to the environment viewer, if supported
        :param display_rewards: Display the performances of the agent through the episodes
        :param close_env: Should the environment be closed when the evaluation is closed
        """
        self.env = env
        self.agent = agent
        self.num_episodes = num_episodes
        self.training = training
        self.sim_seed = sim_seed
        self.close_env = close_env
        self.directory = directory or self.default_directory
        # Record videos only when the environment is displayed; a subdirectory is
        # only added when the default directory is used.
        self.monitor = MonitorV2(env,
                                 self.directory,
                                 add_subdirectory=(directory is None),
                                 video_callable=(None if display_env else False))
        self.write_metadata()
        if recover:
            self.load_agent_model(recover)
        self.agent_viewer = None
        if display_agent:
            try:
                # Render the agent within the environment viewer, if supported
                self.env.render()
                self.env.unwrapped.viewer.set_agent_display(
                    lambda agent_surface, sim_surface: AgentGraphics.display(
                        self.agent, agent_surface, sim_surface))
            except AttributeError:
                # The environment viewer doesn't support agent rendering,
                # create a separate agent viewer
                # self.agent_viewer = AgentViewer(self.agent)
                pass
        self.reward_viewer = None
        if display_rewards:
            self.reward_viewer = RewardViewer()
        self.observation = None

    def train(self):
        """Run all episodes in training mode, then close the evaluation."""
        self.training = True
        self.run_episodes()
        self.close()

    def test(self, model_path=True):
        """
        Run all episodes in test mode, recording a video of every episode.

        :param model_path: the agent model to load before testing;
                           True selects the default latest save.
        """
        self.training = False
        self.load_agent_model(model_path)
        self.monitor.video_callable = MonitorV2.always_call_video
        try:
            # Switch the agent to evaluation mode, if supported
            self.agent.eval()
        except AttributeError:
            pass
        self.run_episodes()
        self.close()

    def run_episodes(self):
        """Run the configured number of episodes, seeding and resetting before each."""
        for episode in range(self.num_episodes):
            # Run episode
            terminal = False
            self.seed()
            self.reset()
            total_reward = 0
            while not terminal:
                # Step until a terminal step is reached
                reward, terminal = self.step()
                total_reward += reward
                # Catch interruptions
                try:
                    if self.env.unwrapped.done:
                        return
                except AttributeError:
                    pass
            # End of episode
            self.after_all_episodes(episode, total_reward)
            self.after_some_episodes(episode)

    def step(self):
        """
        Plan a sequence of actions according to the agent policy, and step the environment accordingly.

        :return: a (reward, terminal) tuple for the performed step
        :raises Exception: if the agent planned an empty action sequence
        """
        # Query agent for actions sequence
        actions = self.agent.plan(self.observation)
        if not actions:
            raise Exception("The agent did not plan any action")

        # Forward the actions to the environment viewer
        try:
            self.env.unwrapped.viewer.predict_trajectory(actions)
        except AttributeError:
            pass
        if self.agent_viewer and self.monitor.is_episode_selected():
            self.agent_viewer.render()

        # Step the environment with the first planned action only
        previous_observation, action = self.observation, actions[0]
        self.observation, reward, terminal, _ = self.monitor.step(action)

        # Record the experience.
        if self.training:
            try:
                self.agent.record(previous_observation, action, reward, self.observation, terminal)
            except NotImplementedError:
                pass

        return reward, terminal

    def save_agent_model(self, episode, do_save=True):
        """
        Save the agent model to a per-episode checkpoint and to the permanent "latest" slot.

        :param episode: index of the current episode, used to name the checkpoint
        :param do_save: if False, only ensure the saved-models folder exists
        """
        # Create the folder if it doesn't exist
        permanent_folder = os.path.join(self.directory, self.SAVED_MODELS_FOLDER)
        os.makedirs(permanent_folder, exist_ok=True)

        if do_save:
            episode_path = os.path.join(self.monitor.directory, "checkpoint-{}.tar".format(episode + 1))
            try:
                self.agent.save(filename=episode_path)
                self.agent.save(filename=os.path.join(permanent_folder, "latest.tar"))
            except NotImplementedError:
                # The agent does not support saving; silently skip
                pass
            else:
                logger.info("Saved {} model to {}".format(self.agent.__class__.__name__, episode_path))

    def load_agent_model(self, model_path):
        """
        Load the agent model from a file.

        :param model_path: path of the model file; True selects the default latest save
        """
        if model_path is True:
            model_path = os.path.join(self.directory, self.SAVED_MODELS_FOLDER, "latest.tar")
        try:
            self.agent.load(filename=model_path)
            logger.info("Load {} model from {}".format(self.agent.__class__.__name__, model_path))
        except FileNotFoundError:
            # logger.warn is a deprecated alias of logger.warning
            logger.warning("No pre-trained model found at the desired location.")
        except NotImplementedError:
            # The agent does not support loading; silently skip
            pass

    def after_all_episodes(self, episode, total_reward):
        """Report the episode score to the reward viewer (if any) and the logger."""
        if self.reward_viewer:
            self.reward_viewer.update(total_reward)
        logger.info("Episode {} score: {}".format(episode, total_reward))

    def after_some_episodes(self, episode):
        """On monitored episodes, persist the agent model while training."""
        if self.monitor.is_episode_selected():
            # Save the model
            if self.training:
                self.save_agent_model(episode)

    @property
    def default_directory(self):
        """Default output directory: OUTPUT_FOLDER/<env class name>/<agent class name>."""
        return os.path.join(self.OUTPUT_FOLDER,
                            self.env.unwrapped.__class__.__name__,
                            self.agent.__class__.__name__)

    def write_metadata(self):
        """Serialize the environment and agent configurations to a JSON metadata file."""
        metadata = dict(env=serialize(self.env), agent=serialize(self.agent))
        file_infix = '{}.{}'.format(self.monitor.monitor_id, os.getpid())
        file = os.path.join(self.monitor.directory, self.METADATA_FILE.format(file_infix))
        with open(file, 'w') as f:
            json.dump(metadata, f, sort_keys=True, indent=4)

    def seed(self):
        """Seed the environment through the monitor, and propagate the seed to the agent."""
        seed = self.monitor.seed(self.sim_seed)
        # Seed the agent with the main environment seed
        self.agent.seed(seed[0])
        return seed

    def reset(self):
        """Reset the environment (through the monitor) and the agent for a new episode."""
        self.observation = self.monitor.reset()
        self.agent.reset()

    def close(self):
        """
        Close the evaluation: save the model if training, close the monitor,
        and optionally close the environment.
        """
        if self.training:
            self.save_agent_model(self.monitor.episode_id)
        self.monitor.close()
        if self.close_env:
            self.env.close()
def __init__(self, env, agent, directory=None, run_directory=None, num_episodes=1000, training=True,
             sim_seed=None, recover=None, display_env=True, display_agent=True,
             display_rewards=True, close_env=True):
    """
    Set up an evaluation with a workspace directory, a run directory, a monitor
    and a tensorboard summary writer.

    :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
    :param AbstractAgent agent: The agent solving the environment
    :param Path directory: Workspace directory path
    :param Path run_directory: Run directory path
    :param int num_episodes: Number of episodes run
    :param training: Whether the agent is being trained or tested
    :param sim_seed: The seed used for the environment/agent randomness source
    :param recover: Recover the agent parameters from a file.
                    - If True, the default latest save will be used.
                    - If a string, it will be used as a path.
    :param display_env: Render the environment, and have a monitor recording its videos
    :param display_agent: Add the agent graphics to the environment viewer, if supported
    :param display_rewards: Display the performances of the agent through the episodes
    :param close_env: Should the environment be closed when the evaluation is closed
    """
    self.env = env
    self.agent = agent
    self.num_episodes = num_episodes
    self.training = training
    self.sim_seed = sim_seed
    self.close_env = close_env
    self.display_env = display_env

    # Run outputs live under <workspace directory>/<run directory>.
    self.directory = Path(directory or self.default_directory)
    self.run_directory = self.directory / (run_directory or self.default_run_directory)
    # Record videos only when the environment is displayed.
    self.monitor = MonitorV2(
        env,
        self.run_directory,
        video_callable=(None if self.display_env else False))
    # Tensorboard writer logging into the run directory; shared with the agent.
    self.writer = SummaryWriter(str(self.run_directory))
    self.agent.set_writer(self.writer)
    self.write_logging()
    self.write_metadata()
    # Running statistics used to track the best-performing agent so far.
    self.filtered_agent_stats = 0
    self.best_agent_stats = -np.infty, 0

    self.recover = recover
    if self.recover:
        self.load_agent_model(self.recover)

    if display_agent:
        try:
            # Render the agent within the environment viewer, if supported
            self.env.render()
            self.env.unwrapped.viewer.set_agent_display(
                lambda agent_surface, sim_surface: AgentGraphics.display(
                    self.agent, agent_surface, sim_surface))
        except AttributeError:
            logger.info(
                "The environment viewer doesn't support agent rendering.")
    self.reward_viewer = None
    if display_rewards:
        self.reward_viewer = RewardViewer()
    # Last observation returned by the environment; set on reset().
    self.observation = None
def __init__(
        self,
        env,
        agent,
        directory=None,
        run_directory=None,
        num_episodes=1000,
        training=True,
        sim_seed=None,
        recover=None,
        display_env=True,
        display_agent=True,
        display_rewards=True,
        close_env=True,
        test_stable_baseline=False,
        model=None,
        options=None,
):
    """
    Set up an evaluation driven by a command-line options dictionary, with
    optional support for testing a stable-baselines model instead of an agent.

    :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
    :param AbstractAgent agent: The agent solving the environment
    :param Path directory: Workspace directory path
    :param Path run_directory: Run directory path
    :param int num_episodes: Number of episodes run
    :param training: Whether the agent is being trained or tested
    :param sim_seed: The seed used for the environment/agent randomness source
    :param recover: Recover the agent parameters from a file.
                    - If True, the default latest save will be used.
                    - If a string, it will be used as a path.
    :param display_env: Render the environment, and have a monitor recording its videos
    :param display_agent: Add the agent graphics to the environment viewer, if supported
    :param display_rewards: Display the performances of the agent through the episodes
    :param close_env: Should the environment be closed when the evaluation is closed
    """
    # NOTE(review): options defaults to None but is subscripted unconditionally
    # below (options['--output_folder'], options["--name-from-envconfig"], ...),
    # so callers must always pass an options dict — confirm and consider
    # making it a required parameter.
    self.env = env
    self.agent = agent
    self.num_episodes = num_episodes
    self.training = training
    self.env.training = training
    self.sim_seed = sim_seed
    self.close_env = close_env
    self.display_env = display_env
    # Modifications
    self.dataset_by_episode = []
    # Deep-copied so later mutations of self.options don't leak into the caller's dict.
    self.env.options = copy.deepcopy(options)
    self.options = copy.deepcopy(options)
    if options['--output_folder']:
        # Overrides the class-level OUTPUT_FOLDER on this instance only.
        self.OUTPUT_FOLDER = options['--output_folder']
    self.directory = Path(directory or self.default_directory)
    if self.options["--name-from-envconfig"]:
        # Derive the run directory name from the environment config file name,
        # e.g. "configs/env.json" -> "<default_run_directory>_env".
        exp_json = options["--environment"].split('/')[-1]
        default_run_directory = self.default_run_directory + "_" + exp_json.split(
            '.')[0]
        if training:
            default_run_directory = os.path.join("train", default_run_directory)
        else:
            default_run_directory = os.path.join(
                "test", default_run_directory + "-test")
    else:
        default_run_directory = self.default_run_directory
    self.run_directory = self.directory / (run_directory or default_run_directory)
    self.monitor = MonitorV2(
        env,
        self.run_directory,
        video_callable=(None if self.display_env else False),
        options=self.options)
    self.test_stable_baseline = test_stable_baseline
    self.episode = 0
    if not self.test_stable_baseline:
        # Tensorboard logging only applies when evaluating a project agent,
        # not an external stable-baselines model.
        self.writer = SummaryWriter(str(self.run_directory))
        self.agent.set_writer(self.writer)
        self.agent.evaluation = self
        self.write_logging()
    self.write_metadata()
    # Running statistics used to track the best-performing agent so far.
    self.filtered_agent_stats = 0
    self.best_agent_stats = -np.infty, 0
    self.recover = recover
    if self.recover:
        self.load_agent_model(self.recover)
    if display_agent:
        try:
            # Render the agent within the environment viewer, if supported
            self.env.render()
            self.env.unwrapped.viewer.directory = self.run_directory
            self.env.unwrapped.viewer.set_agent_display(
                lambda agent_surface, sim_surface: AgentGraphics.display(
                    self.agent, agent_surface, sim_surface))
            # NOTE(review): viewer.directory is assigned twice (before and
            # after set_agent_display) — the second assignment looks redundant.
            self.env.unwrapped.viewer.directory = self.run_directory
        except AttributeError:
            logger.info(
                "The environment viewer doesn't support agent rendering.")
    self.reward_viewer = None
    if display_rewards:
        self.reward_viewer = RewardViewer()
    self.observation = None
    # Modifications: per-episode logging configuration read from CLI options.
    self.episode_start_time = 0
    self.episode_length = None
    self.episode_info = None
    self.create_episode_log = options["--create_episode_log"]
    self.individual_episode_log_level = int(
        options["--individual_episode_log_level"])
    self.create_timestep_log = options["--create_timestep_log"]
    self.timestep_log_freq = int(options["--timestep_log_freq"])
    self.individual_reward_tensorboard = options[
        "--individual_reward_tensorboard"]
    self.log_creator = None
    self.rewards = None
    self.rewards_averaged_over_agents = None
    if self.test_stable_baseline:
        # External stable-baselines model used in place of the agent's policy.
        self.model = model