Example #1
    def after_all_episodes(self, episode, rewards):
        gamma = self.agent.config.get("gamma", 1)
        self.writer.add_scalar('episode/length', len(rewards), episode)
        self.writer.add_scalar('episode/total_reward', sum(rewards), episode)
        self.writer.add_scalar(
            'episode/return', sum(r * gamma**t for t, r in enumerate(rewards)),
            episode)
        self.writer.add_histogram('episode/rewards', rewards, episode)
        logger.info("Episode {} score: {:.1f}".format(episode, sum(rewards)))
Example #2
    def load_agent_model(self, model_path):
        if model_path is True:
            model_path = self.directory / self.SAVED_MODELS_FOLDER / "latest.tar"
        try:
            self.agent.load(filename=model_path)
            logger.info("Load {} model from {}".format(
                self.agent.__class__.__name__, model_path))
        except FileNotFoundError:
            logger.warning(
                "No pre-trained model found at the desired location.")
        except NotImplementedError:
            pass
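
Note that load_agent_model accepts either True, which resolves to the latest checkpoint under the workspace, or an explicit path; a missing file or an agent without load support is tolerated. A hypothetical call sketch, assuming an already constructed Evaluation instance named evaluation:

    # Sketch (instance and path names assumed): both calls are safe even if no file exists.
    evaluation.load_agent_model(True)                      # directory / SAVED_MODELS_FOLDER / "latest.tar"
    evaluation.load_agent_model("out/checkpoint-100.tar")  # explicit checkpoint path
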
Example #3
    def save_agent_model(self, episode, do_save=True):
        # Create the folder if it doesn't exist
        permanent_folder = self.directory / self.SAVED_MODELS_FOLDER
        os.makedirs(permanent_folder, exist_ok=True)

        if do_save:
            episode_path = (Path(self.monitor.directory) /
                            "checkpoint-{}.tar".format(episode + 1))
            try:
                self.agent.save(filename=episode_path)
                self.agent.save(filename=permanent_folder / "latest.tar")
            except NotImplementedError:
                pass
            else:
                logger.info("Saved {} model to {}".format(
                    self.agent.__class__.__name__, episode_path))
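
Each successful save writes a per-episode checkpoint in the monitor directory and overwrites latest.tar in the permanent folder, so recovery can target either file. A small sketch of the two destinations, using stand-in names for the directories:

    # Sketch of the two paths written by save_agent_model (names below are stand-ins).
    from pathlib import Path
    monitor_directory, permanent_folder, episode = "run", Path("out/saved_models"), 99
    episode_path = Path(monitor_directory) / "checkpoint-{}.tar".format(episode + 1)  # run/checkpoint-100.tar
    latest_path = permanent_folder / "latest.tar"                                     # overwritten on every save
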
Example #4
    def __init__(self,
                 env,
                 agent,
                 directory=None,
                 run_directory=None,
                 num_episodes=1000,
                 training=True,
                 sim_seed=None,
                 recover=None,
                 display_env=True,
                 display_agent=True,
                 display_rewards=True,
                 close_env=True):
        """

        :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
        :param AbstractAgent agent: The agent solving the environment
        :param Path directory: Workspace directory path
        :param Path run_directory: Run directory path
        :param int num_episodes: Number of episodes to run
        :param training: Whether the agent is being trained or tested
        :param sim_seed: The seed used for the environment/agent randomness source
        :param recover: Recover the agent parameters from a file.
                        - If True, the default latest save will be used.
                        - If a string, it will be used as a path.
        :param display_env: Render the environment, and have a monitor recording its videos
        :param display_agent: Add the agent graphics to the environment viewer, if supported
        :param display_rewards: Display the performances of the agent through the episodes
        :param close_env: Should the environment be closed when the evaluation is closed

        """
        self.env = env
        self.agent = agent
        self.num_episodes = num_episodes
        self.training = training
        self.sim_seed = sim_seed
        self.close_env = close_env
        self.display_env = display_env

        self.directory = Path(directory or self.default_directory)
        self.run_directory = self.directory / (run_directory
                                               or self.default_run_directory)
        self.monitor = MonitorV2(
            env,
            self.run_directory,
            video_callable=(None if self.display_env else False))
        self.writer = SummaryWriter(str(self.run_directory))
        self.agent.set_writer(self.writer)
        self.write_logging()
        self.write_metadata()
        self.filtered_agent_stats = 0
        self.best_agent_stats = -np.infty, 0

        self.recover = recover
        if self.recover:
            self.load_agent_model(self.recover)

        if display_agent:
            try:
                # Render the agent within the environment viewer, if supported
                self.env.render()
                self.env.unwrapped.viewer.set_agent_display(
                    lambda agent_surface, sim_surface: AgentGraphics.display(
                        self.agent, agent_surface, sim_surface))
            except AttributeError:
                logger.info(
                    "The environment viewer doesn't support agent rendering.")
        self.reward_viewer = None
        if display_rewards:
            self.reward_viewer = RewardViewer()
        self.observation = None
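
A hypothetical construction sketch for this class follows; env and agent stand for a Gym-style environment and an AbstractAgent implementation, and make_env / make_agent are placeholder factories that are not part of the excerpt:

    # Hypothetical usage sketch: make_env and make_agent are placeholder factories.
    env = make_env()          # any Gym-style environment
    agent = make_agent(env)   # an AbstractAgent implementation
    evaluation = Evaluation(env,
                            agent,
                            num_episodes=200,
                            training=True,
                            display_env=False,  # disables video recording in MonitorV2
                            recover=True)       # loads SAVED_MODELS_FOLDER/latest.tar if present
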
Example #5
    def run_batched_episodes(self):
        """
            Alternatively,
            - run multiple sample-collection jobs in parallel
            - update model
        """
        episode = 0
        episode_duration = 14  # TODO: use a fixed number of samples instead
        batch_sizes = near_split(self.num_episodes * episode_duration,
                                 size_bins=self.agent.config["batch_size"])
        self.agent.reset()
        for batch, batch_size in enumerate(batch_sizes):
            logger.info(
                "[BATCH={}/{}]---------------------------------------".format(
                    batch + 1, len(batch_sizes)))
            logger.info(
                "[BATCH={}/{}][run_batched_episodes] #samples={}".format(
                    batch + 1, len(batch_sizes), len(self.agent.memory)))
            logger.info(
                "[BATCH={}/{}]---------------------------------------".format(
                    batch + 1, len(batch_sizes)))
            # Save current agent
            model_path = self.save_agent_model(identifier=batch)

            # Prepare workers
            env_config, agent_config = serialize(self.env), serialize(
                self.agent)
            cpu_processes = self.agent.config["processes"] or os.cpu_count()
            workers_sample_counts = near_split(batch_size, cpu_processes)
            workers_starts = list(
                np.cumsum(np.insert(workers_sample_counts[:-1], 0, 0)) +
                np.sum(batch_sizes[:batch]))
            base_seed = self.seed(batch * cpu_processes)[0]
            workers_seeds = [base_seed + i for i in range(cpu_processes)]
            workers_params = list(
                zip_with_singletons(env_config, agent_config,
                                    workers_sample_counts, workers_starts,
                                    workers_seeds, model_path, batch))

            # Collect trajectories
            logger.info("Collecting {} samples with {} workers...".format(
                batch_size, cpu_processes))
            if cpu_processes == 1:
                results = [Evaluation.collect_samples(*workers_params[0])]
            else:
                with Pool(processes=cpu_processes) as pool:
                    results = pool.starmap(Evaluation.collect_samples,
                                           workers_params)
            trajectories = [
                trajectory for worker in results for trajectory in worker
            ]

            # Fill memory
            for trajectory in trajectories:
                # Check whether the episode was properly finished before logging
                if trajectory[-1].terminal:
                    self.after_all_episodes(
                        episode,
                        [transition.reward for transition in trajectory])
                episode += 1
                for transition in trajectory:
                    self.agent.record(*transition)

            # Fit model
            self.agent.update()
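
The workers_starts computation assigns each worker the global index of its first collected sample: a cumulative sum of the per-worker sample counts, offset by the samples already gathered in earlier batches. A concrete sketch of that arithmetic with assumed numbers:

    # Sketch of the start-offset arithmetic (sample counts and offset assumed).
    import numpy as np
    workers_sample_counts = [5, 5, 4]   # e.g. a 14-sample batch split across 3 workers
    previous_samples = 28               # samples collected in earlier batches
    starts = np.cumsum(np.insert(workers_sample_counts[:-1], 0, 0)) + previous_samples
    # starts == array([28, 33, 38]); worker i begins at global sample index starts[i]
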
Example #6
    def after_all_episodes(self, episode, rewards, duration):
        rewards_individual_agents = np.array(self.rewards)
        rewards_averaged_over_agents = np.array(
            self.rewards_averaged_over_agents)
        self.episode_length = rewards_individual_agents.shape[0]
        if len(rewards_individual_agents.shape) > 1:
            controlled_vehicle_count = rewards_individual_agents.shape[1]
        else:
            controlled_vehicle_count = 1
        assert controlled_vehicle_count == len(self.env.controlled_vehicles), \
            "Length of each row in reward should be equal to the number of controlled vehicles"

        reward_total_episode = sum(rewards_averaged_over_agents)
        if not self.test_stable_baseline:
            self.writer.add_scalar('episode/length', self.episode_length,
                                   episode)
            self.writer.add_scalar('episode/total_reward',
                                   reward_total_episode, episode)

        if self.individual_reward_tensorboard:
            # logging individual rewards for each controlled_vehicle
            individual_rewards_dict = {}
            individual_rewards_title = 'individual_stats/agent_rewards'
            for n in range(controlled_vehicle_count):
                agent_name = 'agent' + str(n + 1)
                agent_reward_array = sum(rewards_individual_agents[:, n])
                individual_rewards_dict[agent_name] = agent_reward_array
            self.writer.add_scalars(individual_rewards_title,
                                    individual_rewards_dict, episode)

        if not self.test_stable_baseline:
            gamma = self.agent.config.get("gamma", 1)
            self.writer.add_scalar(
                'episode/return',
                sum(r * gamma**t
                    for t, r in enumerate(rewards_averaged_over_agents)),
                episode)
            self.writer.add_histogram('episode/rewards',
                                      rewards_averaged_over_agents, episode)
            self.writer.add_scalar('episode/fps',
                                   len(rewards) / duration, episode)

        # Create raw logfiles
        if self.create_episode_log:
            logged_info = self.log_creator.episode_info_logger(episode)
            # Adding logged info to TensorBoard
            if not self.test_stable_baseline:
                self.writer.add_scalar('episode/mission_time',
                                       logged_info['mission_time'], episode)

                self.writer.add_scalar(
                    'episode_average_speeds/episode_average_speed_all',
                    logged_info['episode_average_speed_all'], episode)
                self.writer.add_scalar(
                    'episode_average_speeds/episode_average_speed_controlled',
                    logged_info['episode_average_speed_controlled'], episode)
                self.writer.add_scalar(
                    'episode_average_speeds/episode_average_speed_human',
                    logged_info['episode_average_speed_human'], episode)

                if self.log_creator.log_distance:
                    self.writer.add_scalar(
                        'episode_average_distances/episode_average_distance_all',
                        logged_info['episode_average_distance_all'], episode)
                    self.writer.add_scalar(
                        'episode_average_distances/episode_average_distance_controlled',
                        logged_info['episode_average_distance_controlled'],
                        episode)
                    self.writer.add_scalar(
                        'episode_average_distances/episode_average_distance_human',
                        logged_info['episode_average_distance_human'], episode)

        # Calculate episode ET in ms
        episode_elapsed_time = 1000 * (time.time() - self.episode_start_time)
        logger.info(
            "Episode {} done in {:.1f}ms - step duration: {}, episode length: {}, total episode reward: {:.1f}"
            .format(episode, episode_elapsed_time,
                    episode_elapsed_time / self.episode_length,
                    self.episode_length, sum(rewards_averaged_over_agents)))
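
With several controlled vehicles, self.rewards is effectively a (steps x agents) array, so the per-agent scalars sent to TensorBoard are just column sums. A small sketch of that aggregation with assumed reward values:

    # Sketch: per-agent reward totals from a (steps x agents) array (values assumed).
    import numpy as np
    rewards_individual_agents = np.array([[1.0, 2.0],
                                          [0.25, 0.5],
                                          [0.75, 1.5]])  # 3 steps, 2 controlled vehicles
    totals = {"agent{}".format(n + 1): rewards_individual_agents[:, n].sum()
              for n in range(rewards_individual_agents.shape[1])}
    # totals == {'agent1': 2.0, 'agent2': 4.0}
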
Example #7
    def __init__(
        self,
        env,
        agent,
        directory=None,
        run_directory=None,
        num_episodes=1000,
        training=True,
        sim_seed=None,
        recover=None,
        display_env=True,
        display_agent=True,
        display_rewards=True,
        close_env=True,
        test_stable_baseline=False,
        model=None,
        options=None,
    ):
        """

        :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
        :param AbstractAgent agent: The agent solving the environment
        :param Path directory: Workspace directory path
        :param Path run_directory: Run directory path
        :param int num_episodes: Number of episodes to run
        :param training: Whether the agent is being trained or tested
        :param sim_seed: The seed used for the environment/agent randomness source
        :param recover: Recover the agent parameters from a file.
                        - If True, the default latest save will be used.
                        - If a string, it will be used as a path.
        :param display_env: Render the environment, and have a monitor recording its videos
        :param display_agent: Add the agent graphics to the environment viewer, if supported
        :param display_rewards: Display the performances of the agent through the episodes
        :param close_env: Should the environment be closed when the evaluation is closed

        """
        self.env = env
        self.agent = agent
        self.num_episodes = num_episodes
        self.training = training
        self.env.training = training
        self.sim_seed = sim_seed
        self.close_env = close_env
        self.display_env = display_env

        # Modifications
        self.dataset_by_episode = []
        self.env.options = copy.deepcopy(options)
        self.options = copy.deepcopy(options)
        if options['--output_folder']:
            self.OUTPUT_FOLDER = options['--output_folder']

        self.directory = Path(directory or self.default_directory)
        if self.options["--name-from-envconfig"]:
            exp_json = options["--environment"].split('/')[-1]
            default_run_directory = (self.default_run_directory + "_" +
                                     exp_json.split('.')[0])
            if training:
                default_run_directory = os.path.join("train",
                                                     default_run_directory)
            else:
                default_run_directory = os.path.join(
                    "test", default_run_directory + "-test")
        else:
            default_run_directory = self.default_run_directory

        self.run_directory = self.directory / (run_directory
                                               or default_run_directory)

        self.monitor = MonitorV2(
            env,
            self.run_directory,
            video_callable=(None if self.display_env else False),
            options=self.options)

        self.test_stable_baseline = test_stable_baseline
        self.episode = 0

        if not self.test_stable_baseline:
            self.writer = SummaryWriter(str(self.run_directory))
            self.agent.set_writer(self.writer)
            self.agent.evaluation = self
            self.write_logging()
            self.write_metadata()
        self.filtered_agent_stats = 0
        self.best_agent_stats = -np.infty, 0

        self.recover = recover
        if self.recover:
            self.load_agent_model(self.recover)

        if display_agent:
            try:
                # Render the agent within the environment viewer, if supported
                self.env.render()
                self.env.unwrapped.viewer.directory = self.run_directory
                self.env.unwrapped.viewer.set_agent_display(
                    lambda agent_surface, sim_surface: AgentGraphics.display(
                        self.agent, agent_surface, sim_surface))
            except AttributeError:
                logger.info(
                    "The environment viewer doesn't support agent rendering.")
        self.reward_viewer = None
        if display_rewards:
            self.reward_viewer = RewardViewer()
        self.observation = None

        # Modifications
        self.episode_start_time = 0
        self.episode_length = None
        self.episode_info = None
        self.create_episode_log = options["--create_episode_log"]
        self.individual_episode_log_level = int(
            options["--individual_episode_log_level"])
        self.create_timestep_log = options["--create_timestep_log"]
        self.timestep_log_freq = int(options["--timestep_log_freq"])
        self.individual_reward_tensorboard = options[
            "--individual_reward_tensorboard"]
        self.log_creator = None
        self.rewards = None
        self.rewards_averaged_over_agents = None

        if self.test_stable_baseline:
            self.model = model
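
This variant reads most of its logging configuration from a docopt-style options dictionary. A hypothetical minimal dictionary covering the keys referenced above (all values chosen for illustration only):

    # Hypothetical options dictionary: keys come from the constructor above, values are assumed.
    options = {
        "--output_folder": "out",                     # overrides OUTPUT_FOLDER when set
        "--environment": "configs/highway_env.json",  # its stem is appended to the run directory name
        "--name-from-envconfig": True,
        "--create_episode_log": True,
        "--individual_episode_log_level": "2",
        "--create_timestep_log": False,
        "--timestep_log_freq": "10",
        "--individual_reward_tensorboard": False,
    }
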