Example no. 1
    def compare(self, runs_directories_a, runs_directories_b):
        # Load the monitored results of each run, indexed by directory suffix
        runs_a = {
            self.suffix(directory): MonitorV2.load_results(directory)
            for directory in runs_directories_a
        }
        runs_b = {
            self.suffix(directory): MonitorV2.load_results(directory)
            for directory in runs_directories_b
        }
        # Plot both groups side by side on a shared y-axis
        f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
        self.plot_all(runs_a,
                      field='episode_rewards',
                      title='rewards',
                      axes=ax1)
        self.plot_all(runs_b,
                      field='episode_rewards',
                      title='rewards',
                      axes=ax2)
        plt.show()
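For reference, a minimal self-contained sketch of the side-by-side layout used by compare above, with synthetic reward curves standing in for the loaded runs (data and labels are placeholders):

import numpy as np
import matplotlib.pyplot as plt

# Synthetic reward curves standing in for the two groups of runs
rewards_a = np.cumsum(np.random.randn(100))
rewards_b = np.cumsum(np.random.randn(100))

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.plot(rewards_a)
ax1.set_title('rewards (group A)')
ax2.plot(rewards_b)
ax2.set_title('rewards (group B)')
plt.show()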
Example no. 2
    def get_run_dataframe(self,
                          directory,
                          agent_name='',
                          gamma=0.95,
                          subsample=10):
        run_data = MonitorV2.load_results(directory)
        if not run_data:
            return pd.DataFrame()

        # Common fields
        data = {
            "episode":
            np.arange(np.size(run_data["episode_rewards"])),
            "total reward":
            run_data["episode_rewards"],
            "discounted rewards": [
                np.sum([episode[t] * gamma**t for t in range(len(episode))])
                for episode in run_data["episode_rewards_"]
            ],
            "length":
            run_data["episode_lengths"],
        }

        # Additional highway-env fields
        try:
            dt = 1.0
            data.update({
                "crashed":
                [np.any(episode) for episode in run_data["episode_crashed"]],
                "velocity":
                [np.mean(episode) for episode in run_data["episode_velocity"]],
                "distance": [
                    np.sum(episode) * dt
                    for episode in run_data["episode_velocity"]
                ],
            })
        except KeyError as e:
            print(e)

        # Tags
        df = pd.DataFrame(data)
        df["run"] = str(directory.name)
        df["agent"] = agent_name

        # Filtering: smooth the curves with a rolling mean (skip fields absent from this run)
        for field in [
                "total reward", "discounted rewards", "length", "crashed",
                "velocity", "distance"
        ]:
            if field in df:
                df[field] = df[field].rolling(subsample).mean()

        # Subsample
        df = df.iloc[self.episodes_range[0]:self.episodes_range[1]:subsample]
        return df
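For reference, a minimal self-contained sketch of the discounted-return, rolling-mean and subsampling steps used in get_run_dataframe above, with synthetic per-step rewards standing in for the MonitorV2 results:

import numpy as np
import pandas as pd

gamma, subsample = 0.95, 10
# Synthetic per-step rewards for 200 episodes, standing in for run_data["episode_rewards_"]
episode_rewards_ = [np.random.rand(np.random.randint(5, 15)) for _ in range(200)]
df = pd.DataFrame({
    "episode": np.arange(len(episode_rewards_)),
    "total reward": [np.sum(episode) for episode in episode_rewards_],
    "discounted rewards": [
        np.sum([r * gamma**t for t, r in enumerate(episode)])
        for episode in episode_rewards_
    ],
})
# Rolling-mean smoothing followed by subsampling, as in get_run_dataframe
for field in ["total reward", "discounted rewards"]:
    df[field] = df[field].rolling(subsample).mean()
df = df.iloc[::subsample]
print(df.head())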
Example no. 3
    def analyze(self, run_directories):
        # Load the monitored results of each run and drop directories with no data
        runs = {
            self.suffix(directory): MonitorV2.load_results(directory)
            for directory in run_directories
        }
        runs = {key: value for (key, value) in runs.items() if value is not None}
        # Rewards, lengths and costs statistics; costs are summed per episode first
        self.plot_all(runs, field='episode_rewards', title='rewards')
        self.histogram_all(runs, field='episode_rewards', title='rewards')
        self.describe_all(runs, field='episode_rewards', title='rewards')
        self.histogram_all(runs, field='episode_lengths', title='lengths')
        self.describe_all(runs, field='episode_lengths', title='lengths')
        self.histogram_all(runs, field='episode_costs', title='costs', preprocess=lambda c: [sum(e) for e in c])
        self.describe_all(runs, field='episode_costs', title='costs', preprocess=lambda c: [sum(e) for e in c])
        self.compare(runs)
        plt.show()
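For reference, a minimal self-contained sketch of the cost preprocessing used above (one total cost per episode before histogramming); the data is synthetic:

import numpy as np
import matplotlib.pyplot as plt

# Per-step costs for each episode, standing in for a run's "episode_costs" field
episode_costs = [np.random.rand(np.random.randint(10, 30)) for _ in range(100)]

# The same reduction passed as `preprocess` above: one total cost per episode
preprocess = lambda c: [sum(e) for e in c]
plt.hist(preprocess(episode_costs), bins=20)
plt.title('costs')
plt.show()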
Example no. 4
    def __init__(self,
                 env,
                 agent,
                 directory=None,
                 run_directory=None,
                 num_episodes=1000,
                 training=True,
                 sim_seed=None,
                 recover=None,
                 display_env=True,
                 display_agent=True,
                 display_rewards=True,
                 close_env=True):
        """

        :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
        :param AbstractAgent agent: The agent solving the environment
        :param Path directory: Workspace directory path
        :param Path run_directory: Run directory path
        :param int num_episodes: Number of episodes run
        :param training: Whether the agent is being trained or tested
        :param sim_seed: The seed used for the environment/agent randomness source
        :param recover: Recover the agent parameters from a file.
                        - If True, the default latest save will be used.
                        - If a string, it will be used as a path.
        :param display_env: Render the environment, and have a monitor recording its videos
        :param display_agent: Add the agent graphics to the environment viewer, if supported
        :param display_rewards: Display the performances of the agent through the episodes
        :param close_env: Should the environment be closed when the evaluation is closed

        """
        self.env = env
        self.agent = agent
        self.num_episodes = num_episodes
        self.training = training
        self.sim_seed = sim_seed
        self.close_env = close_env
        self.display_env = display_env

        self.directory = Path(directory or self.default_directory)
        self.run_directory = self.directory / (run_directory
                                               or self.default_run_directory)
        self.monitor = MonitorV2(
            env,
            self.run_directory,
            video_callable=(None if self.display_env else False))
        self.writer = SummaryWriter(str(self.run_directory))
        self.agent.set_writer(self.writer)
        self.write_logging()
        self.write_metadata()
        self.filtered_agent_stats = 0
        self.best_agent_stats = -np.infty, 0

        self.recover = recover
        if self.recover:
            self.load_agent_model(self.recover)

        if display_agent:
            try:
                # Render the agent within the environment viewer, if supported
                self.env.render()
                self.env.unwrapped.viewer.set_agent_display(
                    lambda agent_surface, sim_surface: AgentGraphics.display(
                        self.agent, agent_surface, sim_surface))
            except AttributeError:
                logger.info(
                    "The environment viewer doesn't support agent rendering.")
        self.reward_viewer = None
        if display_rewards:
            self.reward_viewer = RewardViewer()
        self.observation = None
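For reference, a minimal sketch of the run-directory and SummaryWriter wiring done by this constructor, assuming SummaryWriter comes from torch.utils.tensorboard (the snippet above does not show its import) and with placeholder directory names:

from pathlib import Path
from torch.utils.tensorboard import SummaryWriter

directory = Path("out") / "HighwayEnv"              # placeholder workspace directory
run_directory = directory / "run_20200101-000000"   # placeholder run directory
run_directory.mkdir(parents=True, exist_ok=True)

writer = SummaryWriter(str(run_directory))           # TensorBoard logs land next to the monitor output
writer.add_scalar("episode/total_reward", 1.0, 0)
writer.close()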
Example no. 5
def main(policy_path, generate_envs, feature_str, device, workspace, bftq_params, seed, general,
         betas_test, N_trajs, gamma, gamma_c, bftq_net_params, **args):
    if not os.path.isabs(policy_path):
        policy_path = workspace / policy_path

    env = envs_factory.generate_envs(**generate_envs)[0][0]
    feature = feature_factory(feature_str)

    bftq = PytorchBudgetedFittedQ(
        device=device,
        workspace=workspace,
        actions_str=get_actions_str(env),
        policy_network=NetBFTQ(size_state=len(feature(env.reset(), env)), n_actions=env.action_space.n,
                               **bftq_net_params),
        gamma=gamma,
        gamma_c=gamma_c,
        cpu_processes=general["cpu"]["processes"],
        env=env,
        hull_options=general["hull_options"],
        **bftq_params)
    bftq.reset(True)

    pi_config = {
        "__class__": repr(PytorchBudgetedFittedPolicy),
        "feature_str": feature_str,
        "network_path": policy_path,
        "betas_for_discretisation": eval(bftq_params["betas_for_discretisation"]),
        "device": device,
        "hull_options": general["hull_options"],
        "clamp_Qc": bftq_params["clamp_Qc"],
        "env": env
    }
    pi = policy_factory(pi_config)

    # Iterate over betas
    for beta in eval(betas_test):
        logger.info("Rendering with beta={}".format(beta))
        set_seed(seed, env)
        for traj in range(N_trajs):
            done = False
            pi.reset()
            info_env = {}
            info_pi = {"beta": beta}
            t = 0

            # Make a workspace for trajectories
            traj_workspace = workspace / "trajs" / "beta={}".format(beta) / "traj={}".format(traj)
            makedirs(traj_workspace)
            bftq.workspace = traj_workspace
            monitor = MonitorV2(env, traj_workspace, add_subdirectory=False)
            obs = monitor.reset()

            # Run trajectory
            while not done:
                action_mask = get_action_mask(env)
                info_pi = merge_two_dicts(info_pi, info_env)
                bftq.draw_Qr_and_Qc(obs, pi.network, "render_t={}".format(t), show=False)
                a, _, info_pi = pi.execute(obs, action_mask, info_pi)
                render(env, workspace, t, a)
                obs, _, done, info_env = monitor.step(a)
                t += 1
            monitor.close()
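For reference, a minimal self-contained sketch of the per-trajectory workspace layout created in the loop above; the beta grid is an assumed stand-in for eval(betas_test) and mkdir plays the role of the makedirs helper:

from pathlib import Path

workspace = Path("out/bftq")   # placeholder workspace
betas = [0.0, 0.5, 1.0]        # assumed stand-in for eval(betas_test)
N_trajs = 2

for beta in betas:
    for traj in range(N_trajs):
        # Same directory layout as in the rendering loop above
        traj_workspace = workspace / "trajs" / "beta={}".format(beta) / "traj={}".format(traj)
        traj_workspace.mkdir(parents=True, exist_ok=True)
        print(traj_workspace)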
Example no. 6
    def __init__(
        self,
        env,
        agent,
        directory=None,
        run_directory=None,
        num_episodes=1000,
        training=True,
        sim_seed=None,
        recover=None,
        display_env=True,
        display_agent=True,
        display_rewards=True,
        close_env=True,
        test_stable_baseline=False,
        model=None,
        options=None,
    ):
        """

        :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
        :param AbstractAgent agent: The agent solving the environment
        :param Path directory: Workspace directory path
        :param Path run_directory: Run directory path
        :param int num_episodes: Number of episodes run
        :param training: Whether the agent is being trained or tested
        :param sim_seed: The seed used for the environment/agent randomness source
        :param recover: Recover the agent parameters from a file.
                        - If True, the default latest save will be used.
                        - If a string, it will be used as a path.
        :param display_env: Render the environment, and have a monitor recording its videos
        :param display_agent: Add the agent graphics to the environment viewer, if supported
        :param display_rewards: Display the performances of the agent through the episodes
        :param close_env: Should the environment be closed when the evaluation is closed
        :param test_stable_baseline: Whether a stable-baselines model is evaluated instead of the agent
        :param model: The stable-baselines model to evaluate when test_stable_baseline is set
        :param options: Dictionary of command-line options controlling output folders and logging

        """
        self.env = env
        self.agent = agent
        self.num_episodes = num_episodes
        self.training = training
        self.env.training = training
        self.sim_seed = sim_seed
        self.close_env = close_env
        self.display_env = display_env

        # Modifications
        self.dataset_by_episode = []
        self.env.options = copy.deepcopy(options)
        self.options = copy.deepcopy(options)
        if options['--output_folder']:
            self.OUTPUT_FOLDER = options['--output_folder']

        self.directory = Path(directory or self.default_directory)
        if self.options["--name-from-envconfig"]:
            exp_json = options["--environment"].split('/')[-1]
            default_run_directory = self.default_run_directory + "_" + exp_json.split(
                '.')[0]
            if training:
                default_run_directory = os.path.join("train",
                                                     default_run_directory)
            else:
                default_run_directory = os.path.join(
                    "test", default_run_directory + "-test")
        else:
            default_run_directory = self.default_run_directory

        self.run_directory = self.directory / (run_directory
                                               or default_run_directory)

        self.monitor = MonitorV2(
            env,
            self.run_directory,
            video_callable=(None if self.display_env else False),
            options=self.options)

        self.test_stable_baseline = test_stable_baseline
        self.episode = 0

        if not self.test_stable_baseline:
            self.writer = SummaryWriter(str(self.run_directory))
            self.agent.set_writer(self.writer)
            self.agent.evaluation = self
            self.write_logging()
            self.write_metadata()
        self.filtered_agent_stats = 0
        self.best_agent_stats = -np.infty, 0

        self.recover = recover
        if self.recover:
            self.load_agent_model(self.recover)

        if display_agent:
            try:
                # Render the agent within the environment viewer, if supported
                self.env.render()
                self.env.unwrapped.viewer.directory = self.run_directory
                self.env.unwrapped.viewer.set_agent_display(
                    lambda agent_surface, sim_surface: AgentGraphics.display(
                        self.agent, agent_surface, sim_surface))
            except AttributeError:
                logger.info(
                    "The environment viewer doesn't support agent rendering.")
        self.reward_viewer = None
        if display_rewards:
            self.reward_viewer = RewardViewer()
        self.observation = None

        # Modifications
        self.episode_start_time = 0
        self.episode_length = None
        self.episode_info = None
        self.create_episode_log = options["--create_episode_log"]
        self.individual_episode_log_level = int(
            options["--individual_episode_log_level"])
        self.create_timestep_log = options["--create_timestep_log"]
        self.timestep_log_freq = int(options["--timestep_log_freq"])
        self.individual_reward_tensorboard = options[
            "--individual_reward_tensorboard"]
        self.log_creator = None
        self.rewards = None
        self.rewards_averaged_over_agents = None

        if self.test_stable_baseline:
            self.model = model
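The constructor above reads several docopt-style entries from options; a hypothetical minimal mapping with the keys it accesses might look like this (keys come from the code, values are placeholders):

# Hypothetical minimal options mapping covering the keys read in __init__ above
options = {
    "--output_folder": "out",
    "--name-from-envconfig": True,
    "--environment": "configs/HighwayEnv/env.json",
    "--create_episode_log": True,
    "--individual_episode_log_level": "2",
    "--create_timestep_log": False,
    "--timestep_log_freq": "10",
    "--individual_reward_tensorboard": False,
}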