def compare(self, runs_directories_a, runs_directories_b):
    runs_a = {self.suffix(directory): MonitorV2.load_results(directory)
              for directory in runs_directories_a}
    runs_b = {self.suffix(directory): MonitorV2.load_results(directory)
              for directory in runs_directories_b}
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    self.plot_all(runs_a, field='episode_rewards', title='rewards', axes=ax1)
    self.plot_all(runs_b, field='episode_rewards', title='rewards', axes=ax2)
    plt.show()
def get_run_dataframe(self, directory, agent_name='', gamma=0.95, subsample=10):
    run_data = MonitorV2.load_results(directory)
    if not run_data:
        return pd.DataFrame()
    # Common fields
    data = {
        "episode": np.arange(np.size(run_data["episode_rewards"])),
        "total reward": run_data["episode_rewards"],
        "discounted rewards": [np.sum([episode[t] * gamma ** t for t in range(len(episode))])
                               for episode in run_data["episode_rewards_"]],
        "length": run_data["episode_lengths"],
    }
    # Additional highway-env fields
    try:
        dt = 1.0
        data.update({
            "crashed": [np.any(episode) for episode in run_data["episode_crashed"]],
            "velocity": [np.mean(episode) for episode in run_data["episode_velocity"]],
            "distance": [np.sum(episode) * dt for episode in run_data["episode_velocity"]],
        })
    except KeyError as e:
        print(e)
    # Tags
    df = pd.DataFrame(data)
    df["run"] = str(directory.name)
    df["agent"] = agent_name
    # Filtering: smooth each metric with a rolling mean before subsampling
    for field in ["total reward", "discounted rewards", "length", "crashed", "velocity", "distance"]:
        if field in df:  # the highway-env fields may be missing
            df[field] = df[field].rolling(subsample).mean()
    # Subsample
    df = df.iloc[self.episodes_range[0]:self.episodes_range[1]:subsample]
    return df
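# For reference, the two transformations applied in get_run_dataframe can be exercised on
# toy data: the "discounted rewards" column is sum_t gamma^t * r_t per episode, and each
# metric is smoothed with a rolling mean before being subsampled. This is a minimal,
# self-contained sketch; all values below are illustrative and not taken from any run.
import numpy as np
import pandas as pd

gamma, subsample = 0.95, 2
episode_rewards_ = [[1.0, 0.0, 1.0], [0.5, 0.5]]  # per-step rewards for two episodes
discounted = [np.sum([r * gamma ** t for t, r in enumerate(episode)])
              for episode in episode_rewards_]    # -> [1.9025, 0.975]

df = pd.DataFrame({"total reward": [1., 2., 3., 4., 5., 6.]})
df["total reward"] = df["total reward"].rolling(subsample).mean()  # smooth
df = df.iloc[::subsample]                                          # then thin out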
def analyze(self, run_directories):
    runs = {self.suffix(directory): MonitorV2.load_results(directory)
            for directory in run_directories}
    runs = {key: value for (key, value) in runs.items() if value is not None}
    self.plot_all(runs, field='episode_rewards', title='rewards')
    self.histogram_all(runs, field='episode_rewards', title='rewards')
    self.describe_all(runs, field='episode_rewards', title='rewards')
    self.histogram_all(runs, field='episode_lengths', title='lengths')
    self.describe_all(runs, field='episode_lengths', title='lengths')
    self.histogram_all(runs, field='episode_costs', title='costs',
                       preprocess=lambda c: [sum(e) for e in c])
    self.describe_all(runs, field='episode_costs', title='costs',
                      preprocess=lambda c: [sum(e) for e in c])
    self.compare(runs)
    plt.show()
def __init__(self, env, agent, directory=None, run_directory=None, num_episodes=1000,
             training=True, sim_seed=None, recover=None, display_env=True, display_agent=True,
             display_rewards=True, close_env=True):
    """
    :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
    :param AbstractAgent agent: The agent solving the environment
    :param Path directory: Workspace directory path
    :param Path run_directory: Run directory path
    :param int num_episodes: Number of episodes run
    :param training: Whether the agent is being trained or tested
    :param sim_seed: The seed used for the environment/agent randomness source
    :param recover: Recover the agent parameters from a file.
                    - If True, the default latest save will be used.
                    - If a string, it will be used as a path.
    :param display_env: Render the environment, and have a monitor recording its videos
    :param display_agent: Add the agent graphics to the environment viewer, if supported
    :param display_rewards: Display the performance of the agent through the episodes
    :param close_env: Should the environment be closed when the evaluation is closed
    """
    self.env = env
    self.agent = agent
    self.num_episodes = num_episodes
    self.training = training
    self.sim_seed = sim_seed
    self.close_env = close_env
    self.display_env = display_env

    self.directory = Path(directory or self.default_directory)
    self.run_directory = self.directory / (run_directory or self.default_run_directory)
    self.monitor = MonitorV2(env,
                             self.run_directory,
                             video_callable=(None if self.display_env else False))
    self.writer = SummaryWriter(str(self.run_directory))
    self.agent.set_writer(self.writer)
    self.write_logging()
    self.write_metadata()
    self.filtered_agent_stats = 0
    self.best_agent_stats = -np.infty, 0

    self.recover = recover
    if self.recover:
        self.load_agent_model(self.recover)

    if display_agent:
        try:
            # Render the agent within the environment viewer, if supported
            self.env.render()
            self.env.unwrapped.viewer.set_agent_display(
                lambda agent_surface, sim_surface: AgentGraphics.display(self.agent, agent_surface, sim_surface))
        except AttributeError:
            logger.info("The environment viewer doesn't support agent rendering.")
    self.reward_viewer = None
    if display_rewards:
        self.reward_viewer = RewardViewer()
    self.observation = None
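# The `recover` convention documented in the docstring above (True -> default latest save,
# str -> explicit path) can be made explicit with a small helper. This is only an
# illustrative sketch: the function name and the default checkpoint path are assumptions,
# not part of the original class.
from pathlib import Path


def _resolve_recover(recover, default_checkpoint=Path("out/latest_model.tar")):
    """Map the `recover` argument to a checkpoint path, or None when no recovery is requested."""
    if recover is True:
        return default_checkpoint
    if isinstance(recover, str):
        return Path(recover)
    return None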
def main(policy_path, generate_envs, feature_str, device, workspace, bftq_params, seed,
         general, betas_test, N_trajs, gamma, gamma_c, bftq_net_params, **args):
    if not os.path.isabs(policy_path):
        policy_path = workspace / policy_path
    env = envs_factory.generate_envs(**generate_envs)[0][0]
    feature = feature_factory(feature_str)

    bftq = PytorchBudgetedFittedQ(
        device=device,
        workspace=workspace,
        actions_str=get_actions_str(env),
        policy_network=NetBFTQ(size_state=len(feature(env.reset(), env)),
                               n_actions=env.action_space.n,
                               **bftq_net_params),
        gamma=gamma,
        gamma_c=gamma_c,
        cpu_processes=general["cpu"]["processes"],
        env=env,
        hull_options=general["hull_options"],
        **bftq_params)
    bftq.reset(True)

    pi_config = {
        "__class__": repr(PytorchBudgetedFittedPolicy),
        "feature_str": feature_str,
        "network_path": policy_path,
        "betas_for_discretisation": eval(bftq_params["betas_for_discretisation"]),
        "device": device,
        "hull_options": general["hull_options"],
        "clamp_Qc": bftq_params["clamp_Qc"],
        "env": env
    }
    pi = policy_factory(pi_config)

    # Iterate over betas
    for beta in eval(betas_test):
        logger.info("Rendering with beta={}".format(beta))
        set_seed(seed, env)
        for traj in range(N_trajs):
            done = False
            pi.reset()
            info_env = {}
            info_pi = {"beta": beta}
            t = 0
            # Make a workspace for trajectories
            traj_workspace = workspace / "trajs" / "beta={}".format(beta) / "traj={}".format(traj)
            makedirs(traj_workspace)
            bftq.workspace = traj_workspace
            monitor = MonitorV2(env, traj_workspace, add_subdirectory=False)
            obs = monitor.reset()

            # Run trajectory
            while not done:
                action_mask = get_action_mask(env)
                info_pi = merge_two_dicts(info_pi, info_env)
                bftq.draw_Qr_and_Qc(obs, pi.network, "render_t={}".format(t), show=False)
                a, _, info_pi = pi.execute(obs, action_mask, info_pi)
                render(env, workspace, t, a)
                obs, _, done, info_env = monitor.step(a)
                t += 1
            monitor.close()
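# `betas_test` and `betas_for_discretisation` are passed through eval() above, so the
# configuration is expected to hold strings that evaluate to iterables of budgets. Below is
# a minimal, self-contained sketch of a plausible value and of the per-trajectory workspace
# layout built in the loop; all concrete values and paths are illustrative assumptions.
import numpy as np
from pathlib import Path

betas_test = "np.linspace(0, 1, 3)"  # hypothetical configuration entry
workspace = Path("out/bftq")         # hypothetical workspace
N_trajs = 2

for beta in eval(betas_test):
    for traj in range(N_trajs):
        traj_workspace = workspace / "trajs" / "beta={}".format(beta) / "traj={}".format(traj)
        traj_workspace.mkdir(parents=True, exist_ok=True)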
def __init__(self, env, agent, directory=None, run_directory=None, num_episodes=1000,
             training=True, sim_seed=None, recover=None, display_env=True, display_agent=True,
             display_rewards=True, close_env=True, test_stable_baseline=False, model=None,
             options=None):
    """
    :param env: The environment to be solved, possibly wrapping an AbstractEnv environment
    :param AbstractAgent agent: The agent solving the environment
    :param Path directory: Workspace directory path
    :param Path run_directory: Run directory path
    :param int num_episodes: Number of episodes run
    :param training: Whether the agent is being trained or tested
    :param sim_seed: The seed used for the environment/agent randomness source
    :param recover: Recover the agent parameters from a file.
                    - If True, the default latest save will be used.
                    - If a string, it will be used as a path.
    :param display_env: Render the environment, and have a monitor recording its videos
    :param display_agent: Add the agent graphics to the environment viewer, if supported
    :param display_rewards: Display the performance of the agent through the episodes
    :param close_env: Should the environment be closed when the evaluation is closed
    """
    self.env = env
    self.agent = agent
    self.num_episodes = num_episodes
    self.training = training
    self.env.training = training
    self.sim_seed = sim_seed
    self.close_env = close_env
    self.display_env = display_env

    # Modifications
    self.dataset_by_episode = []
    self.env.options = copy.deepcopy(options)
    self.options = copy.deepcopy(options)
    if options["--output_folder"]:
        self.OUTPUT_FOLDER = options["--output_folder"]
    self.directory = Path(directory or self.default_directory)
    if self.options["--name-from-envconfig"]:
        exp_json = options["--environment"].split('/')[-1]
        default_run_directory = self.default_run_directory + "_" + exp_json.split('.')[0]
        if training:
            default_run_directory = os.path.join("train", default_run_directory)
        else:
            default_run_directory = os.path.join("test", default_run_directory + "-test")
    else:
        default_run_directory = self.default_run_directory
    self.run_directory = self.directory / (run_directory or default_run_directory)
    self.monitor = MonitorV2(env,
                             self.run_directory,
                             video_callable=(None if self.display_env else False),
                             options=self.options)
    self.test_stable_baseline = test_stable_baseline
    self.episode = 0
    if not self.test_stable_baseline:
        self.writer = SummaryWriter(str(self.run_directory))
        self.agent.set_writer(self.writer)
        self.agent.evaluation = self
    self.write_logging()
    self.write_metadata()
    self.filtered_agent_stats = 0
    self.best_agent_stats = -np.infty, 0

    self.recover = recover
    if self.recover:
        self.load_agent_model(self.recover)

    if display_agent:
        try:
            # Render the agent within the environment viewer, if supported
            self.env.render()
            self.env.unwrapped.viewer.directory = self.run_directory
            self.env.unwrapped.viewer.set_agent_display(
                lambda agent_surface, sim_surface: AgentGraphics.display(self.agent, agent_surface, sim_surface))
        except AttributeError:
            logger.info("The environment viewer doesn't support agent rendering.")
    self.reward_viewer = None
    if display_rewards:
        self.reward_viewer = RewardViewer()
    self.observation = None

    # Modifications
    self.episode_start_time = 0
    self.episode_length = None
    self.episode_info = None
    self.create_episode_log = options["--create_episode_log"]
    self.individual_episode_log_level = int(options["--individual_episode_log_level"])
    self.create_timestep_log = options["--create_timestep_log"]
    self.timestep_log_freq = int(options["--timestep_log_freq"])
    self.individual_reward_tensorboard = options["--individual_reward_tensorboard"]
    self.log_creator = None
    self.rewards = None
    self.rewards_averaged_over_agents = None
    if self.test_stable_baseline:
        self.model = model