def save(self, name, t, file_path, binary=False, compressed=False, delimiter=','):
    """
    Saves the samples collected for the given variable in a text or binary file.

    Nothing is written when no variable called `name` is stored in `self._vars`.

    :param str name: the name of the variable whose samples are to be saved.
    :param int t: the trial number until which to save the samples (the slice `[:t]` of the stored data is used).
    :param str file_path: the path to the output file without extension; `.npz`, `.npy` or `.csv` is appended.
    :param bool binary: whether to save a binary file. If `False`, a CSV text file will be used.
    :param bool compressed: whether to save a compressed binary file. Only used if `binary` is `True`.
    :param str delimiter: the delimiter for the fields in the CSV file. Only used if `binary` is `False`.
    :return: None
    """
    if name not in self._vars:
        return

    data = self._vars[name][:t]

    if binary:
        if compressed:
            # the samples are stored in the archive under the key 'a'
            np.savez_compressed('{}.npz'.format(file_path), a=data)
        else:
            np.save('{}.npy'.format(file_path), data)
        return

    # one header entry per trial actually present in the slice: using the slice length instead of `t`
    # keeps the header aligned with the written columns when `t` exceeds the stored trials or is negative
    col_names = ['Trial {}'.format(_t) for _t in range(len(data))]
    write_table_csv(data.T, '{}.csv'.format(file_path), delimiter, '%s', col_names)
def save(self, output_dir):
    """
    Saves the agent's tables to the given directory.

    First delegates to the parent class' `save`, then writes the `q` and `dq` tables, each one as a CSV text file
    (with the action names as column names) and as a compressed binary file (array stored under the key 'a').

    :param str output_dir: the path to the directory in which to save the table files.
    :return: None
    """
    super().save(output_dir)

    # Q table: text version followed by the compressed binary version
    q_base_path = join(output_dir, Q_TABLE_FILE_NAME)
    write_table_csv(self.q, q_base_path + TEXT_EXT, col_names=self.action_names)
    np.savez_compressed(q_base_path + BIN_EXT, a=self.q)

    # dQ table: same pair of files
    dq_base_path = join(output_dir, DQ_TABLE_FILE_NAME)
    write_table_csv(self.dq, dq_base_path + TEXT_EXT, col_names=self.action_names)
    np.savez_compressed(dq_base_path + BIN_EXT, a=self.dq)
def save(self, output_dir):
    """
    Saves all the statistics tables to the given directory, each one as a CSV text file and as a compressed
    binary file (array stored under the key 'a').

    Nothing is written when `self._collect_stats` is falsy.

    :param str output_dir: the path to the directory in which to save the table files.
    :return: None
    """
    if not self._collect_stats:
        return

    def _dump(csv_writer, table, base_name, header):
        # writes one table twice: as text through the given CSV writer, then as a compressed binary file
        csv_writer(table, join(output_dir, base_name) + TEXT_EXT, col_names=header)
        np.savez_compressed(join(output_dir, base_name) + BIN_EXT, a=table)

    # tables are written in a fixed order; only `c_sas` goes through the 3D CSV writer
    _dump(write_table_csv, self.c_s, C_S_TABLE_FILE_NAME, ['state'])
    _dump(write_table_csv, self.c_sa, C_SA_TABLE_FILE_NAME, self.action_names)
    _dump(write_3d_table_csv, self.c_sas, C_SAS_TABLE_FILE_NAME, ['state', 'action', 'next-state', 'count'])
    _dump(write_table_csv, self.r_sa, R_SA_TABLE_FILE_NAME, self.action_names)
    _dump(write_table_csv, self.t_s, T_S_TABLE_FILE_NAME, ['state'])
    _dump(write_table_csv, self.t_sa, T_SA_TABLE_FILE_NAME, self.action_names)
def run_trial(args):
    """
    Runs a single trial: loads the configuration, creates the environment and the agent, runs all the episodes
    and writes the results to the output directory.

    The monitored environment is always closed at the end, even if an episode or the saving of results fails.

    :param args: the parsed arguments. The attributes read here are `agent`, `results`, `trial`,
        `config_file_path`, `default_frogger_config`, `output`, `fps`, `show_score_bar`, `num_episodes`,
        `record`, `verbose` and `clear_results`.
    :return: None
    """
    # tries to get agent type
    agent_t = args.agent

    if agent_t == AgentType.Testing:
        # tries to load a pre-trained agent configuration file
        config, results_dir = load_agent_config(args.results, args.trial)
    else:
        # tries to load env config from provided file path, falling back to the default one
        config_file = args.config_file_path
        config = args.default_frogger_config if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else \
        get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, args.fps, args.show_score_bar)

    # create environment and monitor
    env = gym.make(env_id)
    config.num_episodes = args.num_episodes
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    try:
        # adds reference to monitor to allow for gym environments to update video frames
        if video_callable(0):
            env.env.monitor = env

        # initialize seeds (one for the environment, another for the agent)
        env.seed(config.seed + args.trial)
        agent_rng = np.random.RandomState(config.seed + args.trial)

        # creates the agent
        agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

        # if testing, loads tables from file (some will be filled by the agent during the interaction)
        if agent_t == AgentType.Testing:
            agent.load(results_dir)

        # runs episodes
        behavior_tracker = BehaviorTracker(config.num_episodes)
        recorded_episodes = []

        for e in range(config.num_episodes):

            # checks whether to activate video monitoring
            env.env.monitor = env if video_callable(e) else None

            # reset environment
            old_obs = env.reset()
            old_s = helper.get_state_from_observation(old_obs, 0, False)

            if args.verbose:
                print(f'Episode: {e}')
            exploration_strategy.update(e)  # update for learning agent

            t = 0
            done = False
            while not done:
                # select action
                a = agent.act(old_s)

                # observe transition
                obs, r, done, _ = env.step(a)
                s = helper.get_state_from_observation(obs, r, done)
                r = helper.get_reward(old_s, a, r, s, done)

                # update agent and stats
                agent.update(old_s, a, r, s)
                behavior_tracker.add_sample(old_s, a)
                helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)

                old_s = s
                old_obs = obs
                t += 1

            # adds to recorded episodes list
            if video_callable(e):
                recorded_episodes.append(e)

            # signals new episode to tracker
            behavior_tracker.new_episode()

        # writes results to files
        agent.save(output_dir)
        behavior_tracker.save(output_dir)
        write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
        helper.save_stats(join(output_dir, 'results'), args.clear_results)
        print('\nResults of trial {} written to:\n\t\'{}\''.format(
            args.trial, output_dir))
    finally:
        # always closes the monitored environment, even when an episode or the saving of results fails
        env.close()
def run_trial(args):
    """
    Runs a single trial: loads the configuration, creates the environment and the agent, runs all the episodes
    and writes the results to the output directory.

    The monitored environment is always closed at the end, even if an episode or the saving of results fails.

    :param args: the parsed arguments. The attributes read here are `agent`, `results`, `config`, `output`,
        `trial`, `record` and `verbose`.
    :raises ValueError: when testing and the results directory or its 'config.json' file does not exist.
    :return: None
    """
    # tries to get agent type
    agent_t = args.agent
    results_dir = ''

    if agent_t == AgentType.Testing:
        # tries to load config from provided results dir path
        results_dir = args.results if args.results is not None else \
            get_agent_output_dir(DEFAULT_CONFIG, AgentType.Learning)
        config_file = join(results_dir, 'config.json')
        if not exists(results_dir) or not exists(config_file):
            raise ValueError('Could not load configuration from: {}.'.format(config_file))
        config = EnvironmentConfiguration.load_json(config_file)

        # if testing, we want to force a seed different than training (diff. test environments)
        config.seed += 1
    else:
        # tries to load env config from provided file path, falling back to the default one
        config_file = args.config
        config = DEFAULT_CONFIG if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, FPS, SHOW_SCORE_BAR)

    # create environment and monitor
    env = gym.make(env_id)
    # todo: the number of episodes is hard-coded here, overriding the value in the loaded configuration
    config.num_episodes = 100
    video_callable = video_schedule(config, args.record)
    env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

    try:
        # adds reference to monitor to allow for gym environments to update video frames
        if video_callable(0):
            env.env.monitor = env

        # initialize seeds (one for the environment, another for the agent)
        env.seed(config.seed + args.trial)
        agent_rng = np.random.RandomState(config.seed + args.trial)

        # creates the agent
        agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

        # if testing, loads tables from file (some will be filled by the agent during the interaction)
        if agent_t == AgentType.Testing:
            agent.load(results_dir)

        # runs episodes
        behavior_tracker = BehaviorTracker(config.num_episodes)
        recorded_episodes = []

        for e in range(config.num_episodes):

            # checks whether to activate video monitoring
            env.env.monitor = env if video_callable(e) else None

            # reset environment
            old_obs = env.reset()
            old_s = helper.get_state_from_observation(old_obs, 0, False)

            if args.verbose:
                helper.update_stats_episode(e)
            exploration_strategy.update(e)

            t = 0
            done = False
            while not done:
                # select action
                a = agent.act(old_s)

                # observe transition
                obs, r, done, _ = env.step(a)
                s = helper.get_state_from_observation(obs, r, done)
                r = helper.get_reward(old_s, a, r, s, done)

                # update agent and stats
                agent.update(old_s, a, r, s)
                behavior_tracker.add_sample(old_s, a)
                helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)

                old_s = s
                old_obs = obs
                t += 1

            # adds to recorded episodes list
            if video_callable(e):
                recorded_episodes.append(e)

            # signals new episode to tracker
            behavior_tracker.new_episode()

        # writes results to files
        agent.save(output_dir)
        behavior_tracker.save(output_dir)
        write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
        helper.save_stats(join(output_dir, 'results'), CLEAR_RESULTS)
        print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))
    finally:
        # always closes the monitored environment, even when an episode or the saving of results fails
        env.close()