def save(self,
         name,
         t,
         file_path,
         binary=False,
         compressed=False,
         delimiter=','):
    """
    Saves the samples collected for the given variable, up to trial `t`, in a text or binary file.
    The matching extension ('.csv', '.npy' or '.npz') is appended to `file_path`.
    Nothing is written if no variable with the given name is stored in `self._vars`.
    :param str name: the name of the variable whose samples are to be saved.
    :param int t: the trial number until which to save the samples.
    :param str file_path: the path to the file, without the extension.
    :param bool binary: whether to save a binary file. If False, a CSV text file will be used.
    :param bool compressed: whether to save a compressed binary file. Only works if binary is True.
    :param str delimiter: the delimiter for the fields in the CSV file, if binary is False.
    :return:
    """
    if name not in self._vars:
        return

    data = self._vars[name][:t]
    if not binary:
        # the header is sized from the sliced data rather than from `t`: the slice can hold fewer
        # entries than `t` (t larger than what was collected) or more than range(t) (negative t),
        # and the transposed table has exactly one column per entry of the slice
        col_names = ['Trial {}'.format(_t) for _t in range(len(data))]
        write_table_csv(data.T, '{}.csv'.format(file_path), delimiter,
                        '%s', col_names)
    elif compressed:
        # stored under the key 'a' inside the .npz archive
        np.savez_compressed('{}.npz'.format(file_path), a=data)
    else:
        np.save('{}.npy'.format(file_path), data)
Example #2
0
 def save(self, output_dir):
     """
     Saves the parent's data through `super().save`, then writes the `q` and `dq` tables to the
     given directory, each one both as a CSV file and as a compressed binary file.
     :param output_dir: the path to the directory in which to save the files.
     :return:
     """
     super().save(output_dir)

     # each table goes out twice: first as CSV (action names as column names), then as binary
     for table, file_name in ((self.q, Q_TABLE_FILE_NAME), (self.dq, DQ_TABLE_FILE_NAME)):
         base_path = join(output_dir, file_name)
         write_table_csv(table, base_path + TEXT_EXT, col_names=self.action_names)
         np.savez_compressed(base_path + BIN_EXT, a=table)
Example #3
0
    def save(self, output_dir):
        """
        Writes each of the statistics tables (`c_s`, `c_sa`, `c_sas`, `r_sa`, `t_s`, `t_sa`) to the
        given directory, both as a CSV file and as a compressed binary file.
        Does nothing when `self._collect_stats` is false.
        :param output_dir: the path to the directory in which to save the files.
        :return:
        """
        if not self._collect_stats:
            return

        # (table, base file name, csv writer, csv column names), in the order they are written
        tables = (
            (self.c_s, C_S_TABLE_FILE_NAME, write_table_csv, ['state']),
            (self.c_sa, C_SA_TABLE_FILE_NAME, write_table_csv, self.action_names),
            (self.c_sas, C_SAS_TABLE_FILE_NAME, write_3d_table_csv,
             ['state', 'action', 'next-state', 'count']),
            (self.r_sa, R_SA_TABLE_FILE_NAME, write_table_csv, self.action_names),
            (self.t_s, T_S_TABLE_FILE_NAME, write_table_csv, ['state']),
            (self.t_sa, T_SA_TABLE_FILE_NAME, write_table_csv, self.action_names),
        )

        # writes each table to a csv and a binary file
        for table, file_name, write_csv, col_names in tables:
            base_path = join(output_dir, file_name)
            write_csv(table, base_path + TEXT_EXT, col_names=col_names)
            np.savez_compressed(base_path + BIN_EXT, a=table)
def run_trial(args):
    """
    Runs a single trial: sets up the configuration, environment and agent, runs all episodes and
    writes the results to the output directory.

    The environment is always closed before returning, including when an error is raised while
    creating the agent, running the episodes or saving the results.

    :param args: the run options. The attributes read here are `agent`, `results`, `trial`,
        `config_file_path`, `default_frogger_config`, `output`, `fps`, `show_score_bar`,
        `num_episodes`, `record`, `verbose` and `clear_results`.
    :return:
    """
    # tries to get agent type
    agent_t = args.agent
    if agent_t == AgentType.Testing:
        # tries to load a pre-trained agent configuration file
        config, results_dir = load_agent_config(args.results, args.trial)
    else:
        # tries to load env config from provided file path
        config_file = args.config_file_path
        config = args.default_frogger_config if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)
    # creates env helper
    helper = create_helper(config)
    # checks for provided output dir
    output_dir = args.output if args.output is not None else \
        get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)
    # saves / copies configs to file
    # NOTE(review): this happens before `num_episodes` is overridden below, so the saved file does
    # not carry the override - confirm this is intended
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))
    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, args.fps,
                                    args.show_score_bar)
    # create environment and monitor
    env = gym.make(env_id)
    try:
        config.num_episodes = args.num_episodes
        video_callable = video_schedule(config, args.record)
        env = Monitor(env,
                      directory=output_dir,
                      force=True,
                      video_callable=video_callable)
        # adds reference to monitor to allow for gym environments to update video frames
        if video_callable(0):
            env.env.monitor = env
        # initialize seeds (one for the environment, another for the agent)
        env.seed(config.seed + args.trial)
        agent_rng = np.random.RandomState(config.seed + args.trial)
        # creates the agent
        agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)
        # if testing, loads tables from file (some will be filled by the agent during the interaction)
        if agent_t == AgentType.Testing:
            agent.load(results_dir)
        # runs episodes
        behavior_tracker = BehaviorTracker(config.num_episodes)
        recorded_episodes = []
        for e in range(config.num_episodes):
            # checks whether to activate video monitoring
            env.env.monitor = env if video_callable(e) else None
            # reset environment
            old_obs = env.reset()
            old_s = helper.get_state_from_observation(old_obs, 0, False)
            if args.verbose:
                print(f'Episode: {e}')
            exploration_strategy.update(e)  # update for learning agent
            t = 0
            done = False
            while not done:
                # select action
                a = agent.act(old_s)
                # observe transition
                obs, r, done, _ = env.step(a)
                s = helper.get_state_from_observation(obs, r, done)
                r = helper.get_reward(old_s, a, r, s, done)
                # update agent and stats
                agent.update(old_s, a, r, s)
                behavior_tracker.add_sample(old_s, a)
                helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)
                old_s = s
                old_obs = obs
                t += 1
            # adds to recorded episodes list
            if video_callable(e):
                recorded_episodes.append(e)
            # signals new episode to tracker
            behavior_tracker.new_episode()
        # writes results to files
        agent.save(output_dir)
        behavior_tracker.save(output_dir)
        write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
        helper.save_stats(join(output_dir, 'results'), args.clear_results)
        print('\nResults of trial {} written to:\n\t\'{}\''.format(
            args.trial, output_dir))
    finally:
        # closes whichever environment is bound (raw or monitored), even if something above raised
        env.close()
Example #5
0
def run_trial(args):
    """
    Runs a single trial: sets up the configuration, environment and agent, runs all episodes and
    writes the results to the output directory.

    The environment is always closed before returning, including when an error is raised while
    creating the agent, running the episodes or saving the results.

    :param args: the run options. The attributes read here are `agent`, `results`, `config`,
        `output`, `trial`, `record` and `verbose`.
    :raises ValueError: when testing and the results directory or its 'config.json' does not exist.
    :return:
    """
    # tries to get agent type
    agent_t = args.agent
    results_dir = ''
    if agent_t == AgentType.Testing:

        # tries to load config from provided results dir path
        results_dir = args.results if args.results is not None else \
            get_agent_output_dir(DEFAULT_CONFIG, AgentType.Learning)
        config_file = join(results_dir, 'config.json')
        if not exists(results_dir) or not exists(config_file):
            raise ValueError('Could not load configuration from: {}.'.format(config_file))
        config = EnvironmentConfiguration.load_json(config_file)

        # if testing, we want to force a seed different than training (diff. test environments)
        config.seed += 1

    else:
        # tries to load env config from provided file path
        config_file = args.config
        config = DEFAULT_CONFIG if config_file is None or not exists(config_file) \
            else EnvironmentConfiguration.load_json(config_file)

    # creates env helper
    helper = create_helper(config)

    # checks for provided output dir
    output_dir = args.output if args.output is not None else get_agent_output_dir(config, agent_t, args.trial)
    if not exists(output_dir):
        makedirs(output_dir)

    # saves / copies configs to file
    config.save_json(join(output_dir, 'config.json'))
    helper.save_state_features(join(output_dir, 'state_features.csv'))

    # register environment in Gym according to env config
    env_id = '{}-{}-v0'.format(config.gym_env_id, args.trial)
    helper.register_gym_environment(env_id, False, FPS, SHOW_SCORE_BAR)

    # create environment and monitor
    env = gym.make(env_id)
    try:
        # TODO: the number of episodes is hard-coded here, overriding the loaded configuration
        config.num_episodes = 100
        video_callable = video_schedule(config, args.record)
        env = Monitor(env, directory=output_dir, force=True, video_callable=video_callable)

        # adds reference to monitor to allow for gym environments to update video frames
        if video_callable(0):
            env.env.monitor = env

        # initialize seeds (one for the environment, another for the agent)
        env.seed(config.seed + args.trial)
        agent_rng = np.random.RandomState(config.seed + args.trial)

        # creates the agent
        agent, exploration_strategy = create_agent(helper, agent_t, agent_rng)

        # if testing, loads tables from file (some will be filled by the agent during the interaction)
        if agent_t == AgentType.Testing:
            agent.load(results_dir)

        # runs episodes
        behavior_tracker = BehaviorTracker(config.num_episodes)
        recorded_episodes = []
        for e in range(config.num_episodes):

            # checks whether to activate video monitoring
            env.env.monitor = env if video_callable(e) else None

            # reset environment
            old_obs = env.reset()
            old_s = helper.get_state_from_observation(old_obs, 0, False)

            if args.verbose:
                helper.update_stats_episode(e)
            exploration_strategy.update(e)

            t = 0
            done = False
            while not done:
                # select action
                a = agent.act(old_s)

                # observe transition
                obs, r, done, _ = env.step(a)
                s = helper.get_state_from_observation(obs, r, done)
                r = helper.get_reward(old_s, a, r, s, done)

                # update agent and stats
                agent.update(old_s, a, r, s)
                behavior_tracker.add_sample(old_s, a)
                helper.update_stats(e, t, old_obs, obs, old_s, a, r, s)

                old_s = s
                old_obs = obs
                t += 1

            # adds to recorded episodes list
            if video_callable(e):
                recorded_episodes.append(e)

            # signals new episode to tracker
            behavior_tracker.new_episode()

        # writes results to files
        agent.save(output_dir)
        behavior_tracker.save(output_dir)
        write_table_csv(recorded_episodes, join(output_dir, 'rec_episodes.csv'))
        helper.save_stats(join(output_dir, 'results'), CLEAR_RESULTS)
        print('\nResults of trial {} written to:\n\t\'{}\''.format(args.trial, output_dir))

    finally:
        # closes whichever environment is bound (raw or monitored), even if something above raised
        env.close()