Esempio n. 1
0
def run_episodes(name, new_episode_func, update_func):
    """Replays every tracked episode through the environment, invoking the
    given callbacks, and raises ValueError on any divergence between the
    environment and the tracked behavior.

    :param name: label used in console output (also selects stats updates
        when equal to 'analysis' and heatmap reports are active).
    :param new_episode_func: callback(episode_idx, sequence_length) invoked
        at the start of each episode.
    :param update_func: callback(t, old_obs, old_s, a, r, s) invoked at
        every time-step.
    """
    global behavior_tracker, helper

    for ep in range(behavior_tracker.num_episodes):

        # tracked state and action sequences for this episode
        states = behavior_tracker.s_s[ep]
        actions = behavior_tracker.s_a[ep]
        num_steps = len(states)

        # signals new episode
        new_episode_func(ep, num_steps)

        # resets the environment; the helper call is kept for its side
        # effects (its return value is intentionally unused here)
        prev_obs = env.reset()
        helper.get_state_from_observation(prev_obs, 0, False)

        clean_console()
        print('Processing {} episode {}...'.format(name, ep))

        for step in range(num_steps):

            # current tracked state-action pair
            prev_state = states[step]
            action = actions[step]

            # advances the environment one step and derives state/reward
            obs, rwd, done, _ = env.step(action)
            state = helper.get_state_from_observation(obs, rwd, done)
            rwd = helper.get_reward(prev_state, action, rwd, state, done)

            # synchronization checks against the tracked behavior
            last_step = step == num_steps - 1
            if done and not last_step:
                raise ValueError(
                    'Environment ended at {}, before tracked behavior which ended at: {}'
                    .format(step, num_steps - 1))
            if last_step and not done:
                raise ValueError(
                    'Environment did not end at {} like it was supposed to'.
                    format(step))
            if not done and state != states[step + 1]:
                raise ValueError(
                    'Environment state {} does not match tracked state {}'.
                    format(state, states[step + 1]))

            # collects heatmap statistics only during analysis runs
            if name == 'analysis' and explanation_t == ReportType.Heatmaps:
                helper.update_stats(ep, step, prev_obs, obs, prev_state,
                                    action, rwd, state)
            update_func(step, prev_obs, prev_state, action, rwd, state)

            prev_obs = obs
Esempio n. 2
0
    # restores the previously saved agent from disk
    agent.load(agent_dir)

    # runs all episodes
    advance_time = advance_episode = False
    for e in range(behavior_tracker.num_episodes):

        # gets state and action sequence
        s_seq = behavior_tracker.s_s[e]
        a_seq = behavior_tracker.s_a[e]

        # episodes before START_EPISODE are fast-forwarded without user input
        advance_episode = e < START_EPISODE
        old_obs = env.reset()
        old_s = helper.get_state_from_observation(old_obs, 0, False)

        seq_len = len(s_seq)
        clean_console()
        print('Replaying episode {} ({} time-steps)...'.format(e, seq_len))

        for t in range(seq_len):

            # waits until one of the keys is pressed if not advancing episode
            # (process_keys is expected to set advance_time/advance_episode)
            while not advance_episode and not advance_time:
                process_keys()
                pygame.event.pump()

            # single-step flag is consumed; must be pressed again next step
            advance_time = False

            # hides the display when advancing an episode
            if advance_episode:
                pygame.display.iconify()
            else:
Esempio n. 3
0
 def update_stats_episode(self, e, path=None):
     """Prints progress and on-screen statistics roughly every 1% of episodes.

     :param e: zero-based index of the episode that just finished.
     :param path: unused here; kept for interface compatibility with callers.
     """
     # integer interval avoids the original float modulo, which raised
     # ZeroDivisionError for num_episodes == 0 and fired on (almost) every
     # episode when num_episodes < 100 (divisor below 1.0)
     interval = max(1, self.config.num_episodes // 100)
     if (e + 1) % interval == 0:
         clean_console()
         print('Episode {} ({:.0f}%)...'.format(
             e + 1, ((e + 1) / self.config.num_episodes) * 100))
         self._print_stats(e, PRINT_SCREEN_VAR_NAMES)