def run_agent(player: MarioPlayer, env: Wrapper, record: bool, vids_path: str,
              index: int):
    """Run one episode with the given player; optionally record it to video."""
    if record:
        rec_output_path = os.path.join(vids_path,
                                       "{name}.mp4".format(name=index))
        rec = monitor.video_recorder.VideoRecorder(env, path=rec_output_path)

    state = env.reset()
    done = False

    for step in range(steps_limit):  # steps_limit: module-level cap on episode length
        if done:
            break
        action = player.act(state)
        state, reward, done, info = env.step(action)
        env.render()
        if record:
            rec.capture_frame()
        player.update_info(info)
        player.update_reward(reward)
        if info['flag_get']:  # reaching the flag ends the run
            done = True

    if record:
        rec.close()
    player.calculate_fitness()
    outcome = player.get_run_info()
    outcome['index'] = index
    return outcome
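A hypothetical invocation sketch, not from the source: the MarioPlayer constructor and the steps_limit global come from the surrounding project, and the gym-super-mario-bros setup below is only illustrative.

import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)
player = MarioPlayer(...)  # constructor arguments depend on the surrounding project
outcome = run_agent(player, env, record=True, vids_path='videos', index=0)
print(outcome['index'])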
Example 2
def test(env: gym.Wrapper, model: tf.keras.Model, log_dir: Path) -> None:
    """Test the DQN on Pong.

    Args:
        env: The Atari Pong environment
        model: The model to be tested
        log_dir: Path where to save the video
    """
    env = Monitor(
        env,
        log_dir,
        force=True,  # overwrite existing videos
        video_callable=lambda count: True,  # force save this episode
    )

    state: Deque[tf.Tensor] = deque(maxlen=STATE_FRAMES)  # rolling buffer of the newest frames (collections.deque)
    state.append(preprocess(env.reset()))  # initial state

    print("Starting testing...")
    while True:
        if len(state) < STATE_FRAMES:
            # not enough frames buffered to form a full state yet; act randomly
            action = env.action_space.sample()
        else:
            initial = tf.stack(state, axis=-1)
            action = choose(model, initial, 0)  # epsilon 0, i.e. act greedily

        state_new, _, done, _ = env.step(action)
        state_new = preprocess(state_new)
        state.append(state_new)

        if done:
            break
    print("Testing done")
Example 3
    def fix_visual_game(env: gym.Wrapper) -> gym.Wrapper:
        h, w, channels = env.observation_space.shape  # gym image observations are HWC
        setattr(env, 'env_name', env.unwrapped.spec.id)
        setattr(env, 'state_num', (channels, h, w))
        setattr(env, 'state_dim', 3)
        setattr(env, 'action_dim', env.action_space.n)
        setattr(env, 'if_discrete',
                isinstance(env.action_space, gym.spaces.Discrete))
        target_reward = getattr(env, 'target_reward', None)
        target_reward_default = getattr(env.spec, 'reward_threshold', None)
        if target_reward is None:
            target_reward = target_reward_default
        if target_reward is None:
            target_reward = 2**16
        setattr(env, 'target_reward', target_reward)

        def convert_image_shape(img: np.ndarray) -> np.ndarray:
            # HWC -> CHW; transpose keeps the pixel layout intact (reshape would scramble it)
            return img.transpose((2, 0, 1))

        def fix_step(env_step):
            def step(action):
                observation, reward, terminal, info = env_step(action)
                return convert_image_shape(observation), reward, terminal, info

            return step

        env.step = fix_step(env.step)
        return env
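An illustrative call, assuming fix_visual_game is reachable at module scope; note that only step() is patched, so reset() still returns channel-last frames.

import gym

env = fix_visual_game(gym.make("Breakout-v4"))
env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(obs.shape)  # (channels, height, width) after the transpose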
Example 4
def _test(id: int, env: gym.Wrapper, model: TD3Network, render: bool = False,
          recording_path=None, save_video=False):
    """Roll out one episode, repeating each action as selected by critic_1."""
    episode_rewards = []
    action_repeats = []

    state = env.reset()
    done = False
    episode_images = []

    while not done:
        # the actor picks the action; critic_1's Q-values select how long to repeat it
        state = torch.FloatTensor(state).unsqueeze(0)
        action = model.actor(state)
        repeat_q = model.critic_1(state, action)
        repeat_idx = repeat_q.argmax(1).item()

        action = action.data.cpu().numpy()[0]
        repeat = model.action_repeats[repeat_idx]
        action_repeats.append(repeat)

        for _ in range(repeat):
            if render:
                if save_video:
                    img = env.render(mode='rgb_array')
                    episode_images.append(img)
                else:
                    env.render(mode='human')

            # step
            state, reward, done, info = env.step(action)
            episode_rewards.append(reward)
            if done:
                break

    if render and save_video:
        write_gif(episode_images, action_repeats, episode_rewards,
                  os.path.join(recording_path, 'ep_{}.gif'.format(id)))

    return sum(episode_rewards), action_repeats
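A hypothetical test-loop driver; TD3Network construction and write_gif come from the surrounding project, and the environment choice is illustrative only.

import gym

env = gym.make("Pendulum-v1")
model = TD3Network(...)  # constructor arguments depend on the project
for ep in range(5):
    total_reward, repeats = _test(ep, env, model, render=True,
                                  recording_path="recordings", save_video=True)
    print(ep, total_reward, sum(repeats) / len(repeats))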
Example 5
    def step(self, action):
        """Environment step: forward to the wrapped env and notify the callback."""
        observation, reward, done, info = Wrapper.step(self, action)
        self._callback._on_step(action, observation, reward, done, info)
        return observation, reward, done, info
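A minimal sketch of the enclosing wrapper this method belongs to, assuming the callback object exposes an _on_step hook; the class name is illustrative, not from the source.

class CallbackWrapper(Wrapper):
    """Forwards every transition to a user-supplied callback."""

    def __init__(self, env, callback):
        super().__init__(env)
        self._callback = callback

    # step() as defined above notifies self._callback on every transition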