def run_agent(player: MarioPlayer, env: Wrapper, record: bool, vids_path: str, index):
    """Play one episode with ``player`` in ``env`` and return the run summary.

    The episode lasts at most ``steps_limit`` steps (a name defined outside
    this function) and ends early when the environment reports ``done`` or
    when ``info['flag_get']`` is truthy.

    Args:
        player: Agent providing ``act``, ``update_info``, ``update_reward``,
            ``calculate_fitness`` and ``get_run_info``.
        env: Environment to step and render.
        record: When truthy, every frame is captured to a video file.
        vids_path: Directory in which the video is written (used only when
            ``record`` is truthy).
        index: Identifier of this run; used as the video file name
            (``<index>.mp4``) and stored in the result under ``'index'``.

    Returns:
        The mapping returned by ``player.get_run_info()`` with an extra
        ``'index'`` entry.
    """
    rec = None
    if record:
        rec_output_path = os.path.join(vids_path, "{name}.mp4".format(name=index))
        rec = monitor.video_recorder.VideoRecorder(env, path=rec_output_path)

    try:
        state = env.reset()
        for _ in range(steps_limit):
            action = player.act(state)
            state, reward, done, info = env.step(action)
            env.render()
            if rec is not None:
                rec.capture_frame()
            player.update_info(info)
            player.update_reward(reward)
            # Reaching the flag ends the run even if the env has not set done.
            reached_flag = info['flag_get']
            if done or reached_flag:
                break
    finally:
        # Always release the recorder, even if the episode raised, so the
        # video file is not left open.
        if rec is not None:
            rec.close()

    player.calculate_fitness()
    outcome = player.get_run_info()
    outcome['index'] = index
    return outcome
def test(env: gym.Wrapper, model: tf.keras.Model, log_dir: Path) -> None:
    """Play a single recorded episode with the DQN on Pong.

    The environment is wrapped in a ``Monitor`` that overwrites any existing
    output in ``log_dir`` and records every episode. Until ``STATE_FRAMES``
    preprocessed frames have been collected, actions are sampled at random;
    afterwards the frames are stacked along the last axis and the action is
    picked by ``choose``.

    Args:
        env: The Atari Pong environment
        model: The model to evaluate
        log_dir: Path where to save the video
    """

    def _record_every_episode(count):
        # Force the monitor to save this episode regardless of its index.
        return True

    monitored_env = Monitor(
        env,
        log_dir,
        force=True,  # overwrite existing videos
        video_callable=_record_every_episode,
    )

    # Sliding window holding the most recent preprocessed frames.
    frames = Deque[tf.Tensor](maxlen=STATE_FRAMES)
    frames.append(preprocess(monitored_env.reset()))  # initial state

    print("Starting testing...")
    done = False
    while not done:
        if len(frames) >= STATE_FRAMES:
            stacked = tf.stack(frames, axis=-1)
            action = choose(model, stacked, 0)  # choose greedily
        else:
            # Not enough history yet to build a full input.
            action = monitored_env.action_space.sample()

        observation, _, done, _ = monitored_env.step(action)
        frames.append(preprocess(observation))
    print("Testing done")
def _test(id: int, env: gym.Wrapper, model: TD3Network, render: bool = False, recording_path=None, save_video=False):
    """Run one evaluation episode and return its total reward and repeats.

    On every decision step the actor produces an action, the first critic is
    evaluated on it, and the argmax over the critic output (dim 1) indexes
    ``model.action_repeats`` to pick how many times the action is applied.

    Args:
        id: Episode identifier, used in the GIF file name (``ep_<id>.gif``).
            The name shadows the builtin but is kept for caller compatibility.
        env: Environment to evaluate in.
        model: Network exposing ``actor``, ``critic_1`` and ``action_repeats``.
        render: When true, render each environment step.
        recording_path: Directory for the GIF; required when both ``render``
            and ``save_video`` are set.
        save_video: When true (together with ``render``), collect
            ``rgb_array`` frames and write them out with ``write_gif``
            instead of rendering in ``human`` mode.

    Returns:
        A ``(total_reward, action_repeats)`` tuple, where ``action_repeats``
        lists the repeat count chosen at each decision step.
    """
    episode_rewards = []
    action_repeats = []
    episode_images = []

    state = env.reset()
    done = False
    while not done:
        # get action
        state = torch.FloatTensor(state).unsqueeze(0)
        # Evaluation only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            action = model.actor(state)
            repeat_q = model.critic_1(state, action)
        repeat_idx = repeat_q.argmax(1).item()
        action = action.detach().cpu().numpy()[0]

        repeat = model.action_repeats[repeat_idx]
        action_repeats.append(repeat)

        for _ in range(repeat):
            if render:
                if save_video:
                    img = env.render(mode='rgb_array')
                    episode_images.append(img)
                else:
                    env.render(mode='human')

            # step
            state, reward, done, info = env.step(action)
            episode_rewards.append(reward)
            if done:
                # Stop repeating as soon as the episode terminates.
                break

    if render and save_video:
        write_gif(episode_images, action_repeats, episode_rewards,
                  os.path.join(recording_path, 'ep_{}.gif'.format(id)))

    return sum(episode_rewards), action_repeats
def reset(self):
    """Reset the wrapped environment and notify the callback.

    Delegates to ``Wrapper.reset`` and passes the resulting observation to
    ``self._callback._on_reset`` before handing it back to the caller.

    Returns:
        The observation produced by ``Wrapper.reset``.
    """
    initial_observation = Wrapper.reset(self)
    # Let the callback see the first observation of the new episode.
    self._callback._on_reset(initial_observation)
    return initial_observation