import os
import time
from random import randint

import numpy as np
import tensorflow as tf
from gym.wrappers import Monitor
from gym.wrappers.monitoring.video_recorder import VideoRecorder

# Assumed project-local helpers, defined elsewhere in this repo: make_atari,
# wrap_deepmind, ensure_dir, update_obs, valid_atari_actions, ts_rand,
# ep_video_path, Policy, CnnPolicy.


def _thunk():
    # Build one worker env; the free variables (env_id, seed, rank,
    # record_video, record_video_freq, output_dir) come from the enclosing
    # env-factory scope.
    env = make_atari(env_id)
    env.seed(seed + rank)
    if record_video:
        video_path = os.path.join(output_dir, 'video/env-%d' % rank)
        ensure_dir(video_path)
        env = Monitor(
            env, video_path,
            video_callable=lambda episode_id: episode_id % record_video_freq == 0,
            force=True)
    return wrap_deepmind(env, episode_life=True, clip_rewards=True,
                         frame_stack=False)
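# A minimal sketch of how _thunk is normally used, assuming an enclosing
# factory; the name make_env and the worker count are illustrative, not from
# this repo. Each rank gets its own seeded thunk, and the thunks are handed
# to a vectorized-env runner such as baselines' SubprocVecEnv.
def make_env(env_id, seed, rank, record_video=False,
             record_video_freq=100, output_dir='output'):
    def _thunk():
        ...  # body as above, closing over this factory's arguments
    return _thunk

# from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
# envs = SubprocVecEnv([make_env('PongNoFrameskip-v4', seed=0, rank=r)
#                       for r in range(8)])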
def run(env_id, model_path):
    # Evaluate a saved policy on a single (non-vectorized) env.
    env = make_atari(env_id)
    env = wrap_deepmind(env, episode_life=True, clip_rewards=True,
                        frame_stack=False)
    num_env = 1
    valid_actions = valid_atari_actions(env, env_id)
    num_actions = len(valid_actions)
    ob_space = env.observation_space
    obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
    next_obs = env.reset()
    obs = update_obs(obs, next_obs)
    steps = 0
    total_reward = 0
    with tf.Session() as sess:
        print('Loading Model %s' % model_path)
        policy = Policy(sess, num_actions, num_env, num_steps=1)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        while True:
            env.render()
            actions, values = policy.step(obs)
            value = values[0]
            steps += 1
            # Map the policy's action index back to the env's action id.
            next_obs, rewards, dones, info = env.step(
                valid_actions[actions[0]])
            total_reward += rewards
            print('%d: reward=%f value=%f' % (steps, total_reward, value))
            obs = update_obs(obs, next_obs)
            if dones:
                print('DONE')
                steps = 0
                total_reward = 0
                next_obs = env.reset()
                obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
                obs = update_obs(obs, next_obs)
                time.sleep(2)
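# A minimal sketch of the frame-stack update that run() leans on. The real
# update_obs is defined elsewhere in this repo; this version assumes obs has
# shape (num_env, 84, 84, 4) and next_obs is a single 84x84(x1) frame, and it
# rolls the stack so the newest frame lands in the last channel.
def update_obs(obs, next_obs):
    frame = np.asarray(next_obs, dtype=np.uint8).reshape(84, 84, -1)[..., -1]
    obs = np.roll(obs, shift=-1, axis=3)  # drop the oldest frame
    obs[..., -1] = frame                  # append the newest frame
    return obs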
def main():
    # Random-agent smoke test for the wrapped env.
    env_id = 'PongNoFrameskip-v4'
    seed = randint(0, 1000000)
    env = make_atari(env_id, frame_skip=1)
    env.seed(seed)
    env = wrap_deepmind(env)
    env.reset()
    t = 1
    while True:
        env.render()
        next_obs, reward, done, info = env.step(env.action_space.sample())
        print('%d: reward=%f done=%s info=%s' % (t, reward, done, info))
        t += 1  # advance the per-episode step counter
        if done:
            print('DONE')
            next_obs = env.reset()
            t = 1
            time.sleep(2)
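# Assumed entry point (the guard is not shown in the excerpt above):
if __name__ == '__main__':
    main()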
def run(env_id, model_path, record_video, video_path=None):
    # Evaluate a saved CNN policy, optionally recording one video per episode.
    env = make_atari(env_id)
    env = wrap_deepmind(env, episode_life=True, clip_rewards=True,
                        frame_stack=False)
    num_env = 1
    ob_space = env.observation_space
    ac_space = env.action_space
    obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
    next_obs = env.reset()
    obs = update_obs(obs, next_obs)
    ep = 1
    steps = 0
    total_reward = 0
    with tf.Session() as sess:
        print('Loading Model %s' % model_path)
        policy = CnnPolicy(sess, ob_space, ac_space, nbatch=1, nsteps=1)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        ts = ts_rand()
        if record_video:
            ensure_dir(video_path)
            video_recorder = VideoRecorder(
                env, path=ep_video_path(video_path, ts, env_id, ep))
        while True:
            env.render()
            if record_video:
                video_recorder.capture_frame()
            actions, values, _ = policy.step(obs)
            value = values[0]
            steps += 1
            next_obs, rewards, dones, info = env.step(actions)
            total_reward += rewards
            print('%d: reward=%f value=%f' % (steps, total_reward, value))
            obs = update_obs(obs, next_obs)
            if dones:
                print('DONE')
                ep += 1
                steps = 0
                total_reward = 0
                next_obs = env.reset()
                obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
                obs = update_obs(obs, next_obs)
                if record_video:
                    # Close the finished episode's recording, then start a
                    # fresh file for the next episode.
                    video_recorder.close()
                    video_recorder = VideoRecorder(
                        env, path=ep_video_path(video_path, ts, env_id, ep),
                        enabled=record_video)
                time.sleep(2)
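# Minimal sketches of the small helpers run() assumes. The real ts_rand,
# ep_video_path, and ensure_dir live elsewhere in this repo, so these bodies
# are illustrative guesses, not the originals.
import random


def ts_rand():
    # timestamp plus a short random suffix, so repeated runs don't collide
    return '%d-%04d' % (int(time.time()), random.randint(0, 9999))


def ep_video_path(video_path, ts, env_id, ep):
    # one video file per episode under the requested directory
    return os.path.join(video_path, '%s-%s-ep-%d.mp4' % (env_id, ts, ep))


def ensure_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)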