Example #1
 def _thunk():
     """Build one seeded Atari env, optionally wrapped with a video Monitor,
     then apply the standard DeepMind preprocessing wrappers."""
     env = make_atari(env_id)
     env.seed(seed + rank)
     if record_video:
         # One video directory per worker rank.
         video_dir = os.path.join(output_dir, 'video/env-%d' % rank)
         ensure_dir(video_dir)
         env = Monitor(
             env,
             video_dir,
             video_callable=lambda ep: ep % record_video_freq == 0,
             force=True,
         )
     return wrap_deepmind(env, episode_life=True, clip_rewards=True,
                          frame_stack=False)
Example #2
def run(env_id, model_path):
    """Restore a trained policy from a checkpoint and play one Atari env
    forever, rendering each frame and printing the running reward/value.

    Args:
        env_id: Gym Atari environment id (e.g. 'PongNoFrameskip-v4').
        model_path: directory containing the TF checkpoint to restore.
    """
    env = make_atari(env_id)
    env = wrap_deepmind(env,
                        episode_life=True,
                        clip_rewards=True,
                        frame_stack=False)

    num_env = 1
    valid_actions = valid_atari_actions(env, env_id)
    num_actions = len(valid_actions)

    # Observation buffer holding the last 4 stacked 84x84 frames.
    # (Removed unused local `ob_space` — it was assigned and never read.)
    obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
    next_obs = env.reset()
    obs = update_obs(obs, next_obs)

    steps = 0
    total_reward = 0

    with tf.Session() as sess:

        print('Loading Model %s' % model_path)
        policy = Policy(sess, num_actions, num_env, num_steps=1)

        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

        while True:
            env.render()
            actions, values = policy.step(obs)

            value = values[0]
            steps += 1
            # Map the policy's action index back to the env's raw action id.
            next_obs, rewards, dones, info = env.step(
                valid_actions[actions[0]])
            total_reward += rewards
            print('%d: reward=%f value=%f' % (steps, total_reward, value))
            obs = update_obs(obs, next_obs)

            if dones:
                print('DONE')
                steps = 0
                total_reward = 0
                next_obs = env.reset()
                # Clear the frame stack so the new episode starts fresh.
                obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
                obs = update_obs(obs, next_obs)
                time.sleep(2)
Example #3
def main():
    """Play Pong forever with random actions, rendering and printing each
    step.

    Fixes a counter bug: the step counter ``t`` was printed and reset on
    episode end but never incremented, so every line reported step 1.
    """
    env_id = 'PongNoFrameskip-v4'
    seed = randint(0, 1000000)

    env = make_atari(env_id, frame_skip=1)
    env.seed(seed)
    env = wrap_deepmind(env)
    env.reset()

    t = 1
    while True:
        env.render()
        next_obs, reward, done, info = env.step(env.action_space.sample())
        print('%d: reward=%f done=%s info=%s' % (t, reward, done, info))
        t += 1  # fix: counter previously never advanced

        if done:
            print('DONE')
            next_obs = env.reset()
            t = 1
            time.sleep(2)
Example #4
def run(env_id, model_path, record_video, video_path=None):
    """Restore a trained CNN policy and play the env forever, optionally
    recording one video file per episode.

    Args:
        env_id: Gym Atari environment id.
        model_path: directory containing the TF checkpoint to restore.
        record_video: when True, capture rendered frames to video files.
        video_path: directory for the video files (required when
            record_video is True).
    """
    env = make_atari(env_id)
    env = wrap_deepmind(env,
                        episode_life=True,
                        clip_rewards=True,
                        frame_stack=False)

    num_env = 1
    ob_space = env.observation_space
    ac_space = env.action_space

    # Observation buffer holding the last 4 stacked 84x84 frames.
    obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
    next_obs = env.reset()
    obs = update_obs(obs, next_obs)

    ep = 1
    steps = 0
    total_reward = 0

    with tf.Session() as sess:

        print('Loading Model %s' % model_path)
        policy = CnnPolicy(sess, ob_space, ac_space, nbatch=1, nsteps=1)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

        ts = ts_rand()
        if record_video:
            ensure_dir(video_path)
            video_recorder = VideoRecorder(env,
                                           path=ep_video_path(
                                               video_path, ts, env_id, ep))

        while True:
            env.render()

            if record_video:
                video_recorder.capture_frame()

            actions, values, _ = policy.step(obs)
            value = values[0]
            steps += 1
            next_obs, rewards, dones, info = env.step(actions)
            total_reward += rewards
            print('%d: reward=%f value=%f' % (steps, total_reward, value))
            obs = update_obs(obs, next_obs)

            if dones:
                print('DONE')
                ep += 1
                steps = 0
                total_reward = 0
                next_obs = env.reset()
                # Clear the frame stack so the new episode starts fresh.
                obs = np.zeros((num_env, 84, 84, 4), dtype=np.uint8)
                obs = update_obs(obs, next_obs)

                # Close the finished episode's video and open a new one.
                if record_video:
                    video_recorder.close()
                    video_recorder = VideoRecorder(env,
                                                   path=ep_video_path(
                                                       video_path, ts, env_id,
                                                       ep),
                                                   enabled=record_video)
                # Removed stray no-op string expression ('`') that followed
                # the if-block in the original.
                time.sleep(2)