Exemple #1
0
def wang2015_eval(game_name, act, stochastic):
    print("==================== wang2015 evaluation ====================")
    episode_rewards = []

    for num_noops in range(1, 31):
        env_monitored, eval_env = make_env(game_name)
        eval_env.unwrapped.seed(1)

        get_wrapper_by_name(eval_env, "NoopResetEnv").override_num_noops = num_noops

        eval_episode_steps = 0
        done = True
        while True:
            if done:
                obs = eval_env.reset()
            eval_episode_steps += 1
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

            obs, reward, done, info = eval_env.step(action)
            if done:
                obs = eval_env.reset()
            if len(info["rewards"]) > 0:
                episode_rewards.append(info["rewards"][0])
                break
            if info["steps"] > 108000:  # 5 minutes of gameplay
                episode_rewards.append(env_monitored._current_reward)
                break
        print("Num steps in episode {} was {} yielding {} reward".format(
              num_noops, eval_episode_steps, episode_rewards[-1]), flush=True)
    print("Evaluation results: " + str(np.mean(episode_rewards)))
    print("=============================================================")
    return np.mean(episode_rewards)
Exemple #2
0
def wang2015_eval(game_name, act, stochastic):
    print("==================== wang2015 evaluation ====================")
    episode_rewards = []

    for num_noops in range(1, 31):
        env_monitored, eval_env = make_env(game_name)
        eval_env.unwrapped.seed(1)

        get_wrapper_by_name(eval_env,
                            "NoopResetEnv").override_num_noops = num_noops

        eval_episode_steps = 0
        done = True
        while True:
            if done:
                obs = eval_env.reset()
            eval_episode_steps += 1
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

            obs, _reward, done, info = eval_env.step(action)
            if done:
                obs = eval_env.reset()
            if len(info["rewards"]) > 0:
                episode_rewards.append(info["rewards"][0])
                break
            if info["steps"] > 108000:  # 5 minutes of gameplay
                episode_rewards.append(sum(env_monitored.rewards))
                break
        print("Num steps in episode {} was {} yielding {} reward".format(
            num_noops, eval_episode_steps, episode_rewards[-1]),
              flush=True)
    print("Evaluation results: " + str(np.mean(episode_rewards)))
    print("=============================================================")
    return np.mean(episode_rewards)