Ejemplo n.º 1
0
    def test_kick_and_quit(self, env):
        """Kick, check for the direction prompt, then quit and expect zero reward."""
        env.reset()

        # After KICK the message observation must contain the direction prompt.
        kick_action = env.actions.index(nethack.Command.KICK)
        observation, _, _, _ = env.step(kick_action)
        message = bytes(observation["message"])
        assert b"In what direction? " in message
        env.step(nethack.MiscAction.MORE)

        # Hack to quit: send the quit key straight to the underlying nethack
        # object, then answer the follow-up with "y" through the env.
        env.nethack.step(nethack.M("q"))
        confirm_action = env.actions.index(ord("y"))
        _, reward, done, _ = env.step(confirm_action)

        assert done
        assert reward == 0.0
Ejemplo n.º 2
0
    def test_ttyrec_every(self):
        """Check which files appear in savedir when save_ttyrec_every=2.

        Plays ten short episodes, each ended by the same fixed key sequence.
        After every even-numbered episode the save directory (the current
        working directory) is inspected for the expected ttyrec and the
        xlogfile. At the end the xlogfile must hold one line per episode.
        """
        savedir = pathlib.Path(".")
        env = gym.make("NetHackChallenge-v0", save_ttyrec_every=2, savedir=str(savedir))
        pid = os.getpid()
        xlogfile_name = "nle.%i.xlogfile" % pid
        quit_keys = (ord(" "), ord(" "), ord("<"), ord("y"))

        for episode in range(10):
            env.reset()
            for key in quit_keys:
                _, _, done, *_ = env.step(env.actions.index(key))
            assert done

            if episode % 2 == 0:
                files = {str(entry) for entry in savedir.iterdir()}
                # `files` includes xlogfile and ttyrecs.
                assert len(files) - 1 == episode // 2 + 1
                ttyrec_name = "nle.%i.%i.ttyrec%i.bz2" % (
                    pid,
                    episode,
                    nethack.TTYREC_VERSION,
                )
                assert ttyrec_name in files
                assert xlogfile_name in files

        with open(xlogfile_name, "r") as f:
            lines = f.readlines()

        assert len(lines) == 10
Ejemplo n.º 3
0
 def test_render_ansi(self, env_name, rollout_len):
     """Render in "ansi" mode after every random step and check the output size.

     The rendered value must be a string whose length, newlines excluded,
     equals the product of nle.env.DUNGEON_SHAPE.
     """
     env = gym.make(env_name)
     env.reset()
     for _step in range(rollout_len):
         _, _, done, _ = env.step(env.action_space.sample())
         if done:
             # Start a fresh episode so rendering happens on a live game.
             env.reset()
         rendered = env.render(mode="ansi")
         assert isinstance(rendered, str)
         visible = rendered.replace("\n", "")
         assert len(visible) == np.prod(nle.env.DUNGEON_SHAPE)
Ejemplo n.º 4
0
def main(args):
    """Time random stepping of a vectorised environment and print the FPS.

    Reads num_steps, num_env and subproc from args. Each step draws one
    random integer in [0, 8) and sends that same action to every env.
    The env is reset whenever (i - 1) is a multiple of 200, i.e. at
    steps 1, 201, 401, ...
    """
    env = make_venv(args)
    env.reset()

    began = time.time()
    for step_idx in range(args.num_steps):
        actions = [np.random.randint(8)] * args.num_env
        env.step(actions)
        if (step_idx - 1) % 200 == 0:
            env.reset()
    elapsed = time.time() - began

    report = "Took {:.2f}s with subproc={} on {} steps on {} envs - {:.2f} FPS"
    print(
        report.format(
            elapsed,
            args.subproc,
            args.num_steps,
            args.num_env,
            args.num_steps / elapsed,
        )
    )
Ejemplo n.º 5
0
    def test_final_reward(self, env):
        """Check that the step which ends an episode yields a reward of 0.0.

        Takes up to 100 random steps. If the episode ends during them, the
        terminal reward is checked right away. Otherwise the game is quit
        by hand and the reward of the quitting step is checked.
        """
        env.reset()

        for _ in range(100):
            _, reward, done, _ = env.step(env.action_space.sample())
            if done:
                # Episode ended during random play.
                assert reward == 0.0
                return

        # Hopefully, we got some positive reward by now.

        # Get out of any menu / yn_function.
        enter_action = env.actions.index(ord("\r"))
        env.step(enter_action)

        # Hack to quit: send the quit key straight to the underlying nethack
        # object, then answer the follow-up with "y" through the env.
        env.nethack.step(nethack.M("q"))
        confirm_action = env.actions.index(ord("y"))
        _, reward, done, _ = env.step(confirm_action)

        assert done
        assert reward == 0.0
Ejemplo n.º 6
0
def rollout_env(env, max_rollout_len):
    """Produces a rollout and asserts step outputs.

    Does *not* assume that the environment has already been reset.

    Args:
        env: Object exposing reset(), step(action), close(),
            action_space.sample() and observation_space.contains(obs).
        max_rollout_len: Upper bound on the number of steps taken. At least
            one step is always performed, even if this is zero or negative.

    The rollout stops as soon as the environment reports `done` or the
    step budget is used up, and the environment is then closed.
    """
    obs = env.reset()
    assert env.observation_space.contains(obs)

    step = 0
    while True:
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        # Count the step; without this the length limit below never
        # triggers and the rollout is bounded only by `done`.
        step += 1
        assert env.observation_space.contains(obs)
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
        if done or step >= max_rollout_len:
            break
    env.close()
Ejemplo n.º 7
0
def rollout_env(env, max_rollout_len):
    """Produces a rollout and asserts step outputs.

    Does not assume that the environment has already been reset.

    Args:
        env: Object exposing reset(), step(action), close(),
            action_space.sample() and observation_space.contains(obs).
        max_rollout_len: Maximum number of steps to take.

    Returns:
        The reward of the last step taken, or 0.0 if no step was taken
        (max_rollout_len <= 0).
    """
    obs = env.reset()
    assert env.observation_space.contains(obs)

    # Defined up front so a zero-length rollout returns a value instead of
    # raising UnboundLocalError at the `return` below.
    reward = 0.0
    for _ in range(max_rollout_len):
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        assert env.observation_space.contains(obs)
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
        if done:
            break
    env.close()
    return reward
Ejemplo n.º 8
0
 def test_no_reset(self, env):
     """Stepping an env that was never reset must raise RuntimeError."""
     expect_error = pytest.raises(RuntimeError, match="step called without reset()")
     with expect_error:
         env.step(0)