def test_rollout(self, env_name, rollout_len):
    """Checks that a random-policy rollout leaves a ttyrec recording behind.

    An environment named ``env_name`` is created with a temporary directory
    as its ``savedir``, handed to ``rollout_env`` together with
    ``rollout_len``, and closed. The test then asserts that the recording
    file named after the current process id exists in that directory.
    """
    with tempfile.TemporaryDirectory() as savedir:
        env = gym.make(env_name, savedir=savedir)
        rollout_env(env, rollout_len)
        env.close()

        # The expected file name embeds the pid of this test process.
        ttyrec_name = "nle.%i.0.ttyrec.bz2" % os.getpid()
        ttyrec_path = os.path.join(savedir, ttyrec_name)
        assert os.path.exists(ttyrec_path)
def test_rollout(self, env_name, rollout_len):
    """Checks that a random-policy rollout writes its ttyrec and xlogfile.

    An environment named ``env_name`` is created with ``save_ttyrec_every=1``
    and a temporary directory as its ``savedir``, handed to ``rollout_env``
    together with ``rollout_len``, and closed. The test then asserts that
    both expected output files exist in that directory.
    """
    with tempfile.TemporaryDirectory() as savedir:
        env = gym.make(env_name, save_ttyrec_every=1, savedir=savedir)
        rollout_env(env, rollout_len)
        env.close()

        # Both file names embed the pid of this test process.
        pid = os.getpid()

        ttyrec_name = "nle.%i.0.ttyrec%i.bz2" % (pid, nethack.TTYREC_VERSION)
        assert os.path.exists(os.path.join(savedir, ttyrec_name))

        xlogfile_name = "nle.%i.xlogfile" % pid
        assert os.path.exists(os.path.join(savedir, xlogfile_name))
def rollout_env(env, max_rollout_len):
    """Produces a rollout and asserts step outputs.

    Does *not* assume that the environment has already been reset:
    ``env.reset()`` is called first. Actions sampled from
    ``env.action_space`` are then stepped until the environment reports
    ``done`` or ``max_rollout_len`` steps have been taken, whichever comes
    first. At least one step is always taken, even when ``max_rollout_len``
    is zero or negative. The environment is closed before returning, also
    when an assertion fails.

    Args:
        env: Environment object providing ``reset``, ``step``, ``close``,
            ``action_space.sample`` and ``observation_space.contains``.
        max_rollout_len: Upper bound on the number of steps to take.

    Raises:
        AssertionError: If an observation is rejected by the observation
            space or a step output has an unexpected type.
    """
    try:
        obs = env.reset()
        assert env.observation_space.contains(obs)

        step = 0
        while True:
            a = env.action_space.sample()
            obs, reward, done, info = env.step(a)
            assert env.observation_space.contains(obs)
            assert isinstance(reward, float)
            assert isinstance(done, bool)
            assert isinstance(info, dict)
            # Count the step just taken; without this the length limit could
            # never trigger and the loop only ended on `done`.
            step += 1
            if done or step >= max_rollout_len:
                break
    finally:
        # Release the environment even if one of the checks above failed.
        env.close()
def rollout_env(env, max_rollout_len):
    """Produces a rollout and asserts step outputs.

    Returns final reward. Does not assume that the environment has already
    been reset: ``env.reset()`` is called first. Actions sampled from
    ``env.action_space`` are then stepped at most ``max_rollout_len`` times,
    stopping early once the environment reports ``done``. The environment is
    closed before returning, also when an assertion fails.

    Args:
        env: Environment object providing ``reset``, ``step``, ``close``,
            ``action_space.sample`` and ``observation_space.contains``.
        max_rollout_len: Maximum number of steps to take.

    Returns:
        The reward of the last step taken, or ``None`` if no step was taken
        (``max_rollout_len`` zero or negative).

    Raises:
        AssertionError: If an observation is rejected by the observation
            space or a step output has an unexpected type.
    """
    # Defined up front so a zero-length rollout returns None instead of
    # failing with UnboundLocalError at the return statement.
    reward = None
    try:
        obs = env.reset()
        assert env.observation_space.contains(obs)

        for _ in range(max_rollout_len):
            a = env.action_space.sample()
            obs, reward, done, info = env.step(a)
            assert env.observation_space.contains(obs)
            assert isinstance(reward, float)
            assert isinstance(done, bool)
            assert isinstance(info, dict)
            if done:
                break
    finally:
        # Release the environment even if one of the checks above failed.
        env.close()
    return reward