def test_run(self):
    # Drives a NetHack instance through a handful of MORE actions, checks
    # the player glyph/monster data after each step, then verifies that
    # deleting the game object terminates the child process.

    # mktemp builds "<dir>/<prefix><random><suffix>", so the ".zip" part has
    # to be the suffix for the archive to get a proper file extension.
    archivefile = tempfile.mktemp(prefix="nethack_test", suffix=".zip")
    game = nethack.NetHack(archivefile=archivefile)

    response = game.reset()
    actions = [
        nethack.MiscAction.MORE,
        nethack.MiscAction.MORE,
        nethack.MiscAction.MORE,
        nethack.MiscAction.MORE,
        nethack.MiscAction.MORE,
        nethack.MiscAction.MORE,
    ]

    for action in actions:
        # Keep sending MORE until the program state reports the move loop.
        while not response.ProgramState().InMoveloop():
            response, done, info = game.step(nethack.MiscAction.MORE)

        response, done, info = game.step(action)
        if done:
            # Only the good die young.
            response = game.reset()

        obs = response.Observation()
        chars = _fb_ndarray_to_np(obs.Chars())
        glyphs = _fb_ndarray_to_np(obs.Glyphs())

        status = response.Blstats()
        x, y = status.CursX(), status.CursY()

        # Exactly one "@" on the map, and it sits at the cursor position.
        self.assertEqual(np.count_nonzero(chars == ord("@")), 1)
        self.assertEqual(chars[y, x], ord("@"))

        mon = nethack.permonst(nethack.glyph_to_mon(glyphs[y][x]))
        self.assertEqual(mon.mname, "monk")
        self.assertEqual(mon.mlevel, 10)

        class_sym = nethack.class_sym.from_mlet(mon.mlet)
        self.assertEqual(class_sym.sym, "@")
        self.assertEqual(class_sym.explain, "human or elf")

    # With WNOHANG, (0, 0) means the child exists and has not exited yet.
    self.assertEqual(os.waitpid(info["pid"], os.WNOHANG), (0, 0))

    del game  # Should kill process.

    with self.assertRaisesRegex(OSError, "No (child|such)? process"):
        os.waitpid(info["pid"], 0)
def main():
    """Play episodes in an endless loop, printing throughput after each one.

    A single NetHack instance writing to "random.zip" is reused for every
    episode. After each call to ``play`` the episode count, the cumulative
    step count and the steps-per-second of that episode are printed.
    This function never returns on its own.
    """
    episodes = 0
    steps = 0

    game = nethack.NetHack(archivefile="random.zip")

    start = timeit.default_timer()
    while True:
        steps_delta = play(game, False)
        time_delta = timeit.default_timer() - start

        episodes += 1
        steps += steps_delta

        # SPS is computed from this episode's steps and duration only.
        print(
            "Episode: %i. Steps: %i. SPS: %f"
            % (episodes, steps, steps_delta / time_delta)
        )
        start = timeit.default_timer()
def play(should_stop, queue):
    """Step a NetHack game with random actions until asked to stop.

    Args:
        should_stop: object whose ``is_set()`` is polled before every step;
            the loop ends once it returns a true value.
        queue: object whose ``put()`` receives the step count of the episode
            that just finished (a 0 is put before the very first reset).
    """
    step_limit = 1000
    game = nethack.NetHack(archivefile=None)

    # Starting with episode_over=True forces a reset on the first iteration.
    episode_over = True
    step_count = 0

    while True:
        if should_stop.is_set():
            break

        if episode_over or step_count >= step_limit:
            queue.put(step_count)
            step_count = 0
            observation = game.reset()

        # Answer a pending "wait for space" prompt with MORE; otherwise pick
        # a random action.
        action = (
            nethack.MiscAction.MORE
            if observation.Internal() and observation.Internal().Xwaitforspace()
            else random.choice(ACTIONS)
        )

        observation, episode_over, _ = game.step(action)
        step_count += 1
def __init__(
    self,
    savedir=None,
    archivefile="nethack.%(pid)i.%(time)s.zip",
    character="mon-hum-neu-mal",
    max_episode_steps=5000,
    observation_keys=("glyphs", "status", "message", "inventory"),
    actions=None,
    options=None,
):
    """Constructs a new NLE environment.

    Args:
        savedir (str or None): directory the archives are written to. When
            None, a fresh timestamp-prefixed subdirectory is created under
            ``<cwd>/nle_data``. Defaults to None.
        archivefile (str or None): template for the zip archive filename
            of NetHack ttyrec files. Use "%(pid)i" for the process id of
            the NetHack process and "%(time)s" for the creation time.
            Pass None to disable writing archive files; no save directory
            is created in that case.
        character (str): name of character. Defaults to "mon-hum-neu-mal".
        max_episode_steps (int): maximum number of steps allowed before
            the game is forcefully quit. In such cases,
            ``info["end_status"]`` will be equal to
            ``StepStatus.ABORTED``. Defaults to 5000.
        observation_keys (list): keys to use when creating the
            observation. Defaults to all.
        actions (list): list of actions. If None, the full action space
            will be used, i.e. ``nle.nethack.ACTIONS``. Defaults to None.
        options (list): list of game options to initialize NetHack. If
            None, NetHack will be initialized with the options found in
            ``nle.nethack.NETHACKOPTIONS``. Defaults to None.
    """

    def _full_range_box(dtype, shape):
        # Box covering the whole representable range of an integer dtype.
        limits = np.iinfo(dtype)
        return gym.spaces.Box(
            low=limits.min, high=limits.max, shape=shape, dtype=dtype
        )

    self.character = character
    self._max_episode_steps = max_episode_steps
    self._actions = FULL_ACTIONS if actions is None else actions
    self.response = None

    if archivefile is None:
        self.savedir = None
        self.archivefile = None
    else:
        try:
            if savedir is not None:
                self.savedir = savedir
                # Raises FileExistsError when the directory already exists;
                # that case is handled (and only logged) below.
                os.makedirs(self.savedir)
            else:
                parent_dir = os.path.join(os.getcwd(), "nle_data")
                os.makedirs(parent_dir, exist_ok=True)
                # Create a unique subdirectory for us.
                self.savedir = tempfile.mkdtemp(
                    prefix=time.strftime("%Y%m%d-%H%M%S_"), dir=parent_dir
                )
        except FileExistsError:
            logger.info("Using existing savedir: %s", self.savedir)
        else:
            logger.info("Created savedir: %s", self.savedir)
        self.archivefile = os.path.join(self.savedir, archivefile)

    self._stats_file = None
    self._stats_logger = None
    self._setup_statsfile = archivefile is not None

    self.env = nethack.NetHack(
        archivefile=self.archivefile,
        options=options,
        playername="Agent%(pid)i-" + self.character,
    )
    self._random = random.SystemRandom()

    # -1 so that it's 0-based on first reset
    self._episode = -1

    # Every space is built, even for keys that end up unselected.
    all_spaces = {
        "glyphs": _full_range_box(np.int16, DUNGEON_SHAPE),
        "status": _full_range_box(np.int32, (23,)),
        "message": _full_range_box(np.uint8, (DEFAULT_MSG_PAD,)),
        "inventory": gym.spaces.Tuple(
            (
                _full_range_box(np.int16, (DEFAULT_INV_PAD,)),
                _full_range_box(
                    np.uint8, (DEFAULT_INV_PAD, DEFAULT_INVSTR_PAD)
                ),
                _full_range_box(np.uint8, (DEFAULT_INV_PAD,)),
                _full_range_box(np.uint8, (DEFAULT_INV_PAD,)),
            )
        ),
    }
    self.observation_space = gym.spaces.Dict(
        {key: all_spaces[key] for key in observation_keys}
    )

    # Keep only the extractor functions for the requested observation keys.
    all_key_functions = {
        "glyphs": _get_glyphs,
        "status": _get_status_fast,
        "message": _get_padded_message,
        "inventory": _get_padded_inv,
    }
    self._key_functions = {
        key: function
        for key, function in all_key_functions.items()
        if key in observation_keys
    }

    self.action_space = gym.spaces.Discrete(len(self._actions))
def test_forking_with_nethack_in_parent(self, num_procs=NUM_SUBPROCESSES):
    # Breaks for pyzmq <= 18.0.0, fixed in
    # https://github.com/zeromq/pyzmq/commit/28c2a36836fc45c09ede4d9962498db449b642d1  # noqa
    #
    # The parent-side instance is never used directly; it only has to exist
    # while the subprocesses run.
    parent_env = nethack.NetHack(archivefile=None)  # noqa: F841

    results = _run_nethack_in_subprocesses(num_procs)
    expected = [0] * num_procs
    self.assertEqual(results, expected)
def test_forking_with_nethack_in_parent_new_context(
    self, num_procs=NUM_SUBPROCESSES
):
    # Same scenario as the plain parent-process test, except the parent's
    # NetHack instance is given a freshly created zmq context.
    parent_env = nethack.NetHack(  # noqa: F841
        archivefile=None, context=zmq.Context()
    )

    results = _run_nethack_in_subprocesses(num_procs)
    expected = [0] * num_procs
    self.assertEqual(results, expected)
def _run_nethack():
    """Create a NetHack instance without an archive, reset it, and send
    action 0 three times."""
    env = nethack.NetHack(archivefile=None)
    env.reset()
    for _ in range(3):
        env.step(0)
def play(env_name, play_mode, ngames, max_steps, seeds, no_clear, no_render):
    """Run an interactive or scripted play loop for a number of episodes.

    Args:
        env_name: "nethack" selects the raw ``nethack.NetHack`` wrapper;
            any other value is passed to ``gym.make``.
        play_mode: forwarded to ``get_action`` to decide how actions are
            chosen.
        ngames: the loop stops once this many episodes have finished.
        max_steps: an episode is treated as done after this many steps.
        seeds: passed to ``env.seed`` when not None.
        no_clear: when true, the terminal is not cleared between frames.
        no_render: when true, nothing is rendered per step.

    Returns:
        None when the loop ends normally (``get_action`` returned None or
        ``ngames`` episodes finished). The current step count is returned
        early if a raw-env game-over observation has no windows.
    """
    is_raw_env = env_name == "nethack"

    if is_raw_env:
        # TODO save data somewhere reasonable
        env = nethack.NetHack(archivefile="./nle_data/play_data.zip")
    else:
        env = gym.make(env_name)
        if seeds is not None:
            env.seed(seeds)

    obs = env.reset()
    last_obs = None  # needed for "nethack" env

    steps = 0
    episodes = 0
    reward = 0.0
    action = None
    ch = None

    start_time = timeit.default_timer()
    while True:
        if not no_render:
            if not no_clear:
                os.system("cls" if os.name == "nt" else "clear")

            if not is_raw_env:
                print("Previous reward:", reward)
                print("Available actions:", env._actions)
                # Fixed: the format string used to end with a stray ")".
                print(
                    "Previous action: {}{!r}".format(
                        "{} --".format(chr(ch)) if ch is not None else "",
                        env._actions[action] if action is not None else None,
                    )
                )
                env.render()
            else:
                print("Available actions:", nle.env.base.FULL_ACTIONS)
                print("Previous actions:", action)
                print_message.print_message(obs)

        action = get_action(env, play_mode, is_raw_env)
        if action is None:
            break

        if is_raw_env:
            # Keep the pre-step observation: it is the one inspected for the
            # game-over windows once the episode ends.
            last_obs = obs
            obs, done, info = env.step(action)
        else:
            obs, reward, done, info = env.step(action)
        steps += 1

        done = done or steps >= max_steps

        if not done:
            continue

        if not is_raw_env:
            print("Final reward:", reward)
            print("End status:", info["end_status"].name)
            print("Env stats:")
            pprint.pprint(info["stats"])
        else:
            if last_obs.ProgramState().Gameover():
                # Print tombstone.
                if last_obs.WindowsLength() < 1:
                    return steps
                window = last_obs.Windows(1)
                if not no_render:
                    for i in range(window.StringsLength()):
                        print(window.Strings(i).decode("ascii"))

        time_delta = timeit.default_timer() - start_time
        print(
            "Episode: {}. Steps: {}. SPS: {:f}".format(
                episodes, steps, steps / time_delta
            )
        )
        start_time = timeit.default_timer()

        episodes += 1
        steps = 0

        if episodes == ngames:
            break

        # Fixed: keep the fresh observation so the next frame of the raw env
        # shows the new episode instead of the previous episode's last state.
        obs = env.reset()