def step(self, command: str):
    """Apply a text command to the game and return the resulting state.

    The command is matched against the `_valid_commands` list stored in the
    previous state. When it matches, the corresponding action is applied to
    the game progression and the move counter is incremented. Otherwise the
    game is left untouched and the feedback is set to "Invalid command.".

    Arguments:
        command: Text command to execute (surrounding whitespace is ignored).

    Returns:
        A tuple `(state, score, done)` where `state` is the new game state,
        `score` is the progression's current score and `done` tells whether
        the game has been won or lost.
    """
    command = command.strip()
    self._prev_state = self.state

    self.state = GameState()
    self.state.last_command = command
    self.state.raw = DEFAULT_OBSERVATION
    self.state.feedback = DEFAULT_OBSERVATION
    self._previous_winning_policy = self._current_winning_policy

    self._last_action = None
    try:
        # Find the action corresponding to the command.
        idx = self._prev_state["_valid_commands"].index(command)
        self._last_action = self._game_progression.valid_actions[idx]
        # An action that affects the state of the game.
        self._game_progression.update(self._last_action)
        self._current_winning_policy = self._game_progression.winning_policy
        self._moves += 1
    except ValueError:
        # Unknown command: we assume nothing happened in the game.
        self.state.feedback = "Invalid command."

    self._gather_infos()
    # Expose the score itself, not the whole progression object (the latter
    # is already available under the "_game_progression" key).
    self.state["score"] = self._game_progression.score
    self.state["done"] = self.state["won"] or self.state["lost"]
    return self.state, self.state["score"], self.state["done"]
def reset(self):
    """Restart the loaded game and return its initial state.

    Raises:
        GameNotRunningError: If no game is currently running.
    """
    if self.game_running:
        self.state = GameState()
        # The second value returned by the interpreter's reset is not needed.
        observation, _ = self._jericho.reset()
        self.state.raw = observation
        self._gather_infos()
        # From now on `step` is allowed.
        self._reset = True
        return self.state

    raise GameNotRunningError(
        "Call env.load(gamefile) before env.reset().")
def step(self, command):
    """Send a command to the interpreter and return `(state, score, done)`.

    Raises:
        GameNotRunningError: If no game is running or `reset` has not been
            called yet.
    """
    if not (self.game_running and self._reset):
        raise GameNotRunningError()

    self.state = GameState()
    self.state.last_command = command.strip()
    # As of Jericho >= 2.1.0, the reward is returned instead of the score,
    # so only the observation and the termination flag are kept here.
    observation, _, finished, _ = self._jericho.step(self.state.last_command)
    self.state.raw = observation
    self.state.done = finished
    self._gather_infos()
    return self.state, self.state.score, self.state.done
def reset(self):
    """Start a new episode of the loaded game and return its initial state.

    Quest tracking is always enabled for the new game progression.
    """
    self._prev_state = None
    self.state = GameState()

    progression = GameProgression(self._game, track_quests=True)
    self._game_progression = progression

    # Nothing has been played yet.
    self._last_action = self._previous_winning_policy = None
    self._current_winning_policy = progression.winning_policy
    self._moves = 0

    self.state.raw = self.state.feedback = DEFAULT_OBSERVATION
    self._gather_infos()
    return self.state
def reset(self):
    """Start a new episode of the loaded game and return its initial state.

    Quests are tracked only when the requested infos include the
    intermediate reward or the policy commands.
    """
    self._prev_state = None
    self.state = GameState()

    requested = self.infos
    needs_quests = requested.intermediate_reward or requested.policy_commands
    progression = GameProgression(self._game, track_quests=needs_quests)
    self._game_progression = progression

    # Nothing has been played yet.
    self._last_action = self._previous_winning_policy = None
    self._current_winning_policy = progression.winning_policy
    self._moves = 0

    self.state.raw = self.state.feedback = DEFAULT_OBSERVATION
    self._gather_infos()
    return self.state
def _get_requested_infos(self, game_state: GameState): infos = {attr: getattr(game_state, attr) for attr in self.infos.basics} if self.infos.extras: for attr in self.infos.extras: key = "extra.{}".format(attr) infos[key] = game_state.get(key) return infos
def reset(self):
    """Restart the game in a fresh interpreter and return the initial state."""
    # Shut down the interpreter in case one is already running.
    self.close()

    # Start the game using Jericho.
    emulator = jericho.FrotzEnv(self.gamefile, self._seed)
    self._jericho = emulator

    self.state = GameState()
    # The second value returned by the interpreter's reset is not needed.
    observation, _ = emulator.reset()
    self.state.raw = observation
    self._gather_infos()
    return self.state
def step(self, command: str) -> tuple:
    """Send a command to the interpreter and return `(state, score, done)`.

    The raw interpreter output is stored in `state.raw` and, once stripped
    of the input prompt symbol, in `state.feedback`. The score and done
    flag are only placeholders here (0 and False).

    Arguments:
        command: Text command to send (surrounding whitespace is ignored).

    Returns:
        A tuple `(state, score, done)`.

    Raises:
        GameNotRunningError: If the game is not running, or if the
            interpreter returned no output for the command.
    """
    if not self.game_running:
        raise GameNotRunningError()

    self.state = GameState()
    self.state.last_command = command.strip()
    self.state.raw = self._send(self.state.last_command)
    if self.state.raw is None:
        # No output at all is treated as the game having stopped.
        raise GameNotRunningError()

    self.state.feedback = _strip_input_prompt_symbol(self.state.raw)
    self.state.score = 0  # Default value.
    self.state.done = False  # Default value.
    return self.state, self.state.score, self.state.done
def reset(self):
    """Restart the game in a fresh interpreter and return the initial state."""
    # Jericho is not thread-safe, so it is imported locally: that way its
    # global memory is not shared across multiple processes when forking.
    import jericho

    # Shut down the interpreter in case one is already running.
    self.close()

    # Start the game using Jericho.
    emulator = jericho.FrotzEnv(self.gamefile, self._seed)
    self._jericho = emulator

    self.state = GameState()
    # The second value returned by the interpreter's reset is not needed.
    observation, _ = emulator.reset()
    self.state.raw = observation
    self._gather_infos()
    return self.state
def reset(self) -> GameState:
    """Launch the game in a new interpreter process and return its first state.

    Any existing process is terminated first. The interpreter is started
    as a subprocess and its initial output becomes both the `feedback`
    and the `raw` fields of the new state, after the input prompt symbol
    has been stripped.

    Returns:
        The initial game state.

    Raises:
        ValueError: If the interpreter produced no output at start-up.
    """
    self.close()  # Terminate existing process if needed.

    self._names_struct = ffi.new('struct sock_names*')
    lib.init_glulx(self._names_struct)
    sock_name = ffi.string(self._names_struct.sock_name).decode('utf-8')
    self._process = subprocess.Popen(["%s/git-glulx-ml" % (GLULX_PATH,), self._gamefile, '-g', sock_name, '-q'])

    c_feedback = lib.get_output_nosend(self._names_struct)
    if c_feedback == ffi.NULL:
        # Clean up before reporting the failure.
        self.close()
        raise ValueError("Game failed to start properly: {}.".format(self._gamefile))

    # Have the C buffer released with `lib.free` when it is garbage collected.
    c_feedback = ffi.gc(c_feedback, lib.free)

    feedback = ffi.string(c_feedback).decode('utf-8')
    feedback = _strip_input_prompt_symbol(feedback)
    self.state = GameState(feedback=feedback, raw=feedback)
    return self.state
class TextWorldEnv(textworld.Environment):
    """
    Environment for playing games by TextWorld.
    """

    def __init__(self, infos: Optional[EnvInfos] = None) -> None:
        """
        Arguments:
            infos: Information to be included in the game state. By
                   default, only the game's narrative is included.
        """
        super().__init__(infos)
        self._gamefile = None
        self._game = None
        self._inform7 = None
        self._last_action = None
        self._prev_state = None
        self._previous_winning_policy = None
        self._current_winning_policy = None
        self._moves = None
        self._game_progression = None

    def load(self, path: str) -> None:
        """ Load the game stored at `path`; any ongoing progression is discarded. """
        self._gamefile = path
        self._game = textworld.Game.load(self._gamefile)
        self._game_progression = None
        self._inform7 = Inform7Game(self._game)

    def _gather_infos(self):
        """ Fill the current state with information about the game and its progression. """
        self.state["game"] = self._game
        self.state["command_templates"] = self._game.command_templates
        self.state["verbs"] = self._game.verbs
        self.state["entities"] = self._game.entity_names
        self.state["objective"] = self._game.objective
        self.state["max_score"] = self._game.max_score

        for k, v in self._game.metadata.items():
            self.state["extra.{}".format(k)] = v

        self.state["_game_progression"] = self._game_progression
        self.state["_facts"] = list(self._game_progression.state.facts)

        self.state["won"] = self._game_progression.completed
        self.state["lost"] = self._game_progression.failed

        self.state["_winning_policy"] = self._current_winning_policy
        if self.infos.policy_commands:
            self.state["policy_commands"] = []
            if self._game_progression.winning_policy is not None:
                self.state["policy_commands"] = self._inform7.gen_commands_from_actions(self._current_winning_policy)

        if self.infos.intermediate_reward:
            self.state["intermediate_reward"] = 0
            if self.state["won"]:
                # The last action led to winning the game.
                self.state["intermediate_reward"] = 1
            elif self.state["lost"]:
                # The last action led to losing the game.
                self.state["intermediate_reward"] = -1
            elif self._previous_winning_policy is None:
                self.state["intermediate_reward"] = 0
            else:
                # Positive when the winning policy got shorter, negative when it got longer.
                diff = len(self._previous_winning_policy) - len(self._current_winning_policy)
                self.state["intermediate_reward"] = int(diff > 0) - int(diff < 0)  # Sign function.

        if self.infos.facts:
            self.state["facts"] = list(map(self._inform7.get_human_readable_fact, self.state["_facts"]))

        self.state["last_action"] = None
        self.state["_last_action"] = self._last_action
        if self.infos.last_action and self._last_action is not None:
            self.state["last_action"] = self._inform7.get_human_readable_action(self._last_action)

        self.state["_valid_actions"] = self._game_progression.valid_actions
        self.state["_valid_commands"] = self._inform7.gen_commands_from_actions(self._game_progression.valid_actions)
        # To guarantee the order from one execution to another, we sort the commands.
        # Remove any potential duplicate commands (they would lead to the same result anyway).
        self.state["admissible_commands"] = sorted(set(self.state["_valid_commands"]))

        if self.infos.moves:
            self.state["moves"] = self._moves

    def reset(self):
        """ Start a new episode of the loaded game and return its initial state. """
        self._prev_state = None
        self.state = GameState()
        # Quests only need tracking when an info that depends on them is requested.
        track_quests = (self.infos.intermediate_reward or self.infos.policy_commands)
        self._game_progression = GameProgression(self._game, track_quests=track_quests)
        self._last_action = None
        self._previous_winning_policy = None
        self._current_winning_policy = self._game_progression.winning_policy
        self._moves = 0

        self.state.raw = DEFAULT_OBSERVATION
        self.state.feedback = DEFAULT_OBSERVATION
        self._gather_infos()
        return self.state

    def step(self, command: str):
        """
        Apply a text command to the game.

        The command is matched against the `_valid_commands` of the previous
        state. An unknown command leaves the game untouched and sets the
        feedback to "Invalid command.".

        Arguments:
            command: Text command to execute (surrounding whitespace is ignored).

        Returns:
            A tuple `(state, score, done)` where `score` is the progression's
            current score and `done` tells whether the game was won or lost.
        """
        command = command.strip()
        self._prev_state = self.state

        self.state = GameState()
        self.state.last_command = command
        self.state.raw = DEFAULT_OBSERVATION
        self.state.feedback = DEFAULT_OBSERVATION
        self._previous_winning_policy = self._current_winning_policy

        self._last_action = None
        try:
            # Find the action corresponding to the command.
            idx = self._prev_state["_valid_commands"].index(command)
            self._last_action = self._game_progression.valid_actions[idx]
            # An action that affects the state of the game.
            self._game_progression.update(self._last_action)
            self._current_winning_policy = self._game_progression.winning_policy
            self._moves += 1
        except ValueError:
            # Unknown command: we assume nothing happened in the game.
            self.state.feedback = "Invalid command."

        self._gather_infos()
        # Expose the score itself, not the whole progression object (the
        # latter is already available under the "_game_progression" key).
        self.state["score"] = self._game_progression.score
        self.state["done"] = self.state["won"] or self.state["lost"]
        return self.state, self.state["score"], self.state["done"]

    def copy(self) -> "TextWorldEnv":
        """ Return a copy of this environment.

        It is safe to call `step` and `reset` on the copied environment.

        .. warning:: The `Game` and `Inform7Game` private objects are *soft* copies.
        """
        env = TextWorldEnv()

        # Copy core Environment's attributes.
        env.state = self.state.copy()
        env.infos = self.infos.copy()

        env._gamefile = self._gamefile
        env._game = self._game  # Reference
        env._inform7 = self._inform7  # Reference

        env._prev_state = self._prev_state.copy() if self._prev_state is not None else None
        env._last_action = self._last_action
        env._moves = self._moves
        if self._previous_winning_policy is not None:
            env._previous_winning_policy = tuple(self._previous_winning_policy)

        if self._current_winning_policy is not None:
            env._current_winning_policy = tuple(self._current_winning_policy)

        if self._game_progression is not None:
            env._game_progression = self._game_progression.copy()

        return env
class JerichoEnv(textworld.Environment):
    """ Environment for playing Z-machine games through Jericho. """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._seed = -1
        self._jericho = None
        self.gamefile = None
        self._reset = False

    def load(self, z_file: str) -> None:
        """ Load a game file, starting the interpreter if needed.

        Raises:
            ValueError: If the file extension does not start with ".z".
            FileNotFoundError: If the file does not exist.
        """
        self.gamefile = os.path.abspath(z_file)
        _, ext = os.path.splitext(os.path.basename(self.gamefile))

        # Check if game is supported by Jericho.
        if not ext.startswith(".z"):
            raise ValueError("Only .z[1-8] files are supported!")

        if not os.path.isfile(self.gamefile):
            raise FileNotFoundError(self.gamefile)

        if self._jericho is None:
            # Start the game using Jericho.
            self._jericho = jericho.FrotzEnv(self.gamefile, self._seed)
        else:
            self._jericho.load(self.gamefile)

    def __del__(self) -> None:
        # `__init__` may have failed before `_jericho` was assigned, in which
        # case there is nothing to close and the attribute does not exist.
        if getattr(self, "_jericho", None) is not None:
            self.close()

    @property
    def game_running(self) -> bool:
        """ Determines if the game is still running. """
        return self._jericho is not None

    def seed(self, seed=None):
        """ Store the seed, forward it to the interpreter if any, and return it. """
        self._seed = seed
        if self._jericho:
            self._jericho.seed(self._seed)

        return self._seed

    def _gather_infos(self):
        """ Adds additional information to the internal state. """
        self.state.feedback = self.state.raw
        if not self._jericho.is_fully_supported:
            return  # No more information can be gathered.

        # Use the interpreter's `get_<attr>` method when it exists; otherwise
        # keep whatever the state already holds for that attribute.
        for attr in self.infos.basics:
            self.state[attr] = getattr(self._jericho, "get_" + attr, lambda: self.state.get(attr))()

        for attr in self.infos.extras:
            self.state["extra.{}".format(attr)] = getattr(self._jericho, "get_" + attr, lambda: None)()

        # Deal with information that has different method name in Jericho.
        self.state["won"] = self._jericho.victory()
        self.state["lost"] = self._jericho.game_over()
        self.state["score"] = self._jericho.get_score()
        self.state["moves"] = self._jericho.get_moves()
        self.state["location"] = self._jericho.get_player_location()

        if self.infos.description:
            # Probe with "look", then restore the interpreter state saved beforehand.
            bkp = self._jericho.get_state()
            self.state["description"], _, _, _ = self._jericho.step("look")
            self._jericho.set_state(bkp)

        if self.infos.inventory:
            # Probe with "inventory", then restore the interpreter state saved beforehand.
            bkp = self._jericho.get_state()
            self.state["inventory"], _, _, _ = self._jericho.step("inventory")
            self._jericho.set_state(bkp)

    def reset(self):
        """ Restart the loaded game and return its initial state.

        Raises:
            GameNotRunningError: If no game has been loaded.
        """
        if not self.game_running:
            raise GameNotRunningError(
                "Call env.load(gamefile) before env.reset().")

        self.state = GameState()
        self.state.raw, _ = self._jericho.reset()
        self._gather_infos()
        self._reset = True
        return self.state

    def _send(self, command: str) -> str:
        """ Send a command directly to the interpreter.

        This method will not affect the internal state variable.
        """
        feedback, _, _, _ = self._jericho.step(command)
        return feedback

    def step(self, command):
        """ Send a command to the interpreter and return `(state, score, done)`.

        Raises:
            GameNotRunningError: If no game is running or `reset` has not
                been called yet.
        """
        if not self.game_running or not self._reset:
            raise GameNotRunningError()

        self.state = GameState()
        self.state.last_command = command.strip()
        res = self._jericho.step(self.state.last_command)
        # As of Jericho >= 2.1.0, the reward is returned instead of the score.
        self.state.raw, _, self.state.done, _ = res
        self._gather_infos()
        return self.state, self.state.score, self.state.done

    def close(self):
        """ Shut down the interpreter, if any; `reset` is required again afterwards. """
        if self.game_running:
            self._jericho.close()
            self._jericho = None
            self._reset = False

    def copy(self) -> "JerichoEnv":
        """ Return a copy of this environment at the same state. """
        env = JerichoEnv(self.infos)
        env._seed = self._seed

        if self.gamefile:
            env.load(self.gamefile)

        if self._jericho:
            duplicate = self._jericho.copy()
            # `load()` above may have started a fresh interpreter; close it
            # before replacing it so it is not leaked.
            if env._jericho is not None:
                env._jericho.close()

            env._jericho = duplicate
            env._reset = True

        # Copy core Environment's attributes.
        env.state = self.state.copy()
        env.infos = self.infos.copy()
        return env
def setUpClass(cls):
    """Create the shared game state fixture holding one field of each kind."""
    cls.state = GameState()
    fixture_fields = (
        ("field_str", "value1"),
        ("field_int", 42),
        ("field_float", 4.2),
        ("field_list", ["str", -1, True, 1.2]),
    )
    # Fields are inserted in the order listed above.
    for field_name, field_value in fixture_fields:
        cls.state[field_name] = field_value