예제 #1
0
파일: tw.py 프로젝트: zhaozj89/TextWorld
    def step(self, command: str):
        """ Execute a text command and advance the game accordingly.

        The command is looked up among the valid commands stored in the
        previous state. When found, the matching action is applied to the
        game progression and the move counter is incremented. Otherwise, the
        feedback is set to "Invalid command." and the game is left untouched.

        Arguments:
            command: Text command to execute. Surrounding whitespace is
                     stripped before the lookup.

        Returns:
            A tuple `(state, score, done)` where `state` is the new game
            state, `score` is the score reported by the game progression and
            `done` tells whether the game has been won or lost.
        """
        command = command.strip()
        self._prev_state = self.state

        self.state = GameState()
        self.state.last_command = command
        self.state.raw = DEFAULT_OBSERVATION
        self.state.feedback = DEFAULT_OBSERVATION
        self._previous_winning_policy = self._current_winning_policy

        self._last_action = None
        try:
            # Find the action corresponding to the command.
            idx = self._prev_state["_valid_commands"].index(command)
            self._last_action = self._game_progression.valid_actions[idx]
            # An action that affects the state of the game.
            self._game_progression.update(self._last_action)
            self._current_winning_policy = self._game_progression.winning_policy
            self._moves += 1
        except ValueError:
            # Unknown command: we assume nothing happened in the game.
            self.state.feedback = "Invalid command."

        self._gather_infos()
        # Expose the numeric score, not the GameProgression object itself.
        self.state["score"] = self._game_progression.score
        self.state["done"] = self.state["won"] or self.state["lost"]
        return self.state, self.state["score"], self.state["done"]
예제 #2
0
    def reset(self):
        """ Restart the loaded game and return its initial state.

        Raises:
            GameNotRunningError: If no game is currently running.
        """
        if self.game_running:
            self.state = GameState()
            initial_observation, _ = self._jericho.reset()
            self.state.raw = initial_observation
            self._gather_infos()
            # Remember that the game was reset so `step` can be called.
            self._reset = True
            return self.state

        raise GameNotRunningError(
            "Call env.load(gamefile) before env.reset().")
예제 #3
0
    def step(self, command):
        """ Send a command to Jericho and return `(state, score, done)`.

        Raises:
            GameNotRunningError: If no game is running or `reset` has not
                been called yet.
        """
        ready = self.game_running and self._reset
        if not ready:
            raise GameNotRunningError()

        self.state = GameState()
        self.state.last_command = command.strip()
        # As of Jericho >= 2.1.0, the reward is returned instead of the score,
        # hence the second element of the result is ignored here.
        observation, _, finished, _ = self._jericho.step(self.state.last_command)
        self.state.raw = observation
        self.state.done = finished
        self._gather_infos()
        return self.state, self.state.score, self.state.done
예제 #4
0
    def reset(self):
        """ Start the game over and return the initial game state.

        A fresh `GameProgression` (with quest tracking enabled) is created,
        all per-episode bookkeeping is cleared and the requested information
        is gathered into the new state.
        """
        self._prev_state = None
        self.state = GameState()

        progression = GameProgression(self._game, track_quests=True)
        self._game_progression = progression

        # Nothing has been played yet.
        self._last_action = self._previous_winning_policy = None
        self._current_winning_policy = progression.winning_policy
        self._moves = 0

        self.state.raw = self.state.feedback = DEFAULT_OBSERVATION
        self._gather_infos()
        return self.state
예제 #5
0
파일: tw.py 프로젝트: zhaozj89/TextWorld
    def reset(self):
        """ Start the game over and return the initial game state.

        Quests are tracked only when intermediate rewards or policy commands
        have been requested through `self.infos`.
        """
        self._prev_state = None
        self.state = GameState()

        needs_quest_tracking = (self.infos.intermediate_reward or self.infos.policy_commands)
        progression = GameProgression(self._game, track_quests=needs_quest_tracking)
        self._game_progression = progression

        # Nothing has been played yet.
        self._last_action = self._previous_winning_policy = None
        self._current_winning_policy = progression.winning_policy
        self._moves = 0

        self.state.raw = self.state.feedback = DEFAULT_OBSERVATION
        self._gather_infos()
        return self.state
예제 #6
0
    def _get_requested_infos(self, game_state: GameState):
        """ Extract from `game_state` the information listed in `self.infos`.

        Arguments:
            game_state: State to read the requested information from.

        Returns:
            A dict with one entry per basic attribute and, for each requested
            extra, one entry stored under the key "extra.<name>".
        """
        requested = {}
        for name in self.infos.basics:
            requested[name] = getattr(game_state, name)

        extras = self.infos.extras
        if not extras:
            return requested

        for name in extras:
            extra_key = "extra.{}".format(name)
            requested[extra_key] = game_state.get(extra_key)

        return requested
예제 #7
0
    def reset(self):
        """ Restart the game in a new Jericho emulator and return its initial state. """
        # Shut down the previous emulator, in case one is running.
        self.close()

        # Start the game using Jericho.
        emulator = jericho.FrotzEnv(self.gamefile, self._seed)
        self._jericho = emulator

        self.state = GameState()
        initial_observation, _ = emulator.reset()
        self.state.raw = initial_observation
        self._gather_infos()
        return self.state
예제 #8
0
    def step(self, command: str) -> tuple:
        """ Send a command to the running game and build the resulting state.

        Arguments:
            command: Text command to send. Surrounding whitespace is stripped.

        Returns:
            A tuple `(state, score, done)`. Here `score` and `done` are the
            default values assigned by this method (0 and False).

        Raises:
            GameNotRunningError: If the game is not running, or if sending
                the command produced no output (`None`).
        """
        if not self.game_running:
            raise GameNotRunningError()

        self.state = GameState()
        self.state.last_command = command.strip()
        self.state.raw = self._send(self.state.last_command)
        if self.state.raw is None:
            # No output came back from the game.
            raise GameNotRunningError()

        self.state.feedback = _strip_input_prompt_symbol(self.state.raw)
        self.state.score = 0  # Default value.
        self.state.done = False  # Default value.
        return self.state, self.state.score, self.state.done
예제 #9
0
    def reset(self):
        """ Restart the game in a new Jericho emulator and return its initial state. """
        # Since Jericho is not thread-safe, we load it locally so
        # its global memory is not shared across multiple processes
        # when forking.
        import jericho

        # Shut down the previous emulator, in case one is running.
        self.close()

        # Start the game using Jericho.
        emulator = jericho.FrotzEnv(self.gamefile, self._seed)
        self._jericho = emulator

        self.state = GameState()
        initial_observation, _ = emulator.reset()
        self.state.raw = initial_observation
        self._gather_infos()
        return self.state
예제 #10
0
    def reset(self) -> "GameState":
        """ Restart the game in a new interpreter process.

        Returns:
            The initial game state, whose `feedback` and `raw` fields both
            hold the first output of the game (input prompt stripped).

        Raises:
            ValueError: If no output could be read from the game at startup.
        """
        self.close()  # Terminate existing process if needed.

        # Structure filled in by `init_glulx`; it provides the socket name
        # passed to the interpreter process below.
        self._names_struct = ffi.new('struct sock_names*')

        lib.init_glulx(self._names_struct)
        sock_name = ffi.string(self._names_struct.sock_name).decode('utf-8')
        # Launch the interpreter on the game file.
        # NOTE(review): presumably '-g' gives the socket name and '-q' means quiet -- confirm.
        self._process = subprocess.Popen(["%s/git-glulx-ml" % (GLULX_PATH,), self._gamefile, '-g', sock_name, '-q'])
        # Read the game's first output without sending any command.
        c_feedback = lib.get_output_nosend(self._names_struct)
        if c_feedback == ffi.NULL:
            # Nothing could be read: clean up before reporting the failure.
            self.close()
            raise ValueError("Game failed to start properly: {}.".format(self._gamefile))
        # Have `lib.free` called on the C buffer once it is garbage collected.
        c_feedback = ffi.gc(c_feedback, lib.free)

        feedback = ffi.string(c_feedback).decode('utf-8')
        feedback = _strip_input_prompt_symbol(feedback)
        self.state = GameState(feedback=feedback, raw=feedback)
        return self.state
예제 #11
0
파일: tw.py 프로젝트: zhaozj89/TextWorld
class TextWorldEnv(textworld.Environment):
    """
    Environment for playing games by TextWorld.

    The game is simulated with a `GameProgression` object: commands are
    mapped to actions which are then applied to that progression. The
    observation is always `DEFAULT_OBSERVATION`, except for commands that
    are not valid, in which case the feedback is "Invalid command.".
    """

    def __init__(self, infos: Optional[EnvInfos] = None) -> None:
        """
        Arguments:
            infos: Information to be included in the game state. By
                   default, only the game's narrative is included.
        """
        super().__init__(infos)
        self._gamefile = None
        self._game = None
        self._inform7 = None
        self._last_action = None
        self._prev_state = None
        self._previous_winning_policy = None
        self._current_winning_policy = None
        self._moves = None
        self._game_progression = None

    def load(self, path: str) -> None:
        """ Load a game from a file.

        Any ongoing progression is discarded; `reset` must be called to
        start playing the newly loaded game.

        Arguments:
            path: Path to the game file to load.
        """
        self._gamefile = path
        self._game = textworld.Game.load(self._gamefile)
        self._game_progression = None
        self._inform7 = Inform7Game(self._game)

    def _gather_infos(self):
        """ Populate `self.state` from the game and its current progression.

        Static information about the game is always added. Policy commands,
        intermediate reward, human-readable facts, last action and number of
        moves are only added when requested through `self.infos`.
        """
        self.state["game"] = self._game
        self.state["command_templates"] = self._game.command_templates
        self.state["verbs"] = self._game.verbs
        self.state["entities"] = self._game.entity_names
        self.state["objective"] = self._game.objective
        self.state["max_score"] = self._game.max_score

        # Expose every game metadata under an "extra."-prefixed key.
        for k, v in self._game.metadata.items():
            self.state["extra.{}".format(k)] = v

        self.state["_game_progression"] = self._game_progression
        self.state["_facts"] = list(self._game_progression.state.facts)

        self.state["won"] = self._game_progression.completed
        self.state["lost"] = self._game_progression.failed

        self.state["_winning_policy"] = self._current_winning_policy
        if self.infos.policy_commands:
            self.state["policy_commands"] = []
            if self._game_progression.winning_policy is not None:
                self.state["policy_commands"] = self._inform7.gen_commands_from_actions(self._current_winning_policy)

        if self.infos.intermediate_reward:
            self.state["intermediate_reward"] = 0
            if self.state["won"]:
                # The last action led to winning the game.
                self.state["intermediate_reward"] = 1

            elif self.state["lost"]:
                # The last action led to losing the game.
                self.state["intermediate_reward"] = -1

            elif self._previous_winning_policy is None:
                # No previous policy to compare with (e.g., right after a reset).
                self.state["intermediate_reward"] = 0

            else:
                # +1 when the winning policy got shorter, -1 when it got
                # longer and 0 when its length did not change.
                diff = len(self._previous_winning_policy) - len(self._current_winning_policy)
                self.state["intermediate_reward"] = int(diff > 0) - int(diff < 0)  # Sign function.

        if self.infos.facts:
            self.state["facts"] = list(map(self._inform7.get_human_readable_fact, self.state["_facts"]))

        self.state["last_action"] = None
        self.state["_last_action"] = self._last_action
        if self.infos.last_action and self._last_action is not None:
            self.state["last_action"] = self._inform7.get_human_readable_action(self._last_action)

        # `_valid_commands` is kept aligned with `_valid_actions` so `step`
        # can map a command back to its action using its index.
        self.state["_valid_actions"] = self._game_progression.valid_actions
        self.state["_valid_commands"] = self._inform7.gen_commands_from_actions(self._game_progression.valid_actions)
        # To guarantee the order from one execution to another, we sort the commands.
        # Remove any potential duplicate commands (they would lead to the same result anyway).
        self.state["admissible_commands"] = sorted(set(self.state["_valid_commands"]))

        if self.infos.moves:
            self.state["moves"] = self._moves

    def reset(self):
        """ Start the game over and return the initial game state.

        Quests are tracked only when intermediate rewards or policy commands
        have been requested through `self.infos`.
        """
        self._prev_state = None
        self.state = GameState()
        track_quests = (self.infos.intermediate_reward or self.infos.policy_commands)
        self._game_progression = GameProgression(self._game, track_quests=track_quests)
        self._last_action = None
        self._previous_winning_policy = None
        self._current_winning_policy = self._game_progression.winning_policy
        self._moves = 0

        self.state.raw = DEFAULT_OBSERVATION
        self.state.feedback = DEFAULT_OBSERVATION
        self._gather_infos()
        return self.state

    def step(self, command: str):
        """ Execute a text command and advance the game accordingly.

        The command is looked up among the valid commands stored in the
        previous state. When found, the matching action is applied to the
        game progression and the move counter is incremented. Otherwise, the
        feedback is set to "Invalid command." and the game is left untouched.

        Arguments:
            command: Text command to execute. Surrounding whitespace is
                     stripped before the lookup.

        Returns:
            A tuple `(state, score, done)` where `state` is the new game
            state, `score` is the score reported by the game progression and
            `done` tells whether the game has been won or lost.
        """
        command = command.strip()
        self._prev_state = self.state

        self.state = GameState()
        self.state.last_command = command
        self.state.raw = DEFAULT_OBSERVATION
        self.state.feedback = DEFAULT_OBSERVATION
        self._previous_winning_policy = self._current_winning_policy

        self._last_action = None
        try:
            # Find the action corresponding to the command.
            idx = self._prev_state["_valid_commands"].index(command)
            self._last_action = self._game_progression.valid_actions[idx]
            # An action that affects the state of the game.
            self._game_progression.update(self._last_action)
            self._current_winning_policy = self._game_progression.winning_policy
            self._moves += 1
        except ValueError:
            # Unknown command: we assume nothing happened in the game.
            self.state.feedback = "Invalid command."

        self._gather_infos()
        # Expose the numeric score, not the GameProgression object itself.
        self.state["score"] = self._game_progression.score
        self.state["done"] = self.state["won"] or self.state["lost"]
        return self.state, self.state["score"], self.state["done"]

    def copy(self) -> "TextWorldEnv":
        """ Return a copy of this environment.

        It is safe to call `step` and `reset` on the copied environment.

        .. warning:: The `Game` and `Inform7Game` private objects are *soft* copies.
        """
        env = TextWorldEnv()

        # Copy core Environment's attributes.
        env.state = self.state.copy()
        env.infos = self.infos.copy()

        env._gamefile = self._gamefile
        env._game = self._game  # Reference
        env._inform7 = self._inform7  # Reference

        env._prev_state = self._prev_state.copy() if self._prev_state is not None else None
        env._last_action = self._last_action
        env._moves = self._moves
        # Policies left unset keep the `None` assigned by `__init__`.
        if self._previous_winning_policy is not None:
            env._previous_winning_policy = tuple(self._previous_winning_policy)

        if self._current_winning_policy is not None:
            env._current_winning_policy = tuple(self._current_winning_policy)

        if self._game_progression is not None:
            env._game_progression = self._game_progression.copy()

        return env
예제 #12
0
class JerichoEnv(textworld.Environment):
    """ Environment for playing Z-machine games through Jericho. """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._seed = -1
        self._jericho = None
        self.gamefile = None
        self._reset = False

    def load(self, z_file: str) -> None:
        """ Load a game file, starting a Jericho emulator if none exists yet.

        Arguments:
            z_file: Path to the game file.

        Raises:
            ValueError: If the file extension does not start with ".z".
            FileNotFoundError: If the game file does not exist.
        """
        self.gamefile = os.path.abspath(z_file)
        extension = os.path.splitext(os.path.basename(self.gamefile))[1]

        # Check if game is supported by Jericho.
        if not extension.startswith(".z"):
            raise ValueError("Only .z[1-8] files are supported!")

        if not os.path.isfile(self.gamefile):
            raise FileNotFoundError(self.gamefile)

        if self._jericho is not None:
            # Reuse the existing emulator for the new game.
            self._jericho.load(self.gamefile)
            return

        # Start the game using Jericho.
        self._jericho = jericho.FrotzEnv(self.gamefile, self._seed)

    def __del__(self) -> None:
        self.close()

    @property
    def game_running(self) -> bool:
        """ Determines if the game is still running. """
        return self._jericho is not None

    def seed(self, seed=None):
        """ Store the seed, forward it to the emulator (if any) and return it. """
        self._seed = seed
        emulator = self._jericho
        if emulator:
            emulator.seed(self._seed)

        return self._seed

    def _gather_infos(self):
        """ Adds additional information to the internal state. """
        emulator = self._jericho
        self.state.feedback = self.state.raw
        if not emulator.is_fully_supported:
            return  # No more information can be gathered.

        # Basic infos: use Jericho's `get_<attr>` when available, otherwise
        # keep whatever the state already holds for that attribute.
        for attr in self.infos.basics:
            getter = getattr(emulator, "get_" + attr,
                             lambda: self.state.get(attr))
            self.state[attr] = getter()

        # Extra infos: `None` when Jericho has no matching `get_<attr>`.
        for attr in self.infos.extras:
            getter = getattr(emulator, "get_" + attr, lambda: None)
            self.state["extra.{}".format(attr)] = getter()

        # Deal with information that has different method name in Jericho.
        self.state["won"] = emulator.victory()
        self.state["lost"] = emulator.game_over()
        self.state["score"] = emulator.get_score()
        self.state["moves"] = emulator.get_moves()
        self.state["location"] = emulator.get_player_location()

        # Requested descriptive infos are obtained by issuing a command and
        # then restoring the emulator's state saved right before.
        for key, probe_command in (("description", "look"),
                                   ("inventory", "inventory")):
            if not getattr(self.infos, key):
                continue

            snapshot = emulator.get_state()
            self.state[key], _, _, _ = emulator.step(probe_command)
            emulator.set_state(snapshot)

    def reset(self):
        """ Restart the loaded game and return its initial state.

        Raises:
            GameNotRunningError: If no game is currently running.
        """
        if self.game_running:
            self.state = GameState()
            initial_observation, _ = self._jericho.reset()
            self.state.raw = initial_observation
            self._gather_infos()
            # Remember that the game was reset so `step` can be called.
            self._reset = True
            return self.state

        raise GameNotRunningError(
            "Call env.load(gamefile) before env.reset().")

    def _send(self, command: str) -> str:
        """ Send a command directly to the interpreter.

        This method will not affect the internal state variable.
        """
        outcome = self._jericho.step(command)
        feedback, _, _, _ = outcome
        return feedback

    def step(self, command):
        """ Send a command to Jericho and return `(state, score, done)`.

        Raises:
            GameNotRunningError: If no game is running or `reset` has not
                been called yet.
        """
        ready = self.game_running and self._reset
        if not ready:
            raise GameNotRunningError()

        self.state = GameState()
        self.state.last_command = command.strip()
        # As of Jericho >= 2.1.0, the reward is returned instead of the score,
        # hence the second element of the result is ignored here.
        observation, _, finished, _ = self._jericho.step(self.state.last_command)
        self.state.raw = observation
        self.state.done = finished
        self._gather_infos()
        return self.state, self.state.score, self.state.done

    def close(self):
        """ Close the emulator, if any, and forget that the game was reset. """
        if not self.game_running:
            return

        self._jericho.close()
        self._jericho = None
        self._reset = False

    def copy(self) -> "JerichoEnv":
        """ Return a copy of this environment at the same state. """
        clone = JerichoEnv(self.infos)
        clone._seed = self._seed

        if self.gamefile:
            clone.load(self.gamefile)

        if self._jericho:
            # Replace the emulator with a copy of the current one.
            clone._jericho = self._jericho.copy()
            clone._reset = True

        # Copy core Environment's attributes.
        clone.state = self.state.copy()
        clone.infos = self.infos.copy()
        return clone
예제 #13
0
 def setUpClass(cls):
     """ Build the shared `GameState` fixture holding one field per value type. """
     cls.state = GameState()
     # Insertion order of the dict matches the order the fields are set in.
     fixture = {
         "field_str": "value1",
         "field_int": 42,
         "field_float": 4.2,
         "field_list": ["str", -1, True, 1.2],
     }
     for field_name, field_value in fixture.items():
         cls.state[field_name] = field_value