Beispiel #1
0
    def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
        """
        Tokenize every textual input of the current step and merge them into
        a single padded id matrix for the neural model.

        For each game the id sequences are concatenated in this order:
        description, inventory, recipe ("extra.recipe"), feedback, previous
        action. The batch is then padded by ``pad_sequences`` up to
        ``max_len`` of the batch (the original author describes this as post
        padding) and handed to ``to_pt``.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game. The keys read here
                are "inventory", "extra.recipe" and "description".

        Returns:
            A pair ``(input_description, description_id_list)``: the padded
            int32 batch after ``to_pt`` and the un-padded, concatenated id
            sequences it was built from.
        """
        def tokenize(texts, **preproc_kwargs):
            # One token list per text, always using the agent's tokenizer.
            return [preproc(text, tokenizer=self.nlp, **preproc_kwargs) for text in texts]

        def encode(token_lists):
            # Map every token list onto vocabulary ids.
            return [_words_to_ids(tokens, self.word2id) for tokens in token_lists]

        inventory_ids = encode(tokenize(infos["inventory"]))
        feedback_ids = encode(tokenize(obs, str_type='feedback'))
        quest_ids = encode(tokenize(infos["extra.recipe"]))
        prev_action_ids = encode(tokenize(self.prev_actions))

        # An empty description is replaced by the single word "end".
        description_tokens = [
            ["end"] if len(tokens) == 0 else tokens
            for tokens in tokenize(infos["description"])
        ]
        description_ids = encode(description_tokens)

        # Glue the per-game pieces together, one combined sequence per game.
        description_id_list = []
        for desc, inv, quest, feedback, prev_action in zip(
                description_ids, inventory_ids, quest_ids, feedback_ids, prev_action_ids):
            description_id_list.append(desc + inv + quest + feedback + prev_action)

        longest = max_len(description_id_list)
        padded = pad_sequences(description_id_list, maxlen=longest).astype('int32')
        input_description = to_pt(padded, self.use_cuda)

        return input_description, description_id_list
Beispiel #2
0
    def get_game_info_at_certain_step(self, obs, infos):
        """
        Get all needed info from game engine for training.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game. The keys read here
                are "description", "inventory", "object_nouns" and
                "object_adjs", plus "verbs" when the question type is neither
                "location" nor "existence".

        Returns:
            A pair ``(observation_strings, [possible_verbs, possible_adjs,
            possible_nouns])``; every element holds one entry per game.
        """
        batch_size = len(obs)
        feedback_strings = [preproc(item, tokenizer=self.nlp) for item in obs]
        description_strings = [
            preproc(item, tokenizer=self.nlp) for item in infos["description"]
        ]
        # When the feedback is identical to the description, the placeholder
        # "hello" is used as feedback instead of repeating the description.
        observation_strings = [
            d + " <|> " + fb if fb != d else d + " <|> hello"
            for fb, d in zip(feedback_strings, description_strings)
        ]

        inventory_strings = [
            preproc(item, tokenizer=self.nlp) for item in infos["inventory"]
        ]
        # Words appearing in each game's observation or inventory text.
        local_word_list = [
            observation.split() + inventory.split()
            for observation, inventory in zip(observation_strings,
                                              inventory_strings)
        ]

        directions = ["east", "west", "north", "south"]
        if self.question_type in ["location", "existence"]:
            # The agent only observes the environment, it does not change it.
            possible_verbs = [["go", "inventory", "wait", "open", "examine"]
                              for _ in range(batch_size)]
        else:
            possible_verbs = [
                list(set(item) - {"", "look"}) for item in infos["verbs"]
            ]

        possible_adjs, possible_nouns = [], []
        for i in range(batch_size):
            # Only the last word of a noun phrase is kept. Entries without
            # any token (empty or whitespace-only) are skipped, otherwise
            # ``split()[-1]`` would raise IndexError.
            object_nouns = [
                words[-1]
                for words in (item.split()
                              for item in infos["object_nouns"][i])
                if words
            ]
            object_adjs = [
                w for item in infos["object_adjs"][i] for w in item.split()
            ]
            # ``-`` binds tighter than ``&``; the grouping is spelled out
            # here so the candidates are exactly the known words that are
            # also present in the local text.
            local_words = set(local_word_list[i]) - {""}
            possible_nouns.append(
                list(set(object_nouns) & local_words) + directions)
            possible_adjs.append(
                list(set(object_adjs) & local_words) + ["</s>"])

        return observation_strings, [
            possible_verbs, possible_adjs, possible_nouns
        ]
def _make_datapoint(gamefile, step, obs, action, last_facts, facts_seen,
                    tokenizer):
    """Build one dataset record describing a single game transition."""
    return {
        "game":
        os.path.basename(gamefile),
        "step": step,
        "observation":
        preproc(obs, tokenizer=tokenizer),
        "previous_action":
        action.lower(),
        # Graph updates are the difference between the facts seen before
        # and after the action.
        "target_commands":
        sorted(
            gen_graph_commands(facts_seen - last_facts, cmd="add") +
            gen_graph_commands(last_facts - facts_seen, cmd="delete")),
        "previous_graph_seen":
        sorted(serialize_facts(last_facts)),
        "graph_seen":
        sorted(serialize_facts(facts_seen)),
    }


def collect_data_from_game(gamefile, seed, branching_depth):
    """
    Play a game along its walkthrough and record graph-update data.

    One record is emitted per walkthrough command (step ``(i, 0)``). After
    each of them the environment is forked and up to `branching_depth`
    random admissible commands are played on the fork, each producing a
    record with step ``(i, j)``.

    Arguments:
        gamefile: Path of the game, passed to ``textworld.start``.
        seed: Seed of the ``numpy.random.RandomState`` picking the random
            commands.
        branching_depth: Maximum number of random commands played after each
            walkthrough command.

    Returns:
        A pair ``(gamefile, dataset)`` where `dataset` is a list of dicts
        with the keys "game", "step", "observation", "previous_action",
        "target_commands", "previous_graph_seen" and "graph_seen".
    """
    tokenizer = spacy.load('en', disable=['ner', 'parser', 'tagger'])
    rng = np.random.RandomState(seed)

    # Ignore the following commands.
    commands_to_ignore = ["look", "examine", "inventory"]

    env_infos = textworld.EnvInfos(description=True,
                                   location=True,
                                   facts=True,
                                   last_action=True,
                                   admissible_commands=True,
                                   game=True,
                                   extras=["walkthrough"])
    env = textworld.start(gamefile, env_infos)
    env = textworld.envs.wrappers.Filter(env)

    obs, infos = env.reset()
    walkthrough = list(infos["extra.walkthrough"])

    # Make sure we start with listing the inventory (an empty walkthrough
    # must not crash on ``walkthrough[0]``).
    if not walkthrough or walkthrough[0] != "inventory":
        walkthrough = ["inventory"] + walkthrough

    # Add 'restart' command as a way to indicate the beginning of the game.
    walkthrough = ["restart"] + walkthrough

    dataset = []

    done = False
    facts_seen = set()
    for i, cmd in enumerate(walkthrough):
        last_facts = facts_seen
        if i > 0:  # != "restart"
            obs, _, done, infos = env.step(cmd)

        facts_seen = process_facts(last_facts, infos["game"], infos["facts"],
                                   infos["last_action"], cmd)

        dataset.append(
            _make_datapoint(gamefile, (i, 0), obs, cmd, last_facts,
                            facts_seen, tokenizer))

        if done:
            break  # Stop collecting data if game is done.

        if i + 1 == len(walkthrough):
            # Last walkthrough command: there is no next command to branch
            # away from, so no random exploration happens here.
            break
        next_cmd = walkthrough[i + 1]

        # Fork the current game & seen facts. The fork keeps its own
        # observation/infos so the main trajectory's state stays untouched.
        env_ = env.copy()
        facts_seen_ = facts_seen
        infos_ = infos

        # Then, take N random actions.
        for j in range(1, branching_depth + 1):
            # Candidates exclude the ignored verbs (except "examine
            # cookbook") and the command the walkthrough will play next.
            commands = [
                c for c in infos_["admissible_commands"]
                if ((c == "examine cookbook"
                     or c.split()[0] not in commands_to_ignore)
                    and c != next_cmd)
            ]

            if not commands:
                break

            cmd_ = rng.choice(commands)
            obs_, _, done_, infos_ = env_.step(cmd_)

            if done_:
                break  # Stop collecting data if game is done.

            last_facts_ = facts_seen_
            facts_seen_ = process_facts(last_facts_, infos_["game"],
                                        infos_["facts"],
                                        infos_["last_action"], cmd_)

            dataset.append(
                _make_datapoint(gamefile, (i, j), obs_, cmd_, last_facts_,
                                facts_seen_, tokenizer))

    return gamefile, dataset