Beispiel #1
0
    def update(self):
        """
        Compute the Q-learning loss for one mini-batch of replayed transitions.

        A batch of ``self.replay_batch_size`` transitions is sampled from
        ``self.replay_memory``. The Q-value of each transition is the mean,
        over output word slots, of the rank the model assigns to the word
        that was actually chosen. The target is the stored reward plus the
        discounted (``self.discount_gamma``) next-state Q-value, which is
        zeroed for finished episodes and detached from the graph.

        Returns:
            None if the replay memory holds fewer transitions than one
            batch; otherwise the smooth-L1 loss between the masked
            Q-values and the masked targets.
        """
        has_full_batch = len(self.replay_memory) >= self.replay_batch_size
        if not has_full_batch:
            return None

        sampled = self.replay_memory.sample(self.replay_batch_size)
        # Turn a list of transitions into one Transition of per-field tuples.
        batch = Transition(*zip(*sampled))

        def _as_model_input(id_lists):
            # Pad with maxlen taken from max_len(), cast to int32, then
            # hand over to to_pt for conversion.
            padded = pad_sequences(id_lists, maxlen=max_len(id_lists))
            return to_pt(padded.astype('int32'), self.use_cuda)

        current_obs = _as_model_input(batch.observation_id_list)
        following_obs = _as_model_input(batch.next_observation_id_list)

        # Regroup the per-transition index tuples into one stacked tensor
        # per output word slot (original note: each is batch x 1).
        picked_indices = [
            torch.stack(slot, 0) for slot in zip(*batch.word_indices)
        ]

        # One rank tensor per output word slot
        # (original note: batch x vocab each, 5 slots).
        current_ranks = self.infer_word_ranks(current_obs)
        slot_qvalues = []
        for slot_rank, slot_index in zip(current_ranks, picked_indices):
            slot_qvalues.append(slot_rank.gather(1, slot_index).squeeze(-1))
        stacked_q = torch.stack(slot_qvalues, -1)
        q_value = torch.mean(stacked_q, -1)

        following_ranks = self.infer_word_ranks(following_obs)
        following_masks = [
            np.stack(slot, 0) for slot in zip(*batch.next_word_masks)
        ]
        following_slot_q, _ = _choose_maxQ_command(
            following_ranks, following_masks, self.use_cuda)
        stacked_next_q = torch.stack(following_slot_q, -1)
        # The bootstrap value is a fixed target: no gradient flows through it.
        next_q_value = torch.mean(stacked_next_q, -1).detach()

        reward = torch.stack(batch.reward)
        # 1.0 for running episodes, 0.0 for finished ones.
        still_running = 1.0 - np.array(batch.done, dtype='float32')
        still_running = to_pt(still_running, self.use_cuda, type='float')
        target = reward + still_running * next_q_value * self.discount_gamma

        step_mask = torch.stack(batch.mask)
        return F.smooth_l1_loss(q_value * step_mask, target * step_mask)
Beispiel #2
0
    def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
        """
        Assemble the model input for the current step of every game.

        For each game the description, inventory, recipe, feedback and
        previous action are tokenized with ``preproc``, mapped to word ids
        and concatenated in that order. An empty description is replaced by
        the single token "end". The concatenated id lists are then passed
        through ``pad_sequences`` (the original author notes that post
        padding is used; that depends on ``pad_sequences``) and ``to_pt``.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game. The keys
                "inventory", "extra.recipe" and "description" are read.

        Returns:
            A pair ``(input_description, description_id_list)``: the padded
            int32 batch after conversion by ``to_pt``, and the unpadded
            per-game id lists.
        """
        def tokenize(texts, **options):
            return [
                preproc(text, **options, tokenizer=self.nlp) for text in texts
            ]

        def to_ids(token_lists):
            return [_words_to_ids(tokens, self.word2id) for tokens in token_lists]

        inventory_ids = to_ids(tokenize(infos["inventory"]))
        feedback_ids = to_ids(tokenize(obs, str_type='feedback'))
        quest_ids = to_ids(tokenize(infos["extra.recipe"]))
        prev_action_ids = to_ids(tokenize(self.prev_actions))

        # An empty description is replaced by the single word "end".
        description_tokens = [
            tokens if len(tokens) != 0 else ["end"]
            for tokens in tokenize(infos["description"])
        ]
        description_ids = to_ids(description_tokens)

        # One combined id sequence per game; the order of the parts is fixed.
        description_id_list = []
        for desc, inv, quest, feedback, prev in zip(
                description_ids, inventory_ids, quest_ids,
                feedback_ids, prev_action_ids):
            description_id_list.append(desc + inv + quest + feedback + prev)

        longest = max_len(description_id_list)
        padded = pad_sequences(description_id_list, maxlen=longest)
        input_description = to_pt(padded.astype('int32'), self.use_cuda)

        return input_description, description_id_list
Beispiel #3
0
 def get_agent_inputs(self, string_list):
     """
     Convert raw sentences into word-level and character-level model inputs.

     Each string is split on whitespace. The tokens are mapped to word ids
     through ``self.word2id`` and to a character-level input through
     ``self.char2id``.

     Arguments:
         string_list: The sentences to encode, one string per entry.

     Returns:
         A triple ``(input_sentence, input_sentence_char,
         sentence_id_list)``: the padded int32 word-id batch after
         ``to_pt``, the character-level input after ``to_pt``, and the
         unpadded per-sentence word-id lists.
     """
     tokens_per_sentence = [sentence.split() for sentence in string_list]

     sentence_id_list = []
     for tokens in tokens_per_sentence:
         sentence_id_list.append(_words_to_ids(tokens, self.word2id))

     char_batch = list_of_token_list_to_char_input(tokens_per_sentence,
                                                   self.char2id)

     longest = max_len(sentence_id_list)
     word_batch = pad_sequences(sentence_id_list, maxlen=longest)
     word_batch = word_batch.astype('int32')

     word_tensor = to_pt(word_batch, self.use_cuda)
     char_tensor = to_pt(char_batch, self.use_cuda)
     return word_tensor, char_tensor, sentence_id_list
Beispiel #4
0
    def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
        """
        Assemble the model input for the current step of every game.

        The description, inventory, recipe, feedback and previous action of
        each game are converted to token ids with
        ``get_token_ids_for_items`` and concatenated in that order. The
        combined id lists are then passed through ``pad_sequences`` (the
        original author notes that post padding is used; that depends on
        ``pad_sequences``) and ``to_pt``.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game. The keys
                "inventory", "extra.recipe" and "description" are read.

        Returns:
            A pair ``(input_description, description_id_list)``: the padded
            int32 batch after conversion by ``to_pt``, and the unpadded
            per-game id lists.
        """
        word2id = self.vocab.word2id

        def encode(items, **extra):
            # Every source shares the same vocabulary and tokenizer.
            return get_token_ids_for_items(items,
                                           word2id,
                                           tokenizer=self.nlp,
                                           **extra)

        inventory_ids = encode(infos["inventory"])
        feedback_ids = encode(obs)
        quest_ids = encode(infos["extra.recipe"])
        prev_action_ids = encode(self.prev_actions)
        # Only the description is given a substitute for the empty case.
        description_ids = encode(infos["description"], subst_if_empty=['end'])

        # One combined id sequence per game; the order of the parts is fixed.
        description_id_list = []
        for desc, inv, quest, feedback, prev in zip(
                description_ids, inventory_ids, quest_ids,
                feedback_ids, prev_action_ids):
            description_id_list.append(desc + inv + quest + feedback + prev)

        longest = max_len(description_id_list)
        padded = pad_sequences(description_id_list, maxlen=longest)
        input_description = to_pt(padded.astype('int32'), self.use_cuda)

        return input_description, description_id_list