Example #1
 def test_padded_tensor(self):
     # list of lists
     lol = [[1, 2], [3, 4, 5]]
     output, lens = padded_tensor(lol)
     assert np.all(output.numpy() == np.array([[1, 2, 0], [3, 4, 5]]))
     assert lens == [2, 3]
     output, _ = padded_tensor(lol, left_padded=True)
     assert np.all(output.numpy() == np.array([[0, 1, 2], [3, 4, 5]]))
     output, _ = padded_tensor(lol, pad_idx=99)
     assert np.all(output.numpy() == np.array([[1, 2, 99], [3, 4, 5]]))
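For reference, here is a minimal sketch of the padding behavior this test exercises, assuming a ParlAI-style signature (the real padded_tensor also handles CUDA placement and fp16-friendly length rounding, omitted here; the name padded_tensor_sketch is ours):

import torch

def padded_tensor_sketch(items, pad_idx=0, left_padded=False):
    """Pad a list of 1D sequences into one LongTensor.

    Returns (padded [n, max_len] tensor, list of original lengths).
    """
    lens = [len(item) for item in items]
    max_len = max(lens)
    output = torch.full((len(items), max_len), pad_idx, dtype=torch.long)
    for i, (item, length) in enumerate(zip(items, lens)):
        item = torch.as_tensor(item, dtype=torch.long)
        if left_padded:
            output[i, max_len - length:] = item  # pad on the left
        else:
            output[i, :length] = item            # pad on the right
    return output, lens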
Example #2
    def _add_prev_responses(self, batch, cands, cand_vecs, label_inds, source):
        assert source not in ['fixed', 'vocab']
        self._extract_prev_responses(batch)

        # Add prev_responses as negatives
        prev_cands = self.prev_responses
        prev_cand_vecs = [
            torch.Tensor(self.dict.txt2vec(cand)) for cand in prev_cands
        ]
        if source == 'batch':
            # Option 1: Change from one set of shared candidates to separate per example
            # cands = [cands + [prev_cand] for prev_cand in prev_cands]
            # list_of_lists_of_cand_vecs = [[vec for vec in cand_vecs] + [prev_cand_vec]
            #                            for prev_cand_vec in prev_cand_vecs]
            # cand_vecs = padded_3d(list_of_lists_of_cand_vecs, use_cuda=self.use_cuda,
            #                       dtype=cand_vecs[0].dtype)

            # Option 2: Just add all prev responses for the whole batch (doubles bs)
            cands += prev_cands
            cand_vecs, _ = padded_tensor([vec for vec in cand_vecs] +
                                         prev_cand_vecs,
                                         use_cuda=self.use_cuda)
        elif source == 'inline':
            raise NotImplementedError

        return cands, cand_vecs
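A toy illustration of the 'batch' branch above, with made-up vectors: appending one previous response per example doubles the candidate list, and re-padding the combined set grows the tensor to the new maximum length.

import torch

cand_vecs = torch.tensor([[5, 6, 0], [7, 8, 9]])                 # bsz=2 shared candidates
prev_cand_vecs = [torch.tensor([1, 2, 3, 4]), torch.tensor([2, 2])]
combined = [vec for vec in cand_vecs] + prev_cand_vecs           # 2 * bsz ragged rows
max_len = max(v.size(0) for v in combined)
padded = torch.zeros(len(combined), max_len, dtype=torch.long)
for i, v in enumerate(combined):
    padded[i, :v.size(0)] = v                                    # right-pad each row
print(padded.shape)  # torch.Size([4, 4]) -- candidate set doubled, re-padded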
Example #3
    def predict_satisfaction_by_uncertainty(self, batch):
        # Use the dialog model's confidence to predict the rating on previous response
        # HACK: this test is run using a model that was trained on dialog but is now
        # being evaluated on satisfaction. We use a satisfaction dataset so that we have
        # access to the satisfaction labels. Therefore, we do a sloppy hack here to pull
        # out what would have been the candidates for the dialog task. We use histsz=4
        # and ignore the last response (the user's feedback) and use the penultimate
        # utterances as the candidates. The (up to) two utterances before that are
        # context.

        # pull out dialog candidates from text_vecs since this is a satisfaction task
        assert self.opt['history_size'] > 2
        text_vecs = []
        cand_vecs = []
        for vec in batch.text_vec:
            last_p1 = (
                vec == self.dict.txt2vec('__p1__')[0]).nonzero()[-1].item()
            last_p2 = (
                vec == self.dict.txt2vec('__p2__')[0]).nonzero()[-1].item()
            text_vecs.append(vec[:last_p2])
            cand_vecs.append(vec[last_p2 + 1:last_p1])
        text_padded, _ = padded_tensor(text_vecs)
        cand_padded, _ = padded_tensor(cand_vecs)
        scores = self.model.score_dialog(text_padded, cand_padded)
        confidences = F.softmax(scores, dim=1).cpu().detach().numpy()

        preds = []
        for example in confidences:
            ranked_confidences = sorted(list(example), reverse=True)
            if self.opt['uncertainty_style'] == 'mag':
                # If the most confident choice isn't confident enough, predict that
                # the response the bot gives will be bad (pred=0)
                mag = ranked_confidences[0]
                preds.append(mag > self.opt['uncertainty_threshold'])
            elif self.opt['uncertainty_style'] == 'gap':
                # If the gap between the first and second most confident choices isn't
                # large enough, predict that the response the bot gives will be bad
                # (pred=0)
                gap = ranked_confidences[0] - ranked_confidences[1]
                preds.append(gap > self.opt['uncertainty_threshold'])

        loss = torch.tensor(0)
        preds = torch.LongTensor(preds)
        labels = torch.LongTensor([int(l) == 1 for l in batch.labels])
        batchsize = len(labels)
        self.update_sat_metrics(loss, preds, labels, batchsize)
        return preds
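Both uncertainty rules reduce to a comparison on the sorted softmax row. A self-contained illustration with a made-up confidence row and threshold:

confidences = [0.70, 0.20, 0.10]                 # one example's softmax over candidates
ranked = sorted(confidences, reverse=True)
threshold = 0.5                                  # hypothetical --uncertainty-threshold

pred_mag = ranked[0] > threshold                 # 'mag': is the top choice confident enough?
pred_gap = (ranked[0] - ranked[1]) > threshold   # 'gap': is the margin over the runner-up big enough?
print(pred_mag, pred_gap)  # True False -> 'mag' predicts good, 'gap' predicts bad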
Example #4
    def _build_train_cands(self, labels, label_cands=None):
        """Build candidates from batch labels.

        When the batchsize is 1, first we look for label_cands to be filled
        (from batch.candidate_vecs). If available, we'll use those candidates.
        Otherwise, we'll rank each token in the dictionary except NULL.

        For batches of labels of a single token, we use torch.unique to return
        only the unique tokens.
        For batches of label sequences of length greater than one, we keep them
        all so as not to waste too much time calculating uniqueness.

        :param labels:      (bsz x seqlen) LongTensor.
        :param label_cands: default None. if bsz is 1 and label_cands is not
                            None, will use label_cands for training.

        :return: tuple of tensors (cands, indices)
            cands is (num_cands <= bsz x seqlen) candidates
            indices is (bsz) index in cands of each original label
        """
        assert labels.dim() == 2
        if labels.size(0) == 1:
            # we can't rank the batch of labels, see if there are label_cands
            label = labels[0]  # there's just one
            if label_cands is not None:
                self._warn_once(
                    'ranking_labelcands',
                    '[ Training using label_candidates fields as cands. ]')
                label_cands, _ = padded_tensor(label_cands[0],
                                               use_cuda=self.use_cuda)
                label_index = (label_cands == label).all(1).nonzero()
                return label_cands, label_index.squeeze(1)
            else:
                self._warn_once(
                    'ranking_dict',
                    '[ Training using dictionary of tokens as cands. ]')
                dict_size = len(self.dict)
                full_dict = labels.new(range(1, dict_size))
                # pick random token from label
                if len(label) > 1:
                    token = self.random.choice(label)
                else:
                    token = label[0] - 1
                return full_dict.unsqueeze_(1), token.unsqueeze(0)
        elif labels.size(1) == 1:
            self._warn_once(
                'ranking_unique',
                '[ Training using unique labels in batch as cands. ]')
            # use unique if input is 1D
            cands, label_inds = labels.unique(return_inverse=True)
            cands.unsqueeze_(1)
            label_inds.squeeze_(1)
            return cands, label_inds
        else:
            self._warn_once(
                'ranking_batch',
                '[ Training using other labels in batch as cands. ]')
            return labels, labels.new(range(labels.size(0)))
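The 'ranking_unique' branch relies on torch.unique(return_inverse=True) returning, for every original label, its index within the deduplicated candidate set. A quick standalone check:

import torch

labels = torch.tensor([[3], [7], [3], [9]])        # bsz x 1 single-token labels
cands, label_inds = labels.unique(return_inverse=True)
print(cands)                   # tensor([3, 7, 9])
print(label_inds.squeeze(1))   # tensor([0, 1, 0, 2]) -- position of each label in cands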
    def batchify(self, *args, **kwargs):
        """Override batchify options for seq2seq."""
        kwargs['sort'] = True  # need sorted for pack_padded
        batch = super().batchify(*args, **kwargs)

        # Get some args needed for batchify
        obs_batch = args[0]
        sort = kwargs['sort']
        # from TorchAgent.batchify
        is_valid = lambda obs: 'text_vec' in obs or 'image' in obs

        # Run this part of TorchAgent's batchify to get exs in correct order

        # ==================== START COPIED FROM TORCHAGENT ===================
        if len(obs_batch) == 0:
            return Batch()

        valid_obs = [(i, ex) for i, ex in enumerate(obs_batch) if is_valid(ex)]

        if len(valid_obs) == 0:
            return Batch()

        valid_inds, exs = zip(*valid_obs)

        # TEXT
        xs, x_lens = None, None
        if any('text_vec' in ex for ex in exs):
            _xs = [ex.get('text_vec', self.EMPTY) for ex in exs]
            xs, x_lens = padded_tensor(_xs, self.NULL_IDX, self.use_cuda)
            if sort:
                sort = False  # now we won't sort on labels
                xs, x_lens, valid_inds, exs = argsort(x_lens,
                                                      xs,
                                                      x_lens,
                                                      valid_inds,
                                                      exs,
                                                      descending=True)

        # ======== END COPIED FROM TORCHAGENT ========

        # Add history to the batch
        history = [
            ConvAI2History(ex['text'], dictionary=self.dict) for ex in exs
        ]

        # Add CT control vars to batch
        ctrl_vec = get_ctrl_vec(exs, history,
                                self.control_settings)  # tensor or None
        if self.use_cuda and ctrl_vec is not None:
            ctrl_vec = ctrl_vec.cuda()

        # Replace the old namedtuple with a new one that includes ctrl_vec and history
        ControlBatch = namedtuple(
            'Batch',
            tuple(batch.keys()) + ('ctrl_vec', 'history'))
        batch = ControlBatch(ctrl_vec=ctrl_vec, history=history, **dict(batch))

        return batch
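The namedtuple extension at the end of batchify is a general pattern for adding fields without losing the originals. A minimal standalone version of the pattern (field names here are illustrative; ParlAI's Batch additionally supports dict-style access, which is why the code above can call batch.keys() and dict(batch)):

from collections import namedtuple

Batch = namedtuple('Batch', ('text_vec', 'label_vec'))
batch = Batch(text_vec=[1, 2], label_vec=[3])

# extend with new fields while keeping the old ones
ControlBatch = namedtuple('Batch', tuple(batch._fields) + ('ctrl_vec', 'history'))
batch = ControlBatch(ctrl_vec=None, history=[], **batch._asdict())
print(batch.ctrl_vec, batch.text_vec)  # None [1, 2]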
Example #6
 def _build_cands(self, batch):
     if not batch.candidates:
         return None, None
     cand_inds = [i for i in range(len(batch.candidates))
                  if batch.candidates[i]]
     cands = [batch.candidate_vecs[i] for i in cand_inds]
     for i, c in enumerate(cands):
         cands[i] = padded_tensor(c, use_cuda=self.use_cuda)[0]
     return cands, cand_inds
Example #7
        def add_labels(self, batch, label_vecs):
            """"""
            labels = [self._v2t(l) for l in label_vecs]
            ys, y_lens = padded_tensor(label_vecs, self.NULL_IDX,
                                       self.use_cuda)

            batch.labels = labels
            batch.label_vec = ys
            batch.label_lengths = y_lens
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning,
            )
        else:
            maxlen = self.label_truncate or 256
            beam_preds_scores, _ = self._generate(batch, self.beam_size,
                                                  maxlen)
            preds, scores = zip(*beam_preds_scores)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, cand_choices)
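The candidate-ranking loop above scores each candidate by its average per-token cross-entropy, masking out padding so short candidates are not unfairly favored. A toy version of that masked mean, with random logits standing in for the decoder output:

import torch
import torch.nn.functional as F

NULL_IDX = 0
num_cands, seqlen, vocab = 3, 4, 10
cands = torch.tensor([[4, 5, 0, 0], [6, 7, 8, 0], [9, 9, 9, 9]])
scores = torch.randn(num_cands, seqlen, vocab)   # stand-in for decode_forced logits

cand_losses = F.cross_entropy(
    scores.view(num_cands * seqlen, -1), cands.view(-1), reduction='none'
).view(num_cands, seqlen)
mask = (cands != NULL_IDX).float()               # 1 on real tokens, 0 on padding
cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
_, ordering = cand_scores.sort()                 # lowest average loss ranks first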
 def _build_cands(self, batch):
     if not batch.candidates:
         return None, None
     cand_inds = [
         i for i in range(len(batch.candidates)) if batch.candidates[i]
     ]
     cands = [batch.candidate_vecs[i] for i in cand_inds]
     max_cands_len = max(
         [max([cand.size(0) for cand in cands_i]) for cands_i in cands])
     for i, c in enumerate(cands):
         cands[i] = padded_tensor(c,
                                  use_cuda=self.use_cuda,
                                  max_len=max_cands_len)[0].unsqueeze(0)
     cands = torch.cat(cands, 0)
     return cands, cand_inds
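Passing a shared max_len to padded_tensor is what makes the per-example tensors concatenable: torch.cat along dim 0 requires every other dimension to match, so each example must be padded to the same global width (and, in this code, hold the same number of candidates). A minimal demonstration of the idea with hand-rolled padding:

import torch

def pad(cands, max_len):
    out = torch.zeros(len(cands), max_len, dtype=torch.long)
    for i, c in enumerate(cands):
        out[i, :c.size(0)] = c
    return out

cands_a = [torch.tensor([1, 2]), torch.tensor([3])]        # local max len 2
cands_b = [torch.tensor([4, 5, 6]), torch.tensor([7])]     # local max len 3
max_len = 3                                                # global max across examples
stacked = torch.cat([pad(cands_a, max_len).unsqueeze(0),
                     pad(cands_b, max_len).unsqueeze(0)], 0)
print(stacked.shape)  # torch.Size([2, 2, 3])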
Example #10
 def _extract_and_tokenize(self, candidates, correct):
     selected = []
     candidates_toks = []
     for j in range(self.numcandidates - 1):
         candidate = None
         while candidate is None or candidate in selected or candidate == correct:
             candidate = random.choice(candidates)
         selected.append(candidate)
         candidate_toks = self.tokenizer.encode(candidate)
         candidates_toks.append(candidate_toks)
     # add the correct candidate as the last entry
     correct_toks = self.tokenizer.encode(correct)
     candidates_toks.append(correct_toks)
     candidates_toks = padded_tensor(candidates_toks)[0]
     selected.append(correct)
     return candidates_toks, selected
Example #11
 def _build_cands(self, batch):
     """Augment with injected pred here
     """
     if not batch.candidates:
         return None, None
     cand_inds = [
         i for i in range(len(batch.candidates)) if batch.candidates[i]
     ]
     cands = []
     for i in cand_inds:
         with_history = batch.candidate_vecs[i]
         if batch.injected_pred is not None:
             with_history.append(batch.injected_pred_vecs[i])
         cands.append(with_history)
     lengths = []
     for i, c in enumerate(cands):
         cands[i], length = padded_tensor(c, use_cuda=self.use_cuda)
         lengths.append(length)
     return cands, cand_inds, lengths
Example #12
    def rerank_candidates(self, candidates, encoder_states):
        """
        Candidates reranking based on the post ranker in the self.model
        encoder_states are assumed from the whole minibatch
        candidates is a list of lists, i.e. internal list is a list of candidates
        for the sample in the mini-batch
        This reranking is used only with minibatch = 1
        """
        assert encoder_states[0].size(0) == 1, \
            'Only batch size 1 is supported here'
        with torch.no_grad():
            cands_vecs, lengths = padded_tensor(
                candidates, use_cuda=self.use_cuda)  # (cand_num, max_length)
            exp_enc_states = []
            exp_enc_states.append(encoder_states[0].expand(
                len(candidates), -1, -1))
            if isinstance(encoder_states[1], tuple):
                exp_enc_states.append(
                    (encoder_states[1][0].expand(-1, len(candidates),
                                                 -1).contiguous(),
                     encoder_states[1][1].expand(-1, len(candidates),
                                                 -1).contiguous()))
            else:
                exp_enc_states.append(encoder_states[1].expand(
                    -1, len(candidates), -1))
            exp_enc_states.append(encoder_states[2].expand(
                len(candidates), -1))
            scores, hidden_cand, cells_cand = self.model._decode_forced(
                cands_vecs, exp_enc_states, with_cells=True)
            ranker_input_cand = self.model._get_ranker_input(
                cells_cand, lengths)
            ranker_output_cand = self.model.forward_post_ranker(
                ranker_input_cand)

        rank_scores = ranker_output_cand.view(-1)
        topcand = candidates[torch.argmax(rank_scores)]
        return [topcand], rank_scores
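Expanding the batch-size-1 encoder output across candidates is cheap: expand returns a view with stride 0 over the new dimension rather than copying memory, which is why the LSTM hidden/cell states above need an explicit .contiguous() before downstream use. A quick shape and aliasing check:

import torch

enc_out = torch.randn(1, 7, 16)              # (bsz=1, seq_len, hidden)
n_cands = 5
expanded = enc_out.expand(n_cands, -1, -1)   # (n_cands, seq_len, hidden), no copy
print(expanded.shape)                        # torch.Size([5, 7, 16])
print(expanded.data_ptr() == enc_out.data_ptr())  # True: same underlying storage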
Example #13
    def _build_candidates(self, batch, source, mode):
        """
        Build a candidate set for this batch.

        :param batch:
            a Batch object (defined in torch_agent.py)
        :param source:
            the source from which candidates should be built, one of
            ['batch', 'batch-all-cands', 'inline', 'fixed']
        :param mode:
            'train' or 'eval'

        :return: tuple of tensors (label_inds, cands, cand_vecs)

            label_inds: A [bsz] LongTensor of the indices of the labels for each
                example from its respective candidate set
            cands: A [num_cands] list of (text) candidates
                OR a [batchsize] list of such lists if source=='inline'
            cand_vecs: A padded [num_cands, seqlen] LongTensor of vectorized candidates
                OR a [batchsize, num_cands, seqlen] LongTensor if source=='inline'

        Possible sources of candidates:

            * batch: the set of all labels in this batch
                Use all labels in the batch as the candidate set (with all but the
                example's label being treated as negatives).
                Note: with this setting, the candidate set is identical for all
                examples in a batch. This option may be undesirable if it is possible
                for duplicate labels to occur in a batch, since the second instance of
                the correct label will be treated as a negative.
            * batch-all-cands: the set of all candidates in this batch
                Use all candidates in the batch as candidate set.
                Note 1: This can result in a very large number of candidates.
                Note 2: In this case we will deduplicate candidates.
                Note 3: just like with 'batch' the candidate set is identical
                for all examples in a batch.
            * inline: batch_size lists, one list per example
                If each example comes with a list of possible candidates, use those.
                Note: With this setting, each example will have its own candidate set.
            * fixed: one global candidate list, provided in a file from the user
                If self.fixed_candidates is not None, use a set of fixed candidates for
                all examples.
                Note: this setting is not recommended for training unless the
                universe of possible candidates is very small.
            * vocab: one global candidate list, extracted from the vocabulary with the
                exception of self.NULL_IDX.
        """
        label_vecs = batch.label_vec  # [bsz] list of lists of LongTensors
        label_inds = None
        batchsize = batch.text_vec.shape[0]

        if label_vecs is not None:
            assert label_vecs.dim() == 2

        if source == 'batch':
            warn_once(
                '[ Executing {} mode with batch labels as set of candidates. ]'
                ''.format(mode))
            if batchsize == 1:
                warn_once(
                    "[ Warning: using candidate source 'batch' and observed a "
                    "batch of size 1. This may be due to uneven batch sizes at "
                    "the end of an epoch. ]")
            if label_vecs is None:
                raise ValueError(
                    "If using candidate source 'batch', then batch.label_vec cannot be "
                    "None.")

            cands = batch.labels
            cand_vecs = label_vecs
            label_inds = label_vecs.new_tensor(range(batchsize))

        elif source == 'batch-all-cands':
            warn_once(
                '[ Executing {} mode with all candidates provided in the batch ]'
                ''.format(mode))
            if batch.candidate_vecs is None:
                raise ValueError(
                    "If using candidate source 'batch-all-cands', then batch."
                    "candidate_vecs cannot be None. If your task does not have "
                    "inline candidates, consider using one of "
                    "--{m}={{'batch','fixed','vocab'}}."
                    "".format(m='candidates' if mode ==
                              'train' else 'eval-candidates'))
            # initialize the list of cands with the labels
            cands = []
            all_cands_vecs = []
            # dictionary used for deduplication
            cands_to_id = {}
            for i, cands_for_sample in enumerate(batch.candidates):
                for j, cand in enumerate(cands_for_sample):
                    if cand not in cands_to_id:
                        cands.append(cand)
                        cands_to_id[cand] = len(cands_to_id)
                        all_cands_vecs.append(batch.candidate_vecs[i][j])
            cand_vecs, _ = padded_tensor(
                all_cands_vecs,
                self.NULL_IDX,
                use_cuda=self.use_cuda,
                fp16friendly=self.fp16,
            )
            label_inds = label_vecs.new_tensor(
                [cands_to_id[label] for label in batch.labels])

        elif source == 'inline':
            warn_once(
                '[ Executing {} mode with provided inline set of candidates ]'
                ''.format(mode))
            if batch.candidate_vecs is None:
                raise ValueError(
                    "If using candidate source 'inline', then batch.candidate_vecs "
                    "cannot be None. If your task does not have inline candidates, "
                    "consider using one of --{m}={{'batch','fixed','vocab'}}."
                    "".format(m='candidates' if mode ==
                              'train' else 'eval-candidates'))

            cands = batch.candidates
            cand_vecs = padded_3d(
                batch.candidate_vecs,
                self.NULL_IDX,
                use_cuda=self.use_cuda,
                fp16friendly=self.fp16,
            )
            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                bad_batch = False
                for i, label_vec in enumerate(label_vecs):
                    label_vec_pad = label_vec.new_zeros(
                        cand_vecs[i].size(1)).fill_(self.NULL_IDX)
                    if cand_vecs[i].size(1) < len(label_vec):
                        label_vec = label_vec[0:cand_vecs[i].size(1)]
                    label_vec_pad[0:label_vec.size(0)] = label_vec
                    label_inds[i] = self._find_match(cand_vecs[i],
                                                     label_vec_pad)
                    if label_inds[i] == -1:
                        bad_batch = True
                if bad_batch:
                    if self.ignore_bad_candidates and not self.is_training:
                        label_inds = None
                    else:
                        raise RuntimeError(
                            'At least one of your examples has a set of label candidates '
                            'that does not contain the label. To ignore this error '
                            'set `--ignore-bad-candidates True`.')

        elif source == 'fixed':
            if self.fixed_candidates is None:
                raise ValueError(
                    "If using candidate source 'fixed', then you must provide the path "
                    "to a file of candidates with the flag --fixed-candidates-path"
                )
            warn_once(
                "[ Executing {} mode with a common set of fixed candidates "
                "(n = {}). ]".format(mode, len(self.fixed_candidates)))

            cands = self.fixed_candidates
            cand_vecs = self.fixed_candidate_vecs

            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                bad_batch = False
                for i, label_vec in enumerate(label_vecs):
                    label_vec_pad = label_vec.new_zeros(
                        cand_vecs[i].size(0)).fill_(self.NULL_IDX)
                    if cand_vecs[i].size(0) < len(label_vec):
                        label_vec = label_vec[0:cand_vecs[i].size(0)]
                    label_vec_pad[0:label_vec.size(0)] = label_vec
                    label_inds[i] = self._find_match(cand_vecs, label_vec_pad)
                    if label_inds[i] == -1:
                        bad_batch = True
                if bad_batch:
                    if self.ignore_bad_candidates and not self.is_training:
                        label_inds = None
                    else:
                        raise RuntimeError(
                            'At least one of your examples has a set of label candidates '
                            'that does not contain the label. To ignore this error '
                            'set `--ignore-bad-candidates True`.')

        elif source == 'vocab':
            warn_once(
                '[ Executing {} mode with tokens from vocabulary as candidates. ]'
                ''.format(mode))
            cands = self.vocab_candidates
            cand_vecs = self.vocab_candidate_vecs
            # NOTE: label_inds is None here, as we will not find the label in
            # the set of vocab candidates
        else:
            raise Exception("Unrecognized source: %s" % source)

        return (cands, cand_vecs, label_inds)
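The _find_match helper is not shown here, but from its usage it presumably returns the index of the row of cand_vecs that exactly equals the padded label, or -1 on failure. The core comparison can be written directly:

import torch

NULL_IDX = 0
cand_vecs = torch.tensor([[4, 5, 0], [6, 7, 8], [9, 0, 0]])   # padded candidates
label_vec = torch.tensor([6, 7, 8])

label_vec_pad = torch.full((cand_vecs.size(1),), NULL_IDX, dtype=torch.long)
label_vec_pad[:label_vec.size(0)] = label_vec
matches = (cand_vecs == label_vec_pad).all(dim=1).nonzero()
label_ind = matches[0].item() if len(matches) > 0 else -1     # -1 flags a bad example
print(label_ind)  # 1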
Example #14
    def batchify(self, obs_batch):
        """
        Wizard custom batchify, which passes along the knowledge/title.

        Following the docstring of TorchAgent.batchify, it calls super, then
        uses an extended version of the torch_agent.Batch namedtuple.

        The purpose of extending the info is to keep track of some custom
        metrics.
        """
        batch = super().batchify(obs_batch)
        reordered_observations = [obs_batch[i] for i in batch.valid_indices]
        is_training = 'labels' in reordered_observations[0]

        # first parse and compile all the knowledge together
        all_knowledges = []  # list-of-lists knowledge items for each observation
        knowledge_counts = []  # how much knowledge each observation gets
        for obs in reordered_observations:
            obs_know = self._parse_knowledge(obs)
            # downsample if desired
            if (
                is_training
                and self.max_knowledge
                and len(obs_know) > self.max_knowledge
            ):
                # offset by one so that we don't choose 0
                keepers = 1 + np.random.choice(
                    len(obs_know) - 1, self.max_knowledge, False
                )
                # correct answer is always the first one
                keepers[0] = 0
                obs_know = [obs_know[i] for i in keepers]
            all_knowledges.append(obs_know)
            knowledge_counts.append(len(obs_know))

        # now we want to actually pack this into a tensor, along with the mask
        N = len(reordered_observations)
        K = max(knowledge_counts)
        # round out the array so everything is equally sized
        for i in range(N):
            all_knowledges[i] += [''] * (K - knowledge_counts[i])
        flattened_knowledge = list(chain(*all_knowledges))

        knowledge_vec = [
            self._vectorize_text(
                # the beginning of the sentence is more useful
                k,
                truncate=self.knowledge_truncate,
                add_end=True,
                truncate_left=False,
            )
            for k in flattened_knowledge
        ]
        knowledge_vec, _ = padded_tensor(
            knowledge_vec, self.NULL_IDX, self.use_cuda, left_padded=True
        )
        knowledge_vec[:, -1] = self.END_IDX
        T = knowledge_vec.size(-1)
        knowledge_vec = knowledge_vec.view(N, K, T)

        # knowledge mask is a N x K tensor saying which items we're allowed to
        # attend over
        bsz = len(reordered_observations)
        ck_mask = th.zeros(bsz, K, dtype=th.uint8)
        for i, klen in enumerate(knowledge_counts):
            ck_mask[i, :klen] = 1
        ck_mask = ck_mask != 0  # for pytorch 1.0/1.2 uint8/bool compatibility
        # and the correct labels
        cs_ids = th.LongTensor(bsz).zero_()

        if self.use_cuda:
            knowledge_vec = knowledge_vec.cuda()
            ck_mask = ck_mask.cuda()
            cs_ids = cs_ids.cuda()

        batch['know_vec'] = knowledge_vec
        batch['ck_mask'] = ck_mask
        batch['cs_ids'] = cs_ids
        batch['use_cs_ids'] = is_training
        batch['knowledge'] = np.array(flattened_knowledge).reshape(N, K)
        return batch
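The knowledge packing above follows a flatten / pad / reshape pattern: round every example's knowledge list out to the same count K with empty strings, vectorize all N * K entries as one flat list, pad them to a common length T, then view the result back to (N, K, T). A shape-only sketch with a trivial stand-in for _vectorize_text:

import torch

all_knowledges = [['a b', 'c'], ['d e f']]           # ragged: 2 and 1 items
N = len(all_knowledges)
K = max(len(k) for k in all_knowledges)
for know in all_knowledges:
    know += [''] * (K - len(know))                   # round out to equal counts
flat = [k for know in all_knowledges for k in know]  # N * K strings

# toy vectorizer + right padding (the real code uses _vectorize_text and padded_tensor)
vecs = [torch.tensor([hash(w) % 100 for w in k.split()] or [0]) for k in flat]
T = max(v.size(0) for v in vecs)
padded = torch.zeros(N * K, T, dtype=torch.long)
for i, v in enumerate(vecs):
    padded[i, :v.size(0)] = v
know_vec = padded.view(N, K, T)                      # (2, 2, 3)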
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            # noinspection PyTypeChecker
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
        elif self.beam_size == 1:
            # greedy decode
            _, preds, *_ = self.model(*self._model_input(batch), bsz=bsz)
        elif self.beam_size > 1:
            out = self.beam_search(self.model,
                                   batch,
                                   self.beam_size,
                                   start=self.START_IDX,
                                   end=self.END_IDX,
                                   pad=self.NULL_IDX,
                                   min_length=self.beam_min_length,
                                   min_n_best=self.beam_min_n_best,
                                   block_ngram=self.beam_block_ngram)
            beam_preds_scores, _, beams = out
            preds, scores = zip(*beam_preds_scores)

            if self.beam_dot_log is True:
                self._write_beam_dots(batch.text_vec, beams)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        if batch.label_vec is not None:
            label_text = batch.labels
            # we are in validation mode; print some generated responses for debugging
            for i in range(len(preds)):
                if random.random() > (1 - self.opt['report_freq']):
                    context_text = batch.observations[i]['text']
                    print('TEXT: ', context_text)
                    print('TARGET: ', self._v2t(batch.label_vec[i]))
                    print('PREDICTION: ', self._v2t(preds[i]), '\n~')
        else:
            label_text = None

        text = [self._v2t(p) for p in preds] if preds is not None else None
        context = [obs['text'] for obs in batch.observations]
        return Output(text, cand_choices), label_text, context
Example #16
    def eval_step(self, batch):
        """Process batch of inputs.

        If the batch includes labels, calculate validation metrics as well.
        If --skip-generation is not set, return a prediction for each input.

        :param batch: parlai.core.torch_agent.Batch, contains tensorized
                      version of observations.
        """
        if batch.text_vec is None:
            return
        self.is_training = False
        samples = self._make_sample(batch.text_vec, batch.label_vec)
        self.model.eval()
        if batch.label_vec is not None:
            # Interactive mode won't have a gold label
            self.trainer.valid_step(samples)

        # Output placeholders
        reranked_cands = None
        generated_output = None

        # Grade each of the candidate sequences
        if batch.candidate_vecs is not None:
            bsz = len(batch.text_vec)
            reranked_cands = []
            # score the candidates for each item in the batch separately, so that
            # we can support variable number of candidates
            for i in range(bsz):
                cands = batch.candidate_vecs[i]
                if not cands:
                    reranked_cands.append(None)
                    continue
                ncand = len(cands)
                # repeat the input many times
                xs = batch.text_vec[i].unsqueeze(0).expand(ncand, -1)
                # some models crash if there's leading padding on every example
                xs = xs[:, :batch.text_lengths[i]]
                # and appropriately pack the outputs
                ys, _ = padded_tensor(cands, self.NULL_IDX, self.use_cuda)
                s = self._make_sample(xs, ys)
                # perform the actual grading, extract the scores
                scored = list(
                    self.scorer.score_batched_itr([s], cuda=self.use_cuda))
                scores = [s[3][0]['score'].item() for s in scored]
                # intentional hanging comma here; argsort returns a list
                ranked, = argsort(scores, batch.candidates[i], descending=True)
                reranked_cands.append(ranked)

        # Next generate freely to create our response
        if not self.args.skip_generation:
            generated_output = self._generate(samples)
        elif reranked_cands:
            # we're skipping generation, but we're also grading candidates
            # so output the highest ranked candidate
            # In the case of zero candidates, we don't have something to rank,
            # so we may need to pass on that None
            generated_output = [
                ranked and ranked[0] or None for ranked in reranked_cands
            ]
        else:
            # no output at all
            pass

        return Output(generated_output, reranked_cands)
Example #17
    def batchify(self, obs_batch, sort=False):
        """
        Create a batch of valid observations from an unchecked batch.

        A valid observation is one that passes the lambda provided to the
        function, which defaults to checking if the preprocessed 'text_vec'
        field is present which would have been set by this agent's 'vectorize'
        function.

        Returns a namedtuple Batch. See original definition above for in-depth
        explanation of each field.

        If you want to include additional fields in the batch, you can subclass
        this function and return your own "Batch" namedtuple: copy the Batch
        namedtuple at the top of this class, and then add whatever additional
        fields that you want to be able to access. You can then call
        super().batchify(...) to set up the original fields and then set up the
        additional fields in your subclass and return that batch instead.

        :param obs_batch:
            List of vectorized observations

        :param sort:
            Default False, orders the observations by length of vectors. Set to
            true when using torch.nn.utils.rnn.pack_padded_sequence.  Uses the text
            vectors if available, otherwise uses the label vectors if available.
        """
        if len(obs_batch) == 0:
            return Batch()

        valid_obs = [(i, ex) for i, ex in enumerate(obs_batch)
                     if self.is_valid(ex)]

        if len(valid_obs) == 0:
            return Batch()

        valid_inds, exs = zip(*valid_obs)

        # TEXT
        xs, x_lens, context_lens, floors = None, None, None, None
        if any('text_vec' in ex for ex in exs):
            _xs = [ex.get('text_vec', [self.EMPTY]) for ex in exs]
            xs = padded_3d(
                _xs,
                self.NULL_IDX,
                self.use_cuda,
                fp16friendly=self.opt.get('fp16'),
            )
            x_lens = (xs != self.NULL_IDX).sum(dim=-1)  # bsz, context_len
            context_lens = (x_lens != 0).sum(dim=-1)  # bsz
            floors, _ = padded_tensor(
                [make_floor(c_len.item()) for c_len in context_lens],
                use_cuda=self.use_cuda)
            # We do not sort on xs, which for this agent have shape [bsz, context_len, utt_len]
            # if sort:
            #     sort = False  # now we won't sort on labels
            #     xs, x_lens, valid_inds, exs = argsort(
            #         x_lens, xs, x_lens, valid_inds, exs, descending=True
            #     )

        # LABELS
        labels_avail = any('labels_vec' in ex for ex in exs)
        some_labels_avail = (labels_avail
                             or any('eval_labels_vec' in ex for ex in exs))

        ys, y_lens, labels = None, None, None
        if some_labels_avail:
            field = 'labels' if labels_avail else 'eval_labels'

            label_vecs = [ex.get(field + '_vec', self.EMPTY) for ex in exs]
            labels = [ex.get(field + '_choice') for ex in exs]
            y_lens = [y.shape[0] for y in label_vecs]

            ys, y_lens = padded_tensor(label_vecs,
                                       self.NULL_IDX,
                                       self.use_cuda,
                                       fp16friendly=self.opt.get('fp16'))
            y_lens = torch.LongTensor(y_lens)
            if self.use_cuda:
                y_lens = y_lens.cuda()
            # We do not sort examples in batch for this agent
            # if sort and xs is None:
            #     ys, valid_inds, label_vecs, labels, y_lens = argsort(
            #         y_lens, ys, valid_inds, label_vecs, labels, y_lens,
            #         descending=True
            #     )

        # LABEL_CANDIDATES
        cands, cand_vecs = None, None
        if any('label_candidates_vecs' in ex for ex in exs):
            cands = [ex.get('label_candidates', None) for ex in exs]
            cand_vecs = [ex.get('label_candidates_vecs', None) for ex in exs]

        # IMAGE
        imgs = None
        if any('image' in ex for ex in exs):
            imgs = [ex.get('image', None) for ex in exs]

        return Batch(text_vec=xs,
                     text_lengths=x_lens,
                     context_lens=context_lens,
                     floors=floors,
                     label_vec=ys,
                     label_lengths=y_lens,
                     labels=labels,
                     valid_indices=valid_inds,
                     candidates=cands,
                     candidate_vecs=cand_vecs,
                     image=imgs,
                     observations=exs)
Example #18
    def batchify(self, obs_batch, sort=False,
                 is_valid=lambda obs: 'text_vec' in obs or 'image' in obs):
        """Create a batch of valid observations from an unchecked batch.

        A valid observation is one that passes the lambda provided to the
        function, which defaults to checking if the preprocessed 'text_vec'
        field is present which would have been set by this agent's 'vectorize'
        function.

        Returns a namedtuple Batch. See original definition above for in-depth
        explanation of each field.

        If you want to include additional fields in the batch, you can subclass
        this function and return your own "Batch" namedtuple: copy the Batch
        namedtuple at the top of this class, and then add whatever additional
        fields that you want to be able to access. You can then call
        super().batchify(...) to set up the original fields and then set up the
        additional fields in your subclass and return that batch instead.

        :param obs_batch: List of vectorized observations
        :param sort:      Default False, orders the observations by length of
                          vectors. Set to true when using
                          torch.nn.utils.rnn.pack_padded_sequence.
                          Uses the text vectors if available, otherwise uses
                          the label vectors if available.
        :param is_valid:  Function that checks if 'text_vec' is in the
                          observation, determines if an observation is valid
        """
        if len(obs_batch) == 0:
            return Batch()

        valid_obs = [(i, ex) for i, ex in enumerate(obs_batch) if is_valid(ex)]

        if len(valid_obs) == 0:
            return Batch()

        valid_inds, exs = zip(*valid_obs)

        # TEXT
        xs, x_lens = None, None
        if any('text_vec' in ex for ex in exs):
            _xs = [ex.get('text_vec', self.EMPTY) for ex in exs]
            xs, x_lens = padded_tensor(_xs, self.NULL_IDX, self.use_cuda)
            if sort:
                sort = False  # now we won't sort on labels
                xs, x_lens, valid_inds, exs = argsort(
                    x_lens, xs, x_lens, valid_inds, exs, descending=True
                )

        # LABELS
        labels_avail = any('labels_vec' in ex for ex in exs)
        some_labels_avail = (labels_avail or
                             any('eval_labels_vec' in ex for ex in exs))

        ys, y_lens, labels = None, None, None
        if some_labels_avail:
            field = 'labels' if labels_avail else 'eval_labels'

            label_vecs = [ex.get(field + '_vec', self.EMPTY) for ex in exs]
            labels = [ex.get(field + '_choice') for ex in exs]
            y_lens = [y.shape[0] for y in label_vecs]

            ys, y_lens = padded_tensor(label_vecs, self.NULL_IDX, self.use_cuda)
            if sort and xs is None:
                ys, valid_inds, label_vecs, labels, y_lens = argsort(
                    y_lens, ys, valid_inds, label_vecs, labels, y_lens,
                    descending=True
                )

        # LABEL_CANDIDATES
        cands, cand_vecs = None, None
        if any('label_candidates_vecs' in ex for ex in exs):
            cands = [ex.get('label_candidates', None) for ex in exs]
            cand_vecs = [ex.get('label_candidates_vecs', None) for ex in exs]

        # IMAGE
        imgs = None
        if any('image' in ex for ex in exs):
            imgs = [ex.get('image', None) for ex in exs]

        # MEMORIES
        mems = None
        if any('memory_vecs' in ex for ex in exs):
            mems = [ex.get('memory_vecs', None) for ex in exs]

        return Batch(text_vec=xs, text_lengths=x_lens, label_vec=ys,
                     label_lengths=y_lens, labels=labels,
                     valid_indices=valid_inds, candidates=cands,
                     candidate_vecs=cand_vecs, image=imgs, memory_vecs=mems,
                     observations=exs)
Example #19
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None

        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
            logits, preds, _ = self.model(batch.text_vec, batch.label_vec)
        elif self.beam_size == 1:
            # greedy decode
            logits, preds, _ = self.model(batch.text_vec)
        elif self.beam_size > 1:
            out = self.beam_search(self.model,
                                   batch,
                                   self.beam_size,
                                   start=self.START_IDX,
                                   end=self.END_IDX,
                                   pad=self.NULL_IDX,
                                   min_length=self.beam_min_length,
                                   min_n_best=self.beam_min_n_best,
                                   block_ngram=self.beam_block_ngram)
            beam_preds_scores, _, beams = out
            preds, scores = zip(*beam_preds_scores)

            if self.beam_dot_log is True:
                self._write_beam_dots(batch.text_vec, beams)

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            f_scores, f_preds, _ = self.model(batch.text_vec, batch.label_vec)
            score_view = f_scores.view(-1, f_scores.size(-1))
            loss = self.criterion(score_view, batch.label_vec.view(-1))
            # save loss to metrics
            notnull = batch.label_vec.ne(self.NULL_IDX)
            target_tokens = notnull.long().sum().item()
            correct = ((batch.label_vec == f_preds) * notnull).sum().item()
            self.metrics['correct_tokens'] += correct
            self.metrics['loss'] += loss.item()
            self.metrics['num_tokens'] += target_tokens

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(batch.text_vec)
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds]
        return Output(text, cand_choices)
Example #20
    def train_step(self, batch):
        """Train on a single batch of examples."""
        batchsize = batch.text_vec.size(0)
        # helps with memory usage
        self._init_cuda_buffer(self.model, self.criterion, batchsize,
                               self.truncate or 180)
        self.model.train()
        self.zero_grad()
        try:
            dropped_input = self._add_input_dropout(batch)
            out = self.model(dropped_input, batch.label_vec)

            # generated response
            scores = out[0]
            _, preds = scores.max(2)

            score_view = scores.view(-1, scores.size(-1))
            loss = self.criterion(score_view, batch.label_vec.view(-1))
            # save loss to metrics
            notnull = batch.label_vec.ne(self.NULL_IDX)
            target_tokens = notnull.long().sum().item()
            correct = ((batch.label_vec == preds) * notnull).sum().item()
            self.metrics['correct_tokens'] += correct
            self.metrics['loss'] += loss.item()
            self.metrics['num_tokens'] += target_tokens
            loss /= target_tokens  # average loss per token
            loss *= self.opt['lmweight']
            self.metrics['total_batches'] += 1

            rank_loss = 0
            rank_cands = []
            #  now we can train against different candidates
            if 'none' not in self.cand_type:
                # this is our correct candidate (target)
                encoder_states = out[2]
                _, hidden_target, cells_target = self.model._decode_forced(
                    batch.label_vec, encoder_states, with_cells=True)
                target_hidden = out[3]
                target_ranker_input = self.model._get_ranker_input(
                    cells_target, batch.label_lengths)
                target_ranker_output = self.model.forward_post_ranker(
                    target_ranker_input)
                rank_cands.append(target_ranker_output)

            if 'current_labels' in self.cand_type:
                # this is the case when we add random candidates
                for i in range(self.opt['num_rank_cand']):
                    shuffled_targets = torch.cat(
                        [batch.label_vec[i:], batch.label_vec[:i]], dim=0)
                    shifted_lengths = (batch.label_lengths[i:] +
                                       batch.label_lengths[:i])
                    scores, hidden_cand, cells_cand = self.model._decode_forced(
                        shuffled_targets, encoder_states, with_cells=True)
                    ranker_input_cand = self.model._get_ranker_input(
                        cells_cand, shifted_lengths)
                    ranker_output_cand = self.model.forward_post_ranker(
                        ranker_input_cand)
                    rank_cands.append(ranker_output_cand)

            if 'history' in self.cand_type:
                # history cands, we take it from batch.injected_pred, i.e. from input
                # type of the history is controlled via the dataset file
                assert batch.injected_pred is not None, 'history cands work only in case of injected dataset'
                injected_pred_padded, lengths = padded_tensor(
                    batch.injected_pred_vecs, use_cuda=True)

                scores, hidden_cand, cells_cand = self.model._decode_forced(
                    injected_pred_padded, encoder_states, with_cells=True)
                ranker_input_cand = self.model._get_ranker_input(
                    cells_cand, lengths)
                ranker_output_cand = self.model.forward_post_ranker(
                    ranker_input_cand)
                rank_cands.append(ranker_output_cand)

            if 'none' not in self.cand_type:
                p_y_given_x = torch.cat(rank_cands, dim=1)
                rank_targets = torch.Tensor(batchsize).fill_(0).long().to(
                    p_y_given_x.device)
                rank_loss = self.rank_criterion(p_y_given_x, rank_targets)
                rank_loss = self.opt['rankweight'] * rank_loss
                self.metrics['rank_loss'] = rank_loss

            loss = loss + rank_loss
            loss.backward()
            self.update_params()

        except RuntimeError as e:
            # catch out of memory exceptions during fwd/bck (skip batch)
            if 'out of memory' in str(e):
                print('| WARNING: ran out of memory, skipping batch. '
                      'if this happens frequently, decrease batchsize or '
                      'truncate the inputs to the model.')
                self.metrics['total_skipped_batches'] += 1
            else:
                raise e
Example #21
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None
        if getattr(batch, 'movies', None):
            assert hasattr(self.model, 'kbrd')
            self.model.user_representation, _ = self.model.kbrd.user_representation(
                batch.movies)
            self.model.user_representation = self.model.user_representation.detach()

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
        elif self.beam_size == 1:
            # greedy decode
            _, preds, *_ = self.model(*self._model_input(batch), bsz=bsz)
        elif self.beam_size > 1:
            out = self.beam_search(self.model,
                                   batch,
                                   self.beam_size,
                                   start=self.START_IDX,
                                   end=self.END_IDX,
                                   pad=self.NULL_IDX,
                                   min_length=self.beam_min_length,
                                   min_n_best=self.beam_min_n_best,
                                   block_ngram=self.beam_block_ngram)
            beam_preds_scores, _, beams = out
            preds, scores = zip(*beam_preds_scores)

            if self.beam_dot_log is True:
                self._write_beam_dots(batch.text_vec, beams)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, cand_choices)
Example #22
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None
        if getattr(batch, 'movies', None):
            assert hasattr(self.model, 'kbrd')
            self.model.user_representation, self.model.nodes_features = self.model.kbrd.user_representation(
                batch.movies)
            self.model.user_representation = self.model.user_representation.detach()
            self.model.nodes_features = self.model.nodes_features.detach()

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss = self.compute_loss(
                batch)  # noqa: F841  we need the side effects
            self.metrics['loss'] += loss.item()

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning)
        elif self.beam_size == 1:
            # greedy decode
            _, preds, *_ = self.model(*self._model_input(batch), bsz=bsz)
        elif self.beam_size > 1:
            out = self.beam_search(self.model,
                                   batch,
                                   self.beam_size,
                                   start=self.START_IDX,
                                   end=self.END_IDX,
                                   pad=self.NULL_IDX,
                                   min_length=self.beam_min_length,
                                   min_n_best=self.beam_min_n_best,
                                   block_ngram=self.beam_block_ngram)
            beam_preds_scores, _, beams = out
            preds, scores = zip(*beam_preds_scores)

            if self.beam_dot_log is True:
                self._write_beam_dots(batch.text_vec, beams)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        # Replace __unk__ with recommendations when generating responses
        if text is not None and bsz == 1:
            for j, t in enumerate(text):
                scores = F.linear(self.model.user_representation,
                                  self.model.nodes_features,
                                  self.model.kbrd.output.bias)
                outputs = scores.cpu()
                outputs = outputs[0, torch.LongTensor(self.movie_ids)]
                rec_movies = list(
                    map(lambda x: str(self.movie_ids[x]),
                        outputs.argsort(descending=True).tolist()))
                movie_idx = 0
                while "__unk__" in t:
                    pos = t.index("__unk__")
                    while int(rec_movies[movie_idx]) in batch.movies[0]:
                        movie_idx += 1
                    entity = self.entityId2entity[int(rec_movies[movie_idx])]
                    if entity in self.entity2id:
                        mid = self.entity2id[entity]
                    else:
                        mid = entity
                    t = t[:pos] + "\"" + self.mid2movie[mid] + "\"" + t[pos +
                                                                        7:]
                    movie_idx += 1
                text[j] = t

        return Output(text, cand_choices)