    def rl_train_step(self, batch):
        maxlen = self.question_truncate or 30
        preds, text, nll = self.sample(batch, latest_turn_only=True)
        if self.rl_baseline_method == "self_critic":
            g_preds, g_text, g_scores = self.predict(batch,
                                                     latest_turn_only=True,
                                                     no_grad=True)
            retval = Output(text[:1],
                            log_probs=nll[:1],
                            episode_end=[batch['episode_end']],
                            ques_len=[len(preds[0]) - 1],
                            diverged_outputs=[[(t, nll[i], len(preds[i]) - 1)
                                               for i, t in enumerate(text[1:])]],
                            greedy_master_output=g_text[:1],
                            greedy_output=[[t for t in g_text[1:]]])
        else:
            retval = Output(text[:1],
                            log_probs=nll[:1],
                            episode_end=[batch['episode_end']],
                            ques_len=[len(preds[0]) - 1],
                            diverged_outputs=[[(t, nll[i], len(preds[i]) - 1)
                                               for i, t in enumerate(text[1:])]])
        return retval
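In the `self_critic` branch, the greedy rollout is returned alongside the sampled one so that a downstream REINFORCE update can use the greedy reward as a baseline (self-critical sequence training). A minimal, self-contained sketch of such an update, assuming per-example rewards are computed elsewhere:

import torch

def self_critic_loss(sample_log_probs, sampled_reward, greedy_reward):
    # REINFORCE with a self-critical baseline: the advantage is the sampled
    # reward minus the greedy rollout's reward; gradients flow only through
    # the log-probabilities of the sampled sequence.
    advantage = (sampled_reward - greedy_reward).detach()
    return -(advantage * sample_log_probs).mean()

# toy usage with made-up rewards
log_probs = torch.tensor([-2.3, -1.7], requires_grad=True)
loss = self_critic_loss(log_probs,
                        torch.tensor([0.8, 0.2]),   # reward of sampled output
                        torch.tensor([0.5, 0.5]))   # reward of greedy output
loss.backward()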
Example #2
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        if batch.text_vec is None:
            return

        self.model.eval()
        scores = self.score(batch)
        probs = F.softmax(scores, dim=1)
        _, prediction_id = torch.max(probs.float().cpu(), 1)
        preds = [self.class_list[idx] for idx in prediction_id]

        if batch.labels is None or self.opt['ignore_labels']:
            # interactive mode
            if self.opt.get('print_scores', False):
                preds = self._format_interactive_output(probs, prediction_id)
        else:
            labels = self._get_label_tensor(batch)
            loss = self.criterion(scores, labels)
            self.record_local_metric('loss', AverageMetric.many(loss))

            preds = [self.class_list[idx] for idx in prediction_id]
            labels = batch.labels

            if preds is not None and labels is not None:
                self._update_confusion_matrix(preds, labels)

        if self.opt.get('print_scores', False):
            return Output(preds, probs=probs.cpu())
        else:
            return Output(preds)
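The prediction logic above is plain PyTorch: softmax over the class scores, argmax to an index, then a lookup into `class_list`. The same steps in isolation (the label names here are only illustrative):

import torch
import torch.nn.functional as F

class_list = ['__notok__', '__ok__']             # example label set
scores = torch.tensor([[2.0, 0.5], [0.1, 1.4]])  # (batch, num_classes) logits
probs = F.softmax(scores, dim=1)
_, prediction_id = torch.max(probs.cpu(), 1)
preds = [class_list[idx] for idx in prediction_id]
# preds == ['__notok__', '__ok__']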
Example #3
    def train_step(self, batch):
        """Train on a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.train()
        self.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.opt['candidates'], mode='train')
        scores = self.score_candidates(batch, cand_vecs)
        loss = self.rank_loss(scores, label_inds)

        # Update loss
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += batchsize
        loss.backward()
        self.update_params()

        # Get train predictions
        if self.opt['candidates'] == 'batch':
            self.get_batch_train_metrics(scores)
            return Output()
        if not self.opt.get('train_predict', False):
            warn_once(
                "Some training metrics are omitted for speed. Set the flag "
                "`--train-predict` to calculate train metrics.")
            return Output()
        return self.get_train_preds(scores, label_inds, cands, cand_vecs)
Example #4
    def train_step(self, batch):
        """
        Train on a single batch of examples.
        """
        if batch.text_vec is None:
            return Output()
        self.model.train()
        self.zero_grad()

        # Calculate loss
        labels = self._get_label_tensor(batch)
        scores = self.score(batch)
        loss = self.criterion(scores, labels)
        self.record_local_metric('loss', AverageMetric.many(loss))
        loss = loss.mean()
        self.backward(loss)
        self.update_params()

        # Get predictions
        _, prediction_id = torch.max(scores.float().cpu(), 1)
        preds = [self.class_list[idx] for idx in prediction_id]
        labels_field = self.get_labels_field(batch['observations'])
        labels_lst = self._get_labels(batch['observations'], labels_field)
        self._update_confusion_matrix(preds, labels_lst)

        return Output(preds)
Example #5
    def eval_step(self, batch):
        if batch.text_vec is None:
            return

        self.model.eval()

        batchsize = batch.text_vec.size(0)
        self.metrics['examples'] += batchsize

        if self.subtask == 'dialog':
            _, preds, cand_ranked = self.dialog_step(batch)
            if self.opt['interactive']:
                if self.opt['prev_response_filter']:
                    preds = self.check_prev_response(preds, cand_ranked)
                return Output(preds)
            else:
                return Output(preds, cand_ranked)

        elif self.subtask == 'feedback':
            _, preds, cand_ranked = self.feedback_step(batch)
            return Output(preds, cand_ranked)

        elif self.subtask == 'satisfaction':
            if self.opt['uncertainty_predictor']:
                # Use uncertainty of dialog model to classify bot's previous utterance
                preds = self.predict_satisfaction_by_uncertainty(batch)
            else:
                # Use satisfaction of user response to classify bot's previous response
                _, preds = self.satisfaction_step(batch)
            preds = [str(p) for p in preds]
            return Output(preds)
Example #6
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        if batch.text_vec is None:
            return

        self.model.eval()
        scores = self.score(batch)
        probs = F.softmax(scores, dim=1)
        if self.threshold is None:
            _, prediction_id = torch.max(probs.cpu(), 1)
        else:
            ref_prob = probs.cpu()[:, 0]
            # choose ref class if Prob(ref class) > threshold
            prediction_id = (ref_prob <= self.threshold).to(torch.int64)
        preds = [self.class_list[idx] for idx in prediction_id]

        if batch.labels is None or self.opt['ignore_labels']:
            # interactive mode
            if self.opt.get('print_scores', False):
                preds = self._format_interactive_output(probs, prediction_id)
        else:
            labels = self._get_labels(batch)
            loss = self.criterion(scores, labels)
            self.record_local_metric('loss', AverageMetric.many(loss))
            loss = loss.mean()
            self._update_confusion_matrix(batch, preds)

        if self.opt.get('print_scores', False):
            return Output(preds, probs=probs.cpu())
        else:
            return Output(preds)
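When a decision threshold is set, the snippet keeps the reference class (index 0) only while its probability exceeds the threshold and otherwise falls back to class 1. A small standalone version of that rule:

import torch

probs = torch.tensor([[0.70, 0.30],
                      [0.45, 0.55]])
threshold = 0.6
ref_prob = probs[:, 0]
# choose the reference class (index 0) only if P(ref) > threshold
prediction_id = (ref_prob <= threshold).to(torch.int64)
# prediction_id == tensor([0, 1])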
Example #7
    def train_step(self, batch):
        """
        Train on a single batch of examples.
        """
        if batch.text_vec is None:
            return Output()
        self.model.train()
        self.optimizer.zero_grad()

        # calculate loss
        labels = self._get_labels(batch)
        scores = self.score(batch)
        loss = self.criterion(scores, labels)
        loss.backward()
        self.update_params()

        # update metrics
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += len(batch.text_vec)

        # get predictions
        _, prediction_id = torch.max(scores.cpu(), 1)
        preds = [self.class_list[idx] for idx in prediction_id]
        self._update_confusion_matrix(batch, preds)

        return Output(preds)
Example #8
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        #print(self.model._encoder_input(batch))
        if batch.text_vec is None and batch.image is None:
            return Output('N')
        if batch.text_vec is not None:
            bsz = batch.text_vec.size(0)
        else:
            bsz = len(batch.image)
        self.model.eval()
        cand_scores = None
        token_losses = None

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss, model_output = self.compute_loss(batch, return_output=True)
            if self.output_token_losses:
                token_losses = self._construct_token_losses(
                    batch.label_vec, model_output)

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning,
            )
        else:
            maxlen = self.label_truncate or 20
            n_best_beam_preds_scores, _ = self._generate(
                batch, self.beam_size, maxlen)
            preds = []
            scores = []
            for n_best_list in n_best_beam_preds_scores:
                p, s = zip(*n_best_list)
                preds.append(p)
                scores.append(s)
        cand_choices = None
        self.rank_candidates = True
        if self.rank_candidates:
            # compute MMI to rank candidates
            bestpreds = []
            for i in range(bsz):
                cands, _ = self._pad_tensor(preds[i])
                cand_scores = self.computeMMI(batch.text_vec[i], cands,
                                              list(scores[i]))
                _, ordering = cand_scores.sort()
                bestpreds.append(preds[i][ordering[0]])
        text = [self._v2t(p)
                for p in bestpreds] if bestpreds is not None else None

        if text and self.compute_tokenized_bleu:
            # compute additional bleu scores
            self._compute_fairseq_bleu(batch, preds)
            self._compute_nltk_bleu(batch, text)
        return Output(text, cand_choices, token_losses=token_losses)
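Each entry of `n_best_beam_preds_scores` is a list of (prediction, score) pairs, so `zip(*n_best_list)` splits it into parallel tuples; the candidate scores are then sorted ascending and the front of the ordering is kept. A toy illustration of that unpack-and-rerank pattern (plain numbers stand in for the model-specific MMI scores):

import torch

n_best_list = [("hyp a", 1.2), ("hyp b", 0.4), ("hyp c", 0.9)]
preds, scores = zip(*n_best_list)     # split into parallel tuples
cand_scores = torch.tensor(scores)
_, ordering = cand_scores.sort()      # ascending: lowest score first
best = preds[ordering[0]]             # "hyp b"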
Example #9
    def train_step(self, batch):
        """
        Train on a single batch of examples.
        """
        self._maybe_invalidate_fixed_encs_cache()
        if batch.text_vec is None and batch.image is None:
            return
        batchsize = (
            batch.text_vec.size(0)
            if batch.text_vec is not None
            else batch.image.size(0)
        )
        self.model.train()
        self.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.candidates, mode='train'
        )

        try:
            scores = self.score_candidates(batch, cand_vecs)
            loss = self.criterion(scores, label_inds)
            self.backward(loss)
            self.update_params()
        except RuntimeError as e:
            # catch out of memory exceptions during fwd/bck (skip batch)
            if 'out of memory' in str(e):
                print(
                    '| WARNING: ran out of memory, skipping batch. '
                    'if this happens frequently, decrease batchsize or '
                    'truncate the inputs to the model.'
                )
                return Output()
            else:
                raise e

        # Update loss
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += batchsize

        # Get train predictions
        if self.candidates == 'batch':
            self._get_batch_train_metrics(scores)
            return Output()
        if not self.opt.get('train_predict', False):
            warn_once(
                "Some training metrics are omitted for speed. Set the flag "
                "`--train-predict` to calculate train metrics."
            )
            return Output()
        return self._get_train_preds(scores, label_inds, cands, cand_vecs)
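The try/except around the forward and backward pass is a common pattern for skipping batches that exceed GPU memory instead of crashing the run. A condensed sketch of the pattern, assuming a `model`, `criterion`, and `optimizer` already exist:

import torch

def safe_train_step(model, criterion, optimizer, inputs, targets):
    optimizer.zero_grad()
    try:
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        return loss.item()
    except RuntimeError as e:
        if 'out of memory' in str(e):
            # skip this batch; optionally free cached blocks before continuing
            print('| WARNING: ran out of memory, skipping batch.')
            torch.cuda.empty_cache()
            return None
        raise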
Example #10
    def eval_step(self, batch):
        questions = batch.text_vec
        contexts = padded_3d(batch.memory_vecs)

        if contexts.shape[0] != self.batch_size:
            return Output(
                self.dict.vec2txt(
                    np.random.choice(self.dictionnary_size,
                                     size=contexts.shape[0])).split(" "))

        output = self.recurrent_entity_network(questions, contexts)
        pred = output.argmax(dim=1)

        return Output(self.dict.vec2txt(pred).split(" "))
Example #11
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        if batch.text_vec is None:
            return

        self.model.eval()
        scores = self.score(batch)
        probs = F.softmax(scores, dim=1)

        if self.calc_auc:
            self._update_aucs(batch, probs)

        if self.threshold is None:
            _, prediction_id = torch.max(probs.cpu(), 1)
        else:
            ref_prob = probs.cpu()[:, 0]
            # choose ref class if Prob(ref class) > threshold
            prediction_id = (ref_prob <= self.threshold).to(torch.int64)
        preds = [self.class_list[idx] for idx in prediction_id]
        if batch.labels is None or self.opt['ignore_labels']:
            # interactive mode
            if self.opt.get('print_scores', False):
                preds = self._format_interactive_output(probs, prediction_id)
        else:
            labels = self._get_labels(batch)
            loss = self.criterion(scores, labels)
            self.record_local_metric('loss', AverageMetric.many(loss))
            loss = loss.mean()
            self._update_confusion_matrix(batch, preds)

        if self.opt.get('print_scores', False):
            return Output(preds,
                          class_list=[self.class_list],
                          probs=probs.cpu())
        if self.opt.get('return_cand_scores', False):
            sorted_scores, ranks = probs.sort(1, descending=True)
            sorted_scores = sorted_scores.cpu()
            text_cands = []
            for i in range(0, ranks.size(0)):
                ordered_list = [self.class_list[i] for i in ranks[i]]
                text_cands.append(ordered_list)
            return Output(preds,
                          text_candidates=text_cands,
                          sorted_scores=sorted_scores)
        else:
            return Output(preds)
Example #12
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        self.model.eval()
        cand_params = self._build_cands(batch)
        out = self.model(batch.text_vec, ys=None, cand_params=cand_params)
        scores, cand_scores = out[0], out[1]
        _, preds = scores.max(2)

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            out = self.model(batch.text_vec, batch.label_vec)
            f_scores = out[0]  # forced scores
            _, f_preds = f_scores.max(2)  # forced preds
            score_view = f_scores.view(-1, f_scores.size(-1))
            loss = self.criterion(score_view, batch.label_vec.view(-1))
            # save loss to metrics
            notnull = batch.label_vec.ne(self.NULL_IDX)
            target_tokens = notnull.long().sum().item()
            correct = ((batch.label_vec == f_preds) * notnull).sum().item()
            self.metrics['correct_tokens'] += correct
            self.metrics['loss'] += loss.item()
            self.metrics['num_tokens'] += target_tokens

        cand_choices = None
        if cand_scores is not None:
            cand_preds = cand_scores.sort(1, True)[1]
            # now select the text of the cands based on their scores
            cand_choices = self._pick_cands(cand_preds, cand_params[1],
                                            batch.candidates)

        text = [self._v2t(p) for p in preds.cpu()]
        return Output(text, cand_choices)
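The teacher-forcing branch counts correct tokens only over non-pad positions by masking with `label_vec.ne(NULL_IDX)`. The same bookkeeping on toy tensors:

import torch

NULL_IDX = 0                                   # pad token id (illustrative)
label_vec = torch.tensor([[5, 7, 0, 0],
                          [3, 4, 6, 0]])
f_preds = torch.tensor([[5, 2, 0, 0],
                        [3, 4, 6, 9]])
notnull = label_vec.ne(NULL_IDX)               # mask out padding positions
target_tokens = notnull.long().sum().item()    # 5 real tokens
correct = ((label_vec == f_preds) * notnull).sum().item()  # 4 correct tokens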
Example #13
    def rl_eval_step(self, batch):
        div_batch = batch.get('diverged_batch', None)
        if not div_batch:
            div_batch = batch
        token_losses = None
        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss, model_output = self.compute_loss(batch, return_output=True)
            if self.output_token_losses:
                token_losses = self._construct_token_losses(
                    batch.label_vec, model_output)
        preds = None
        maxlen = self.question_truncate or 30
        if self.eva_sample:
            preds, text, scores = self.sample(div_batch, latest_turn_only=True)
        else:
            preds, text, scores = self.predict(div_batch,
                                               latest_turn_only=True)
        retval = Output(text[:1],
                        log_probs=scores[:1],
                        episode_end=[batch.episode_end],
                        ques_len=[len(preds[0]) - 1],
                        diverged_outputs=[[(t, scores[i], len(preds[i]) - 1)
                                           for i, t in enumerate(text[1:])]])
        return retval
Example #14
    def _eval_dnli_step(self, batch):
        """Evaluate a single batch of examples."""

        assert self.alpha >= 0

        self.model.eval()
        ranked_cands, ordering = self.rank(batch)

        bsz = len(ranked_cands)
        dnli_metrics = []
        for batch_idx in range(bsz):
            dnli_score = {'contradict@1': 0, 'entail@1': 0, 'neutral@1': 0}
            top1_idx = ordering[batch_idx][0].item()
            if top1_idx == 0:
                pass
                # dnli_metrics['dnli_hit@1'] += 1
            elif top1_idx > 0 and top1_idx < 11:
                dnli_score['contradict@1'] += 1
            elif top1_idx >= 11 and top1_idx < 21:
                dnli_score['entail@1'] += 1
            else:
                dnli_score['neutral@1'] += 1
            dnli_metrics.append(dnli_score)

        return Output(text_candidates=ranked_cands, metrics=dnli_metrics)
Example #15
    def probe_step(self, batch):
        """Probe a single batch of examples."""
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None

        if self.opt['probe'] == 'word_embeddings':
            embeddings = self.probe_word_embeddings(batch)
        elif self.opt['probe'] == 'encoder_state':
            embeddings = self.probe_encoder_state(batch)
        elif self.opt['probe'] == 'combined':
            embeddings = self.probe_combined(batch)
        else:
            raise Exception(f"Input type {self.opt['probe']} not understood.")

        try:
            self.probing_outputs = np.vstack(
                (self.probing_outputs, embeddings))
        except:
            # In case probing_outputs empty array
            self.probing_outputs = embeddings

        cand_choices = None
        text = None
        return Output(text, cand_choices)
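The bare `except` above exists only to handle the first batch, when `self.probing_outputs` is still an empty array. A more explicit way to accumulate embeddings, sketched outside the agent:

import numpy as np

def accumulate(probing_outputs, embeddings):
    # stack new embeddings under those collected so far; start fresh when
    # nothing has been collected yet (avoids the bare except)
    if probing_outputs.size == 0:
        return embeddings
    return np.vstack((probing_outputs, embeddings))

probing_outputs = np.empty((0, 0))
probing_outputs = accumulate(probing_outputs, np.random.rand(4, 512))
probing_outputs = accumulate(probing_outputs, np.random.rand(4, 512))
# probing_outputs.shape == (8, 512)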
Example #16
    def train_step(self, batch):
        """
        Train on a single batch of examples.
        """
        if batch.text_vec is None:
            return

        self.model.train()
        self.optimizer.zero_grad()

        batchsize = batch.text_vec.size(0)
        self.metrics['examples'] += batchsize

        if self.subtask == 'dialog':
            loss, preds, _ = self.dialog_step(batch)
        elif self.subtask == 'feedback':
            loss, preds, _ = self.feedback_step(batch)
        elif self.subtask == 'satisfaction':
            loss, preds = self.satisfaction_step(batch)
            preds = [str(p) for p in preds]

        # Weight loss by task-weight
        loss *= self.task_weight[self.subtask]

        loss.backward()
        self.update_params()

        return Output(preds)
Example #17
    def eval_step(self, batch):
        """Generate a response to the input tokens.

        :param batch: parlai.core.torch_agent.Batch, contains tensorized
                      version of observations.

        Return predicted responses (list of strings of length batchsize).
        """
        item1 = {
            "text": "Do you like playing, or watching sports?",
            "labels": ["I like watching sports."],
            "label_candidates": list(batch.observations[0]["label_candidates"])
        }
        item1["label_candidates"].append(item1["labels"][0])

        item2 = {
            "text": "Do you think chess counts as a sport?",
            "labels": ["Yes, I think it does."],
            "label_candidates": list(batch.observations[0]["label_candidates"])
        }
        item2["label_candidates"].append(item2["labels"][0])
        batch = Batch(observations=[item1, item2])
        inputs, candidates = self._tokenize_observation(batch)
        # just predict
        self.model.eval()
        output = self.model(inputs.cuda())
        pred_text = self._get_predictions(output, candidates, 2)
        print(pred_text)
        print("EVALUATING")
        return Output(pred_text)
Example #18
    def eval_step(self, batch):
        """Return confirmation of evaluation."""
        return Output([
            'Evaluating {} (responding to {})!'.format(
                i, batch.observations[i]['text'])
            for i in range(len(batch.text_vec))
        ])
Example #19
    def make_preds_for_inject(self, batch):
        assert self.inject == True
        assert len(batch.labels
                   ) == 1, 'only single batch is assumed here to keep order'
        # human
        human = batch.labels

        # greedy
        out = self.model(batch.text_vec, ys=None)
        scores, _ = out[0], out[1]
        _, preds = scores.max(2)
        greedy = [self._v2t(p) for p in preds]
        encoder_states = out[2]

        # beam
        out = SteroidSeq2seqAgent.beam_search(
            self.model,
            batch,
            encoder_states,
            self.beam_size,
            start=self.START_IDX,
            end=self.END_IDX,
            pad=self.NULL_IDX,
            min_length=self.beam_min_length,
            min_n_best=self.beam_min_n_best,
            beam_block_hypos=[],
            block_ngram=self.beam_block_ngram)
        beam10 = [self._v2t(out[0][0][0][1:])]

        output = Output(beam10, cand_choices, human, greedy, beam10)
        return output
Example #20
    def train_step(self, batch):
        """Train on a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.train()
        self.optimizer.zero_grad()
        mems = self._build_mems(batch.memory_vecs)

        cands, label_inds = self._build_train_cands(batch.label_vec,
                                                    batch.candidate_vecs)

        scores = self.model(batch.text_vec, mems, cands)
        loss = self.rank_loss(scores, label_inds)

        self.metrics['loss'] += loss.item()
        self.metrics['batches'] += batchsize
        _, ranks = scores.sort(1, descending=True)
        for b in range(batchsize):
            rank = (ranks[b] == label_inds[b]).nonzero().item()
            self.metrics['rank'] += 1 + rank
        loss.backward()
        self.update_params()

        # get predictions but not full rankings--too slow to get hits@1 score
        preds = [self._v2t(cands[row[0]]) for row in ranks]
        return Output(preds)
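The rank metric is the 1-based position of the gold candidate in the score ordering. The core computation, separated from the agent:

import torch

scores = torch.tensor([[0.1, 2.0, 0.7],
                       [1.5, 0.2, 0.3]])   # (batch, num_candidates)
label_inds = torch.tensor([2, 0])          # index of the gold candidate
_, ranks = scores.sort(1, descending=True)
rank_metric = 0
for b in range(scores.size(0)):
    rank = (ranks[b] == label_inds[b]).nonzero().item()
    rank_metric += 1 + rank                # 1-based rank of the gold candidate
# rank_metric == 3  (gold candidate placed 2nd, then 1st)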
Example #21
    def train_step(self, batch):
        """Train on a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.train()
        self.optimizer.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.opt['candidates'], mode='train')
        scores = self.score_candidates(batch, cand_vecs)
        loss = self.rank_loss(scores, label_inds)

        # Update metrics
        self.metrics['loss'] += loss.item()
        self.metrics['examples'] += batchsize
        _, ranks = scores.sort(1, descending=True)
        for b in range(batchsize):
            rank = (ranks[b] == label_inds[b]).nonzero().item()
            self.metrics['rank'] += 1 + rank

        loss.backward()
        self.update_params()

        # Get predictions but not full rankings for the sake of speed
        if cand_vecs.dim() == 2:
            preds = [cands[ordering[0]] for ordering in ranks]
        elif cand_vecs.dim() == 3:
            preds = [cands[i][ordering[0]] for i, ordering in enumerate(ranks)]
        return Output(preds)
Example #22
    def eval_step(self, batch):
        images = torch.stack([self.transform(img) for img in batch.image])
        if self.use_cuda:
            # `async` is a reserved word in Python 3.7+; use non_blocking instead
            images = images.cuda(non_blocking=True)

        # Need to collate then sort the captions by length
        cands = [
            self.candidate_helper(label_cands_vec, label_cands,
                                  self.mode == 'test') for label_cands_vec,
            label_cands in zip(batch.candidate_vecs, batch.candidates)
        ]
        self.model.eval()
        # Obtain the image embeddings
        img_embs, _ = self.model(images, None, None)
        ranks = []
        top1 = []
        # Each image has their own caption candidates, so we need to
        # iteratively create the embeddings and rank
        for i, (cap, _, lens, truth_idx) in enumerate(cands):
            _, embs = self.model(None, cap, lens)
            # Hack to pass through the truth label's index to compute the
            # rank and top metrics
            offset = truth_idx if truth_idx is not None else 0
            _, rank, top = self.criterion(img_embs[i, :].unsqueeze(0), embs,
                                          offset)
            ranks += rank
            top1.append(top[0])
        self.metrics['r@'] += ranks
        predictions = []
        for i, score_idx in enumerate(top1):
            predictions.append(cands[i][1][score_idx])
        return Output(predictions, None)
Example #23
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return

        self.model.eval()
        scores = self.score(batch)
        probs = F.softmax(scores, dim=1)
        if self.threshold is None:
            _, prediction_id = torch.max(probs.cpu(), 1)
        else:
            ref_prob = probs.cpu()[:, 0]
            # choose ref class if Prob(ref class) > threshold
            prediction_id = ref_prob <= self.threshold
        preds = [self.class_list[idx] for idx in prediction_id]

        if batch.labels is None:
            # interactive mode
            if self.opt.get('print_scores', False):
                preds = self._format_interactive_output(probs, prediction_id)
        else:
            labels = self._get_labels(batch)
            loss = self.criterion(scores, labels)
            self.metrics['loss'] += loss.item()
            self.metrics['examples'] += len(batch.text_vec)
            self._update_confusion_matrix(batch, preds)

        return Output(preds)
Example #24
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None:
            return
        batchsize = batch.text_vec.size(0)
        self.model.eval()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.opt['eval_candidates'], mode='eval')

        scores = self.score_candidates(batch, cand_vecs)
        _, ranks = scores.sort(1, descending=True)

        # Update metrics
        if label_inds is not None:
            loss = self.rank_loss(scores, label_inds)
            self.metrics['loss'] += loss.item()
            self.metrics['examples'] += batchsize
            for b in range(batchsize):
                rank = (ranks[b] == label_inds[b]).nonzero().item()
                self.metrics['rank'] += 1 + rank

        cand_preds = []
        for i, ordering in enumerate(ranks):
            if cand_vecs.dim() == 2:
                cand_list = cands
            elif cand_vecs.dim() == 3:
                cand_list = cands[i]
            cand_preds.append([cand_list[rank] for rank in ordering])
        preds = [cand_preds[i][0] for i in range(batchsize)]
        return Output(preds, cand_preds)
Example #25
    def eval_step(self, batch):
        """
        Process batch of inputs.

        If the batch includes labels, calculate validation metrics as well.

        :param batch: parlai.core.torch_agent.Batch, contains tensorized
                      version of observations.
        """
        if batch.text_vec is None:
            return
        self.is_training = False
        self.model.eval()
        output = self.model(batch.text_vec, batch.text_mask)
        if batch.label_vec is not None:
            # Interactive mode won't have a gold label
            missed = self.criterion(batch.label_vec, output[0],
                                    batch.text_mask)
            self.metrics['error.sum'] += float(missed.sum())
            self.metrics['eval_exs'] += int(batch.text_lengths.sum())

        pred = predict(output[0], batch.text_lengths, batch.text_vec,
                       batch.text_mask)
        text = self._v2t(batch.text_vec[0])
        self.vars = (text, pred[0], batch.text_vec[0],
                     int(batch.text_lengths[0]), *tuple(v[0]
                                                        for v in output[2:]))
        return Output(text=pred)
Example #26
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        if batch.text_vec is None:
            return
        bsz = batch.text_vec.size(0)
        self.model.eval()
        cand_scores = None
        token_losses = None

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss, model_output = self.compute_loss(batch, return_output=True)
            self.metrics['loss'] += loss.item()
            if self.output_token_losses:
                token_losses = self._construct_token_losses(
                    batch.label_vec, model_output)

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning,
            )
        else:
            maxlen = self.label_truncate or 256
            beam_preds_scores, _ = self._generate(batch, self.beam_size,
                                                  maxlen)
            preds, scores = zip(*beam_preds_scores)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._model_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = padded_tensor(batch.candidate_vecs[i],
                                         self.NULL_IDX, self.use_cuda)
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        return Output(text, cand_choices, token_losses=token_losses)
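Ranking candidates "roughly by ppl" boils down to a per-candidate mean negative log-likelihood over non-pad tokens. The masking arithmetic on its own:

import torch
import torch.nn.functional as F

NULL_IDX = 0
num_cands, seqlen, vocab = 3, 4, 10
scores = torch.randn(num_cands, seqlen, vocab)   # decoder logits per candidate
cands = torch.randint(1, vocab, (num_cands, seqlen))
cands[0, -1] = NULL_IDX                          # pretend one candidate is shorter

cand_losses = F.cross_entropy(
    scores.view(num_cands * seqlen, -1),
    cands.view(-1),
    reduction='none',
).view(num_cands, seqlen)
mask = (cands != NULL_IDX).float()
# mean token loss per candidate, ignoring padding
cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
_, ordering = cand_scores.sort()                 # lowest loss ranks first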
Example #27
    def eval_step(self, batch):
        """Evaluate a single batch of examples."""
        if batch.text_vec is None and batch.image is None:
            return
        batchsize = (batch.text_vec.size(0)
                     if batch.text_vec is not None else batch.image.size(0))
        self.model.eval()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.eval_candidates, mode='eval')

        cand_encs = None
        if self.encode_candidate_vecs:
            # if we cached candidate encodings for a fixed list of candidates,
            # pass those into the score_candidates function
            if self.eval_candidates == 'fixed':
                cand_encs = self.fixed_candidate_encs
            elif self.eval_candidates == 'vocab':
                cand_encs = self.vocab_candidate_encs

        scores = self.score_candidates(batch, cand_vecs, cand_encs=cand_encs)
        if self.rank_top_k > 0:
            _, ranks = scores.topk(min(self.rank_top_k, scores.size(1)),
                                   1,
                                   largest=True)
        else:
            _, ranks = scores.sort(1, descending=True)

        # Update metrics
        if label_inds is not None:
            loss = self.rank_loss(scores, label_inds)
            self.metrics['loss'] += loss.item()
            self.metrics['examples'] += batchsize
            for b in range(batchsize):
                rank = (ranks[b] == label_inds[b]).nonzero()
                rank = rank.item() if len(rank) == 1 else scores.size(1)
                self.metrics['rank'] += 1 + rank
                self.metrics['mrr'] += 1.0 / (1 + rank)

        ranks = ranks.cpu()
        max_preds = self.opt['cap_num_predictions']
        cand_preds = []
        for i, ordering in enumerate(ranks):
            if cand_vecs.dim() == 2:
                cand_list = cands
            elif cand_vecs.dim() == 3:
                cand_list = cands[i]
            # using a generator instead of a list comprehension allows
            # to cap the number of elements.
            cand_preds_generator = (cand_list[rank] for rank in ordering
                                    if rank < len(cand_list))
            cand_preds.append(list(islice(cand_preds_generator, max_preds)))

        if (self.opt.get('repeat_blocking_heuristic', True)
                and self.eval_candidates == 'fixed'):
            cand_preds = self.block_repeats(cand_preds)

        preds = [cand_preds[i][0] for i in range(batchsize)]
        return Output(preds, cand_preds)
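Capping the number of returned candidates with a generator plus `islice` avoids materialising the full ranked list when only the first few are needed:

from itertools import islice

cand_list = ['a', 'b', 'c', 'd', 'e']
ordering = [4, 2, 0, 1, 3]                 # ranked candidate indices
max_preds = 3
cand_preds_generator = (cand_list[rank] for rank in ordering
                        if rank < len(cand_list))
capped = list(islice(cand_preds_generator, max_preds))   # ['e', 'c', 'a']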
Example #28
    def eval_step(self, batch):
        """
        Return confirmation of evaluation.
        """
        return Output([
            f'Evaluating {i} (responding to {batch.text_vec.tolist()})!'
            for i in range(batch.batchsize)
        ])
Example #29
    def eval_step(self, batch):
        if batch.text_vec is None:
            return

        self.model.eval()
        bs = (batch.label_vec == 1).sum().item()
        labels = torch.zeros(bs, dtype=torch.long)

        # create subgraph for propagation
        seed_sets = []
        turns = []
        for i, (b, movieIdx) in enumerate(batch.label_vec.nonzero().tolist()):
            # seed set (i.e. mentioned movies + entitites)
            seed_set = batch.text_vec[b].nonzero().view(-1).tolist()
            labels[i] = movieIdx
            seed_sets.append(seed_set)
            turns.append(batch.turn[b])

        if self.use_cuda:
            labels = labels.cuda()

        return_dict = self.model(seed_sets, labels)

        loss = return_dict["loss"]

        self.metrics["base_loss"] += return_dict["base_loss"].item()
        self.metrics["loss"] += loss.item()
        self.counts["num_tokens"] += bs
        self.counts["num_batches"] += 1

        outputs = return_dict["scores"].cpu()
        outputs = outputs[:, torch.LongTensor(self.movie_ids)]
        _, pred_idx = torch.topk(outputs, k=100, dim=1)
        for b in range(bs):
            target_idx = self.movie_ids.index(labels[b].item())
            self.metrics["recall@1"] += int(
                target_idx in pred_idx[b][:1].tolist())
            self.metrics["recall@10"] += int(
                target_idx in pred_idx[b][:10].tolist())
            self.metrics["recall@50"] += int(
                target_idx in pred_idx[b][:50].tolist())
            self.metrics[f"recall@1@turn{turns[b]}"] += int(
                target_idx in pred_idx[b][:1].tolist())
            self.metrics[f"recall@10@turn{turns[b]}"] += int(
                target_idx in pred_idx[b][:10].tolist())
            self.metrics[f"recall@50@turn{turns[b]}"] += int(
                target_idx in pred_idx[b][:50].tolist())
            self.counts[f"recall@1@turn{turns[b]}"] += 1
            self.counts[f"recall@10@turn{turns[b]}"] += 1
            self.counts[f"recall@50@turn{turns[b]}"] += 1
            self.counts[f"recall@1"] += 1
            self.counts[f"recall@10"] += 1
            self.counts[f"recall@50"] += 1
        return Output(
            list(
                map(lambda x: str(self.movie_ids[x]),
                    outputs.argmax(dim=1).tolist())))
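Recall@k here reduces to checking whether the gold item's index appears among the top-k scores. A toy version of that check:

import torch

scores = torch.tensor([[0.1, 0.9, 0.3, 0.7, 0.2]])   # one example, 5 items
target_idx = 3
_, pred_idx = torch.topk(scores, k=5, dim=1)
recall_at_1 = int(target_idx in pred_idx[0][:1].tolist())   # 0: item 1 wins
recall_at_3 = int(target_idx in pred_idx[0][:3].tolist())   # 1: item 3 is 2nd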
Example #30
    def eval_step(self, batch):
        if batch.batchsize <= 0:
            return
        else:
            bsz = batch.batchsize
        self.model.eval()
        loss, outputs = self.compute_loss(batch, return_output=True)
        batch_best_preds = outputs['pred']['outputs']
        return Output(batch_best_preds)