Example #1
    def calculate_metric(self):
        total_exact_match = 0
        pred_exact_match = 0
        num_samples = len(self.all_target_trees)
        for (beam_pred, target) in zip(self.all_beam_preds, self.all_target_trees):
            # Walk the beam in rank order: any hit counts toward top-k exact
            # match, and a hit at index 0 also counts toward top-1 exact match.
            for (index, pred) in enumerate(beam_pred):
                if self._compare_target_prediction_tokens(pred, target):
                    total_exact_match += 1
                    if index == 0:
                        pred_exact_match += 1
                    break
        exact_match = round(safe_division(pred_exact_match, num_samples) * 100.0, 2)
        exact_match_top_k = round(
            safe_division(total_exact_match, num_samples) * 100.0, 2
        )
        # Beam width k, taken from the first beam when predictions exist.
        k = 0 if len(self.all_preds) == 0 else len(self.all_beam_preds[0])
        length_metrics, length_reports = compute_length_metrics(
            self.all_target_lens, self.all_target_length_preds, self.select_length_beam
        )
        return MaskedSeq2SeqTopKMetrics(
            loss=self.calculate_loss(),
            exact_match=exact_match,
            f1=-1,
            bleu=-1,
            k=k,
            exact_match_top_k=exact_match_top_k,
            f1_top_k=-1,
            bleu_top_k=-1,
            length_metrics=length_metrics,
            length_reports=length_reports,
        )
Example #2
    def calculate_metric(self):
        total_exact_match = 0
        total_f1 = 0.0
        num_samples = len(self.all_targets)

        trg_vocab = self.tensorizers["trg_seq_tokens"].vocab
        bleu_scorer = bleu.Scorer(
            bleu.BleuConfig(
                pad=trg_vocab.get_pad_index(),
                eos=trg_vocab.get_eos_index(),
                unk=trg_vocab.get_unk_index(),
            ))

        for (beam_preds, target) in zip(self.all_preds, self.all_targets):
            pred = beam_preds[0]
            if self._compare_target_prediction_tokens(pred, target):
                total_exact_match += 1
            total_f1 += compute_f1(pred, target)
            # Bleu Metric calculation is always done with tensors on CPU or
            # type checks in fairseq/bleu.py:add() will fail
            bleu_scorer.add(
                torch.IntTensor(target).cpu(),
                torch.IntTensor(pred).cpu())

        loss = self.calculate_loss()
        exact_match = round(
            safe_division(total_exact_match, num_samples) * 100.0, 2)
        f1 = round(safe_division(total_f1, num_samples) * 100.0, 2)
        bleu_score = round(
            0.0 if len(self.all_preds) == 0 else bleu_scorer.score(), 2)

        return Seq2SeqMetrics(loss, exact_match, f1, bleu_score)
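compute_f1 is likewise imported from elsewhere. A common choice for sequence prediction is token-overlap F1, the harmonic mean of token precision and recall; the sketch below is a minimal version under that assumption, not necessarily the exact formula used by the project.

from collections import Counter
from typing import Sequence


def compute_f1(prediction: Sequence[int], target: Sequence[int]) -> float:
    # Token-overlap F1: count tokens shared between prediction and target
    # (as multisets), then take the harmonic mean of precision and recall.
    common = Counter(prediction) & Counter(target)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(prediction)
    recall = num_same / len(target)
    return 2 * precision * recall / (precision + recall)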
Example #3
def compute_topk_classification_metrics(
    predictions: Sequence[LabelTopKPrediction],
    label_names: Sequence[str],
    loss: float,
    log_per_label_metrics: bool = True,
) -> ClassificationMetrics:
    """
    A general function that computes classification metrics given a list of label predictions.

    Args:
        predictions: Label predictions, including the confidence score for each label.
        label_names: Indexed label names.
        loss: Calculated loss.
        log_per_label_metrics: Whether to report per-label precision/recall/F1 values in the macro PRF1 metrics.

    Returns:
        ClassificationMetrics which contains various classification metrics.
    """
    num_correct = 0
    per_label_confusions = PerLabelConfusions()
    for _, topk, label in predictions:
        predicted_labels = list(map(label_names.__getitem__, topk))
        expected_label = label_names[label]

        if expected_label in predicted_labels:
            num_correct += 1
            per_label_confusions.update(expected_label, "TP", 1)
        else:
            per_label_confusions.update(expected_label, "FN", 1)
            per_label_confusions.update(predicted_labels[0], "FP", 1)

    macro_prf1_metrics = per_label_confusions.compute_metrics(
        log_per_label_metrics=log_per_label_metrics
    )

    return ClassificationMetrics(
        accuracy=safe_division(num_correct, len(predictions)),
        macro_prf1_metrics=macro_prf1_metrics,
        per_label_soft_scores={},
        mcc=None,
        roc_auc=None,
        loss=loss,
    )
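The loop above unpacks each prediction as a (scores, topk, label) triple, so any sequence of such triples can be passed in. A hypothetical call, using plain tuples in place of LabelTopKPrediction and a made-up loss value:

label_names = ["negative", "neutral", "positive"]

# Each entry: (per-label scores, indices of the top-k predicted labels,
# index of the expected label). The scores themselves are not used here.
predictions = [
    ([0.1, 0.2, 0.7], [2, 1], 2),  # expected label is in the top-k -> correct
    ([0.6, 0.3, 0.1], [0, 1], 2),  # expected label missed -> one FN plus one FP
]

metrics = compute_topk_classification_metrics(
    predictions, label_names, loss=0.42, log_per_label_metrics=False
)
print(metrics.accuracy)  # 0.5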
Example #4
    def calculate_metric(self):
        num_correct = 0
        total_count = len(self.all_targets)
        trg_vocab = self.tensorizers["trg_seq_tokens"].vocab
        bleu_scorer = bleu.Scorer(
            trg_vocab.get_pad_index(),
            trg_vocab.get_eos_index(),
            trg_vocab.get_unk_index(),
        )
        for beam_pred, target in zip(self.all_preds, self.all_targets):
            pred = beam_pred[0]
            if self._compare_target_prediction_tokens(pred, target):
                num_correct += 1
            # Bleu Metric calculation is always done with tensors on CPU or
            # type checks in fairseq/bleu.py:add() will fail
            bleu_scorer.add(torch.IntTensor(target).cpu(), torch.IntTensor(pred).cpu())

        bleu_score = 0.0 if len(self.all_preds) == 0 else bleu_scorer.score()
        accuracy = safe_division(num_correct, total_count)
        cross_entropy_loss = self.calculate_loss()
        return Seq2SeqMetrics(accuracy, cross_entropy_loss, bleu_score)
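Examples #2 and #4 construct the fairseq BLEU scorer in two different ways: the newer API wraps the special-token indices in a bleu.BleuConfig, while the older API takes them positionally. Below is a small, version-tolerant helper sketch, assuming the vocabulary exposes the same get_*_index() methods used above; the bleu module is passed in as a parameter because its import path varies across fairseq releases.

def make_bleu_scorer(bleu, trg_vocab):
    # Newer fairseq: Scorer(BleuConfig(...)); older fairseq: Scorer(pad, eos, unk).
    pad = trg_vocab.get_pad_index()
    eos = trg_vocab.get_eos_index()
    unk = trg_vocab.get_unk_index()
    if hasattr(bleu, "BleuConfig"):
        return bleu.Scorer(bleu.BleuConfig(pad=pad, eos=eos, unk=unk))
    return bleu.Scorer(pad, eos, unk)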