def evaluate(self,
                 sentences: List[Sentence],
                 eval_class_metrics: bool = False,
                 mini_batch_size: int = 32,
                 embeddings_in_memory: bool = False) -> (dict, float):
        """
        Evaluates the model with the given list of sentences.
        :param sentences: the list of sentences
        :param eval_class_metrics: boolean indicating whether to print class metrics or not
        :param mini_batch_size: the mini batch size to use
        :param embeddings_in_memory: boolean value indicating, if embeddings should be kept in memory or not
        :return: list of metrics, and the loss
        """
        # no gradients are needed during evaluation
        with torch.no_grad():
            eval_loss = 0

            # split the sentences into mini-batches
            batches = [
                sentences[x:x + mini_batch_size]
                for x in range(0, len(sentences), mini_batch_size)
            ]

            y_pred = []
            y_true = []

            for batch in batches:
                # forward pass, decode predicted labels and compute the batch loss
                scores = self.model.forward(batch)
                labels = self.model.obtain_labels(scores)
                loss = self.model.calculate_loss(scores, batch)

                # optionally clear stored embeddings to keep memory usage flat
                clear_embeddings(
                    batch, also_clear_word_embeddings=not embeddings_in_memory)

                eval_loss += loss

                # accumulate predicted and gold labels as one-hot vectors
                y_pred.extend(
                    convert_labels_to_one_hot(
                        [[label.value for label in sent_labels]
                         for sent_labels in labels], self.label_dict))
                y_true.extend(
                    convert_labels_to_one_hot(
                        [sentence.get_label_names() for sentence in batch],
                        self.label_dict))

            # always compute the micro-averaged metric; per-class metrics are optional
            metrics = [
                calculate_micro_avg_metric(y_true, y_pred, self.label_dict)
            ]
            if eval_class_metrics:
                metrics.extend(
                    calculate_class_metrics(y_true, y_pred, self.label_dict))

            eval_loss /= len(sentences)

            metrics_dict = {metric.name: metric for metric in metrics}

            return metrics_dict, eval_loss
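
The evaluate method above relies on convert_labels_to_one_hot to map each sentence's label names onto a fixed-length one-hot vector over the label dictionary. The snippet below is a minimal, self-contained sketch of that conversion, using a plain list of label names in place of the flair label dictionary; it is illustrative only, not the library's implementation.

from typing import List

def labels_to_one_hot(label_lists: List[List[str]],
                      all_labels: List[str]) -> List[List[int]]:
    # one row per sentence, one column per known label;
    # a column is 1 if that label is assigned to the sentence
    return [[1 if label in sent_labels else 0 for label in all_labels]
            for sent_labels in label_lists]

# usage: three sentences over a label vocabulary of three classes
all_labels = ['class-1', 'class-2', 'class-3']
y_true = labels_to_one_hot([['class-1'], ['class-2', 'class-3'], []], all_labels)
print(y_true)  # [[1, 0, 0], [0, 1, 1], [0, 0, 0]]
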
    def evaluate(self,
                 sentences: List[Sentence],
                 eval_class_metrics: bool = False,
                 mini_batch_size: int = 32,
                 embeddings_in_memory: bool = True) -> (dict, float):
        """
        Evaluates the model with the given list of sentences.
        :param sentences: the list of sentences
        :param mini_batch_size: the mini batch size to use
        :return: list of metrics, and the loss
        """
        eval_loss = 0

        batches = [
            sentences[x:x + mini_batch_size]
            for x in range(0, len(sentences), mini_batch_size)
        ]

        y_pred = []
        y_true = []

        for batch in batches:
            scores = self.model.forward(batch)
            labels = self.model.obtain_labels(scores)
            loss = self.model.calculate_loss(scores, batch)

            eval_loss += loss

            # collect gold and predicted label names for this batch
            y_true.extend([sentence.get_label_names() for sentence in batch])
            y_pred.extend([[label.name for label in sent_labels]
                           for sent_labels in labels])

            if not embeddings_in_memory:
                clear_embeddings(batch)

        # convert the accumulated label names to one-hot vectors over the label dictionary
        y_pred = convert_labels_to_one_hot(y_pred, self.label_dict)
        y_true = convert_labels_to_one_hot(y_true, self.label_dict)

        metrics = [calculate_micro_avg_metric(y_true, y_pred, self.label_dict)]
        if eval_class_metrics:
            metrics.extend(
                calculate_class_metrics(y_true, y_pred, self.label_dict))

        eval_loss /= len(sentences)

        metrics_dict = {metric.name: metric for metric in metrics}

        return metrics_dict, eval_loss
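
Both evaluate variants then hand the one-hot vectors to calculate_micro_avg_metric. A common way to compute a micro-averaged precision/recall/F1 from such vectors is to pool the true positive, false positive, and false negative counts over every class before taking the ratios; the sketch below shows that pooling as an assumption about what the metric computes, not as the library's code.

from typing import List, Tuple

def micro_avg_prf(y_true: List[List[int]],
                  y_pred: List[List[int]]) -> Tuple[float, float, float]:
    # pool counts over every (sentence, class) cell, then compute the ratios once
    tp = fp = fn = 0
    for true_row, pred_row in zip(y_true, y_pred):
        for t, p in zip(true_row, pred_row):
            if p == 1 and t == 1:
                tp += 1
            elif p == 1 and t == 0:
                fp += 1
            elif p == 0 and t == 1:
                fn += 1
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return precision, recall, f1
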
Example 3
def test_calculate_class_metrics():
    y_true, y_pred, labels = init()

    metrics = calculate_class_metrics(y_true, y_pred, labels)

    metrics_dict = {metric.name: metric for metric in metrics}

    assert (3 == len(metrics))

    # _tp/_fp/_tn/_fn hold the per-class true/false positive/negative counts
    assert (1 == metrics_dict['class-1']._tp)
    assert (0 == metrics_dict['class-1']._fp)
    assert (2 == metrics_dict['class-1']._tn)
    assert (0 == metrics_dict['class-1']._fn)

    assert (1 == metrics_dict['class-2']._tp)
    assert (0 == metrics_dict['class-2']._fp)
    assert (1 == metrics_dict['class-2']._tn)
    assert (1 == metrics_dict['class-2']._fn)

    assert (1 == metrics_dict['class-3']._tp)
    assert (0 == metrics_dict['class-3']._fp)
    assert (1 == metrics_dict['class-3']._tn)
    assert (1 == metrics_dict['class-3']._fn)
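
The assertions above check per-class confusion counts, so each class is scored by comparing its own one-hot column of y_true and y_pred row by row. The following sketch illustrates that counting for a single class; it mirrors the semantics the test asserts but is an assumption, not the flair implementation, and the example vectors are hypothetical rather than the test's init() fixture.

from typing import List, Dict

def class_confusion_counts(y_true: List[List[int]],
                           y_pred: List[List[int]],
                           class_index: int) -> Dict[str, int]:
    # compare the column for one class across all sentences
    counts = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}
    for true_row, pred_row in zip(y_true, y_pred):
        t, p = true_row[class_index], pred_row[class_index]
        if t == 1 and p == 1:
            counts['tp'] += 1
        elif t == 0 and p == 1:
            counts['fp'] += 1
        elif t == 0 and p == 0:
            counts['tn'] += 1
        else:
            counts['fn'] += 1
    return counts

# hypothetical one-hot vectors (not the test's fixture); column 0 yields tp=1, fp=0, tn=2, fn=0
y_true = [[1, 1, 1], [0, 1, 1], [0, 0, 0]]
y_pred = [[1, 1, 1], [0, 0, 0], [0, 0, 0]]
print(class_confusion_counts(y_true, y_pred, 0))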