Example #1
 def evaluate(self,
              data_loader: DataLoader,
              out_path: Path = None,
              embeddings_storage_mode: str = 'cpu') -> (Result, float):
     with torch.no_grad():
         eval_loss = 0
         metric = MetricRegression('Evaluation')
         lines = []
         total_count = 0
         for (batch_nr, batch) in enumerate(data_loader):
             if isinstance(batch, Sentence):
                 batch = [batch]
             (scores, loss) = self.forward_labels_and_loss(batch)
             true_values = []
             for sentence in batch:
                 total_count += 1
                 for label in sentence.labels:
                     true_values.append(float(label.value))
             results = []
             for score in scores:
                 if (type(score[0]) is Label):
                     results.append(float(score[0].score))
                 else:
                     results.append(float(score[0]))
             eval_loss += loss
             metric.true.extend(true_values)
             metric.pred.extend(results)
             for (sentence, prediction,
                  true_value) in zip(batch, results, true_values):
                 eval_line = '{}\t{}\t{}\n'.format(
                     sentence.to_original_text(), true_value, prediction)
                 lines.append(eval_line)
             store_embeddings(batch, embeddings_storage_mode)
         eval_loss /= total_count
         if (out_path is not None):
             with open(out_path, 'w', encoding='utf-8') as outfile:
                 outfile.write(''.join(lines))
         log_line = ''.join([
             '{}'.format(metric.mean_squared_error()), '\t',
             '{}'.format(metric.spearmanr()), '\t',
             '{}'.format(metric.pearsonr())
         ])
         log_header = 'MSE\tSPEARMAN\tPEARSON'
         detailed_result = ''.join([
             'AVG: mse: ', '{:.4f}'.format(metric.mean_squared_error()),
             ' - mae: ', '{:.4f}'.format(metric.mean_absolute_error()),
             ' - pearson: ', '{:.4f}'.format(metric.pearsonr()),
             ' - spearman: ', '{:.4f}'.format(metric.spearmanr())
         ])
         result = Result(metric.pearsonr(), log_header, log_line,
                         detailed_result)
         return (result, eval_loss)
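
The Example #1 variant above accumulates gold values and predictions in a MetricRegression object and reports MSE, MAE, Pearson and Spearman. For orientation, the sketch below computes the same four statistics on a pair of toy value lists using scipy and scikit-learn instead of the Flair helper; the numbers are invented and the snippet is purely illustrative.

from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import mean_absolute_error, mean_squared_error

# toy gold values and predictions, stand-ins for metric.true / metric.pred
true_values = [1.0, 2.0, 3.0, 4.0, 5.0]
predictions = [1.1, 1.9, 3.4, 3.8, 5.2]

mse = mean_squared_error(true_values, predictions)
mae = mean_absolute_error(true_values, predictions)
pearson, _ = pearsonr(true_values, predictions)    # returns (statistic, p-value)
spearman, _ = spearmanr(true_values, predictions)  # returns (statistic, p-value)

print(f"AVG: mse: {mse:.4f} - mae: {mae:.4f} - "
      f"pearson: {pearson:.4f} - spearman: {spearman:.4f}")
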
Example #2
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode: str = "cpu",
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            metric = MetricRegression("Evaluation")

            lines: List[str] = []
            total_count = 0
            for batch_nr, batch in enumerate(data_loader):

                if isinstance(batch, Sentence):
                    batch = [batch]

                scores, loss = self.forward_labels_and_loss(batch)

                eval_loss += loss

                metric = self.obtain_performance_metric(batch, scores, lines, metric)

                total_count += len(batch)

                store_embeddings(batch, embeddings_storage_mode)

            eval_loss /= total_count

            ##TODO: not saving lines yet
            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}"
            )

            result: Result = Result(
                metric.pearsonr(), log_header, log_line, detailed_result
            )

            return result, eval_loss
Example #3
    def evaluate(
        self,
        sentences: List[Sentence],
        eval_mini_batch_size: int = 32,
        embeddings_in_memory: bool = False,
        out_path: Path = None,
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            batches = [
                sentences[x:x + eval_mini_batch_size]
                for x in range(0, len(sentences), eval_mini_batch_size)
            ]

            metric = MetricRegression("Evaluation")

            lines: List[str] = []
            for batch in batches:

                scores, loss = self.forward_labels_and_loss(batch)

                true_values = []
                for sentence in batch:
                    for label in sentence.labels:
                        true_values.append(float(label.value))

                results = []
                for score in scores:
                    if type(score[0]) is Label:
                        results.append(float(score[0].score))
                    else:
                        results.append(float(score[0]))

                clear_embeddings(
                    batch, also_clear_word_embeddings=not embeddings_in_memory)

                eval_loss += loss

                metric.true.extend(true_values)
                metric.pred.extend(results)

                for sentence, prediction, true_value in zip(
                        batch, results, true_values):
                    eval_line = "{}\t{}\t{}\n".format(
                        sentence.to_original_text(), true_value, prediction)
                    lines.append(eval_line)

            eval_loss /= len(batches)

            ##TODO: not saving lines yet
            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}")

            result: Result = Result(metric.pearsonr(), log_header, log_line,
                                    detailed_result)

            return result, eval_loss
Example #4
    def evaluate(self,
                 sentences: Union[List[DataPoint], Dataset],
                 out_path: Union[str, Path] = None,
                 embedding_storage_mode: str = "none",
                 mini_batch_size: int = 32,
                 num_workers: int = 8,
                 **kwargs) -> Result:

        # read Dataset into data loader (if list of sentences passed, make Dataset first)
        if not isinstance(sentences, Dataset):
            sentences = SentenceDataset(sentences)
        data_loader = DataLoader(sentences,
                                 batch_size=mini_batch_size,
                                 num_workers=num_workers)

        with torch.no_grad():
            eval_loss = 0

            metric = MetricRegression("Evaluation")

            lines: List[str] = []
            total_count = 0
            for batch_nr, batch in enumerate(data_loader):

                if isinstance(batch, Sentence):
                    batch = [batch]

                scores, loss = self.forward_labels_and_loss(batch)

                true_values = []
                for sentence in batch:
                    total_count += 1
                    for label in sentence.labels:
                        true_values.append(float(label.value))

                results = []
                for score in scores:
                    if type(score[0]) is Label:
                        results.append(float(score[0].score))
                    else:
                        results.append(float(score[0]))

                eval_loss += loss

                metric.true.extend(true_values)
                metric.pred.extend(results)

                for sentence, prediction, true_value in zip(
                        batch, results, true_values):
                    eval_line = "{}\t{}\t{}\n".format(
                        sentence.to_original_text(), true_value, prediction)
                    lines.append(eval_line)

                store_embeddings(batch, embedding_storage_mode)

            eval_loss /= total_count

            ##TODO: not saving lines yet
            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}")

            result: Result = Result(
                main_score=metric.pearsonr(),
                loss=eval_loss,
                log_header=log_header,
                log_line=log_line,
                detailed_results=detailed_result,
            )

            return result
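
Example #4 above accepts either a Dataset or a plain list of sentences, wraps a list in SentenceDataset, builds the DataLoader itself, and folds the loss into the Result object (so it returns only result). The sketch below mimics that wrapping pattern with plain torch.utils.data classes only; ListDataset and the string items are placeholders for illustration, not Flair API.

import torch
from torch.utils.data import DataLoader, Dataset


class ListDataset(Dataset):
    """Wrap a plain Python list so a DataLoader can serve it,
    analogous to the SentenceDataset wrapping in Example #4."""

    def __init__(self, items):
        self.items = items

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]


items = [f"sentence {i}" for i in range(10)]  # stand-ins for Sentence objects
loader = DataLoader(ListDataset(items), batch_size=4,
                    collate_fn=list)          # keep each batch as a plain list

with torch.no_grad():  # evaluation-style loop, no gradient tracking
    for batch_nr, batch in enumerate(loader):
        print(batch_nr, batch)
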
Example #5
    def evaluate_regression(
        self,
        sentences: List[Sentence],
        out_path: Union[str, Path] = None,
        embedding_storage_mode: str = "none",
    ) -> Result:

        with torch.no_grad():

            buckets = [0 for _ in range(11)]

            eval_loss = torch.zeros(1, device=flair.device)

            metric = MetricRegression("Evaluation")

            lines: List[str] = []

            max_dist_plus_one = max([len(sent) for sent in sentences]) - 1

            num_occurences = [0 for _ in range(max_dist_plus_one)]

            cumulated_values = [0 for _ in range(max_dist_plus_one)]

            for sentence in sentences:

                if len(sentence) < 2:  # we need at least 2 words per sentence
                    continue

                scores, loss = self._forward_scores_and_loss(sentence, return_loss=True)

                predictions = scores.tolist()

                # gold labels
                true_values_for_sentence = []
                numberOfPairs = 0
                numberOfWords = len(sentence)
                lines.append(sentence.to_tokenized_string() + "\n")
                for i in range(numberOfWords):
                    for j in range(i + 1, min(i + self.max_distance + 2, numberOfWords)):
                        true_dist = j - i - 1
                        pred = predictions[numberOfPairs]

                        true_values_for_sentence.append(true_dist)

                        # for output text file
                        eval_line = f"({i},{j})\t{true_dist}\t{pred:.2f}\n"
                        lines.append(eval_line)

                        # for buckets
                        error = abs(true_dist - pred)
                        if error >= 10:
                            buckets[10] += 1
                        else:
                            buckets[floor(error)] += 1

                        # for average prediction
                        num_occurences[true_dist] += 1
                        cumulated_values[true_dist] += pred

                        numberOfPairs += 1

                eval_loss += loss / numberOfPairs

                metric.true.extend(true_values_for_sentence)
                metric.pred.extend(predictions)

                store_embeddings([sentence], embedding_storage_mode)

            eval_loss /= len(sentences)  # w.r.t self.loss

            # add some statistics to the output
            eval_line = f"Number of Sentences: {len(sentences)}\nBuckets:\n | 0-1 | 1-2 | 2-3 | 3-4 | 4-5 | 5-6 | 6-7 | 7-8 | 8-9 | 9-10 | >10 |\n"
            lines.append(eval_line)
            eval_line = "| {} | {} | {} | {} | {} | {} | {} | {} | {} | {} | {} |".format(
                buckets[0],
                buckets[1],
                buckets[2],
                buckets[3],
                buckets[4],
                buckets[5],
                buckets[6],
                buckets[7],
                buckets[8],
                buckets[9],
                buckets[10],
            )
            lines.append(eval_line)
            lines.append("\nAverage predicted values per distance:\n")
            eval_line = ""
            for i in range(max_dist_plus_one):
                eval_line += str(i) + ": " + f"{cumulated_values[i] / num_occurences[i]:.2f}" + " "
                if i != 0 and i % 15 == 0:
                    eval_line += "\n"

            lines.append(eval_line)

            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}"
            )

            result: Result = Result(metric.pearsonr(), log_header, log_line, detailed_result, loss=eval_loss.item())

            return result
Example #6
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode: str = "none",
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            metric = MetricRegression("Evaluation")

            lines: List[str] = []
            total_count = 0
            for batch_nr, batch in enumerate(data_loader):

                if isinstance(batch, Sentence):
                    batch = [batch]

                scores, loss = self.forward_labels_and_loss(batch)

                true_values = []
                for sentence in batch:
                    total_count += 1
                    for label in sentence.labels:
                        true_values.append(float(label.value))

                results = []
                for score in scores:
                    if type(score[0]) is Label:
                        results.append(float(score[0].score))
                    else:
                        results.append(float(score[0]))

                eval_loss += loss

                metric.true.extend(true_values)
                metric.pred.extend(results)

                for sentence, prediction, true_value in zip(
                    batch, results, true_values
                ):
                    eval_line = "{}\t{}\t{}\n".format(
                        sentence.to_original_text(), true_value, prediction
                    )
                    lines.append(eval_line)

                store_embeddings(batch, embeddings_storage_mode)

            eval_loss /= total_count

            ##TODO: not saving lines yet
            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}"
            )

            result: Result = Result(
                metric.pearsonr(), log_header, log_line, detailed_result
            )

            return result, eval_loss
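
The DataLoader-based variants above are called on a trained Flair text-regression model and (apart from Example #4) return a (Result, eval_loss) pair. The usage sketch below is an assumption-laden illustration only: the model path and the corpus object are placeholders, and the import locations for DataLoader and TextRegressor can differ between Flair releases.

from pathlib import Path

from flair.datasets import DataLoader                        # location varies by Flair release
from flair.models.text_regression_model import TextRegressor

# Placeholders: a saved model file and an already-prepared regression corpus
# whose test split carries numeric gold labels.
model = TextRegressor.load("resources/taggers/regression/best-model.pt")
test_loader = DataLoader(corpus.test, batch_size=32)  # `corpus` is assumed to exist

result, eval_loss = model.evaluate(test_loader, out_path=Path("regression_eval.tsv"))

print(result.log_header)
print(result.log_line)
print(result.detailed_results)
print("eval loss:", eval_loss)
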