def evaluate(
    self,
    data_loader: DataLoader,
    out_path: Path = None,
    embeddings_storage_mode: str = "cpu",
) -> (Result, float):

    with torch.no_grad():
        eval_loss = 0

        metric = MetricRegression("Evaluation")

        lines = []
        total_count = 0
        for batch_nr, batch in enumerate(data_loader):

            if isinstance(batch, Sentence):
                batch = [batch]

            scores, loss = self.forward_labels_and_loss(batch)

            true_values = []
            for sentence in batch:
                total_count += 1
                for label in sentence.labels:
                    true_values.append(float(label.value))

            results = []
            for score in scores:
                if type(score[0]) is Label:
                    results.append(float(score[0].score))
                else:
                    results.append(float(score[0]))

            eval_loss += loss

            metric.true.extend(true_values)
            metric.pred.extend(results)

            for sentence, prediction, true_value in zip(batch, results, true_values):
                eval_line = "{}\t{}\t{}\n".format(
                    sentence.to_original_text(), true_value, prediction
                )
                lines.append(eval_line)

            store_embeddings(batch, embeddings_storage_mode)

        eval_loss /= total_count

        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = "{}\t{}\t{}".format(
            metric.mean_squared_error(), metric.spearmanr(), metric.pearsonr()
        )
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = "AVG: mse: {:.4f} - mae: {:.4f} - pearson: {:.4f} - spearman: {:.4f}".format(
            metric.mean_squared_error(),
            metric.mean_absolute_error(),
            metric.pearsonr(),
            metric.spearmanr(),
        )

        result = Result(metric.pearsonr(), log_header, log_line, detailed_result)

        return result, eval_loss
def evaluate(
    self,
    data_loader: DataLoader,
    out_path: Path = None,
    embeddings_storage_mode: str = "cpu",
) -> (Result, float):

    with torch.no_grad():
        eval_loss = 0

        metric = MetricRegression("Evaluation")

        lines: List[str] = []
        total_count = 0
        for batch_nr, batch in enumerate(data_loader):

            if isinstance(batch, Sentence):
                batch = [batch]

            scores, loss = self.forward_labels_and_loss(batch)
            eval_loss += loss

            metric = self.obtain_performance_metric(batch, scores, lines, metric)
            total_count += len(batch)

            store_embeddings(batch, embeddings_storage_mode)

        eval_loss /= total_count

        ##TODO: not saving lines yet
        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = (
            f"AVG: mse: {metric.mean_squared_error():.4f} - "
            f"mae: {metric.mean_absolute_error():.4f} - "
            f"pearson: {metric.pearsonr():.4f} - "
            f"spearman: {metric.spearmanr():.4f}"
        )

        result: Result = Result(
            metric.pearsonr(), log_header, log_line, detailed_result
        )

        return result, eval_loss
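# The variant above delegates per-batch bookkeeping to self.obtain_performance_metric,
# which is not shown in this section. Below is a minimal sketch of that helper,
# reconstructed from the inline logic of the other evaluate() variants; the method
# name and signature come from the call above, but the body is an assumption.
def obtain_performance_metric(self, batch, scores, lines, metric):
    # gold values: one float per label on each sentence
    true_values = []
    for sentence in batch:
        for label in sentence.labels:
            true_values.append(float(label.value))

    # predictions: scores may hold Label objects or raw numbers
    results = []
    for score in scores:
        if type(score[0]) is Label:
            results.append(float(score[0].score))
        else:
            results.append(float(score[0]))

    metric.true.extend(true_values)
    metric.pred.extend(results)

    # one TSV line per sentence for the optional output file
    for sentence, prediction, true_value in zip(batch, results, true_values):
        lines.append(f"{sentence.to_original_text()}\t{true_value}\t{prediction}\n")

    return metric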
def evaluate(
    self,
    sentences: List[Sentence],
    eval_mini_batch_size: int = 32,
    embeddings_in_memory: bool = False,
    out_path: Path = None,
) -> (Result, float):

    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x : x + eval_mini_batch_size]
            for x in range(0, len(sentences), eval_mini_batch_size)
        ]

        metric = MetricRegression("Evaluation")

        lines: List[str] = []
        for batch in batches:

            scores, loss = self.forward_labels_and_loss(batch)

            true_values = []
            for sentence in batch:
                for label in sentence.labels:
                    true_values.append(float(label.value))

            results = []
            for score in scores:
                if type(score[0]) is Label:
                    results.append(float(score[0].score))
                else:
                    results.append(float(score[0]))

            clear_embeddings(
                batch, also_clear_word_embeddings=not embeddings_in_memory
            )

            eval_loss += loss

            metric.true.extend(true_values)
            metric.pred.extend(results)

            for sentence, prediction, true_value in zip(batch, results, true_values):
                eval_line = "{}\t{}\t{}\n".format(
                    sentence.to_original_text(), true_value, prediction
                )
                lines.append(eval_line)

        eval_loss /= len(batches)

        ##TODO: not saving lines yet
        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = (
            f"AVG: mse: {metric.mean_squared_error():.4f} - "
            f"mae: {metric.mean_absolute_error():.4f} - "
            f"pearson: {metric.pearsonr():.4f} - "
            f"spearman: {metric.spearmanr():.4f}"
        )

        result: Result = Result(metric.pearsonr(), log_header, log_line, detailed_result)

        return result, eval_loss
def evaluate(
    self,
    sentences: Union[List[DataPoint], Dataset],
    out_path: Union[str, Path] = None,
    embedding_storage_mode: str = "none",
    mini_batch_size: int = 32,
    num_workers: int = 8,
    **kwargs,
) -> Result:

    # read Dataset into data loader (if list of sentences passed, make Dataset first)
    if not isinstance(sentences, Dataset):
        sentences = SentenceDataset(sentences)
    data_loader = DataLoader(sentences, batch_size=mini_batch_size, num_workers=num_workers)

    with torch.no_grad():
        eval_loss = 0

        metric = MetricRegression("Evaluation")

        lines: List[str] = []
        total_count = 0

        for batch_nr, batch in enumerate(data_loader):

            if isinstance(batch, Sentence):
                batch = [batch]

            scores, loss = self.forward_labels_and_loss(batch)

            true_values = []
            for sentence in batch:
                total_count += 1
                for label in sentence.labels:
                    true_values.append(float(label.value))

            results = []
            for score in scores:
                if type(score[0]) is Label:
                    results.append(float(score[0].score))
                else:
                    results.append(float(score[0]))

            eval_loss += loss

            metric.true.extend(true_values)
            metric.pred.extend(results)

            for sentence, prediction, true_value in zip(batch, results, true_values):
                eval_line = "{}\t{}\t{}\n".format(
                    sentence.to_original_text(), true_value, prediction
                )
                lines.append(eval_line)

            store_embeddings(batch, embedding_storage_mode)

        eval_loss /= total_count

        ##TODO: not saving lines yet
        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = (
            f"AVG: mse: {metric.mean_squared_error():.4f} - "
            f"mae: {metric.mean_absolute_error():.4f} - "
            f"pearson: {metric.pearsonr():.4f} - "
            f"spearman: {metric.spearmanr():.4f}"
        )

        # unlike the other variants, this one packs the loss into the Result
        # and therefore returns a single Result rather than a (Result, loss) tuple
        result: Result = Result(
            main_score=metric.pearsonr(),
            loss=eval_loss,
            log_header=log_header,
            log_line=log_line,
            detailed_results=detailed_result,
        )

        return result
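# Usage sketch for the Dataset-based variant above (an assumption, not from the
# source): "regressor" stands for any model exposing this evaluate() method, and
# the output file name is a placeholder. Note that this variant returns a single
# Result with the loss stored inside it rather than a (Result, loss) tuple.
def run_evaluation_example(regressor, test_sentences):
    result = regressor.evaluate(
        test_sentences,                      # list of Sentence objects or a Dataset
        out_path=Path("predictions.tsv"),
        embedding_storage_mode="none",
        mini_batch_size=32,
    )
    print(result.log_header)
    print(result.log_line)
    print(result.detailed_results)
    return result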
def evaluate_regression(
    self,
    sentences: List[Sentence],
    out_path: Union[str, Path] = None,
    embedding_storage_mode: str = "none",
) -> Result:

    with torch.no_grad():

        buckets = [0 for _ in range(11)]

        eval_loss = torch.zeros(1, device=flair.device)

        metric = MetricRegression("Evaluation")

        lines: List[str] = []

        max_dist_plus_one = max([len(sent) for sent in sentences]) - 1

        num_occurences = [0 for _ in range(max_dist_plus_one)]

        cumulated_values = [0 for _ in range(max_dist_plus_one)]

        for sentence in sentences:

            if len(sentence) < 2:  # we need at least 2 words per sentence
                continue

            scores, loss = self._forward_scores_and_loss(sentence, return_loss=True)

            predictions = scores.tolist()

            # gold labels
            true_values_for_sentence = []
            numberOfPairs = 0
            numberOfWords = len(sentence)
            lines.append(sentence.to_tokenized_string() + "\n")
            for i in range(numberOfWords):
                for j in range(i + 1, min(i + self.max_distance + 2, numberOfWords)):
                    true_dist = j - i - 1
                    pred = predictions[numberOfPairs]

                    true_values_for_sentence.append(true_dist)

                    # for output text file
                    eval_line = f"({i},{j})\t{true_dist}\t{pred:.2f}\n"
                    lines.append(eval_line)

                    # for buckets
                    error = abs(true_dist - pred)
                    if error >= 10:
                        buckets[10] += 1
                    else:
                        buckets[floor(error)] += 1

                    # for average prediction
                    num_occurences[true_dist] += 1
                    cumulated_values[true_dist] += pred

                    numberOfPairs += 1

            eval_loss += loss / numberOfPairs

            metric.true.extend(true_values_for_sentence)
            metric.pred.extend(predictions)

            store_embeddings([sentence], embedding_storage_mode)

        eval_loss /= len(sentences)  # w.r.t self.loss

        # add some statistics to the output
        eval_line = (
            f"Number of Sentences: {len(sentences)}\n"
            "Buckets:\n | 0-1 | 1-2 | 2-3 | 3-4 | 4-5 | 5-6 | 6-7 | 7-8 | 8-9 | 9-10 | >10 |\n"
        )
        lines.append(eval_line)
        eval_line = "| {} | {} | {} | {} | {} | {} | {} | {} | {} | {} | {} |".format(
            buckets[0],
            buckets[1],
            buckets[2],
            buckets[3],
            buckets[4],
            buckets[5],
            buckets[6],
            buckets[7],
            buckets[8],
            buckets[9],
            buckets[10],
        )
        lines.append(eval_line)
        lines.append("\nAverage predicted values per distance:\n")
        eval_line = ""
        for i in range(max_dist_plus_one):
            eval_line += str(i) + ": " + f"{cumulated_values[i] / num_occurences[i]:.2f}" + " "
            if i != 0 and i % 15 == 0:
                eval_line += "\n"
        lines.append(eval_line)

        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = (
            f"AVG: mse: {metric.mean_squared_error():.4f} - "
            f"mae: {metric.mean_absolute_error():.4f} - "
            f"pearson: {metric.pearsonr():.4f} - "
            f"spearman: {metric.spearmanr():.4f}"
        )

        result: Result = Result(
            metric.pearsonr(), log_header, log_line, detailed_result, loss=eval_loss.item()
        )

        return result
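# Small illustration (not part of the source) of the pair enumeration used in
# evaluate_regression above: for a sentence of n_tokens and a model with
# max_distance = d, every token pair (i, j) with j - i <= d + 1 is scored, and
# the gold value is the number of tokens strictly between them, j - i - 1.
def enumerate_scored_pairs(n_tokens: int, max_distance: int):
    pairs = []
    for i in range(n_tokens):
        for j in range(i + 1, min(i + max_distance + 2, n_tokens)):
            pairs.append(((i, j), j - i - 1))  # ((token indices), true distance)
    return pairs

# e.g. enumerate_scored_pairs(5, 2) yields nine pairs with distances 0, 1 and 2 only:
# [((0, 1), 0), ((0, 2), 1), ((0, 3), 2), ((1, 2), 0), ((1, 3), 1),
#  ((1, 4), 2), ((2, 3), 0), ((2, 4), 1), ((3, 4), 0)]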
def evaluate(
    self,
    data_loader: DataLoader,
    out_path: Path = None,
    embeddings_storage_mode: str = "none",
) -> (Result, float):

    with torch.no_grad():
        eval_loss = 0

        metric = MetricRegression("Evaluation")

        lines: List[str] = []
        total_count = 0
        for batch_nr, batch in enumerate(data_loader):

            if isinstance(batch, Sentence):
                batch = [batch]

            scores, loss = self.forward_labels_and_loss(batch)

            true_values = []
            for sentence in batch:
                total_count += 1
                for label in sentence.labels:
                    true_values.append(float(label.value))

            results = []
            for score in scores:
                if type(score[0]) is Label:
                    results.append(float(score[0].score))
                else:
                    results.append(float(score[0]))

            eval_loss += loss

            metric.true.extend(true_values)
            metric.pred.extend(results)

            for sentence, prediction, true_value in zip(batch, results, true_values):
                eval_line = "{}\t{}\t{}\n".format(
                    sentence.to_original_text(), true_value, prediction
                )
                lines.append(eval_line)

            store_embeddings(batch, embeddings_storage_mode)

        eval_loss /= total_count

        ##TODO: not saving lines yet
        if out_path is not None:
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
        log_header = "MSE\tSPEARMAN\tPEARSON"

        detailed_result = (
            f"AVG: mse: {metric.mean_squared_error():.4f} - "
            f"mae: {metric.mean_absolute_error():.4f} - "
            f"pearson: {metric.pearsonr():.4f} - "
            f"spearman: {metric.spearmanr():.4f}"
        )

        result: Result = Result(
            metric.pearsonr(), log_header, log_line, detailed_result
        )

        return result, eval_loss
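# Usage sketch for the DataLoader-based variant above (an assumption, not from
# the source): wrap the evaluation split in the DataLoader already used by the
# functions above and pass it in. "regressor" is a placeholder for a model
# exposing this evaluate() method; file names are placeholders as well.
def run_dataloader_evaluation_example(regressor, dataset, batch_size: int = 32):
    loader = DataLoader(dataset, batch_size=batch_size)
    result, eval_loss = regressor.evaluate(
        loader,
        out_path=Path("eval_predictions.tsv"),
        embeddings_storage_mode="none",
    )
    print(result.detailed_results)
    print(f"evaluation loss: {eval_loss}")
    return result, eval_loss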