def predict(self, sentences: Union[List[Sentence], Sentence], mini_batch_size=32) -> List[Sentence]:
    with torch.no_grad():
        if type(sentences) is Sentence:
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # make mini-batches
        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        for batch in batches:
            scores, predicted_ids = self._predict_scores_batch(batch)
            all_tokens = []
            for sentence in batch:
                all_tokens.extend(sentence.tokens)

            for (token, score, predicted_id) in zip(all_tokens, scores, predicted_ids):
                token: Token = token
                # get the predicted tag
                predicted_tag = self.tag_dictionary.get_item_for_index(predicted_id)
                token.add_tag(self.tag_type, predicted_tag, score)

        return sentences
def _evaluate_sequence_tagger(model,
                              sentences: List[Sentence],
                              eval_mini_batch_size: int = 32,
                              embeddings_in_memory: bool = True,
                              out_path: Path = None) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0

        batch_no: int = 0
        batches = [sentences[x:x + eval_mini_batch_size]
                   for x in range(0, len(sentences), eval_mini_batch_size)]

        metric = Metric('Evaluation')

        lines: List[str] = []

        for batch in batches:
            batch_no += 1

            tags, loss = model.forward_labels_and_loss(batch)
            eval_loss += loss

            for (sentence, sent_tags) in zip(batch, tags):
                for (token, tag) in zip(sentence.tokens, sent_tags):
                    token: Token = token
                    token.add_tag_label('predicted', tag)

                    # append both to file for evaluation
                    eval_line = '{} {} {} {}\n'.format(token.text,
                                                       token.get_tag(model.tag_type).value,
                                                       tag.value,
                                                       tag.score)
                    lines.append(eval_line)
                lines.append('\n')

            for sentence in batch:
                # make list of gold tags
                gold_tags = [(tag.tag, str(tag)) for tag in sentence.get_spans(model.tag_type)]
                # make list of predicted tags
                predicted_tags = [(tag.tag, str(tag)) for tag in sentence.get_spans('predicted')]

                # check for true positives, false positives and false negatives
                for tag, prediction in predicted_tags:
                    if (tag, prediction) in gold_tags:
                        metric.add_tp(tag)
                    else:
                        metric.add_fp(tag)

                for tag, gold in gold_tags:
                    if (tag, gold) not in predicted_tags:
                        metric.add_fn(tag)
                    else:
                        metric.add_tn(tag)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

        eval_loss /= len(sentences)

        if out_path is not None:
            with open(out_path, "w", encoding='utf-8') as outfile:
                outfile.write(''.join(lines))

        return metric, eval_loss
def _evaluate_text_classifier(model: flair.nn.Model,
                              sentences: List[Sentence],
                              eval_mini_batch_size: int = 32,
                              embeddings_in_memory: bool = False) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0

        batches = [sentences[x:x + eval_mini_batch_size]
                   for x in range(0, len(sentences), eval_mini_batch_size)]

        metric = Metric('Evaluation')

        for batch in batches:
            labels, loss = model.forward_labels_and_loss(batch)
            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            predictions_for_batch = [[label.value for label in sent_labels] for sent_labels in labels]
            true_values_for_batch = [sentence.get_label_names() for sentence in batch]
            available_labels = model.label_dictionary.get_items()

            for predictions_for_sentence, true_values_for_sentence in zip(predictions_for_batch,
                                                                          true_values_for_batch):
                ModelTrainer._evaluate_sentence_for_text_classification(metric,
                                                                        available_labels,
                                                                        predictions_for_sentence,
                                                                        true_values_for_sentence)

        eval_loss /= len(sentences)

        return metric, eval_loss
def predict(self, sentences: Union[List[Sentence], Sentence], mini_batch_size=32, verbose=False) -> List[Sentence]:
    with torch.no_grad():
        if isinstance(sentences, Sentence):
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # reverse sort all sequences by their length
        filtered_sentences.sort(key=lambda x: len(x), reverse=True)

        # make mini-batches
        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        # progress bar for verbosity
        if verbose:
            batches = tqdm(batches)

        for i, batch in enumerate(batches):
            if verbose:
                batches.set_description(f'Inferencing on batch {i}')

            tags, _ = self.forward_labels_and_loss(batch, sort=False)

            # add one tag label per tag type to each token
            for (sentence, sent_tags) in zip(batch, tags):
                for s in range(len(sentence.tokens)):
                    token: Token = sentence.tokens[s]
                    for t in range(len(self.tag_types)):
                        token.add_tag_label(self.tag_types[t], sent_tags[t][s])

            # clearing token embeddings to save memory
            clear_embeddings(batch, also_clear_word_embeddings=True)

        return sentences
def predict(self, sentences: Union[List[Sentence], Sentence], mini_batch_size=32) -> List[Sentence]:
    with torch.no_grad():
        if type(sentences) is Sentence:
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # make mini-batches
        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        for batch in batches:
            tags, _ = self.forward_labels_and_loss(batch)

            for (sentence, sent_tags) in zip(batch, tags):
                for (token, tag) in zip(sentence.tokens, sent_tags):
                    token: Token = token
                    token.add_tag_label(self.tag_type, tag)

        return sentences
def predict(self, sentences: Union[List[Sentence], Sentence], mini_batch_size=32) -> List[Sentence]:
    if type(sentences) is Sentence:
        sentences = [sentences]

    # remove previous embeddings
    clear_embeddings(sentences)

    # make mini-batches
    batches = [sentences[x:x + mini_batch_size]
               for x in range(0, len(sentences), mini_batch_size)]

    for batch in batches:
        score, tag_seq = self._predict_scores_batch(batch)
        predicted_id = tag_seq
        all_tokens = []
        for sentence in batch:
            all_tokens.extend(sentence.tokens)

        for (token, pred_id) in zip(all_tokens, predicted_id):
            token: Token = token
            # get the predicted tag
            predicted_tag = self.tag_dictionary.get_item_for_index(pred_id)
            token.add_tag(self.tag_type, predicted_tag)

    return sentences
def _evaluate_text_classifier(model: flair.nn.Model,
                              sentences: List[Sentence],
                              eval_mini_batch_size: int = 32,
                              embeddings_in_memory: bool = False,
                              out_path: Path = None) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + eval_mini_batch_size]
            for x in range(0, len(sentences), eval_mini_batch_size)
        ]

        metric = Metric('Evaluation')
        lines: List[str] = []

        for batch in batches:
            labels, loss = model.forward_labels_and_loss(batch)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            sentences_for_batch = [sent.to_plain_string() for sent in batch]
            confidences_for_batch = [[label.score for label in sent_labels] for sent_labels in labels]
            predictions_for_batch = [[label.value for label in sent_labels] for sent_labels in labels]
            true_values_for_batch = [sentence.get_label_names() for sentence in batch]
            available_labels = model.label_dictionary.get_items()

            for sentence, confidence, prediction, true_value in zip(sentences_for_batch,
                                                                    confidences_for_batch,
                                                                    predictions_for_batch,
                                                                    true_values_for_batch):
                eval_line = '{}\t{}\t{}\t{}\n'.format(sentence, true_value, prediction, confidence)
                lines.append(eval_line)

            for predictions_for_sentence, true_values_for_sentence in zip(predictions_for_batch,
                                                                          true_values_for_batch):
                ModelTrainer._evaluate_sentence_for_text_classification(metric,
                                                                        available_labels,
                                                                        predictions_for_sentence,
                                                                        true_values_for_sentence)

        eval_loss /= len(sentences)

        if out_path is not None:
            with open(out_path, "w", encoding='utf-8') as outfile:
                outfile.write(''.join(lines))

        return metric, eval_loss
def predict(
    self,
    sentences: Union[Sentence, List[Sentence]],
    mini_batch_size: int = 32,
    multi_class_prob: bool = False,
) -> List[Sentence]:
    """
    Predicts the class labels for the given sentences. The labels are directly added to the sentences.
    :param sentences: list of sentences
    :param mini_batch_size: mini batch size to use
    :param multi_class_prob: if True, return probabilities for all classes in the multi-class case
    :return: the list of sentences containing the labels
    """
    with torch.no_grad():
        if type(sentences) is Sentence:
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        for batch in batches:
            scores = self.forward(batch)
            predicted_labels = self._obtain_labels(scores, predict_prob=multi_class_prob)

            for (sentence, labels) in zip(batch, predicted_labels):
                sentence.labels = labels

            clear_embeddings(batch)

        return sentences
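# --- Usage sketch (not part of the library code above): how the classifier predict()
# defined above is typically called. The model path is hypothetical; Sentence,
# load_from_file, labels, value and score all appear elsewhere in this listing. ---
from flair.data import Sentence
from flair.models import TextClassifier

classifier = TextClassifier.load_from_file('resources/classifier/best-model.pt')  # hypothetical path

sentence = Sentence('The acting was wonderful but the plot fell flat .')
classifier.predict(sentence, mini_batch_size=32, multi_class_prob=False)

# predict() attaches the labels in place and also returns the sentences
for label in sentence.labels:
    print(label.value, label.score)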
def predict(self, sentences, mini_batch_size=32, verbose=False):
    with torch.no_grad():
        if isinstance(sentences, Sentence):
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # reverse sort all sequences by their length
        filtered_sentences.sort(key=lambda x: len(x), reverse=True)

        # make mini-batches
        batches = [
            filtered_sentences[x:(x + mini_batch_size)]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        # progress bar for verbosity
        if verbose:
            batches = tqdm(batches)

        for (i, batch) in enumerate(batches):
            if verbose:
                batches.set_description('Inferencing on batch {}'.format(i))

            (tags, _) = self.forward_labels_and_loss(batch, sort=False)

            for (sentence, sent_tags) in zip(batch, tags):
                for (token, tag) in zip(sentence.tokens, sent_tags):
                    token: Token = token
                    token.add_tag_label(self.tag_type, tag)

            # clearing token embeddings to save memory
            clear_embeddings(batch, also_clear_word_embeddings=True)

        return sentences
def predict(self, sentences: Union[Sentence, List[Sentence]], mini_batch_size: int = 32) -> List[Sentence]: """ Predicts the class labels for the given sentences. The labels are directly added to the sentences. :param sentences: list of sentences :param mini_batch_size: mini batch size to use :return: the list of sentences containing the labels """ if type(sentences) is Sentence: sentences = [sentences] filtered_sentences = self._filter_empty_sentences(sentences) batches = [ filtered_sentences[x:x + mini_batch_size] for x in range(0, len(filtered_sentences), mini_batch_size) ] for batch in batches: scores = self.forward(batch) predicted_labels = self.obtain_labels(scores) for (sentence, labels) in zip(batch, predicted_labels): sentence.labels = labels clear_embeddings(batch) return sentences
def evaluate(self,
             sentences: List[Sentence],
             eval_class_metrics: bool = False,
             mini_batch_size: int = 32,
             embeddings_in_memory: bool = False,
             metric_name: str = 'MICRO_AVG') -> (dict, float):
    """
    Evaluates the model with the given list of sentences.
    :param sentences: the list of sentences
    :param eval_class_metrics: boolean indicating whether to compute per-class metrics or not
    :param mini_batch_size: the mini batch size to use
    :param embeddings_in_memory: boolean indicating whether embeddings should be kept in memory or not
    :param metric_name: the name of the metric to compute
    :return: the metric and the loss
    """
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + mini_batch_size]
            for x in range(0, len(sentences), mini_batch_size)
        ]

        metric = Metric(metric_name)

        for batch in batches:
            scores = self.model.forward(batch)
            labels = self.model.obtain_labels(scores)
            loss = self.model.calculate_loss(scores, batch)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            for predictions, true_values in zip(
                    [[label.value for label in sent_labels] for sent_labels in labels],
                    [sentence.get_label_names() for sentence in batch]):
                for prediction in predictions:
                    if prediction in true_values:
                        metric.tp()
                        if eval_class_metrics:
                            metric.tp(prediction)
                    else:
                        metric.fp()
                        if eval_class_metrics:
                            metric.fp(prediction)

                for true_value in true_values:
                    if true_value not in predictions:
                        metric.fn()
                        if eval_class_metrics:
                            metric.fn(true_value)
                    else:
                        metric.tn()
                        if eval_class_metrics:
                            metric.tn(true_value)

        eval_loss /= len(sentences)

        return metric, eval_loss
def evaluate(self,
             sentences: List[Sentence],
             eval_class_metrics: bool = False,
             mini_batch_size: int = 32,
             embeddings_in_memory: bool = False) -> (dict, float):
    """
    Evaluates the model with the given list of sentences.
    :param sentences: the list of sentences
    :param eval_class_metrics: boolean indicating whether to compute per-class metrics or not
    :param mini_batch_size: the mini batch size to use
    :param embeddings_in_memory: boolean indicating whether embeddings should be kept in memory or not
    :return: dictionary of metrics, and the loss
    """
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + mini_batch_size]
            for x in range(0, len(sentences), mini_batch_size)
        ]

        y_pred = []
        y_true = []

        for batch in batches:
            scores = self.model.forward(batch)
            labels = self.model.obtain_labels(scores)
            loss = self.model.calculate_loss(scores, batch)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            y_pred.extend(
                convert_labels_to_one_hot(
                    [[label.value for label in sent_labels] for sent_labels in labels],
                    self.label_dict))
            y_true.extend(
                convert_labels_to_one_hot(
                    [sentence.get_label_names() for sentence in batch],
                    self.label_dict))

        metrics = [calculate_micro_avg_metric(y_true, y_pred, self.label_dict)]
        if eval_class_metrics:
            metrics.extend(calculate_class_metrics(y_true, y_pred, self.label_dict))

        eval_loss /= len(sentences)

        metrics_dict = {metric.name: metric for metric in metrics}

        return metrics_dict, eval_loss
def predict(
    self,
    sentences: Union[List[Sentence], Sentence],
    mini_batch_size=32,
    verbose=False,
    clear_word_embeddings=True,
) -> List[Sentence]:
    with torch.no_grad():
        if isinstance(sentences, Sentence):
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # reverse sort all sequences by their length
        filtered_sentences.sort(key=lambda x: len(x), reverse=True)

        # make mini-batches
        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        # progress bar for verbosity
        if verbose:
            batches = tqdm(batches)

        for i, batch in enumerate(batches):
            if verbose:
                batches.set_description(f"Inferencing on batch {i}")

            feature = self.forward(batch)
            tags, all_tags = self._obtain_labels(feature, batch)

            for (sentence, sent_tags, sent_all_tags) in zip(batch, tags, all_tags):
                for (token, tag, token_all_tags) in zip(sentence.tokens, sent_tags, sent_all_tags):
                    token.add_tag_label(self.tag_type, tag)
                    token.add_tags_proba_dist(self.tag_type, token_all_tags)

            # clearing token embeddings to save memory
            clear_embeddings(batch, also_clear_word_embeddings=clear_word_embeddings)

        return sentences
def predict(self, sentences: Union[List[Sentence], Sentence], mini_batch_size=32, verbose=False) -> List[Sentence]: """ Apply the model to an ensemble of Sentence objects :param sentences: ensemble of Sentence objects for which the model predict the tags :param mini_batch_size: number of Sentence in the mini-batch used for prediction :param verbose: :return: list of Sentence objects tagged according to the model """ with torch.no_grad(): # forbid the use of back-prop if isinstance(sentences, Sentence): sentences = [sentences] filtered_sentences = self._filter_empty_sentences(sentences) # remove previous embeddings clear_embeddings(filtered_sentences, also_clear_word_embeddings=True) # revere sort all sequences by their length filtered_sentences.sort(key=lambda x: len(x), reverse=True) # make mini-batches batches = [ filtered_sentences[x:x + mini_batch_size] for x in range(0, len(filtered_sentences), mini_batch_size) ] # progress bar for verbosity if verbose: batches = tqdm(batches) for i, batch in enumerate(batches): if verbose: batches.set_description(f'Inferencing on batch {i}') tags, _ = self.forward_labels_and_loss(batch, sort=False) for (sentence, sent_tags) in zip(batch, tags): for (token, tag) in zip(sentence.tokens, sent_tags): token: Token = token token.add_tag_label(self.tag_type, tag) # clearing token embeddings to save memory clear_embeddings(batch, also_clear_word_embeddings=True) return sentences
def _forward_loss_stability(self,
                            sentences: Union[List[Sentence], Sentence],
                            alpha: float,
                            misspelling_rate: float,
                            char_vocab: dict,
                            lut: dict = {},
                            cmx: np.array = None,
                            embeddings_in_memory: bool = True,
                            verbose: bool = False) -> (torch.tensor, dict):
    """
    Stability objective for classification -> KL divergence (see Zheng 2016, Eq. 10):
        L_stab(x, x') = -sum_j(P(y_j|x) * log(P(y_j|x')))
    The output loss is the sum of the standard loss and the stability objective.
    """
    misspelled_sentences, _ = noise_sentences(sentences, self.misspell_mode, misspelling_rate,
                                              char_vocab, cmx, lut, {}, verbose)

    clear_embeddings(misspelled_sentences, also_clear_word_embeddings=True)

    embeddings, lengths = self._embed_sentences(sentences)
    embeddings_misspell, lengths_misspell = self._embed_sentences(misspelled_sentences)

    if not check_embeddings(sentences, misspelled_sentences, embeddings, embeddings_misspell):
        log.warning("WARNING: embedding of the misspelled text may be invalid!")

    outputs_base, features_base = self._forward(embeddings, lengths)
    outputs_misspell, features_misspell = self._forward(embeddings_misspell, lengths_misspell)

    loss_base = self._calculate_loss(outputs_base, sentences)

    target_distrib = F.softmax(outputs_base, dim=2).transpose(1, 2).detach()
    input_log_distrib = F.log_softmax(outputs_misspell, dim=2).transpose(1, 2)

    loss_stability = alpha * F.kl_div(input_log_distrib, target_distrib,
                                      reduction='none').transpose(2, 1)

    loss_sum = get_masked_sum(loss_stability, lengths)
    loss_mean = get_per_batch_mean(loss_sum, lengths)

    auxiliary_losses = {'loss_base': loss_base, 'loss_kldiv': loss_mean}

    return (loss_base + loss_mean), auxiliary_losses
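# --- Minimal, self-contained illustration (dummy tensors, not the original method) of the
# stability term used in _forward_loss_stability above: the KL divergence between the base
# and misspelled output distributions, computed with F.kl_div. Sequence-length masking and
# the per-batch mean are omitted here. Because the base distribution is detached, this term
# differs from the docstring's -sum_j P(y_j|x) log P(y_j|x') only by a constant (the entropy of P). ---
import torch
import torch.nn.functional as F

outputs_base = torch.randn(2, 5, 4)      # (batch, seq_len, num_tags), dummy logits
outputs_misspell = torch.randn(2, 5, 4)  # logits for the perturbed copies

target_distrib = F.softmax(outputs_base, dim=2).detach()    # P(y_j | x), no gradient
input_log_distrib = F.log_softmax(outputs_misspell, dim=2)  # log P(y_j | x')

# elementwise KL; summing over the tag dimension yields one stability value per token
loss_stability = F.kl_div(input_log_distrib, target_distrib, reduction='none')
print(loss_stability.sum(dim=2).shape)  # torch.Size([2, 5])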
def predict(self,
            sentences: Union[List[Sentence], Sentence],
            mini_batch_size=32,
            verbose=False,
            return_featmat=False) -> List[Sentence]:
    with torch.no_grad():
        if isinstance(sentences, Sentence):
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        # remove previous embeddings
        clear_embeddings(filtered_sentences, also_clear_word_embeddings=True)

        # reverse sort all sequences by their length
        if not return_featmat:
            filtered_sentences.sort(key=lambda x: len(x), reverse=True)

        # make mini-batches
        batches = [filtered_sentences[x:x + mini_batch_size]
                   for x in range(0, len(filtered_sentences), mini_batch_size)]

        # progress bar for verbosity
        if verbose:
            batches = tqdm(batches)

        all_features = []  # [AZ] collect feature matrices per batch

        for i, batch in enumerate(batches):
            if verbose:
                batches.set_description(f'Inferencing on batch {i}')

            if not return_featmat:  # [AZ]
                tags, _ = self.forward_labels_and_loss(batch, sort=False)
            else:
                tags, _, feature_matrix = self.forward_labels_and_loss_feature(batch, sort=False)  # [AZ]
                all_features.append(feature_matrix)

            for (sentence, sent_tags) in zip(batch, tags):
                for (token, tag) in zip(sentence.tokens, sent_tags):
                    token: Token = token
                    token.add_tag_label(self.tag_type, tag)

            # clearing token embeddings to save memory
            clear_embeddings(batch, also_clear_word_embeddings=True)

        if return_featmat:  # [AZ]
            return sentences, all_features
        else:
            return sentences
def _evaluate_text_regressor(model: flair.nn.Model,
                             sentences: List[Sentence],
                             eval_mini_batch_size: int = 32,
                             embeddings_in_memory: bool = False,
                             out_path: Path = None) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + eval_mini_batch_size]
            for x in range(0, len(sentences), eval_mini_batch_size)
        ]

        metric = MetricRegression('Evaluation')
        lines: List[str] = []

        for batch in batches:
            scores, loss = model.forward_labels_and_loss(batch)

            true_values = []
            for sentence in batch:
                for label in sentence.labels:
                    true_values.append(float(label.value))

            results = []
            for score in scores:
                if type(score[0]) is Label:
                    results.append(float(score[0].score))
                else:
                    results.append(float(score[0]))

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            metric.true.extend(true_values)
            metric.pred.extend(results)

        eval_loss /= len(sentences)

        # TODO: evaluation lines are not written out yet
        if out_path is not None:
            with open(out_path, "w", encoding='utf-8') as outfile:
                outfile.write(''.join(lines))

        return metric, eval_loss
def evaluate(self,
             sentences: List[Sentence],
             eval_class_metrics: bool = False,
             mini_batch_size: int = 32,
             embeddings_in_memory: bool = True) -> (dict, float):
    """
    Evaluates the model with the given list of sentences.
    :param sentences: the list of sentences
    :param eval_class_metrics: boolean indicating whether to compute per-class metrics or not
    :param mini_batch_size: the mini batch size to use
    :param embeddings_in_memory: boolean indicating whether embeddings should be kept in memory or not
    :return: dictionary of metrics, and the loss
    """
    eval_loss = 0

    batches = [
        sentences[x:x + mini_batch_size]
        for x in range(0, len(sentences), mini_batch_size)
    ]

    y_pred = []
    y_true = []

    for batch in batches:
        scores = self.model.forward(batch)
        labels = self.model.obtain_labels(scores)
        loss = self.model.calculate_loss(scores, batch)

        eval_loss += loss

        y_true.extend([sentence.get_label_names() for sentence in batch])
        y_pred.extend([[label.name for label in sent_labels] for sent_labels in labels])

        if not embeddings_in_memory:
            clear_embeddings(batch)

    y_pred = convert_labels_to_one_hot(y_pred, self.label_dict)
    y_true = convert_labels_to_one_hot(y_true, self.label_dict)

    metrics = [calculate_micro_avg_metric(y_true, y_pred, self.label_dict)]
    if eval_class_metrics:
        metrics.extend(calculate_class_metrics(y_true, y_pred, self.label_dict))

    eval_loss /= len(sentences)

    metrics_dict = {metric.name: metric for metric in metrics}

    return metrics_dict, eval_loss
def _forward_standard(self, sentences: List[Sentence], spell_check=None):
    if spell_check is not None:
        from robust_ner.spellcheck import correct_sentences
        corrected_sentences = correct_sentences(spell_check, sentences)
        clear_embeddings(corrected_sentences, also_clear_word_embeddings=True)
        embeddings, lengths = self._embed_sentences(corrected_sentences)
    else:
        embeddings, lengths = self._embed_sentences(sentences)

    outputs, _ = self._forward(embeddings, lengths)

    return outputs
def predict(self, sentences, mini_batch_size=32):
    """
    Predicts the class labels for the given sentences. The labels are directly added to the sentences.
    :param sentences: list of sentences
    :param mini_batch_size: mini batch size to use
    :return: the list of sentences containing the labels
    """
    with torch.no_grad():
        if type(sentences) is Sentence:
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        batches = [
            filtered_sentences[x:(x + mini_batch_size)]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        for batch in batches:
            scores = self.forward(batch)
            predicted_labels = self._obtain_labels(scores)

            for (sentence, labels) in zip(batch, predicted_labels):
                sentence.labels = labels

            clear_embeddings(batch)

        return sentences
def _forward_loss_data_augmentation(self,
                                    sentences: Union[List[Sentence], Sentence],
                                    alpha: float,
                                    misspelling_rate: float,
                                    char_vocab: dict,
                                    lut: dict = {},
                                    cmx: np.array = None,
                                    embeddings_in_memory: bool = True,
                                    verbose: bool = False) -> (torch.tensor, dict):
    """
    Data augmentation objective.
    Returns the auxiliary loss as the sum of the standard objectives calculated on the original
    and the perturbed samples.
    """
    misspelled_sentences, _ = noise_sentences(sentences, self.misspell_mode, misspelling_rate,
                                              char_vocab, cmx, lut, {}, verbose)

    clear_embeddings(misspelled_sentences, also_clear_word_embeddings=True)

    embeddings, lengths = self._embed_sentences(sentences)
    embeddings_misspell, lengths_misspell = self._embed_sentences(misspelled_sentences)

    if not check_embeddings(sentences, misspelled_sentences, embeddings, embeddings_misspell):
        log.warning("WARNING: embedding of the misspelled text may be invalid!")

    outputs_base, _ = self._forward(embeddings, lengths)
    outputs_misspell, _ = self._forward(embeddings_misspell, lengths_misspell)

    loss_base = self._calculate_loss(outputs_base, sentences)
    loss_misspell = alpha * self._calculate_loss(outputs_misspell, misspelled_sentences)

    auxiliary_losses = {'loss_base': loss_base, 'loss_misspell': loss_misspell}

    return (loss_base + loss_misspell), auxiliary_losses
def _evaluate_text_classifier(model: flair.nn.Model,
                              sentences: List[Sentence],
                              eval_mini_batch_size: int = 32,
                              embeddings_in_memory: bool = False) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + eval_mini_batch_size]
            for x in range(0, len(sentences), eval_mini_batch_size)
        ]

        metric = Metric('Evaluation')

        for batch in batches:
            labels, loss = model.forward_labels_and_loss(batch)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

            eval_loss += loss

            for predictions, true_values in zip(
                    [[label.value for label in sent_labels] for sent_labels in labels],
                    [sentence.get_label_names() for sentence in batch]):
                for prediction in predictions:
                    if prediction in true_values:
                        metric.add_tp(prediction)
                    else:
                        metric.add_fp(prediction)

                for true_value in true_values:
                    if true_value not in predictions:
                        metric.add_fn(true_value)
                    else:
                        metric.add_tn(true_value)

        eval_loss /= len(sentences)

        return metric, eval_loss
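# --- Side note (illustrative only, not flair's Metric class): how the add_tp / add_fp / add_fn
# counts accumulated in the evaluation loops above translate into the usual micro-averaged
# precision, recall and F1. All names below are hypothetical. ---
from collections import Counter

tp, fp, fn = Counter(), Counter(), Counter()

def micro_scores():
    t, p, n = sum(tp.values()), sum(fp.values()), sum(fn.values())
    precision = t / (t + p) if (t + p) > 0 else 0.0
    recall = t / (t + n) if (t + n) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0
    return precision, recall, f1

# e.g. one correctly predicted PER span, one spurious ORG span, one missed LOC span
tp['PER'] += 1
fp['ORG'] += 1
fn['LOC'] += 1
print(micro_scores())  # (0.5, 0.5, 0.5)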
def _forward_misspelled(self,
                        sentences: Union[List[Sentence], Sentence],
                        misspelling_rate: float,
                        misspell_mode: MisspellingMode,
                        char_vocab: set,
                        cmx: np.array,
                        lut: dict,
                        typos: dict,
                        spell_check=None,
                        verbose: bool = False) -> (torch.tensor, dict):
    misspelled_sentences, _ = noise_sentences(sentences, misspell_mode, misspelling_rate,
                                              char_vocab, cmx, lut, typos, verbose)

    clear_embeddings(misspelled_sentences, also_clear_word_embeddings=True)

    outputs_misspell = self._forward_standard(misspelled_sentences, spell_check)

    return outputs_misspell
def evaluate_sequence_tagger(
    model: SequenceTagger,
    sentences: List[Sentence],
    eval_mini_batch_size: int = 32,
    embeddings_in_memory: bool = True,
) -> (dict, float):
    with torch.no_grad():
        eval_loss = 0
        batch_no: int = 0

        batches = [
            sentences[x:x + eval_mini_batch_size]
            for x in range(0, len(sentences), eval_mini_batch_size)
        ]

        gold_seqs = []
        pred_seqs = []

        for batch in batches:
            batch_no += 1

            features = model.forward(batch)
            loss = model._calculate_loss(features, batch)
            pred_tags = model._obtain_labels(features, batch)

            eval_loss += loss

            for (sentence, pred_sent_tags) in zip(batch, pred_tags):
                gold_tags = [tok.tags['ner'].value for tok in sentence]
                predicted_tags = [l.value for l in pred_sent_tags]
                gold_seqs.append(gold_tags)
                pred_seqs.append(predicted_tags)

            clear_embeddings(batch, also_clear_word_embeddings=not embeddings_in_memory)

        eval_loss /= len(sentences)

        scores = calc_seqtag_eval_scores(gold_seqs, pred_seqs)
        scores['eval-loss'] = eval_loss

        return scores
def predict(self, sentences: Union[Sentence, List[Sentence]], mini_batch_size: int = 32) -> List[Sentence]:
    with torch.no_grad():
        if type(sentences) is Sentence:
            sentences = [sentences]

        filtered_sentences = self._filter_empty_sentences(sentences)

        batches = [
            filtered_sentences[x:x + mini_batch_size]
            for x in range(0, len(filtered_sentences), mini_batch_size)
        ]

        for batch in batches:
            scores = self.forward(batch)

            for (sentence, score) in zip(batch, scores.tolist()):
                sentence.labels = [Label(value=str(score[0]))]

            clear_embeddings(batch)

        return sentences
def evaluate( self, sentences: List[Sentence], eval_mini_batch_size: int = 32, embeddings_in_memory: bool = False, out_path: Path = None, ) -> (Result, float): with torch.no_grad(): eval_loss = 0 batches = [ sentences[x:x + eval_mini_batch_size] for x in range(0, len(sentences), eval_mini_batch_size) ] metric = Metric("Evaluation") lines: List[str] = [] for batch in batches: labels, loss = self.forward_labels_and_loss(batch) clear_embeddings( batch, also_clear_word_embeddings=not embeddings_in_memory) eval_loss += loss sentences_for_batch = [ sent.to_plain_string() for sent in batch ] confidences_for_batch = [[ label.score for label in sent_labels ] for sent_labels in labels] predictions_for_batch = [[ label.value for label in sent_labels ] for sent_labels in labels] true_values_for_batch = [ sentence.get_label_names() for sentence in batch ] available_labels = self.label_dictionary.get_items() for sentence, confidence, prediction, true_value in zip( sentences_for_batch, confidences_for_batch, predictions_for_batch, true_values_for_batch, ): eval_line = "{}\t{}\t{}\t{}\n".format( sentence, true_value, prediction, confidence) lines.append(eval_line) for predictions_for_sentence, true_values_for_sentence in zip( predictions_for_batch, true_values_for_batch): for label in available_labels: if (label in predictions_for_sentence and label in true_values_for_sentence): metric.add_tp(label) elif (label in predictions_for_sentence and label not in true_values_for_sentence): metric.add_fp(label) elif (label not in predictions_for_sentence and label in true_values_for_sentence): metric.add_fn(label) elif (label not in predictions_for_sentence and label not in true_values_for_sentence): metric.add_tn(label) eval_loss /= len(sentences) detailed_result = ( f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()} - f1-score {metric.micro_avg_f_score()}" f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()} - f1-score {metric.macro_avg_f_score()}" ) for class_name in metric.get_classes(): detailed_result += ( f"\n{class_name:<10} tp: {metric.get_tp(class_name)} - fp: {metric.get_fp(class_name)} - " f"fn: {metric.get_fn(class_name)} - tn: {metric.get_tn(class_name)} - precision: " f"{metric.precision(class_name):.4f} - recall: {metric.recall(class_name):.4f} - " f"accuracy: {metric.accuracy(class_name):.4f} - f1-score: " f"{metric.f_score(class_name):.4f}") result = Result( main_score=metric.micro_avg_f_score(), log_line= f"{metric.precision()}\t{metric.recall()}\t{metric.micro_avg_f_score()}", log_header="PRECISION\tRECALL\tF1", detailed_results=detailed_result, ) if out_path is not None: with open(out_path, "w", encoding="utf-8") as outfile: outfile.write("".join(lines)) return result, eval_loss
from __future__ import absolute_import
def train(self, base_path: str, learning_rate: float = 0.1, mini_batch_size: int = 32, max_epochs: int = 50, anneal_factor: float = 0.5, patience: int = 5, train_with_dev: bool = False, embeddings_in_memory: bool = False, checkpoint: bool = False, save_final_model: bool = True, anneal_with_restarts: bool = False, eval_on_train: bool = True): """ Trains a text classification model using the training data of the corpus. :param base_path: the directory to which any results should be written to :param learning_rate: the learning rate :param mini_batch_size: the mini batch size :param max_epochs: the maximum number of epochs to train :param anneal_factor: learning rate will be decreased by this factor :param patience: number of 'bad' epochs before learning rate gets decreased :param train_with_dev: boolean indicating, if the dev data set should be used for training or not :param embeddings_in_memory: boolean indicating, if embeddings should be kept in memory or not :param checkpoint: boolean indicating, whether the model should be save after every epoch or not :param save_final_model: boolean indicating, whether the final model should be saved or not :param anneal_with_restarts: boolean indicating, whether the best model should be reloaded once the learning rate changed or not :param eval_on_train: boolean value indicating, if evaluation metrics should be calculated on training data set or not """ loss_txt = init_output_file(base_path, 'loss.tsv') with open(loss_txt, 'a') as f: f.write( 'EPOCH\tTIMESTAMP\tTRAIN_LOSS\t{}\tDEV_LOSS\t{}\tTEST_LOSS\t{}\n' .format(Metric.tsv_header('TRAIN'), Metric.tsv_header('DEV'), Metric.tsv_header('TEST'))) weight_extractor = WeightExtractor(base_path) optimizer = torch.optim.SGD(self.model.parameters(), lr=learning_rate) anneal_mode = 'min' if train_with_dev else 'max' scheduler: ReduceLROnPlateau = ReduceLROnPlateau(optimizer, factor=anneal_factor, patience=patience, mode=anneal_mode) train_data = self.corpus.train # if training also uses dev data, include in training set if train_with_dev: train_data.extend(self.corpus.dev) # At any point you can hit Ctrl + C to break out of training early. 
try: previous_learning_rate = learning_rate for epoch in range(max_epochs): log.info('-' * 100) bad_epochs = scheduler.num_bad_epochs for group in optimizer.param_groups: learning_rate = group['lr'] # reload last best model if annealing with restarts is enabled if learning_rate != previous_learning_rate and anneal_with_restarts and \ os.path.exists(base_path + "/best-model.pt"): log.info('Resetting to best model ...') self.model.load_from_file(base_path + "/best-model.pt") previous_learning_rate = learning_rate # stop training if learning rate becomes too small if learning_rate < 0.001: log.info('Learning rate too small - quitting training!') break if not self.test_mode: random.shuffle(train_data) self.model.train() batches = [ self.corpus.train[x:x + mini_batch_size] for x in range(0, len(self.corpus.train), mini_batch_size) ] current_loss: float = 0 seen_sentences = 0 modulo = max(1, int(len(batches) / 10)) for batch_no, batch in enumerate(batches): scores = self.model.forward(batch) loss = self.model.calculate_loss(scores, batch) optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0) optimizer.step() seen_sentences += len(batch) current_loss += loss.item() clear_embeddings( batch, also_clear_word_embeddings=not embeddings_in_memory) if batch_no % modulo == 0: log.info( "epoch {0} - iter {1}/{2} - loss {3:.8f}".format( epoch + 1, batch_no, len(batches), current_loss / seen_sentences)) iteration = epoch * len(batches) + batch_no weight_extractor.extract_weights( self.model.state_dict(), iteration) current_loss /= len(train_data) self.model.eval() # if checkpoint is enable, save model at each epoch if checkpoint: self.model.save(base_path + "/checkpoint.pt") log.info('-' * 100) log.info("EPOCH {0}: lr {1:.4f} - bad epochs {2}".format( epoch + 1, learning_rate, bad_epochs)) dev_metric = train_metric = None dev_loss = '_' train_loss = current_loss if eval_on_train: train_metric, train_loss = self._calculate_evaluation_results_for( 'TRAIN', self.corpus.train, embeddings_in_memory, mini_batch_size) if not train_with_dev: dev_metric, dev_loss = self._calculate_evaluation_results_for( 'DEV', self.corpus.dev, embeddings_in_memory, mini_batch_size) with open(loss_txt, 'a') as f: train_metric_str = train_metric.to_tsv( ) if train_metric is not None else Metric.to_empty_tsv() dev_metric_str = dev_metric.to_tsv( ) if dev_metric is not None else Metric.to_empty_tsv() f.write('{}\t{:%H:%M:%S}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format( epoch, datetime.datetime.now(), train_loss, train_metric_str, dev_loss, dev_metric_str, '_', Metric.to_empty_tsv())) # anneal against train loss if training with dev, otherwise anneal against dev score scheduler.step( current_loss) if train_with_dev else scheduler.step( dev_metric.f_score()) current_score = dev_metric.f_score( ) if not train_with_dev else train_metric.f_score() # if we use dev data, remember best model based on dev evaluation score if not train_with_dev and current_score == scheduler.best: self.model.save(base_path + "/best-model.pt") if save_final_model: self.model.save(base_path + "/final-model.pt") log.info('-' * 100) log.info('Testing using best model ...') self.model.eval() if os.path.exists(base_path + "/best-model.pt"): self.model = TextClassifier.load_from_file(base_path + "/best-model.pt") test_metric, test_loss = self.evaluate( self.corpus.test, mini_batch_size=mini_batch_size, eval_class_metrics=True, embeddings_in_memory=embeddings_in_memory, metric_name='TEST') test_metric.print() self.model.train() 
log.info('-' * 100) except KeyboardInterrupt: log.info('-' * 100) log.info('Exiting from training early.') log.info('Saving model ...') with open(base_path + "/final-model.pt", 'wb') as model_save_file: torch.save(self.model, model_save_file, pickle_protocol=4) model_save_file.close() log.info('Done.')
def evaluate( self, sentences: Dataset, eval_mini_batch_size: int = 32, embeddings_in_memory: bool = True, out_path: Path = None, ) -> (Result, float): with torch.no_grad(): eval_loss = 0 batch_no: int = 0 batch_loader = torch.utils.data.DataLoader( sentences, batch_size=eval_mini_batch_size, shuffle=False, num_workers=4, collate_fn=list, ) metric = Metric("Evaluation") lines: List[str] = [] for batch in batch_loader: batch_no += 1 with torch.no_grad(): features = self.forward(batch) loss = self._calculate_loss(features, batch) tags = self._obtain_labels(features, batch) eval_loss += loss for (sentence, sent_tags) in zip(batch, tags): for (token, tag) in zip(sentence.tokens, sent_tags): token: Token = token token.add_tag_label("predicted", tag) # append both to file for evaluation eval_line = "{} {} {} {}\n".format( token.text, token.get_tag(self.tag_type).value, tag.value, tag.score, ) lines.append(eval_line) lines.append("\n") for sentence in batch: # make list of gold tags gold_tags = [ (tag.tag, str(tag)) for tag in sentence.get_spans(self.tag_type) ] # make list of predicted tags predicted_tags = [ (tag.tag, str(tag)) for tag in sentence.get_spans("predicted") ] # check for true positives, false positives and false negatives for tag, prediction in predicted_tags: if (tag, prediction) in gold_tags: metric.add_tp(tag) else: metric.add_fp(tag) for tag, gold in gold_tags: if (tag, gold) not in predicted_tags: metric.add_fn(tag) else: metric.add_tn(tag) clear_embeddings( batch, also_clear_word_embeddings=not embeddings_in_memory ) eval_loss /= len(sentences) if out_path is not None: with open(out_path, "w", encoding="utf-8") as outfile: outfile.write("".join(lines)) detailed_result = ( f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()} - f1-score {metric.micro_avg_f_score()}" f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()} - f1-score {metric.macro_avg_f_score()}" ) for class_name in metric.get_classes(): detailed_result += ( f"\n{class_name:<10} tp: {metric.get_tp(class_name)} - fp: {metric.get_fp(class_name)} - " f"fn: {metric.get_fn(class_name)} - tn: {metric.get_tn(class_name)} - precision: " f"{metric.precision(class_name):.4f} - recall: {metric.recall(class_name):.4f} - " f"accuracy: {metric.accuracy(class_name):.4f} - f1-score: " f"{metric.f_score(class_name):.4f}" ) result = Result( main_score=metric.micro_avg_f_score(), log_line=f"{metric.precision()}\t{metric.recall()}\t{metric.micro_avg_f_score()}", log_header="PRECISION\tRECALL\tF1", detailed_results=detailed_result, ) return result, eval_loss
def train(self, base_path: Union[Path, str], evaluation_metric: EvaluationMetric = EvaluationMetric. MICRO_F1_SCORE, learning_rate: float = 0.1, mini_batch_size: int = 32, eval_mini_batch_size: int = None, max_epochs: int = 100, anneal_factor: float = 0.5, patience: int = 3, anneal_against_train_loss: bool = True, train_with_dev: bool = False, monitor_train: bool = False, embeddings_in_memory: bool = True, checkpoint: bool = False, save_final_model: bool = True, anneal_with_restarts: bool = False, test_mode: bool = False, param_selection_mode: bool = False, **kwargs) -> dict: if eval_mini_batch_size is None: eval_mini_batch_size = mini_batch_size # cast string to Path if type(base_path) is str: base_path = Path(base_path) add_file_handler(log, base_path / 'training.log') log_line(log) log.info(f'Evaluation method: {evaluation_metric.name}') if not param_selection_mode: loss_txt = init_output_file(base_path, 'loss.tsv') with open(loss_txt, 'a') as f: f.write( f'EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS\t{Metric.tsv_header("TRAIN")}\tDEV_LOSS\t{Metric.tsv_header("DEV")}' f'\tTEST_LOSS\t{Metric.tsv_header("TEST")}\n') weight_extractor = WeightExtractor(base_path) optimizer = self.optimizer(self.model.parameters(), lr=learning_rate, **kwargs) if self.optimizer_state is not None: optimizer.load_state_dict(self.optimizer_state) # annealing scheduler anneal_mode = 'min' if anneal_against_train_loss else 'max' if isinstance(optimizer, (AdamW, SGDW)): scheduler = ReduceLRWDOnPlateau(optimizer, factor=anneal_factor, patience=patience, mode=anneal_mode, verbose=True) else: scheduler = ReduceLROnPlateau(optimizer, factor=anneal_factor, patience=patience, mode=anneal_mode, verbose=True) if self.scheduler_state is not None: scheduler.load_state_dict(self.scheduler_state) train_data = self.corpus.train # if training also uses dev data, include in training set if train_with_dev: train_data.extend(self.corpus.dev) dev_score_history = [] dev_loss_history = [] train_loss_history = [] # At any point you can hit Ctrl + C to break out of training early. 
try: previous_learning_rate = learning_rate for epoch in range(0 + self.epoch, max_epochs + self.epoch): log_line(log) try: bad_epochs = scheduler.num_bad_epochs except: bad_epochs = 0 for group in optimizer.param_groups: learning_rate = group['lr'] # reload last best model if annealing with restarts is enabled if learning_rate != previous_learning_rate and anneal_with_restarts and \ (base_path / 'best-model.pt').exists(): log.info('resetting to best model') self.model.load_from_file(base_path / 'best-model.pt') previous_learning_rate = learning_rate # stop training if learning rate becomes too small if learning_rate < 0.0001: log_line(log) log.info('learning rate too small - quitting training!') log_line(log) break if not test_mode: random.shuffle(train_data) batches = [ train_data[x:x + mini_batch_size] for x in range(0, len(train_data), mini_batch_size) ] self.model.train() train_loss: float = 0 seen_sentences = 0 modulo = max(1, int(len(batches) / 10)) for batch_no, batch in enumerate(batches): loss = self.model.forward_loss(batch) optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0) optimizer.step() seen_sentences += len(batch) train_loss += loss.item() clear_embeddings( batch, also_clear_word_embeddings=not embeddings_in_memory) if batch_no % modulo == 0: log.info( f'epoch {epoch + 1} - iter {batch_no}/{len(batches)} - loss ' f'{train_loss / seen_sentences:.8f}') iteration = epoch * len(batches) + batch_no if not param_selection_mode: weight_extractor.extract_weights( self.model.state_dict(), iteration) train_loss /= len(train_data) self.model.eval() log_line(log) log.info( f'EPOCH {epoch + 1} done: loss {train_loss:.4f} - lr {learning_rate:.4f} - bad epochs {bad_epochs}' ) dev_metric = None dev_loss = '_' train_metric = None if monitor_train: train_metric, train_loss = self._calculate_evaluation_results_for( 'TRAIN', self.corpus.train, evaluation_metric, embeddings_in_memory, eval_mini_batch_size) if not train_with_dev: dev_metric, dev_loss = self._calculate_evaluation_results_for( 'DEV', self.corpus.dev, evaluation_metric, embeddings_in_memory, eval_mini_batch_size) if not param_selection_mode: test_metric, test_loss = self._calculate_evaluation_results_for( 'TEST', self.corpus.test, evaluation_metric, embeddings_in_memory, eval_mini_batch_size, base_path / 'test.tsv') if not param_selection_mode: with open(loss_txt, 'a') as f: train_metric_str = train_metric.to_tsv( ) if train_metric is not None else Metric.to_empty_tsv( ) dev_metric_str = dev_metric.to_tsv( ) if dev_metric is not None else Metric.to_empty_tsv() test_metric_str = test_metric.to_tsv( ) if test_metric is not None else Metric.to_empty_tsv( ) f.write( f'{epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}\t{learning_rate:.4f}\t' f'{train_loss}\t{train_metric_str}\t{dev_loss}\t{dev_metric_str}\t_\t{test_metric_str}\n' ) # calculate scores using dev data if available dev_score = 0. 
if not train_with_dev: if evaluation_metric == EvaluationMetric.MACRO_ACCURACY: dev_score = dev_metric.macro_avg_accuracy() elif evaluation_metric == EvaluationMetric.MICRO_ACCURACY: dev_score = dev_metric.micro_avg_accuracy() elif evaluation_metric == EvaluationMetric.MACRO_F1_SCORE: dev_score = dev_metric.macro_avg_f_score() else: dev_score = dev_metric.micro_avg_f_score() # append dev score to score history dev_score_history.append(dev_score) dev_loss_history.append(dev_loss.item()) # anneal against train loss if training with dev, otherwise anneal against dev score current_score = train_loss if anneal_against_train_loss else dev_score scheduler.step(current_score) train_loss_history.append(train_loss) # if checkpoint is enable, save model at each epoch if checkpoint and not param_selection_mode: self.model.save_checkpoint(base_path / 'checkpoint.pt', optimizer.state_dict(), scheduler.state_dict(), epoch + 1, train_loss) # if we use dev data, remember best model based on dev evaluation score if not train_with_dev and not param_selection_mode and current_score == scheduler.best: self.model.save(base_path / 'best-model.pt') # if we do not use dev data for model selection, save final model if save_final_model and not param_selection_mode: self.model.save(base_path / 'final-model.pt') except KeyboardInterrupt: log_line(log) log.info('Exiting from training early.') if not param_selection_mode: log.info('Saving model ...') self.model.save(base_path / 'final-model.pt') log.info('Done.') # test best model on test data final_score = self.final_test(base_path, embeddings_in_memory, evaluation_metric, eval_mini_batch_size) return { 'test_score': final_score, 'dev_score_history': dev_score_history, 'train_loss_history': train_loss_history, 'dev_loss_history': dev_loss_history }
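# --- Usage sketch (assumptions: ModelTrainer is constructed from a model and a corpus as in
# standard flair usage; the model, corpus and paths below are hypothetical). It shows how the
# train() method defined above is typically invoked with the keyword arguments it exposes. ---
from pathlib import Path

from flair.trainers import ModelTrainer

trainer = ModelTrainer(classifier, corpus)  # model and corpus prepared elsewhere

trainer.train(base_path=Path('resources/taggers/example'),
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=100,
              anneal_factor=0.5,
              patience=3,
              train_with_dev=False,
              embeddings_in_memory=True,
              checkpoint=True)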