def epoch_end(self, outputs, step):
    # Aggregate targets and predictions collected over the epoch.
    tgts, preds = [], []
    for output in outputs:
        tgts += output[0]
        preds += output[1]

    # Per-example SQuAD-style metrics.
    f1s, exacts = [], []
    for tgt, pred in zip(tgts, preds):
        f1s.append(compute_f1(tgt, pred))
        exacts.append(compute_exact(tgt, pred))

    if self.hparams.debug:
        print(f"Preds: {preds}")
        print(f"Real: {tgts}")
    else:
        # Log only the last few examples for inspection.
        self.logger.experiment.log_text(
            "Outputs", f"Preds: {preds[-10:]} \n Real: {tgts[-10:]} \n\n")

    self.log(f"f1_{step}",
             torch.Tensor([np.array(f1s).mean()]).to(self.device),
             prog_bar=True, on_step=False, on_epoch=True,
             sync_dist=self.sync_dist)
    self.log(f"{step}_exact_match",
             torch.Tensor([np.array(exacts).mean()]).to(self.device),
             prog_bar=True, on_step=False, on_epoch=True,
             sync_dist=self.sync_dist)
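Both epoch_end variants, evaluation_step, and the validation loops below lean on compute_f1 / compute_exact helpers that are not shown here. As a reference, a minimal sketch of the standard SQuAD-style token-overlap versions for string answers; the normalization is simplified relative to the official SQuAD evaluation script, and note that some snippets below call same-named helpers on index pairs or confusion matrices instead:

import collections
import re
import string

def _normalize(text):
    # Lowercase, drop punctuation and articles, collapse whitespace
    # (simplified version of the official SQuAD normalization).
    text = "".join(ch for ch in text.lower() if ch not in set(string.punctuation))
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())

def compute_exact(target, pred):
    # 1 if the normalized strings match exactly, else 0.
    return int(_normalize(target) == _normalize(pred))

def compute_f1(target, pred):
    # Token-level F1 between target and prediction.
    tgt_tokens = _normalize(target).split()
    pred_tokens = _normalize(pred).split()
    common = collections.Counter(tgt_tokens) & collections.Counter(pred_tokens)
    num_same = sum(common.values())
    if len(tgt_tokens) == 0 or len(pred_tokens) == 0:
        # If either answer is empty, F1 is 1 only when both are empty.
        return int(tgt_tokens == pred_tokens)
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(tgt_tokens)
    return 2 * precision * recall / (precision + recall)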
def val_epoch_bert(model, val_loader):
    model.eval()
    total = 0
    total_loss = 0
    pred_answers, known_answers, indices = [], [], []
    with torch.no_grad():
        for input_ids, attention_mask, answer_start, answer_end, is_answerable, index in tqdm(
                val_loader, leave=False, desc="Val Batches"):
            inputs = {
                "input_ids": input_ids.to(get_device()),
                "attention_mask": attention_mask.to(get_device())
            }
            start_probs, end_probs = model(inputs)
            # Loss is the sum of the start- and end-position losses.
            start_loss = model.compute_loss(start_probs, answer_start.to(get_device()))
            end_loss = model.compute_loss(end_probs, answer_end.to(get_device()))
            loss = start_loss + end_loss
            total_loss += loss.detach().item()
            total += 1
            pred_answers += find_answer(start_probs.cpu(), end_probs.cpu())
            known_answers += list(zip(answer_start, answer_end))
            indices += index.tolist()
    print(f"Val ave loss: {total_loss / total}")
    print("EM:", compute_exact(known_answers, pred_answers))
    print("F1:", compute_f1(known_answers, pred_answers))
    return known_answers, pred_answers, indices
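get_device is assumed rather than shown in both validation loops. A plausible one-liner, following the usual CUDA-with-CPU-fallback pattern:

import torch

def get_device():
    # Hypothetical helper assumed by val_epoch_bert and val_epoch:
    # prefer CUDA when available, otherwise fall back to CPU.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")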
def evaluate_crf(y_true, y_pred, tag):
    # Convert tag sequences into spans according to the tagging scheme,
    # then score predicted spans against gold spans.
    if tag == 'BIO':
        gold_sentences = [compute_f1_crf(i) for i in y_true]
        pred_sentences = [compute_f1_crf(i) for i in y_pred]
    elif tag == 'BIEOS':
        gold_sentences = [compute_f1_crf_BIEOS(i) for i in y_true]
        pred_sentences = [compute_f1_crf_BIEOS(i) for i in y_pred]
    else:
        raise ValueError(f"Unknown tagging scheme: {tag}")
    metric = compute_f1(gold_sentences, pred_sentences)
    return metric
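compute_f1_crf and compute_f1_crf_BIEOS are not shown; presumably they decode a tag sequence into labeled spans so that compute_f1 can compare gold against predicted spans. A hypothetical sketch of the BIO case, assuming tags like B-PER / I-PER / O and (label, start, end) span tuples:

def compute_f1_crf(tags):
    # Hypothetical sketch: collect (label, start, end) spans from a BIO
    # tag sequence; a span starts at B-X and extends over following I-X.
    spans, start, label = [], None, None
    for i, tag in enumerate(tags):
        if tag.startswith("B-"):
            if start is not None:
                spans.append((label, start, i - 1))
            start, label = i, tag[2:]
        elif tag.startswith("I-") and label == tag[2:]:
            continue  # current span continues
        else:
            if start is not None:
                spans.append((label, start, i - 1))
            start, label = None, None
    if start is not None:
        spans.append((label, start, len(tags) - 1))
    return spans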
def epoch_end(self, outputs, phase):
    tgts, preds = [], []
    for output in outputs:
        tgts += output[0]
        preds += output[1]
    f1s, exacts = [], []
    for tgt, pred in zip(tgts, preds):
        f1s.append(compute_f1(tgt, pred))
        exacts.append(compute_exact(tgt, pred))
    self.log_dict(
        {
            f"{phase}_f1": np.array(f1s).mean(),
            f"{phase}_exact_match": np.array(exacts).mean()
        },
        prog_bar=True, on_step=False, on_epoch=True)
def evaluation_step(self, batch):
    '''Same step for validation and testing.'''
    originals = batch["target_text"]
    pred_token_phrases = self(batch)
    preds = [
        self.tokenizer.decode(pred_tokens, skip_special_tokens=True)
        for pred_tokens in pred_token_phrases
    ]
    exact_matches = []
    f1s = []
    for original, pred in zip(originals, preds):
        exact_matches.append(compute_exact(original, pred))
        f1s.append(compute_f1(original, pred))
    exact_match = np.array(exact_matches).mean()
    f1 = np.array(f1s).mean()
    return exact_match, f1
def val_epoch(model, val_loader):
    model.eval()
    total = 0
    total_loss = 0
    pred_answers, known_answers, indices = [], [], []
    with torch.no_grad():
        for context, c_lens, question, q_lens, is_answerable, answer_start, answer_end, index in tqdm(
                val_loader, leave=False, desc="Val Batches"):
            prob_start, prob_end = model(context.to(get_device()),
                                         question.to(get_device()),
                                         c_lens, q_lens)
            start_loss = model.compute_loss(prob_start, answer_start.to(get_device()))
            end_loss = model.compute_loss(prob_end, answer_end.to(get_device()))
            loss = start_loss + end_loss
            total_loss += loss.detach().item()
            total += 1
            pred_answers += find_answer(prob_start, prob_end, c_lens)
            known_answers += list(
                zip(answer_start.cpu().tolist(), answer_end.cpu().tolist()))
            indices += index.cpu().tolist()
    print(f"Val ave loss: {total_loss / total}")
    print("EM:", compute_exact(known_answers, pred_answers))
    print("F1:", compute_f1(known_answers, pred_answers))
    return pred_answers, known_answers, indices
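find_answer is not shown either; it is called with two arguments in val_epoch_bert and three here, so the signatures differ. A hypothetical greedy decoder matching this three-argument call, assuming prob_start / prob_end hold per-example probability rows and c_lens gives the true context lengths:

import torch

def find_answer(prob_start, prob_end, c_lens):
    # Hypothetical greedy decoder: for each example, pick the most likely
    # start within the true context length, then the most likely end at or
    # after that start. Returns a list of (start, end) index pairs.
    answers = []
    for probs_s, probs_e, length in zip(prob_start, prob_end, c_lens):
        length = int(length)
        start = int(torch.argmax(probs_s[:length]))
        end = start + int(torch.argmax(probs_e[start:length]))
        answers.append((start, end))
    return answers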
print("Optimization Finished!") save_path = saver.save(sess, "checkpoints/tf_deepUD_tri_model.ckpt") with open("valaccs/a" + datestring + ".txt", "w+") as f: f.write("Validation Accuracy:\n") f.write(str(valaccs)) f.write("\nPrecision:\n") f.write(str(precisions)) f.write("\nRecall:\n") f.write(str(recalls)) f.close() print("Model saved in file: %s" % save_path) preds, labels = sess.run([tf.argmax(valpred, 1), val_labels]) conf_mat = tf.math.confusion_matrix(labels, preds) conf_mat = conf_mat.eval(session=sess) # Normalize the Confusion Matrix to get percentages cfsum = np.sum(conf_mat) cf_norm = conf_mat / cfsum plot_filename = "plots/conf_mat" + datestring + ".jpg" print("Confusion Matrix - saved to " + plot_filename) print(conf_mat) sns.heatmap(cf_norm, annot=True, fmt='.2%', cmap='Blues') plt.savefig(plot_filename) # Compute F1 score, precision, and recall precision = compute_precision(conf_mat) recall = compute_recall(conf_mat) f1 = compute_f1(conf_mat) print("F1 Score: %0.6f, Precision: %0.6f, Recall: %0.6f " % (f1, precision, recall))