Example #1
    def epoch_end(self, outputs, step):
        # Flatten the (targets, predictions) pairs collected across batches.
        tgts, preds = [], []
        for output in outputs:
            tgts += output[0]
            preds += output[1]

        f1s, exacts = [], []
        for tgt, pred in zip(tgts, preds):
            f1s.append(compute_f1(tgt, pred))
            exacts.append(compute_exact(tgt, pred))
        if self.hparams.debug:
            print(f"Preds: {preds}")
            print(f"Real: {tgts}")
        else:
            self.logger.experiment.log_text(
                "Outputs", f"Preds: {preds[-10:]}\nReal: {tgts[-10:]}\n\n")

        self.log(f"f1_{step}",
                 torch.Tensor([np.array(f1s).mean()]).to(self.device),
                 prog_bar=True,
                 on_step=False,
                 on_epoch=True,
                 sync_dist=self.sync_dist)

        self.log(f"{step}_exact_match",
                 torch.Tensor([np.array(exacts).mean()]).to(self.device),
                 prog_bar=True,
                 on_step=False,
                 on_epoch=True,
                 sync_dist=self.sync_dist)
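
The examples in this section call compute_f1 and compute_exact without defining them. A minimal sketch in the style of the SQuAD evaluation script, assuming the single-string signature used in Examples #1, #4, and #5 (other examples below bind the same names to different implementations, and real versions may normalize text differently):

# Sketch of the assumed helpers (SQuAD-style, one target vs. one prediction).
import collections

def compute_exact(target, pred):
    # Exact match after light normalization.
    return int(target.strip().lower() == pred.strip().lower())

def compute_f1(target, pred):
    # Token-overlap F1 between target and prediction.
    tgt_tokens = target.strip().lower().split()
    pred_tokens = pred.strip().lower().split()
    common = collections.Counter(tgt_tokens) & collections.Counter(pred_tokens)
    num_same = sum(common.values())
    if not tgt_tokens or not pred_tokens:
        # If either side is empty, F1 is 1 only when both are empty.
        return float(tgt_tokens == pred_tokens)
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(tgt_tokens)
    return 2 * precision * recall / (precision + recall)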
Example #2
def val_epoch_bert(model, val_loader):
  model.eval()
  total = 0
  total_loss = 0
  pred_answers, known_answers, indices = [], [], []

  with torch.no_grad():
    for input_ids, attention_mask, answer_start, answer_end, is_answerable, index in tqdm(val_loader, leave=False, desc="Validation Batches"):

      inputs = {"input_ids": input_ids.to(get_device()),
                "attention_mask": attention_mask.to(get_device())}

      start_probs, end_probs = model(inputs)
      
      start_loss = model.compute_loss(start_probs, answer_start.to(get_device()))
      end_loss = model.compute_loss(end_probs, answer_end.to(get_device()))

      loss = start_loss + end_loss
      total_loss += loss.detach().item()
      total += 1

      pred_answers += find_answer(start_probs.cpu(), end_probs.cpu())
      known_answers += list(zip(answer_start.tolist(), answer_end.tolist()))
      indices += index.tolist()

    print(f"Val ave loss: {total_loss / total}")

    print("EM:", compute_exact(known_answers, pred_answers))
    print("F1:", compute_f1(known_answers, pred_answers))

    return known_answers, pred_answers, indices
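
Example #2 also relies on a find_answer helper that is not shown. A plausible sketch, assuming it scans each batch element for the (start, end) pair with the highest joint probability, with start <= end; the variant in Example #6 additionally takes the context lengths, presumably to mask out padding positions:

# Hypothetical sketch of find_answer: for each batch element, pick the
# (start, end) pair that maximizes start_probs[i] * end_probs[j], i <= j.
def find_answer(start_probs, end_probs):
    answers = []
    for s_row, e_row in zip(start_probs, end_probs):
        best, best_score = (0, 0), float("-inf")
        for i in range(len(s_row)):
            for j in range(i, len(e_row)):
                score = s_row[i].item() * e_row[j].item()
                if score > best_score:
                    best_score, best = score, (i, j)
        answers.append(best)
    return answers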
Example #3
def evaluate_crf(y_true, y_pred, tag):
    # Convert each tag sequence into spans before scoring.
    if tag == 'BIO':
        gold_sentences = [compute_f1_crf(i) for i in y_true]
        pred_sentences = [compute_f1_crf(i) for i in y_pred]
    elif tag == 'BIEOS':
        gold_sentences = [compute_f1_crf_BIEOS(i) for i in y_true]
        pred_sentences = [compute_f1_crf_BIEOS(i) for i in y_pred]
    else:
        raise ValueError(f"Unknown tagging scheme: {tag}")
    metric = compute_f1(gold_sentences, pred_sentences)
    return metric
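
compute_f1_crf and compute_f1_crf_BIEOS are not shown. A minimal sketch of the BIO variant, assuming it converts a tag sequence into (start, end, entity_type) spans so that compute_f1 scores spans rather than raw tags:

# Hypothetical sketch of compute_f1_crf: BIO tags -> (start, end, type) spans.
def compute_f1_crf(tags):
    spans, start, ent_type = [], None, None
    for i, tag in enumerate(tags):
        if tag.startswith("B-"):
            # A new entity begins; close any span still open.
            if start is not None:
                spans.append((start, i - 1, ent_type))
            start, ent_type = i, tag[2:]
        elif tag.startswith("I-") and start is not None and tag[2:] == ent_type:
            # Continue the current entity.
            continue
        else:
            # "O" tag or inconsistent "I-": close any open span.
            if start is not None:
                spans.append((start, i - 1, ent_type))
            start, ent_type = None, None
    if start is not None:
        spans.append((start, len(tags) - 1, ent_type))
    return spans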
Example #4
    def epoch_end(self, outputs, phase):
        tgts, preds = [], []
        for output in outputs:
            tgts += output[0]
            preds += output[1]

        f1s, exacts = [], []
        for tgt, pred in zip(tgts, preds):
            f1s.append(compute_f1(tgt, pred))
            exacts.append(compute_exact(tgt, pred))

        self.log_dict(
            {
                f"{phase}_f1": np.array(f1s).mean(),
                f"{phase}_exact_match": np.array(exacts).mean()
            },
            prog_bar=True,
            on_step=False,
            on_epoch=True)
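
A shared epoch_end like the ones in Examples #1 and #4 is typically invoked from the per-phase hooks of the pre-2.0 PyTorch Lightning API, which these snippets appear to target. A hypothetical wiring:

    # Hypothetical wiring via the pre-2.0 Lightning *_epoch_end hooks.
    def validation_epoch_end(self, outputs):
        self.epoch_end(outputs, "val")

    def test_epoch_end(self, outputs):
        self.epoch_end(outputs, "test")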
Example #5
    def evaluation_step(self, batch):
        '''
        Same step for validation and testing.
        '''
        originals = batch["target_text"]

        pred_token_phrases = self(batch)
        preds = [
            self.tokenizer.decode(pred_tokens, skip_special_tokens=True)
            for pred_tokens in pred_token_phrases
        ]

        exact_matches = []
        f1s = []
        for original, pred in zip(originals, preds):
            exact_matches.append(compute_exact(original, pred))
            f1s.append(compute_f1(original, pred))

        exact_match = np.array(exact_matches).mean()
        f1 = np.array(f1s).mean()

        return exact_match, f1
Example #6
def val_epoch(model, val_loader):
    model.eval()
    total = 0
    total_loss = 0

    pred_answers, known_answers, indices = [], [], []

    with torch.no_grad():
        for context, c_lens, question, q_lens, is_answerable, answer_start, answer_end, index in tqdm(
                val_loader, leave=False, desc="Val Batches"):
            prob_start, prob_end = model(context.to(get_device()),
                                         question.to(get_device()), c_lens,
                                         q_lens)

            start_loss = model.compute_loss(prob_start,
                                            answer_start.to(get_device()))
            end_loss = model.compute_loss(prob_end,
                                          answer_end.to(get_device()))

            loss = start_loss + end_loss
            total_loss += loss.detach().item()
            total += 1

            pred_answer = find_answer(prob_start, prob_end, c_lens)
            pred_answers += pred_answer

            known_answers += list(
                zip(answer_start.cpu().tolist(),
                    answer_end.cpu().tolist()))
            indices += index.cpu().tolist()

        print(f"Val ave loss: {total_loss / total}")

        print("EM:", compute_exact(known_answers, pred_answers))
        print("F1:", compute_f1(known_answers, pred_answers))

        return pred_answers, known_answers, indices
Example #7
print("Optimization Finished!")
save_path = saver.save(sess, "checkpoints/tf_deepUD_tri_model.ckpt")
with open("valaccs/a" + datestring + ".txt", "w+") as f:
    f.write("Validation Accuracy:\n")
    f.write(str(valaccs))
    f.write("\nPrecision:\n")
    f.write(str(precisions))
    f.write("\nRecall:\n")
    f.write(str(recalls))
print("Model saved in file: %s" % save_path)

preds, labels = sess.run([tf.argmax(valpred, 1), val_labels])
conf_mat = tf.math.confusion_matrix(labels, preds)
conf_mat = conf_mat.eval(session=sess)
# Normalize the Confusion Matrix to get percentages
cfsum = np.sum(conf_mat)
cf_norm = conf_mat / cfsum
plot_filename = "plots/conf_mat" + datestring + ".jpg"
print("Confusion Matrix - saved to " + plot_filename)
print(conf_mat)
sns.heatmap(cf_norm, annot=True, fmt='.2%', cmap='Blues')
plt.savefig(plot_filename)

# Compute F1 score, precision, and recall
precision = compute_precision(conf_mat)
recall = compute_recall(conf_mat)
f1 = compute_f1(conf_mat)
print("F1 Score: %0.6f, Precision: %0.6f, Recall: %0.6f " %
      (f1, precision, recall))
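
The confusion-matrix helpers in Example #7 are not shown. A minimal sketch, assuming rows are true labels, columns are predictions, and the metrics are macro-averaged over classes (note that here compute_f1 takes a confusion matrix, unlike the string-based helpers above):

# Hypothetical sketches of the confusion-matrix metric helpers.
import numpy as np

def compute_precision(conf_mat):
    # Per-class precision: diagonal / column sum, then macro-average.
    col_sums = conf_mat.sum(axis=0)
    per_class = np.diag(conf_mat) / np.maximum(col_sums, 1)
    return per_class.mean()

def compute_recall(conf_mat):
    # Per-class recall: diagonal / row sum, then macro-average.
    row_sums = conf_mat.sum(axis=1)
    per_class = np.diag(conf_mat) / np.maximum(row_sums, 1)
    return per_class.mean()

def compute_f1(conf_mat):
    # Harmonic mean of macro precision and macro recall.
    p, r = compute_precision(conf_mat), compute_recall(conf_mat)
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0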