Example 1
def compute_prf1(fname):
    """Load a JSON file mapping annot_id -> {'gold': [...], 'pred': [...]} and print macro P/R/F1."""
    with open(fname) as f:
        total = json.load(f)
    true_and_predictions = []
    for k, v in total.items():
        true_and_predictions.append((v['gold'], v['pred']))
    count, pred_count, avg_pred_count, p, r, f1 = macro(true_and_predictions)
    perf_total = "{0}\t{1:.2f}\tP:{2:.1f}\tR:{3:.1f}\tF1:{4:.1f}".format(
        count, avg_pred_count, p * 100, r * 100, f1 * 100)
    print(perf_total)
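The macro() helper used here and in the later examples is not included in this listing. The sketch below is only an assumption about its contract: it takes a list of (gold_labels, predicted_labels) pairs and returns the 6-tuple unpacked above, with precision and recall macro-averaged per example. The actual eval_metric implementation may differ in its details.

def macro(true_and_predictions):
    # Minimal sketch (assumption): macro-averaged precision/recall over examples.
    num_examples = len(true_and_predictions)
    pred_example_count = 0   # examples with at least one predicted label
    pred_label_count = 0     # total number of predicted labels
    p_sum, r_sum = 0.0, 0.0
    for gold, pred in true_and_predictions:
        if pred:
            pred_example_count += 1
            pred_label_count += len(pred)
            p_sum += len(set(pred) & set(gold)) / float(len(pred))
        if gold:
            r_sum += len(set(pred) & set(gold)) / float(len(gold))
    precision = p_sum / pred_example_count if pred_example_count else 0.0
    recall = r_sum / num_examples if num_examples else 0.0
    avg_pred_count = pred_label_count / float(pred_example_count) if pred_example_count else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return num_examples, pred_example_count, avg_pred_count, precision, recall, f1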
Example 2
def evaluate_data(batch_num, dev_fname, model, tensorboard, val_type_name, args, elmo, bert, actual_f1=True, vocab=None):
  """Run `model` over the dev data, log loss/accuracy to TensorBoard, and return (eval_loss, macro_f1)."""
  model.eval()
  if vocab is None:
    vocab = (constant.CHAR_DICT, None)
  dev_gen = get_data_gen(dev_fname, 'test', args, vocab, args.goal, elmo=elmo, bert=bert)
  gold_pred = []
  binary_out = []
  eval_loss = 0.
  total_ex_count = 0
  if args.mode in ['train_labeler', 'test_labeler']:
    cls_correct = 0.
    cls_total = 0.
    cls_tp = 0.
    cls_t_gold = 0.
    cls_t_pred = 0.
  for n, batch in enumerate(dev_gen): 
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch)
    if args.mode in ['train_labeler', 'test_labeler']:
      loss, output_logits, cls_logits = model(eval_batch, val_type_name)
      if cls_logits is not None:
        cls_correct += sum([(1. if pred > 0. else 0.) == gold for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_total += float(cls_logits.size()[0])
        cls_tp += sum([(1. if pred > 0. else 0.) == 1. and gold == 1. for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_t_gold += float(sum(batch['y_cls']))
        cls_t_pred += float(sum([1. if pred > 0. else 0. for pred in cls_logits]))
    else:
      loss, output_logits, _ = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.clone().item()
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(val_type_name + ":" + eval_loss_str)
  print(gold_pred[:3])
  if args.mode in ['train_labeler', 'test_labeler'] and cls_logits is not None:
    cls_accuracy = cls_correct / cls_total * 100.
    cls_precision = cls_tp / cls_t_pred
    cls_recall = cls_tp / cls_t_gold
    cls_f1 = f1(cls_precision, cls_recall)
    cls_str = '  CLS accuracy: {0:.2f}%  P: {1:.3f}  R: {2:.3f}  F1: {3:.3f}'.format(cls_accuracy, cls_precision, cls_recall, cls_f1) 
    print(val_type_name + ":" + eval_str + cls_str)
  else:
    print(val_type_name + ":" + eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name + ":" + eval_str)
  model.train()
  dev_gen = None
  return eval_loss, macro_f1 
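The classifier metrics above call an f1(precision, recall) helper that is not part of this listing; a minimal sketch, assuming it is the usual harmonic mean with a zero guard:

def f1(p, r):
    # Harmonic mean of precision and recall; 0 when both are 0.
    return 2.0 * p * r / (p + r) if (p + r) > 0.0 else 0.0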
Example 3
def get_eval_string(true_prediction):
  """
  Given a list of (gold, prediction) pairs, generate the evaluation output string.
  """
  count, pred_count, avg_pred_count, p, r, f1 = eval_metric.micro(true_prediction)
  _, _, _, ma_p, ma_r, ma_f1 = eval_metric.macro(true_prediction)
  output_str = "Eval: {0} {1} {2:.3f} P:{3:.3f} R:{4:.3f} F1:{5:.3f} Ma_P:{6:.3f} Ma_R:{7:.3f} Ma_F1:{8:.3f}".format(
    count, pred_count, avg_pred_count, p, r, f1, ma_p, ma_r, ma_f1)
  accuracy = sum([set(y) == set(yp) for y, yp in true_prediction]) * 1.0 / len(true_prediction)
  output_str += '\t Dev accuracy: {0:.1f}%'.format(accuracy * 100)
  return output_str
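get_eval_string also relies on eval_metric.micro, which is not shown either. A matching sketch under the same assumptions as the macro() sketch after Example 1, except that precision and recall are computed from label counts pooled over all examples rather than averaged per example:

def micro(true_and_predictions):
    # Minimal sketch (assumption): micro-averaged precision/recall over pooled label counts.
    num_examples = len(true_and_predictions)
    pred_example_count = sum(1 for _, pred in true_and_predictions if pred)
    num_predicted = sum(len(pred) for _, pred in true_and_predictions)
    num_gold = sum(len(gold) for gold, _ in true_and_predictions)
    num_correct = sum(len(set(pred) & set(gold)) for gold, pred in true_and_predictions)
    avg_pred_count = num_predicted / float(pred_example_count) if pred_example_count else 0.0
    precision = num_correct / float(num_predicted) if num_predicted else 0.0
    recall = num_correct / float(num_gold) if num_gold else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return num_examples, pred_example_count, avg_pred_count, precision, recall, f1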
Example 4
def compute_length_prf1(fname, data_fname):
    """Bucket predictions by mention length (in tokens) and print macro P/R/F1 per bucket."""
    with open(fname) as f:
        total = json.load(f)
    data = load_augmented_input(data_fname)
    counts = {}
    for annot_id, v in total.items():
        ex = data[annot_id]
        mention_len = len(ex['mention_span'].strip().split())
        if mention_len not in counts:
            counts[mention_len] = []
        counts[mention_len].append((v['gold'], v['pred']))

    for k, v in sorted(counts.items(), key=lambda x: x[0])[:20]:
        count, pred_count, avg_pred_count, p, r, f1 = macro(v)
        perf = "{0}\t{1:.2f}\tP:{2:.1f}\tR:{3:.1f}\tF1:{4:.1f}\tLEN:{5}".format(
            count, avg_pred_count, p * 100, r * 100, f1 * 100, k)
        print(perf)
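load_augmented_input is also not shown. All compute_length_prf1 needs from it is a dict mapping annot_id to the original example with its 'mention_span'; the file layout below (JSON lines carrying an 'annot_id' field) is purely an illustrative assumption:

import json

def load_augmented_input(data_fname):
    # Hypothetical sketch: index examples by annot_id from a JSON-lines file.
    # The real loader may use a different format; only the returned mapping matters here.
    data = {}
    with open(data_fname) as f:
        for line in f:
            ex = json.loads(line)
            data[ex['annot_id']] = ex
    return data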
Example 5
def metric_dicts(true_prediction):
    """Compute micro/macro P/R/F1 and exact-match accuracy; return (metrics dict, formatted string)."""
    count, pred_count, avg_pred_count, p, r, f1 = eval_metric.micro(
        true_prediction)
    _, _, _, ma_p, ma_r, ma_f1 = eval_metric.macro(true_prediction)
    output_str = "Eval: {0} {1} {2:.3f} P:{3:.3f} R:{4:.3f} F1:{5:.3f} Ma_P:{6:.3f} Ma_R:{7:.3f} Ma_F1:{8:.3f}".format(
        count, pred_count, avg_pred_count, p, r, f1, ma_p, ma_r, ma_f1)
    accuracy = sum([set(y) == set(yp)
                    for y, yp in true_prediction]) * 1.0 / len(true_prediction)
    output_str += '\t Dev accuracy: {0:.1f}%'.format(accuracy * 100)
    result = {
        "precision": p,
        "recall": r,
        'f1': f1,
        "ma_precision": ma_p,
        "ma_recall": ma_r,
        "ma_f1": ma_f1,
        "accu": accuracy
    }
    return result, output_str
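Hypothetical usage of metric_dicts, assuming eval_metric is importable and using made-up label sets, to show how the returned dict and string are consumed:

gold_pred = [(['person', 'artist'], ['person']),
             (['organization'], ['organization', 'company'])]
result, output_str = metric_dicts(gold_pred)
print(output_str)                              # formatted micro/macro summary line
print(result['f1'], result['ma_f1'], result['accu'])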
Example 6
def evaluate_data(batch_num, dev_fname, model, args, elmo, device, char_vocab, dev_type='original'):
  """Run `model` over the dev data, print/log the eval summary, and return (eval_loss, macro_f1)."""
  model.eval()
  dev_gen = get_data_gen(dev_fname, 'test', args, char_vocab, elmo=elmo)
  gold_pred = []
  eval_loss = 0.
  total_ex_count = 0
  for batch in tqdm(dev_gen):
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch, device)
    loss, output_logits, _ = model(eval_batch)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.clone().item()
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  print('==> ' + dev_type + ' EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(eval_loss_str)
  print(gold_pred[:3])
  print('==> ' + dev_type + ' : ' + eval_str)
  logging.info(eval_loss_str)
  logging.info(eval_str)
  model.train()
  return eval_loss, macro_f1
Example 7
def compute_granul_prf1(fname, type_fname):
    """Split gold/pred types into coarse/fine/finer granularities and print macro P/R/F1 for each."""
    with open(fname) as f:
        total = json.load(f)
    coarse_true_and_predictions = []
    fine_true_and_predictions = []
    finer_true_and_predictions = []
    with open(type_fname) as f:
        types = [x.strip() for x in f.readlines()]
    for k, v in total.items():
        coarse_gold, fine_gold, finer_gold = stratify(v['gold'], types)
        coarse_pred, fine_pred, finer_pred = stratify(v['pred'], types)
        coarse_true_and_predictions.append((coarse_gold, coarse_pred))
        fine_true_and_predictions.append((fine_gold, fine_pred))
        finer_true_and_predictions.append((finer_gold, finer_pred))

    for true_and_predictions in [
            coarse_true_and_predictions, fine_true_and_predictions,
            finer_true_and_predictions
    ]:
        count, pred_count, avg_pred_count, p, r, f1 = macro(
            true_and_predictions)
        perf = "{0}\t{1:.2f}\tP:{2:.1f}\tR:{3:.1f}\tF1:{4:.1f}".format(
            count, avg_pred_count, p * 100, r * 100, f1 * 100)
        print(perf)
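The stratify() helper is not included in this listing. It is assumed to split a list of types into coarse, fine, and finer granularities based on their position in the ordered type vocabulary; the slice boundaries below are illustrative placeholders, not the real cut-offs:

def stratify(labels, types):
    # Assumption: `types` is the full vocabulary ordered from coarse to finer types.
    coarse = set(types[:9])      # placeholder cut-off
    fine = set(types[9:130])     # placeholder cut-off
    return ([l for l in labels if l in coarse],
            [l for l in labels if l in fine],
            [l for l in labels if l not in coarse and l not in fine])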