Example #1
def compute_prf1_single_type(fname, type_, data=None):
    """Print precision/recall/F1 for a single type over a JSON prediction file."""
    print('---------- ' + type_ + ' ----------')
    with open(fname) as f:
        total = json.load(f)
    gold_binary = []
    pred_binary = []
    for k, v in total.items():
        if type_ in v['gold']:
            gold_binary.append(1.)
        else:
            gold_binary.append(0.)
        if type_ in v['pred']:
            pred_binary.append(1.)
            if data is not None:  # data is optional; only print examples when it is provided
                print_example(data[k])
        else:
            pred_binary.append(0.)
    count = len(gold_binary)
    TP_FN_counts = sum([1. for gold in gold_binary if int(gold) == 1])
    TP_FP_counts = sum([1. for pred in pred_binary if int(pred) == 1])
    TP_counts = sum([
        1. for pred, gold in zip(pred_binary, gold_binary)
        if int(pred) == 1 and int(gold) == 1
    ])
    p = TP_counts / TP_FP_counts if TP_FP_counts > 0 else 0.
    r = TP_counts / TP_FN_counts if TP_FN_counts > 0 else 0.
    f1_ = f1(p, r)
    output_str = "Type: {0}\t#: {1} TP:{2} TP+FP:{3} TP+FN:{4} P:{5:.3f} R:{6:.3f} F1:{7:.3f}".format(
        type_, count, int(TP_counts), int(TP_FP_counts), int(TP_FN_counts), p,
        r, f1_)
    accuracy = sum([
        pred == gold for pred, gold in zip(pred_binary, gold_binary)
    ]) / float(len(gold_binary))
    output_str += '\t Dev accuracy: {0:.1f}%'.format(accuracy * 100)
    print(output_str)
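
The counting above reduces to standard precision/recall arithmetic over binary indicator lists. Below is a minimal, self-contained sketch of the same computation on toy data; the f1 helper here is a hypothetical stand-in for the one these examples import:

def f1(p, r):
    # Hypothetical stand-in: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if p + r > 0 else 0.

gold_binary = [1., 0., 1., 1., 0.]   # toy gold indicators for one type
pred_binary = [1., 1., 1., 0., 0.]   # toy predicted indicators

tp = sum(1. for p_, g in zip(pred_binary, gold_binary) if p_ == 1. and g == 1.)
tp_fp = sum(pred_binary)  # all positive predictions
tp_fn = sum(gold_binary)  # all gold positives
precision = tp / tp_fp if tp_fp > 0 else 0.
recall = tp / tp_fn if tp_fn > 0 else 0.
print(precision, recall, f1(precision, recall))  # roughly 0.667 for all three here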
Example #2
def evaluate_data(batch_num, dev_fname, model, tensorboard, val_type_name, args, elmo, bert, actual_f1=True, vocab=None):
  """Evaluate on a dev file, log loss/accuracy to TensorBoard, and return (loss, macro F1)."""
  model.eval()
  if vocab is None:
    vocab = (constant.CHAR_DICT, None)
  dev_gen = get_data_gen(dev_fname, 'test', args, vocab, args.goal, elmo=elmo, bert=bert)
  gold_pred = []
  binary_out = []
  eval_loss = 0.
  total_ex_count = 0
  if args.mode in ['train_labeler', 'test_labeler']:
    cls_correct = 0.
    cls_total = 0.
    cls_tp = 0.
    cls_t_gold = 0.
    cls_t_pred = 0.
  for n, batch in enumerate(dev_gen): 
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch)
    if args.mode in ['train_labeler', 'test_labeler']:
      loss, output_logits, cls_logits = model(eval_batch, val_type_name)
      if cls_logits is not None:
        cls_correct +=  sum([(1. if pred > 0. else 0.) == gold for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_total += float(cls_logits.size()[0])
        cls_tp += sum([(1. if pred > 0. else 0.) == 1. and gold == 1. for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_t_gold += float(sum(batch['y_cls']))
        cls_t_pred += float(sum([1. if pred > 0. else 0. for pred in cls_logits]))
    else:
      loss, output_logits, _ = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.clone().item()
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(val_type_name + ":" +eval_loss_str)
  print(gold_pred[:3])
  if args.mode in ['train_labeler', 'test_labeler'] and cls_logits is not None:
    cls_accuracy = cls_correct / cls_total * 100.
    cls_precision = cls_tp / cls_t_pred
    cls_recall = cls_tp / cls_t_gold
    cls_f1 = f1(cls_precision, cls_recall)
    cls_str = '  CLS accuracy: {0:.2f}%  P: {1:.3f}  R: {2:.3f}  F1: {3:.3f}'.format(cls_accuracy, cls_precision, cls_recall, cls_f1) 
    print(val_type_name+":"+ eval_str + cls_str)
  else:
    print(val_type_name+":"+ eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name +":" +  eval_str)
  model.train()
  dev_gen = None
  return eval_loss, macro_f1 
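
evaluate_data leans on helpers such as eval_metric.macro and get_gold_pred_str that are not part of this excerpt. The sketch below shows, on toy (gold, pred) label lists, what the strict accuracy and an example-averaged macro F1 look like under that assumption; the repo's actual metric may differ in detail:

# Toy (gold, pred) pairs in the same shape as gold_pred: lists of type strings.
gold_pred = [
    (['person', 'artist'], ['person', 'artist']),  # exact match
    (['location'],         ['location', 'city']),  # partial match
    (['organization'],     []),                    # nothing predicted
]

# Strict accuracy: an example counts only if the full predicted set matches gold.
strict_acc = sum(set(g) == set(p) for g, p in gold_pred) / len(gold_pred)

def example_prf(gold, pred):
    # Per-example precision/recall; empty predictions score zero.
    if not pred:
        return 0., 0.
    overlap = len(set(gold) & set(pred))
    return overlap / len(pred), overlap / len(gold)

ps, rs = zip(*[example_prf(g, p) for g, p in gold_pred])
macro_p, macro_r = sum(ps) / len(ps), sum(rs) / len(rs)
macro_f1 = 2 * macro_p * macro_r / (macro_p + macro_r) if macro_p + macro_r > 0 else 0.
print(strict_acc, macro_p, macro_r, macro_f1)  # 0.333..., 0.5, 0.666..., 0.571...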
Example #3
def compute_acc_by_type_freq(fname, type_bucket_count_file, types_file):
    """Print precision/recall/F1 broken down by type-frequency bucket."""
    with open(fname) as f:
        total = json.load(f)
    with open(type_bucket_count_file, 'rb') as f:
        type_bucket_count = pickle.load(f)
    with open(types_file, 'r') as f:
        types = [t.strip() for t in f.readlines()]
    #print('TOTAL:', sum([len(v['gold']) for k, v in total.items()]))
    type2bucket = {t[0]: k for k, v in type_bucket_count.items() for t in v}
    TP_FP_counts = {'unseen': 0.}
    TP_FN_counts = {'unseen': 0.}
    TP_counts = {'unseen': 0.}
    for annot_id, v in total.items():
        gold = v['gold']
        pred = v['pred']
        for t in set(pred).intersection(set(gold)):
            if t in type2bucket:
                bucket = type2bucket[t]
                if bucket not in TP_counts:
                    TP_counts[bucket] = 0.
                TP_counts[bucket] += 1.
            else:
                TP_counts['unseen'] += 1.
        for t in set(pred):
            if t in type2bucket:
                bucket = type2bucket[t]
                if bucket not in TP_FP_counts:
                    TP_FP_counts[bucket] = 0.
                TP_FP_counts[bucket] += 1.
            else:
                TP_FP_counts['unseen'] += 1.
        for t in set(gold):
            if t in type2bucket:
                bucket = type2bucket[t]
                if bucket not in TP_FN_counts:
                    TP_FN_counts[bucket] = 0.
                TP_FN_counts[bucket] += 1.
            else:
                TP_FN_counts['unseen'] += 1.

    ordered_keys = sorted([k for k, v in TP_counts.items() if k != 'unseen'],
                          key=lambda x: int(x.split('-')[0]),
                          reverse=True)  # + ['unseen']
    for k in ordered_keys:
        precision = TP_counts[k] / TP_FP_counts[k]
        recall = TP_counts[k] / TP_FN_counts[k]
        f1_score = f1(precision, recall)
        perf = "{0}\tCORRECT:{1}\tP:{2:.2f}\tR:{3:.2f}\tF1:{4:.2}".format(
            k, int(TP_counts[k]), precision * 100., recall * 100.,
            f1_score * 100.)
        print(perf)
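
The format of the pickled bucket file is not shown in this excerpt; from the inversion on the line type2bucket = {t[0]: k ...} and the sort key int(x.split('-')[0]) it appears to map frequency-range labels to lists of (type, count) pairs. A small sketch under that assumption:

# Assumed structure of the pickled bucket file: frequency-range label -> (type, count) pairs.
type_bucket_count = {
    '1000-10000': [('person', 5400), ('location', 3200)],
    '100-1000':   [('artist', 640), ('city', 310)],
    '1-100':      [('luthier', 12)],
}

# Same inversion as in compute_acc_by_type_freq: type name -> its bucket label.
type2bucket = {t[0]: k for k, v in type_bucket_count.items() for t in v}
print(type2bucket['artist'])  # -> '100-1000'

# Buckets ordered from most to least frequent, as in the reporting loop above.
ordered_keys = sorted(type_bucket_count, key=lambda x: int(x.split('-')[0]), reverse=True)
print(ordered_keys)  # -> ['1000-10000', '100-1000', '1-100']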
Example #4
def get_eval_string_binary(binary_out, y):
    """Build a precision/recall/F1 summary string from binary predictions and gold labels."""
    assert len(binary_out) == len(y)
    count = len(binary_out)
    TP_FN_counts = sum([1. for gold in y if int(gold) == 1])
    TP_FP_counts = sum([1. for pred in binary_out if int(pred) == 1])
    TP_counts = sum([
        1. for pred, gold in zip(binary_out, y)
        if int(pred) == 1 and int(gold) == 1
    ])
    p = TP_counts / TP_FP_counts if TP_FP_counts > 0 else 0.
    r = TP_counts / TP_FN_counts if TP_FN_counts > 0 else 0.
    f1 = eval_metric.f1(p, r)
    output_str = "Eval: {0} TP:{1} TP+FP:{2} TP+FN:{3} P:{4:.3f} R:{5:.3f} F1:{6:.3f}".format(
        count, int(TP_counts), int(TP_FP_counts), int(TP_FN_counts), p, r, f1)
    accuracy = sum([pred == gold for pred, gold in zip(binary_out, y)
                    ]) / float(len(binary_out))
    output_str += '\t Dev accuracy: {0:.1f}%'.format(accuracy * 100)
    return output_str, accuracy
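
get_eval_string_binary expects binary_out and y to already be 0/1 indicators. One plausible way a caller prepares them is to threshold classifier logits at 0, i.e. a sigmoid probability of 0.5 (the actual caller is not part of this excerpt):

# Hypothetical caller-side preparation: logit > 0 is equivalent to sigmoid(logit) > 0.5.
cls_logits = [2.3, -0.7, 0.1, -1.5]
gold_labels = [1., 0., 0., 0.]
binary_out = [1. if logit > 0. else 0. for logit in cls_logits]
# binary_out == [1., 0., 1., 0.]; pass it together with gold_labels to
# get_eval_string_binary, which zips the two lists into (pred, gold) pairs.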
Example #5
def _train_labeler(args):
  if args.data_setup == 'joint':
    train_gen_list, val_gen_list, crowd_dev_gen, elmo, bert, vocab = get_joint_datasets(args)
  else:
    train_fname = args.train_data
    dev_fname = args.dev_data
    print(train_fname, dev_fname)
    data_gens, elmo = get_datasets([(train_fname, 'train', args.goal),
                              (dev_fname, 'dev', args.goal)], args)
    bert = None  # BERT inputs are only produced by the joint data setup
    train_gen_list = [(args.goal, data_gens[0])]
    val_gen_list = [(args.goal, data_gens[1])]
  train_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "train"))
  validation_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "validation"))
  tensorboard = TensorboardWriter(train_log, validation_log)

  if args.model_type == 'labeler':
    print('==> Labeler')
    model = denoising_models.Labeler(args, constant.ANSWER_NUM_DICT[args.goal])
  elif args.model_type == 'filter':
    print('==> Filter')
    model = denoising_models.Filter(args, constant.ANSWER_NUM_DICT[args.goal])
  else:
    print('Invalid model type: -model_type ' + args.model_type)
    raise NotImplementedError

  model.cuda()
  total_loss = 0
  batch_num = 0
  best_macro_f1 = 0.
  start_time = time.time()
  init_time = time.time()

  if args.bert:
    if args.bert_param_path:
      print('==> Loading BERT from ' + args.bert_param_path)
      model.bert.load_state_dict(torch.load(args.bert_param_path, map_location='cpu'))
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_parameters = [
        # Parameter names only contain these substrings (e.g. '...dense.bias'), so match
        # substrings rather than whole names when splitting the weight-decay groups.
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
        ]
    optimizer = BERTAdam(optimizer_parameters,
                         lr=args.bert_learning_rate,
                         warmup=args.bert_warmup_proportion,
                         t_total=-1) # TODO: 
  else:
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
  #optimizer = optim.SGD(model.parameters(), lr=1., momentum=0.)

  if args.load:
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)

  for idx, m in enumerate(model.modules()):
    logging.info(str(idx) + '->' + str(m))

  while True:
    batch_num += 1  # one step consumes one batch from each training signal below.
    for (type_name, data_gen) in train_gen_list:
      try:
        batch = next(data_gen)
        batch, _ = to_torch(batch)
      except StopIteration:
        logging.info(type_name + " finished at " + str(batch_num))
        print('Done!')
        torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
                   '{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
        return
      optimizer.zero_grad()
      loss, output_logits, cls_logits = model(batch, type_name)
      loss.backward()
      total_loss += loss.item()
      optimizer.step()

      if batch_num % args.log_period == 0 and batch_num > 0:
        gc.collect()
        cur_loss = float(1.0 * loss.clone().item())
        elapsed = time.time() - start_time
        train_loss_str = ('|loss {0:3f} | at {1:d}step | @ {2:.2f} ms/batch'.format(cur_loss, batch_num,
                                                                                    elapsed * 1000 / args.log_period))
        start_time = time.time()
        print(train_loss_str)
        logging.info(train_loss_str)
        tensorboard.add_train_scalar('train_loss_' + type_name, cur_loss, batch_num)

      if batch_num % args.eval_period == 0 and batch_num > 0:
        output_index = get_output_index(output_logits, threshold=args.threshold)
        gold_pred_train = get_gold_pred_str(output_index, batch['y'].data.cpu().clone(), args.goal)
        print(gold_pred_train[:10])
        accuracy = sum([set(y) == set(yp) for y, yp in gold_pred_train]) * 1.0 / len(gold_pred_train)

        train_acc_str = '{1:s} Train accuracy: {0:.1f}%'.format(accuracy * 100, type_name)
        if cls_logits is not None:
          cls_accuracy =  sum([(1. if pred > 0. else 0.) == gold for pred, gold in zip(cls_logits, batch['y_cls'].data.cpu().numpy())])  / float(cls_logits.size()[0])
          cls_tp = sum([(1. if pred > 0. else 0.) == 1. and gold == 1. for pred, gold in zip(cls_logits, batch['y_cls'].data.cpu().numpy())])
          cls_precision = cls_tp  / float(sum([1. if pred > 0. else 0. for pred in cls_logits])) 
          cls_recall = cls_tp  / float(sum(batch['y_cls'].data.cpu().numpy()))
          cls_f1 = f1(cls_precision, cls_recall)
          train_cls_acc_str = '{1:s} Train cls accuracy: {0:.2f}%  P: {2:.3f}  R: {3:.3f}  F1: {4:.3f}'.format(cls_accuracy * 100, type_name, cls_precision, cls_recall, cls_f1)
        print(train_acc_str)
        if cls_logits is not None:
          print(train_cls_acc_str)
        logging.info(train_acc_str)
        tensorboard.add_train_scalar('train_acc_' + type_name, accuracy, batch_num)
        if args.goal != 'onto':
          for (val_type_name, val_data_gen) in val_gen_list:
            if val_type_name == type_name:
              eval_batch, _ = to_torch(next(val_data_gen))
              evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name, args, args.goal)

    if batch_num % args.eval_period == 0 and batch_num > 0 and args.data_setup == 'joint':
      # Evaluate Loss on the Turk Dev dataset.
      print('---- eval at step {0:d} ---'.format(batch_num))
      crowd_eval_loss, macro_f1 = evaluate_data(batch_num, 'crowd/dev_tree.json', model,
                                                tensorboard, "open", args, elmo, bert, vocab=vocab)

      if best_macro_f1 < macro_f1:
        best_macro_f1 = macro_f1
        save_fname = '{0:s}/{1:s}_best.pt'.format(constant.EXP_ROOT, args.model_id)
        torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, save_fname)
        print(
          'Total {0:.2f} minutes have passed, saving at {1:s} '.format((time.time() - init_time) / 60, save_fname))

    if batch_num % args.eval_period == 0 and batch_num > 0 and args.goal == 'onto':
      # Evaluate Loss on the Turk Dev dataset.
      print('---- OntoNotes: eval at step {0:d} ---'.format(batch_num))
      crowd_eval_loss, macro_f1 = evaluate_data(batch_num, args.dev_data, model, tensorboard,
                                                args.goal, args, elmo, bert)

    if batch_num % args.save_period == 0 and batch_num > 0:
      save_fname = '{0:s}/{1:s}_{2:d}.pt'.format(constant.EXP_ROOT, args.model_id, batch_num)
      torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, save_fname)
      print(
        'Total {0:.2f} minutes have passed, saving at {1:s} '.format((time.time() - init_time) / 60, save_fname))
  # Training finished! 
  torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
             '{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
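
The training loop checkpoints by bundling the model and optimizer state dicts into one file, and load_model (not shown here) presumably restores them. A self-contained sketch of the same save/resume pattern in plain PyTorch, using a toy model and a placeholder path:

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins; the real loop saves a Labeler/Filter model under constant.EXP_ROOT.
model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
ckpt_path = 'example_model.pt'  # placeholder path

# Save: the same dict layout the training loop writes.
torch.save({'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, ckpt_path)

# Resume: restore both the model weights and the optimizer state.
checkpoint = torch.load(ckpt_path, map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])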