Example #1
def evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name,
                   goal):
    model.eval()
    loss, output_logits = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits)
    # loss is a scalar tensor; .item() extracts a plain Python float for logging
    eval_loss = loss.item()
    eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(
        eval_loss, batch_num)
    gold_pred = get_gold_pred_str(output_index,
                                  eval_batch['y'].data.cpu().clone(), goal)
    eval_accu = sum([set(y) == set(yp)
                     for y, yp in gold_pred]) * 1.0 / len(gold_pred)
    tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu,
                                      batch_num)
    tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss,
                                      batch_num)
    eval_str = get_eval_string(gold_pred)
    print(val_type_name + ":" + eval_loss_str)
    print(gold_pred[:3])
    print(val_type_name + ":" + eval_str)
    logging.info(val_type_name + ":" + eval_loss_str)
    logging.info(val_type_name + ":" + eval_str)
    model.train()
    return eval_loss
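Every example on this page depends on a get_output_index helper whose body is not shown. A minimal sketch of what such a helper could look like, assuming the logits are independent per-type sigmoid scores and that the top-scoring type is always kept (both are assumptions, not confirmed by the source):

import torch

def get_output_index_sketch(logits, threshold=0.5):
    # Hypothetical stand-in for get_output_index: turn a (batch, num_types)
    # logit tensor into one list of predicted type indices per example.
    probs = torch.sigmoid(logits).detach().cpu()
    pred_idx = []
    for row in probs:
        scores = row.tolist()
        # Keep the best-scoring type so every example predicts something,
        # plus any other type whose probability clears the threshold.
        chosen = {max(range(len(scores)), key=scores.__getitem__)}
        chosen |= {i for i, p in enumerate(scores) if p > threshold}
        pred_idx.append(sorted(chosen))
    return pred_idx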
Example #2
def evaluate_data_cv(k_fold_count,
                     batch_num,
                     model,
                     tensorboard,
                     val_type_name,
                     args,
                     elmo,
                     actual_f1=False):
    model.eval()
    data_gen = get_data_gen(
        'crowd/cv_3fold/dev_tree_{0}.json'.format(repr(k_fold_count)),
        'test', args, (constant.CHAR_DICT, None), args.goal, elmo=elmo)
    gold_pred = []
    annot_ids = []
    eval_loss = 0.
    total_ex_count = 0
    print('==> evaluate_data_cv')
    for n, batch in enumerate(data_gen):
        total_ex_count += len(batch['y'])
        eval_batch, annot_id = to_torch(batch)
        loss, output_logits, _ = model(eval_batch, val_type_name)
        if actual_f1:
            output_index = get_output_index(output_logits,
                                            threshold=args.threshold)
        else:
            output_index = get_output_index_rank(output_logits, topk=args.topk)

        y = eval_batch['y'].data.cpu().clone().numpy()
        gold_pred += get_gold_pred_str(output_index, y, args.goal)  # accumulate across batches
        annot_ids.extend(annot_id)
        eval_loss += loss.clone().item()
    eval_accu = sum([set(y) == set(yp)
                     for y, yp in gold_pred]) * 1.0 / len(gold_pred)
    eval_str = get_eval_string(gold_pred)
    eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(
        eval_loss, batch_num)
    tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu,
                                      batch_num)
    tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss,
                                      batch_num)
    print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
    print(val_type_name + ":" + eval_loss_str)
    #print(gold_pred[:3])
    print(val_type_name + ":" + eval_str)
    logging.info(val_type_name + ":" + eval_loss_str)
    logging.info(val_type_name + ":" + eval_str)
    model.train()
    data_gen = None
    output_dict = {}
    for a_id, (gold, pred) in zip(annot_ids, gold_pred):
        output_dict[a_id] = {"gold": gold, "pred": pred}
    return eval_loss, output_dict
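Both examples so far score with the same strict ("exact set match") accuracy expression. Its behavior on made-up (gold, pred) string lists:

gold_pred = [(['person', 'artist'], ['person', 'artist']),  # exact match: counted
             (['person'], ['person', 'artist'])]            # superset: not counted
strict_acc = sum(set(y) == set(yp) for y, yp in gold_pred) * 1.0 / len(gold_pred)
print(strict_acc)  # 0.5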
Example #3
def evaluate_data(batch_num, dev_fname, model, tensorboard, val_type_name, args, elmo, bert, actual_f1=True, vocab=None):
  model.eval()
  if vocab is None:
    vocab = (constant.CHAR_DICT, None)
  dev_gen = get_data_gen(dev_fname, 'test', args, vocab, args.goal, elmo=elmo, bert=bert)
  gold_pred = []
  eval_loss = 0.
  total_ex_count = 0
  if args.mode in ['train_labeler', 'test_labeler']:
    cls_correct = 0.
    cls_total = 0.
    cls_tp = 0.
    cls_t_gold = 0.
    cls_t_pred = 0.
  for n, batch in enumerate(dev_gen): 
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch)
    if args.mode in ['train_labeler', 'test_labeler']:
      loss, output_logits, cls_logits = model(eval_batch, val_type_name)
      if cls_logits is not None:
        cls_correct += sum([(1. if pred > 0. else 0.) == gold for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_total += float(cls_logits.size()[0])
        cls_tp += sum([(1. if pred > 0. else 0.) == 1. and gold == 1. for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_t_gold += float(sum(batch['y_cls']))
        cls_t_pred += float(sum([1. if pred > 0. else 0. for pred in cls_logits]))
    else:
      loss, output_logits, _ = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.clone().item()
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(val_type_name + ":" +eval_loss_str)
  print(gold_pred[:3])
  if args.mode in ['train_labeler', 'test_labeler'] and cls_logits is not None:
    cls_accuracy = cls_correct / cls_total * 100.
    cls_precision = cls_tp / cls_t_pred
    cls_recall = cls_tp / cls_t_gold
    cls_f1 = f1(cls_precision, cls_recall)
    cls_str = '  CLS accuracy: {0:.2f}%  P: {1:.3f}  R: {2:.3f}  F1: {3:.3f}'.format(cls_accuracy, cls_precision, cls_recall, cls_f1) 
    print(val_type_name + ":" + eval_str + cls_str)
  else:
    print(val_type_name + ":" + eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name + ":" + eval_str)
  model.train()
  dev_gen = None
  return eval_loss, macro_f1 
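The classifier metrics above feed into an f1 helper that is not shown in the source; presumably it is the usual harmonic mean, e.g.:

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    return 2 * p * r / (p + r) if p + r > 0. else 0.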
Example #4
def _test(args):
    assert args.load
    test_fname = args.eval_data
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

    for name, dataset in [(test_fname, data_gens[0])]:
        print('Processing... ' + name)
        total_gold_pred = []
        total_annot_ids = []
        total_probs = []
        total_ys = []
        for batch_num, batch in enumerate(dataset):
            eval_batch, annot_ids = to_torch(batch)
            loss, masked_logits, raw_logits, mask = model(
                eval_batch, args.goal)
            print(mask)
            # Score the raw (unmasked) logits; the masked-logit variant was
            # disabled in the original code.
            output_index = get_output_index(raw_logits)
            output_prob = raw_logits.data.cpu().clone().numpy()
            y = eval_batch['y'].data.cpu().clone().numpy()
            gold_pred = get_gold_pred_str(output_index, y, args.goal)
            total_probs.extend(output_prob)
            total_ys.extend(y)
            total_gold_pred.extend(gold_pred)
            total_annot_ids.extend(annot_ids)
        mrr_val = mrr(total_probs, total_ys)
        print('mrr_value: ', mrr_val)
        pickle.dump({
            'gold_id_array': total_ys,
            'pred_dist': total_probs
        }, open('./{0:s}.p'.format(args.reload_model_name), "wb"))
        with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
            output_dict = {}
            for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred):
                output_dict[a_id] = {"gold": gold, "pred": pred}
            json.dump(output_dict, f_out, indent=2)
        eval_str = get_eval_string(total_gold_pred)
        print(eval_str)
        logging.info('processing: ' + name)
        logging.info(eval_str)
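The mrr helper called above is also not shown. A hedged sketch of mean reciprocal rank for this multi-label setting, assuming total_probs holds per-type score vectors and total_ys holds binary gold indicator vectors (both assumptions about the data layout):

import numpy as np

def mrr_sketch(probs, golds):
    # Mean reciprocal rank of the highest-ranked gold type per example.
    reciprocal_ranks = []
    for scores, gold in zip(probs, golds):
        order = np.argsort(-np.asarray(scores))      # type indices, best first
        gold_idx = set(np.flatnonzero(gold).tolist())
        rr = 0.0
        for rank, idx in enumerate(order, start=1):  # ranks are 1-based
            if int(idx) in gold_idx:
                rr = 1.0 / rank
                break
        reciprocal_ranks.append(rr)
    return float(np.mean(reciprocal_ranks)) if reciprocal_ranks else 0.0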
Example #5
def _test(args):
    assert args.load
    test_fname = args.eval_data
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    saved_path = constant.EXP_ROOT
    # Restore the best checkpoint directly from its saved state_dict.
    model.load_state_dict(
        torch.load(saved_path + '/' + args.model_id +
                   '_best.pt')["state_dict"])
    for name, dataset in [(test_fname, data_gens[0])]:
        print('Processing... ' + name)
        # The threshold sweep below is run on a single dev batch.
        batch = next(dataset)
        eval_batch, annot_ids = to_torch(batch)
        loss, output_logits = model(eval_batch, args.goal)

        threshes = np.arange(0, 1, 0.005)
        p_and_r = []
        for thresh in tqdm(threshes):
            total_gold_pred = []
            total_annot_ids = []
            total_probs = []
            total_ys = []
            print('thresh {}'.format(thresh))
            output_index = get_output_index(output_logits, thresh)
            output_prob = model.sigmoid_fn(
                output_logits).data.cpu().clone().numpy()
            y = eval_batch['y'].data.cpu().clone().numpy()
            gold_pred = get_gold_pred_str(output_index, y, args.goal)
            total_probs.extend(output_prob)
            total_ys.extend(y)
            total_gold_pred.extend(gold_pred)
            total_annot_ids.extend(annot_ids)
            # (mrr / pickle / json output from Example #4 is disabled in this variant.)
            eval_str, p, r = get_eval_string(total_gold_pred)
            p_and_r.append([p, r])
            print(eval_str)

        np.save(saved_path + '/baseline_pr_dev', p_and_r)
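The sweep above stores one [precision, recall] pair per threshold in baseline_pr_dev.npy. One way to pick an operating point from that file afterwards (the file name and grid mirror the code above; selecting by F1 is our own choice):

import numpy as np

p_and_r = np.load('baseline_pr_dev.npy')   # shape: (num_thresholds, 2)
threshes = np.arange(0, 1, 0.005)          # same grid as the sweep
p, r = p_and_r[:, 0], p_and_r[:, 1]
f1_scores = 2 * p * r / np.maximum(p + r, 1e-12)
best = int(np.argmax(f1_scores))
print('best threshold {0:.3f}: P={1:.3f} R={2:.3f} F1={3:.3f}'.format(
    threshes[best], p[best], r[best], f1_scores[best]))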
Example #6
def evaluate_data(batch_num, dev_fname, model, args, elmo, device, char_vocab, dev_type='original'):
  model.eval()
  dev_gen = get_data_gen(dev_fname, 'test', args, char_vocab, elmo=elmo)
  gold_pred = []
  eval_loss = 0.
  total_ex_count = 0
  for batch in tqdm(dev_gen):
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch, device)
    loss, output_logits, _ = model(eval_batch)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.clone().item()
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  print('==> ' + dev_type + ' EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(eval_loss_str)
  print(gold_pred[:3])
  print('==> ' + dev_type + ' : ' + eval_str)
  logging.info(eval_loss_str)
  logging.info(eval_str)
  model.train()
  return eval_loss, macro_f1
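Examples #3 and #6 unpack six values from eval_metric.macro and keep only the final macro F1. A sketch of macro-averaged precision/recall/F1 over (gold, pred) pairs, assuming per-example averaging as the "macro" scheme (only the last slot of the real return layout is known from the call sites):

def macro_sketch(gold_pred):
    # Average per-example precision and recall over the type sets, then F1.
    p_sum = r_sum = 0.0
    for gold, pred in gold_pred:
        gold, pred = set(gold), set(pred)
        if pred:
            p_sum += len(gold & pred) / float(len(pred))
        if gold:
            r_sum += len(gold & pred) / float(len(gold))
    n = float(len(gold_pred))
    p, r = p_sum / n, r_sum / n
    macro_f1 = 2 * p * r / (p + r) if p + r > 0 else 0.0
    return p, r, macro_f1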
Example #7
def _test_labeler(args):
  assert args.load
  test_fname = args.eval_data
  data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)
  if args.model_type == 'labeler':
    print('==> Labeler')
    model = denoising_models.Labeler(args, constant.ANSWER_NUM_DICT[args.goal])
  elif args.model_type == 'filter':
    print('==> Filter')
    model = denoising_models.Filter(args, constant.ANSWER_NUM_DICT[args.goal])
  else:
    print('Invalid model type: -model_type ' + args.model_type)
    raise NotImplementedError

  model.cuda()
  model.eval()
  load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    total_gold_pred_pcls_ycls_ynoise = []
    total_annot_ids = []
    for batch_num, batch in enumerate(dataset):
      print(batch_num)
      if not isinstance(batch, dict):
        print('==> batch: ', batch)
      eval_batch, annot_ids = to_torch(batch)
      loss, output_logits, cls_logits = model(eval_batch, args.goal)
      output_index = get_output_index(output_logits, threshold=args.threshold)
      y = eval_batch['y'].data.cpu().clone().numpy()
      y_cls = eval_batch['y_cls'].data.cpu().clone().numpy()
      y_noisy_idx = eval_batch['y_noisy_idx'].data.cpu().clone().numpy()
      gold_pred_pcls_ycls_ynoise = get_gold_pred_str(output_index, y, args.goal, cls_logits=cls_logits, y_cls=y_cls, y_noisy_idx=y_noisy_idx)
      total_gold_pred_pcls_ycls_ynoise.extend(gold_pred_pcls_ycls_ynoise)
      total_annot_ids.extend(annot_ids)
    pickle.dump((total_annot_ids, total_gold_pred_pcls_ycls_ynoise),
                open('./{0:s}_gold_pred.p'.format(args.reload_model_name), "wb"))
    with open('./{0:s}.json'.format(args.model_id), 'w') as f_out:
      output_dict = {}
      if args.model_type == 'filter':
        for a_id, (gold, pred, cls, ycls, ynoise) in zip(total_annot_ids, total_gold_pred_pcls_ycls_ynoise):
          output_dict[a_id] = {"gold": gold, "pred": pred, "cls_pred": cls, "cls_gold": ycls, "y_noisy": ynoise}
      elif args.model_type == 'labeler':
        for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred_pcls_ycls_ynoise):
          output_dict[a_id] = {"gold": gold, "pred": pred}
      else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
      json.dump(output_dict, f_out)
    # Keep only the (gold, pred) fields of each tuple when scoring, and use the
    # full accumulated list rather than the last batch only.
    eval_str = get_eval_string(
        list(zip(*list(zip(*total_gold_pred_pcls_ycls_ynoise))[:2])))
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)
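The final get_eval_string call strips the classifier fields from each 5-tuple with a double zip before scoring. The same idiom on toy data:

rows = [('g1', 'p1', 0.9, 1, 2),
        ('g2', 'p2', 0.1, 0, 5)]
# zip(*rows) transposes rows into columns; [:2] keeps the gold and pred
# columns; the outer zip(*...) transposes back into (gold, pred) pairs.
gold_pred = list(zip(*list(zip(*rows))[:2]))
print(gold_pred)  # [('g1', 'p1'), ('g2', 'p2')]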
Example #8
def _test(args):
  assert args.load
  test_fname = args.eval_data
  data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)
  if args.model_type == 'et_model':
    print('==> Entity Typing Model')
    model = models.ETModel(args, constant.ANSWER_NUM_DICT[args.goal])
  elif args.model_type == 'bert_uncase_small':
    print('==> Bert Uncased Small')
    model = models.Bert(args, constant.ANSWER_NUM_DICT[args.goal])
  else:
    print('Invalid model type: -model_type ' + args.model_type)
    raise NotImplementedError
  model.cuda()
  model.eval()
  load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    total_gold_pred = []
    total_annot_ids = []
    for batch_num, batch in enumerate(dataset):
      print(batch_num)
      if not isinstance(batch, dict):
        print('==> batch: ', batch)
      eval_batch, annot_ids = to_torch(batch)
      loss, output_logits, attn_score = model(eval_batch, args.goal)
      output_index = get_output_index(output_logits, threshold=args.threshold)
      y = eval_batch['y'].data.cpu().clone().numpy()
      gold_pred = get_gold_pred_str(output_index, y, args.goal)
      total_gold_pred.extend(gold_pred)
      total_annot_ids.extend(annot_ids)
    with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
      output_dict = {}
      for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred):
        output_dict[a_id] = {"gold": gold, "pred": pred}
      json.dump(output_dict, f_out)
    eval_str = get_eval_string(total_gold_pred)
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)
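Several examples call load_model without showing its body; Example #5 inlines what looks like the same logic. A sketch consistent with that inline version — the '_best.pt' suffix and the 'state_dict' checkpoint key come from Example #5, while the argument order simply mirrors the call sites:

import torch

def load_model_sketch(reload_model_name, save_dir, model_id, model):
    # Hypothetical load_model: restore the best checkpoint saved under
    # '<save_dir>/<model_id>_best.pt'.
    checkpoint = torch.load('{0}/{1}_best.pt'.format(save_dir, model_id))
    model.load_state_dict(checkpoint['state_dict'])
    print('Loaded {0} from {1}'.format(reload_model_name, save_dir))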