def evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name, goal):
    """Evaluate ``model`` on one pre-built batch and report the results.

    Runs a forward pass in eval mode, derives predictions through
    ``get_output_index`` / ``get_gold_pred_str``, pushes accuracy and loss to
    ``tensorboard`` as validation scalars, prints and logs a summary, and
    finally switches the model back to train mode.

    Args:
        batch_num: Step number; used as the scalar step and formatted with
            ``:d`` in the loss message.
        eval_batch: Batch handed to the model; its ``'y'`` entry holds the
            gold labels.
        model: Callable returning ``(loss, output_logits)``.
        tensorboard: Writer exposing ``add_validation_scalar``.
        val_type_name: Forwarded to the model and used as tag suffix / prefix.
        goal: Forwarded to ``get_gold_pred_str``.

    Returns:
        The batch loss as ``loss.data.cpu().numpy()``.
    """
    model.eval()
    loss, output_logits = model(eval_batch, val_type_name)
    predicted = get_output_index(output_logits)
    eval_loss = loss.data.cpu().numpy()
    loss_msg = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)

    gold_labels = eval_batch['y'].data.cpu().clone()
    gold_pred = get_gold_pred_str(predicted, gold_labels, goal)
    # Exact-match accuracy: the predicted label set must equal the gold set.
    exact_matches = sum([set(y) == set(yp) for y, yp in gold_pred])
    eval_accu = exact_matches * 1.0 / len(gold_pred)

    for tag, value in (('eval_acc_', eval_accu), ('eval_loss_', eval_loss)):
        tensorboard.add_validation_scalar(tag + val_type_name, value, batch_num)

    summary = get_eval_string(gold_pred)
    loss_line = val_type_name + ":" + loss_msg
    summary_line = val_type_name + ":" + summary
    print(loss_line)
    print(gold_pred[:3])
    print(summary_line)
    logging.info(loss_line)
    logging.info(summary_line)

    model.train()
    return eval_loss
def _test(args):
    """Run the reloaded model over eval data built around one fixed sentence.

    A hard-coded mention and its context sentence are passed to
    ``get_datasets`` together with ``args.eval_data``.  Predictions,
    probabilities, gold labels and annotation ids are gathered in local lists;
    as written, nothing is returned or written to disk.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).
    """
    mention = "Deutsche Bundesbank balance of payments statistics"
    text = "We use the Deutsche Bundesbank balance of payments statistics as our main source of data."
    assert args.load
    test_fname = args.eval_data
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args, text, mention)

    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

    # Only one dataset is evaluated: the first generator returned above.
    dataset = data_gens[0]
    total_gold_pred, total_probs, total_ys, total_annot_ids = [], [], [], []
    for batch in dataset:
        eval_batch, annot_ids = to_torch(batch)
        loss, output_logits = model(eval_batch, args.goal)
        output_index = get_output_index(output_logits)
        probs = model.sigmoid_fn(output_logits)
        output_prob = probs.data.cpu().clone().numpy()
        y = eval_batch['y'].data.cpu().clone().numpy()
        gold_pred = get_gold_pred_str(output_index, y, args.goal)
        total_probs.extend(output_prob)
        total_ys.extend(y)
        total_gold_pred.extend(gold_pred)
        total_annot_ids.extend(annot_ids)
def _test(args):
    """Sweep decision thresholds on one eval batch and save precision/recall.

    Loads ``<model_id>_best.pt`` from the experiment root (the OntoNotes root
    when ``args.goal == "onto"``), runs the model once on the first batch of
    the eval generator, then for every threshold in ``np.arange(0, 1, 0.02)``
    derives predictions, scores them with ``fine_grained_eval`` and records
    ``[ma_precision, ma_recall]``.  The collected pairs are written with
    ``np.save`` next to the checkpoint.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).
    """
    assert args.load
    test_fname = args.eval_data
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()

    saved_path = constant.EXP_ROOT_ONTO if args.goal == "onto" else constant.EXP_ROOT
    checkpoint = torch.load(saved_path + '/' + args.model_id + '_best.pt')
    model.load_state_dict(checkpoint["state_dict"])

    data_gens = get_datasets([(test_fname, 'test', args.goal)], args, eval_epoch=1)
    name, dataset = test_fname, data_gens[0]
    print('Processing... ' + name)

    # A single forward pass; only the threshold changes in the sweep below.
    first_batch = next(dataset)
    eval_batch, _ = to_torch(first_batch)
    loss, output_logits = model(eval_batch, args.goal)

    p_and_r = []
    for thresh in tqdm(np.arange(0, 1, 0.02)):
        total_gold_pred, total_probs, total_ys = [], [], []
        print('\nthresh {}'.format(thresh))
        output_index = get_output_index(output_logits, thresh)
        probs = model.sigmoid_fn(output_logits)
        output_prob = probs.data.cpu().clone().numpy()
        y = eval_batch['y'].data.cpu().clone().numpy()
        gold_pred = get_gold_pred_str(output_index, y, args.goal)
        total_probs.extend(output_prob)
        total_ys.extend(y)
        total_gold_pred.extend(gold_pred)
        result, eval_str = fine_grained_eval(total_gold_pred)
        p_and_r.append([result["ma_precision"], result["ma_recall"]])
        print(eval_str)
    np.save(saved_path + '/{}_pr_else_dev'.format(args.model_id), p_and_r)
def evaluate_data_cv(k_fold_count, batch_num, model, tensorboard, val_type_name, args, elmo, actual_f1=False):
    """Evaluate ``model`` on one cross-validation dev fold.

    Iterates over ``crowd/cv_3fold/dev_tree_<k_fold_count>.json``, accumulates
    the loss and the (gold, pred) pairs of every batch, reports exact-match
    accuracy and loss to ``tensorboard``, prints/logs a summary and puts the
    model back into train mode.

    Args:
        k_fold_count: Fold identifier inserted (via ``repr``) into the file name.
        batch_num: Step number used for scalars and the loss message.
        model: Callable returning ``(loss, output_logits, extra)``.
        tensorboard: Writer exposing ``add_validation_scalar``.
        val_type_name: Forwarded to the model and used as tag suffix / prefix.
        args: Run configuration (``goal``, ``threshold``, ``topk`` are read).
        elmo: Forwarded to ``get_data_gen``.
        actual_f1: When true predictions come from ``get_output_index`` with
            ``args.threshold``; otherwise from ``get_output_index_rank`` with
            ``args.topk``.

    Returns:
        ``(eval_loss, output_dict)`` where ``eval_loss`` is the summed batch
        loss and ``output_dict`` maps each annotation id to
        ``{"gold": ..., "pred": ...}``.
    """
    model.eval()
    data_gen = get_data_gen(
        'crowd/cv_3fold/dev_tree_{0}.json'.format(repr(k_fold_count)),
        'test', args, (constant.CHAR_DICT, None), args.goal, elmo=elmo)
    gold_pred = []
    annot_ids = []
    eval_loss = 0.
    total_ex_count = 0
    print('==> evaluate_data_cv')
    for batch in data_gen:
        total_ex_count += len(batch['y'])
        eval_batch, annot_id = to_torch(batch)
        loss, output_logits, _ = model(eval_batch, val_type_name)
        if actual_f1:
            output_index = get_output_index(output_logits, threshold=args.threshold)
        else:
            output_index = get_output_index_rank(output_logits, topk=args.topk)
        y = eval_batch['y'].data.cpu().clone().numpy()
        # Accumulate instead of overwriting: previously only the last batch's
        # pairs survived, so the metrics and the id -> prediction mapping
        # below were wrong whenever the fold spanned more than one batch.
        gold_pred.extend(get_gold_pred_str(output_index, y, args.goal))
        annot_ids.extend(annot_id)
        eval_loss += loss.clone().item()

    eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
    eval_str = get_eval_string(gold_pred)
    eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
    tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
    tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
    print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
    print(val_type_name + ":" + eval_loss_str)
    print(val_type_name + ":" + eval_str)
    logging.info(val_type_name + ":" + eval_loss_str)
    logging.info(val_type_name + ":" + eval_str)
    model.train()
    # Drop the reference to the generator before building the result.
    data_gen = None
    output_dict = {
        a_id: {"gold": gold, "pred": pred}
        for a_id, (gold, pred) in zip(annot_ids, gold_pred)
    }
    return eval_loss, output_dict
def evaluate_data(batch_num, dev_fname, model, tensorboard, val_type_name, args, elmo, bert, actual_f1=True, vocab=None):
    """Evaluate ``model`` on a whole dev file and report loss, accuracy and F1.

    In labeler mode (``args.mode`` is ``'train_labeler'`` or
    ``'test_labeler'``) the model also returns classifier logits; when they
    are not ``None``, binary accuracy / precision / recall / F1 of
    ``logit > 0`` against ``batch['y_cls']`` are accumulated and printed.

    Args:
        batch_num: Step number used for scalars and the loss message.
        dev_fname: Dev file passed to ``get_data_gen``.
        model: Callable returning ``(loss, output_logits, cls_logits_or_other)``.
        tensorboard: Writer exposing ``add_validation_scalar``.
        val_type_name: Forwarded to the model and used as tag suffix / prefix.
        args: Run configuration (``mode``, ``goal``, ``threshold`` are read).
        elmo: Forwarded to ``get_data_gen``.
        bert: Forwarded to ``get_data_gen``.
        actual_f1: Accepted for interface compatibility; not used here.
        vocab: Vocabulary passed to ``get_data_gen``; defaults to
            ``(constant.CHAR_DICT, None)``.

    Returns:
        ``(eval_loss, macro_f1)``: the summed batch loss and the last element
        of ``eval_metric.macro(gold_pred)``.
    """
    model.eval()
    if vocab is None:
        vocab = (constant.CHAR_DICT, None)
    dev_gen = get_data_gen(dev_fname, 'test', args, vocab, args.goal, elmo=elmo, bert=bert)
    labeler_mode = args.mode in ['train_labeler', 'test_labeler']
    gold_pred = []
    eval_loss = 0.
    total_ex_count = 0
    # Bound up front so the post-loop check never reads an unbound name.
    cls_logits = None
    cls_correct = cls_total = cls_tp = cls_t_gold = cls_t_pred = 0.
    for batch in dev_gen:
        total_ex_count += len(batch['y'])
        eval_batch, _ = to_torch(batch)
        if labeler_mode:
            loss, output_logits, cls_logits = model(eval_batch, val_type_name)
            if cls_logits is not None:
                gold_cls = batch['y_cls']
                # A positive logit counts as a positive prediction.
                pred_pos = [1. if pred > 0. else 0. for pred in cls_logits]
                cls_correct += sum([p == gold for p, gold in zip(pred_pos, gold_cls)])
                cls_total += float(cls_logits.size()[0])
                cls_tp += sum([p == 1. and gold == 1. for p, gold in zip(pred_pos, gold_cls)])
                cls_t_gold += float(sum(gold_cls))
                cls_t_pred += float(sum(pred_pos))
        else:
            loss, output_logits, _ = model(eval_batch, val_type_name)
        output_index = get_output_index(output_logits, threshold=args.threshold)
        gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
        eval_loss += loss.clone().item()

    eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
    eval_str = get_eval_string(gold_pred)
    _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
    eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
    tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
    tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
    print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
    print(val_type_name + ":" + eval_loss_str)
    print(gold_pred[:3])
    if labeler_mode and cls_logits is not None:
        cls_accuracy = cls_correct / cls_total * 100.
        # Guard the ratios: with no predicted (or no gold) positives the
        # denominators are zero and the original division failed or gave NaN.
        cls_precision = cls_tp / cls_t_pred if cls_t_pred else 0.
        cls_recall = cls_tp / cls_t_gold if cls_t_gold else 0.
        cls_f1 = f1(cls_precision, cls_recall) if (cls_precision + cls_recall) else 0.
        cls_str = ' CLS accuracy: {0:.2f}% P: {1:.3f} R: {2:.3f} F1: {3:.3f}'.format(
            cls_accuracy, cls_precision, cls_recall, cls_f1)
        print(val_type_name + ":" + eval_str + cls_str)
    else:
        print(val_type_name + ":" + eval_str)
    logging.info(val_type_name + ":" + eval_loss_str)
    logging.info(val_type_name + ":" + eval_str)
    model.train()
    # Drop the reference to the generator before returning.
    dev_gen = None
    return eval_loss, macro_f1
def _test(args, device):
    """Run the reloaded ETModel on the eval data and dump its predictions.

    Writes two files under ``constant.OUT_ROOT``: ``<model_id>.pkl`` with the
    gold label arrays and sigmoid probabilities, and ``<model_id>.json``
    mapping each annotation id to its gold and predicted type strings.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).
        device: Device the model and batches are moved to.

    Raises:
        NotImplementedError: If ``args.model_type`` is not ``'ETModel'``.
    """
    assert args.load
    test_fname = args.eval_data
    data_gens, _, _ = get_datasets([(test_fname, 'test')], args)
    if args.model_type == 'ETModel':
        print('==> ETModel')
        model = models.ETModel(args, constant.ANSWER_NUM_DICT[args.goal])
    else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
    model.to(device)
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)
    # Grab the sigmoid from the bare model: once wrapped in DataParallel the
    # wrapper no longer exposes custom attributes such as ``sigmoid_fn``, so
    # the original ``model.sigmoid_fn`` lookup failed in multi-GPU mode.
    sigmoid_fn = model.sigmoid_fn
    if args.multi_gpu:
        model = torch.nn.DataParallel(model)
        print("==> use", torch.cuda.device_count(), "GPUs.")

    for name, dataset in [(test_fname, data_gens[0])]:
        print('Processing... ' + name)
        total_gold_pred = []
        total_annot_ids = []
        total_probs = []
        total_ys = []
        for batch_num, batch in enumerate(dataset):
            if batch_num % 10 == 0:
                print(batch_num)
            if not isinstance(batch, dict):
                print('==> batch: ', batch)
            eval_batch, annot_ids = to_torch(batch, device)
            if args.multi_gpu:
                output_logits = model(eval_batch)
            else:
                _, output_logits, _ = model(eval_batch)
            output_index = get_output_index(output_logits, threshold=args.threshold)
            output_prob = sigmoid_fn(output_logits).data.cpu().clone().numpy()
            y = eval_batch['y'].data.cpu().clone().numpy()
            gold_pred = get_gold_pred_str(output_index, y, args.goal)
            total_probs.extend(output_prob)
            total_ys.extend(y)
            total_gold_pred.extend(gold_pred)
            total_annot_ids.extend(annot_ids)

        # Context managers make sure both output files are flushed and closed.
        with open(constant.OUT_ROOT + '{0:s}.pkl'.format(args.model_id), "wb") as pkl_out:
            pickle.dump({'gold_id_array': total_ys, 'pred_dist': total_probs}, pkl_out)
        print(len(total_annot_ids), len(total_gold_pred))
        with open(constant.OUT_ROOT + '{0:s}.json'.format(args.model_id), 'w') as f_out:
            output_dict = {
                a_id: {"gold": gold, "pred": pred}
                for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred)
            }
            json.dump(output_dict, f_out)
        logging.info('processing: ' + name)
    print('Done!')
def _test(args):
    """Evaluate the reloaded model using its raw (unmasked) logits.

    For every batch the model returns ``(loss, masked_logits, raw_logits,
    mask)``; the mask is printed and predictions are derived from
    ``raw_logits``.  After the pass the MRR is printed, the gold arrays and
    raw scores are pickled to ``./<reload_model_name>.p``, the per-annotation
    gold/pred strings are written to ``./<reload_model_name>.json`` and the
    evaluation summary is printed and logged.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).
    """
    assert args.load
    test_fname = args.eval_data
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

    for name, dataset in [(test_fname, data_gens[0])]:
        print('Processing... ' + name)
        total_gold_pred = []
        total_annot_ids = []
        total_probs = []
        total_ys = []
        for batch in dataset:
            eval_batch, annot_ids = to_torch(batch)
            loss, masked_logits, raw_logits, mask = model(eval_batch, args.goal)
            print(mask)
            # Predictions are taken from the raw logits, not the masked ones.
            output_index = get_output_index(raw_logits)
            output_prob = raw_logits.data.cpu().clone().numpy()
            y = eval_batch['y'].data.cpu().clone().numpy()
            gold_pred = get_gold_pred_str(output_index, y, args.goal)
            total_probs.extend(output_prob)
            total_ys.extend(y)
            total_gold_pred.extend(gold_pred)
            total_annot_ids.extend(annot_ids)

        mrr_val = mrr(total_probs, total_ys)
        print('mrr_value: ', mrr_val)
        # Context manager closes the pickle file; the original left the handle
        # returned by ``open`` dangling.
        with open('./{0:s}.p'.format(args.reload_model_name), "wb") as pkl_out:
            pickle.dump({'gold_id_array': total_ys, 'pred_dist': total_probs}, pkl_out)
        with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
            output_dict = {
                a_id: {"gold": gold, "pred": pred}
                for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred)
            }
            json.dump(output_dict, f_out, indent=2)
        eval_str = get_eval_string(total_gold_pred)
        print(eval_str)
        logging.info('processing: ' + name)
        logging.info(eval_str)
def _test(args):
    """Sweep decision thresholds on one eval batch and save precision/recall.

    Loads ``<model_id>_best.pt`` from ``constant.EXP_ROOT``, runs the model
    once on the first batch of the eval generator, then for every threshold in
    ``np.arange(0, 1, 0.005)`` derives predictions and records the precision
    and recall returned by ``get_eval_string``.  The pairs are written with
    ``np.save`` to ``<EXP_ROOT>/baseline_pr_dev``.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).
    """
    assert args.load
    test_fname = args.eval_data
    dataset_spec = [(test_fname, 'test', args.goal)]
    # NOTE(review): the datasets are built here and again after the checkpoint
    # is loaded; the first result is never used. Kept to preserve behaviour.
    data_gens = get_datasets(dataset_spec, args)

    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    saved_path = constant.EXP_ROOT
    checkpoint = torch.load(saved_path + '/' + args.model_id + '_best.pt')
    model.load_state_dict(checkpoint["state_dict"])

    data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
    name, dataset = test_fname, data_gens[0]
    print('Processing... ' + name)

    # A single forward pass; only the threshold changes in the sweep below.
    first_batch = next(dataset)
    eval_batch, annot_ids = to_torch(first_batch)
    loss, output_logits = model(eval_batch, args.goal)

    p_and_r = []
    for thresh in tqdm(np.arange(0, 1, 0.005)):
        total_gold_pred, total_annot_ids = [], []
        total_probs, total_ys = [], []
        print('thresh {}'.format(thresh))
        output_index = get_output_index(output_logits, thresh)
        probs = model.sigmoid_fn(output_logits)
        output_prob = probs.data.cpu().clone().numpy()
        y = eval_batch['y'].data.cpu().clone().numpy()
        gold_pred = get_gold_pred_str(output_index, y, args.goal)
        total_probs.extend(output_prob)
        total_ys.extend(y)
        total_gold_pred.extend(gold_pred)
        total_annot_ids.extend(annot_ids)
        eval_str, p, r = get_eval_string(total_gold_pred)
        p_and_r.append([p, r])
        print(eval_str)
    np.save(saved_path + '/baseline_pr_dev', p_and_r)
def get_ultra_fine_entity_type(args, test_fname, model, vocab_set, mention, text):
    """Predict entity types for one mention in its context sentence.

    Builds a test generator from ``test_fname`` plus the given ``text`` and
    ``mention``, runs ``model`` on every batch, and returns the predicted
    types of the first example of the last processed batch.

    Args:
        args: Run configuration (``args.goal`` is read).
        test_fname: Data file name passed to ``get_datasets``.
        model: Callable returning ``(loss, output_logits)``.
        vocab_set: Forwarded to ``get_datasets``.
        mention: Mention string forwarded to ``get_datasets``.
        text: Context sentence forwarded to ``get_datasets``.

    Returns:
        The ``pred`` element of the first (gold, pred) pair, i.e. the list of
        predicted type strings.

    Raises:
        ValueError: If the generator produced no batch at all (previously this
            surfaced as an unbound-variable error).
    """
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args, text, mention, vocab_set)
    gold_pred = None
    for batch in data_gens[0]:
        eval_batch, _ = to_torch(batch)
        _, output_logits = model(eval_batch, args.goal)
        output_index = get_output_index(output_logits)
        y = eval_batch['y'].data.cpu().clone().numpy()
        # Only the predictions are needed; the unused probability and
        # accumulator bookkeeping of the original was dropped.
        gold_pred = get_gold_pred_str(output_index, y, args.goal)
    if gold_pred is None:
        raise ValueError('no batch was produced for ' + repr(test_fname))
    return gold_pred[0][1]  # list of ultra fine entities
def evaluate_data(batch_num, dev_fname, model, args, elmo, device, char_vocab, dev_type='original'):
    """Evaluate ``model`` on a whole dev file and report loss and macro F1.

    Args:
        batch_num: Step number shown in the loss message.
        dev_fname: Dev file passed to ``get_data_gen``.
        model: Callable returning ``(loss, output_logits, extra)``.
        args: Run configuration (``threshold`` and ``goal`` are read).
        elmo: Forwarded to ``get_data_gen``.
        device: Device the batches are moved to by ``to_torch``.
        char_vocab: Forwarded to ``get_data_gen``.
        dev_type: Label used in the printed summary lines.

    Returns:
        ``(eval_loss, macro_f1)``: the summed batch loss and the last element
        of ``eval_metric.macro(gold_pred)``.
    """
    model.eval()
    batches = get_data_gen(dev_fname, 'test', args, char_vocab, elmo=elmo)
    gold_pred = []
    eval_loss = 0.
    total_ex_count = 0
    for batch in tqdm(batches):
        total_ex_count += len(batch['y'])
        eval_batch, annot_ids = to_torch(batch, device)
        loss, output_logits, _ = model(eval_batch)
        predicted = get_output_index(output_logits, threshold=args.threshold)
        gold_labels = eval_batch['y'].data.cpu().clone()
        gold_pred += get_gold_pred_str(predicted, gold_labels, args.goal)
        eval_loss += loss.clone().item()

    summary = get_eval_string(gold_pred)
    macro_f1 = eval_metric.macro(gold_pred)[5] if False else None
    _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
    loss_msg = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
    banner = '==> ' + dev_type
    print(banner + ' EVAL: seen ' + repr(total_ex_count) + ' examples.')
    print(loss_msg)
    print(gold_pred[:3])
    print(banner + ' : ' + summary)
    logging.info(loss_msg)
    logging.info(summary)
    model.train()
    return eval_loss, macro_f1
def _test_labeler(args):
    """Run a reloaded Labeler/Filter model on the eval data and dump results.

    Writes ``./<reload_model_name>_gold_pred.p`` (annotation ids plus the
    tuples returned by ``get_gold_pred_str``) and ``./<model_id>.json``
    (per-annotation gold/pred strings, plus classifier fields for the filter
    model), then prints and logs the evaluation summary.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).

    Raises:
        NotImplementedError: If ``args.model_type`` is neither ``'labeler'``
            nor ``'filter'``.
    """
    assert args.load
    test_fname = args.eval_data
    data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)
    if args.model_type == 'labeler':
        print('==> Labeler')
        model = denoising_models.Labeler(args, constant.ANSWER_NUM_DICT[args.goal])
    elif args.model_type == 'filter':
        print('==> Filter')
        model = denoising_models.Filter(args, constant.ANSWER_NUM_DICT[args.goal])
    else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
    model.cuda()
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

    for name, dataset in [(test_fname, data_gens[0])]:
        print('Processing... ' + name)
        total_gold_pred_pcls_ycls_ynoise = []
        total_annot_ids = []
        for batch_num, batch in enumerate(dataset):
            print(batch_num)
            if not isinstance(batch, dict):
                print('==> batch: ', batch)
            eval_batch, annot_ids = to_torch(batch)
            loss, output_logits, cls_logits = model(eval_batch, args.goal)
            output_index = get_output_index(output_logits, threshold=args.threshold)
            y = eval_batch['y'].data.cpu().clone().numpy()
            y_cls = eval_batch['y_cls'].data.cpu().clone().numpy()
            y_noisy_idx = eval_batch['y_noisy_idx'].data.cpu().clone().numpy()
            gold_pred_pcls_ycls_ynoise = get_gold_pred_str(
                output_index, y, args.goal,
                cls_logits=cls_logits, y_cls=y_cls, y_noisy_idx=y_noisy_idx)
            total_gold_pred_pcls_ycls_ynoise.extend(gold_pred_pcls_ycls_ynoise)
            total_annot_ids.extend(annot_ids)

        # Context manager closes the pickle file; the original left the handle
        # returned by ``open`` dangling.
        with open('./{0:s}_gold_pred.p'.format(args.reload_model_name), "wb") as pkl_out:
            pickle.dump((total_annot_ids, total_gold_pred_pcls_ycls_ynoise), pkl_out)
        with open('./{0:s}.json'.format(args.model_id), 'w') as f_out:
            output_dict = {}
            if args.model_type == 'filter':
                for a_id, (gold, pred, cls, ycls, ynoise) in zip(total_annot_ids, total_gold_pred_pcls_ycls_ynoise):
                    output_dict[a_id] = {"gold": gold, "pred": pred, "cls_pred": cls, "cls_gold": ycls, "y_noisy": ynoise}
            elif args.model_type == 'labeler':
                for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred_pcls_ycls_ynoise):
                    output_dict[a_id] = {"gold": gold, "pred": pred}
            else:
                print('Invalid model type: -model_type ' + args.model_type)
                raise NotImplementedError
            json.dump(output_dict, f_out)
        # Score every example seen, keeping only the leading (gold, pred)
        # fields. The original scored just the last batch's tuples.
        eval_pairs = [tuple(item[:2]) for item in total_gold_pred_pcls_ycls_ynoise]
        eval_str = get_eval_string(eval_pairs)
        print(eval_str)
        logging.info('processing: ' + name)
        logging.info(eval_str)
def _test(args):
    """Run a reloaded entity-typing model on the eval data and dump results.

    Supports ``args.model_type`` values ``'et_model'`` and
    ``'bert_uncase_small'``.  Per-annotation gold/pred strings are written to
    ``./<reload_model_name>.json`` and the evaluation summary is printed and
    logged.

    Args:
        args: Run configuration; ``args.load`` must be truthy (asserted).

    Raises:
        NotImplementedError: For any other ``args.model_type``.
    """
    assert args.load
    test_fname = args.eval_data
    data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)

    answer_num = None
    if args.model_type == 'et_model':
        print('==> Entity Typing Model')
        model = models.ETModel(args, constant.ANSWER_NUM_DICT[args.goal])
    elif args.model_type == 'bert_uncase_small':
        print('==> Bert Uncased Small')
        model = models.Bert(args, constant.ANSWER_NUM_DICT[args.goal])
    else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
    model.cuda()
    model.eval()
    load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)

    # Only one dataset is evaluated: the first generator returned above.
    name, dataset = test_fname, data_gens[0]
    print('Processing... ' + name)
    total_gold_pred = []
    total_annot_ids = []
    for step, batch in enumerate(dataset):
        print(step)
        if not isinstance(batch, dict):
            print('==> batch: ', batch)
        eval_batch, annot_ids = to_torch(batch)
        loss, output_logits, attn_score = model(eval_batch, args.goal)
        predicted = get_output_index(output_logits, threshold=args.threshold)
        y = eval_batch['y'].data.cpu().clone().numpy()
        total_gold_pred.extend(get_gold_pred_str(predicted, y, args.goal))
        total_annot_ids.extend(annot_ids)

    with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
        output_dict = {
            a_id: {"gold": gold, "pred": pred}
            for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred)
        }
        json.dump(output_dict, f_out)
    eval_str = get_eval_string(total_gold_pred)
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)
def _save_checkpoint(model, optimizer, path):
    """Write the model and optimizer state dicts to ``path`` with ``torch.save``."""
    torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, path)


def _train_labeler(args):
    """Train a Labeler or Filter denoising model.

    Loops over the training generators until one of them is exhausted, at
    which point the final checkpoint ``<EXP_ROOT>/<model_id>.pt`` is written
    and the function returns.  Along the way it logs the training loss every
    ``args.log_period`` steps, reports training accuracy (and classifier
    metrics when the model returns classifier logits) every
    ``args.eval_period`` steps, evaluates on dev data, keeps the best
    macro-F1 checkpoint in the joint setup, and writes periodic checkpoints
    every ``args.save_period`` steps.

    Args:
        args: Run configuration.

    Raises:
        NotImplementedError: If ``args.model_type`` is neither ``'labeler'``
            nor ``'filter'``.
    """
    if args.data_setup == 'joint':
        train_gen_list, val_gen_list, crowd_dev_gen, elmo, bert, vocab = get_joint_datasets(args)
    else:
        train_fname = args.train_data
        dev_fname = args.dev_data
        print(train_fname, dev_fname)
        data_gens, elmo = get_datasets([(train_fname, 'train', args.goal),
                                        (dev_fname, 'dev', args.goal)], args)
        train_gen_list = [(args.goal, data_gens[0])]
        val_gen_list = [(args.goal, data_gens[1])]
        # NOTE(review): get_datasets returns no BERT handle in this setup;
        # None is forwarded to evaluate_data below -- confirm get_data_gen
        # accepts it.
        bert = None
    train_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "train"))
    validation_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "validation"))
    tensorboard = TensorboardWriter(train_log, validation_log)

    if args.model_type == 'labeler':
        print('==> Labeler')
        model = denoising_models.Labeler(args, constant.ANSWER_NUM_DICT[args.goal])
    elif args.model_type == 'filter':
        print('==> Filter')
        model = denoising_models.Filter(args, constant.ANSWER_NUM_DICT[args.goal])
    else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
    model.cuda()

    total_loss = 0
    batch_num = 0
    best_macro_f1 = 0.
    start_time = time.time()
    init_time = time.time()

    if args.bert:
        if args.bert_param_path:
            print('==> Loading BERT from ' + args.bert_param_path)
            model.bert.load_state_dict(torch.load(args.bert_param_path, map_location='cpu'))
        no_decay = ['bias', 'gamma', 'beta']

        def _exempt_from_decay(param_name):
            # Parameter names are dotted paths, so match by substring. The
            # original exact-membership test never matched and left the
            # zero-decay group empty.
            return any(marker in param_name for marker in no_decay)

        optimizer_parameters = [
            {'params': [p for n, p in model.named_parameters() if not _exempt_from_decay(n)],
             'weight_decay_rate': 0.01},
            {'params': [p for n, p in model.named_parameters() if _exempt_from_decay(n)],
             'weight_decay_rate': 0.0}
        ]
        optimizer = BERTAdam(optimizer_parameters,
                             lr=args.bert_learning_rate,
                             warmup=args.bert_warmup_proportion,
                             t_total=-1)  # TODO:
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.load:
        load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)

    for idx, m in enumerate(model.modules()):
        logging.info(str(idx) + '->' + str(m))

    # The loop only ends through the StopIteration branch below, which writes
    # the final checkpoint; the unreachable save after the loop was removed.
    while True:
        batch_num += 1  # single batch composed of all train signal passed by.
        for (type_name, data_gen) in train_gen_list:
            try:
                batch = next(data_gen)
                batch, _ = to_torch(batch)
            except StopIteration:
                logging.info(type_name + " finished at " + str(batch_num))
                print('Done!')
                _save_checkpoint(model, optimizer,
                                 '{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
                return
            optimizer.zero_grad()
            loss, output_logits, cls_logits = model(batch, type_name)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

            if batch_num % args.log_period == 0 and batch_num > 0:
                gc.collect()
                cur_loss = float(1.0 * loss.clone().item())
                elapsed = time.time() - start_time
                train_loss_str = ('|loss {0:3f} | at {1:d}step | @ {2:.2f} ms/batch'.format(
                    cur_loss, batch_num, elapsed * 1000 / args.log_period))
                start_time = time.time()
                print(train_loss_str)
                logging.info(train_loss_str)
                tensorboard.add_train_scalar('train_loss_' + type_name, cur_loss, batch_num)

            if batch_num % args.eval_period == 0 and batch_num > 0:
                output_index = get_output_index(output_logits, threshold=args.threshold)
                gold_pred_train = get_gold_pred_str(output_index, batch['y'].data.cpu().clone(), args.goal)
                print(gold_pred_train[:10])
                accuracy = sum([set(y) == set(yp) for y, yp in gold_pred_train]) * 1.0 / len(gold_pred_train)
                train_acc_str = '{1:s} Train accuracy: {0:.1f}%'.format(accuracy * 100, type_name)
                train_cls_acc_str = None
                if cls_logits is not None:
                    gold_cls = batch['y_cls'].data.cpu().numpy()
                    # A positive logit counts as a positive prediction.
                    pred_pos = [1. if pred > 0. else 0. for pred in cls_logits]
                    cls_accuracy = sum([p == gold for p, gold in zip(pred_pos, gold_cls)]) / float(cls_logits.size()[0])
                    cls_tp = sum([p == 1. and gold == 1. for p, gold in zip(pred_pos, gold_cls)])
                    n_pred_pos = float(sum(pred_pos))
                    n_gold_pos = float(sum(gold_cls))
                    # Guard the ratios so a batch without predicted or gold
                    # positives cannot abort training with a division by zero.
                    cls_precision = cls_tp / n_pred_pos if n_pred_pos else 0.
                    cls_recall = cls_tp / n_gold_pos if n_gold_pos else 0.
                    cls_f1 = f1(cls_precision, cls_recall) if (cls_precision + cls_recall) else 0.
                    train_cls_acc_str = '{1:s} Train cls accuracy: {0:.2f}% P: {2:.3f} R: {3:.3f} F1: {4:.3f}'.format(
                        cls_accuracy * 100, type_name, cls_precision, cls_recall, cls_f1)
                print(train_acc_str)
                if train_cls_acc_str is not None:
                    print(train_cls_acc_str)
                logging.info(train_acc_str)
                tensorboard.add_train_scalar('train_acc_' + type_name, accuracy, batch_num)
                if args.goal != 'onto':
                    for (val_type_name, val_data_gen) in val_gen_list:
                        if val_type_name == type_name:
                            eval_batch, _ = to_torch(next(val_data_gen))
                            evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name, args, args.goal)

        if batch_num % args.eval_period == 0 and batch_num > 0 and args.data_setup == 'joint':
            # Evaluate Loss on the Turk Dev dataset.
            print('---- eval at step {0:d} ---'.format(batch_num))
            crowd_eval_loss, macro_f1 = evaluate_data(batch_num, 'crowd/dev_tree.json', model,
                                                      tensorboard, "open", args, elmo, bert, vocab=vocab)
            if best_macro_f1 < macro_f1:
                best_macro_f1 = macro_f1
                save_fname = '{0:s}/{1:s}_best.pt'.format(constant.EXP_ROOT, args.model_id)
                _save_checkpoint(model, optimizer, save_fname)
                print('Total {0:.2f} minutes have passed, saving at {1:s} '.format(
                    (time.time() - init_time) / 60, save_fname))

        if batch_num % args.eval_period == 0 and batch_num > 0 and args.goal == 'onto':
            # Evaluate on the OntoNotes dev data. ``bert`` is passed as in the
            # joint call above; the original call left it out.
            print('---- OntoNotes: eval at step {0:d} ---'.format(batch_num))
            crowd_eval_loss, macro_f1 = evaluate_data(batch_num, args.dev_data, model, tensorboard,
                                                      args.goal, args, elmo, bert)

        if batch_num % args.save_period == 0 and batch_num > 0:
            save_fname = '{0:s}/{1:s}_{2:d}.pt'.format(constant.EXP_ROOT, args.model_id, batch_num)
            _save_checkpoint(model, optimizer, save_fname)
            print('Total {0:.2f} minutes have passed, saving at {1:s} '.format(
                (time.time() - init_time) / 60, save_fname))
def _train(args):
    """Train ``models.Model`` on the configured training generators.

    Loops until one training generator is exhausted, then writes
    ``<EXP_ROOT>/<model_id>.pt`` and returns.  Every ``args.log_period`` steps
    the training loss is printed, logged and sent to tensorboard; every
    ``args.eval_period`` steps training accuracy is reported and a validation
    batch of the same type is evaluated (plus one crowd-dev batch in the joint
    setup); every ``args.save_period`` steps a numbered checkpoint is written.

    Args:
        args: Run configuration.
    """
    if args.data_setup == 'joint':
        train_gen_list, val_gen_list, crowd_dev_gen = get_joint_datasets(args)
    else:
        train_fname = args.train_data
        dev_fname = args.dev_data
        data_gens = get_datasets(
            [(train_fname, 'train', args.goal), (dev_fname, 'dev', args.goal)], args)
        train_gen_list = [(args.goal, data_gens[0])]
        val_gen_list = [(args.goal, data_gens[1])]

    train_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "train"))
    validation_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "validation"))
    tensorboard = TensorboardWriter(train_log, validation_log)

    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    total_loss = 0
    batch_num = 0
    start_time = time.time()
    init_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    if args.load:
        load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)
    for idx, m in enumerate(model.modules()):
        logging.info(str(idx) + '->' + str(m))

    def _checkpoint(path):
        # Persist model and optimizer state together.
        state = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        torch.save(state, path)

    while True:
        batch_num += 1  # single batch composed of all train signal passed by.
        for type_name, data_gen in train_gen_list:
            try:
                batch = next(data_gen)
                batch, _ = to_torch(batch)
            except StopIteration:
                logging.info(type_name + " finished at " + str(batch_num))
                _checkpoint('{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
                return

            optimizer.zero_grad()
            loss, output_logits = model(batch, type_name)
            loss.backward()
            total_loss += loss.data.cpu()
            optimizer.step()

            if batch_num % args.log_period == 0 and batch_num > 0:
                gc.collect()
                cur_loss = float(1.0 * loss.data.cpu().clone())
                elapsed = time.time() - start_time
                ms_per_batch = elapsed * 1000 / args.log_period
                train_loss_str = '|loss {0:3f} | at {1:d}step | @ {2:.2f} ms/batch'.format(
                    cur_loss, batch_num, ms_per_batch)
                start_time = time.time()
                print(train_loss_str)
                logging.info(train_loss_str)
                tensorboard.add_train_scalar('train_loss_' + type_name, cur_loss, batch_num)

            if batch_num % args.eval_period == 0 and batch_num > 0:
                predicted = get_output_index(output_logits)
                gold_pred_train = get_gold_pred_str(
                    predicted, batch['y'].data.cpu().clone(), args.goal)
                exact_matches = sum([set(y) == set(yp) for y, yp in gold_pred_train])
                accuracy = exact_matches * 1.0 / len(gold_pred_train)
                train_acc_str = '{1:s} Train accuracy: {0:.1f}%'.format(accuracy * 100, type_name)
                print(train_acc_str)
                logging.info(train_acc_str)
                tensorboard.add_train_scalar('train_acc_' + type_name, accuracy, batch_num)
                for val_type_name, val_data_gen in val_gen_list:
                    if val_type_name != type_name:
                        continue
                    eval_batch, _ = to_torch(next(val_data_gen))
                    evaluate_batch(batch_num, eval_batch, model, tensorboard,
                                   val_type_name, args.goal)

        if batch_num % args.eval_period == 0 and batch_num > 0 and args.data_setup == 'joint':
            # Evaluate Loss on the Turk Dev dataset.
            print('---- eval at step {0:d} ---'.format(batch_num))
            feed_dict = next(crowd_dev_gen)
            eval_batch, _ = to_torch(feed_dict)
            crowd_eval_loss = evaluate_batch(batch_num, eval_batch, model,
                                             tensorboard, "open", args.goal)

        if batch_num % args.save_period == 0 and batch_num > 0:
            save_fname = '{0:s}/{1:s}_{2:d}.pt'.format(constant.EXP_ROOT, args.model_id, batch_num)
            _checkpoint(save_fname)
            minutes = (time.time() - init_time) / 60
            print('Total {0:.2f} minutes have passed, saving at {1:s} '.format(minutes, save_fname))

    # Training finished!  (The loop above has no ``break``; it exits through
    # the ``return`` in the StopIteration branch.)
    _checkpoint('{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
def _train(args, device):
    """Train an ``ETModel`` on all configured training generators.

    Loops until one training generator is exhausted, then writes
    ``<EXP_ROOT>/<model_id>.pt`` and returns.  The training loss is reported
    every ``args.log_period`` steps and training accuracy every
    ``args.eval_period`` steps; past ``args.eval_after`` steps the dev data is
    evaluated and the best macro-F1 checkpoint kept; past ``args.save_after``
    steps a numbered checkpoint is written every ``args.save_period`` steps.

    Args:
        args: Run configuration.
        device: Device the model and batches are moved to.

    Raises:
        NotImplementedError: If ``args.model_type`` is not ``'ETModel'``.
    """
    print('==> Loading data generator... ')
    train_gen_list, elmo, char_vocab = get_all_datasets(args)

    if args.model_type != 'ETModel':
        print('ERROR: Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
    print('==> ETModel')
    model = models.ETModel(args, constant.ANSWER_NUM_DICT[args.goal])
    model.to(device)

    total_loss = 0
    batch_num = 0
    best_macro_f1 = 0.
    start_time = time.time()
    init_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    if args.load:
        load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)
    for idx, m in enumerate(model.modules()):
        logging.info(str(idx) + '->' + str(m))

    def _checkpoint(path):
        # Persist model and optimizer state together.
        state = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        torch.save(state, path)

    def _announce_save(path):
        minutes = (time.time() - init_time) / 60
        print('Total {0:.2f} minutes have passed, saving at {1:s} '.format(minutes, path))

    while True:
        batch_num += 1
        for data_gen in train_gen_list:
            try:
                batch = next(data_gen)
                batch, _ = to_torch(batch, device)
            except StopIteration:
                logging.info('finished at ' + str(batch_num))
                print('Done!')
                _checkpoint('{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id))
                return

            optimizer.zero_grad()
            loss, output_logits, _ = model(batch)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

            if batch_num % args.log_period == 0 and batch_num > 0:
                gc.collect()
                cur_loss = float(1.0 * loss.clone().item())
                elapsed = time.time() - start_time
                ms_per_batch = elapsed * 1000 / args.log_period
                train_loss_str = '|loss {0:3f} | at {1:d}step | @ {2:.2f} ms/batch'.format(
                    cur_loss, batch_num, ms_per_batch)
                start_time = time.time()
                print(train_loss_str)
                logging.info(train_loss_str)

            if batch_num % args.eval_period == 0 and batch_num > 0:
                predicted = get_output_index(output_logits, threshold=args.threshold)
                gold_pred_train = get_gold_pred_str(
                    predicted, batch['y'].data.cpu().clone(), args.goal)
                exact_matches = sum([set(y) == set(yp) for y, yp in gold_pred_train])
                accuracy = exact_matches * 1.0 / len(gold_pred_train)
                train_acc_str = '==> Train accuracy: {0:.1f}%'.format(accuracy * 100)
                print(train_acc_str)
                logging.info(train_acc_str)

        if batch_num % args.eval_period == 0 and batch_num > args.eval_after:
            print('---- eval at step {0:d} ---'.format(batch_num))
            _, macro_f1 = evaluate_data(
                batch_num, args.dev_data, model, args, elmo, device, char_vocab,
                dev_type='original')
            if best_macro_f1 < macro_f1:
                # New best dev score: keep this checkpoint.
                best_macro_f1 = macro_f1
                save_fname = '{0:s}/{1:s}_best.pt'.format(constant.EXP_ROOT, args.model_id)
                _checkpoint(save_fname)
                _announce_save(save_fname)

        if batch_num % args.save_period == 0 and batch_num > args.save_after:
            save_fname = '{0:s}/{1:s}_{2:d}.pt'.format(constant.EXP_ROOT, args.model_id, batch_num)
            _checkpoint(save_fname)
            _announce_save(save_fname)