def evaluation(args, data, split, model, domain, epoch, str_res='results', ner_model=True, predictor=None):
    """Evaluate dependency-parsing performance of `model` on `data`.

    Iterates over batches, decodes predicted heads/arc labels, and
    accumulates attachment statistics (UAS/LAS with and without
    punctuation, complete matches, root accuracy) into an eval dict,
    which is printed via `print_results` and returned.

    NOTE(review): `ner_model` defaults to True but is tested with
    `is not None` below, so even `ner_model=False` selects the NER
    branch — confirm the intended sentinel is None rather than False.
    """
    # evaluate performance on data
    model.eval()
    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    #for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
    for batch in prepare_data.iterate_batch_rand_bucket_choosing(
            data, args.batch_size, args.device, ner_model=ner_model,
            predictor=predictor):
        if ner_model is not None:
            # NER-aware batches carry extra features (f_f/f_p/b_f/b_p/w_f,
            # mask_v, file_no) that are forwarded to the model.
            word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths, \
                f_f, f_p, b_f, b_p, w_f, mask_v, file_no = batch
            out_arc, out_arc_tag, masks, lengths = model.forward(
                word, char, pos, mask=masks, length=lengths, f_f=f_f, f_p=f_p,
                b_f=b_f, b_p=b_p, w_f=w_f, file_no=file_no, mask_v=mask_v)
        else:
            word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths = batch
            out_arc, out_arc_tag, masks, lengths = model.forward(
                word, char, pos, mask=masks, length=lengths)
        # Decode predicted heads/labels, skipping the leading symbolic tags.
        heads_pred, arc_tags_pred, _ = model.decode(
            out_arc, out_arc_tag, mask=masks, length=lengths,
            leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
        # Move tensors to CPU/numpy for the evaluation routine.
        lengths = lengths.cpu().numpy()
        word = word.data.cpu().numpy()
        pos = pos.data.cpu().numpy()
        ner = ner.data.cpu().numpy()
        heads = heads.data.cpu().numpy()
        arc_tags = arc_tags.data.cpu().numpy()
        heads_pred = heads_pred.data.cpu().numpy()
        arc_tags_pred = arc_tags_pred.data.cpu().numpy()
        stats, stats_nopunc, stats_root, num_inst = parse.eval_(
            word, pos, heads_pred, arc_tags_pred, heads, arc_tags,
            args.alphabets['word_alphabet'], args.alphabets['pos_alphabet'],
            lengths, punct_set=args.punct_set, symbolic_root=True)
        # Unpack per-batch attachment statistics and fold into the totals.
        ucorr, lcorr, total, ucm, lcm = stats
        ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
        corr_root, total_root = stats_root
        eval_dict['dp_ucorrect'] += ucorr
        eval_dict['dp_lcorrect'] += lcorr
        eval_dict['dp_total'] += total
        eval_dict['dp_ucomplete_match'] += ucm
        eval_dict['dp_lcomplete_match'] += lcm
        eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
        eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
        eval_dict['dp_total_nopunc'] += total_nopunc
        eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
        eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
        eval_dict['dp_root_correct'] += corr_root
        eval_dict['dp_total_root'] += total_root
        eval_dict['dp_total_inst'] += num_inst
    eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / eval_dict[
        'dp_total']  # considering w. punctuation
    eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / eval_dict[
        'dp_total']  # considering w. punctuation
    print_results(eval_dict, split, domain, str_res)
    return eval_dict
def main():
    """Train (or evaluate) an in-domain dependency parser.

    Reads the data splits, builds the model/optimizer, then either runs
    the training loop for `args.num_epochs` (with per-epoch dev/test
    evaluation and patience-based learning-rate decay) or, in eval mode,
    evaluates the loaded model on the test split.

    NOTE(review): a second `def main()` later in this file redefines
    this name at import time, so this version is shadowed (dead code)
    unless the duplicate is removed.
    """
    logger.info("Reading and creating arguments")
    args = read_arguments()
    logger.info("Reading Data")
    datasets = {}
    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split],
                                                     args.alphabets,
                                                     args.device,
                                                     symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Setting train and dev to %d samples' % args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(
            datasets, args.set_num_training_samples)
    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = build_model_and_optimizer(
        args)
    # Keep deep copies so the best checkpoint survives further updates.
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)
    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependecy Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' %
                (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" %
                (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % (args.num_epochs))
    print('\n')
    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'],
                                                    args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print(
                'Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, decay rate=%.2f (schedule=%d, decay=%d)): '
                % (epoch, args.rnn_mode, args.opt, lr, args.epsilon,
                   args.decay_rate, args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0
            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'], args.batch_size, args.device,
                unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter):
                batch_num = batch_num + 1  # 1-based count for logging/ETA
                optimizer.zero_grad()
                # compute loss of main task
                word, char, pos, ner_tags, heads, arc_tags, auto_label, \
                    masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                loss_arc, loss_arc_tag = model.loss(out_arc, out_arc_tag,
                                                    heads, arc_tags,
                                                    mask=masks, length=lengths)
                loss = loss_arc + loss_arc_tag
                # update losses; one position per sentence (presumably the
                # symbolic root) is excluded from the instance count
                num_insts = masks.data.sum() - word.size(0)
                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts
                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()
                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave
                # update log every 50 batches
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain,
                                total_loss / total_train_inst,
                                total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst,
                                time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()
            print('\n')
            print(
                'train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time: %.2fs'
                % (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst,
                   total_arc_loss / total_train_inst,
                   total_arc_tag_loss / total_train_inst,
                   time.time() - start_time))
            dev_eval_dict, test_eval_dict, best_model, best_optimizer, patient = in_domain_evaluation(
                args, datasets, model, optimizer, dev_eval_dict,
                test_eval_dict, epoch, best_model, best_optimizer, patient)
            # Patience exhausted: decay the learning rate and rebuild the
            # optimizer around the current parameters.
            if patient >= args.schedule:
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0
        print_results(test_eval_dict['in_domain'], 'test', args.domain,
                      'best_results')
        print('\n')
        # Final evaluation of the best checkpoint on every split.
        for split in datasets.keys():
            evaluation(args, datasets[split], split, best_model, args.domain,
                       epoch, 'best_results')
    else:
        logger.info("Evaluating")
        epoch = start_epoch
        #for split in ['train', 'dev', 'test']:
        for split in ['test']:
            eval_dict = evaluation(args, datasets[split], split, model,
                                   args.domain, epoch, 'best_results')
            write_results(args, datasets[split], args.domain, split, model,
                          args.domain, eval_dict)
def main():
    """Train (or evaluate) an in-domain dependency parser.

    Like the earlier `main`, but additionally appends each epoch's mean
    losses and dev evaluation to the '<full_model_name>_all_epochs' log
    file, and in eval mode evaluates on train/dev/test.

    NOTE(review): this is a second definition of `main` in this module;
    at import time it shadows the earlier one. Keep only one.
    """
    logger.info("Reading and creating arguments")
    args = read_arguments()
    logger.info("Reading Data")
    datasets = {}
    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split],
                                                     args.alphabets,
                                                     args.device,
                                                     symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Note the change here')
        print('dev set in not touched similar to test set')
        # print('Setting train and dev to %d samples' % args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(
            datasets, args.set_num_training_samples)
    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    #########################################################
    # Here constraints need to be added.
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = \
        build_model_and_optimizer(args)
    # Deep copies so the best checkpoint survives further updates.
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)
    #########################################################
    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependecy Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' %
                (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" %
                (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % (args.num_epochs))
    print('\n')
    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'],
                                                    args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print(
                'Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, decay rate=%.2f (schedule=%d, decay=%d)): '
                % (epoch, args.rnn_mode, args.opt, lr, args.epsilon,
                   args.decay_rate, args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0
            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'], args.batch_size, args.device,
                unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter):
                batch_num = batch_num + 1  # 1-based count for logging/ETA
                optimizer.zero_grad()
                # compute loss of main task
                # word/pos/ner_tags/heads/auto_label/masks: [batch, seq_len]
                # char: [batch, seq_len, word_len]; lengths: [batch]
                # (auto_label is unpacked but unused in this loop)
                word, char, pos, ner_tags, heads, arc_tags, auto_label, \
                    masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                # out_arc[b, i, j]: score that token j heads token i;
                # out_arc_tag is the (head, child) representation pair
                # used for labeled-arc scoring.
                loss_arc, loss_arc_tag = model.loss(out_arc, out_arc_tag,
                                                    heads, arc_tags,
                                                    mask=masks, length=lengths)
                loss = loss_arc + loss_arc_tag
                # update losses; one position per sentence (presumably the
                # symbolic root) is excluded from the instance count
                num_insts = masks.data.sum() - word.size(0)
                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts
                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()
                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave
                # update log every 50 batches
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain,
                                total_loss / total_train_inst,
                                total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst,
                                time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()
            print('\n')
            print(
                'train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time: %.2fs'
                % (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst,
                   total_arc_loss / total_train_inst,
                   total_arc_tag_loss / total_train_inst,
                   time.time() - start_time))
            # FIX: this call must be live — it appeared commented out, but
            # `curr_dev_eval_dict` is read just below and `patient` must be
            # updated for the learning-rate schedule to ever trigger.
            dev_eval_dict, test_eval_dict, best_model, best_optimizer, \
                patient, curr_dev_eval_dict = in_domain_evaluation(
                    args, datasets, model, optimizer, dev_eval_dict,
                    test_eval_dict, epoch, best_model, best_optimizer, patient)
            # Persist this epoch's mean losses (totals are 0-dim tensors
            # after accumulating tensor num_insts) plus the dev evaluation.
            store = {'total_loss': str(total_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'arc_loss': str(total_arc_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'arc_tag_loss': str(total_arc_tag_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'eval': curr_dev_eval_dict}
            #############################################
            str_file = args.full_model_name + '_' + 'all_epochs'
            with open(str_file, 'a') as f:
                f.write(str(store) + '\n')
            ###############################################
            # Patience exhausted: decay the learning rate and rebuild the
            # optimizer around the current parameters.
            if patient >= args.schedule:
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0
        print_results(test_eval_dict['in_domain'], 'test', args.domain,
                      'best_results')
        print('\n')
        # Final evaluation of the best checkpoint on every split.
        for split in datasets.keys():
            flag = False
            # NOTE(review): positionally, `flag` binds to `str_res` and
            # 'best_results' to `ner_model` in `evaluation` — confirm this
            # matches the intended signature.
            eval_dict = evaluation(args, datasets[split], split, best_model,
                                   args.domain, epoch, flag, 'best_results')
    else:
        logger.info("Evaluating")
        epoch = start_epoch
        # epoch = 70
        # print('Start epoch is',start_epoch)
        for split in ['train', 'dev', 'test']:
            # FIX: the original `if split == 'dev': flag = False
            # else: flag = False` set False on both branches; collapsed
            # to the single equivalent assignment.
            flag = False
            eval_dict = evaluation(args, datasets[split], split, model,
                                   args.domain, epoch, flag, 'best_results')
            # print_results(eval_dict, split, args.domain, 'results')
            print('\n')