import pickle
import sys
import time

import numpy as np
import torch

# NOTE: the helper functions used below (read_conll, sents_to_vec,
# sents_to_tagid, to_input_tensor, generate_seed, data_iter) and the model
# classes (MarkovFlow, dmv.DMVFlow) are project-local; import them from
# wherever they live in your copy of the repo.


def main(args):
    """Train a Markov-structured flow model for unsupervised tagging, or, if
    args.tag_from is set, load a checkpoint and only tag the test data.
    Evaluation reports M1 (many-to-one accuracy) and VM (V-measure)."""
    with open(args.word_vec, 'rb') as fin:
        word_vec = pickle.load(fin)
    print('complete loading word vectors')

    train_text, null_index = read_conll(args.train_file)
    if args.test_file != '':
        test_text, null_index = read_conll(args.test_file)
    else:
        # no separate test set: evaluate on the training sentences
        test_text = train_text

    train_data = sents_to_vec(word_vec, train_text)
    test_data = sents_to_vec(word_vec, test_text)
    test_tags = [sent["tag"] for sent in test_text]

    num_dims = len(train_data[0][0])
    print('complete reading data')
    print('#training sentences: %d' % len(train_data))
    print('#testing sentences: %d' % len(test_data))

    # log roughly ten times per epoch (at least every iteration on tiny data)
    log_niter = max(1, (len(train_data) // args.batch_size) // 10)

    pad = np.zeros(num_dims)
    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device
    init_seed = to_input_tensor(generate_seed(train_data, args.batch_size),
                                pad, device=device)

    model = MarkovFlow(args, num_dims).to(device)
    model.init_params(init_seed)

    if args.tag_from != '':
        # tagging-only mode: evaluate, write predicted tags, and exit
        model.eval()
        with torch.no_grad():
            accuracy, vm = model.test(test_data, test_tags, sentences=test_text,
                                      tagging=True, path=args.tag_path,
                                      null_index=null_index)
        print('\n***** M1 %f, VM %f, max_var %.4f, min_var %.4f *****\n'
              % (accuracy, vm, model.var.data.max(), model.var.data.min()),
              file=sys.stderr)
        return

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    begin_time = time.time()
    print('begin training')

    train_iter = report_obj = report_jc = report_ll = report_num_words = 0

    # print the accuracy under the initial parameters
    model.eval()
    with torch.no_grad():
        accuracy, vm = model.test(test_data, test_tags)
    print('\n***** starting M1 %f, VM %f, max_var %.4f, min_var %.4f *****\n'
          % (accuracy, vm, model.var.data.max(), model.var.data.min()),
          file=sys.stderr)

    model.train()
    for epoch in range(args.epochs):
        report_obj = report_jc = report_ll = report_num_words = 0
        for sents in data_iter(train_data, batch_size=args.batch_size,
                               shuffle=True):
            train_iter += 1
            batch_size = len(sents)
            num_words = sum(len(sent) for sent in sents)
            sents_var, masks = to_input_tensor(sents, pad, device=args.device)

            optimizer.zero_grad()
            likelihood, jacobian_loss = model(sents_var, masks)
            # minimize negative log-likelihood plus the Jacobian term,
            # averaged over the sentences in the batch
            neg_likelihood_loss = -likelihood
            avg_ll_loss = (neg_likelihood_loss + jacobian_loss) / batch_size
            avg_ll_loss.backward()
            optimizer.step()

            log_likelihood_val = -neg_likelihood_loss.item()
            jacobian_val = -jacobian_loss.item()
            obj_val = log_likelihood_val + jacobian_val

            report_ll += log_likelihood_val
            report_jc += jacobian_val
            report_obj += obj_val
            report_num_words += num_words

            if train_iter % log_niter == 0:
                print('epoch %d, iter %d, log_likelihood %.2f, jacobian %.2f, '
                      'obj %.2f, max_var %.4f min_var %.4f time elapsed %.2f sec'
                      % (epoch, train_iter, report_ll / report_num_words,
                         report_jc / report_num_words,
                         report_obj / report_num_words,
                         model.var.max(), model.var.min(),
                         time.time() - begin_time),
                      file=sys.stderr)

        print('\nepoch %d, log_likelihood %.2f, jacobian %.2f, obj %.2f\n'
              % (epoch, report_ll / report_num_words,
                 report_jc / report_num_words,
                 report_obj / report_num_words),
              file=sys.stderr)

        if epoch % args.valid_nepoch == 0:
            model.eval()
            with torch.no_grad():
                accuracy, vm = model.test(test_data, test_tags)
            print('\n***** epoch %d, iter %d, M1 %f, VM %f *****\n'
                  % (epoch, train_iter, accuracy, vm), file=sys.stderr)
            model.train()

        torch.save(model.state_dict(), args.save_path)

    model.eval()
    with torch.no_grad():
        accuracy, vm = model.test(test_data, test_tags)
    print('\ncomplete training, accuracy %f, vm %f\n' % (accuracy, vm),
          file=sys.stderr)
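# Hypothetical invocation sketch (not part of the original script). The
# attribute names below are exactly the `args.*` fields the tagger's main()
# reads; the values are placeholders, and MarkovFlow may read further
# model-specific flags defined by the repo's argparse setup. An empty
# `test_file` evaluates on the training sentences; an empty `tag_from`
# trains instead of tagging.
#
#     from argparse import Namespace
#     args = Namespace(
#         word_vec='word_vec.pkl', train_file='train.conll', test_file='',
#         tag_from='', tag_path='', cuda=False, batch_size=32, lr=0.001,
#         epochs=10, valid_nepoch=1, save_path='markov_flow.pt')
#     main(args)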
# A second entry point (in the repo this lives in a separate training
# script): unsupervised dependency induction with a DMV-based flow model,
# evaluated with directed/undirected attachment accuracy.
def main(args):
    with open(args.word_vec, 'rb') as fin:
        word_vec = pickle.load(fin)
    print('complete loading word vectors')

    train_sents, _ = read_conll(args.train_file)
    # restrict the validation set to sentences of length <= 10
    test_sents, _ = read_conll(args.test_file, max_len=10)
    test_deps = [sent["head"] for sent in test_sents]

    train_emb = sents_to_vec(word_vec, train_sents)
    test_emb = sents_to_vec(word_vec, test_sents)
    num_dims = len(train_emb[0][0])

    train_tagid, tag2id = sents_to_tagid(train_sents)
    print('%d types of tags' % len(tag2id))
    id2tag = {v: k for k, v in tag2id.items()}

    pad = np.zeros(num_dims)
    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device

    model = dmv.DMVFlow(args, id2tag, num_dims).to(device)

    init_seed = to_input_tensor(generate_seed(train_emb, args.batch_size),
                                pad, device=device)
    with torch.no_grad():
        model.init_params(init_seed, train_tagid, train_emb)
    print('complete init')

    if args.train_from != '':
        # resume from a checkpoint and report its accuracy first
        model.load_state_dict(torch.load(args.train_from))
        with torch.no_grad():
            directed, undirected = model.test(test_deps, test_emb, verbose=False)
        print('acc on length <= 10: #trees %d, undir %2.1f, dir %2.1f'
              % (len(test_deps), 100 * undirected, 100 * directed))

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # log roughly five times per epoch (at least every iteration on tiny data)
    log_niter = max(1, (len(train_emb) // args.batch_size) // 5)
    report_ll = report_num_words = report_num_sents = epoch = train_iter = 0
    stop_avg_ll = stop_num_words = 0
    stop_avg_ll_last = 1
    begin_time = time.time()

    print('begin training')

    with torch.no_grad():
        directed, undirected = model.test(test_deps, test_emb)
    print('starting acc on length <= 10: #trees %d, undir %2.1f, dir %2.1f'
          % (len(test_deps), 100 * undirected, 100 * directed))

    for epoch in range(args.epochs):
        report_ll = report_num_sents = report_num_words = 0
        for sents in data_iter(train_emb, batch_size=args.batch_size):
            batch_size = len(sents)
            num_words = sum(len(sent) for sent in sents)
            stop_num_words += num_words
            optimizer.zero_grad()

            sents_var, masks = to_input_tensor(sents, pad, device)
            # map the embeddings through the invertible projection, then
            # score them with the DMV inside algorithm
            sents_var, _ = model.transform(sents_var)
            sents_var = sents_var.transpose(0, 1)
            log_likelihood = model.p_inside(sents_var, masks)

            avg_ll_loss = -log_likelihood / batch_size
            avg_ll_loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()

            report_ll += log_likelihood.item()
            report_num_words += num_words
            report_num_sents += batch_size
            stop_avg_ll += log_likelihood.item()

            if train_iter % log_niter == 0:
                print('epoch %d, iter %d, ll_per_sent %.4f, ll_per_word %.4f, '
                      'max_var %.4f, min_var %.4f time elapsed %.2f sec'
                      % (epoch, train_iter, report_ll / report_num_sents,
                         report_ll / report_num_words, model.var.data.max(),
                         model.var.data.min(), time.time() - begin_time),
                      file=sys.stderr)

            train_iter += 1

        if epoch % args.valid_nepoch == 0:
            with torch.no_grad():
                directed, undirected = model.test(test_deps, test_emb)
            print('\n\nacc on length <= 10: #trees %d, undir %2.1f, dir %2.1f\n\n'
                  % (len(test_deps), 100 * undirected, 100 * directed))

        # stop early once the relative gain in per-word likelihood falls
        # below 0.1% (after at least 5 epochs)
        stop_avg_ll = stop_avg_ll / stop_num_words
        rate = (stop_avg_ll - stop_avg_ll_last) / abs(stop_avg_ll_last)
        print('\n\nlikelihood: %.4f, likelihood last: %.4f, rate: %f\n'
              % (stop_avg_ll, stop_avg_ll_last, rate))
        if rate < 0.001 and epoch >= 5:
            break
        stop_avg_ll_last = stop_avg_ll
        stop_avg_ll = stop_num_words = 0

        torch.save(model.state_dict(), args.save_path)

    # final evaluation on sentences of all lengths
    if args.eval_all:
        test_sents, _ = read_conll(args.test_file)
        test_deps = [sent["head"] for sent in test_sents]
        test_emb = sents_to_vec(word_vec, test_sents)
        print("start evaluating on all lengths")
        with torch.no_grad():
            directed, undirected = model.test(test_deps, test_emb, eval_all=True)
        print('accuracy on all lengths: #trees %d, undir %2.1f, dir %2.1f'
              % (len(test_deps), 100 * undirected, 100 * directed))
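# Hypothetical invocation sketch for the DMV trainer (not part of the
# original script), mirroring the one above. The fields are the `args.*`
# attributes this main() reads; the values are placeholders, and DMVFlow
# may read additional model-specific flags.
#
#     from argparse import Namespace
#     args = Namespace(
#         word_vec='word_vec.pkl', train_file='train.conll',
#         test_file='test.conll', train_from='', cuda=False, batch_size=32,
#         lr=0.001, clip_grad=5.0, epochs=15, valid_nepoch=1,
#         save_path='dmv_flow.pt', eval_all=True)
#     main(args)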