# Build the model
l_out, l_rec = build_model(input_var, ExptDict)

# The generated output variable and the loss function
if ExptDict["task"]["task_id"] in ['DE1', 'DE2', 'GDE2', 'VDE1', 'SINE']:
    pred_var = lasagne.layers.get_output(l_out)
elif ExptDict["task"]["task_id"] in ['CD1', 'CD2', 'Harvey2012',
                                     'Harvey2012Dynamic', 'Harvey2016', 'COMP']:
    pred_var = T.clip(lasagne.layers.get_output(l_out), 1e-6, 1.0 - 1e-6)

# Build loss: task loss plus an activity penalty on the last five time steps
rec_act = lasagne.layers.get_output(l_rec)
l2_penalty = T.mean(lasagne.objectives.squared_error(rec_act[:, -5:, :], 0.0)) * 1e-4
loss = build_loss(pred_var, target_var, ExptDict) + l2_penalty

# Create the update expressions
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=0.0005)

# Compile the function for a training step, as well as the prediction function
# and a utility function to get the inner details of the RNN
train_fn = theano.function([input_var, target_var], loss, updates=updates,
                           allow_input_downcast=True)
pred_fn = theano.function([input_var], pred_var, allow_input_downcast=True)
rec_layer_fn = theano.function([input_var],
                               lasagne.layers.get_output(l_rec, get_details=True),
                               allow_input_downcast=True)
# The generated output variable and the loss function
if ExptDict["task"]["task_id"] in ['DE1', 'DE2', 'GDE2', 'VDE1', 'SINE']:
    pred_var = lasagne.layers.get_output(l_out)
elif ExptDict["task"]["task_id"] in ['CD1', 'CD2', 'Harvey2012',
                                     'Harvey2012Dynamic', 'Harvey2016', 'COMP']:
    pred_var = T.clip(lasagne.layers.get_output(l_out), 1e-6, 1.0 - 1e-6)

# Build loss: task loss, an activity penalty on the last five time steps,
# and an L2 weight-decay term on the trainable parameters
rec_act = lasagne.layers.get_output(l_rec)
l2_penalty = T.mean(
    lasagne.objectives.squared_error(rec_act[:, -5:, :], 0.0)) * 1e-4
l2_params = regularize_network_params(l_out, l2, tags={'trainable': True})
loss = build_loss(pred_var, target_var, ExptDict) + l2_penalty + wdecay_coeff * l2_params

# Create the update expressions
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=0.0005)

# Compile the function for a training step, as well as the prediction function
# and a utility function to get the inner details of the RNN
train_fn = theano.function([input_var, target_var], loss, updates=updates,
                           allow_input_downcast=True)
pred_fn = theano.function([input_var], pred_var, allow_input_downcast=True)
rec_layer_fn = theano.function([input_var],
                               lasagne.layers.get_output(l_rec, get_details=True),
                               allow_input_downcast=True)
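# --- Hedged sketch, not from the source: a minimal build_loss dispatch consistent
# --- with how the two snippets above use it. It assumes mean squared error for the
# --- continuous-output tasks (DE1/DE2/GDE2/VDE1/SINE) and binary cross-entropy for
# --- the clipped, probability-valued tasks (CD1/CD2/Harvey*/COMP); the actual
# --- helper in the repository may differ.
import theano.tensor as T
import lasagne


def build_loss(pred_var, target_var, ExptDict):
    task_id = ExptDict["task"]["task_id"]
    if task_id in ['DE1', 'DE2', 'GDE2', 'VDE1', 'SINE']:
        # Regression-style tasks: mean squared error on the raw outputs
        return T.mean(lasagne.objectives.squared_error(pred_var, target_var))
    else:
        # Classification-style tasks: binary cross-entropy on the clipped outputs
        return T.mean(lasagne.objectives.binary_crossentropy(pred_var, target_var))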
for i, (example_input, example_output, s, opt) in generator:
    example_input = torch.Tensor(example_input).requires_grad_(True)
    example_input = example_input.to(device)
    example_output = torch.Tensor(example_output).requires_grad_(True)
    example_output = example_output.to(device)

    prediction, hiddens = model.forward(example_input)
    if ExptDict["task"]["task_id"] in ['DE1', 'DE2', 'GDE2', 'VDE1', 'SINE']:
        prediction = prediction
    elif ExptDict["task"]["task_id"] in ['CD1', 'CD2', 'Harvey2012',
                                         'Harvey2012Dynamic', 'Harvey2016', 'COMP']:
        # torch.clamp takes the tensor first, then the lower and upper bounds
        prediction = torch.clamp(prediction, 1e-6, 1.0 - 1e-6)

    loss = build_loss(prediction, example_output, ExptDict) \
        + l2_activation_regularization(hiddens)
    optimizer.zero_grad()  # clear gradients accumulated from the previous batch
    loss.backward()
    optimizer.step()

    s_vec.append(s)
    opt_vec.append(opt)
    net_vec.append(np.squeeze(prediction.data.cpu().numpy()[:, -5, :]))
    score = loss.data.cpu()

    if i % 500 == 0:
        # Convert to arrays under new names so the running lists keep accepting appends
        opt_arr = np.asarray(opt_vec)
        net_arr = np.asarray(net_vec)
        s_arr = np.asarray(s_vec)
        infloss = build_performance(s_arr, opt_arr, net_arr, ExptDict)
        infloss_vec.append(infloss)
        print('Batch #%d; Absolute loss: %.6f; Fractional loss: %.6f'
              % (i, score, infloss))
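# --- Hedged sketch, not from the source: a PyTorch counterpart of the Lasagne
# --- activity penalty above, i.e. the mean squared recurrent activity over the
# --- last five time steps scaled by 1e-4. The real l2_activation_regularization
# --- helper may use a different window or coefficient.
import torch


def l2_activation_regularization(hiddens, coeff=1e-4, last_n=5):
    # hiddens: (batch, time, units) recurrent activations returned by the model
    return coeff * torch.mean(hiddens[:, -last_n:, :] ** 2)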
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    args = parser.parse_args()
    print(args)

    args.cuda = not args.disable_cuda and torch.cuda.is_available()

    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, \
        'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]
    test_corpus = [(line.sent, line.type, line.p1, line.p2) for line in test_raw_corpus]

    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch

    # preprocessing
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    # target_map = {c: i for i, c in enumerate(['null', 'true'])}
    target_map = ddi2013.target_map
    train_features, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    val_features, val_targets = utils.build_corpus(val_corpus, feature_map, target_map, caseless)
    test_features, test_targets = utils.build_corpus(test_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None
    train_loader = utils.construct_bucket_dataloader(train_features, train_targets, feature_map['PAD'],
                                                     batch_size, args.position_bound, is_train=True)
    val_loader = utils.construct_bucket_dataloader(val_features, val_targets, feature_map['PAD'],
                                                   batch_size, args.position_bound, is_train=False)
    test_loader = utils.construct_bucket_dataloader(test_features, test_targets, feature_map['PAD'],
                                                    batch_size, args.position_bound, is_train=False)
    print('Preprocessing done!\nVocab size: {}'.format(len(feature_map)))

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = utils.build_model(args, vocab_size, tagset_size)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
        # optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(
                args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices)  # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)

    # trainer
    trainer = SeqTrainer(args, model, criterion)

    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, '
              'test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
                  dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))

    track_list = []
    best_f1 = float('-inf')
    patience_count = 0
    start_time = time.time()

    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)

        # update lr
        trainer.lr_step()

        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
            test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss, 'test_prec': test_prec,
                               'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))
            try:
                utils.save_checkpoint({
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': trainer.optimizer.state_dict(),
                    'f_map': feature_map,
                    't_map': target_map,
                }, {'track_list': track_list,
                    'args': vars(args)
                    }, args.checkpoint + '_lstm')
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec,
                               'dev_f1': dev_f1, 'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}'.format(
                epoch, epoch_loss, dev_f1, dev_loss))

        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
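# --- Hedged sketch, not from the source: one plausible utils.get_class_weights,
# --- producing inverse-frequency weights indexed by class id so the rare
# --- non-'null' DDI types are up-weighted in the loss. The actual helper may
# --- normalize differently.
from collections import Counter


def get_class_weights(targets):
    counts = Counter(targets)
    n_classes = max(counts) + 1
    total = float(len(targets))
    # weight[c] is inversely proportional to the frequency of class c
    return [total / (n_classes * counts.get(c, 1)) for c in range(n_classes)]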
def main():
    parser = options.get_parser('Generator')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_generation_args(parser)
    args = parser.parse_args()
    print(args)

    args.cuda = not args.disable_cuda and torch.cuda.is_available()

    caseless = args.caseless
    batch_size = args.batch_size

    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
    else:
        print('No checkpoint file found: {}'.format(args.load_checkpoint))
        raise OSError

    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=True)
    test_corpus = [(line.sent, line.type, line.p1, line.p2) for line in test_raw_corpus]

    # preprocessing
    feature_map = checkpoint_file['f_map']
    target_map = checkpoint_file['t_map']
    test_features, test_targets = utils.build_corpus(test_corpus, feature_map, target_map, caseless)
    test_loader = utils.construct_bucket_dataloader(test_features, test_targets, feature_map['PAD'],
                                                    batch_size, args.position_bound, is_train=False)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = utils.build_model(args, vocab_size, tagset_size)

    # loss
    criterion = utils.build_loss(args)

    # load states
    model.load_state_dict(checkpoint_file['state_dict'])

    # trainer
    trainer = SeqTrainer(args, model, criterion)
    if args.cuda:
        model.cuda()

    # prediction
    print('Predicting...')
    y_true, y_pred, att_weights = predict(trainer, test_loader, target_map, cuda=args.cuda)
    assert len(y_pred) == len(test_corpus), 'length of prediction is inconsistent with that of data set'

    # write result: sent_id|e1|e2|ddi|type
    with open(args.predict_file, 'w') as f:
        for tup, pred in zip(test_raw_corpus, y_pred):
            ddi = 0 if pred == 'null' else 1
            f.write('|'.join([tup.sent_id, tup.e1, tup.e2, str(ddi), pred]))
            f.write('\n')

    # error analysis
    print('Analyzing...')
    with open(args.error_file, 'w') as f:
        f.write(' | '.join(['sent_id', 'e1', 'e2', 'target', 'pred']))
        f.write('\n')
        for tup, target, pred, att_weight in zip(test_raw_corpus, y_true, y_pred, att_weights):
            if target != pred:
                size = len(tup.sent)
                f.write('{}\n'.format(' '.join(tup.sent)))
                if args.model != 'InterAttentionLSTM':
                    att_weight = [att_weight]
                for i in range(len(att_weight)):
                    f.write('{}\n'.format(' '.join(map(lambda x: str(round(x, 4)), att_weight[i][:size]))))
                f.write('{}\n\n'.format(' | '.join([tup.sent_id, tup.e1, tup.e2, target, pred])))

    # attention scores for correctly classified non-null examples
    print('Writing attention scores...')
    with open(args.att_file, 'w') as f:
        f.write(' | '.join(['target', 'sent', 'att_weight']))
        f.write('\n')
        for tup, target, pred, att_weight in zip(test_raw_corpus, y_true, y_pred, att_weights):
            if target == pred and target != 'null':
                size = len(tup.sent)
                f.write('{}\n'.format(target))
                f.write('{}\n'.format(' '.join(tup.sent)))
                if args.model != 'InterAttentionLSTM':
                    att_weight = [att_weight]
                for i in range(len(att_weight)):
                    f.write('{}\n'.format(' '.join(map(lambda x: str(round(x, 4)), att_weight[i][:size]))))
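# --- Hedged sketch, not from the source: reading back the prediction file written
# --- above, one 'sent_id|e1|e2|ddi|type' record per line. The field names mirror
# --- the writer; read_predictions itself is an illustrative helper, not part of
# --- the repository.
def read_predictions(path):
    records = []
    with open(path) as f:
        for line in f:
            sent_id, e1, e2, ddi, pred_type = line.rstrip('\n').split('|')
            records.append({'sent_id': sent_id, 'e1': e1, 'e2': e2,
                            'ddi': int(ddi), 'type': pred_type})
    return records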
def main():
    parser = options.get_parser('Generator')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_generation_args(parser)
    args = parser.parse_args()

    model_path = args.load_checkpoint + '.model'
    args_path = args.load_checkpoint + '.json'
    with open(args_path, 'r') as f:
        _args = json.load(f)['args']
    for k, v in _args.items():
        setattr(args, k, v)
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    print(args)

    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # increase recursion depth
    sys.setrecursionlimit(10000)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, \
        'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]

    caseless = args.caseless
    batch_size = args.batch_size

    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    target_map = ddi2013.target_map

    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None

    # load datasets
    _, _, test_loader = utils.load_datasets(args.processed_dir, args.train_size, args,
                                            feature_map, dataloader=True)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    assert os.path.isfile(model_path), "Checkpoint not found!"
    print('Loading checkpoint file from {}...'.format(model_path))
    checkpoint_file = torch.load(model_path)
    model.load_state_dict(checkpoint_file['state_dict'])

    # trainer
    trainer = TreeTrainer(args, model, criterion)

    # predict
    y_true, y_pred, treelists, f1_by_len = predict(trainer, test_loader, target_map, cuda=args.cuda)

    # assign words to roots
    for tup, treelist in zip(test_raw_corpus, treelists):
        for t in treelist:
            t.idx = tup.sent[t.idx] if t.idx < len(tup.sent) else None

    # prediction
    print('Predicting...')
    # write result: sent_id|e1|e2|ddi|type
    with open(args.predict_file, 'w') as f:
        for tup, pred in zip(test_raw_corpus, y_pred):
            ddi = 0 if pred == 'null' else 1
            f.write('|'.join([tup.sent_id, tup.e1, tup.e2, str(ddi), pred]))
            f.write('\n')

    def print_info(f, tup, target, pred, root):
        f.write('{}\n'.format(' '.join(tup.sent)))
        f.write('{}\n'.format(' | '.join([tup.sent_id, tup.e1, tup.e2, target, pred])))
        f.write('{}\n\n'.format(root))

    # error analysis
    print('Analyzing...')
    with open(args.error_file, 'w') as f:
        f.write(' | '.join(['sent_id', 'e1', 'e2', 'target', 'pred']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred, treelists):
            if target != pred:
                print_info(f, tup, target, pred, treelist[-1])

    # correctly classified non-null examples
    print('Writing attention scores...')
    with open(args.correct_file, 'w') as f:
        f.write(' | '.join(['target', 'sent', 'att_weight']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred, treelists):
            if target == pred and target != 'null':
                print_info(f, tup, target, pred, treelist[-1])
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    args = parser.parse_args()
    print(args)

    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    torch.manual_seed(5)

    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # increase recursion depth
    sys.setrecursionlimit(10000)

    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, \
        'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]

    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch

    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    target_map = ddi2013.target_map

    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None

    train_loader, val_loader, test_loader = utils.load_datasets(args.processed_dir, args.train_size,
                                                                args, feature_map, dataloader=True)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
        # optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(
                args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices)  # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)

    # trainer
    trainer = TreeTrainer(args, model, criterion)

    best_f1 = float('-inf')
    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        best_f1 = dev_f1
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, '
              'test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
                  dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))

    track_list = []
    patience_count = 0
    start_time = time.time()
    q = mp.Queue()

    # set start method for multiprocessing
    try:
        mp.set_start_method('spawn')
    except RuntimeError:
        pass

    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)
        # processes = []
        # for rank in range(args.num_processes):
        #     p = mp.Process(target=train, args=(train_loader, trainer, epoch, q))
        #     p.start()
        #     processes.append(p)
        # for p in processes:
        #     p.join()
        # epoch_loss = q.get()

        # update lr
        trainer.lr_step(epoch_loss)

        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss, 'test_prec': test_prec,
                               'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))
            try:
                utils.save_checkpoint({
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': trainer.optimizer.state_dict(),
                    'f_map': feature_map,
                    't_map': target_map,
                }, {'track_list': track_list,
                    'args': vars(args)
                    }, args.checkpoint)
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec,
                               'dev_f1': dev_f1, 'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))

        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
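# --- Hedged sketch, not from the source: a utils.save_checkpoint consistent with
# --- how the trainers above call it and with how the generator script later loads
# --- '<checkpoint>.model' and '<checkpoint>.json'. The actual helper may store
# --- additional fields; the tracking dict must stay JSON-serializable.
import json
import torch


def save_checkpoint(state, tracking, path):
    # model weights, optimizer state and vocab maps go into the .model file
    torch.save(state, path + '.model')
    # training curve and the argparse namespace go into the .json file
    with open(path + '.json', 'w') as f:
        json.dump(tracking, f, indent=2)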