spacy_model, evaluation=False) dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab, spacy_model, evaluation=False) model_id = opt['id'] model_save_dir = opt['save_dir'] + '/' + model_id opt['model_save_dir'] = model_save_dir helper.ensure_dir(model_save_dir, verbose=True) # save config helper.save_config(opt, model_save_dir + '/config.json', verbose=True) vocab.save(model_save_dir + '/vocab.pkl') file_logger = helper.FileLogger(model_save_dir + '/' + opt['log'], header="# epoch\ttrain_loss\tdev_loss\tdev_f1") # print model info helper.print_config(opt) # model model = SubjectObjectRelationModel(opt, emb_matrix=emb_matrix) class2id = dict([(v, k) for k, v in constant.ID_TO_CLASS.items()]) id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()]) dev_f1_history = [] current_lr = opt['lr']
def main():
    """Train a ``RelationModel`` configured by the module-level ``args``.

    Steps, in order:
      1. Seed ``torch``, ``numpy`` and ``random`` from ``args.seed``.
      2. Load the vocabulary and embedding matrix from ``opt['vocab_dir']``.
      3. Build the train/dev ``DataLoader`` objects from ``opt['data_dir']``.
      4. Save the config and vocab under ``<save_dir>/<model_id>``.
      5. Train for ``opt['num_epoch']`` epochs; after every epoch evaluate
         on dev, log the metrics, write a checkpoint, keep the best one
         (by dev F1) as ``best_model.pt`` and possibly decay the
         learning rate.

    Reads ``args`` from the enclosing scope and returns ``None``.
    """
    # set top-level random seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    if args.cpu:
        args.cuda = False
    elif args.cuda:
        # force random seed for reproducibility
        # also apply same seed to numpy in every file
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    # make opt (note: vars() aliases args.__dict__, so writes to opt are
    # also visible through args)
    opt = vars(args)
    opt['num_class'] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)

    # in some previous experiments we saw that lower vocab size can improve
    # performance, but it was in a completely different project although on
    # the same data; here it seems it's much harder to get this to work
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(
        opt['data_dir'], opt['batch_size']))
    train_batch = DataLoader(opt['data_dir'] + '/train.json',
                             opt['batch_size'], opt, vocab, evaluation=False)
    dev_batch = DataLoader(opt['data_dir'] + '/dev.json',
                           opt['batch_size'], opt, vocab, evaluation=True)

    # single-character ids are zero-padded to two characters
    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + model_id
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_p\tdev_r\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = {v: k for k, v in constant.LABEL_TO_ID.items()}
    dev_f1_history = []
    current_lr = opt['lr']

    global_step = 0
    format_str = ('{}: step {}/{} (epoch {}/{}), loss = {:.6f} '
                  '({:.3f} sec/batch), lr: {:.6f}')
    max_steps = len(train_batch) * opt['num_epoch']

    # A ReduceLROnPlateau scheduler was tried here as well; it did not work
    # as well as the manual decay schedule used at the end of each epoch.

    # Pre-bind so the closing message is well defined even when
    # opt['num_epoch'] < 1 and the loop body never runs.
    epoch = 0

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        # TODO: if lr warmup is used, the lr console output is not updated
        print(
            "Current params: " + " heads-" + str(opt["n_head"]) +
            " enc_layers-" + str(opt["num_layers_encoder"]),
            " drop-" + str(opt["dropout"]) + " scaled_drop-" +
            str(opt["scaled_dropout"]) + " lr-" + str(opt["lr"]),
            " lr_decay-" + str(opt["lr_decay"]) + " max_grad_norm-" +
            str(opt["max_grad_norm"]))
        print(
            " weight_no_rel-" + str(opt["weight_no_rel"]) + " weight_rest-" +
            str(opt["weight_rest"]) + " attn-" + str(opt["attn"]) +
            " attn_dim-" + str(opt["attn_dim"]),
            " obj_sub_pos-" + str(opt["obj_sub_pos"]) + " new_residual-" +
            str(opt["new_residual"]))
        print(
            " use_batch_norm-" + str(opt["use_batch_norm"]) +
            " relative_positions-" + str(opt["relative_positions"]),
            " decay_epoch-" + str(opt["decay_epoch"]) + " use_lemmas-" +
            str(opt["use_lemmas"]),
            " hidden_self-" + str(opt["hidden_self"]))

        train_loss = 0
        for batch in train_batch:
            start_time = time.time()
            global_step += 1
            loss = model.update(batch)
            train_loss += float(loss)
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step,
                                        max_steps, epoch, opt['num_epoch'],
                                        loss, duration, current_lr))
            # drop the reference eagerly, as per
            # https://discuss.pytorch.org/t/best-practices-for-maximum-gpu-utilization/13863/6
            del loss

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for batch in dev_batch:
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += float(loss)
            del loss

        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        # avg loss per batch
        train_loss = train_loss / train_batch.num_examples * opt['batch_size']
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print(
            "epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
            .format(epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_p, dev_r, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        # only every save_epoch-th checkpoint is kept on disk
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # Reducing the learning rate via scheduler.step(dev_loss) worked
        # worse than the manual schedule below.
        if opt["optim"] != "noopt_adam" and opt["optim"] != "noopt_nadam":
            # do warm_up_for sgd only instead of adam
            do_warmup_trick = False

            if do_warmup_trick:
                # 1 and 5 first worked kind of
                # 10 and 15
                current_lr = 10 * (360**(-0.5) *
                                   min(epoch**(-0.5), epoch * 15**(-1.5)))
                model.update_lr(current_lr)
            else:
                # decay schedule: once more than decay_epoch epochs are in
                # the history, decay whenever dev F1 fails to improve over
                # the previous epoch (15 is best!)
                if (len(dev_f1_history) > opt['decay_epoch']
                        and dev_f1 <= dev_f1_history[-1]):
                    current_lr *= opt['lr_decay']
                    model.update_lr(current_lr)
        # else, update the learning rate in torch_utils.py

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
def main():
    """Train an ``LSTMTrainer`` model and keep the best checkpoint.

    Parses options with ``get_parser()``, seeds the RNGs, loads the data
    iterators and vocabulary through ``load_data``, then trains for
    ``opt['num_epoch']`` epochs. After every epoch the model is evaluated
    on the dev iterator, metrics are printed and logged, a checkpoint is
    written, and the checkpoint with the best dev score is copied to
    ``best_model.pt``. Returns ``None``.
    """
    args = get_parser()

    # set seed and prepare for training
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    if args.cpu:
        args.cuda = False
    elif args.cuda:
        torch.cuda.manual_seed(args.seed)

    # make opt
    opt = vars(args)

    # Put the data on the same device the flags ask for instead of always
    # forcing 'cuda:0', which ignored --cpu / a disabled cuda flag.
    device = 'cuda:0' if opt['cuda'] else 'cpu'
    TEXT, train_batch, dev_batch = load_data(opt['batch_size'], device=device)

    vocab = TEXT.vocab
    opt['vocab_size'] = len(vocab.stoi)
    emb_matrix = vocab.vectors
    assert emb_matrix.shape[0] == opt['vocab_size']
    assert emb_matrix.shape[1] == opt['emb_dim']

    # single-character ids are zero-padded to two characters
    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + str(model_id)
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    path = os.path.join(model_save_dir, 'config.json')
    helper.save_config(opt, path, verbose=True)
    file_logger = helper.FileLogger(
        os.path.join(model_save_dir, opt['log']),
        header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

    # print model info
    helper.print_config(opt)

    # Build Model
    if not opt['load']:
        trainer = LSTMTrainer(opt, emb_matrix)
    else:
        model_file = opt['model_file']
        print("Loading model from {}".format(model_file))
        model_opt = torch_utils.load_config(model_file)
        # the optimizer choice comes from the current run, not the checkpoint
        model_opt['optim'] = opt['optim']
        trainer = LSTMTrainer(model_opt)
        trainer.load(model_file)

    dev_score_history = []
    current_lr = opt['lr']

    global_step = 0
    format_str = ('{}: step {}/{} (epoch {}/{}), loss = {:.6f} '
                  '({:.3f} sec/batch), lr: {:.6f}')
    max_steps = len(train_batch) * opt['num_epoch']

    # Pre-bind so the closing message is well defined even when
    # opt['num_epoch'] < 1 and the loop body never runs.
    epoch = 0

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        for batch in train_batch:
            start_time = time.time()
            global_step += 1
            loss = trainer.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step,
                                        max_steps, epoch, opt['num_epoch'],
                                        loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set ...")
        predictions = []
        golds = []
        dev_loss = 0.0
        for batch in dev_batch:
            preds, probs, labels, loss = trainer.predict(batch)
            predictions += preds
            golds += labels
            dev_loss += loss

        # average loss per batch
        train_loss = train_loss / len(train_batch)
        dev_loss = dev_loss / len(dev_batch)
        print(accuracy_score(golds, predictions))
        # NOTE(review): ROC AUC is computed from the hard predictions, not
        # from `probs` -- confirm this is intended.
        dev_roc = roc_auc_score(golds, predictions)
        print(
            "epoch {}: train loss = {:.6f}, dev loss = {:.6f}, dev roc = {:.4f}"
            .format(epoch, train_loss, dev_loss, dev_roc))
        dev_score = dev_roc
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_score,
            max([dev_score] + dev_score_history)))

        # save model (".pt", matching best_model.pt; the checkpoint was
        # previously written with a ".py" extension)
        model_file = os.path.join(model_save_dir,
                                  "checkpoint_epoch_{}.pt".format(epoch))
        trainer.save(model_file, epoch)
        if epoch == 1 or dev_score > max(dev_score_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
            file_logger.log("new best model saved at epoch {}: {:.2f}".format(
                epoch, dev_score * 100))
        # only every save_epoch-th checkpoint is kept on disk
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule: once more than decay_epoch epochs are in the history,
        # decay whenever the dev score fails to improve over the previous
        # epoch (only for the optimizers listed below)
        if (len(dev_score_history) > opt['decay_epoch']
                and dev_score <= dev_score_history[-1]
                and opt['optim'] in ['sgd', 'adagrad', 'adadelta']):
            current_lr *= opt['lr_decay']
            trainer.update_lr(current_lr)

        dev_score_history += [dev_score]
        print("")

    print("Training ended with {} epochs.".format(epoch))
def train_model(vocab_params, train_params, train_batch, dev_batch, model_id=-1):
    """Train a ``RelationModel`` on pre-built batches and keep the best checkpoint.

    Args:
        vocab_params: namespace-like object (anything ``vars()`` accepts)
            holding the vocabulary/data options; it is no longer modified.
        train_params: namespace-like object with the training options. It
            must expose ``seed``, ``cpu`` and ``cuda``; ``cuda`` is set to
            ``False`` on it when ``cpu`` is truthy. Its entries override
            those of ``vocab_params`` in the merged option dict.
        train_batch: iterable of training batches; must support ``len()``
            and expose ``num_examples``.
        dev_batch: iterable of dev batches exposing ``num_examples`` and
            ``gold()``.
        model_id: sub-directory name under ``opt['save_dir']``. The default
            ``-1`` means "derive it from ``opt['id']``". Any other value is
            converted with ``str()``.

    Returns:
        None. Checkpoints, config, vocab and the log file are written
        under ``<save_dir>/<model_id>``.
    """
    torch.manual_seed(train_params.seed)
    np.random.seed(train_params.seed)
    random.seed(train_params.seed)
    if train_params.cpu:
        train_params.cuda = False
    elif train_params.cuda:
        torch.cuda.manual_seed(train_params.seed)

    # make opt -- copy, because vars() returns the namespace's own __dict__
    # and the updates below would otherwise leak into the caller's object
    opt = dict(vars(vocab_params))
    print(constant.LABEL_TO_ID)
    print(opt)
    opt['num_class'] = len(constant.LABEL_TO_ID)

    # Combine all the parameters together
    opt.update(vars(train_params))

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    if model_id == -1:
        # single-character ids are zero-padded to two characters
        model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']

    # str() so that integer ids supplied by the caller work as well
    model_save_dir = opt['save_dir'] + '/' + str(model_id)
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = {v: k for k, v in constant.LABEL_TO_ID.items()}
    dev_f1_history = []
    current_lr = opt['lr']

    global_step = 0
    format_str = ('{}: step {}/{} (epoch {}/{}), loss = {:.6f} '
                  '({:.3f} sec/batch), lr: {:.6f}')
    max_steps = len(train_batch) * opt['num_epoch']

    # Pre-bind so the closing message is well defined even when
    # opt['num_epoch'] < 1 and the loop body never runs.
    epoch = 0

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        for batch in train_batch:
            start_time = time.time()
            global_step += 1
            loss = model.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step,
                                        max_steps, epoch, opt['num_epoch'],
                                        loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for batch in dev_batch:
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += loss

        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        # avg loss per batch
        train_loss = train_loss / train_batch.num_examples * opt['batch_size']
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print(
            "epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
            .format(epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        # only every save_epoch-th checkpoint is kept on disk
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule: once more than 10 epochs are in the history, decay
        # whenever dev F1 fails to improve over the previous epoch (only
        # for the optimizers listed below)
        if (len(dev_f1_history) > 10 and dev_f1 <= dev_f1_history[-1]
                and opt['optim'] in ['sgd', 'adagrad']):
            current_lr *= opt['lr_decay']
            model.update_lr(current_lr)

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
opt['vocab_size'] = vocab.size emb_file = opt['vocab_dir'] + '/embedding.npy' emb_matrix = np.load(emb_file) asp_emb_matrix = np.load(opt['vocab_dir'] + '/asp_embedding.npy') considered = int(len(constant.ASP_TO_ID) * opt['top_asp']) asp_emb_matrix = asp_emb_matrix[0:considered] assert emb_matrix.shape[0] == vocab.size assert emb_matrix.shape[1] == opt['emb_dim'] assert asp_emb_matrix.shape[1] == opt['emb_dim'] # model save dir helper.ensure_dir(opt['save_dir'], verbose=True) # save config helper.save_config(opt, opt['save_dir'] + '/config.json', verbose=True) vocab.save(opt['save_dir'] + '/vocab.pkl') file_logger = helper.FileLogger( opt['save_dir'] + '/' + opt['log'], header="# epoch\ttrain_loss\ttest_loss\tP\tR\tF1") # load data print("Loading data from {} with batch size {} ...".format( opt['data_dir'], opt['batch_size'])) train_batch = DataLoader(opt['data_dir'] + '/train.list', opt['batch_size'], opt, vocab) dev_batch = DataLoader(opt['data_dir'] + '/test.list', opt['batch_size'], opt, vocab) print('Building model...') trainer = MyTrainer(opt, emb_matrix, asp_emb_matrix)