def train_and_evaluate(model, data_loader, train_data, val_data, test_data, optimizer, metrics, params,
                       model_dir, data_encoder, label_encoder, restore_file=None, best_model='val',
                       save_model=True, eval=True):
    """Train ``model`` for ``params.num_epochs`` epochs with early stopping on accuracy.

    Each epoch: run one training pass, optionally evaluate on the validation and
    test splits, update the summary writers/plots, and checkpoint the weights.
    Early stopping counts consecutive epochs without improvement of the anchor
    metric ('accuracy') and breaks once ``params.patience`` is reached.

    Args:
        model: the network to train.
        data_loader: provides ``batch_iterator(data, params, shuffle)``.
        train_data, val_data, test_data: datasets as expected by ``data_loader``.
        optimizer: torch optimizer over ``model``'s parameters.
        metrics: dict mapping metric name -> metric function.
        params: hyper-parameter bundle (``num_epochs``, ``batch_size``,
            ``train_size``/``val_size``/``test_size``, ``patience``, ``save_summary_steps``, ...).
        model_dir: directory for checkpoints, encoders, plots and metric json files.
        data_encoder, label_encoder: encoders pickled alongside the weights.
        restore_file: optional checkpoint path to resume from.
        best_model: split ('train' or 'val') whose accuracy selects the best checkpoint.
        save_model: if truthy, write checkpoints/encoders every epoch.
        eval: if truthy, run validation/test evaluation every epoch.
            (NOTE: shadows the builtin ``eval``; name kept for interface compatibility.)

    Returns:
        Index of the last effective epoch (rolled back by ``patience`` when early-stopped).
    """
    from src.ner.utils import SummaryWriter, Label, plot  # plotting tools

    train_summary_writer = SummaryWriter([*metrics] + ['loss'], name='train')
    val_summary_writer = SummaryWriter([*metrics] + ['loss'], name='val')
    test_summary_writer = SummaryWriter([*metrics] + ['loss'], name='test')
    writers = [train_summary_writer, val_summary_writer, test_summary_writer]
    labeller = Label(anchor_metric='accuracy', anchor_writer='val')

    plots_dir = os.path.join(model_dir, 'plots')
    if not os.path.exists(plots_dir):
        os.makedirs(plots_dir)

    start_epoch = -1
    if restore_file is not None:
        logging.info("Restoring parameters from {}".format(restore_file))
        checkpoint = utils.load_checkpoint(restore_file, model, optimizer)
        # NOTE(review): the checkpoint below stores 'epoch' as epoch + 1 while the
        # loop resumes at start_epoch + 1 — confirm utils.load_checkpoint semantics,
        # otherwise resuming skips one epoch.
        start_epoch = checkpoint['epoch']

    # save the snapshot of parameters for reproducibility
    utils.save_dict_to_json(params.dict, os.path.join(model_dir, 'train_snapshot.json'))

    # variable initialization
    best_acc = 0.0
    patience = 0
    early_stopping_metric = 'accuracy'

    # BUG FIX: the original tested ``save_model == 'val'`` — ``save_model`` is a
    # bool, so that clause was always False. Validation data is required both for
    # evaluation and for selecting the best model on the val split (``best_model``).
    if not val_data and (eval or best_model == 'val'):
        raise Exception('No validation data has been passed.')

    for epoch in range(start_epoch + 1, params.num_epochs):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # BUG FIX: ``is_best`` was only assigned inside the 'train'/'val' branches,
        # so e.g. best_model='val' with eval=False raised UnboundLocalError below.
        is_best = False

        # compute number of batches in one epoch (one full pass over the training set)
        num_steps = (params.train_size + 1) // params.batch_size
        train_data_iterator = data_loader.batch_iterator(train_data, params, shuffle=True)
        train_metrics = train(model, optimizer, train_data_iterator, metrics,
                              params.save_summary_steps, num_steps, label_encoder)
        train_summary_writer.update(train_metrics)
        train_acc = train_metrics[early_stopping_metric]
        if best_model == 'train':
            is_best = train_acc >= best_acc

        if eval:
            # Evaluate for one epoch on validation set
            num_steps = (params.val_size + 1) // params.batch_size
            val_data_iterator = data_loader.batch_iterator(val_data, params, shuffle=False)
            val_metrics = evaluate(model, val_data_iterator, metrics, num_steps,
                                   data_encoder, label_encoder, mode='Val')
            val_summary_writer.update(val_metrics)
            val_acc = val_metrics[early_stopping_metric]
            if best_model == 'val':
                is_best = val_acc >= best_acc

            ### TEST
            num_steps = (params.test_size + 1) // params.batch_size
            test_data_iterator = data_loader.batch_iterator(test_data, params, shuffle=False)
            test_metrics = evaluate(model, test_data_iterator, metrics, num_steps,
                                    data_encoder, label_encoder, mode='Test')
            test_summary_writer.update(test_metrics)

        labeller.update(writers=writers)
        plot(writers=writers, plot_dir=plots_dir, save=True)

        # Save weights
        if save_model:
            utils.save_checkpoint({'epoch': epoch + 1,
                                   'state_dict': model.state_dict(),
                                   'optim_dict': optimizer.state_dict()},
                                  is_best=is_best,
                                  checkpoint=model_dir)
            # save encoders only if they do not exist yet
            if not os.path.exists(os.path.join(model_dir, 'data_encoder.pkl')):
                utils.save_obj(data_encoder, os.path.join(model_dir, 'data_encoder.pkl'))
            if not os.path.exists(os.path.join(model_dir, 'label_encoder.pkl')):
                utils.save_obj(label_encoder, os.path.join(model_dir, 'label_encoder.pkl'))

        if is_best:
            patience = 0
            logging.info("- Found new best accuracy")
            best_acc = train_acc if best_model == 'train' else val_acc

            # Save best metrics in a json file in the model directory
            if eval:
                utils.save_dict_to_json(val_metrics,
                                        os.path.join(model_dir, "metrics_val_best_weights.json"))
                utils.save_dict_to_json(train_metrics,
                                        os.path.join(model_dir, "metrics_train_best_weights.json"))
        else:
            if eval:
                patience += 1
                logging.info('current patience: {} ; max patience: {}'.format(patience, params.patience))
                if patience == params.patience:
                    logging.info('patience reached. Exiting at epoch: {}'.format(epoch + 1))
                    # Save latest metrics in a json file in the model directory before exiting
                    utils.save_dict_to_json(val_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_val_last_weights.json"))
                    utils.save_dict_to_json(test_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_test_last_weights.json"))
                    utils.save_dict_to_json(train_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_train_last_weights.json"))
                    # report the last epoch that still improved the anchor metric
                    epoch = epoch - patience
                    break

        # Save latest metrics in a json file in the model directory at end of epoch
        if eval:
            utils.save_dict_to_json(val_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_val_last_weights.json"))
            utils.save_dict_to_json(test_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_test_last_weights.json"))
            utils.save_dict_to_json(train_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_train_last_weights.json"))
    return epoch
# NOTE(review): this chunk continues a model-constructor call opened before this
# view; the keyword arguments below configure the pretrained model's architecture.
num_tags=pretrained_label_encoder[
    ClassEncoder.FEATURE_NAME].num_tags,  # output size comes from the PRETRAINED label set
pretrained_word_vecs=torch.from_numpy(
    data_encoder[WordEncoder.FEATURE_NAME].vectors),  # word-vector matrix from the data encoder
dropout=params.dropout,
freeze_embeddings=params.freeze_wordembeddings).to(device).float()

# report parameter counts (all vs. trainable)
model_total_params = sum(p.numel() for p in model.parameters())
model_total_trainable_params = sum(
    p.numel() for p in filter(lambda p: p.requires_grad, model.parameters()))
print('total params: ', model_total_params)
print('total trainable params: ', model_total_trainable_params)

# load the pre-trained model
logging.info('loading pretrained model')
utils.load_checkpoint(os.path.join(pretrained_model_dir, 'best.pth'), model)

# unless all layers are fine-tuned, freeze every pretrained weight (feature extraction)
if not all_layer:
    for param in model.parameters():
        param.requires_grad = False
# re-initialize the output layer(s) for the NEW label set
# NOTE(review): nesting reconstructed from a collapsed source — confirm whether
# reset_layers / model.to(device) belong inside the `if not all_layer:` branch.
model.reset_layers(label_encoder[ClassEncoder.FEATURE_NAME].num_tags)
model.to(device).float()

# optimize only the parameters that remained trainable
optimizer = optim.Adadelta(params=filter(lambda p: p.requires_grad, model.parameters()), rho=0.95)

# 6.2 define metrics
from src.ner.evaluation import accuracy_score
metrics = {'accuracy': accuracy_score}

# 7. Train the model
logging.info('new data: {}'.format(args.data_dir)) # 6.1.1 Define the model architecture for 1st column logging.info('loading previous models and creating new model') c1_model = CNNTC(num_tags=c1_label_encoder[ClassEncoder.FEATURE_NAME].num_tags, pretrained_word_vecs=torch.from_numpy(c1_data_encoder[WordEncoder.FEATURE_NAME].vectors), dropout=c1_params.dropout, freeze_embeddings=c1_params.freeze_wordembeddings).to(device).float() c2_model = CNNTC(num_tags=c2_label_encoder[ClassEncoder.FEATURE_NAME].num_tags, pretrained_word_vecs=torch.from_numpy(c2_data_encoder[WordEncoder.FEATURE_NAME].vectors), dropout=c2_params.dropout, freeze_embeddings=c2_params.freeze_wordembeddings).to(device).float() # 6.1.1.1. load the pre-trained model to fit the architecture if best_prev: utils.load_checkpoint(os.path.join(c1_model_dir, 'best.pth'), c1_model) utils.load_checkpoint(os.path.join(c2_model_dir, 'best.pth'), c2_model) # 6.1.2 Define the model for 2nd column c3_model = CNNTC(num_tags=label_encoder[ClassEncoder.FEATURE_NAME].num_tags, pretrained_word_vecs=torch.from_numpy(data_encoder[WordEncoder.FEATURE_NAME].vectors), dropout=new_params.dropout, freeze_embeddings=new_params.freeze_wordembeddings).to(device).float() # 7. Convert to columns logging.info('creating columns') from src.booster.progNN.column_tc import Column """ for cross-task experiments, use the following settings: 1. column_1 = NER, column_2 = NER, column_3 = TC
def train_and_evaluate(model, data_loader, train_data, val_data, test_data, optimizer, metrics, params,
                       model_dir, data_encoder, label_encoder, restore_file=None, save_model=True, eval=True):
    """Train ``model`` with a decaying learning rate and early stopping on F1.

    Per epoch: one training pass, optionally a validation and a test pass,
    summary/plot updates, and a checkpoint. The best checkpoint is selected by
    the 'f1_score' of the validation split (or of the training split when
    ``eval`` is falsy). Training stops early after ``params.patience``
    consecutive epochs without improvement.

    Args:
        model: network to train.
        data_loader: provides ``batch_iterator(data, batch_size=..., shuffle=...)``.
        train_data, val_data, test_data: dataset dicts whose 'size' key holds
            the number of examples (assumed — confirm against data_loader).
        optimizer: torch optimizer over ``model``'s parameters.
        metrics: dict mapping metric name -> metric function.
        params: hyper-parameters (``num_epochs``, ``batch_size``,
            ``lr_decay_rate``, ``patience``, ...).
        model_dir: output directory for checkpoints, encoders, plots, metrics.
        data_encoder, label_encoder: encoders pickled alongside the weights.
        restore_file: optional checkpoint path to resume from.
        save_model: write checkpoints/encoders every epoch when truthy.
        eval: run validation/test evaluation every epoch when truthy.
            (NOTE: shadows the builtin ``eval``; name kept for interface compatibility.)

    Returns:
        Index of the last effective epoch (rolled back by ``patience`` when early-stopped).
    """
    from src.ner.utils import SummaryWriter, Label, plot  # plotting tools

    train_summary_writer = SummaryWriter([*metrics] + ['loss'], name='train')
    val_summary_writer = SummaryWriter([*metrics] + ['loss'], name='val')
    test_summary_writer = SummaryWriter([*metrics] + ['loss'], name='test')
    writers = [train_summary_writer, val_summary_writer, test_summary_writer]
    labeller = Label(anchor_metric='f1_score', anchor_writer='val')

    plots_dir = os.path.join(model_dir, 'plots')
    if not os.path.exists(plots_dir):
        os.makedirs(plots_dir)

    start_epoch = -1
    if restore_file is not None:
        logging.info("Restoring parameters from {}".format(restore_file))
        checkpoint = utils.load_checkpoint(restore_file, model, optimizer)
        start_epoch = checkpoint['epoch']

    # save the snapshot of parameters for reproducibility
    utils.save_dict_to_json(params.dict, os.path.join(model_dir, 'train_snapshot.json'))

    # variable initialization
    best_val_score = 0.0
    patience = 0
    early_stopping_metric = 'f1_score'

    # set the Learning rate Scheduler: lr_e = lr_0 / (1 + lr_decay_rate * e)
    # NOTE(review): when resuming (last_epoch >= 0) LambdaLR requires an
    # 'initial_lr' entry in each optimizer param group — confirm the restored
    # optimizer state provides it.
    lambda_lr = lambda epoch: 1 / (1 + (params.lr_decay_rate * epoch))
    lr_scheduler = LambdaLR(optimizer, lr_lambda=lambda_lr, last_epoch=start_epoch)

    # train over epochs
    for epoch in range(start_epoch + 1, params.num_epochs):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))
        # read the lr directly from the optimizer: accurate at any point and
        # avoids calling get_lr() outside step() (deprecated/misleading)
        logging.info("Learning Rate : {}".format([group['lr'] for group in optimizer.param_groups]))

        # number of batches in one full pass over the training set
        num_steps = (train_data['size'] + 1) // params.batch_size
        train_data_iterator = data_loader.batch_iterator(train_data, batch_size=params.batch_size, shuffle=True)
        train_metrics = train(model, optimizer, train_data_iterator, metrics, params, num_steps,
                              data_encoder, label_encoder)
        # fall back to the training score so is_best is defined when eval is off
        val_score = train_metrics[early_stopping_metric]
        is_best = val_score >= best_val_score
        train_summary_writer.update(train_metrics)

        if eval:
            # Evaluate for one epoch on validation set
            num_steps = (val_data['size'] + 1) // params.batch_size
            val_data_iterator = data_loader.batch_iterator(val_data, batch_size=params.batch_size, shuffle=False)
            val_metrics = evaluate(model, val_data_iterator, metrics, num_steps, label_encoder, mode='val')
            val_score = val_metrics[early_stopping_metric]
            is_best = val_score >= best_val_score
            val_summary_writer.update(val_metrics)

            ### TEST
            num_steps = (test_data['size'] + 1) // params.batch_size
            test_data_iterator = data_loader.batch_iterator(test_data, batch_size=params.batch_size, shuffle=False)
            test_metrics = evaluate(model, test_data_iterator, metrics, num_steps, label_encoder, mode='test')
            test_summary_writer.update(test_metrics)

        labeller.update(writers=writers)
        plot(writers=writers, plot_dir=plots_dir, save=True)

        # Save weights
        if save_model:
            utils.save_checkpoint({'epoch': epoch,
                                   'state_dict': model.state_dict(),
                                   'optim_dict': optimizer.state_dict()},
                                  is_best=is_best,
                                  checkpoint=model_dir)
            # save encoders only if they do not exist yet
            if not os.path.exists(os.path.join(model_dir, 'data_encoder.pkl')):
                utils.save_obj(data_encoder, os.path.join(model_dir, 'data_encoder.pkl'))
            if not os.path.exists(os.path.join(model_dir, 'label_encoder.pkl')):
                utils.save_obj(label_encoder, os.path.join(model_dir, 'label_encoder.pkl'))

        if is_best:
            patience = 0
            logging.info("- Found new best F1 score")
            best_val_score = val_score

            # Save best metrics in a json file in the model directory
            if eval:
                utils.save_dict_to_json(val_metrics,
                                        os.path.join(model_dir, 'plots', "metrics_val_best_weights.json"))
                utils.save_dict_to_json(test_metrics,
                                        os.path.join(model_dir, 'plots', "metrics_test_best_weights.json"))
                utils.save_dict_to_json(train_metrics,
                                        os.path.join(model_dir, 'plots', "metrics_train_best_weights.json"))
        else:
            if eval:
                patience += 1
                logging.info('current patience: {} ; max patience: {}'.format(patience, params.patience))
                if patience == params.patience:
                    logging.info('patience reached. Exiting at epoch: {}'.format(epoch + 1))
                    # Save latest metrics in a json file in the model directory before exiting
                    utils.save_dict_to_json(val_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_val_last_weights.json"))
                    utils.save_dict_to_json(test_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_test_last_weights.json"))
                    utils.save_dict_to_json(train_metrics,
                                            os.path.join(model_dir, 'plots', "metrics_train_last_weights.json"))
                    # report the last epoch that still improved the score
                    epoch = epoch - patience
                    break

        # Save latest metrics in a json file in the model directory at end of epoch
        if eval:
            utils.save_dict_to_json(val_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_val_last_weights.json"))
            utils.save_dict_to_json(test_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_test_last_weights.json"))
            utils.save_dict_to_json(train_metrics,
                                    os.path.join(model_dir, 'plots', "metrics_train_last_weights.json"))

        # BUG FIX: step the scheduler at the END of the epoch. The original
        # stepped at the top of the loop, so epoch 0 never trained with the
        # base learning rate and every epoch used the NEXT epoch's decayed lr.
        lr_scheduler.step()
    return epoch
# NOTE(review): this chunk continues a model-constructor call opened before this
# view; the keyword arguments configure a sequence-labelling model (entity tags,
# character vocabulary, configurable decoder).
params=params,
char_vocab_length=data_encoder[CharEncoder.FEATURE_NAME].vocab_length,
num_tags=label_encoder[EntityEncoder.FEATURE_NAME].num_tags,
pretrained_word_vecs=torch.from_numpy(
    data_encoder[WordEncoder.FEATURE_NAME].vectors),  # word-vector matrix from the data encoder
dropout=params.dropout,
decoder_type=params.decoder,
bidirectional=True,
freeze_embeddings=False).to(device).float()

# 6.2 fetch loss function and metrics
from src.ner.evaluation import accuracy_score, f1_score, precision_score, recall_score
metrics = {
    'accuracy': accuracy_score,
    'f1_score': f1_score,
    'precision_score': precision_score,
    'recall_score': recall_score
}

# restore the weights named by --restore_file before evaluating
utils.load_checkpoint(
    os.path.join(args.model_dir, args.restore_file + '.pth'), model)

# Evaluate
num_steps = (params.test_size + 1) // params.batch_size
test_metrics = evaluate(model, test_data_iterator, metrics, num_steps, label_encoder)
save_path = os.path.join(args.model_dir,
                         "metrics_test_{}.json".format(args.restore_file))
utils.save_dict_to_json(test_metrics, save_path)