print('current device:', torch.cuda.current_device()) # 预测验证集还是测试集 mode = args.mode params = utils.Params() # Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) params.seed = args.seed # Set the logger utils.set_logger() # Create the input data pipeline logging.info("Loading the dataset...") dataloader = NERDataLoader(params) val_loader, test_loader = dataloader.load_data(mode='test') logging.info("- done.") # Define the model logging.info('Loading the model...') config_path = os.path.join(params.params_path, 'bert_config.json') config = RobertaConfig.from_json_file(config_path) model = ElectraForTokenClassification(config, params=params) model.to(params.device) # Reload weights from the saved file utils.load_checkpoint( os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model) logging.info('- done.') logging.info("Starting prediction...")
def train_and_evaluate(model, optimizer, scheduler, params, restore_file=None):
    """Train the model and evaluate on train/val sets every epoch.

    Saves a checkpoint after every epoch (marking the best-F1 one) and stops
    early once the validation F1 has not improved by at least
    ``params.patience`` for more than ``params.patience_num`` epochs (after
    ``params.min_epoch_num`` epochs have run).

    Args:
        model: the token-classification model to train.
        optimizer: optimizer whose state is saved/restored with the model.
        scheduler: learning-rate scheduler, stepped once per epoch.
        params: run configuration (model_dir, params_path, patience,
            patience_num, min_epoch_num, ...).
        restore_file: optional checkpoint basename (without '.pth.tar') to
            resume from; ``None`` starts from scratch.
    """
    # load command-line args (epoch_num, ...); `parser` is module-level
    args = parser.parse_args()

    # reload weights from restore_file if specified
    if restore_file is not None:
        # BUGFIX: build the path from the `restore_file` argument instead of
        # ignoring it in favor of args.restore_file — the two can disagree
        # when a caller passes an explicit checkpoint name.
        restore_path = os.path.join(params.model_dir, restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        # read checkpoint into both model and optimizer
        utils.load_checkpoint(restore_path, model, optimizer)

    # Load training data and val data
    dataloader = NERDataLoader(params)
    train_loader, val_loader = dataloader.load_data(mode='train')

    # patience stage
    best_val_f1 = 0.0
    patience_counter = 0

    for epoch in range(1, args.epoch_num + 1):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch, args.epoch_num))

        # number of optimization steps in one epoch
        params.train_steps = len(train_loader)

        # Train for one epoch on training set
        train(model, train_loader, optimizer, params)

        # Evaluate for one epoch on training set and validation set
        train_metrics = evaluate(model, train_loader, params, mark='Train',
                                 verbose=True)  # Dict['loss', 'f1']
        val_metrics = evaluate(model, val_loader, params, mark='Val',
                               verbose=True)  # Dict['loss', 'f1']

        # learning-rate decay: one scheduler step per epoch
        scheduler.step()

        # validation-set f1-score
        val_f1 = val_metrics['f1']
        # improvement over the best f1 seen so far
        improve_f1 = val_f1 - best_val_f1

        # Save weights of the network
        # Only save the model it-self (unwrap DataParallel/DistributedDataParallel)
        model_to_save = model.module if hasattr(model, 'module') else model
        optimizer_to_save = optimizer
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model_to_save.state_dict(),
                               'optim_dict': optimizer_to_save.state_dict()},
                              is_best=improve_f1 > 0,
                              checkpoint=params.model_dir)
        params.save(params.params_path / 'params.json')

        # stop training based params.patience
        if improve_f1 > 0:
            logging.info("- Found new best F1")
            best_val_f1 = val_f1
            # an improvement smaller than params.patience still counts
            # toward the early-stopping counter
            if improve_f1 < params.patience:
                patience_counter += 1
            else:
                patience_counter = 0
        else:
            patience_counter += 1

        # Early stopping and logging best f1
        if (patience_counter > params.patience_num
                and epoch > params.min_epoch_num) or epoch == args.epoch_num:
            logging.info("Best val f1: {:05.2f}".format(best_val_f1))
            break