# Initialize the DataLoader
data_dir = 'data/' + args.dataset
if args.dataset in ["conll"]:
    bert_model_dir = 'pretrained_bert_models/bert-base-cased/'
elif args.dataset in ["msra"]:
    bert_model_dir = 'pretrained_bert_models/bert-base-chinese/'
data_loader = DataLoader(data_dir, bert_model_dir, params)

# Load data
test_data = data_loader.load_data('test')

# Specify the test set size
params.test_size = len(test_data)
params.eval_steps = params.test_size // params.batch_size
test_data_iterator = data_loader.data_iterator(test_data, shuffle=False)
logging.info("- done.")

# Define the model
# config_path = os.path.join(args.bert_model_dir, 'config.json')
# config = BertConfig.from_json_file(config_path)
# model = BertForTokenClassification(config, num_labels=len(params.tag2idx))
# model = BertForSequenceTagging(config)
model = BertForSequenceTagging.from_pretrained(tagger_model_dir)
model.device = params.device
model.to(params.device)

if args.fp16:
    model.half()

if params.n_gpu > 1 and args.multi_gpu:
    # Replicate the model across GPUs (standard DataParallel pattern)
    model = torch.nn.DataParallel(model)
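# -----------------------------------------------------------------------------
# A minimal sketch of the iterator contract the code above relies on:
# data_iterator is expected to yield (batch_data, batch_tags) LongTensor pairs
# already placed on the target device. Everything below (toy_data_iterator and
# the data dict layout with 'sentences'/'tags' keys) is a hypothetical
# illustration, not the repo's actual DataLoader implementation; it assumes
# each sentence is already padded to a fixed length.
import torch

def toy_data_iterator(data, batch_size, device):
    """Yield (batch_data, batch_tags) tensor pairs, one padded batch at a time."""
    for start in range(0, len(data['sentences']), batch_size):
        batch_data = torch.tensor(data['sentences'][start:start + batch_size], dtype=torch.long)
        batch_tags = torch.tensor(data['tags'][start:start + batch_size], dtype=torch.long)
        yield batch_data.to(device), batch_tags.to(device)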
# Put every model parameter into a single optimizer group
param_optimizer = list(crf_model.named_parameters())
optimizer_grouped_parameters = [{
    'params': [p for n, p in param_optimizer]
}]

if args.fp16:
    try:
        from apex.optimizers import FP16_Optimizer
        from apex.optimizers import FusedAdam
    except ImportError:
        raise ImportError(
            "Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
    optimizer = FusedAdam(optimizer_grouped_parameters,
                          lr=params.learning_rate,
                          bias_correction=False,
                          max_grad_norm=1.0)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))
    if args.loss_scale == 0:
        optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
    else:
        optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
else:
    optimizer = Adam(optimizer_grouped_parameters, lr=params.learning_rate)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))

train(crf_model, train_data, val_data, optimizer, scheduler, params)

# Prediction order does not matter, so the test iterator is not shuffled
test_data_iterator = data_loader.data_iterator(test_data, shuffle=False)
print("***** Running prediction *****")
evaluate(crf_model, test_data_iterator, 'Test Data')
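# -----------------------------------------------------------------------------
# The FusedAdam + FP16_Optimizer pairing above follows the old apex API, which
# has since been deprecated. A sketch of an equivalent mixed-precision step
# using PyTorch's built-in torch.cuda.amp utilities instead -- it assumes the
# model returns its loss when labels are passed, which may differ from the
# actual model used here:
import torch
from torch.optim import Adam

amp_optimizer = Adam(optimizer_grouped_parameters, lr=params.learning_rate)
scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)

def amp_train_step(model, batch_data, batch_tags, batch_masks):
    amp_optimizer.zero_grad()
    with torch.cuda.amp.autocast(enabled=args.fp16):
        loss = model(batch_data, attention_mask=batch_masks, labels=batch_tags)
    scaler.scale(loss).backward()  # scale the loss so fp16 gradients don't underflow
    scaler.step(amp_optimizer)     # unscale gradients, then apply the update
    scaler.update()                # adapt the loss scale for the next step
    return loss.item()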
# Use the BertAdam optimizer here
train_iter = train_size // Config.batch_size - 1
optimizer = BertAdam(optimizer_grouped_parameters,
                     lr=Config.learning_rate,
                     warmup=0.05,
                     t_total=train_iter * Config.epoch_num)

best_f1 = 0
model.train()
model.to(Config.device)

for epoch in range(1, Config.epoch_num + 1):
    start_time = time.time()
    # Run one epoch
    print("Epoch {}/{}".format(epoch, Config.epoch_num))
    train_data_iterator = data_loader.data_iterator(train_data, shuffle=True)
    for i in range(train_iter):
        batch_data, batch_tags = next(train_data_iterator)
        # print(batch_data.size())  # torch.Size([2, 52])
        # print(batch_tags.size())  # torch.Size([2, 52])
        batch_masks = batch_data.gt(0)  # attend only to non-[PAD] positions (pad id is 0)
        bert_encode = model(batch_data, token_type_ids=None,
                            attention_mask=batch_masks, labels=batch_tags)
        train_loss = model.loss_fn(bert_encode=bert_encode,
                                   tags=batch_tags,
                                   output_mask=batch_masks)
        train_loss.backward()
        optimizer.step()       # apply the parameter update
        optimizer.zero_grad()  # clear gradients for the next batch
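# -----------------------------------------------------------------------------
# A quick worked example of the batch_masks line above: gt(0) marks every
# non-[PAD] position as True, since the [PAD] token id is 0 in BERT
# vocabularies. The token ids below are illustrative only.
import torch

example_batch = torch.tensor([[101, 4263, 872, 102, 0, 0]])  # [CLS] w1 w2 [SEP] [PAD] [PAD]
print(example_batch.gt(0))
# tensor([[ True,  True,  True,  True, False, False]])
# (a ByteTensor rather than a BoolTensor on older PyTorch versions)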
utils.set_logger(os.path.join(args.model_dir, 'train.log'))
logging.info("device: {}, n_gpu: {}, 16-bits training: {}".format(
    params.device, params.n_gpu, args.fp16))

# Create the input data pipeline
logging.info("Loading the datasets...")

# Initialize the DataLoader
data_loader = DataLoader(args.data_dir, args.bert_model_dir, params, token_pad_idx=0)

# Load training data and validation data
train_data = data_loader.load_data('train')
train_data_iterator = data_loader.data_iterator(train_data, shuffle=True)
val_data = data_loader.load_data('val')
val_data_iterator = data_loader.data_iterator(val_data, shuffle=False)

# Specify the training and validation dataset sizes
params.train_size = train_data['size']
params.val_size = val_data['size']

# Prepare model
# model = BertForTokenClassification.from_pretrained(args.bert_model_dir, num_labels=len(params.tag2idx))
model = BERT_LSTM_CRF(args.bert_model_dir, params.train_size, 768, 500, 1, dropout_ratio=0.5,
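# -----------------------------------------------------------------------------
# A minimal sketch of the BERT + BiLSTM + CRF stack being instantiated above,
# to make the positional arguments (768 embedding dim, 500 hidden units, one
# RNN layer) concrete. The class below is an illustrative assumption, not the
# repo's actual BERT_LSTM_CRF implementation; it uses the pytorch_pretrained_bert
# package and omits the CRF transition/decoding logic.
import torch.nn as nn
from pytorch_pretrained_bert import BertModel

class ToyBertLstmCrf(nn.Module):
    def __init__(self, bert_model_dir, tagset_size, embedding_dim=768,
                 hidden_dim=500, rnn_layers=1, dropout_ratio=0.5):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_dir)  # 768-dim encoder
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=rnn_layers,
                            bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(dropout_ratio)
        self.hidden2tag = nn.Linear(hidden_dim * 2, tagset_size)  # emission scores

    def forward(self, input_ids, attention_mask=None):
        # pytorch_pretrained_bert returns (sequence output, pooled output)
        # when output_all_encoded_layers=False
        encoded_layers, _ = self.bert(input_ids, attention_mask=attention_mask,
                                      output_all_encoded_layers=False)
        lstm_out, _ = self.lstm(encoded_layers)
        return self.hidden2tag(self.dropout(lstm_out))  # a CRF layer would decode these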