def test(dataset_dir, vocab, device, kf_index=0):
    """test model performance on the final test set"""
    data = np.load(dataset_dir, allow_pickle=True)
    word_test = data["words"]
    label_test = data["labels"]
    # build dataset
    test_dataset = SegDataset(word_test, label_test, vocab, config.label2id)
    # build data_loader
    test_loader = DataLoader(test_dataset, batch_size=config.batch_size,
                             shuffle=False, collate_fn=test_dataset.collate_fn)
    # load the model for this fold (fold 0 is the default single-run model)
    if kf_index == 0:
        model = load_model(config.model_dir, device)
    else:
        model = load_model(config.exp_dir + "model_{}.pth".format(kf_index), device)
    metric = dev(test_loader, vocab, model, device, mode='test')
    f1 = metric['f1']
    p = metric['p']
    r = metric['r']
    test_loss = metric['loss']
    if kf_index == 0:
        logging.info("final test loss: {}, f1 score: {}, precision: {}, recall: {}".format(
            test_loss, f1, p, r))
    else:
        logging.info("Kf round: {}, final test loss: {}, f1 score: {}, precision: {}, recall: {}".format(
            kf_index, test_loss, f1, p, r))
    return test_loss, f1
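# `load_model` is used above but not defined in this section. The sketch below is an
# assumption about what it does (load a fully serialized model with torch.load and
# prepare it for evaluation); the actual save format in the repo may differ, e.g. a
# state_dict instead of a whole module.
def load_model(model_path, device):
    # assumes the model was saved with torch.save(model, model_path)
    model = torch.load(model_path, map_location=device)
    model.to(device)
    model.eval()  # evaluation mode: disable dropout for testing
    logging.info("--------Load model from {}--------".format(model_path))
    return model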
def test():
    """evaluate the BERT model on the final test set"""
    data = np.load(config.test_dir, allow_pickle=True)
    word_test = data["words"]
    label_test = data["labels"]
    # build dataset
    test_dataset = SegDataset(word_test, label_test, config)
    logging.info("--------Dataset Build!--------")
    # build data_loader
    test_loader = DataLoader(test_dataset, batch_size=config.batch_size,
                             shuffle=False, collate_fn=test_dataset.collate_fn)
    logging.info("--------Get Data-loader!--------")
    # Prepare model
    if config.model_dir is not None:
        model = BertSeg.from_pretrained(config.model_dir)
        model.to(config.device)
        logging.info("--------Load model from {}--------".format(config.model_dir))
    else:
        logging.info("--------No model to test!--------")
        return
    val_metrics = evaluate(test_loader, model, mode='test')
    val_f1 = val_metrics['f1']
    val_p = val_metrics['p']
    val_r = val_metrics['r']
    logging.info("test loss: {}, f1 score: {}, precision: {}, recall: {}".format(
        val_metrics['loss'], val_f1, val_p, val_r))
def run(word_train, label_train, word_dev, label_dev, vocab, device, kf_index=0):
    """train and evaluate the BiLSTM-CRF model for one run / one fold"""
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size,
                            shuffle=True, collate_fn=dev_dataset.collate_fn)
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out)
    model.to(device)
    # optimizer and scheduler
    optimizer = optim.Adam(model.parameters(), lr=config.lr, betas=config.betas)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    # initialize the CRF transition parameters uniformly in [-1, 1]
    for p in model.crf.parameters():
        _ = torch.nn.init.uniform_(p, -1, 1)
    # train and test
    train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
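# The kf_index argument suggests run() is meant to be driven by a k-fold
# cross-validation loop. The driver below is only a sketch of such a loop and is
# not part of the original code: the KFold split, the fold numbering starting at 1
# (so per-fold models are saved to config.exp_dir), and the averaging of the
# returned f1 scores are all assumptions.
from sklearn.model_selection import KFold

def k_fold_run(words, labels, vocab, device, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    scores = []
    for kf_index, (train_idx, dev_idx) in enumerate(kf.split(words), start=1):
        word_train = [words[i] for i in train_idx]
        label_train = [labels[i] for i in train_idx]
        word_dev = [words[i] for i in dev_idx]
        label_dev = [labels[i] for i in dev_idx]
        test_loss, f1 = run(word_train, label_train, word_dev, label_dev,
                            vocab, device, kf_index)
        scores.append((test_loss, f1))
    avg_f1 = sum(f1 for _, f1 in scores) / len(scores)
    logging.info("average f1 over {} folds: {}".format(n_splits, avg_f1))
    return scores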
def run(): """train the model""" # set the logger utils.set_logger(config.log_dir) logging.info("device: {}".format(config.device)) # 处理数据,分离文本和标签 processor = Processor(config) processor.process() logging.info("--------Process Done!--------") # 分离出验证集 word_train, word_dev, label_train, label_dev = load_dev('train') # build dataset train_dataset = SegDataset(word_train, label_train, config) dev_dataset = SegDataset(word_dev, label_dev, config) logging.info("--------Dataset Build!--------") # get dataset size train_size = len(train_dataset) # build data_loader train_loader = DataLoader(train_dataset, batch_size=config.batch_size, sampler=DistributedSampler(train_dataset), collate_fn=train_dataset.collate_fn, num_workers=4) dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size, sampler=DistributedSampler(dev_dataset), collate_fn=dev_dataset.collate_fn, num_workers=4) logging.info("--------Get Dataloader!--------") # Prepare model device = config.device model = BertSeg.from_pretrained(config.bert_model, num_labels=len(config.label2id)) # 要先将model放到gpu上 model = model.to(device) # Prepare optimizer if config.full_fine_tuning: # model.named_parameters(): [bert, classifier] bert_optimizer = list(model.bert.named_parameters()) classifier_optimizer = list(model.classifier.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in bert_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': config.weight_decay }, { 'params': [p for n, p in bert_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }, { 'params': [ p for n, p in classifier_optimizer if not any(nd in n for nd in no_decay) ], 'lr': config.learning_rate * 5, 'weight_decay': config.weight_decay }, { 'params': [ p for n, p in classifier_optimizer if any(nd in n for nd in no_decay) ], 'lr': config.learning_rate * 5, 'weight_decay': 0.0 }] # only fine-tune the head classifier else: param_optimizer = list(model.classifier.named_parameters()) optimizer_grouped_parameters = [{ 'params': [p for n, p in param_optimizer] }] optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, correct_bias=False) train_steps_per_epoch = train_size // config.batch_size scheduler = get_cosine_schedule_with_warmup( optimizer, num_warmup_steps=train_steps_per_epoch, num_training_steps=config.epoch_num * train_steps_per_epoch) model = DistributedDataParallel(model, find_unused_parameters=True, device_ids=[config.local_rank], output_device=config.local_rank) # Train the model logging.info("--------Start Training!--------") train(train_loader, dev_loader, model, optimizer, scheduler, config.model_dir, config.local_rank)