Example #1
0
def test(dataset_dir, vocab, device, kf_index=0):
    """Evaluate a saved model on the final test set and log the metrics.

    Args:
        dataset_dir: Path handed to ``np.load``; the loaded archive must
            provide ``"words"`` and ``"labels"`` entries.
        vocab: Vocabulary object forwarded to ``SegDataset`` and ``dev``.
        device: Device the checkpoint is loaded onto and evaluated on.
        kf_index: Round index. ``0`` loads the checkpoint at
            ``config.model_dir``; any other value loads
            ``config.exp_dir + "model_<kf_index>.pth"`` and prefixes the
            log line with the round number.

    Returns:
        A ``(test_loss, f1)`` tuple read from the metric dict that ``dev``
        returns.
    """
    single_run = kf_index == 0

    # allow_pickle=True lets np.load unpickle object arrays; only point this
    # at trusted files.
    archive = np.load(dataset_dir, allow_pickle=True)
    dataset = SegDataset(archive["words"], archive["labels"], vocab,
                         config.label2id)
    # Keep the original sample order for evaluation (no shuffling).
    loader = DataLoader(dataset,
                        batch_size=config.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn)

    # Pick the checkpoint: the default model, or the one saved for this round.
    if single_run:
        checkpoint = config.model_dir
    else:
        checkpoint = config.exp_dir + "model_{}.pth".format(kf_index)
    model = load_model(checkpoint, device)

    metric = dev(loader, vocab, model, device, mode='test')
    f1, precision, recall, test_loss = (
        metric[key] for key in ('f1', 'p', 'r', 'loss'))

    if single_run:
        message = (
            "final test loss: {}, f1 score: {}, precision:{}, recall: {}".
            format(test_loss, f1, precision, recall))
    else:
        message = (
            "Kf round: {}, final test loss: {}, f1 score: {}, precision:{}, recall: {}"
            .format(kf_index, test_loss, f1, precision, recall))
    logging.info(message)

    return test_loss, f1
Example #2
0
def test():
    """Evaluate the model stored at ``config.model_dir`` on the test data.

    Loads ``config.test_dir`` with ``np.load``, builds a ``SegDataset`` and a
    non-shuffling ``DataLoader`` from its ``"words"`` / ``"labels"`` entries,
    then runs ``evaluate`` in ``'test'`` mode and logs loss, F1, precision
    and recall.

    If ``config.model_dir`` is ``None`` nothing is evaluated: a message is
    logged and the function returns early. The function always returns
    ``None``.
    """
    # allow_pickle=True lets np.load unpickle object arrays; only point this
    # at trusted files.
    archive = np.load(config.test_dir, allow_pickle=True)
    dataset = SegDataset(archive["words"], archive["labels"], config)
    logging.info("--------Dataset Build!--------")

    # Evaluation keeps the original sample order.
    loader = DataLoader(dataset,
                        batch_size=config.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn)
    logging.info("--------Get Data-loader!--------")

    # Without a checkpoint location there is nothing to test.
    if config.model_dir is None:
        logging.info("--------No model to test !--------")
        return

    model = BertSeg.from_pretrained(config.model_dir)
    model.to(config.device)
    logging.info("--------Load model from {}--------".format(
        config.model_dir))

    metrics = evaluate(loader, model, mode='test')
    f1, precision, recall = (metrics[key] for key in ('f1', 'p', 'r'))
    summary = "test loss: {}, f1 score: {}, precision: {}, recall: {}".format(
        metrics['loss'], f1, precision, recall)
    logging.info(summary)
Example #3
0
def run(word_train,
        label_train,
        word_dev,
        label_dev,
        vocab,
        device,
        kf_index=0):
    """Build the BiLSTM-CRF training setup and evaluate on the test set.

    Datasets, data loaders, model, optimizer and LR scheduler are all
    constructed, but the ``train(...)`` call is currently commented out, so
    none of them is used afterwards. Only ``test`` runs; it is not passed the
    model built here, so it presumably loads a saved checkpoint itself.

    Args:
        word_train: Training inputs passed to ``SegDataset``.
        label_train: Training labels passed to ``SegDataset``.
        word_dev: Validation inputs passed to ``SegDataset``.
        label_dev: Validation labels passed to ``SegDataset``.
        vocab: Vocabulary object; supplies ``vocab_size()`` and
            ``label_size()`` for the model and is forwarded to ``test``.
        device: Device the model is moved to and that ``test`` uses.
        kf_index: Round index forwarded to ``test``.

    Returns:
        The ``(test_loss, f1)`` pair produced by ``test``.
    """
    # Datasets first, then one shuffling loader per dataset.
    train_set = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_set = SegDataset(word_dev, label_dev, vocab, config.label2id)

    loader_options = dict(batch_size=config.batch_size, shuffle=True)
    train_loader = DataLoader(train_set,
                              collate_fn=train_set.collate_fn,
                              **loader_options)
    dev_loader = DataLoader(dev_set,
                            collate_fn=dev_set.collate_fn,
                            **loader_options)

    # Model sized from the vocabulary and configured from `config`.
    model = BiLSTM_CRF(
        embedding_size=config.embedding_size,
        hidden_size=config.hidden_size,
        vocab_size=vocab.vocab_size(),
        target_size=vocab.label_size(),
        num_layers=config.lstm_layers,
        lstm_drop_out=config.lstm_drop_out,
        nn_drop_out=config.nn_drop_out,
    )
    model.to(device)

    # Adam with a step-wise learning-rate decay.
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           betas=config.betas)
    scheduler = StepLR(optimizer,
                       step_size=config.lr_step,
                       gamma=config.lr_gamma)

    # Re-initialise every CRF parameter uniformly in [-1, 1] (in place).
    for crf_param in model.crf.parameters():
        torch.nn.init.uniform_(crf_param, -1, 1)

    # NOTE: training is disabled here; only the test pass below is executed.
    # train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)

    # Evaluate on the final test set without tracking gradients.
    with torch.no_grad():
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
        return test_loss, f1
Example #4
0
def run():
    """Train the BERT segmentation model with distributed data parallelism.

    Steps, in order: configure logging, preprocess the raw data, split off a
    validation set, build datasets and distributed data loaders, load the
    pretrained ``BertSeg`` model, build the AdamW optimizer and a cosine
    schedule with warmup, wrap the model in ``DistributedDataParallel`` and
    call ``train``.

    All settings are read from the module-level ``config``. Returns ``None``.
    """
    # Substrings of parameter names that are excluded from weight decay.
    no_decay = ('bias', 'LayerNorm.bias', 'LayerNorm.weight')

    def split_by_decay(named_params):
        """Split (name, param) pairs into (decayed, not_decayed) param lists."""
        decayed, not_decayed = [], []
        for name, param in named_params:
            exempt = any(tag in name for tag in no_decay)
            (not_decayed if exempt else decayed).append(param)
        return decayed, not_decayed

    # Set up the logger.
    utils.set_logger(config.log_dir)
    logging.info("device: {}".format(config.device))

    # Process the data, separating text from labels.
    Processor(config).process()
    logging.info("--------Process Done!--------")

    # Split off the validation set.
    word_train, word_dev, label_train, label_dev = load_dev('train')

    train_set = SegDataset(word_train, label_train, config)
    dev_set = SegDataset(word_dev, label_dev, config)
    logging.info("--------Dataset Build!--------")

    train_size = len(train_set)

    # Each loader draws its samples through a DistributedSampler.
    loader_options = dict(batch_size=config.batch_size, num_workers=4)
    train_loader = DataLoader(train_set,
                              sampler=DistributedSampler(train_set),
                              collate_fn=train_set.collate_fn,
                              **loader_options)
    dev_loader = DataLoader(dev_set,
                            sampler=DistributedSampler(dev_set),
                            collate_fn=dev_set.collate_fn,
                            **loader_options)
    logging.info("--------Get Dataloader!--------")

    # Load the pretrained model; it must be moved to the GPU first.
    model = BertSeg.from_pretrained(config.bert_model,
                                    num_labels=len(config.label2id))
    model = model.to(config.device)

    if config.full_fine_tuning:
        # Fine-tune both sub-modules: [bert, classifier]. The classifier
        # groups use a learning rate five times the base rate.
        bert_decay, bert_plain = split_by_decay(
            list(model.bert.named_parameters()))
        head_decay, head_plain = split_by_decay(
            list(model.classifier.named_parameters()))
        head_lr = config.learning_rate * 5
        optimizer_grouped_parameters = [
            {
                'params': bert_decay,
                'weight_decay': config.weight_decay,
            },
            {
                'params': bert_plain,
                'weight_decay': 0.0,
            },
            {
                'params': head_decay,
                'lr': head_lr,
                'weight_decay': config.weight_decay,
            },
            {
                'params': head_plain,
                'lr': head_lr,
                'weight_decay': 0.0,
            },
        ]
    else:
        # Only fine-tune the classifier head.
        optimizer_grouped_parameters = [{
            'params': [
                param for _, param in model.classifier.named_parameters()
            ],
        }]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=config.learning_rate,
                      correct_bias=False)

    # One epoch's worth of steps is used as warmup.
    # NOTE(review): this is derived from the full dataset size, while
    # DistributedSampler gives each process only a shard -- confirm it matches
    # how train() steps the scheduler.
    train_steps_per_epoch = train_size // config.batch_size
    total_steps = config.epoch_num * train_steps_per_epoch
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=train_steps_per_epoch,
        num_training_steps=total_steps)

    model = DistributedDataParallel(model,
                                    find_unused_parameters=True,
                                    device_ids=[config.local_rank],
                                    output_device=config.local_rank)

    # Train the model.
    logging.info("--------Start Training!--------")
    train(train_loader, dev_loader, model, optimizer, scheduler,
          config.model_dir, config.local_rank)