Example #1
# Assumed imports. CONFIG, Tokenizer, SentimentDataset, model_dict,
# split_k_fold, train, evaluate and logger are project-specific helpers
# defined elsewhere in the original repository.
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import BertModel  # assumption; the original may use another BERT package


def main():
    # Build tokenizer and model
    tokenizer = Tokenizer(CONFIG.max_seq_len, CONFIG.bert_vocab_path)
    bert = BertModel.from_pretrained(CONFIG.bert_model_path)
    model = model_dict[CONFIG.model](bert, CONFIG).to(CONFIG.device)

    # Tokenize data
    train_data = SentimentDataset(CONFIG.train_file, tokenizer)
    test_data = SentimentDataset(CONFIG.test_file, tokenizer)

    # Log CUDA memory usage
    if CONFIG.device.type == 'cuda':
        logger.info('Cuda mem allocated: {}'.format(
            torch.cuda.memory_allocated(device=CONFIG.device.index)))

    # Set up the loss function and optimizer; training uses k-fold cross-validation
    accuracy_list = []
    criterion = nn.CrossEntropyLoss()
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = CONFIG.optimizer(params,
                                 lr=CONFIG.learning_rate,
                                 weight_decay=CONFIG.ridge_reg)

    # Build the test-set loader once; it is reused to evaluate every fold
    test_data_loader = DataLoader(dataset=test_data,
                                  batch_size=CONFIG.batch_size,
                                  shuffle=False)
    # k-fold cross-validation loop. Note that the model and optimizer are
    # created once above, so each fold continues training from the previous
    # fold's weights rather than starting from a freshly initialized model.
    for train_set, valid_set in split_k_fold(train_data,
                                             CONFIG.cross_val_fold):
        train_data_loader = DataLoader(dataset=train_set,
                                       batch_size=CONFIG.batch_size,
                                       shuffle=True)
        valid_data_loader = DataLoader(dataset=valid_set,
                                       batch_size=CONFIG.batch_size,
                                       shuffle=False)
        best_model_path = train(model, criterion, optimizer, train_data_loader,
                                valid_data_loader)
        # Reload the best checkpoint found during this fold's training
        model.load_state_dict(torch.load(best_model_path))
        # Switch to evaluation mode and measure test-set accuracy
        model.eval()
        accuracy = evaluate(model, test_data_loader)
        logger.info('>> [Test] accuracy: {:.4f}'.format(accuracy))
        accuracy_list.append(accuracy)

    # Report the mean test accuracy across all folds
    logger.info('>' * 100)
    mean_accuracy = np.mean(accuracy_list)
    logger.info('>>> [Test] mean accuracy: {:.4f}'.format(mean_accuracy))
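

# The split_k_fold helper used above is not shown in this example. Below is a
# minimal sketch of what such a generator could look like, assuming the dataset
# supports len() and integer indexing; the real implementation in the original
# project may differ.
from torch.utils.data import Subset


def split_k_fold(dataset, k):
    """Yield (train_subset, valid_subset) pairs for k-fold cross-validation."""
    indices = np.arange(len(dataset))
    np.random.shuffle(indices)
    folds = np.array_split(indices, k)
    for i in range(k):
        valid_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        yield Subset(dataset, train_idx.tolist()), Subset(dataset, valid_idx.tolist())


if __name__ == '__main__':
    main()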