Code example #1
def evaluate(classifier_model: BertForMultipleChoice, dataloader: DataLoader,
             device: torch.device):
    """
    Evaluate the model.
    Results and labels are returned as a dict.
    """
    classifier_model.eval()

    count_steps = 0
    total_loss = 0

    preds = None
    correct_labels = None
    for batch_idx, batch in enumerate(tqdm(dataloader)):
        with torch.no_grad():
            bert_inputs = {
                "input_ids": batch[0].to(device),
                "attention_mask": batch[1].to(device),
                "token_type_ids": batch[2].to(device),
                "labels": batch[3].to(device)
            }

            classifier_outputs = classifier_model(**bert_inputs)
            loss, logits = classifier_outputs[:2]

            count_steps += 1
            total_loss += loss.item()

            if preds is None:
                preds = logits.detach().cpu().numpy()
                correct_labels = bert_inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                correct_labels = np.append(
                    correct_labels,
                    bert_inputs["labels"].detach().cpu().numpy(),
                    axis=0)

    pred_labels = np.argmax(preds, axis=1)
    accuracy = calc_accuracy(pred_labels, correct_labels)
    eval_loss = total_loss / count_steps

    ret = {
        "pred_labels": pred_labels,
        "correct_labels": correct_labels,
        "logits": preds,
        "accuracy": accuracy,
        "eval_loss": eval_loss
    }

    return ret
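The calc_accuracy helper called above is not part of the snippet; a minimal sketch, assuming both arguments are 1-D NumPy arrays of equal length:

import numpy as np

def calc_accuracy(pred_labels: np.ndarray, correct_labels: np.ndarray) -> float:
    # Fraction of predictions that exactly match the correct labels.
    return float((pred_labels == correct_labels).mean())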
Code example #2
def test(test_x, test_y):
    test_set = MyDataset(test_x, test_y)
    test_loader = DataLoader(test_set, batch_size=conf['valid_bs'], collate_fn=collate_fn, shuffle=False,
                             num_workers=conf['num_workers'])

    model = BertForMultipleChoice.from_pretrained(conf['model']).to(conf['device'])

    predictions = []

    for fold in [0, 1, 2, 3, 4]:  # run prediction with each of the five trained models in turn
        y_pred = []
        model.load_state_dict(torch.load('../save/{}_fold_{}.pt'.format(conf['model'].split('/')[-1], fold)))
        model.eval()  # disable dropout so inference is deterministic

        with torch.no_grad():
            tk = tqdm(test_loader, total=len(test_loader), position=0, leave=True, ncols=50)
            for idx, (input_ids, attention_mask, token_type_ids, y) in enumerate(tk):
                input_ids, attention_mask, token_type_ids, y = input_ids.to(conf['device']), attention_mask.to(
                    conf['device']), token_type_ids.to(conf['device']), y.to(conf['device']).long()

                output = model(input_ids, attention_mask, token_type_ids).logits.cpu().numpy()

                y_pred.extend(output)

        predictions += [y_pred]
    return predictions
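test() returns one list of per-sample logits per fold; the usual next step is to average them before taking the argmax. A minimal sketch (the averaging step is an assumption about how these predictions are consumed, not part of the original):

import numpy as np

# predictions: list of 5 lists, each holding one logit array per test sample.
mean_logits = np.mean([np.stack(y_pred) for y_pred in predictions], axis=0)
final_labels = np.argmax(mean_logits, axis=1)  # one option index per sample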
Code example #3
def get_bert_model_and_tokenizer(ifModel=True):
    from transformers import BertTokenizer, BertForMultipleChoice
    if ifModel:
        model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
    else:
        model = None
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    return model, tokenizer
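A short usage sketch for the helper above; passing ifModel=False skips the (slow) model download and returns None in place of the model:

model, tokenizer = get_bert_model_and_tokenizer()
print(type(model).__name__)  # BertForMultipleChoice

# When only the tokenizer is needed, skip the model download:
_, tokenizer = get_bert_model_and_tokenizer(ifModel=False)
print(tokenizer.tokenize("Hello, my dog is cute"))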
Code example #4
File: race.py Project: sobamchan/race-bert
    def __init__(self, args):
        super(Model, self).__init__()
        model = BertForMultipleChoice.from_pretrained("bert-base-uncased", num_labels=NUM_LABELS)
        self.model = model

        train_dataloader, val_dataloader, test_dataloader = get_dataloader(args.data_dir)
        self._train_dataloader = train_dataloader
        self._val_dataloader = val_dataloader
        self._test_dataloader = test_dataloader
Code example #5
File: modeling.py Project: maeda6uiui-backup/AIO2
def main(batch_size,num_epochs,lr,train_input_dir,dev1_input_dir,result_save_dir):
    logger.info("seed: {}".format(SEED))
    logger.info("batch_size: {} num_epochs: {} lr: {}".format(batch_size,num_epochs,lr))

    #Create dataloaders.
    logger.info("Create train dataset from {}.".format(train_input_dir))
    train_dataset=create_dataset(train_input_dir,num_examples=-1,num_options=4)

    logger.info("Create dev1 dataloader from {}.".format(dev1_input_dir))
    dev1_dataset=create_dataset(dev1_input_dir,num_examples=-1,num_options=20)
    dev1_dataloader=DataLoader(dev1_dataset,batch_size=4,shuffle=False,drop_last=False)

    #Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model=BertForMultipleChoice.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    #Create an optimizer and a scheduler.
    num_iterations=len(train_dataset)//batch_size
    total_steps = num_iterations*num_epochs

    optimizer=AdamW(classifier_model.parameters(),lr=lr,eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps
    )

    #Create a directory to save the results in.
    os.makedirs(result_save_dir,exist_ok=True)

    logger.info("Start model training.")
    for epoch in range(num_epochs):
        logger.info("===== Epoch {}/{} =====".format(epoch+1,num_epochs))

        train_dataloader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,drop_last=False)
        mean_loss=train(classifier_model,optimizer,scheduler,train_dataloader)
        logger.info("Mean loss: {}".format(mean_loss))

        #Save model parameters.
        checkpoint_filepath=os.path.join(result_save_dir,"checkpoint_{}.pt".format(epoch+1))
        torch.save(classifier_model.state_dict(),checkpoint_filepath)

        pred_labels,correct_labels,accuracy=evaluate(classifier_model,dev1_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath=os.path.join(result_save_dir,"result_eval_{}.txt".format(epoch+1))
        labels_filepath=os.path.join(result_save_dir,"labels_eval_{}.txt".format(epoch+1))

        with open(res_filepath,"w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath,"w") as w:
            for pred_label,correct_label in zip(pred_labels,correct_labels):
                w.write("{} {}\n".format(pred_label,correct_label))

    logger.info("Finished model training.")
Code example #6
 def create_and_check_bert_for_multiple_choice(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
     config.num_choices = self.num_choices
     model = BertForMultipleChoice(config=config)
     model.eval()
     multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     loss, logits = model(multiple_choice_inputs_ids,
                          attention_mask=multiple_choice_input_mask,
                          token_type_ids=multiple_choice_token_type_ids,
                          labels=choice_labels)
     result = {
         "loss": loss,
         "logits": logits,
     }
     self.parent.assertListEqual(
         list(result["logits"].size()),
         [self.batch_size, self.num_choices])
     self.check_loss_output(result)
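The unsqueeze/expand calls above build the (batch_size, num_choices, seq_len) layout that BertForMultipleChoice expects; a standalone shape check with illustrative sizes:

import torch

batch_size, num_choices, seq_len = 2, 4, 16
input_ids = torch.randint(0, 100, (batch_size, seq_len))
mc_input_ids = input_ids.unsqueeze(1).expand(-1, num_choices, -1).contiguous()
assert mc_input_ids.shape == (batch_size, num_choices, seq_len)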
Code example #7
def train(classifier_model: BertForMultipleChoice,
          optimizer: torch.optim.Optimizer,
          scheduler: torch.optim.lr_scheduler.LambdaLR,
          dataloader: DataLoader, device: torch.device,
          logger: logging.Logger, logging_steps: int) -> float:
    """
    Train the model.
    """
    classifier_model.train()

    count_steps = 0
    total_loss = 0

    for batch_idx, batch in enumerate(dataloader):
        bert_inputs = {
            "input_ids": batch[0].to(device),
            "attention_mask": batch[1].to(device),
            "token_type_ids": batch[2].to(device),
            "labels": batch[3].to(device)
        }

        classifier_model.zero_grad()
        #Forward propagation
        classifier_outputs = classifier_model(**bert_inputs)
        loss = classifier_outputs[0]
        #Backward propagation
        loss.backward()
        torch.nn.utils.clip_grad_norm_(classifier_model.parameters(), 1.0)
        #Update parameters
        optimizer.step()
        scheduler.step()

        count_steps += 1
        total_loss += loss.item()

        if batch_idx % logging_steps == 0:
            logger.info("Step: {}\tLoss: {}\tlr: {}".format(
                batch_idx, loss.item(), optimizer.param_groups[0]["lr"]))

    return total_loss / count_steps
Code example #8
def main(test_input_dir, model_filepath, result_save_dir):
    #Create a dataloader.
    test_dataset = create_dataset(test_input_dir,
                                  num_examples=-1,
                                  num_options=20)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=4,
                                 shuffle=False,
                                 drop_last=True)

    #Create a classifier model.
    logger.info("Load model parameters from {}.".format(model_filepath))
    classifier_model = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    parameters = None
    if torch.cuda.is_available():
        parameters = torch.load(model_filepath)
    else:
        parameters = torch.load(model_filepath,
                                map_location=torch.device("cpu"))

    classifier_model.load_state_dict(parameters)

    #Create a directory to save the results in.
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model evaluation.")
    pred_labels, correct_labels, accuracy = evaluate(classifier_model,
                                                     test_dataloader)
    logger.info("Accuracy: {}".format(accuracy))

    #Save results as text files.
    res_filepath = os.path.join(result_save_dir, "result_eval.txt")
    labels_filepath = os.path.join(result_save_dir, "labels_eval.txt")

    with open(res_filepath, "w") as w:
        w.write("Accuracy: {}\n".format(accuracy))

    with open(labels_filepath, "w") as w:
        for pred_label, correct_label in zip(pred_labels, correct_labels):
            w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model evaluation.")
Code example #9
def main(test_input_dir,model_dir,test_upper_bound,result_save_dir):
    logger.info("Seed: {}".format(SEED))

    #Create a dataloader.
    logger.info("Create test dataloader from {}.".format(test_input_dir))
    test_dataset=create_dataset(test_input_dir,num_examples=-1,num_options=20)
    test_dataloader=DataLoader(test_dataset,batch_size=4,shuffle=False,drop_last=False)

    #Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model=BertForMultipleChoice.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    #Create a directory to save the results in.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir,exist_ok=True)

    logger.info("Start model evaluation.")
    for i in range(test_upper_bound):
        model_filepath=os.path.join(model_dir,"checkpoint_{}.pt".format(i+1))
        logger.info("Load model parameters from {}.".format(model_filepath))

        parameters=torch.load(model_filepath,map_location=device)
        classifier_model.load_state_dict(parameters)

        pred_labels,correct_labels,accuracy=evaluate(classifier_model,test_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath=os.path.join(result_save_dir,"result_test_{}.txt".format(i+1))
        labels_filepath=os.path.join(result_save_dir,"labels_test_{}.txt".format(i+1))

        with open(res_filepath,"w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath,"w") as w:
            for pred_label,correct_label in zip(pred_labels,correct_labels):
                w.write("{} {}\n".format(pred_label,correct_label))

    logger.info("Finished model evaluation.")
Code example #10
File: model.py Project: napoler/mcQA
    def _prepare_model(self, freeze, task_name='default'):
        """Prepare a model to be trained

        Arguments:
            freeze {bool} -- Whether to freeze the BERT layers.

        Returns:
            [BertForMultipleChoice] -- BertForMultipleChoice model to train
        """
        config = BertConfig.from_pretrained(
            self.bert_model,
            num_labels=self.num_choices,
            finetuning_task=task_name,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                   'distributed_{}'.format(self.local_rank)),
        )

        model = BertForMultipleChoice.from_pretrained(
            self.bert_model,
            from_tf=bool(".ckpt" in self.bert_model),
            config=config,
            cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                   'distributed_{}'.format(self.local_rank)),
        )

        if self.fp16:
            model.half()

        model.to(self.device)

        if freeze:
            for param in model.bert.parameters():
                param.requires_grad = False

        if self.local_rank != -1:
            model = DDP(model)
        elif self.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        return model
Code example #11
File: test_model.py Project: napoler/mcQA
def test_save_load(trained_model, mcqa_dataset, tmpdir):
    model_path = str(tmpdir)

    trained_model.save_model(model_path)

    mdl_clone = Model(bert_model="bert-base-uncased", device="cpu")

    config = BertConfig.from_pretrained(model_path, num_labels=4)

    mdl_clone.model = BertForMultipleChoice.from_pretrained(model_path,
                                                            config=config)

    for param1, param2 in zip(mdl_clone.model.parameters(),
                              trained_model.model.parameters()):

        assert param1.data.allclose(param2.data)

    mdl_clone.fit(mcqa_dataset.get_dataset(),
                  train_batch_size=1,
                  num_train_epochs=1)

    _ = mdl_clone.predict_proba(mcqa_dataset.get_dataset(), eval_batch_size=1)
Code example #12
 def create_and_check_for_multiple_choice(
     self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
 ):
     config.num_choices = self.num_choices
     model = BertForMultipleChoice(config=config)
     model.to(torch_device)
     model.eval()
     multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
     result = model(
         multiple_choice_inputs_ids,
         attention_mask=multiple_choice_input_mask,
         token_type_ids=multiple_choice_token_type_ids,
         labels=choice_labels,
     )
     self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))
Code example #13
File: model.py Project: DaltonSchutte/DrBertsOffice
 def _get_model(self):
     """Prepares desired BERT model for specified task"""
     if self.task == 'ner':
         self.bert = BertForTokenClassification.from_pretrained(
             self.weight_path, num_labels=self.num_labels)
     elif self.task == 'rel_ex':
         # TODO: Add special rel_ex for entity extraction
         self.bert = BertForSequenceClassification.from_pretrained(
             self.weight_path, num_labels=self.num_labels)
         # Resize to account for added {'<e1>', '</e1>', '<e2>', '</e2>'}
         self.bert.resize_token_embeddings(len(self.tokenizer))
     elif self.task == 'seq_clf':
         self.bert = BertForSequenceClassification.from_pretrained(
             self.weight_path, num_labels=self.num_labels)
     elif self.task == 'mc':
         self.bert = BertForMultipleChoice.from_pretrained(
             self.weight_path, num_labels=self.num_labels)
     elif self.task == 'qa':
         self.bert = BertForQuestionAnswering.from_pretrained(
             self.weight_path, num_labels=self.num_labels)
     else:
         raise NotImplementedError(
             f"{self.task} is not an implemented task, use ['ner', 'rel_ex', 'seq_clf', 'mc', 'qa']"
         )
Code example #14
File: test.py Project: maeda6uiui-backup/AIOLib
def main(args):
    test_input_dir: str = args.test_input_dir
    bert_model_dir: str = args.bert_model_dir
    result_save_dir: str = args.result_save_dir
    test_index_lower_bound: int = args.test_index_lower_bound
    test_index_upper_bound: int = args.test_index_upper_bound

    logger.info("{}からテスト用データローダを作成します。".format(test_input_dir))
    test_dataset = mf.create_dataset(test_input_dir,
                                     num_examples=-1,
                                     num_options=20)
    test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

    logger.info("{}から事前学習済みの重みを読み込みます。".format(bert_model_dir))
    classifier_model = BertForMultipleChoice.from_pretrained(bert_model_dir)
    classifier_model.to(device)

    for i in range(test_index_lower_bound, test_index_upper_bound):
        checkpoint_filepath = os.path.join(result_save_dir,
                                           "checkpoint_{}.pt".format(i))
        logger.info("{}からチェックポイントを読み込みます。".format(checkpoint_filepath))
        if os.path.exists(checkpoint_filepath) == False:
            raise RuntimeError("チェックポイントが存在しません。")

        parameters = torch.load(checkpoint_filepath, map_location=device)
        classifier_model.load_state_dict(parameters)

        result_save_filepath = os.path.join(result_save_dir,
                                            "result_test_{}.txt".format(i))
        labels_save_filepath = os.path.join(result_save_dir,
                                            "labels_test_{}.txt".format(i))
        logits_save_filepath = os.path.join(result_save_dir,
                                            "logits_test_{}.txt".format(i))
        mf.evaluate_and_save_result(classifier_model, test_dataloader,
                                    result_save_filepath, labels_save_filepath,
                                    logits_save_filepath, device, logger)
Code example #15
File: maml.py Project: KimDaeUng/final_meta_transfer
    def __init__(self, args, tokenizer):
        """
        :param args:
        """
        super(MetaLearner, self).__init__()

        # self.num_labels = args.num_labels
        self.outer_batch_size = args.outer_batch_size
        self.inner_batch_size = args.inner_batch_size
        self.outer_update_lr = args.outer_update_lr
        self.inner_update_lr = args.inner_update_lr
        self.inner_update_step = args.inner_update_step
        self.inner_update_step_eval = args.inner_update_step_eval
        self.bert_model = args.bert_model
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        self.model = BertForMultipleChoice.from_pretrained(self.bert_model)
        self.model.to(self.device)
        self.outer_optimizer = Adam(self.model.parameters(),
                                    lr=self.outer_update_lr)
        self.tokenizer = tokenizer

        self.model.train()
Code example #16
use_gpu = torch.cuda.is_available()
use_multi_gpu = False and use_gpu
device_ids = [0, 1, 2, 3, 4, 5, 6, 7]

tokenizer = BertTokenizerFast.from_pretrained(model_path)

print('train loader')
train_loader = process('train', tokenizer, batch_size, max_length=max_length)
print('valid loader')
valid_loader = process('valid', tokenizer, batch_size, max_length=max_length)

if os.path.exists(f'{model_name}.bin'):
    print('load model')
    model = torch.load(f'{model_name}.bin')
else:
    model = BertForMultipleChoice.from_pretrained(model_path)

if use_multi_gpu:
    model = torch.nn.DataParallel(model, device_ids=device_ids)
if use_gpu:
    model.cuda()

optim = AdamW(model.parameters(), lr=lr)

num_training_steps = len(train_loader) * epochs // accumulation_steps
# num_warmup_steps = num_training_steps * 0.1 // accumulation_steps
num_warmup_steps = 0
warm_up = get_cosine_schedule_with_warmup(
    optim,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps)
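The snippet above only builds the optimizer and cosine schedule; a minimal training-loop sketch that matches its accumulation_steps arithmetic (the loop body and the assumption that process() yields dict batches are not part of the original):

model.train()
for epoch in range(epochs):
    for step, batch in enumerate(train_loader):
        # Assumes each batch is a dict compatible with model(**batch).
        if use_gpu:
            batch = {k: v.cuda() for k, v in batch.items()}
        loss = model(**batch).loss
        (loss / accumulation_steps).backward()  # scale so accumulated grads average
        if (step + 1) % accumulation_steps == 0:
            optim.step()
            warm_up.step()  # advance the cosine schedule once per optimizer step
            optim.zero_grad()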
Code example #17
File: Run_GAReader.py Project: cwszz/Semtask
def main(config, model_filename):
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    if not os.path.exists(config.cache_dir):
        os.makedirs(config.cache_dir)

    model_file = os.path.join(
        config.output_dir, model_filename)

    # Prepare the device
    # gpu_ids = [int(device_id) for device_id in config.gpu_ids.split()]
    gpu_ids = [3]
    device, n_gpu = get_device(gpu_ids[0])
    if n_gpu > 1:
        n_gpu = len(gpu_ids)

    # Set Random Seeds
    random.seed(config.seed)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        torch.backends.cudnn.deterministic = True

    tokenizer = BertTokenizer.from_pretrained('./new_bert')
    model = BertForMultipleChoice.from_pretrained('./new_bert') # ./xlnet_model

    cache_train_dataset = "cached_dataset_train_Bert_class"
    cache_dev_dataset = "cached_dataset_dev_Bert_class"
    if os.path.exists(config.cache_dir + '/' + cache_train_dataset):
        logger.info("Loading features from cached file %s", config.cache_dir + '/' + cache_train_dataset)
        train_dataset = torch.load(config.cache_dir + '/' + cache_train_dataset)
        dev_dataset = torch.load(config.cache_dir + '/' + cache_dev_dataset)
    else:
        train_dataset, dev_dataset, test_dataset = load_data(config.data_path, device, tokenizer, config.cache_dir, 32, 480)
        logger.info("save cached file in %s", config.cache_dir)
        torch.save(train_dataset, config.cache_dir + '/' + cache_train_dataset)
        torch.save(dev_dataset, config.cache_dir + '/' + cache_dev_dataset)
    train_sampler = RandomSampler(train_dataset)
    dev_sampler = RandomSampler(dev_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=config.train_batch_size, num_workers=8, pin_memory=False)
    dev_dataloader = DataLoader(dev_dataset, sampler=dev_sampler, batch_size=config.dev_batch_size, num_workers=8, pin_memory=False)
    # train_iterator = trange(int(config.epoch_num))
    # if config.model_name == "GAReader":
    #     from Bert_GAReader.GAReader.GAReader import GAReader
    #     model = GAReader(
    #         config.bert_word_dim, config.output_dim, config.hidden_size,
    #         config.rnn_num_layers, config.ga_layers, config.bidirectional,
    #         config.dropout, bert_config)
    #     print(model)
    # no_decay = ['bias', 'LayerNorm.weight']

    # optimizer = optim.Adam(model.parameters(), lr=config.lr)
    param_optimizer = list(model.named_parameters())
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(
            nd in n for nd in no_decay) and 'bert' not in n] , 'weight_decay': 0.01,'lr':3e-4},
        {'params': [p for n, p in param_optimizer if any(
            nd in n for nd in no_decay) and 'bert' not in n], 'weight_decay': 0.0,'lr':3e-4},
        {'params': [p for n, p in param_optimizer if not any(
            nd in n for nd in no_decay) and 'bert'  in n], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(
            nd in n for nd in no_decay) and 'bert'  in n], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr, eps=1e-8)
    # optimizer = optim.AdamW(optimizer_grouped_parameter, lr=config.lr)
    scheduler = get_linear_schedule_with_warmup(optimizer, 16000, 200000)
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    if config.do_train:
        train(config.epoch_num, model, train_dataloader, dev_dataloader, optimizer, criterion, ['0', '1', '2', '3', '4'],
              model_file, config.log_dir, config.print_step, config.clip, device, scheduler)

    model.load_state_dict(torch.load(model_file))

    test_loss, test_acc, test_report = evaluate(
        model, dev_dataloader, criterion, ['0', '1', '2', '3', '4'], device)
    print("-------------- Test -------------")
    print("\t Loss: {} | Acc: {} | Macro avg F1: {} | Weighted avg F1: {}".format(
        test_loss, test_acc, test_report['macro avg']['f1-score'], test_report['weighted avg']['f1-score']))
Code example #18
def main():
    # bert_config = modeling.BertConfig.from_json_file(FLAGS["bert_config_file"])

    # if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    #     raise ValueError(
    #         "Cannot use sequence length %d because the BERT model "
    #         "was only trained up to sequence length %d" %
    #         (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    processor = CommonsenseQAProcessor(split=FLAGS["split"])

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        # vocab_file=FLAGS["vocab_file"], do_lower_case=FLAGS["do_lower_case"]
    )

    model = BertForMultipleChoice.from_pretrained('bert-base-uncased', return_dict=True)
    # model.train()

    # TODO TPU handling

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    if FLAGS["do_train"]:
        train_examples = processor.get_train_examples(FLAGS["data_dir"])

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS["init_checkpoint"],
        learning_rate=FLAGS["learning_rate"],
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS["use_tpu"],
        use_one_hot_embeddings=FLAGS["use_tpu"])

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS["use_tpu"],
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS["train_batch_size"],
        eval_batch_size=FLAGS["eval_batch_size"],
        predict_batch_size=FLAGS["predict_batch_size"])

    if FLAGS["do_train"]:
        print("train started")
        # train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        input_ids_batch, token_type_ids_batch, labels_batch = file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS["max_seq_length"], tokenizer)



        # train_input_fn = file_based_input_fn_builder(
        #     input_file=train_file,
        #     seq_length=FLAGS.max_seq_length,
        #     is_training=True,
        #     drop_remainder=True)
        # estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS["do_eval"]:
        print("eval started")
        eval_examples = processor.get_dev_examples(FLAGS["data_dir"])
        eval_file = os.path.join(FLAGS["output_dir"], "eval.tf_record")
        eval_seq_length = file_based_convert_examples_to_features(
            eval_examples, label_list, FLAGS["max_seq_length"], tokenizer, eval_file)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS["use_tpu"]:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(eval_examples) / FLAGS["eval_batch_size"])

        eval_drop_remainder = True if FLAGS["use_tpu"] else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS["max_seq_length"],
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS["output_dir"], "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS["do_predict"]:
        predict_examples = processor.get_test_examples(FLAGS["data_dir"])
        predict_file = os.path.join(FLAGS["output_dir"], "predict.tf_record")
        predict_seq_length = file_based_convert_examples_to_features(
            predict_examples, label_list,
            FLAGS["max_seq_length"], tokenizer,
            predict_file)

        if FLAGS["use_tpu"]:
            # Warning: According to tpu_estimator.py, prediction on TPU is an
            # experimental feature and hence not supported here
            raise ValueError("Prediction on TPU is not supported")

        predict_drop_remainder = True if FLAGS["use_tpu"] else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS["max_seq_length"],
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        test_predictions_file = os.path.join(
            FLAGS["output_dir"],
            "test_results.csv")
        with tf.gfile.GFile(test_predictions_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for example, prediction in zip(predict_examples, result):
                output_line = ",".join([
                                           str(example.qid),
                                           str(CommonsenseQAProcessor.LABELS[np.argmax(prediction)])
                                       ] + [
                                           str(class_probability)
                                           for class_probability in prediction
                                       ]) + "\n"
                writer.write(output_line)
Code example #19
    def __len__(self):
        return len(self.labels)


train_dataset = TextDataset(train, train_label)
test_dataset = TextDataset(val, val_label)

print(train_dataset[100])  # inspect one processed example

import torch
from transformers import BertForMultipleChoice, AdamW, get_linear_schedule_with_warmup
model = BertForMultipleChoice.from_pretrained('bert-base-chinese')

# device = 'cpu'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

train_loader = DataLoader(train_dataset,
                          batch_size=8,
                          shuffle=True,
                          collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset,
                             batch_size=8,
                             shuffle=True,
                             collate_fn=collate_fn)
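The collate_fn shared by both loaders is not shown; a minimal sketch of what such a collate function typically does for BertForMultipleChoice. Treating each dataset item as a (question, choices, label) triple is an assumption about TextDataset, not part of the original:

from transformers import BertTokenizerFast
import torch

tokenizer = BertTokenizerFast.from_pretrained('bert-base-chinese')

def collate_fn(batch):
    # batch: list of (question, choices, label); these field names are assumptions.
    questions, choices, labels = zip(*batch)
    num_choices = len(choices[0])
    # Encode every (question, choice) pair; each question is repeated per choice.
    enc = tokenizer(
        [q for q, cs in zip(questions, choices) for _ in cs],
        [c for cs in choices for c in cs],
        padding=True, truncation=True, max_length=128, return_tensors='pt')
    # Reshape flat (batch*num_choices, seq_len) tensors to (batch, num_choices, seq_len).
    inputs = {k: v.view(len(batch), num_choices, -1) for k, v in enc.items()}
    inputs['labels'] = torch.tensor(labels)
    return inputs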
Code example #20
            tk = tqdm(test_loader, total=len(test_loader), position=0, leave=True, ncols=50)
            for idx, (input_ids, attention_mask, token_type_ids, y) in enumerate(tk):
                input_ids, attention_mask, token_type_ids, y = input_ids.to(conf['device']), attention_mask.to(
                    conf['device']), token_type_ids.to(conf['device']), y.to(conf['device']).long()

                output = model(input_ids, attention_mask, token_type_ids).logits.cpu().numpy()

                y_pred.extend(output)

        predictions += [y_pred]
    return predictions


if __name__ == '__main__':
    init_seeds(conf['seed'])
    model = BertForMultipleChoice.from_pretrained(conf['model']).to(conf['device'])  # model
    optimizer = AdamW(model.parameters(), lr=conf['lr'], weight_decay=conf['weight_decay'])  # AdamW optimizer
    if os.path.exists("../save/chinese_wwm_ext_L-12_H-768_A-12_fold_0.pt"):
        ## test_y is all zeros
        test_x, test_y, q_id = read_valid()
        predictions = test(test_x, test_y)
    else:
        X, y = read_data()
        # train_X, train_y, test_X, test_y = train_test_split(X, y, test_size=0.3, random_state=44)

        tokenizer = BertTokenizer.from_pretrained(conf['model'])  # load the BERT tokenizer
        # cross-validation
        folds = StratifiedKFold(n_splits=conf['fold_num'], shuffle=True, random_state=conf['seed']).split(np.arange(len(X)), y)
        train(folds, model, optimizer)

        ## test_y is all zeros
Code example #21
def main2(model_filename, model2_filename, result_save_dir):
    """
    Main function
    Conducts test with two models.
    Assumes that the first model is trained with image features 
    and the second model is trained with text features only.

    Parameters
    ----------
    model_filename: str
        Filename of the first saved model
    model2_filename: str
        Filename of the second saved model
    result_save_dir: str
        Directory to save the test result in.
    """
    #Load contexts.
    logger.info("Start loading contexts.")
    context_dict = load_contexts(CANDIDATE_ENTITIES_FILENAME)
    logger.info("Finished loading contexts.")
    logger.info("Number of contexts: {}".format(len(context_dict)))

    #Create models.
    model = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    model2 = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    if torch.cuda.is_available():
        model.cuda()
        model2.cuda()

    #If there exist cached files for the model parameters, then load them.
    if os.path.exists(model_filename):
        logger.info("Load parameters from {}.".format(model_filename))
        model.load_state_dict(torch.load(model_filename))
    if os.path.exists(model2_filename):
        logger.info("Load parameters from {}.".format(model2_filename))
        model2.load_state_dict(torch.load(model2_filename))

    #Test
    test_dataset = None
    test_dataset2 = None

    #Load cached features if cache files exist.
    if os.path.exists(DEV2_FEATURES_CACHE_DIR + "input_ids.pt"):
        logger.info("Load features from cached files.")

        input_ids = torch.load(DEV2_FEATURES_CACHE_DIR + "input_ids.pt")
        attention_mask = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "attention_mask.pt")
        token_type_ids = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "token_type_ids.pt")
        labels = torch.load(DEV2_FEATURES_CACHE_DIR + "labels.pt")

        test_dataset = torch.utils.data.TensorDataset(input_ids,
                                                      attention_mask,
                                                      token_type_ids, labels)
    else:
        logger.info("Start loading examples.")
        logger.info("JSON filename: {}".format(DEV2_JSON_FILENAME))
        examples = load_examples(DEV2_JSON_FILENAME,
                                 option_num=20,
                                 use_fixed_label=False)
        logger.info("Finished loading examples.")
        logger.info("Number of examples: {}".format(len(examples)))

        logger.info("Start converting examples to features.")
        input_ids, attention_mask, token_type_ids, labels = convert_examples_to_features(
            examples,
            context_dict,
            article_dict,
            option_num=20,
            max_seq_length=512,
            image_features_length=50)
        logger.info("Finished converting examples to features.")

        os.makedirs(DEV2_FEATURES_CACHE_DIR, exist_ok=True)

        torch.save(input_ids, DEV2_FEATURES_CACHE_DIR + "input_ids.pt")
        torch.save(attention_mask,
                   DEV2_FEATURES_CACHE_DIR + "attention_mask.pt")
        torch.save(token_type_ids,
                   DEV2_FEATURES_CACHE_DIR + "token_type_ids.pt")
        torch.save(labels, DEV2_FEATURES_CACHE_DIR + "labels.pt")
        logger.info("Saved cache files in {}.".format(DEV2_FEATURES_CACHE_DIR))

        test_dataset = torch.utils.data.TensorDataset(input_ids,
                                                      attention_mask,
                                                      token_type_ids, labels)

    #Load cached features if cache files exist.
    if os.path.exists(DEV2_FEATURES_CACHE_DIR + "input_ids_text_only.pt"):
        logger.info("Load text-only features from cached files.")

        input_ids = torch.load(DEV2_FEATURES_CACHE_DIR +
                               "input_ids_text_only.pt")
        attention_mask = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "attention_mask_text_only.pt")
        token_type_ids = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "token_type_ids_text_only.pt")
        labels = torch.load(DEV2_FEATURES_CACHE_DIR + "labels_text_only.pt")

        test_dataset2 = torch.utils.data.TensorDataset(input_ids,
                                                       attention_mask,
                                                       token_type_ids, labels)
    else:
        logger.info("Start loading examples.")
        logger.info("JSON filename: {}".format(DEV2_JSON_FILENAME))
        examples = load_examples(DEV2_JSON_FILENAME,
                                 option_num=20,
                                 use_fixed_label=False)
        logger.info("Finished loading examples.")
        logger.info("Number of examples: {}".format(len(examples)))

        logger.info("Start converting examples to text-only features.")
        input_ids, attention_mask, token_type_ids, labels = convert_examples_to_features_text_only(
            examples, context_dict, option_num=20, max_seq_length=512)
        logger.info("Finished converting examples to text-only features.")

        #os.makedirs(DEV2_FEATURES_CACHE_DIR,exist_ok=True)

        torch.save(input_ids,
                   DEV2_FEATURES_CACHE_DIR + "input_ids_text_only.pt")
        torch.save(attention_mask,
                   DEV2_FEATURES_CACHE_DIR + "attention_mask_text_only.pt")
        torch.save(token_type_ids,
                   DEV2_FEATURES_CACHE_DIR + "token_type_ids_text_only.pt")
        torch.save(labels, DEV2_FEATURES_CACHE_DIR + "labels_text_only.pt")
        logger.info("Saved cache files in {}.".format(DEV2_FEATURES_CACHE_DIR))

        test_dataset2 = torch.utils.data.TensorDataset(input_ids,
                                                       attention_mask,
                                                       token_type_ids, labels)

    test_with_two_models(model,
                         model2,
                         test_dataset,
                         test_dataset2,
                         batch_size=4,
                         result_filename=result_save_dir + "result.txt",
                         labels_filename=result_save_dir + "labels.txt")
Code example #22
File: run_classifier.py Project: drxmy/BertHub
def main():
    '''
    NOTES:
    1. This is the main function for training a model for your downstream
    natural language processing task, such as question-answer matching or
    sequence classification.
    2. You can load any other pretrained model that Hugging Face supports,
    for example: hfl/chinese-bert-wwm.
    3. Feel free to share this project; if you do, please star the repo and
    include a link.
    4. Best wishes with your modeling. Enjoy!
    '''
    PATH = 'drive/MyDrive/drive/haihua/data/'
    SEED = 2020
    EPOCHS = 5
    BATCH_SIZE = 16
    MAX_LENGTH = 128
    LEARNING_RATE = 1e-5
    NAME = 'hfl/chinese-bert-wwm'

    fix_seed(SEED)
    train = load_data(PATH, train_test='train')
    test = load_data(PATH, train_test='validation')
    print('train example: context={}, pair={}, label={}'.format(
        train[0].context, train[0].pair, train[0].label))
    print('test example: context={}, pair={}, label={}'.format(
        test[0].context, test[0].pair, test[0].label))
    print('Data loaded!!')
    print('***************************')

    train_dataloader, valid_dataloader = process(train,
                                                 NAME,
                                                 BATCH_SIZE,
                                                 MAX_LENGTH,
                                                 threshold=0.8)
    del train
    print('train data process done !!')
    print('###########################')

    test_dataloader = process(test, NAME, BATCH_SIZE, MAX_LENGTH)
    del test
    print('test data process done !!')
    print('###########################')
    bert = BertForMultipleChoice.from_pretrained(NAME)
    optimizer = AdamW(bert.parameters(), lr=LEARNING_RATE)
    total_steps = len(train_dataloader) * EPOCHS
    # change learning rate dynamically in total steps,
    # during warmup phase and train period
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)

    bert.cuda()

    for epoch in range(EPOCHS):

        print('======== Epoch {:} / {:} ========'.format(epoch + 1, EPOCHS))
        print('Training...')
        bert.train()
        start_train = time.time()
        total_train_loss = 0

        # fgm = FGM(bert) #*
        for step, batch in enumerate(train_dataloader):

            if step % 200 == 0:
                elapsed = format_time(time.time() - start_train)
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                    step, len(train_dataloader), elapsed))

            batch_input_ids = batch[1].cuda()
            batch_token_type_ids = batch[2].cuda()
            batch_attention_masks = batch[3].cuda()
            batch_labels = batch[4].cuda()

            outputs = bert(batch_input_ids,
                           batch_attention_masks,
                           batch_token_type_ids,
                           labels=batch_labels)
            bert.zero_grad()
            outputs.loss.backward()
            torch.nn.utils.clip_grad_norm_(bert.parameters(), 1.0)

            # score down
            # fgm.attack() #*
            # outputs = bert(batch_input_ids,
            #                     batch_attention_masks, batch_token_type_ids, labels=batch_labels) #*
            # loss_adv = outputs.loss #*
            # loss_adv.backward() #*
            # fgm.restore() #*

            del batch_input_ids, batch_token_type_ids, batch_attention_masks, batch_labels

            optimizer.step()
            scheduler.step()
            total_train_loss += outputs.loss.item()

        average_train_loss = total_train_loss / len(train_dataloader)
        training_time = format_time(time.time() - start_train)
        print("  Average training CrossEntropyLoss: {0:.2f}".format(
            average_train_loss))
        print("  Training epcoh took: {:}".format(training_time))

        print('Running Validation...')
        bert.eval()
        start_eval = time.time()
        total_eval_loss = 0
        total_eval_f1 = 0
        for step, batch in enumerate(valid_dataloader):

            if step % 200 == 0:
                elapsed = format_time(time.time() - start_eval)
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                    step, len(valid_dataloader), elapsed))

            batch_input_ids = batch[1].cuda()
            batch_token_type_ids = batch[2].cuda()
            batch_attention_masks = batch[3].cuda()
            batch_labels = batch[4].cuda()

            with torch.no_grad():
                outputs = bert(batch_input_ids,
                               batch_attention_masks,
                               batch_token_type_ids,
                               labels=batch_labels)
                total_eval_loss += outputs.loss.item()
                # Accumulate per-batch accuracy here; in the original this line
                # sat after the loop and only saw the last batch.
                total_eval_f1 += flat_accuracy(outputs.logits, batch_labels)

            del batch_input_ids, batch_token_type_ids, batch_attention_masks, batch_labels

        average_eval_loss = total_eval_loss / len(valid_dataloader)
        average_eval_f1 = total_eval_f1 / len(valid_dataloader)

        validation_time = format_time(time.time() - start_eval)
        print("  Average eval CrossEntropyLoss: {0:.2f}".format(
            average_eval_loss))
        print("  Eval auc score: {0:.2f}".format(total_eval_f1))
        print('  Validation took: {:}'.format(validation_time))

    print('Start predict ...')
    sub_id = []
    predictions = []
    for step, batch in enumerate(test_dataloader):
        batch_ids = batch[0]
        batch_input_ids = batch[1].cuda()
        batch_token_type_ids = batch[2].cuda()
        batch_attention_masks = batch[3].cuda()

        with torch.no_grad():
            outputs = bert(batch_input_ids, batch_attention_masks,
                           batch_token_type_ids)

        ids = batch_ids.tolist()
        logits = outputs.logits.detach().cpu().numpy()
        flat_predictions = np.argmax(logits, axis=1).flatten().tolist()
        sub_id += ids
        predictions += flat_predictions

    def convert_id(x):
        if len(str(x)) < 6:
            return '0' * (6 - len(str(x))) + str(x)
        return str(x)

    def convert_label(x):
        res = ['A', 'B', 'C', 'D']
        return res[x]

    sub = pd.DataFrame()
    sub['id'] = sub_id
    sub['label'] = predictions
    sub['label'] = sub['label'].apply(convert_label)

    sub.sort_values('id', inplace=True)
    sub['id'] = sub['id'].apply(convert_id)
    sub.to_csv('/content/drive/MyDrive/drive/haihua/output/sub.csv',
               index=False)
    print('Everything Done !!')
Code example #23
def main(test_input_dir, im_features_dir, test_upper_bound, result_save_dir):
    #Load a list of options.
    logger.info("Load a list of options.")
    test_options = load_options_list(
        os.path.join(test_input_dir, "options_list.txt"))

    #Create a dataloader.
    logger.info("Create a test dataloader from {}.".format(test_input_dir))
    test_dataloader = create_dataloader(test_input_dir,
                                        4,
                                        num_options=20,
                                        shuffle=False,
                                        drop_last=False)

    #Load a pre-trained BERT model.
    logger.info("Load a pre-trained BERT model.")
    bert_model = BertModel.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    bert_model.to(device)

    #Create a BertForMultipleChoice model.
    logger.info("Create a BertForMultipleChoice model.")
    bfmc_model = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    bfmc_model.to(device)

    #Create a directory to save the results in.
    #os.makedirs(result_save_dir,exist_ok=True)

    logger.info("Start test.")
    for i in range(test_upper_bound):
        parameters_filepath = os.path.join(result_save_dir,
                                           "checkpoint_{}.pt".format(i + 1))
        logger.info(
            "Load model parameters from {}.".format(parameters_filepath))
        if torch.cuda.is_available():
            bfmc_model.load_state_dict(torch.load(parameters_filepath))
        else:
            bfmc_model.load_state_dict(
                torch.load(parameters_filepath,
                           map_location=torch.device("cpu")))

        pred_labels, correct_labels, accuracy = test(bert_model, bfmc_model,
                                                     test_options,
                                                     im_features_dir,
                                                     test_dataloader)

        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath = os.path.join(result_save_dir,
                                    "result_test_{}.txt".format(i + 1))
        labels_filepath = os.path.join(result_save_dir,
                                       "labels_test_{}.txt".format(i + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model test.")
Code example #24
def train():
    # Check the configuration and fetch hyperparameters
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device:{} n_gpu:{}".format(device, n_gpu))
    seed = hyperparameters["seed"]
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    max_seq_length = hyperparameters["max_sent_length"]
    gradient_accumulation_steps = hyperparameters["gradient_accumulation_steps"]
    num_epochs = hyperparameters["num_epoch"]
    train_batch_size = hyperparameters["train_batch_size"] // hyperparameters["gradient_accumulation_steps"]
    tokenizer = BertTokenizer.from_pretrained("bert-large-uncased", do_lower_case=True)
    model = BertForMultipleChoice.from_pretrained("bert-large-uncased")
    model.to(device)

    # Optimizer
    param_optimizer = list(model.named_parameters())

    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    # Load the data
    train_examples = read_examples('../dataset/train_bert.txt')
    dev_examples = read_examples('../dataset/test_bert.txt')
    nTrain = len(train_examples)
    nDev = len(dev_examples)
    num_train_optimization_steps = int(nTrain / train_batch_size / gradient_accumulation_steps) * num_epochs
    optimizer = AdamW(optimizer_grouped_parameters, lr=hyperparameters["learning_rate"])
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=int(0.1 * num_train_optimization_steps),
                                                num_training_steps=num_train_optimization_steps)

    global_step = 0
    train_features = convert_examples_to_features(train_examples, tokenizer, max_seq_length)
    dev_features = convert_examples_to_features(dev_examples, tokenizer, max_seq_length)
    train_dataloader = get_train_dataloader(train_features, train_batch_size)
    dev_dataloader = get_eval_dataloader(dev_features, hyperparameters["eval_batch_size"])
    print("Num of train features:{}".format(nTrain))
    print("Num of dev features:{}".format(nDev))
    best_dev_accuracy = 0
    best_dev_epoch = 0
    no_up = 0

    epoch_tqdm = trange(int(num_epochs), desc="Epoch")
    for epoch in epoch_tqdm:
        model.train()

        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, label_ids = batch
            loss, logits = model(input_ids=input_ids, labels=label_ids)[:2]
            if gradient_accumulation_steps > 1:
                loss = loss / gradient_accumulation_steps
            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            loss.backward()
            if (step + 1) % gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
                global_step += 1

        train_loss, train_accuracy = evaluate(model, device, train_dataloader, "Train")
        dev_loss, dev_accuracy = evaluate(model, device, dev_dataloader, "Dev")

        if dev_accuracy > best_dev_accuracy:
            best_dev_accuracy = dev_accuracy
            best_dev_epoch = epoch + 1
            no_up = 0

        else:
            no_up += 1
        tqdm.write("\t ***** Eval results (Epoch %s) *****" % str(epoch + 1))
        tqdm.write("\t train_accuracy = %s" % str(train_accuracy))
        tqdm.write("\t dev_accuracy = %s" % str(dev_accuracy))
        tqdm.write("")
        tqdm.write("\t best_dev_accuracy = %s" % str(best_dev_accuracy))
        tqdm.write("\t best_dev_epoch = %s" % str(best_dev_epoch))
        tqdm.write("\t no_up = %s" % str(no_up))
        tqdm.write("")
        if no_up >= hyperparameters["patience"]:
            epoch_tqdm.close()
            break
Code example #25
def main(test_input_dir, model_dir, example_filepath, count_dir, nqis_filepath,
         ignores_filepath, test_upper_bound, result_save_dir):
    logger.info("Seed: {}".format(SEED))

    #Create a dataloader.
    logger.info("Create test dataloader from {}.".format(test_input_dir))
    test_dataset = create_dataset(test_input_dir,
                                  num_examples=-1,
                                  num_options=20)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=4,
                                 shuffle=False,
                                 drop_last=True)

    #Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    #Load examples.
    examples = load_examples(example_filepath)

    #Load nqis.
    nqis = {}
    with open(nqis_filepath, "r", encoding="utf_8") as r:
        lines = r.read().splitlines()
    for line in lines:
        splits = line.split("\t")
        word = splits[0]
        count = int(splits[1])

        nqis[word] = count

    #Load ignores.
    ignores = []
    with open(ignores_filepath, "r", encoding="utf_8") as r:
        lines = r.read().splitlines()
    for line in lines:
        ignores.append(line)

    mecab = MeCab.Tagger()

    #Create a directory to save the results in.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model evaluation.")
    for i in range(test_upper_bound):
        model_filepath = os.path.join(model_dir,
                                      "checkpoint_{}.pt".format(i + 1))
        logger.info("Load model parameters from {}.".format(model_filepath))

        parameters = torch.load(model_filepath, map_location=device)
        classifier_model.load_state_dict(parameters)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, test_dataloader, examples, mecab, count_dir,
            nqis, ignores)
        logger.info("Accuracy: {}".format(accuracy))

        #Save results as text files.
        res_filepath = os.path.join(result_save_dir,
                                    "result_test_{}.txt".format(i + 1))
        labels_filepath = os.path.join(result_save_dir,
                                       "labels_test_{}.txt".format(i + 1))

        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))

    logger.info("Finished model evaluation.")
Code example #26
import torch
from transformers import BertTokenizer, BertForMultipleChoice

import logging
logging.basicConfig(level=logging.INFO)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]

print(loss)
print(classification_scores)
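The tuple indexing above follows the older transformers API; on versions that return model outputs as objects, the same example can read the fields by name:

outputs = model(input_ids, labels=labels, return_dict=True)
print(outputs.loss)    # scalar classification loss
print(outputs.logits)  # shape: (batch_size, num_choices)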
Code example #27
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    # Detecting last checkpoint.
    last_checkpoint = None
    if (os.path.isdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(
                training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(
        logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)

    # Log a small summary on each process:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).

    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
    # 'text' is found. You can easily tweak this behavior (see below).

    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if data_args.train_file is not None or data_args.validation_file is not None:
        data_files = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
        extension = (data_args.train_file if data_args.train_file is not None
                     else data_args.validation_file).split(".")[-1]
        datasets = load_dataset(extension, data_files=data_files)
    else:
        # Downloading and loading the swag dataset from the hub.
        datasets = load_dataset("swag", "regular")
    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Load pretrained model and tokenizer

    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForMultipleChoice.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # When using your own dataset or a different dataset from swag, you will probably need to change this.
    ending_names = [f"ending{i}" for i in range(4)]
    context_name = "sent1"
    question_header_name = "sent2"

    if data_args.max_seq_length is None:
        max_seq_length = tokenizer.model_max_length
        if max_seq_length > 1024:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
            )
            max_seq_length = 1024
    else:
        if data_args.max_seq_length > tokenizer.model_max_length:
            logger.warning(
                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
                f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
            )
        max_seq_length = min(data_args.max_seq_length,
                             tokenizer.model_max_length)

    # Preprocessing the datasets.
    def preprocess_function(examples):
        first_sentences = [[context] * 4 for context in examples[context_name]]
        question_headers = examples[question_header_name]
        second_sentences = [[
            f"{header} {examples[end][i]}" for end in ending_names
        ] for i, header in enumerate(question_headers)]

        # Flatten out
        first_sentences = sum(first_sentences, [])
        second_sentences = sum(second_sentences, [])

        # Tokenize
        tokenized_examples = tokenizer(
            first_sentences,
            second_sentences,
            truncation=True,
            max_length=max_seq_length,
            padding="max_length" if data_args.pad_to_max_length else False,
        )
        # Un-flatten
        return {
            k: [v[i:i + 4] for i in range(0, len(v), 4)]
            for k, v in tokenized_examples.items()
        }

    if training_args.do_train:
        if "train" not in datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = datasets["train"]
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset.select(
                range(data_args.max_train_samples))
        train_dataset = train_dataset.map(
            preprocess_function,
            batched=True,
            num_proc=data_args.preprocessing_num_workers,
            load_from_cache_file=not data_args.overwrite_cache,
        )

    if training_args.do_eval:
        if "validation" not in datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = datasets["validation"]
        if data_args.max_val_samples is not None:
            eval_dataset = eval_dataset.select(range(
                data_args.max_val_samples))
        eval_dataset = eval_dataset.map(
            preprocess_function,
            batched=True,
            num_proc=data_args.preprocessing_num_workers,
            load_from_cache_file=not data_args.overwrite_cache,
        )

    # Data collator
    data_collator = (default_data_collator if data_args.pad_to_max_length else
                     DataCollatorForMultipleChoice(
                         tokenizer=tokenizer,
                         pad_to_multiple_of=8 if training_args.fp16 else None))

    # Metric
    def compute_metrics(eval_predictions):
        predictions, label_ids = eval_predictions
        preds = np.argmax(predictions, axis=1)
        return {
            "accuracy": (preds == label_ids).astype(np.float32).mean().item()
        }

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    # Training
    if training_args.do_train:
        if last_checkpoint is not None:
            checkpoint = last_checkpoint
        elif os.path.isdir(model_args.model_name_or_path):
            checkpoint = model_args.model_name_or_path
        else:
            checkpoint = None
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the tokenizer too for easy upload
        metrics = train_result.metrics

        max_train_samples = (data_args.max_train_samples
                             if data_args.max_train_samples is not None else
                             len(train_dataset))
        metrics["train_samples"] = min(max_train_samples, len(train_dataset))

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        metrics = trainer.evaluate()
        max_val_samples = (data_args.max_val_samples
                           if data_args.max_val_samples is not None
                           else len(eval_dataset))
        metrics["eval_samples"] = min(max_val_samples, len(eval_dataset))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)
コード例 #28
def main(batch_size,num_epochs,lr,train_input_dir,dev1_input_dir,im_features_dir,result_save_dir):
    logger.info("batch_size: {} num_epochs: {} lr: {}".format(batch_size,num_epochs,lr))

    #Load lists of options.
    logger.info("Load lists of options.")

    train_options=load_options_list(os.path.join(train_input_dir,"options_list.txt"))
    dev1_options=load_options_list(os.path.join(dev1_input_dir,"options_list.txt"))

    #Create dataloaders.
    logger.info("Create a training dataloader from {}.".format(train_input_dir))
    train_dataloader=create_dataloader(train_input_dir,batch_size,num_options=4,shuffle=True,drop_last=True)

    logger.info("Create a dev1 dataloader from {}.".format(dev1_input_dir))
    dev1_dataloader=create_dataloader(dev1_input_dir,4,num_options=20,shuffle=False,drop_last=False)

    #Load a pre-trained BERT model.
    logger.info("Load a pre-trained BERT model.")
    bert_model=BertModel.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
    bert_model.to(device)

    #Create a BertForMultipleChoice model.
    logger.info("Create a BertForMultipleChoice model.")
    bfmc_model=BertForMultipleChoice.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
    bfmc_model.to(device)

    #Create an optimizer and a scheduler.
    optimizer=AdamW(bfmc_model.parameters(),lr=lr,eps=1e-8)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps
    )

    #Create a directory to save the results in.
    os.makedirs(result_save_dir,exist_ok=True)

    logger.info("Start model training.")
    for epoch in range(num_epochs):
        logger.info("===== Epoch {}/{} =====".format(epoch+1,num_epochs))

        train(bert_model,bfmc_model,train_options,im_features_dir,optimizer,scheduler,train_dataloader)
        pred_labels,correct_labels,accuracy=evaluate(bert_model,bfmc_model,dev1_options,im_features_dir,dev1_dataloader)

        logger.info("Accuracy: {}".format(accuracy))

        #Save model parameters.
        checkpoint_filepath=os.path.join(result_save_dir,"checkpoint_{}.pt".format(epoch+1))
        torch.save(bfmc_model.state_dict(),checkpoint_filepath)

        #Save results as text files.
        res_filepath=os.path.join(result_save_dir,"result_eval_{}.txt".format(epoch+1))
        labels_filepath=os.path.join(result_save_dir,"labels_eval_{}.txt".format(epoch+1))

        with open(res_filepath,"w") as w:
            w.write("Accuracy: {}\n".format(accuracy))

        with open(labels_filepath,"w") as w:
            for pred_label,correct_label in zip(pred_labels,correct_labels):
                w.write("{} {}\n".format(pred_label,correct_label))

    logger.info("Finished model training.")
コード例 #29
def main(args):
    train_input_dir:str=args.train_input_dir
    dev_input_dir:str=args.dev_input_dir
    bert_model_dir:str=args.bert_model_dir
    train_batch_size:int=args.train_batch_size
    num_epochs:int=args.num_epochs
    lr:float=args.lr
    result_save_dir:str=args.result_save_dir
    train_logging_steps:int=args.train_logging_steps

    logger.info("バッチサイズ: {}".format(train_batch_size))
    logger.info("エポック数: {}".format(num_epochs))
    logger.info("学習率: {}".format(lr))

    logger.info("{}から訓練用データセットを作成します。".format(train_input_dir))
    train_dataset=mf.create_dataset(train_input_dir,num_examples=-1,num_options=4)

    logger.info("{}からDev用データローダを作成します。".format(dev_input_dir))
    dev_dataset=mf.create_dataset(dev_input_dir,num_examples=-1,num_options=20)
    dev_dataloader=DataLoader(dev_dataset,batch_size=4,shuffle=False)

    logger.info("{}から事前学習済みの重みを読み込みます。".format(bert_model_dir))
    classifier_model=BertForMultipleChoice.from_pretrained(bert_model_dir)
    classifier_model.to(device)

    #Create a directory to save the results in.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir,exist_ok=True)

    num_iterations=len(train_dataset)//train_batch_size
    total_steps=num_iterations*num_epochs

    optimizer=AdamW(classifier_model.parameters(),lr=lr,eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps
    )

    #Training loop
    for epoch in range(num_epochs):
        logger.info("===== Epoch {}/{} =====".format(epoch,num_epochs-1))

        #Training
        train_dataloader=DataLoader(train_dataset,batch_size=train_batch_size,shuffle=True)
        mean_loss=mf.train(
            classifier_model,
            optimizer,
            scheduler,
            train_dataloader,
            device,
            logger,
            train_logging_steps)
        logger.info("訓練時の損失平均値: {}".format(mean_loss))

        #Save a checkpoint.
        checkpoint_filepath=os.path.join(result_save_dir,"checkpoint_{}.pt".format(epoch))
        torch.save(classifier_model.state_dict(),checkpoint_filepath)

        #Evaluation
        result_save_filepath=os.path.join(result_save_dir,"result_eval_{}.txt".format(epoch))
        labels_save_filepath=os.path.join(result_save_dir,"labels_eval_{}.txt".format(epoch))
        logits_save_filepath=os.path.join(result_save_dir,"logits_eval_{}.txt".format(epoch))
        mf.evaluate_and_save_result(
            classifier_model,
            dev_dataloader,
            result_save_filepath,
            labels_save_filepath,
            logits_save_filepath,
            device,
            logger
        )
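
#main above reads specific attributes off an argparse-style namespace; a
#minimal matching parser (the argument names follow the attributes read at
#the top of main, while the defaults are illustrative only):
import argparse

if __name__=="__main__":
    parser=argparse.ArgumentParser()
    parser.add_argument("--train_input_dir",type=str,default="Train")
    parser.add_argument("--dev_input_dir",type=str,default="Dev")
    parser.add_argument("--bert_model_dir",type=str,default="cl-tohoku/bert-base-japanese-whole-word-masking")
    parser.add_argument("--train_batch_size",type=int,default=32)
    parser.add_argument("--num_epochs",type=int,default=5)
    parser.add_argument("--lr",type=float,default=2e-5)
    parser.add_argument("--result_save_dir",type=str,default="Result")
    parser.add_argument("--train_logging_steps",type=int,default=100)
    args=parser.parse_args()
    main(args)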
コード例 #30
def main(do_train, train_batch_size, train_epoch_num, model_filename,
         result_save_dir):
    """
    Main function

    Parameters
    ----------
    do_train: bool
        Runs model training if true.
    train_batch_size: int
        Batch size for model training
    train_epoch_num: int
        Number of epochs for model training
    model_filename: str
        Filename of the saved model
    result_save_dir: str
        Directory to save the test result in.
    """
    #Load the list of articles.
    logger.info("Start loading the article list.")
    df = pd.read_table(ARTICLE_LIST_FILENAME, header=None)
    logger.info("Finished loading the article list.")

    #Make a dict of articles.
    logger.info("Start creating a dict of articles.")

    article_dict = {}
    for row in df.itertuples(name=None):
        article_name = row[1]
        dir_1 = row[2]
        dir_2 = row[3]

        image_dir = IMAGE_BASE_DIR + str(dir_1) + "/" + str(dir_2) + "/"
        article_dict[article_name] = image_dir

    logger.info("Finished creating a dict of articles.")

    #Load contexts.
    logger.info("Start loading contexts.")
    context_dict = load_contexts(CANDIDATE_ENTITIES_FILENAME)
    logger.info("Finished loading contexts.")
    logger.info("Number of contexts: {}".format(len(context_dict)))

    #Create a model.
    model = BertForMultipleChoice.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    if torch.cuda.is_available():
        model.cuda()

    #If there exists a cached file for the model parameters, then load it.
    if os.path.exists(model_filename):
        logger.info("Load parameters from {}.".format(model_filename))
        model.load_state_dict(torch.load(model_filename))

    #Load COCO labels.
    logger.info("Load labels for the COCO dataset.")
    logger.info("Filename: {}".format(COCO_LABEL_LIST_FILENAME))

    label_dict = {}
    with open(COCO_LABEL_LIST_FILENAME, mode="r", encoding="utf-8") as r:
        for index, label in enumerate(r):
            stripped_label = label.strip()
            label_dict[index] = stripped_label
            logger.info("{} {}".format(index, stripped_label))

    if do_train:
        #Train
        train_dataset = None

        #Load cached features if cache files exist.
        if os.path.exists(TRAIN_FEATURES_CACHE_DIR + "input_ids.pt"):
            logger.info("Load features from cached files.")

            input_ids = torch.load(TRAIN_FEATURES_CACHE_DIR + "input_ids.pt")
            attention_mask = torch.load(TRAIN_FEATURES_CACHE_DIR +
                                        "attention_mask.pt")
            token_type_ids = torch.load(TRAIN_FEATURES_CACHE_DIR +
                                        "token_type_ids.pt")
            labels = torch.load(TRAIN_FEATURES_CACHE_DIR + "labels.pt")

            train_dataset = torch.utils.data.TensorDataset(
                input_ids, attention_mask, token_type_ids, labels)

        else:
            logger.info("Start loading examples.")
            logger.info("JSON filename: {}".format(TRAIN_JSON_FILENAME))
            examples = load_examples(TRAIN_JSON_FILENAME,
                                     option_num=TRAIN_OPTION_NUM,
                                     use_fixed_label=True)
            logger.info("Finished loading examples.")
            logger.info("Number of examples: {}".format(len(examples)))

            logger.info("Start converting examples to features.")
            input_ids, attention_mask, token_type_ids, labels = convert_examples_to_features(
                examples,
                context_dict,
                article_dict,
                option_num=TRAIN_OPTION_NUM,
                max_seq_length=512,
                image_features_length=50)
            input_ids, attention_mask, token_type_ids, labels = convert_examples_to_features_pred_labels(
                examples,
                context_dict,
                article_dict,
                label_dict,
                option_num=4,
                max_seq_length=512)
            logger.info("Finished converting examples to features.")

            os.makedirs(TRAIN_FEATURES_CACHE_DIR, exist_ok=True)

            torch.save(input_ids, TRAIN_FEATURES_CACHE_DIR + "input_ids.pt")
            torch.save(attention_mask,
                       TRAIN_FEATURES_CACHE_DIR + "attention_mask.pt")
            torch.save(token_type_ids,
                       TRAIN_FEATURES_CACHE_DIR + "token_type_ids.pt")
            torch.save(labels, TRAIN_FEATURES_CACHE_DIR + "labels.pt")
            logger.info(
                "Saved cache files in {}.".format(TRAIN_FEATURES_CACHE_DIR))

            train_dataset = torch.utils.data.TensorDataset(
                input_ids, attention_mask, token_type_ids, labels)

        train(model,
              train_dataset,
              batch_size=train_batch_size,
              epoch_num=train_epoch_num,
              model_filename=model_filename)

    #Test
    test_dataset = None

    #Load cached features if cache files exist.
    if os.path.exists(DEV2_FEATURES_CACHE_DIR + "input_ids.pt"):
        logger.info("Load features from cached files.")

        input_ids = torch.load(DEV2_FEATURES_CACHE_DIR + "input_ids.pt")
        attention_mask = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "attention_mask.pt")
        token_type_ids = torch.load(DEV2_FEATURES_CACHE_DIR +
                                    "token_type_ids.pt")
        labels = torch.load(DEV2_FEATURES_CACHE_DIR + "labels.pt")

        test_dataset = torch.utils.data.TensorDataset(input_ids,
                                                      attention_mask,
                                                      token_type_ids, labels)

    else:
        logger.info("Start loading examples.")
        logger.info("JSON filename: {}".format(DEV2_JSON_FILENAME))
        examples = load_examples(DEV2_JSON_FILENAME,
                                 option_num=20,
                                 use_fixed_label=False)
        logger.info("Finished loading examples.")
        logger.info("Number of examples: {}".format(len(examples)))

        logger.info("Start converting examples to features.")
        input_ids, attention_mask, token_type_ids, labels = convert_examples_to_features_pred_labels(
            examples,
            context_dict,
            article_dict,
            label_dict,
            option_num=20,
            max_seq_length=512)
        logger.info("Finished converting examples to features.")

        os.makedirs(DEV2_FEATURES_CACHE_DIR, exist_ok=True)

        torch.save(input_ids, DEV2_FEATURES_CACHE_DIR + "input_ids.pt")
        torch.save(attention_mask,
                   DEV2_FEATURES_CACHE_DIR + "attention_mask.pt")
        torch.save(token_type_ids,
                   DEV2_FEATURES_CACHE_DIR + "token_type_ids.pt")
        torch.save(labels, DEV2_FEATURES_CACHE_DIR + "labels.pt")
        logger.info("Saved cache files in {}.".format(DEV2_FEATURES_CACHE_DIR))

        test_dataset = torch.utils.data.TensorDataset(input_ids,
                                                      attention_mask,
                                                      token_type_ids, labels)

    test(model,
         test_dataset,
         batch_size=4,
         result_filename=os.path.join(result_save_dir, "result.txt"),
         labels_filename=os.path.join(result_save_dir, "labels.txt"))
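
#The train and test branches above share one feature-caching pattern; a
#condensed sketch of that round trip (load_or_build_dataset, build_features,
#and cache_dir are hypothetical stand-ins for the inline logic above):
def load_or_build_dataset(cache_dir, build_features):
    """Return a TensorDataset, rebuilding and caching the feature tensors if needed."""
    names = ["input_ids", "attention_mask", "token_type_ids", "labels"]
    if os.path.exists(cache_dir + "input_ids.pt"):
        tensors = [torch.load(cache_dir + name + ".pt") for name in names]
    else:
        tensors = build_features()  #must return the four tensors listed above
        os.makedirs(cache_dir, exist_ok=True)
        for name, tensor in zip(names, tensors):
            torch.save(tensor, cache_dir + name + ".pt")
    return torch.utils.data.TensorDataset(*tensors)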