Exemple #1
0
    def test_bert_5_w(self):

        vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
        embed = BertEmbedding(vocab, model_dir_or_name='tests/data_for_tests/embedding/small_bert',
                              include_cls_sep=False)

        with self.assertWarns(Warning):
            model = BertForSentenceMatching(embed)

            input_ids = torch.LongTensor([[1, 2, 3], [6, 5, 0]])

            pred = model.predict(input_ids)
            self.assertTrue(isinstance(pred, dict))
            self.assertTrue(Const.OUTPUT in pred)
            self.assertEqual(tuple(pred[Const.OUTPUT].shape), (2,))
                               tokenizer=arg.tokenizer).process_from_file()
elif arg.task == 'quora':
    data_bundle = QuoraBertPipe(lower=arg.to_lower,
                                tokenizer=arg.tokenizer).process_from_file()
else:
    raise RuntimeError(f'NOT support {arg.task} task yet!')

print(data_bundle)  # print details in data_bundle

# load embedding
embed = BertEmbedding(data_bundle.vocabs[Const.INPUT],
                      model_dir_or_name=arg.bert_model_dir_or_name)

# define model
model = BertForSentenceMatching(embed,
                                num_labels=len(
                                    data_bundle.vocabs[Const.TARGET]))

# define optimizer and callback
optimizer = AdamW(lr=arg.lr, params=model.parameters())
callbacks = [
    WarmupCallback(warmup=arg.warm_up_rate, schedule='linear'),
]

if arg.task in ['snli']:
    callbacks.append(
        EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name]))
    # evaluate test set in every epoch if task is snli.

# define trainer
trainer = Trainer(train_data=data_bundle.get_dataset(arg.train_dataset_name),