def test_bert_5_w(self):
    """Smoke-test BertForSentenceMatching.predict with include_cls_sep=False.

    Builds a tiny vocabulary and a small fixture BERT embedding, checks that
    constructing the matching model emits a Warning, and verifies predict()
    returns a dict holding a Const.OUTPUT tensor of shape (batch,) == (2,).
    """
    # Minimal vocabulary built from a whitespace-split sentence.
    words = "this is a test [SEP] .".split()
    vocab = Vocabulary().add_word_lst(words)

    # Small fixture BERT checkpoint; CLS/SEP positions are excluded here.
    embedding = BertEmbedding(
        vocab,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=False,
    )

    # Model construction is expected to warn (include_cls_sep is False).
    with self.assertWarns(Warning):
        matcher = BertForSentenceMatching(embedding)

    # Two sequences, padded with 0 in the second row.
    token_ids = torch.LongTensor([[1, 2, 3], [6, 5, 0]])
    output = matcher.predict(token_ids)

    self.assertTrue(isinstance(output, dict))
    self.assertTrue(Const.OUTPUT in output)
    # One predicted label per sequence in the batch.
    self.assertEqual(tuple(output[Const.OUTPUT].shape), (2,))
tokenizer=arg.tokenizer).process_from_file() elif arg.task == 'quora': data_bundle = QuoraBertPipe(lower=arg.to_lower, tokenizer=arg.tokenizer).process_from_file() else: raise RuntimeError(f'NOT support {arg.task} task yet!') print(data_bundle) # print details in data_bundle # load embedding embed = BertEmbedding(data_bundle.vocabs[Const.INPUT], model_dir_or_name=arg.bert_model_dir_or_name) # define model model = BertForSentenceMatching(embed, num_labels=len( data_bundle.vocabs[Const.TARGET])) # define optimizer and callback optimizer = AdamW(lr=arg.lr, params=model.parameters()) callbacks = [ WarmupCallback(warmup=arg.warm_up_rate, schedule='linear'), ] if arg.task in ['snli']: callbacks.append( EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name])) # evaluate test set in every epoch if task is snli. # define trainer trainer = Trainer(train_data=data_bundle.get_dataset(arg.train_dataset_name),