def test_bert_3(self):
    """BertForTokenClassification over a tiny BERT yields a dict whose
    Const.OUTPUT entry has shape (batch, seq_len, num_labels)."""
    vocabulary = Vocabulary().add_word_lst("this is a test [SEP] .".split())
    embedding = BertEmbedding(
        vocabulary,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=False,
    )
    tagger = BertForTokenClassification(embedding, 7)

    # Two sequences of length 3; the trailing 0 acts as padding.
    batch = torch.LongTensor([[1, 2, 3], [6, 5, 0]])
    outputs = tagger(batch)

    self.assertTrue(isinstance(outputs, dict))
    self.assertTrue(Const.OUTPUT in outputs)
    self.assertEqual(tuple(outputs[Const.OUTPUT].shape), (2, 3, 7))
def test_bert_1_w(self):
    """Building BertForSequenceClassification here must emit a Warning;
    predict() then returns a dict with a (batch,)-shaped Const.OUTPUT."""
    vocabulary = Vocabulary().add_word_lst("this is a test .".split())
    embedding = BertEmbedding(
        vocabulary,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=False,
    )

    # Model construction is expected to warn — presumably because the
    # embedding was built with include_cls_sep=False; the test only
    # requires that some Warning is raised.
    with self.assertWarns(Warning):
        classifier = BertForSequenceClassification(embedding, 2)

    batch = torch.LongTensor([[1, 2, 3], [5, 6, 0]])
    result = classifier.predict(batch)

    self.assertTrue(isinstance(result, dict))
    self.assertTrue(Const.OUTPUT in result)
    self.assertEqual(tuple(result[Const.OUTPUT].shape), (2,))
def test_bert_4(self):
    """BertForQuestionAnswering returns per-token start/end score tensors
    of shape (batch, seq_len) under the 'pred_start'/'pred_end' keys."""
    vocabulary = Vocabulary().add_word_lst("this is a test [SEP] .".split())
    embedding = BertEmbedding(
        vocabulary,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=False,
    )
    qa_model = BertForQuestionAnswering(embedding)

    batch = torch.LongTensor([[1, 2, 3], [6, 5, 0]])
    outputs = qa_model(batch)

    self.assertTrue(isinstance(outputs, dict))
    for key in ('pred_start', 'pred_end'):
        self.assertTrue(key in outputs)
        self.assertEqual(tuple(outputs[key].shape), (2, 3))
def test_bert_2(self):
    """BertForMultipleChoice with 2 choices yields a dict whose
    Const.OUTPUT entry has shape (batch, num_choices).

    Fix: removed a leftover debug ``print(input_ids.size())`` that
    cluttered test output and asserted nothing.
    """
    vocab = Vocabulary().add_word_lst("this is a test [SEP] .".split())
    embed = BertEmbedding(
        vocab,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=True,
    )
    model = BertForMultipleChoice(embed, 2)

    # Shape (batch=1, num_choices=2, seq_len=3).
    input_ids = torch.LongTensor([[[2, 6, 7], [1, 6, 5]]])
    pred = model(input_ids)

    self.assertTrue(isinstance(pred, dict))
    self.assertTrue(Const.OUTPUT in pred)
    self.assertEqual(tuple(pred[Const.OUTPUT].shape), (1, 2))
def test_bert_for_question_answering_train(self):
    """Smoke test: one Trainer run of BertForQuestionAnswering on the
    tiny CMRC2018 fixture completes without error."""
    from fastNLP import CMRC2018Loss
    from fastNLP import Trainer
    from fastNLP.io import CMRC2018BertPipe

    # Load the miniature CMRC dataset and rename to the field the
    # embedding expects.
    data_bundle = CMRC2018BertPipe().process_from_file('tests/data_for_tests/io/cmrc')
    data_bundle.rename_field('chars', 'words')

    vocabulary = data_bundle.get_vocab('words')
    embedding = BertEmbedding(
        vocabulary,
        model_dir_or_name='tests/data_for_tests/embedding/small_bert',
        include_cls_sep=False,
        auto_truncate=True,
    )
    qa_model = BertForQuestionAnswering(embedding)

    trainer = Trainer(
        data_bundle.get_dataset('train'),
        qa_model,
        loss=CMRC2018Loss(),
        use_tqdm=False,
    )
    trainer.train(load_best_model=False)
# Build the word vocabulary from the training split only; test/dev are
# listed as no_create_entry datasets so their words are seen (for
# embedding lookup) without being added as trainable vocab entries.
# NOTE(review): `vocab` and `bundle` are defined earlier in the file,
# outside this chunk.
vocab.from_dataset(bundle.get_dataset("train"), field_name='words', no_create_entry_dataset=[
    bundle.get_dataset("test"),
    bundle.get_dataset("dev")
])
# Replace word strings with vocabulary indices in every split.
vocab.index_dataset(bundle.get_dataset("train"), field_name='words')
vocab.index_dataset(bundle.get_dataset("test"), field_name='words')
vocab.index_dataset(bundle.get_dataset("dev"), field_name='words')

# ---- establish the model ----
from fastNLP import Const
import torch
from fastNLP.models import BertForSentenceMatching
from fastNLP.embeddings.bert_embedding import BertEmbedding

# Pretrained Chinese BERT, frozen (requires_grad=False) so only the
# matching head would be trained.
embed = BertEmbedding(vocab, model_dir_or_name='cn-base', requires_grad=False)

# Pad the 'words' input with index 0 and mark input/target fields so the
# batching machinery knows what to feed the model and the loss.
bundle.set_pad_val("words", 0)
bundle.set_input("words")
bundle.set_target("target")

model = BertForSentenceMatching(embed)

from fastNLP import AccuracyMetric

# Evaluate with accuracy over the model's Const.OUTPUT vs the target field.
metrics = AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)

# ---- train the model ----
from fastNLP import Trainer
from fastNLP import CrossEntropyLoss

# Training hyperparameters; presumably consumed by a Trainer constructed
# after this chunk — confirm against the following lines.
N_EPOCHS = 1
BATCH_SIZE = 16