Beispiel #1
0
def test_bert_padding(train_raw):
    """Check the batch shapes produced by ``callbacks.BertPadding``.

    ``train_raw`` is transformed with ``preprocessors.BertPreprocessor`` and
    wrapped in a ``Dataset`` built with ``mode='point'``.  The padding
    callback is then exercised twice through a ``DataLoader`` using
    ``batch_size=5``: first with fixed lengths and ``pad_mode='pre'``, then
    with no fixed lengths and ``pad_mode='post'``.

    :param train_raw: Raw training data supplied by the test fixture.
    """
    bert_preprocessor = preprocessors.BertPreprocessor()
    processed = bert_preprocessor.transform(train_raw, verbose=0)
    point_dataset = Dataset(processed, mode='point')

    # Case 1: fixed lengths of 5 on both sides with 'pre' padding.
    # The expected widths are 7 (left) and 6 (right).
    # NOTE(review): the extra 2 / 1 columns are presumably BERT special
    # tokens added by the callback -- confirm against BertPadding.
    fixed_pre_callback = callbacks.BertPadding(
        fixed_length_left=5,
        fixed_length_right=5,
        pad_mode='pre',
    )
    fixed_loader = DataLoader(
        point_dataset, batch_size=5, callback=fixed_pre_callback)
    for batch in fixed_loader:
        features = batch[0]
        assert features['text_left'].shape == (5, 7)
        assert features['text_right'].shape == (5, 6)

    # Case 2: no fixed lengths with 'post' padding.  The expected widths are
    # derived from the largest reported length in each batch, plus the same
    # 2 / 1 offsets as above.
    post_callback = callbacks.BertPadding(pad_mode='post')
    post_loader = DataLoader(
        point_dataset, batch_size=5, callback=post_callback)
    for batch in post_loader:
        features = batch[0]
        longest_left = max(features['length_left'].detach().cpu().numpy())
        longest_right = max(features['length_right'].detach().cpu().numpy())
        assert features['text_left'].shape == (5, longest_left + 2)
        assert features['text_right'].shape == (5, longest_right + 1)
Beispiel #2
0
 def get_default_preprocessor(cls,
                              mode: str = 'bert-base-uncased'
                              ) -> BasePreprocessor:
     """
     Build the default preprocessor.

     :param mode: Value forwarded to ``BertPreprocessor`` as its ``mode``
         argument (presumably the pretrained BERT variant name -- confirm
         against ``BertPreprocessor``).
     :return: Default preprocessor.
     """
     default_preprocessor = preprocessors.BertPreprocessor(mode=mode)
     return default_preprocessor
Beispiel #3
0
 def get_default_preprocessor(cls) -> BasePreprocessor:
     """
     Build the default preprocessor.

     :return: A ``BertPreprocessor`` constructed with its default arguments.
     """
     default_preprocessor = preprocessors.BertPreprocessor()
     return default_preprocessor