    def test_fast_integration(self):
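        # Seed the RNG so the random inputs and the randomly initialized model are reproducible.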
        torch.manual_seed(0)

        input_ids = torch.randint(
            self.model_tester.vocab_size,
            (self.model_tester.batch_size, self.model_tester.seq_length),
            device=torch_device,
        )
        attention_mask = torch.ones(
            (self.model_tester.batch_size, self.model_tester.seq_length),
            device=torch_device)
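        # Zero the attention mask everywhere except the last 10 positions.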
        attention_mask[:, :-10] = 0
        token_type_ids = torch.randint(
            self.model_tester.type_vocab_size,
            (self.model_tester.batch_size, self.model_tester.seq_length),
            device=torch_device,
        )

        config, _, _, _, _, _, _ = self.model_tester.prepare_config_and_inputs()
        model = BigBirdModel(config).to(torch_device).eval()

        with torch.no_grad():
            hidden_states = model(
                input_ids,
                token_type_ids=token_type_ids,
                attention_mask=attention_mask).last_hidden_state
            self.assertTrue(
                torch.allclose(
                    hidden_states[0, 0, :5],
                    torch.tensor([-0.6326, 0.6124, -0.0844, 0.6698, -1.7155],
                                 device=torch_device),
                    atol=1e-3,
                ))
    def test_fast_integration(self):
        # fmt: off
        input_ids = torch.tensor(
            [[6, 117, 33, 36, 70, 22, 63, 31, 71, 72, 88, 58, 109, 49, 48, 116, 92, 6, 19, 95, 118, 100, 80, 111, 93, 2, 31, 84, 26, 5, 6, 82, 46, 96, 109, 4, 39, 19, 109, 13, 92, 31, 36, 90, 111, 18, 75, 6, 56, 74, 16, 42, 56, 92, 69, 108, 127, 81, 82, 41, 106, 19, 44, 24, 82, 121, 120, 65, 36, 26, 72, 13, 36, 98, 43, 64, 8, 53, 100, 92, 51, 122, 66, 17, 61, 50, 104, 127, 26, 35, 94, 23, 110, 71, 80, 67, 109, 111, 44, 19, 51, 41, 86, 71, 76, 44, 18, 68, 44, 77, 107, 81, 98, 126, 100, 2, 49, 98, 84, 39, 23, 98, 52, 46, 10, 82, 121, 73],[6, 117, 33, 36, 70, 22, 63, 31, 71, 72, 88, 58, 109, 49, 48, 116, 92, 6, 19, 95, 118, 100, 80, 111, 93, 2, 31, 84, 26, 5, 6, 82, 46, 96, 109, 4, 39, 19, 109, 13, 92, 31, 36, 90, 111, 18, 75, 6, 56, 74, 16, 42, 56, 92, 69, 108, 127, 81, 82, 41, 106, 19, 44, 24, 82, 121, 120, 65, 36, 26, 72, 13, 36, 98, 43, 64, 8, 53, 100, 92, 51, 12, 66, 17, 61, 50, 104, 127, 26, 35, 94, 23, 110, 71, 80, 67, 109, 111, 44, 19, 51, 41, 86, 71, 76, 28, 18, 68, 44, 77, 107, 81, 98, 126, 100, 2, 49, 18, 84, 39, 23, 98, 52, 46, 10, 82, 121, 73]],  # noqa: E231
            dtype=torch.long,
            device=torch_device,
        )
        # fmt: on
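        # Map the hard-coded ids into the tester's vocab range and shift the second sequence's ids down by one.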
        input_ids = input_ids % self.model_tester.vocab_size
        input_ids[1] = input_ids[1] - 1

        attention_mask = torch.ones((input_ids.shape), device=torch_device)
        attention_mask[:, :-10] = 0

        config, _, _, _, _, _, _ = self.model_tester.prepare_config_and_inputs()
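        # Seed just before building the model so its random initialization is deterministic.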
        torch.manual_seed(0)
        model = BigBirdModel(config).eval().to(torch_device)

        with torch.no_grad():
            hidden_states = model(input_ids, attention_mask=attention_mask).last_hidden_state
            self.assertTrue(
                torch.allclose(
                    hidden_states[0, 0, :5],
                    torch.tensor([1.4943, 0.0928, 0.8254, -0.2816, -0.9788], device=torch_device),
                    atol=1e-3,
                )
            )
    def test_tokenizer_inference(self):
        tokenizer = BigBirdTokenizer.from_pretrained(
            "google/bigbird-roberta-base")
        model = BigBirdModel.from_pretrained("google/bigbird-roberta-base",
                                             attention_type="block_sparse",
                                             num_random_blocks=3,
                                             block_size=16)
        model.to(torch_device)

        text = [
            "Transformer-based models are unable to process long sequences due to their self-attention operation,"
            " which scales quadratically with the sequence length. To address this limitation, we introduce the"
            " Longformer with an attention mechanism that scales linearly with sequence length, making it easy to"
            " process documents of thousands of tokens or longer. Longformer’s attention mechanism is a drop-in"
            " replacement for the standard self-attention and combines a local windowed attention with a task"
            " motivated global attention. Following prior work on long-sequence transformers, we evaluate Longformer"
            " on character-level language modeling and achieve state-of-the-art results on text8 and enwik8. In"
            " contrast to most prior work, we also pretrain Longformer and finetune it on a variety of downstream"
            " tasks. Our pretrained Longformer consistently outperforms RoBERTa on long document tasks and sets new"
            " state-of-the-art results on WikiHop and TriviaQA."
        ]
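        # The abstract above tokenizes to 199 tokens (the output shape is checked below).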
        inputs = tokenizer(text)

        for k in inputs:
            inputs[k] = torch.tensor(inputs[k],
                                     device=torch_device,
                                     dtype=torch.long)

        prediction = model(**inputs)
        prediction = prediction[0]

        self.assertEqual(prediction.shape, torch.Size((1, 199, 768)))

        expected_prediction = torch.tensor(
            [
                [-0.0213, -0.2213, -0.0061, 0.0687],
                [0.0977, 0.1858, 0.2374, 0.0483],
                [0.2112, -0.2524, 0.5793, 0.0967],
                [0.2473, -0.5070, -0.0630, 0.2174],
                [0.2885, 0.1139, 0.6071, 0.2991],
                [0.2328, -0.2373, 0.3648, 0.1058],
                [0.2517, -0.0689, 0.0555, 0.0880],
                [0.1021, -0.1495, -0.0635, 0.1891],
                [0.0591, -0.0722, 0.2243, 0.2432],
                [-0.2059, -0.2679, 0.3225, 0.6183],
                [0.2280, -0.2618, 0.1693, 0.0103],
                [0.0183, -0.1375, 0.2284, -0.1707],
            ],
            device=torch_device,
        )
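        # Compare a slice of the output (positions 52:64, features 320:324) against the hard-coded reference.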
        self.assertTrue(
            torch.allclose(prediction[0, 52:64, 320:324],
                           expected_prediction,
                           atol=1e-4))
    def test_torch_encode_plus_sent_to_model(self):
        import torch

        from transformers import BigBirdConfig, BigBirdModel

        # Build sequence
        first_ten_tokens = list(self.big_tokenizer.get_vocab().keys())[:10]
        sequence = " ".join(first_ten_tokens)
        encoded_sequence = self.big_tokenizer.encode_plus(sequence, return_tensors="pt", return_token_type_ids=False)
        batch_encoded_sequence = self.big_tokenizer.batch_encode_plus(
            [sequence + " " + sequence], return_tensors="pt", return_token_type_ids=False
        )

        config = BigBirdConfig(attention_type="original_full")
        model = BigBirdModel(config)
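        # Sanity check: the embedding matrix must cover the tokenizer's vocabulary.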

        assert model.get_input_embeddings().weight.shape[0] >= self.big_tokenizer.vocab_size

        with torch.no_grad():
            model(**encoded_sequence)
            model(**batch_encoded_sequence)
    def create_and_check_model_as_decoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.add_cross_attention = True
        model = BigBirdModel(config)
        model.to(torch_device)
        model.eval()
        result = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
        )
        result = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            encoder_hidden_states=encoder_hidden_states,
        )
        result = model(input_ids,
                       attention_mask=input_mask,
                       token_type_ids=token_type_ids)
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, self.seq_length, self.hidden_size))
    def test_block_sparse_context_layer(self):
        model = BigBirdModel.from_pretrained(
            "google/bigbird-roberta-base", attention_type="block_sparse", num_random_blocks=3, block_size=16
        )
        model.to(torch_device)
        model.eval()
        config = model.config

        input_ids = self._get_dummy_input_ids()
        dummy_hidden_states = model.embeddings(input_ids)

        attn_mask = torch.ones_like(input_ids, device=torch_device)
        blocked_mask, band_mask, from_mask, to_mask = model.create_masks_for_block_sparse_attn(
            attn_mask, config.block_size
        )
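        # Expected context-layer values for the slice checked below (rows 64:78, features 300:310).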
        targeted_cl = torch.tensor(
            [
                [0.1874, 1.5260, 0.2335, -0.0473, -0.0961, 1.8384, -0.0141, 0.1250, 0.0085, -0.0048],
                [-0.0554, 0.0728, 0.1683, -0.1332, 0.1741, 0.1337, -0.2380, -0.1849, -0.0390, -0.0259],
                [-0.0419, 0.0767, 0.1591, -0.1399, 0.1789, 0.1257, -0.2406, -0.1772, -0.0261, -0.0079],
                [0.1860, 1.5172, 0.2326, -0.0473, -0.0953, 1.8291, -0.0147, 0.1245, 0.0082, -0.0046],
                [0.1879, 1.5296, 0.2335, -0.0471, -0.0975, 1.8433, -0.0136, 0.1260, 0.0086, -0.0054],
                [0.1854, 1.5147, 0.2334, -0.0480, -0.0956, 1.8250, -0.0149, 0.1222, 0.0082, -0.0060],
                [0.1859, 1.5184, 0.2334, -0.0474, -0.0955, 1.8297, -0.0143, 0.1234, 0.0079, -0.0054],
                [0.1885, 1.5336, 0.2335, -0.0467, -0.0979, 1.8481, -0.0130, 0.1269, 0.0085, -0.0049],
                [0.1881, 1.5305, 0.2335, -0.0471, -0.0976, 1.8445, -0.0135, 0.1262, 0.0086, -0.0053],
                [0.1852, 1.5148, 0.2333, -0.0480, -0.0949, 1.8254, -0.0151, 0.1225, 0.0079, -0.0055],
                [0.1877, 1.5292, 0.2335, -0.0470, -0.0972, 1.8431, -0.0135, 0.1259, 0.0084, -0.0052],
                [0.1874, 1.5261, 0.2334, -0.0472, -0.0968, 1.8393, -0.0140, 0.1251, 0.0084, -0.0052],
                [0.1853, 1.5151, 0.2331, -0.0478, -0.0948, 1.8256, -0.0154, 0.1228, 0.0086, -0.0052],
                [0.1867, 1.5233, 0.2334, -0.0475, -0.0965, 1.8361, -0.0139, 0.1247, 0.0084, -0.0054],
            ],
            device=torch_device,
        )

        context_layer = model.encoder.layer[0].attention.self(
            dummy_hidden_states,
            band_mask=band_mask,
            from_mask=from_mask,
            to_mask=to_mask,
            from_blocked_mask=blocked_mask,
            to_blocked_mask=blocked_mask,
        )
        context_layer = context_layer[0]

        self.assertEqual(context_layer.shape, torch.Size((1, 128, 768)))
        self.assertTrue(torch.allclose(context_layer[0, 64:78, 300:310], targeted_cl, atol=0.0001))
    def test_tokenizer_inference(self):
        tokenizer = BigBirdTokenizer.from_pretrained(
            "google/bigbird-roberta-base")
        model = BigBirdModel.from_pretrained("google/bigbird-roberta-base",
                                             attention_type="block_sparse",
                                             num_random_blocks=3,
                                             block_size=16)
        model.to(torch_device)

        text = [
            'This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to <unk>, such as saoneuhaoesuth ... This is a very long text with a lot of weird characters, such as: . , ~ ? ( ) " [ ] ! : - . Also we will add words that should not exsist and be tokenized to <unk>, such as saoneuhaoesuth ,, I was born in 92000, and this is falsé.'
        ]
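        # The text above tokenizes to 128 tokens (the output shape is checked below).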
        inputs = tokenizer(text)

        for k in inputs:
            inputs[k] = torch.tensor(inputs[k],
                                     device=torch_device,
                                     dtype=torch.long)

        prediction = model(**inputs)
        prediction = prediction[0]

        self.assertEqual(prediction.shape, torch.Size((1, 128, 768)))

        expected_prediction = torch.tensor(
            [
                [-0.0745, 0.0689, -0.1126, -0.0610],
                [-0.0343, 0.0111, -0.0269, -0.0858],
                [0.1150, 0.0896, 0.0492, 0.0149],
                [-0.0657, 0.2035, 0.0444, -0.0535],
                [0.1143, 0.0465, 0.1583, -0.1855],
                [-0.0216, 0.0807, 0.0536, 0.1371],
                [-0.1879, 0.0097, -0.1916, 0.1701],
                [0.7616, 0.1240, 0.0669, 0.2588],
                [0.1096, -0.1810, -0.1987, 0.0445],
                [0.1810, -0.3608, -0.0081, 0.1764],
                [-0.0472, 0.0460, 0.0976, -0.0021],
                [-0.0274, -0.3274, -0.0788, 0.0465],
            ],
            device=torch_device,
        )
        self.assertTrue(
            torch.allclose(prediction[0, 52:64, 320:324],
                           expected_prediction,
                           atol=1e-4))
    def test_auto_padding(self):
        model = BigBirdModel.from_pretrained(
            "google/bigbird-roberta-base", attention_type="block_sparse", num_random_blocks=3, block_size=16
        )
        model.to(torch_device)
        model.eval()

        input_ids = torch.tensor([200 * [10] + 40 * [2] + [1]], device=torch_device, dtype=torch.long)
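        # 241 tokens (200 + 40 + 1) is not a multiple of block_size=16, so the model must pad internally and still return the original length.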
        output = model(input_ids).to_tuple()[0]

        # fmt: off
        target = torch.tensor(
            [[-0.045136, -0.068013, 0.12246, -0.01356, 0.018386, 0.025333, -0.0044439, -0.0030996, -0.064031, 0.0006439], [-0.045018, -0.067638, 0.12317, -0.013998, 0.019216, 0.025695, -0.0043705, -0.0031895, -0.063153, 0.00088899], [-0.045042, -0.067305, 0.1234, -0.014512, 0.020057, 0.026084, -0.004615, -0.0031728, -0.062442, 0.0010263], [-0.044589, -0.067655, 0.12416, -0.014287, 0.019416, 0.026065, -0.0050958, -0.002702, -0.063158, 0.0004827], [-0.044627, -0.067535, 0.1239, -0.014319, 0.019491, 0.026213, -0.0059482, -0.0025906, -0.063116, 0.00014669], [-0.044899, -0.067704, 0.12337, -0.014231, 0.019256, 0.026345, -0.0065565, -0.0022938, -0.063433, -0.00011409], [-0.045599, -0.067764, 0.12235, -0.014151, 0.019206, 0.026417, -0.0068965, -0.0024494, -0.063313, -4.4499e-06], [-0.045557, -0.068372, 0.12199, -0.013747, 0.017962, 0.026103, -0.0070607, -0.0023552, -0.06447, -0.00048756], [-0.045334, -0.068913, 0.1217, -0.013566, 0.01693, 0.025745, -0.006311, -0.0024903, -0.065575, -0.0006719], [-0.045171, -0.068726, 0.12164, -0.013688, 0.017139, 0.025629, -0.005213, -0.0029412, -0.065237, -0.00020669], [-0.044411, -0.069267, 0.12206, -0.013645, 0.016212, 0.025589, -0.0044121, -0.002972, -0.066277, -0.00067963], [-0.043487, -0.069792, 0.1232, -0.013663, 0.015303, 0.02613, -0.0036294, -0.0030616, -0.067483, -0.0012642], [-0.042622, -0.069287, 0.12469, -0.013936, 0.016204, 0.026474, -0.0040534, -0.0027365, -0.066994, -0.0014148], [-0.041879, -0.070031, 0.12593, -0.014047, 0.015082, 0.027751, -0.0040683, -0.0027189, -0.068985, -0.0027146]],  # noqa: E231
            device=torch_device,
        )
        # fmt: on

        self.assertEqual(output.shape, torch.Size((1, 241, 768)))
        self.assertTrue(torch.allclose(output[0, 64:78, 300:310], target, atol=0.0001))
    def create_and_check_for_auto_padding(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        model = BigBirdModel(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
    def create_and_check_for_change_to_full_attn(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        model = BigBirdModel(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        # the config should not be changed
        self.parent.assertTrue(model.config.attention_type == "block_sparse")
    def test_block_sparse_attention_probs(self):
        """
        Check that the context layer computed by block-sparse attention is consistent with the returned attention probabilities.
        """

        if not self.test_attention_probs:
            return

        model = BigBirdModel.from_pretrained("google/bigbird-roberta-base",
                                             attention_type="block_sparse",
                                             num_random_blocks=3,
                                             block_size=16)
        model.to(torch_device)
        model.eval()
        config = model.config

        input_ids = self._get_dummy_input_ids()

        hidden_states = model.embeddings(input_ids)

        batch_size, seqlen, _ = hidden_states.size()
        attn_mask = torch.ones(batch_size,
                               seqlen,
                               device=torch_device,
                               dtype=torch.float)
        to_seq_length = from_seq_length = seqlen
        from_block_size = to_block_size = config.block_size

        blocked_mask, band_mask, from_mask, to_mask = model.create_masks_for_block_sparse_attn(
            attn_mask, config.block_size)
        from_blocked_mask = to_blocked_mask = blocked_mask

        for i in range(config.num_hidden_layers):
            pointer = model.encoder.layer[i].attention.self

            query_layer = pointer.transpose_for_scores(
                pointer.query(hidden_states))
            key_layer = pointer.transpose_for_scores(
                pointer.key(hidden_states))
            value_layer = pointer.transpose_for_scores(
                pointer.value(hidden_states))

            context_layer, attention_probs = pointer.bigbird_block_sparse_attention(
                query_layer,
                key_layer,
                value_layer,
                band_mask,
                from_mask,
                to_mask,
                from_blocked_mask,
                to_blocked_mask,
                pointer.num_attention_heads,
                pointer.num_random_blocks,
                pointer.attention_head_size,
                from_block_size,
                to_block_size,
                batch_size,
                from_seq_length,
                to_seq_length,
                seed=pointer.seed,
                plan_from_length=None,
                plan_num_rand_blocks=None,
                output_attentions=True,
            )

            context_layer = context_layer.contiguous().view(
                batch_size, from_seq_length, -1)
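            # Recompute the context layer from the returned attention probabilities and check it matches the kernel output.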
            cl = torch.einsum("bhqk,bhkd->bhqd", attention_probs, value_layer)
            cl = cl.view(context_layer.size())

            self.assertTrue(torch.allclose(context_layer, cl, atol=0.001))
Example #12
from transformers import BigBirdModel, BigBirdTokenizer
from transformers import RobertaModel, RobertaTokenizer
from tqdm import tqdm
import time
import numpy as np
import torch

bigbird = 'google/bigbird-roberta-base'
bert = 'roberta-base'
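# Note: despite the variable name, "bert" points at the RoBERTa checkpoint and RoBERTa classes below.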

bbtokenizer = BigBirdTokenizer.from_pretrained(bigbird)
bbmodel = BigBirdModel.from_pretrained(bigbird)

bttokenizer = RobertaTokenizer.from_pretrained(bert)
btmodel = RobertaModel.from_pretrained(bert)

use_bigbird = True

if use_bigbird:
    tokenizer = bbtokenizer
    model = bbmodel
else:
    tokenizer = bttokenizer
    model = btmodel


def get_latency(model, inputs):
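    # Time 100 forward passes with the selected model.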
    start = time.time()
    for _ in tqdm(range(100)):
        output = model(**inputs)