    def test_initialize_word_tensorizer(self):
        tensorizer = TokenTensorizer(text_column="text")
        init = tensorizer.initialize()
        init.send(None)  # kick
        for row in self.data.train:
            init.send(row)
        init.close()
        self.assertEqual(49, len(tensorizer.vocab))
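
The `init.send(None)` "kick" is a detail of Python's generator protocol rather than anything PyText-specific: `initialize()` returns a generator, and a generator must be primed with `send(None)` before it can receive values. A minimal standalone sketch of the same pattern (toy code, not the actual TokenTensorizer internals):

def build_counts(counts):
    # Receives rows via send() until close() raises GeneratorExit.
    while True:
        try:
            row = yield
        except GeneratorExit:
            break  # init.close() lands here
        for token in row["text"].split():
            counts[token] = counts.get(token, 0) + 1

counts = {}
init = build_counts(counts)
init.send(None)  # prime ("kick") the generator before sending data
init.send({"text": "I want some coffee"})
init.close()
assert counts["coffee"] == 1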
    def test_create_word_tensors(self):
        tensorizer = TokenTensorizer(text_column="text")
        init = tensorizer.initialize()
        init.send(None)  # kick
        for row in self.data.train:
            init.send(row)
        init.close()

        rows = [{"text": "I want some coffee"}, {"text": "Turn it up"}]
        tensors = (tensorizer.numberize(row) for row in rows)
        tokens, seq_len = next(tensors)
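        # [24, 0, 0, 0]: presumably only "I" is in the training vocab
        # (index 24), while out-of-vocabulary tokens map to index 0, the
        # unk token.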
        self.assertEqual([24, 0, 0, 0], tokens)
        self.assertEqual(4, seq_len)

        tokens, seq_len = next(tensors)
        self.assertEqual([13, 47, 9], tokens)
        self.assertEqual(3, seq_len)
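
    # The next test calls a _initialize_tensorizer helper that is not shown
    # on this page. Reconstructed from the inline pattern repeated in the two
    # tests above, it plausibly looks like this:
    def _initialize_tensorizer(self, tensorizer):
        init = tensorizer.initialize()
        init.send(None)  # kick
        for row in self.data.train:
            init.send(row)
        init.close()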

    def test_initialize_token_tensorizer(self):
        # default (build from data)
        tensorizer = TokenTensorizer(text_column="text")
        self._initialize_tensorizer(tensorizer)
        self.assertEqual(49, len(tensorizer.vocab))

        # size limit on tokens from data
        tensorizer = TokenTensorizer(
            text_column="text", vocab_config=VocabConfig(size_from_data=3)
        )
        self._initialize_tensorizer(tensorizer)
        self.assertEqual(5, len(tensorizer.vocab))  # 3 + unk token + pad token

        embed_file = tests_module.test_file("pretrained_embed_raw")

        # vocab from data + vocab_file
        tensorizer = TokenTensorizer(
            text_column="text",
            vocab_config=VocabConfig(
                size_from_data=3,
                vocab_files=[
                    VocabFileConfig(filepath=embed_file, skip_header_line=True)
                ],
            ),
        )
        self._initialize_tensorizer(tensorizer)
        self.assertEqual(15, len(tensorizer.vocab))

        # vocab just from vocab_file
        tensorizer = TokenTensorizer(
            text_column="text",
            vocab_config=VocabConfig(
                build_from_data=False,
                vocab_files=[
                    VocabFileConfig(
                        filepath=embed_file, skip_header_line=True, size_limit=5
                    )
                ],
            ),
        )
        init = tensorizer.initialize()
        # Should skip initialization
        with self.assertRaises(StopIteration):
            init.send(None)
        self.assertEqual(7, len(tensorizer.vocab))  # 5 + unk token + pad token
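
The StopIteration in the last test follows from the same generator protocol: with build_from_data=False there is presumably nothing for initialize() to consume, so the generator exits immediately and even the priming send(None) raises StopIteration. A toy illustration of a generator that "skips initialization" this way:

def skip_initialize():
    return  # exit immediately: the vocab came entirely from files
    yield  # unreachable, but makes this function a generator

init = skip_initialize()
try:
    init.send(None)
except StopIteration:
    pass  # exactly what assertRaises(StopIteration) checks above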