Example #1
    def test_character_token_embedder(self):
        vocab = Dictionary()
        vocab.add_symbol('hello')
        vocab.add_symbol('there')

        # (width, out_channels) conv filter specs, then char embed dim 64,
        # word embed dim 5, and 2 highway layers.
        embedder = CharacterTokenEmbedder(vocab, [(2, 16), (4, 32), (8, 64),
                                                  (16, 2)], 64, 5, 2)

        # 'unk' was never added to the vocab, so vocab.index maps it to the
        # unknown symbol.
        test_sents = [['hello', 'unk', 'there'], ['there'], ['hello', 'there']]
        max_len = max(len(s) for s in test_sents)
        # Frame each sentence with EOS on both ends and pad to a common width.
        input = torch.LongTensor(len(test_sents),
                                 max_len + 2).fill_(vocab.pad())
        for i in range(len(test_sents)):
            input[i][0] = vocab.eos()
            for j in range(len(test_sents[i])):
                input[i][j + 1] = vocab.index(test_sents[i][j])
            # j keeps its last inner-loop value, so this places the trailing
            # EOS right after the final word.
            input[i][j + 2] = vocab.eos()
        embs = embedder(input)

        assert embs.size() == (len(test_sents), max_len + 2, 5)
        # Identical character sequences must embed identically:
        self.assertAlmostEqual(embs[0][0], embs[1][0])   # leading EOS vs. leading EOS
        self.assertAlmostEqual(embs[0][0], embs[0][-1])  # leading EOS vs. trailing EOS
        self.assertAlmostEqual(embs[0][1], embs[2][1])   # 'hello' vs. 'hello'
        self.assertAlmostEqual(embs[0][3], embs[1][1])   # 'there' vs. 'there'

        # Gradients should flow back to the character embedding table.
        embs.sum().backward()
        assert embedder.char_embeddings.weight.grad is not None
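
A minimal, self-contained sketch of the input layout this test builds, using hand-picked pad/EOS/word indices in place of fairseq's Dictionary (the index values are assumptions for illustration only):

import torch

PAD, EOS = 1, 2                            # assumed indices, not fairseq's
word_id = {"hello": 4, "there": 5}

sents = [["hello", "there"], ["there"]]
max_len = max(len(s) for s in sents)
batch = torch.full((len(sents), max_len + 2), PAD, dtype=torch.long)
for i, sent in enumerate(sents):
    batch[i][0] = EOS                      # leading EOS
    for j, w in enumerate(sent):
        batch[i][j + 1] = word_id[w]
    batch[i][len(sent) + 1] = EOS          # trailing EOS after the last word
print(batch)                               # tensor([[2, 4, 5, 2],
                                           #         [2, 5, 2, 1]])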
Example #2
    def _get_test_data(self):
        vocab = Dictionary()
        # "@@" marks BPE continuation units (word-internal subwords).
        vocab.add_symbol("he@@")
        vocab.add_symbol("llo")
        vocab.add_symbol("how")
        vocab.add_symbol("are")
        vocab.add_symbol("y@@")
        vocab.add_symbol("ou")
        vocab.add_symbol("n@@")
        vocab.add_symbol("ew")
        vocab.add_symbol("or@@")
        vocab.add_symbol("k")

        src_tokens = [
            ["he@@", "llo", "n@@", "ew", "y@@", "or@@", "k"],
            ["how", "are", "y@@", "ou"],
        ]
        src_len = [len(x) for x in src_tokens]
        # One extra column for the EOS appended to every sequence.
        x = torch.LongTensor(len(src_tokens), max(src_len) + 1).fill_(vocab.pad())
        for i in range(len(src_tokens)):
            for j in range(len(src_tokens[i])):
                x[i][j] = vocab.index(src_tokens[i][j])
            # j keeps its last inner-loop value, placing EOS right after the
            # final token of each sequence.
            x[i][j + 1] = vocab.eos()

        # Transpose to the (T x B) layout the noising modules expect.
        x = x.transpose(1, 0)
        # Returned lengths include the appended EOS.
        return vocab, x, torch.LongTensor([i + 1 for i in src_len])
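
The helper touches only four methods of fairseq's Dictionary: add_symbol, index, pad, and eos. To run it outside fairseq, a minimal stand-in like the one below is enough (the index layout is an assumption for illustration, not fairseq's actual one):

class ToyDictionary:
    """Minimal stand-in for the Dictionary API used above."""

    def __init__(self):
        self.symbols = ["<pad>", "</s>", "<unk>"]  # pad=0, eos=1, unk=2 here
        self.indices = {s: i for i, s in enumerate(self.symbols)}

    def add_symbol(self, sym):
        if sym not in self.indices:
            self.indices[sym] = len(self.symbols)
            self.symbols.append(sym)
        return self.indices[sym]

    def index(self, sym):
        # Unknown symbols fall back to <unk>, as in the real Dictionary.
        return self.indices.get(sym, self.indices["<unk>"])

    def pad(self):
        return self.indices["<pad>"]

    def eos(self):
        return self.indices["</s>"]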
Example #3
    def _get_test_data(self, append_eos=True):
        vocab = Dictionary()
        vocab.add_symbol("he@@")
        vocab.add_symbol("llo")
        vocab.add_symbol("how")
        vocab.add_symbol("are")
        vocab.add_symbol("y@@")
        vocab.add_symbol("ou")
        vocab.add_symbol("n@@")
        vocab.add_symbol("ew")
        vocab.add_symbol("or@@")
        vocab.add_symbol("k")

        src_tokens = [
            ["he@@", "llo", "n@@", "ew", "y@@", "or@@", "k"],
            ["how", "are", "y@@", "ou"],
        ]
        src_len = [len(x) for x in src_tokens]
        # If EOS is appended, count it toward the source length.
        if append_eos:
            src_len = [length + 1 for length in src_len]

        x = torch.LongTensor(len(src_tokens), max(src_len)).fill_(vocab.pad())
        for i in range(len(src_tokens)):
            for j in range(len(src_tokens[i])):
                x[i][j] = vocab.index(src_tokens[i][j])
            if append_eos:
                # j keeps its last inner-loop value, placing EOS right after
                # the final token.
                x[i][j + 1] = vocab.eos()

        # Transpose to the (T x B) layout the noising modules expect.
        x = x.transpose(1, 0)
        return vocab, x, torch.LongTensor(src_len)
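
The only difference from Example #2 is the append_eos switch. A quick standalone sketch of its effect on shapes and lengths (toy indices, already numericalized, chosen for illustration):

import torch

PAD, EOS = 0, 1
seqs = [[5, 6, 7], [5, 6]]

def pack(seqs, append_eos):
    lengths = [len(s) + (1 if append_eos else 0) for s in seqs]
    x = torch.full((len(seqs), max(lengths)), PAD, dtype=torch.long)
    for i, s in enumerate(seqs):
        x[i, : len(s)] = torch.tensor(s)
        if append_eos:
            x[i, len(s)] = EOS
    return x.t(), torch.tensor(lengths)    # (T x B), like the helper above

x_eos, len_eos = pack(seqs, append_eos=True)   # shape (4, 2), lengths [4, 3]
x_raw, len_raw = pack(seqs, append_eos=False)  # shape (3, 2), lengths [3, 2]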
Example #4
    def assert_word_shuffle_matches_expected(
        self,
        x,
        x_len,
        max_shuffle_distance: int,
        vocab: Dictionary,
        expected_shuffle_maps: List[Dict[int, int]],
        expect_eos_at_end: bool,
        bpe_end_marker=None,
    ):
        """
        This verifies that with a given x, x_len, max_shuffle_distance, and
        vocab, we get the expected shuffle result.

        Args:
            x: Tensor of shape (T x B) = (sequence_length, batch_size)
            x_len: Tensor of length B = batch_size
            max_shuffle_distance: arg to pass to noising
            vocab: the Dictionary used to build x
            expected_shuffle_maps: List[mapping] where mapping is a
                Dict[old_index, new_index], mapping x's elements from their
                old positions in x to their new positions in x.
            expect_eos_at_end: if True, check the output to make sure there is
                an EOS at the end.
            bpe_end_marker: str denoting the BPE end token. If this is not None, we
                set the BPE cont token to None in the noising classes.
        """
        # Default to the "@@" continuation-marker convention unless an
        # explicit end marker is given.
        bpe_cont_marker = None
        if bpe_end_marker is None:
            bpe_cont_marker = "@@"

        with data_utils.numpy_seed(1234):
            word_shuffle = noising.WordShuffle(
                vocab, bpe_cont_marker=bpe_cont_marker, bpe_end_marker=bpe_end_marker
            )
            x_noised, l_noised = word_shuffle.noising(
                x, x_len, max_shuffle_distance=max_shuffle_distance
            )

        # For every example, we have a different expected shuffle map. We check
        # that each example is shuffled as expected according to each
        # corresponding shuffle map.
        for i in range(len(expected_shuffle_maps)):
            shuffle_map = expected_shuffle_maps[i]
            for k, v in shuffle_map.items():
                self.assertEqual(x[k][i], x_noised[v][i])

        # Shuffling should not affect the length of each example
        for pre_shuffle_length, post_shuffle_length in zip(x_len, l_noised):
            self.assertEqual(pre_shuffle_length, post_shuffle_length)
        if expect_eos_at_end:
            self.assert_eos_at_end(x=x_noised, x_len=l_noised, eos=vocab.eos())
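
The expected_shuffle_maps contract is easiest to see on a toy tensor. The snippet below builds a one-example batch in the (T x B) layout, "shuffles" it by a known permutation, and checks it exactly the way the assertion loop above does (all values are made up for illustration):

import torch

x = torch.tensor([[10], [11], [12], [13], [2]])         # T=5, B=1; 2 = EOS
x_noised = torch.tensor([[11], [10], [12], [13], [2]])  # positions 0 and 1 swapped

shuffle_map = {0: 1, 1: 0, 2: 2, 3: 3}  # old position -> new position
for k, v in shuffle_map.items():
    assert x[k][0] == x_noised[v][0]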
Example #5
    def _convert_src_tokens_to_tensor(
        self, vocab: Dictionary, src_tokens: List[List[str]], append_eos: bool
    ):
        src_len = [len(x) for x in src_tokens]
        # If EOS is appended, count it toward the source length.
        if append_eos:
            src_len = [length + 1 for length in src_len]

        x = torch.LongTensor(len(src_tokens), max(src_len)).fill_(vocab.pad())
        for i in range(len(src_tokens)):
            for j in range(len(src_tokens[i])):
                x[i][j] = vocab.index(src_tokens[i][j])
            if append_eos:
                # j keeps its last inner-loop value, placing EOS right after
                # the final token.
                x[i][j + 1] = vocab.eos()

        # Transpose to the (T x B) layout the noising modules expect.
        x = x.transpose(1, 0)
        return x, torch.LongTensor(src_len)
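
Putting the pieces together, a hypothetical standalone call site pairs this helper's packing with the noising module from Example #4. The import paths below are assumptions based on how the tests use these names, and require fairseq to be installed:

import torch
from fairseq.data import Dictionary, noising

vocab = Dictionary()
for sym in ["he@@", "llo", "how", "are"]:
    vocab.add_symbol(sym)

# Same packing as _convert_src_tokens_to_tensor with append_eos=True.
src_tokens = [["he@@", "llo"], ["how", "are"]]
x = torch.LongTensor(len(src_tokens), 3).fill_(vocab.pad())
for i, toks in enumerate(src_tokens):
    for j, t in enumerate(toks):
        x[i][j] = vocab.index(t)
    x[i][len(toks)] = vocab.eos()
x = x.transpose(1, 0)
x_len = torch.LongTensor([3, 3])

word_shuffle = noising.WordShuffle(vocab, bpe_cont_marker="@@")
x_noised, l_noised = word_shuffle.noising(x, x_len, max_shuffle_distance=3)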