Python PosTagIndexer.tokens_to_indices Beispiele

Programmiersprache: Python

Namespace / Paketname: allennlp.data.token_indexers

Klasse / Typ: PosTagIndexer

Methode / Funktion: tokens_to_indices

Beispiele auf hotexamples.com: 5

Python PosTagIndexer.tokens_to_indices - 5 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode allennlp.data.token_indexers.PosTagIndexer.tokens_to_indices, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

PosTagIndexer(16)

_coarse_tags(6)

count_vocab_items(4)

tokens_to_indices(3)

pad_token_sequence(2)

as_padded_tensor(1)

as_padded_tensor_dict(1)

get_padding_lengths(1)

get_padding_token(1)

token_to_indices(1)

Beispiel #1

Datei anzeigen

Datei: pos_tag_indexer_test.py Projekt: zulushakaka/allennlp

    def test_tokens_to_indices_uses_pos_tags(self):
        """Verify that ``PosTagIndexer`` indexes tokens by their POS tags.

        With ``coarse_tags=True`` the hand-built token carrying ``pos_="VERB"``
        is expected at the ``VERB`` index and the trailing ``</S>`` token at the
        ``NONE`` index. After ``_coarse_tags`` is switched off, the same token
        is expected at the index of its ``tag_`` value (``VBZ``).
        """
        namespace = "pos_tags"

        # Tokenized sentence plus an end marker; slot 1 is replaced by a token
        # whose coarse and fine tags are set explicitly.
        sentence = list(self.tokenizer.tokenize("This is a sentence."))
        sentence.append(Token("</S>"))
        sentence[1] = Token("is", tag_="VBZ", pos_="VERB")

        vocab = Vocabulary()
        verb_index = vocab.add_token_to_namespace("VERB", namespace=namespace)
        cop_index = vocab.add_token_to_namespace("VBZ", namespace=namespace)
        none_index = vocab.add_token_to_namespace("NONE", namespace=namespace)
        # Every token gets indexed below, so the other tags need entries too.
        for extra_tag in ("DET", "NOUN", "PUNCT"):
            vocab.add_token_to_namespace(extra_tag, namespace=namespace)

        indexer = PosTagIndexer(namespace=namespace, coarse_tags=True)

        coarse_result = indexer.tokens_to_indices(sentence, vocab)
        assert len(coarse_result) == 1
        assert "tokens" in coarse_result
        tag_ids = coarse_result["tokens"]
        assert tag_ids[1] == verb_index
        assert tag_ids[-1] == none_index

        # Flip the indexer to fine-grained tags and re-index only the verb.
        indexer._coarse_tags = False
        fine_result = indexer.tokens_to_indices([sentence[1]], vocab)
        assert fine_result == {"tokens": [cop_index]}

Beispiel #2

Datei anzeigen

    def test_tokens_to_indices_uses_pos_tags(self):
        """Verify POS-tag indexing when an explicit index name is supplied.

        The result of ``tokens_to_indices`` is expected to be a one-entry dict
        keyed by the index name given as third argument. With coarse tags the
        second token is expected at the ``VERB`` index and the appended
        ``</S>`` token at the ``NONE`` index; with coarse tags disabled the
        second token is expected at the ``VBZ`` index.
        """
        namespace = u'pos_tags'

        sentence = list(self.tokenizer.split_words(u"This is a sentence."))
        sentence.append(Token(u"</S>"))

        vocab = Vocabulary()
        verb_index = vocab.add_token_to_namespace(u'VERB', namespace=namespace)
        cop_index = vocab.add_token_to_namespace(u'VBZ', namespace=namespace)
        none_index = vocab.add_token_to_namespace(u'NONE', namespace=namespace)
        # Every token gets indexed below, so the other tags need entries too.
        for extra_tag in (u'DET', u'NOUN', u'PUNCT'):
            vocab.add_token_to_namespace(extra_tag, namespace=namespace)

        indexer = PosTagIndexer(coarse_tags=True)

        coarse_result = indexer.tokens_to_indices(sentence, vocab, u"tokens")
        assert len(coarse_result) == 1
        assert u"tokens" in coarse_result
        tag_ids = coarse_result[u"tokens"]
        assert tag_ids[1] == verb_index
        assert tag_ids[-1] == none_index

        # Flip the indexer to fine-grained tags and re-index only the verb.
        indexer._coarse_tags = False  # pylint: disable=protected-access
        fine_result = indexer.tokens_to_indices([sentence[1]], vocab, u"coarse")
        assert fine_result == {u"coarse": [cop_index]}

Beispiel #3

Datei anzeigen

Datei: pos_tag_indexer_test.py Projekt: apmoore1/allennlp

    def test_tokens_to_indices_uses_pos_tags(self):
        """Verify POS-tag indexing against the ``pos_tags`` namespace.

        The result of ``tokens_to_indices`` is expected to be a one-entry dict
        keyed by the index name given as third argument. With coarse tags the
        second token is expected at the ``VERB`` index and the appended
        ``</S>`` token at the ``NONE`` index; with coarse tags disabled the
        second token is expected at the ``VBZ`` index.
        """
        namespace = "pos_tags"

        words = list(self.tokenizer.split_words("This is a sentence."))
        words.append(Token("</S>"))

        vocab = Vocabulary()
        verb_index = vocab.add_token_to_namespace("VERB", namespace=namespace)
        cop_index = vocab.add_token_to_namespace("VBZ", namespace=namespace)
        none_index = vocab.add_token_to_namespace("NONE", namespace=namespace)
        # Every token gets indexed below, so the other tags need entries too.
        for extra_tag in ("DET", "NOUN", "PUNCT"):
            vocab.add_token_to_namespace(extra_tag, namespace=namespace)

        indexer = PosTagIndexer(namespace=namespace, coarse_tags=True)

        coarse_result = indexer.tokens_to_indices(words, vocab, "tokens")
        assert len(coarse_result) == 1
        assert "tokens" in coarse_result
        tag_ids = coarse_result["tokens"]
        assert tag_ids[1] == verb_index
        assert tag_ids[-1] == none_index

        # Flip the indexer to fine-grained tags and re-index only the verb.
        indexer._coarse_tags = False  # pylint: disable=protected-access
        fine_result = indexer.tokens_to_indices([words[1]], vocab, "coarse")
        assert fine_result == {"coarse": [cop_index]}

Beispiel #4

Datei anzeigen

Datei: pos_tag_indexer_test.py Projekt: apmoore1/allennlp

 def test_blank_pos_tag(self):
     """Verify that tokens with an empty ``pos_`` are counted and indexed as "NONE".

     Four tokens get ``pos_`` set to the empty string. The test expects the
     vocabulary counter to hold four ``"NONE"`` entries under ``"pos_tokens"``
     and ``tokens_to_indices`` to return the ``"NONE"`` index for every token
     under the requested index name ``"pos"``.
     """
     words = "allennlp is awesome .".split(" ")
     tokens = [Token(word) for word in words]
     for blank in tokens:
         blank.pos_ = ""

     indexer = PosTagIndexer()
     counter = defaultdict(lambda: defaultdict(int))
     for tok in tokens:
         indexer.count_vocab_items(tok, counter)

     # spaCy uses an empty string to indicate "no POS tag";
     # the indexer is expected to record it as "NONE".
     assert counter["pos_tokens"]["NONE"] == 4

     vocab = Vocabulary(counter)
     none_index = vocab.get_token_index("NONE", "pos_tokens")

     # Indexing must not raise for the blank tags.
     indices = indexer.tokens_to_indices(tokens, vocab, index_name="pos")
     expected = {"pos": [none_index] * 4}
     assert expected == indices

Beispiel #5

Datei anzeigen

Datei: pos_tag_indexer_test.py Projekt: sudipta90/allennlp

 def test_blank_pos_tag(self):
     """Verify that tokens built without POS information map to "NONE".

     Four tokens are created from text only. The test expects the vocabulary
     counter to hold four ``"NONE"`` entries under ``"pos_tokens"`` and
     ``tokens_to_indices`` to return the ``"NONE"`` index for every token
     under the ``"tokens"`` key.
     """
     words = "allennlp is awesome .".split(" ")
     tokens = [Token(word) for word in words]

     indexer = PosTagIndexer()
     counter = defaultdict(lambda: defaultdict(int))
     for tok in tokens:
         indexer.count_vocab_items(tok, counter)

     # spaCy uses an empty string to indicate "no POS tag";
     # the indexer is expected to record it as "NONE".
     assert counter["pos_tokens"]["NONE"] == 4

     vocab = Vocabulary(counter)
     none_index = vocab.get_token_index("NONE", "pos_tokens")

     # Indexing must not raise for tokens without a POS tag.
     indices = indexer.tokens_to_indices(tokens, vocab)
     expected = {"tokens": [none_index] * 4}
     assert expected == indices