Python Vocabulary.get_token_index 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: elmo.data.vocabulary

클래스/타입: Vocabulary

메소드/함수: get_token_index

hotexamples.com에서의 예제들: 4

Python Vocabulary.get_token_index - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python elmo.data.vocabulary.Vocabulary.get_token_index의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Vocabulary(15)

add_token_to_namespace(8)

get_token_index(8)

from_instances(4)

get_index_to_token_vocabulary(4)

get_token_from_index(4)

from_files(2)

get_vocab_size(2)

save_to_files(2)

set_from_file(2)

from_params(1)

예제 #1

파일 보기

파일: vocabulary_test.py 프로젝트: fulQuan/ELMo

 def test_unknown_token(self):
     """Pin down how a fresh ``Vocabulary`` indexes its out-of-vocabulary token.

     The OOV token is expected at index 1, and a word that was never added
     must resolve to that same index.
     """
     # pylint: disable=protected-access
     # This behavior lives in a test on purpose: solver code relies, in a small
     # way, on how the unknown token is treated.  A breaking change here should
     # fail loudly so that it gets the extra consideration it needs.
     vocabulary = Vocabulary()
     unknown_index = vocabulary.get_token_index(vocabulary._oov_token)
     assert unknown_index == 1
     assert vocabulary.get_token_index("unseen word") == unknown_index

예제 #2

파일 보기

파일: vocabulary_test.py 프로젝트: fulQuan/ELMo

    def test_set_from_file_reads_padded_files(self):
        """Check ``set_from_file`` on a vocabulary file loaded with ``is_padded=True``.

        The test expects index 0 to hold the padding token, the file entries to
        be numbered from 1 in file order, and the file's ``<UNK>`` entry to be
        exposed as ``DEFAULT_OOV_TOKEN`` (which unseen strings also map to).
        """
        # pylint: disable=protected-access
        file_lines = (
                '<S>\n',
                '</S>\n',
                '<UNK>\n',
                'a\n',
                'tricky\x0bchar\n',
                'word\n',
                'another\n',
        )
        vocab_path = self.TEST_DIR + 'vocab_file'
        with codecs.open(vocab_path, 'w', 'utf-8') as handle:
            for line in file_lines:
                handle.write(line)

        vocab = Vocabulary()
        vocab.set_from_file(vocab_path, is_padded=True, oov_token="<UNK>")

        # The OOV token named in the file is replaced by the default one.
        assert vocab._oov_token == DEFAULT_OOV_TOKEN
        assert vocab.get_token_index("random string") == 3

        # (token, index) pairs in file order; index 0 is reserved for padding.
        expected_entries = (
                ("<S>", 1),
                ("</S>", 2),
                (DEFAULT_OOV_TOKEN, 3),
                ("a", 4),
                ("tricky\x0bchar", 5),
                ("word", 6),
                ("another", 7),
        )
        for token, index in expected_entries:
            assert vocab.get_token_index(token) == index

        assert vocab.get_token_from_index(0) == vocab._padding_token
        for token, index in expected_entries:
            assert vocab.get_token_from_index(index) == token

예제 #3

파일 보기

파일: vocabulary_test.py 프로젝트: fulQuan/ELMo

    def test_namespaces(self):
        """Verify that tokens added under different namespaces are tracked separately."""
        vocab = Vocabulary()
        base_size = vocab.get_vocab_size()

        # Namespace '1' receives a single token.
        first_index = vocab.add_token_to_namespace("word", namespace='1')
        first_tokens = vocab.get_index_to_token_vocabulary(namespace='1').values()
        assert "word" in first_tokens
        assert vocab.get_token_index("word", namespace='1') == first_index
        assert vocab.get_token_from_index(first_index, namespace='1') == "word"
        assert vocab.get_vocab_size(namespace='1') == base_size + 1

        # Namespace '2' receives a new word first and then the same "word" again;
        # it must behave like a fresh namespace, unaffected by namespace '1'.
        word2_index = vocab.add_token_to_namespace("word2", namespace='2')
        word_index = vocab.add_token_to_namespace("word", namespace='2')
        expected_in_second = (("word", word_index), ("word2", word2_index))

        second_tokens = vocab.get_index_to_token_vocabulary(namespace='2').values()
        for token, _ in expected_in_second:
            assert token in second_tokens
        for token, index in expected_in_second:
            assert vocab.get_token_index(token, namespace='2') == index
        for token, index in expected_in_second:
            assert vocab.get_token_from_index(index, namespace='2') == token
        assert vocab.get_vocab_size(namespace='2') == base_size + 2

예제 #4

파일 보기

파일: single_id_token_indexer.py 프로젝트: fulQuan/ELMo

 def token_to_indices(self, token: Token, vocabulary: Vocabulary) -> int:
     """Return the index that represents ``token``.

     If the token has a non-``None`` ``text_id`` attribute, that id is returned
     as-is and the vocabulary is not consulted.  Otherwise the token text
     (lower-cased when ``self.lowercase_tokens`` is truthy) is looked up via
     ``vocabulary.get_token_index`` in ``self.namespace``.
     """
     preset_id = getattr(token, 'text_id', None)
     if preset_id is not None:
         # A pre-assigned id bypasses the vocabulary entirely.
         return preset_id

     surface = token.text
     lookup_text = surface.lower() if self.lowercase_tokens else surface
     return vocabulary.get_token_index(lookup_text, self.namespace)