Example #1
0
 def test_file_corpus(self):
     """Check the tokens that FileCorpus yields for ``path_text_file``.

     The token iterator is passed to ``count_words_and_collect_prefix``;
     the first returned value must equal ``TEST_TEXT_LEN`` and the second,
     joined with ``|``, must equal ``TEST_FIRST_10_WORDS``.
     """
     token_stream = FileCorpus(path_text_file).get_token_iterator(verbose=1)
     word_count, prefix = count_words_and_collect_prefix(token_stream)
     # Print the count before asserting on it.
     print("!!!!!total words", word_count)
     assert word_count == TEST_TEXT_LEN
     joined_prefix = '|'.join(prefix)
     assert joined_prefix == TEST_FIRST_10_WORDS
Example #2
0
 def test_file_corpus(self):
     corpus = FileCorpus(path_text_file)
     tokens_iter = corpus.get_token_iterator(verbose=1)
     total_words, words = count_words_and_collect_prefix(tokens_iter)
     print("!!!!!total words", total_words)