Example #1
 def testVocabularyProcessor(self):
     vocab_processor = text.VocabularyProcessor(max_document_length=4,
                                                 min_frequency=1)
     # "a", "b" and "c" each occur three times and survive the min_frequency
     # filter; "-" occurs only once and maps to the out-of-vocabulary id 0,
     # and documents shorter than max_document_length are padded with 0.
     tokens = vocab_processor.fit_transform(
         ["a b c", "a\nb\nc", "a, b - c"])
     self.assertAllEqual(list(tokens),
                         [[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 3]])
Example #2
 def testExistingVocabularyProcessor(self):
     vocab = CategoricalVocabulary()
     vocab.get("A")
     vocab.get("B")
     vocab.freeze()
     # The vocabulary is frozen with only "A" (id 1) and "B" (id 2), so every
     # other character maps to the unknown id 0; tokenizer_fn=list splits each
     # document into single characters, and "CBABAF" is truncated to
     # max_document_length=4.
     vocab_processor = text.VocabularyProcessor(
         max_document_length=4, vocabulary=vocab, tokenizer_fn=list)
     tokens = vocab_processor.fit_transform(["ABC", "CBABAF"])
     self.assertAllEqual(list(tokens), [[1, 2, 0, 0], [0, 2, 1, 2]])
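
The expected ids follow from the frozen vocabulary: only "A" and "B" were registered before freeze(), so any other character resolves to the unknown id 0. A minimal sketch of the same behaviour on the vocabulary object alone (the import path is an assumption; the test above uses the bare class name, and the ids shown in the comments are what the expected output above implies):

    from tensorflow.contrib.learn.python.learn.preprocessing.categorical_vocabulary import CategoricalVocabulary

    vocab = CategoricalVocabulary()
    vocab.get("A")   # -> 1; id 0 is reserved for unknown tokens
    vocab.get("B")   # -> 2
    vocab.freeze()   # no new ids are assigned after this point
    vocab.get("C")   # -> 0, because the vocabulary is frozen
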
Example #3
 def testVocabularyProcessorSaveRestore(self):
     filename = os.path.join(tf.test.get_temp_dir(), 'test.vocab')
     vocab_processor = text.VocabularyProcessor(max_document_length=4,
                                                 min_frequency=1)
     # fit_transform fits the vocabulary eagerly; the returned generator is
     # not consumed because only the fitted state matters for this test.
     vocab_processor.fit_transform(
         ["a b c", "a\nb\nc", "a, b - c"])
     vocab_processor.save(filename)
     new_vocab = text.VocabularyProcessor.restore(filename)
     tokens = new_vocab.transform(["a b c"])
     self.assertAllEqual(list(tokens), [[1, 2, 3, 0]])
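
Restore is the half of this round trip that matters at inference time: a processor saved after fitting can be reloaded in another process and applied to new documents with the identical word-to-id mapping. A minimal sketch (the file path is hypothetical and assumes a fitted processor was saved there earlier, as in the test above):

    import numpy as np
    from tensorflow.contrib.learn.python.learn.preprocessing import text

    restored = text.VocabularyProcessor.restore("/tmp/example.vocab")  # hypothetical path
    # The fitted vocabulary travels with the processor, so unseen words map
    # to id 0 and short documents are padded with 0, as in the test above.
    ids = np.array(list(restored.transform(["a b c", "new words here"])))
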