def test_vocabulary_serialization(self):
    # Build a vocabulary, serialize it to disk, and check that the
    # deserialized copy compares equal to the original.
    vocabulary_name = os.path.join(self.directory, "vocabulary.pkl")
    vocabulary = Vocabulary(["the quick brown fox jumped over the lazy dog"],
                            WordTokenizer(True))
    self._serialize(vocabulary_name, vocabulary)
    self.assertTrue(os.path.isfile(vocabulary_name))
    deserialized_vocabulary = self._deserialize(vocabulary_name)
    self.assertEqual(vocabulary, deserialized_vocabulary)
    # Both vocabularies must index a new string identically.
    s = "The quick black fox"
    np.testing.assert_equal(vocabulary.index_string(s),
                            deserialized_vocabulary.index_string(s))
def test_index_tokens(self):
    # Indexing the document the vocabulary was built from should yield the
    # expected token ids.
    document = "the quick brown fox jumped over the lazy dog"
    vocabulary = Vocabulary([document], WordTokenizer(True))
    np.testing.assert_equal(np.array([2, 9, 3, 5, 6, 8, 2, 7, 4]),
                            vocabulary.index_string(document))