Example #1
0
 def test_vocabulary_serialization(self):
     """Check that a Vocabulary survives a serialize/deserialize round trip.

     The restored object must compare equal to the original and must index
     a probe string to the same values as the original does.
     """
     path = os.path.join(self.directory, "vocabulary.pkl")
     corpus = ["the quick brown fox jumped over the lazy dog"]
     original = Vocabulary(corpus, WordTokenizer(True))
     self._serialize(path, original)
     # The serialize helper is expected to have left a file at the target path.
     self.assertTrue(os.path.isfile(path))
     restored = self._deserialize(path)
     self.assertEqual(original, restored)
     # NOTE(review): the probe has a capitalised word and a word ("black")
     # that is absent from the corpus -- presumably to cover case handling
     # and unknown tokens; confirm against Vocabulary.index_string.
     probe = "The quick black fox"
     indexed_by_original = original.index_string(probe)
     indexed_by_restored = restored.index_string(probe)
     np.testing.assert_equal(indexed_by_original, indexed_by_restored)
Example #2
0
 def test_vocabulary_serialization(self):
     """Serialize a Vocabulary, load it back, and compare it with the source.

     Passes when the file is created, the reloaded object equals the
     original, and both produce the same indexes for a sample sentence.
     """
     target_file = os.path.join(self.directory, "vocabulary.pkl")
     documents = ["the quick brown fox jumped over the lazy dog"]
     tokenizer = WordTokenizer(True)
     before = Vocabulary(documents, tokenizer)

     self._serialize(target_file, before)
     file_was_written = os.path.isfile(target_file)
     self.assertTrue(file_was_written)

     after = self._deserialize(target_file)
     self.assertEqual(before, after)

     # Equality alone may not exercise indexing, so compare the indexes
     # that each object yields for the same sentence as well.
     sentence = "The quick black fox"
     indexes_before = before.index_string(sentence)
     indexes_after = after.index_string(sentence)
     np.testing.assert_equal(indexes_before, indexes_after)
Example #3
0
 def test_index_tokens(self):
     """Check the indexes a Vocabulary assigns to the tokens of its own document.

     The vocabulary is built from a single sentence and then asked to index
     that same sentence. The repeated word "the" must map to the same index
     (2) at both of its positions.
     """
     document = "the quick brown fox jumped over the lazy dog"
     vocabulary = Vocabulary([document], WordTokenizer(True))
     # NOTE(review): indexes 0 and 1 never appear in the expected output;
     # presumably they are reserved by Vocabulary -- confirm.
     expected = np.array([2, 9, 3, 5, 6, 8, 2, 7, 4])
     # numpy's signature is assert_equal(actual, desired). Passing the value
     # under test first keeps the ACTUAL/DESIRED labels in a failure report
     # pointing at the right operands.
     np.testing.assert_equal(vocabulary.index_string(document), expected)
Example #4
0
 def test_index_tokens(self):
     """Verify that indexing the source document yields the expected index array.

     A Vocabulary is created from one sentence; indexing that sentence must
     produce one index per word, with both occurrences of "the" sharing
     index 2.
     """
     document = "the quick brown fox jumped over the lazy dog"
     vocabulary = Vocabulary([document], WordTokenizer(True))
     actual_indexes = vocabulary.index_string(document)
     expected_indexes = np.array([2, 9, 3, 5, 6, 8, 2, 7, 4])
     # np.testing.assert_equal takes (actual, desired); the computed value
     # goes first so a mismatch report labels the two sides correctly.
     np.testing.assert_equal(actual_indexes, expected_indexes)