Example #1
0
 def testNGramsBagOfWordsEmpty(self):
   """Checks ngrams and bag_of_words on an empty string tensor.

   Both mappers are applied to the tokenization of a zero-element string
   tensor. Each result is expected to carry no values and to report a dense
   shape of [0, 0].
   """
   # Build and evaluate the ops inside a dedicated graph so the Session-based
   # `eval()` calls below do not rely on (or add ops to) the process-wide
   # default graph.
   with tf.compat.v1.Graph().as_default():
     string_tensor = tf.constant([], dtype=tf.string)
     tokenized_tensor = tf.compat.v1.string_split(
         string_tensor, delimiter='')
     ngrams = mappers.ngrams(tokenized_tensor, (1, 2), separator='')
     bow = mappers.bag_of_words(tokenized_tensor, (1, 2), separator='')
     with tf.compat.v1.Session():
       ngrams_output = ngrams.eval()
       bow_output = bow.eval()
       self.assertAllEqual(ngrams_output.values, [])
       self.assertAllEqual(bow_output.values, [])
       self.assertAllEqual(ngrams_output.dense_shape, [0, 0])
       self.assertAllEqual(bow_output.dense_shape, [0, 0])
Example #2
0
 def testBagOfWords(self,
                    strings,
                    expected_output_indices,
                    expected_output_values,
                    ngram_range=(1, 1),
                    separator=' '):
   """Checks the sparse output of bag_of_words for the given strings.

   Args:
     strings: Input strings; converted to a `tf.string` constant.
     expected_output_indices: Expected `indices` of the evaluated output.
     expected_output_values: Expected `values` of the evaluated output.
     ngram_range: Forwarded to `mappers.bag_of_words` as `ngram_range`.
     separator: Used both as the delimiter when splitting `strings` into
       tokens and as the `separator` argument of `mappers.bag_of_words`.
   """
   # Build and evaluate the ops inside a dedicated graph so the Session-based
   # `eval()` call below does not rely on (or add ops to) the process-wide
   # default graph.
   with tf.compat.v1.Graph().as_default():
     string_tensor = tf.constant(strings, dtype=tf.string)
     tokenized_tensor = tf.compat.v1.string_split(
         string_tensor, delimiter=separator)
     output_tensor = mappers.bag_of_words(
         tokens=tokenized_tensor, ngram_range=ngram_range, separator=separator)
     with tf.compat.v1.Session():
       output = output_tensor.eval()
       self.assertAllEqual(output.indices, expected_output_indices)
       self.assertAllEqual(output.values, expected_output_values)
Example #3
0
 def testBagOfWords(self,
                    strings,
                    expected_output_indices,
                    expected_output_values,
                    ngram_range=(1, 1),
                    separator=' '):
   """Verifies the indices and values produced by bag_of_words.

   The test is skipped when `sys.platform` is 'darwin'.

   Args:
     strings: Input strings; converted to a `tf.string` constant.
     expected_output_indices: Expected `indices` of the evaluated output.
     expected_output_values: Expected `values` of the evaluated output.
     ngram_range: Forwarded to `mappers.bag_of_words` as `ngram_range`.
     separator: Used both as the delimiter when splitting `strings` into
       tokens and as the `separator` argument of `mappers.bag_of_words`.
   """
   # TODO(b/141750093): Re-enable this test for MacOS.
   running_on_mac = sys.platform == 'darwin'
   if running_on_mac:
     skip_reason = (
         'bag_of_words can produce unexpected results on macOS when there are '
         'empty rows, such as certain words overwritten with an empty string.')
     self.skipTest(skip_reason)

   # All ops live in a graph private to this test invocation.
   isolated_graph = tf.compat.v1.Graph()
   with isolated_graph.as_default():
     raw_strings = tf.constant(strings, dtype=tf.string)
     tokens = tf.compat.v1.string_split(raw_strings, delimiter=separator)
     bag = mappers.bag_of_words(
         tokens=tokens, ngram_range=ngram_range, separator=separator)
     with tf.compat.v1.Session() as session:
       result = session.run(bag)
       self.assertAllEqual(result.indices, expected_output_indices)
       self.assertAllEqual(result.values, expected_output_values)