def test_none(self):
    """A 3-character word with n in [4, 5] yields an empty dense tensor."""
    ngrams = char_ngrams('123', 4, 5, itself='ASIS')

    # Scalar input is expected to produce a regular (non-ragged) tensor.
    self.assertTrue(tf.is_tensor(ngrams))
    self.assertNotIsInstance(ngrams, tf.RaggedTensor)

    values = self.evaluate(ngrams)
    self.assertAllEqual([], values.tolist())
def test_alone_inside(self):
    """With itself='ALONE' and n in [2, 3], only the 2-grams of '123' remain.

    The whole word '123' is not expected among the results.
    """
    ngrams = char_ngrams('123', 2, 3, itself='ALONE')

    # Scalar input is expected to produce a regular (non-ragged) tensor.
    self.assertTrue(tf.is_tensor(ngrams))
    self.assertNotIsInstance(ngrams, tf.RaggedTensor)

    values = self.evaluate(ngrams)
    self.assertAllEqual([b'12', b'23'], values.tolist())
def test_as_is_below(self):
    """With itself='ASIS', '1234' yields every 2-gram followed by every 3-gram."""
    ngrams = char_ngrams('1234', 2, 3, itself='ASIS')

    # Scalar input is expected to produce a regular (non-ragged) tensor.
    self.assertTrue(tf.is_tensor(ngrams))
    self.assertNotIsInstance(ngrams, tf.RaggedTensor)

    values = self.evaluate(ngrams)
    wanted = [b'12', b'23', b'34', b'123', b'234']
    self.assertAllEqual(wanted, values.tolist())
def test_default_2d(self):
    """A nested-list (2-D) input yields a ragged result of unigrams.

    The ragged result is padded with '' to a dense tensor before comparing.
    """
    wanted = tf.constant([[['x', 'y'], ['x', '']]], dtype=tf.string)

    ngrams = char_ngrams([['xy', 'x']], 1, 1, itself='ASIS')
    self.assertIsInstance(ngrams, tf.RaggedTensor)

    # Words of different length give rows of different size; pad to compare.
    dense = ngrams.to_tensor(default_value='')
    wanted_value, dense_value = self.evaluate([wanted, dense])
    self.assertAllEqual(wanted_value, dense_value)
def test_0d(self):
    """A scalar (0-D) string input yields a dense tensor of its unigrams."""
    wanted = tf.constant(['x', 'y'], dtype=tf.string)

    ngrams = char_ngrams('xy', 1, 1, itself='NEVER')
    self.assertTrue(tf.is_tensor(ngrams))
    self.assertNotIsInstance(ngrams, tf.RaggedTensor)

    wanted_value, ngrams_value = self.evaluate([wanted, ngrams])
    self.assertAllEqual(wanted_value, ngrams_value)
def test_inference_shape(self):
    """The static shape keeps the input dims and adds an unknown last dim."""
    words = [['1', '2', '3'], ['4', '5', '6']]

    ngrams = char_ngrams(words, 1, 1, itself='ALWAYS')

    static_shape = ngrams.shape.as_list()
    self.assertEqual([2, 3, None], static_shape)
def test_actual_shape(self):
    """The evaluated, padded result for 2x3 single-char words is (2, 3, 1)."""
    words = [['1', '2', '3'], ['4', '5', '6']]

    ngrams = char_ngrams(words, 1, 1, itself='ALWAYS')
    self.assertIsInstance(ngrams, tf.RaggedTensor)

    # Convert the ragged result to a dense tensor to inspect its real shape.
    dense = ngrams.to_tensor(default_value='')
    dense_value = self.evaluate(dense)
    self.assertAllEqual((2, 3, 1), dense_value.shape)
def ngram_features(input_words, minn, maxn):
    """Build character n-gram features from words.

    Pipeline, in order: NFKC unicode normalization, removal of the listed
    accent characters, lower-casing, `zero_digits`, wrapping each word with
    '<' and '>', and finally `char_ngrams` with itself='ALONE'.

    NOTE(review): the exact semantics of the helpers (`zero_digits`,
    `wrap_with`, `char_ngrams`) are defined outside this block -- confirm
    against their implementations.

    Args:
        input_words: Words to featurize; passed straight to
            `normalize_unicode`.
        minn: Forwarded to `char_ngrams` as its second argument.
        maxn: Forwarded to `char_ngrams` as its third argument.

    Returns:
        The result of `char_ngrams` on the preprocessed words.
    """
    # Accent characters that are stripped from the normalized text.
    # NOTE(review): the ' \u0301' entry (space + combining acute) presumably
    # targets what NFKC produces for a spacing acute accent -- confirm.
    accent_marks = [
        u'\u0060',
        u' \u0301',
        u'\u02CA',
        u'\u02CB',
        u'\u0300',
        u'\u0301',
    ]
    replacements = [''] * len(accent_marks)

    normalized = normalize_unicode(input_words, 'NFKC')
    unaccented = replace_string(normalized, accent_marks, replacements)
    lowered = lower_case(unaccented)
    zeroed = zero_digits(lowered)
    wrapped = wrap_with(zeroed, '<', '>')

    return char_ngrams(wrapped, minn, maxn, itself='ALONE')
def test_ragged(self):
    """A ragged input yields a ragged result of 1- and 2-grams.

    The result is padded with '' to a dense tensor before comparing.
    """
    wanted = tf.constant([
        [
            ['a', 'b', 'ab', '', ''],
            ['c', ' ', 'd', 'c ', ' d'],
        ],
        [
            ['e', '', '', '', ''],
            ['', '', '', '', ''],
        ],
    ])

    words = tf.ragged.constant([['ab', 'c d'], ['e']])
    ngrams = char_ngrams(words, 1, 2, itself='ASIS')
    self.assertIsInstance(ngrams, tf.RaggedTensor)

    # Missing words and shorter n-gram lists both show up as '' padding.
    dense = ngrams.to_tensor(default_value='')
    wanted_value, dense_value = self.evaluate([wanted, dense])
    self.assertAllEqual(wanted_value, dense_value)