def testRaggedInputs(self, descr, data, width, expected, axis=-1, ragged_rank=None):
  """Checks sliding_window output against `expected` for a ragged input.

  Args:
    descr: Human-readable description of the test case (unused by the body;
      presumably surfaced by the parameterized-test runner — confirm).
    data: Nested python lists to convert into a RaggedTensor input.
    width: Window width passed through to sliding_window.
    expected: Expected (nested-list) result of the op.
    axis: Axis along which windows are taken; defaults to the innermost.
    ragged_rank: Optional explicit ragged rank for the constructed input.
  """
  ragged_input = ragged_factory_ops.constant(data, ragged_rank=ragged_rank)
  windows = sliding_window_op.sliding_window(ragged_input, width, axis)
  self.assertRaggedEqual(windows, expected)
def testDocStringExamples(self):
  """Exercises each of the examples from the sliding_window docstring."""
  # Example 1: width-3 windows over a flat (1-D) sequence of tokens.
  tokens = constant_op.constant(
      ['one', 'two', 'three', 'four', 'five', 'six'])
  windows = sliding_window_op.sliding_window(data=tokens, width=3, axis=0)
  self.assertRaggedEqual(
      windows, [[b'one', b'two', b'three'], [b'two', b'three', b'four'],
                [b'three', b'four', b'five'], [b'four', b'five', b'six']])
  self.assertEqual('Shape: %s -> %s' % (tokens.shape, windows.shape),
                   'Shape: (6,) -> (4, 3)')

  # Example 2: width-2 windows along the inner (ragged) dimension of a
  # batch of token sequences.
  sentences = ragged_factory_ops.constant(
      [['Up', 'high', 'in', 'the', 'air'],
       ['Down', 'under', 'water'],
       ['Away', 'to', 'outer', 'space']])
  bigrams = sliding_window_op.sliding_window(sentences, width=2, axis=-1)
  self.assertRaggedEqual(
      bigrams,
      [[[b'Up', b'high'], [b'high', b'in'], [b'in', b'the'], [b'the', b'air']],
       [[b'Down', b'under'], [b'under', b'water']],
       [[b'Away', b'to'], [b'to', b'outer'], [b'outer', b'space']]
      ])  # pyformat: disable
  self.assertEqual(
      'Shape: %s -> %s' % (sentences.shape.as_list(), bigrams.shape.as_list()),
      'Shape: [3, None] -> [3, None, 2]')

  # Example 3: width-2 windows along the second dimension of a 3-D tensor
  # holding batches of sequences of embedding vectors.
  embeddings = constant_op.constant(
      [[[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], [5, 5, 1]],
       [[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], [5, 5, 2]]])
  pairs = sliding_window_op.sliding_window(data=embeddings, width=2, axis=1)
  self.assertRaggedEqual(
      pairs,
      [[[[1, 1, 1], [2, 2, 1]], [[2, 2, 1], [3, 3, 1]],
        [[3, 3, 1], [4, 4, 1]], [[4, 4, 1], [5, 5, 1]]],
       [[[1, 1, 2], [2, 2, 2]], [[2, 2, 2], [3, 3, 2]],
        [[3, 3, 2], [4, 4, 2]], [[4, 4, 2], [5, 5, 2]]]])
  self.assertEqual('Shape: %s -> %s' % (embeddings.shape, pairs.shape),
                   'Shape: (2, 5, 3) -> (2, 4, 2, 3)')
def ngrams(data, width, axis=-1, reduction_type=None, string_separator=" ", name=None):
  """Create a tensor of n-grams based on the input data `data`.

  N-grams of width `width` are formed along `axis` by combining each window
  of `width` adjacent elements of `data` with `reduction_type`. This op
  covers the common cases; more elaborate combinations can be built directly
  on top of the sliding_window op.

  Args:
    data: The data to reduce.
    width: The width of the ngram window. If there is not sufficient data to
      fill out the ngram window, the resulting ngram will be empty.
    axis: The axis to create ngrams along. Note that for string join
      reductions, only axis '-1' is supported; for other reductions, any
      positive or negative axis can be used. Should be a constant.
    reduction_type: A member of the Reduction enum. Should be a constant.
      Currently supports:
      * `Reduction.SUM`: Add values in the window.
      * `Reduction.MEAN`: Average values in the window.
      * `Reduction.STRING_JOIN`: Join strings in the window.
        Note that axis must be -1 here.
    string_separator: The separator string used for `Reduction.STRING_JOIN`.
      Ignored otherwise. Must be a string constant, not a Tensor.
    name: The op name.

  Returns:
    A tensor of ngrams.

  Raises:
    InvalidArgumentError: if `reduction_type` is either None or not a
      Reduction, or if `reduction_type` is STRING_JOIN and `axis` is not -1.
  """
  with tf.name_scope(name, "NGrams", [data, width]):
    # Validate the reduction before doing any work.
    if reduction_type is None:
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be specified.")
    if not isinstance(reduction_type, Reduction):
      raise errors.InvalidArgumentError(None, None,
                                        "reduction_type must be a Reduction.")

    # TODO(b/122967921): Lift this restriction after ragged_reduce_join is done.
    if reduction_type is Reduction.STRING_JOIN and axis != -1:
      raise errors.InvalidArgumentError(
          None, None, "%s requires that ngrams' 'axis' parameter be -1." %
          Reduction.STRING_JOIN.name)

    windowed_data = sliding_window(data, width, axis)

    # Windowing inserts a new dimension after `axis`; for a non-negative
    # axis the window dimension therefore sits at `axis + 1`, while a
    # negative axis keeps pointing at the window dimension unchanged.
    reduction_axis = axis if axis < 0 else axis + 1

    # Ragged reduction ops work on both Tensor and RaggedTensor, so we can
    # use them here regardless of the type of tensor in 'windowed_data'.
    if reduction_type is Reduction.SUM:
      return tf.reduce_sum(windowed_data, reduction_axis)
    elif reduction_type is Reduction.MEAN:
      return tf.reduce_mean(windowed_data, reduction_axis)
    elif reduction_type is Reduction.STRING_JOIN:
      # STRING_JOIN only reaches here with axis == -1 (validated above), so
      # joining along `axis` collapses the window dimension.
      if isinstance(data, tf.RaggedTensor):
        return tf.ragged.map_flat_values(
            tf.reduce_join,
            windowed_data,
            axis=axis,
            separator=string_separator)
      else:
        return tf.reduce_join(
            windowed_data, axis=axis, separator=string_separator)
def _test_sliding_window_op(self, expected_result, data, width, axis):
  """Runs sliding_window on `data` and asserts it equals `expected_result`."""
  actual = sliding_window_op.sliding_window(data, width, axis)
  self.assertAllEqual(expected_result, actual)