Esempio n. 1
0
 def testRaggedInputs(self,
                      descr,
                      data,
                      width,
                      expected,
                      axis=-1,
                      ragged_rank=None):
     """Checks `sliding_window` output for an input built as a ragged constant.

     Args:
       descr: Not read by the body; presumably a case label supplied by a
         parameterized-test decorator (not visible here).
       data: Value handed to `ragged_factory_ops.constant`.
       width: Window width forwarded to `sliding_window`.
       expected: Value the windowed result is compared against.
       axis: Axis forwarded to `sliding_window`; defaults to -1.
       ragged_rank: Forwarded to `ragged_factory_ops.constant`.
     """
     ragged_input = ragged_factory_ops.constant(data, ragged_rank=ragged_rank)
     windows = sliding_window_op.sliding_window(ragged_input, width, axis)
     self.assertRaggedEqual(windows, expected)
Esempio n. 2
0
    def testDocStringExamples(self):
        """Runs three `sliding_window` examples, checking values and shapes."""
        # Example 1: width-3 windows along axis 0 of a flat sequence of tokens.
        tokens = constant_op.constant(
            ['one', 'two', 'three', 'four', 'five', 'six'])
        token_windows = sliding_window_op.sliding_window(
            data=tokens, width=3, axis=0)
        expected_token_windows = [
            [b'one', b'two', b'three'],
            [b'two', b'three', b'four'],
            [b'three', b'four', b'five'],
            [b'four', b'five', b'six'],
        ]
        self.assertRaggedEqual(token_windows, expected_token_windows)
        token_shapes = 'Shape: %s -> %s' % (tokens.shape, token_windows.shape)
        self.assertEqual(token_shapes, 'Shape: (6,) -> (4, 3)')

        # Example 2: width-2 windows along the inner dimension of a ragged
        # matrix holding a batch of token sequences of differing lengths.
        sentences = ragged_factory_ops.constant([
            ['Up', 'high', 'in', 'the', 'air'],
            ['Down', 'under', 'water'],
            ['Away', 'to', 'outer', 'space'],
        ])
        bigrams = sliding_window_op.sliding_window(sentences, width=2, axis=-1)
        expected_bigrams = [
            [[b'Up', b'high'], [b'high', b'in'],
             [b'in', b'the'], [b'the', b'air']],
            [[b'Down', b'under'], [b'under', b'water']],
            [[b'Away', b'to'], [b'to', b'outer'], [b'outer', b'space']],
        ]  # pyformat: disable
        self.assertRaggedEqual(bigrams, expected_bigrams)
        sentence_shapes = 'Shape: %s -> %s' % (sentences.shape.as_list(),
                                               bigrams.shape.as_list())
        self.assertEqual(sentence_shapes, 'Shape: [3, None] -> [3, None, 2]')

        # Example 3: width-2 windows along the second dimension of a 3-D
        # tensor holding batches of sequences of embedding vectors.
        embeddings = constant_op.constant([
            [[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], [5, 5, 1]],
            [[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], [5, 5, 2]],
        ])
        vector_windows = sliding_window_op.sliding_window(
            data=embeddings, width=2, axis=1)
        expected_vector_windows = [
            [[[1, 1, 1], [2, 2, 1]], [[2, 2, 1], [3, 3, 1]],
             [[3, 3, 1], [4, 4, 1]], [[4, 4, 1], [5, 5, 1]]],
            [[[1, 1, 2], [2, 2, 2]], [[2, 2, 2], [3, 3, 2]],
             [[3, 3, 2], [4, 4, 2]], [[4, 4, 2], [5, 5, 2]]],
        ]
        self.assertRaggedEqual(vector_windows, expected_vector_windows)
        embedding_shapes = 'Shape: %s -> %s' % (embeddings.shape,
                                                vector_windows.shape)
        self.assertEqual(embedding_shapes, 'Shape: (2, 5, 3) -> (2, 4, 2, 3)')
Esempio n. 3
0
def ngrams(data,
           width,
           axis=-1,
           reduction_type=None,
           string_separator=" ",
           name=None):
    """Build a tensor of n-grams by reducing sliding windows over `data`.

    Windows of `width` adjacent elements are taken along `axis` with
    `sliding_window`, and every window is then collapsed into a single n-gram
    using `reduction_type`. This covers the basic use cases only; for more
    elaborate combinations, call `sliding_window` directly and reduce the
    result yourself.

    Args:
      data: The data to reduce.
      width: The width of the n-gram window. Where there is not enough data
        to fill a window, the resulting n-gram is empty.
      axis: The axis along which n-grams are built. Should be a constant.
        String-join reductions support only `-1`; other reductions accept
        any positive or negative axis.
      reduction_type: A member of the `Reduction` enum. Should be a constant.
        Supported values:

        * `Reduction.SUM`: add the values in each window.
        * `Reduction.MEAN`: average the values in each window.
        * `Reduction.STRING_JOIN`: join the strings in each window
          (requires `axis == -1`).

      string_separator: Separator used by `Reduction.STRING_JOIN`; ignored
        by the other reductions. Must be a string constant, not a Tensor.
      name: The op name.

    Returns:
      A tensor of n-grams.

    Raises:
      InvalidArgumentError: if `reduction_type` is None or is not a
        `Reduction`, or if it is `STRING_JOIN` while `axis` is not -1.
    """
    with tf.name_scope(name, "NGrams", [data, width]):
        # Reject unusable reduction settings before building any windows.
        if reduction_type is None:
            raise errors.InvalidArgumentError(
                None, None, "reduction_type must be specified.")
        if not isinstance(reduction_type, Reduction):
            raise errors.InvalidArgumentError(
                None, None, "reduction_type must be a Reduction.")

        joining_strings = reduction_type is Reduction.STRING_JOIN
        # TODO(b/122967921): Lift this restriction after ragged_reduce_join is done.
        if joining_strings and axis != -1:
            raise errors.InvalidArgumentError(
                None, None,
                "%s requires that ngrams' 'axis' parameter be -1." %
                Reduction.STRING_JOIN.name)

        windows = sliding_window(data, width, axis)

        # A negative axis is used as-is for the reduction; a non-negative one
        # is shifted by one to address the window dimension.
        window_axis = axis if axis < 0 else axis + 1

        # The reduce ops below accept both Tensor and RaggedTensor inputs, so
        # no special-casing on the type of `windows` is needed for SUM/MEAN.
        if reduction_type is Reduction.SUM:
            return tf.reduce_sum(windows, window_axis)
        if reduction_type is Reduction.MEAN:
            return tf.reduce_mean(windows, window_axis)
        if joining_strings:
            if not isinstance(data, tf.RaggedTensor):
                return tf.reduce_join(windows,
                                      axis=axis,
                                      separator=string_separator)
            # For ragged input, the join is applied to the flat values.
            return tf.ragged.map_flat_values(tf.reduce_join,
                                             windows,
                                             axis=axis,
                                             separator=string_separator)
Esempio n. 4
0
 def _test_sliding_window_op(self, expected_result, data, width, axis):
     """Asserts `sliding_window(data, width, axis)` equals `expected_result`."""
     self.assertAllEqual(
         expected_result,
         sliding_window_op.sliding_window(data, width, axis))