Example #1
    def call(self, inputs, invert=False):
        table = self._inverse_table if invert else self._table
        # The table lookup ops don't natively support ragged tensors, so if we have
        # a RT we need to use map_flat_values to look up every element.
        if ragged_tensor.is_ragged(inputs):
            indexed_data = ragged_functional_ops.map_flat_values(
                table.lookup, inputs)
            if not invert:
                indexed_data = ragged_functional_ops.map_flat_values(
                    self.replace_oov_buckets, inputs, indexed_data)
        elif isinstance(
                inputs,
            (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
            values = table.lookup(inputs.values)
            if not invert:
                values = self.replace_oov_buckets(inputs.values, values)
            indexed_data = sparse_tensor.SparseTensor(inputs.indices, values,
                                                      inputs.dense_shape)
        else:
            indexed_data = table.lookup(inputs)
            if not invert:
                indexed_data = self.replace_oov_buckets(inputs, indexed_data)
            # (b/149446477): output does not preserve input shape.
            indexed_data.set_shape(inputs.shape)

        # Composite tensors can pass tensor values through, which will cause
        # errors if this is the only layer in the model. To fix this, pass
        # the output through an identity op.
        return array_ops.identity(indexed_data)
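# Usage sketch (not taken from the example above): the same lookup pattern with
# the public TF 2.x API, assuming a tf.lookup.StaticHashTable in place of the
# layer's internal table. The vocabulary and inputs are illustrative.
import tensorflow as tf

vocab = ["foo", "bar", "baz"]
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(vocab),
        values=tf.range(len(vocab), dtype=tf.int64)),
    default_value=-1)

# The lookup is applied to the flat_values only, so the ragged row partitions
# are preserved unchanged.
tokens = tf.ragged.constant([["foo", "baz"], ["qux"]])
ids = tf.ragged.map_flat_values(table.lookup, tokens)
print(ids)  # <tf.RaggedTensor [[0, 2], [-1]]>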
 def testRaggedTensorSplitsRaggedRankMismatchError(self):
     x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
     y = ragged_factory_ops.constant([[[3, 1, 4], []], [], [[1, 5]]])
     with self.assertRaisesRegex(
             ValueError,
             r'All ragged inputs must have the same ragged_rank.'):
         ragged_functional_ops.map_flat_values(math_ops.add, x, y)
 def testRaggedTensorShapeMismatchError(self):
     x = ragged_factory_ops.constant([[1, 2, 3], [4, 5]])
     with self.assertRaisesRegex(
             ValueError,
             r'tf.ragged.map_flat_values requires that the output of '
             '`op` have the same outer-dimension size as flat_values of any ragged '
             r'inputs. \(output shape: \(\); expected outer dimension size: 5\)'
     ):
         ragged_functional_ops.map_flat_values(math_ops.argmax, x)
 def testDocStringExamples(self):
     """Test the examples in apply_op_to_ragged_values.__doc__."""
     rt = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6]])
     v1 = ragged_functional_ops.map_flat_values(array_ops.ones_like, rt)
     v2 = ragged_functional_ops.map_flat_values(math_ops.multiply, rt, rt)
     v3 = ragged_functional_ops.map_flat_values(math_ops.add, rt, 5)
     self.assertAllEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
     self.assertAllEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
     self.assertAllEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
 def testDocStringExamples(self):
   """Test the examples in apply_op_to_ragged_values.__doc__."""
   rt = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6]])
   v1 = ragged_functional_ops.map_flat_values(array_ops.ones_like, rt)
   v2 = ragged_functional_ops.map_flat_values(math_ops.multiply, rt, rt)
   v3 = ragged_functional_ops.map_flat_values(math_ops.add, rt, 5)
   self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
   self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
   self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
Example #6
 def _ragged_lookup(self, inputs):
   """Perform a table lookup on a ragged tensor."""
   # The table lookup ops don't natively support ragged tensors, so if we have
   # a RT we need to use map_flat_values to look up every element.
   indexed_data = ragged_functional_ops.map_flat_values(
       self.table.lookup, inputs)
   indexed_data = ragged_functional_ops.map_flat_values(
       self._replace_oov_buckets, inputs, indexed_data)
   # Composite tensors can pass tensor values through, which will cause
   # errors if all operations in the TF graph do so. We can break this chain
   # with an identity here.
   return array_ops.identity(indexed_data)
Example #7
 def _ragged_lookup(self, inputs):
     """Perform a table lookup on a ragged tensor."""
     # The table lookup ops don't natively support ragged tensors, so if we have
     # a RT we need to use map_flat_values to look up every element.
     indexed_data = ragged_functional_ops.map_flat_values(
         self._lookup_and_mask, inputs)
     indexed_data = ragged_functional_ops.map_flat_values(
         self._replace_oov_buckets, inputs, indexed_data)
     # table.lookup is not shape-preserving, so we need to set the shape here.
     indexed_data._set_shape(inputs.shape)  # pylint: disable=protected-access
     # Composite tensors can pass tensor values through, which will cause
     # errors if all operations in the TF graph do so. We can break this chain
     # with an identity here.
     return array_ops.identity(indexed_data)
    def testRaggedTensorSplitsValueMismatchError(self):
        x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
        y = ragged_factory_ops.constant([[1], [2, 3], [4, 5]])
        with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                                    r'partitions have incompatible'):
            ragged_functional_ops.map_flat_values(math_ops.add, x, y)

        z_splits = array_ops.placeholder_with_default(
            constant_op.constant([0, 3], dtypes.int64), None)
        z = ragged_tensor.RaggedTensor.from_row_splits([0, 1, 2], z_splits)
        with self.assertRaisesRegex(
                ValueError,
                r"Input RaggedTensors' flat_values must all have the same "
                r'outer-dimension size.  Got sizes: \{3, 5\}'):
            ragged_functional_ops.map_flat_values(math_ops.add, x, z)
    def benchmark_split_merge_tokenizer(self):
        if FLAGS.ragged_vs_dense:
            return

        random_seed.set_seed(5)

        char_splits = self._get_char_level_splits()
        if not context.executing_eagerly():
            # Evaluate splits since their shapes cannot be inferred in graph
            # mode and are needed for mapping.
            with session.Session() as sess:
                sess.run(self.iterator.initializer)
                char_splits = sess.run(char_splits)

        def randomize_splits(inputs):
            return random_ops.random_uniform(inputs.shape,
                                             maxval=2,
                                             dtype=dtypes.int32)

        labels = ragged_functional_ops.map_flat_values(randomize_splits,
                                                       char_splits)

        if not context.executing_eagerly():
            # Evaluate labels computation to exclude these steps from op benchmarking
            with session.Session() as sess:
                labels = sess.run(labels)

        tokenizer = text_ops.SplitMergeTokenizer()
        self._run(tokenizer, {"labels": labels})
Example #10
  def call(self, inputs):
    if ragged_tensor.is_ragged(inputs):
      integer_buckets = ragged_functional_ops.map_flat_values(
          math_ops._bucketize, inputs, boundaries=self.bins)  # pylint: disable=protected-access
      # Ragged map_flat_values doesn't touch the non-values tensors in the
      # ragged composite tensor. If this op is the only op in a Keras model,
      # this can cause errors in Graph mode, so wrap the tensor in an identity.
      integer_buckets = array_ops.identity(integer_buckets)
    elif isinstance(inputs, sparse_tensor.SparseTensor):
      integer_buckets = math_ops._bucketize(  # pylint: disable=protected-access
          inputs.values,
          boundaries=self.bins)
    else:
      integer_buckets = math_ops._bucketize(inputs, boundaries=self.bins)  # pylint: disable=protected-access

    if self.output_mode == INTEGER:
      if isinstance(inputs, sparse_tensor.SparseTensor):
        return sparse_tensor.SparseTensor(
            indices=array_ops.identity(inputs.indices),
            values=integer_buckets,
            dense_shape=array_ops.identity(inputs.dense_shape))
      return integer_buckets
    else:
      if isinstance(inputs, sparse_tensor.SparseTensor):
        raise ValueError("`output_mode=binary` is not supported for "
                         "sparse input")
      # The 'bins' array is the set of boundaries between the bins. We actually
      # have 'len(bins)+1' outputs.
      # TODO(momernick): This will change when we have the ability to adapt().
      return array_ops.one_hot(integer_buckets, depth=len(self.bins) + 1)
  def testRaggedMapOnStructure_RaggedOutputs(self):
    batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # [[10, 20, 30], [40], [50, 60, 70]]
    robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

    features = {'batman': batman, 'robin': robin}

    def _increment(f):
      return {
          'batman': f['batman'] + 1,
          'robin': f['robin'] + 1,
      }

    output = ragged_map_ops.map_fn(
        fn=_increment,
        elems=features,
        infer_shape=False,
        dtype={
            'batman':
                ragged_tensor.RaggedTensorType(
                    dtype=dtypes.int32, ragged_rank=1),
            'robin':
                ragged_tensor.RaggedTensorType(
                    dtype=dtypes.int32, ragged_rank=1)
        },
    )

    self.assertRaggedEqual(output['batman'], [[2, 3, 4], [5], [6, 7, 8]])
    self.assertRaggedEqual(output['robin'], [[11, 21, 31], [41], [51, 61, 71]])
    def call(self, inputs):
        bins = [math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)]

        def _bucketize_fn(inputs):
            return gen_boosted_trees_ops.BoostedTreesBucketize(
                float_values=[math_ops.cast(inputs, dtypes.float32)],
                bucket_boundaries=bins)[0]

        if tf_utils.is_ragged(inputs):
            integer_buckets = ragged_functional_ops.map_flat_values(
                _bucketize_fn, inputs)
            # Ragged map_flat_values doesn't touch the non-values tensors in the
            # ragged composite tensor. If this op is the only op in a Keras model,
            # this can cause errors in Graph mode, so wrap the tensor in an identity.
            return array_ops.identity(integer_buckets)
        elif isinstance(inputs, sparse_tensor.SparseTensor):
            return sparse_tensor.SparseTensor(
                indices=array_ops.identity(inputs.indices),
                values=_bucketize_fn(inputs.values),
                dense_shape=array_ops.identity(inputs.dense_shape))
        else:
            static_shape = inputs.get_shape()
            if any(dim is None for dim in static_shape.as_list()[1:]):
                raise NotImplementedError(
                    "Discretization Layer requires known non-batch shape,"
                    "found {}".format(static_shape))

            dynamic_shape = array_ops.shape_v2(inputs)
            # BoostedTreesBucketize only handles rank 1 inputs. We need to flatten our
            # inputs after batch size and vectorized_map over each sample.
            reshaped = array_ops.reshape(inputs, [dynamic_shape[0], -1])
            return array_ops.reshape(
                control_flow_ops.vectorized_map(_bucketize_fn, reshaped),
                dynamic_shape)
Example #13
 def _process_single_input(self, inputs):
     # Converts integer inputs to string.
     if inputs.dtype.is_integer:
         if isinstance(inputs, sparse_tensor.SparseTensor):
             inputs = sparse_tensor.SparseTensor(
                 indices=inputs.indices,
                 values=string_ops.as_string(inputs.values),
                 dense_shape=inputs.dense_shape)
         else:
             inputs = string_ops.as_string(inputs)
     str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
     if tf_utils.is_ragged(inputs):
         return ragged_functional_ops.map_flat_values(
             str_to_hash_bucket,
             inputs,
             num_buckets=self.num_bins,
             name='hash')
     elif isinstance(inputs, sparse_tensor.SparseTensor):
         sparse_values = inputs.values
         sparse_hashed_values = str_to_hash_bucket(sparse_values,
                                                   self.num_bins,
                                                   name='hash')
         return sparse_tensor.SparseTensor(indices=inputs.indices,
                                           values=sparse_hashed_values,
                                           dense_shape=inputs.dense_shape)
     else:
         return str_to_hash_bucket(inputs, self.num_bins, name='hash')
Example #14
    def testRaggedMapOnStructure_RaggedOutputs(self):
        batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
        # [[10, 20, 30], [40], [50, 60, 70]]
        robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

        features = {'batman': batman, 'robin': robin}

        def _increment(f):
            return {
                'batman': f['batman'] + 1,
                'robin': f['robin'] + 1,
            }

        output = ragged_map_ops.map_fn(
            fn=_increment,
            elems=features,
            infer_shape=False,
            dtype={
                'batman':
                ragged_tensor.RaggedTensorType(dtype=dtypes.int32,
                                               ragged_rank=1),
                'robin':
                ragged_tensor.RaggedTensorType(dtype=dtypes.int32,
                                               ragged_rank=1)
            },
        )

        self.assertAllEqual(output['batman'], [[2, 3, 4], [5], [6, 7, 8]])
        self.assertAllEqual(output['robin'],
                            [[11, 21, 31], [41], [51, 61, 71]])
Example #15
def embedding_lookup_ragged(embedding_weights, ragged_ids, name=None):
    """Look up the ragged ids in a list of embedding tensors.

  Args:
    embedding_weights: A tensor representing the complete embedding tensor
      having the shape [e1, ...eM]
    ragged_ids: A 'RaggedTensor' with type 'int32' or 'int64' containing the ids
      to be looked up in 'embedding_weights' of shape [r0, ..rN]. Values must be
      in the range '[0, embedding_weights.shape[0])'.
    name: A name for the operation (optional)

  Returns:
    A ragged tensor of shape [r0, r1, ...rN, e1, ...eM].

  Raises:
    ValueError: if the embedding_weights is empty or the ragged_ids is not a
      RaggedTensor.
  """
    if embedding_weights is None:
        raise ValueError("The embedding weights must be specified.")
    if isinstance(embedding_weights, (list, tuple)) and not embedding_weights:
        raise ValueError("The embedding weights should not be empty.")
    if ragged_ids.dtype != dtypes.int32 and ragged_ids.dtype != dtypes.int64:
        raise ValueError(
            "The values contained by the inputs have type " +
            str(ragged_ids.dtype) + " and cannot be processed. All values"
            " should be indices, either of type `in32` or `int64`.")

    with ops.name_scope(name, "embedding_lookup_ragged") as name:
        looked_up_ragged = ragged_functional_ops.map_flat_values(
            array_ops.gather, embedding_weights, ragged_ids)

        return looked_up_ragged
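# Usage sketch (illustrative values, not part of the example above): the
# map_flat_values call substitutes the ragged tensor's flat_values into the
# gather, so the result keeps the input row partitions plus the embedding axis.
import tensorflow as tf

embedding_weights = tf.constant([[0.0, 0.0],
                                 [1.0, 1.0],
                                 [2.0, 2.0]])  # shape [3, 2]
ragged_ids = tf.ragged.constant([[0, 2], [1]], dtype=tf.int64)
embedded = tf.ragged.map_flat_values(tf.gather, embedding_weights, ragged_ids)
print(embedded.shape)  # (2, None, 2)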
Example #16
def _elementwise_where_v2(condition, x, y):
    """Ragged version of tf.where_v2(condition, x, y)."""
    # Broadcast x, y, and condition to have the same shape.
    if not (condition.shape.is_fully_defined() and x.shape.is_fully_defined()
            and y.shape.is_fully_defined() and x.shape == y.shape
            and condition.shape == x.shape):
        shape_c = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(
            condition)
        shape_x = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(x)
        shape_y = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(y)
        shape = ragged_tensor_shape.broadcast_dynamic_shape(
            shape_c,
            ragged_tensor_shape.broadcast_dynamic_shape(shape_x, shape_y))
        condition = ragged_tensor_shape.broadcast_to(condition, shape)
        x = ragged_tensor_shape.broadcast_to(x, shape)
        y = ragged_tensor_shape.broadcast_to(y, shape)

    condition_is_ragged = isinstance(condition, ragged_tensor.RaggedTensor)
    x_is_ragged = isinstance(x, ragged_tensor.RaggedTensor)
    y_is_ragged = isinstance(y, ragged_tensor.RaggedTensor)
    if not (condition_is_ragged or x_is_ragged or y_is_ragged):
        return array_ops.where_v2(condition, x, y)

    return ragged_functional_ops.map_flat_values(array_ops.where_v2, condition,
                                                 x, y)
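# Usage sketch of the fast path above, assuming condition, x, and y already
# share identical row partitions so no broadcasting is needed (illustrative
# values).
import tensorflow as tf

condition = tf.ragged.constant([[True, False, True], [False]])
x = tf.ragged.constant([[1, 2, 3], [4]])
y = tf.ragged.constant([[10, 20, 30], [40]])

# The element-wise select is applied directly to the flat values; the shared
# row partitions carry over to the result.
result = tf.ragged.map_flat_values(tf.where, condition, x, y)
print(result)  # <tf.RaggedTensor [[1, 20, 3], [40]]>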
Example #17
  def call(self, inputs):
    inputs = self._preprocess(inputs)

    # If we're not doing any output processing, return right away.
    if self._output_mode is None:
      return inputs

    # The table lookup ops don't natively support ragged tensors, so if we have
    # a RT we need to use map_flat_values to look up every element.
    if ragged_tensor.is_ragged(inputs):
      indexed_data = ragged_functional_ops.map_flat_values(
          self._table.lookup, inputs)
    else:
      indexed_data = self._table.lookup(inputs)

    if self._output_mode == INT:
      # Once we have the dense tensor, we can return it if we weren't given a
      # fixed output sequence length. If we were, though, we have to dynamically
      # choose whether to pad or trim it based on each tensor.

      # We need to convert to dense if we have a ragged tensor.
      if ragged_tensor.is_ragged(indexed_data):
        dense_data = indexed_data.to_tensor(default_value=0)
      else:
        dense_data = indexed_data

      if self._output_sequence_length is None:
        return dense_data
      else:
        sequence_len = K.shape(dense_data)[1]
        pad_amt = self._output_sequence_length - sequence_len
        pad_fn = lambda: array_ops.pad(dense_data, [[0, 0], [0, pad_amt]])
        slice_fn = lambda: dense_data[:, :self._output_sequence_length]
        return control_flow_ops.cond(
            sequence_len < self._output_sequence_length,
            true_fn=pad_fn,
            false_fn=slice_fn)

    out_depth = self._max_tokens if self._pad_to_max else math_ops.cast(
        (self._get_table_size() + self._reserved_values), dtypes.int32)

    if self._output_mode == BINARY:
      bool_one_hot_data = array_ops.one_hot(
          indexed_data, depth=out_depth, on_value=True, off_value=False)
      reduced_bool_data = math_ops.reduce_any(bool_one_hot_data, axis=1)
      binary_data = math_ops.cast(reduced_bool_data, dtypes.int64)
      return binary_data

    one_hot_data = array_ops.one_hot(indexed_data, depth=out_depth)
    counts = math_ops.reduce_sum(one_hot_data, axis=1)
    if self._output_mode == COUNT:
      return math_ops.cast(counts, dtypes.int64)

    tf_idf_data = math_ops.multiply(counts, self._tf_idf_weights)
    if self._output_mode == TFIDF:
      return tf_idf_data

    # We can only get here if we didn't recognize the passed mode.
    raise ValueError("Unknown output mode %s" % self._output_mode)
 def assertRaggedMapInnerValuesReturns(self,
                                       op,
                                       expected,
                                       args=(),
                                       kwargs=None):
     kwargs = kwargs or {}
     result = ragged_functional_ops.map_flat_values(op, *args, **kwargs)
     self.assertAllEqual(result, expected)
 def assertRaggedMapInnerValuesReturns(self,
                                       op,
                                       expected,
                                       args=(),
                                       kwargs=None):
   kwargs = kwargs or {}
   result = ragged_functional_ops.map_flat_values(op, *args, **kwargs)
   self.assertRaggedEqual(result, expected)
    def testRaggedMapFnPreservesUniformRowLength(self):
        # x and y are equal, except that x has uniform_row_length and y does not.
        x = ragged_tensor.RaggedTensor.from_uniform_row_length(
            ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
        y = ragged_factory_ops.constant([[[1, 2], [3]]])

        a = ragged_functional_ops.map_flat_values(math_ops.add, x, y)
        self.assertAllEqual(x.uniform_row_length, a.uniform_row_length)

        b = ragged_functional_ops.map_flat_values(math_ops.add, y, x)
        self.assertAllEqual(x.uniform_row_length, b.uniform_row_length)

        c = ragged_functional_ops.map_flat_values(math_ops.add_n, [x, x])
        self.assertAllEqual(x.uniform_row_length, c.uniform_row_length)

        d = ragged_functional_ops.map_flat_values(math_ops.add_n, [y, x, y])
        self.assertAllEqual(x.uniform_row_length, d.uniform_row_length)
Example #21
def string_join(inputs: typing.List[ragged_tensor.RaggedOrDense],
                separator="",
                name=None):
    """RaggedTensor implementation for tf.strings.join."""
    if len(inputs) < 1:
        raise ValueError("tf.strings.join: expected at least one input.")
    with ops.name_scope(name, "RaggedStringJoin", inputs):
        return ragged_functional_ops.map_flat_values(string_ops.string_join,
                                                     inputs, separator)
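# Usage sketch (illustrative values): joining two ragged string tensors element
# by element via their flat values, assuming matching row partitions.
import tensorflow as tf

first = tf.ragged.constant([["a", "b"], ["c"]])
second = tf.ragged.constant([["x", "y"], ["z"]])
joined = tf.ragged.map_flat_values(tf.strings.join, [first, second],
                                   separator="-")
print(joined)  # <tf.RaggedTensor [[b'a-x', b'b-y'], [b'c-z']]>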
Example #22
 def testRaggedTensorSplitsMismatchErrorAtRuntime(self):
   splits1 = array_ops.placeholder_with_default(
       constant_op.constant([0, 3, 3, 5], dtypes.int64), None)
   splits2 = array_ops.placeholder_with_default(
       constant_op.constant([0, 1, 3, 5], dtypes.int64), None)
   x = ragged_tensor.RaggedTensor.from_row_splits([3, 1, 4, 1, 5], splits1)
   y = ragged_tensor.RaggedTensor.from_row_splits([1, 2, 3, 4, 5], splits2)
   with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                r'.*Inputs must have identical ragged splits'):
     self.evaluate(ragged_functional_ops.map_flat_values(math_ops.add, x, y))
Example #23
    def call(self, inputs):
        # The table lookup ops don't natively support ragged tensors, so if we have
        # a RT we need to use map_flat_values to look up every element.
        if ragged_tensor.is_ragged(inputs):
            indexed_data = ragged_functional_ops.map_flat_values(
                self._table.lookup, inputs)
        else:
            indexed_data = self._table.lookup(inputs)

        return indexed_data
Example #25
    def _preprocess(self, inputs):
        if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
            if ragged_tensor.is_ragged(inputs):
                lowercase_inputs = ragged_functional_ops.map_flat_values(
                    gen_string_ops.string_lower, inputs)
                # Depending on configuration, we may never touch the non-data tensor
                # in the ragged inputs tensor. If that is the case, and this is the
                # only layer in the Keras model, running it will throw an error.
                # To get around this, we wrap the result in an identity.
                lowercase_inputs = array_ops.identity(lowercase_inputs)
            else:
                lowercase_inputs = gen_string_ops.string_lower(inputs)
            inputs = string_ops.regex_replace(lowercase_inputs,
                                              DEFAULT_STRIP_REGEX, "")
        elif callable(self._standardize):
            inputs = self._standardize(inputs)
        elif self._standardize is not None:
            raise ValueError(
                ("%s is not a supported standardization. "
                 "TextVectorization supports the following options "
                 "for `standardize`: None, "
                 "'lower_and_strip_punctuation', or a "
                 "Callable.") % self._standardize)

        if self._split is not None:
            # If we are splitting, we validate that the 1st axis is of dimension 1 and
            # so can be squeezed out. We do this here instead of after splitting for
            # performance reasons - it's more expensive to squeeze a ragged tensor.
            if inputs.shape.ndims > 1:
                inputs = array_ops.squeeze(inputs, axis=-1)
            if self._split == SPLIT_ON_WHITESPACE:
                # This treats multiple whitespaces as one whitespace, and strips leading
                # and trailing whitespace.
                inputs = ragged_string_ops.string_split_v2(inputs)
            elif callable(self._split):
                inputs = self._split(inputs)
            else:
                raise ValueError(
                    ("%s is not a supported splitting."
                     "TextVectorization supports the following options "
                     "for `split`: None, 'whitespace', or a Callable.") %
                    self._split)

        # Note that 'inputs' here can be either ragged or dense depending on the
        # configuration choices for this Layer. The strings.ngrams op, however, does
        # support both ragged and dense inputs.
        if self._ngrams is not None:
            inputs = ragged_string_ops.ngrams(inputs,
                                              ngram_width=self._ngrams,
                                              separator=" ")

        return inputs
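# Usage sketch of the ragged lowercasing step in isolation (illustrative
# inputs; the text is split first here so that the input is already ragged).
import tensorflow as tf

sentences = tf.constant(["Hello World", "TensorFlow"])
words = tf.strings.split(sentences)  # RaggedTensor of shape [2, None]
lowered = tf.ragged.map_flat_values(tf.strings.lower, words)
print(lowered)  # <tf.RaggedTensor [[b'hello', b'world'], [b'tensorflow']]>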
Example #26
 def call(self, inputs):
     if isinstance(inputs, tf.SparseTensor):
         id_values = self._round_and_truncate(inputs.values)
         result = tf.SparseTensor(
             indices=inputs.indices,
             values=id_values,
             dense_shape=inputs.dense_shape,
         )
     elif ragged_tensor.is_ragged(inputs):
         result = ragged_functional_ops.map_flat_values(
             self._round_and_truncate, inputs)
     else:
         result = self._round_and_truncate(inputs)
     return tf.cast(result, tf.int64)
Example #27
  def testGradient(self):
    if context.executing_eagerly():
      return
    # rt1.shape == rt2.shape == [2, (D2), (D3), 2].
    rt1 = ragged_factory_ops.constant(
        [[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]], ragged_rank=2)
    rt2 = ragged_factory_ops.constant(
        [[[[9.0, 8.0], [7.0, 6.0]], [[5.0, 4.0]]]], ragged_rank=2)
    rt = ragged_functional_ops.map_flat_values(math_ops.add, rt1, rt2 * 2.0)
    st = rt.to_sparse()

    g1, g2 = gradients_impl.gradients(st.values,
                                      [rt1.flat_values, rt2.flat_values])
    self.assertRaggedEqual(g1, [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
    self.assertRaggedEqual(g2, [[2.0, 2.0], [2.0, 2.0], [2.0, 2.0]])
    def call(self, inputs):
        self._called = True
        inputs = self._preprocess(inputs)

        # If we're not doing any output processing, return right away.
        if self._output_mode is None:
            return inputs

        # The table lookup ops don't natively support ragged tensors, so if we have
        # a RT we need to use map_flat_values to look up every element.
        if ragged_tensor.is_ragged(inputs):
            indexed_data = ragged_functional_ops.map_flat_values(
                self._table.lookup, inputs)
        else:
            indexed_data = self._table.lookup(inputs)

        if self._output_mode == INT:
            # Once we have the dense tensor, we can return it if we weren't given a
            # fixed output sequence length. If we were, though, we have to dynamically
            # choose whether to pad or trim it based on each tensor.

            # We need to convert to dense if we have a ragged tensor.
            if ragged_tensor.is_ragged(indexed_data):
                dense_data = indexed_data.to_tensor(default_value=0)
            else:
                dense_data = indexed_data

            if self._output_sequence_length is None:
                dense_data.set_shape(tensor_shape.TensorShape((None, None)))
                return dense_data
            else:
                sequence_len = K.shape(dense_data)[1]
                pad_amt = self._output_sequence_length - sequence_len
                pad_fn = lambda: array_ops.pad(dense_data, [[0, 0],
                                                            [0, pad_amt]])
                slice_fn = lambda: dense_data[:, :self._output_sequence_length]
                output_tensor = control_flow_ops.cond(
                    sequence_len < self._output_sequence_length,
                    true_fn=pad_fn,
                    false_fn=slice_fn)
                output_tensor.set_shape(
                    tensor_shape.TensorShape(
                        (None, self._output_sequence_length)))
                return output_tensor

        # If we're not returning integers here, we rely on the vectorization layer
        # to create the output.
        return self._vectorize_layer(indexed_data)
Example #29
 def call(self, inputs):
   # TODO(tanzheny): Add int support.
   str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
   if ragged_tensor.is_ragged(inputs):
     return ragged_functional_ops.map_flat_values(
         str_to_hash_bucket, inputs, num_buckets=self.num_bins, name='hash')
   elif isinstance(inputs, sparse_tensor.SparseTensor):
     sparse_values = inputs.values
     sparse_hashed_values = str_to_hash_bucket(
         sparse_values, self.num_bins, name='hash')
     return sparse_tensor.SparseTensor(
         indices=inputs.indices,
         values=sparse_hashed_values,
         dense_shape=inputs.dense_shape)
   else:
     return str_to_hash_bucket(inputs, self.num_bins, name='hash')
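# Usage sketch of the ragged branch, assuming the hash-bucket function resolves
# to the public tf.strings.to_hash_bucket_fast (the layer may pick a salted
# variant depending on its configuration). Inputs are illustrative.
import tensorflow as tf

tokens = tf.ragged.constant([["cat", "dog"], ["fish"]])
hashed = tf.ragged.map_flat_values(
    tf.strings.to_hash_bucket_fast, tokens, num_buckets=8)
print(hashed.shape)  # (2, None)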
  def testRaggedMapOnStructure(self):
    batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
    # [[10, 20, 30], [40], [50, 60, 70]]
    robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

    features = {'batman': batman, 'robin': robin}

    def _reduce_sum_from_all(f):
      return mo.reduce_sum(f['batman']) + mo.reduce_sum(f['robin'])

    output = ragged_map_ops.map_fn(
        fn=_reduce_sum_from_all,
        elems=features,
        dtype=dtypes.int32,
    )

    self.assertRaggedEqual(output, [66, 44, 198])
Example #31
 def call(self, inputs):
     if ragged_tensor.is_ragged(inputs):
         integer_buckets = ragged_functional_ops.map_flat_values(
             gen_math_ops.Bucketize, input=inputs, boundaries=self.bins)
         # Ragged map_flat_values doesn't touch the non-values tensors in the
          # ragged composite tensor. If this op is the only op in a Keras model,
         # this can cause errors in Graph mode, so wrap the tensor in an identity.
         return array_ops.identity(integer_buckets)
     elif isinstance(inputs, sparse_tensor.SparseTensor):
         integer_buckets = gen_math_ops.Bucketize(input=inputs.values,
                                                  boundaries=self.bins)
         return sparse_tensor.SparseTensor(
             indices=array_ops.identity(inputs.indices),
             values=integer_buckets,
             dense_shape=array_ops.identity(inputs.dense_shape))
     else:
         return gen_math_ops.Bucketize(input=inputs, boundaries=self.bins)
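# Usage sketch (illustrative values), assuming tf.raw_ops.Bucketize as the
# public alias of the Bucketize op used above: it is applied to the flat values
# only, so the ragged row partitions survive.
import tensorflow as tf

values = tf.ragged.constant([[0.1, 2.5], [7.0]])
bins = [1.0, 5.0]  # three buckets: (-inf, 1), [1, 5), [5, inf)
buckets = tf.ragged.map_flat_values(
    tf.raw_ops.Bucketize, input=values, boundaries=bins)
print(buckets)  # <tf.RaggedTensor [[0, 1], [2]]>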
Example #32
    def testRaggedMapOnStructure(self):
        batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
        # [[10, 20, 30], [40], [50, 60, 70]]
        robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

        features = {'batman': batman, 'robin': robin}

        def _reduce_sum_from_all(f):
            return mo.reduce_sum(f['batman']) + mo.reduce_sum(f['robin'])

        output = ragged_map_ops.map_fn(
            fn=_reduce_sum_from_all,
            elems=features,
            dtype=dtypes.int32,
        )

        self.assertAllEqual(output, [66, 44, 198])
def embedding_lookup_ragged(embedding_weights,
                            ragged_ids,
                            partition_strategy="mod",
                            max_norm=None,
                            name=None):
    """Look up the ragged ids in a list of embedding tensors.

  Args:
    embedding_weights: A tensor representing the complete embedding tensor
      having the shape [e1, ...eM]
    ragged_ids: A 'RaggedTensor' with type 'int32' or 'int64' containing the ids
      to be looked up in 'embedding_weights' of shape [r0, ..rN]. Values must be
      in the range '[0, embedding_weights.shape[0])'.
    partition_strategy: A string specifying the partitioning strategy.
    max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
      than this value.
    name: A name for the operation (optional)

  Returns:
    A ragged tensor of shape [r0, r1, ...rN, e1, ...eM].

  Raises:
    ValueError: if the embedding_weights is empty or the ragged_ids is not a
      RaggedTensor.
  """
    if embedding_weights is None:
        raise ValueError("The embedding weights must be specified.")
    if isinstance(embedding_weights, (list, tuple)) and not embedding_weights:
        raise ValueError("The embedding weights should not be empty.")
    if ragged_ids.dtype != dtypes.int32 and ragged_ids.dtype != dtypes.int64:
        raise ValueError(
            "The values contained by the inputs have type "
            f"{str(ragged_ids.dtype)}"
            " and cannot be processed. All values"
            " should be indices, either of type `in32` or `int64`.")

    with ops.name_scope(name, "embedding_lookup_ragged") as name:
        looked_up_ragged = ragged_functional_ops.map_flat_values(
            embedding_lookup,
            params=embedding_weights,
            ids=ragged_ids,
            partition_strategy=partition_strategy,
            max_norm=max_norm)

        return looked_up_ragged
Example #34
def ragged_cumsum(x: ragged_tensor.Ragged,
                  axis: int = 0,
                  exclusive: bool = False,
                  reverse: bool = False,
                  name: typing.Optional[str] = None):
  """Calculate math_ops.cumsum for a RaggedTensor.

  Given a ragged tensor `x`, the `result` is a ragged tensor with the same
  shape. One can calculate the value of `result[i_1...i_k]` as follows:
  ```
  dense_result=tf.math.cumsum(rt.to_tensor(), axis=axis, exclusive=exclusive,
                              reverse=reverse)
  result[i_1...i_k]=dense_result[i_1...i_k]
  ```

  Args:
    x: the original ragged tensor to sum.
    axis: the axis along which to sum, can range -rank<=axis<rank.
    exclusive: is the sum exclusive or inclusive? If True, then result[0]=0.
        If False, then result[0]=x[0].
    reverse: If True, sum from back to front.
    name: the name of the op.
  Returns:
    the cumulative sum.
  """
  with ops.name_scope(name, 'RaggedCumSum', [x, axis, exclusive, reverse]):
    axis = array_ops.get_positive_axis(axis, x.shape.rank, ndims_name='rank')
    if axis == x.ragged_rank:
      last_rp = x._nested_row_partitions[-1]  # pylint: disable=protected-access
      return x.with_flat_values(
          _cumsum_flat_values_at_ragged_rank(last_rp, x.flat_values,
                                             exclusive=exclusive,
                                             reverse=reverse))
    elif axis > x.ragged_rank:
      new_axis = axis - x.ragged_rank
      cumsum_bound = functools.partial(
          math_ops.cumsum, axis=new_axis, exclusive=exclusive, reverse=reverse)
      return ragged_functional_ops.map_flat_values(cumsum_bound, x)
    else:
      dense_version = x.to_tensor()
      result = math_ops.cumsum(
          dense_version, axis, exclusive=exclusive, reverse=reverse, name=name)
      return ragged_tensor.RaggedTensor.from_tensor(
          result, lengths=x.nested_row_lengths())
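# Usage sketch of the `axis > ragged_rank` branch (illustrative values): with a
# uniform inner dimension, the cumulative sum reduces to a plain tf.math.cumsum
# over the flat values.
import tensorflow as tf

rt = tf.ragged.constant([[[1, 2], [3, 4]], [[5, 6]]], ragged_rank=1)
# flat_values has shape [3, 2]; axis=2 of `rt` maps to axis=1 of flat_values.
cumsum_inner = tf.ragged.map_flat_values(
    lambda v: tf.math.cumsum(v, axis=1), rt)
print(cumsum_inner)  # <tf.RaggedTensor [[[1, 3], [3, 7]], [[5, 11]]]>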
Example #35
    def call(self, inputs):
        def _bucketize_op(bins):
            bins = [math_ops.cast(bins, dtypes.float32)]
            return lambda inputs: gen_boosted_trees_ops.BoostedTreesBucketize(  # pylint: disable=g-long-lambda
                float_values=[math_ops.cast(inputs, dtypes.float32)],
                bucket_boundaries=bins)[0]

        if tf_utils.is_ragged(inputs):
            integer_buckets = ragged_functional_ops.map_flat_values(
                _bucketize_op(array_ops.squeeze(self.bins)), inputs)
            # Ragged map_flat_values doesn't touch the non-values tensors in the
            # ragged composite tensor. If this op is the only op in a Keras model,
            # this can cause errors in Graph mode, so wrap the tensor in an identity.
            return array_ops.identity(integer_buckets)
        elif isinstance(inputs, sparse_tensor.SparseTensor):
            integer_buckets = gen_boosted_trees_ops.BoostedTreesBucketize(
                float_values=[math_ops.cast(inputs.values, dtypes.float32)],
                bucket_boundaries=[
                    math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)
                ])[0]
            return sparse_tensor.SparseTensor(
                indices=array_ops.identity(inputs.indices),
                values=integer_buckets,
                dense_shape=array_ops.identity(inputs.dense_shape))
        else:
            input_shape = inputs.get_shape()
            if any(dim is None for dim in input_shape.as_list()[1:]):
                raise NotImplementedError(
                    "Discretization Layer requires known non-batch shape,"
                    "found {}".format(input_shape))

            reshaped = array_ops.reshape(inputs, [
                -1,
                gen_math_ops.Prod(input=input_shape.as_list()[1:], axis=0)
            ])

            return array_ops.reshape(
                control_flow_ops.vectorized_map(
                    _bucketize_op(array_ops.squeeze(self.bins)), reshaped),
                array_ops.constant([-1] + input_shape.as_list()[1:]))
Example #36
def _elementwise_where(condition, x, y):
  """Ragged version of tf.where(condition, x, y)."""
  condition_is_ragged = isinstance(condition, ragged_tensor.RaggedTensor)
  x_is_ragged = isinstance(x, ragged_tensor.RaggedTensor)
  y_is_ragged = isinstance(y, ragged_tensor.RaggedTensor)

  if not (condition_is_ragged or x_is_ragged or y_is_ragged):
    return array_ops.where(condition, x, y)

  elif condition_is_ragged and x_is_ragged and y_is_ragged:
    return ragged_functional_ops.map_flat_values(array_ops.where, condition, x,
                                                 y)
  elif not condition_is_ragged:
    # Concatenate x and y, and then use `gather` to assemble the selected rows.
    condition.shape.assert_has_rank(1)
    x_nrows = _nrows(x)
    x_and_y = ragged_concat_ops.concat([x, y], axis=0)
    indices = array_ops.where(condition, math_ops.range(x_nrows),
                              x_nrows + math_ops.range(_nrows(y)))
    return ragged_gather_ops.gather(x_and_y, indices)

  else:
    raise ValueError('Input shapes do not match.')
def boolean_mask(data, mask, keepdims=False, name=None):
  """Applies a boolean mask to `data`.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  If `keepdims` is true then outer dimensions (corresponding to the `mask`
  dimensions) are preserved, and:

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  If `keepdims` is false, then the outer dimensions are collapsed (similar to
  the behavior of `tf.boolean_mask`), and:

  * `output[i, b1...bB] = data[a1...aA, b1...bB]`

     Where `(a1...aA)` is the `i`th `True` entry of `mask`
     (in row-major order).

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor.  `mask`'s shape must be a prefix
      of `data`'s shape.  `rank(mask)` must be known statically.
    keepdims: Whether to preserve the outer dimensions (`keepdims=True`) or
      flatten them (`keepdims=False`).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    If `keepdims` is false:

    * `rank(output) = rank(data) - rank(mask) + 1`.
    * `output.ragged_rank = max(data.ragged_rank - rank(mask) + 1, 0)`.

    If `keepdims` is true:

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:
    ```python
    >>> # Aliases for True & False so data and mask line up.
    >>> T, F = (True, False)

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Flatten outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=False).tolist()
    [1, 3, 7]

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor.  Preserve outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=True).tolist()
    [[1, 3], [], [7]]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Flatten outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=False).tolist()
    [3, 5, 6]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor.  Preserve outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=True).tolist()
    [[3], [], [5, 6]]

    >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([True, False, True]),
    ...     keepdims=True).tolist()
    [[1, 2, 3], [5, 6]]
    ```
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data, ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged.  Keep the splits
        # that we strip off in `splits`, so we can add them back on after
        # we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask, keepdims)

        # Add the ragged `splits` back to the result.
        if keepdims:
          masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
              masked_values, splits, validate=False)

        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask, keepdims=False)

      return ragged_tensor.RaggedTensor.from_row_splits(masked_values,
                                                        masked_splits,
                                                        validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask, ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask, keepdims)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2 and keepdims:
        # Add the innermost ragged dimension.  For each innermost cell, get the
        # number of values it contains.  Then flatten that to get a list of
        # cell lengths, and convert it to splits.  Finally, combine the splits
        # and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(mask, axis=-1,
                                                dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2 and keepdims:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
Example #38
def _cast(input_tensor, dtype):
  return ragged_functional_ops.map_flat_values(math_ops.cast, input_tensor,
                                               dtype)
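# Usage sketch (illustrative values): the cast is applied to the flat values
# while the row partitions pass through untouched.
import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])
print(tf.ragged.map_flat_values(tf.cast, rt, tf.float32))
# <tf.RaggedTensor [[1.0, 2.0], [3.0]]>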