def call(self, inputs, invert=False):
  table = self._inverse_table if invert else self._table

  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  if ragged_tensor.is_ragged(inputs):
    indexed_data = ragged_functional_ops.map_flat_values(table.lookup, inputs)
    if not invert:
      indexed_data = ragged_functional_ops.map_flat_values(
          self.replace_oov_buckets, inputs, indexed_data)
  elif isinstance(
      inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
    if not invert:
      values = self.replace_oov_buckets(inputs.values,
                                        table.lookup(inputs.values))
    else:
      # When inverting, use the reverse lookup directly; no OOV replacement is
      # needed. (This branch avoids referencing `values` before assignment.)
      values = table.lookup(inputs.values)
    indexed_data = sparse_tensor.SparseTensor(inputs.indices, values,
                                              inputs.dense_shape)
  else:
    indexed_data = table.lookup(inputs)
    if not invert:
      indexed_data = self.replace_oov_buckets(inputs, indexed_data)
    # (b/149446477): output does not preserve input shape.
    indexed_data.set_shape(inputs.shape)

  # Composite tensors can pass tensor values through, which will cause
  # errors if this is the only layer in the model. To fix this, pass
  # the output through an identity op.
  return array_ops.identity(indexed_data)
def testRaggedTensorSplitsRaggedRankMismatchError(self):
  x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
  y = ragged_factory_ops.constant([[[3, 1, 4], []], [], [[1, 5]]])
  with self.assertRaisesRegex(
      ValueError, r'All ragged inputs must have the same ragged_rank.'):
    ragged_functional_ops.map_flat_values(math_ops.add, x, y)
def testRaggedTensorShapeMismatchError(self):
  x = ragged_factory_ops.constant([[1, 2, 3], [4, 5]])
  with self.assertRaisesRegex(
      ValueError,
      r'tf.ragged.map_flat_values requires that the output of `op` have the '
      r'same outer-dimension size as flat_values of any ragged inputs. '
      r'\(output shape: \(\); expected outer dimension size: 5\)'):
    ragged_functional_ops.map_flat_values(math_ops.argmax, x)
def testDocStringExamples(self):
  """Test the examples in apply_op_to_ragged_values.__doc__."""
  rt = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6]])
  v1 = ragged_functional_ops.map_flat_values(array_ops.ones_like, rt)
  v2 = ragged_functional_ops.map_flat_values(math_ops.multiply, rt, rt)
  v3 = ragged_functional_ops.map_flat_values(math_ops.add, rt, 5)
  self.assertAllEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
  self.assertAllEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
  self.assertAllEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
def testDocStringExamples(self):
  """Test the examples in apply_op_to_ragged_values.__doc__."""
  rt = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6]])
  v1 = ragged_functional_ops.map_flat_values(array_ops.ones_like, rt)
  v2 = ragged_functional_ops.map_flat_values(math_ops.multiply, rt, rt)
  v3 = ragged_functional_ops.map_flat_values(math_ops.add, rt, 5)
  self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
  self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
  self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
def _ragged_lookup(self, inputs):
  """Perform a table lookup on a ragged tensor."""
  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  indexed_data = ragged_functional_ops.map_flat_values(self.table.lookup,
                                                       inputs)
  indexed_data = ragged_functional_ops.map_flat_values(
      self._replace_oov_buckets, inputs, indexed_data)
  # Composite tensors can pass tensor values through, which will cause
  # errors if all operations in the TF graph do so. We can break this chain
  # with an identity here.
  return array_ops.identity(indexed_data)
def _ragged_lookup(self, inputs):
  """Perform a table lookup on a ragged tensor."""
  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  indexed_data = ragged_functional_ops.map_flat_values(self._lookup_and_mask,
                                                       inputs)
  indexed_data = ragged_functional_ops.map_flat_values(
      self._replace_oov_buckets, inputs, indexed_data)
  # table.lookup is not shape-preserving, so we need to set the shape here.
  indexed_data._set_shape(inputs.shape)  # pylint: disable=protected-access
  # Composite tensors can pass tensor values through, which will cause
  # errors if all operations in the TF graph do so. We can break this chain
  # with an identity here.
  return array_ops.identity(indexed_data)
def testRaggedTensorSplitsValueMismatchError(self):
  x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
  y = ragged_factory_ops.constant([[1], [2, 3], [4, 5]])
  with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
                              r'partitions have incompatible'):
    ragged_functional_ops.map_flat_values(math_ops.add, x, y)

  z_splits = array_ops.placeholder_with_default(
      constant_op.constant([0, 3], dtypes.int64), None)
  z = ragged_tensor.RaggedTensor.from_row_splits([0, 1, 2], z_splits)
  with self.assertRaisesRegex(
      ValueError,
      r"Input RaggedTensors' flat_values must all have the same "
      r'outer-dimension size. Got sizes: \{3, 5\}'):
    ragged_functional_ops.map_flat_values(math_ops.add, x, z)
def benchmark_split_merge_tokenizer(self):
  if FLAGS.ragged_vs_dense:
    return

  random_seed.set_seed(5)
  char_splits = self._get_char_level_splits()
  if not context.executing_eagerly():
    # Evaluate splits, as their shapes cannot be inferred in graph mode and
    # are needed for mapping.
    with session.Session() as sess:
      sess.run(self.iterator.initializer)
      char_splits = sess.run(char_splits)

  def randomize_splits(inputs):
    return random_ops.random_uniform(
        inputs.shape, maxval=2, dtype=dtypes.int32)

  labels = ragged_functional_ops.map_flat_values(randomize_splits,
                                                 char_splits)
  if not context.executing_eagerly():
    # Evaluate the labels computation to exclude these steps from op
    # benchmarking.
    with session.Session() as sess:
      labels = sess.run(labels)

  tokenizer = text_ops.SplitMergeTokenizer()
  self._run(tokenizer, {"labels": labels})
def call(self, inputs):
  if ragged_tensor.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        math_ops._bucketize, inputs, boundaries=self.bins)  # pylint: disable=protected-access
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    integer_buckets = array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    integer_buckets = math_ops._bucketize(  # pylint: disable=protected-access
        inputs.values, boundaries=self.bins)
  else:
    integer_buckets = math_ops._bucketize(  # pylint: disable=protected-access
        inputs, boundaries=self.bins)

  if self.output_mode == INTEGER:
    if isinstance(inputs, sparse_tensor.SparseTensor):
      return sparse_tensor.SparseTensor(
          indices=array_ops.identity(inputs.indices),
          values=integer_buckets,
          dense_shape=array_ops.identity(inputs.dense_shape))
    return integer_buckets
  else:
    if isinstance(inputs, sparse_tensor.SparseTensor):
      raise ValueError("`output_mode=binary` is not supported for "
                       "sparse input")
    # The 'bins' array is the set of boundaries between the bins. We actually
    # have 'len(bins)+1' outputs.
    # TODO(momernick): This will change when we have the ability to adapt().
    return array_ops.one_hot(integer_buckets, depth=len(self.bins) + 1)
def testRaggedMapOnStructure_RaggedOutputs(self):
  batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
  # [[10, 20, 30], [40], [50, 60, 70]]
  robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

  features = {'batman': batman, 'robin': robin}

  def _increment(f):
    return {
        'batman': f['batman'] + 1,
        'robin': f['robin'] + 1,
    }

  output = ragged_map_ops.map_fn(
      fn=_increment,
      elems=features,
      infer_shape=False,
      dtype={
          'batman':
              ragged_tensor.RaggedTensorType(
                  dtype=dtypes.int32, ragged_rank=1),
          'robin':
              ragged_tensor.RaggedTensorType(
                  dtype=dtypes.int32, ragged_rank=1)
      },
  )

  self.assertRaggedEqual(output['batman'], [[2, 3, 4], [5], [6, 7, 8]])
  self.assertRaggedEqual(output['robin'], [[11, 21, 31], [41], [51, 61, 71]])
def call(self, inputs):
  bins = [math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)]

  def _bucketize_fn(inputs):
    return gen_boosted_trees_ops.BoostedTreesBucketize(
        float_values=[math_ops.cast(inputs, dtypes.float32)],
        bucket_boundaries=bins)[0]

  if tf_utils.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        _bucketize_fn, inputs)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=_bucketize_fn(inputs.values),
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    static_shape = inputs.get_shape()
    if any(dim is None for dim in static_shape.as_list()[1:]):
      raise NotImplementedError(
          "Discretization Layer requires known non-batch shape, "
          "found {}".format(static_shape))

    dynamic_shape = array_ops.shape_v2(inputs)
    # BoostedTreesBucketize only handles rank 1 inputs. We need to flatten our
    # inputs after batch size and vectorized_map over each sample.
    reshaped = array_ops.reshape(inputs, [dynamic_shape[0], -1])
    return array_ops.reshape(
        control_flow_ops.vectorized_map(_bucketize_fn, reshaped),
        dynamic_shape)
def _process_single_input(self, inputs):
  # Converts integer inputs to string.
  if inputs.dtype.is_integer:
    if isinstance(inputs, sparse_tensor.SparseTensor):
      inputs = sparse_tensor.SparseTensor(
          indices=inputs.indices,
          values=string_ops.as_string(inputs.values),
          dense_shape=inputs.dense_shape)
    else:
      inputs = string_ops.as_string(inputs)

  str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
  if tf_utils.is_ragged(inputs):
    return ragged_functional_ops.map_flat_values(
        str_to_hash_bucket, inputs, num_buckets=self.num_bins, name='hash')
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    sparse_values = inputs.values
    sparse_hashed_values = str_to_hash_bucket(
        sparse_values, self.num_bins, name='hash')
    return sparse_tensor.SparseTensor(
        indices=inputs.indices,
        values=sparse_hashed_values,
        dense_shape=inputs.dense_shape)
  else:
    return str_to_hash_bucket(inputs, self.num_bins, name='hash')
def testRaggedMapOnStructure_RaggedOutputs(self):
  batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
  # [[10, 20, 30], [40], [50, 60, 70]]
  robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

  features = {'batman': batman, 'robin': robin}

  def _increment(f):
    return {
        'batman': f['batman'] + 1,
        'robin': f['robin'] + 1,
    }

  output = ragged_map_ops.map_fn(
      fn=_increment,
      elems=features,
      infer_shape=False,
      dtype={
          'batman': ragged_tensor.RaggedTensorType(dtype=dtypes.int32,
                                                   ragged_rank=1),
          'robin': ragged_tensor.RaggedTensorType(dtype=dtypes.int32,
                                                  ragged_rank=1)
      },
  )

  self.assertAllEqual(output['batman'], [[2, 3, 4], [5], [6, 7, 8]])
  self.assertAllEqual(output['robin'], [[11, 21, 31], [41], [51, 61, 71]])
def embedding_lookup_ragged(embedding_weights, ragged_ids, name=None):
  """Look up the ragged ids in a list of embedding tensors.

  Args:
    embedding_weights: A tensor representing the complete embedding tensor
      having the shape [e1, ...eM]
    ragged_ids: A 'RaggedTensor' with type 'int32' or 'int64' containing the
      ids to be looked up in 'embedding_weights' of shape [r0, ..rN]. Values
      must be in the range '[0, embedding_weights.shape[0]]'.
    name: A name for the operation (optional).

  Returns:
    A ragged tensor of shape [r0, r1, ...rN, e1, ...eM].

  Raises:
    ValueError: If embedding_weights is empty or ragged_ids is not a
      RaggedTensor.
  """
  if embedding_weights is None:
    raise ValueError("The embedding weights must be specified.")
  if isinstance(embedding_weights, (list, tuple)) and not embedding_weights:
    raise ValueError("The embedding weights should not be empty.")
  if ragged_ids.dtype != dtypes.int32 and ragged_ids.dtype != dtypes.int64:
    raise ValueError("The values contained by the inputs have type " +
                     str(ragged_ids.dtype) +
                     " and cannot be processed. All values"
                     " should be indices, either of type `int32` or `int64`.")

  with ops.name_scope(name, "embedding_lookup_ragged") as name:
    looked_up_ragged = ragged_functional_ops.map_flat_values(
        array_ops.gather, embedding_weights, ragged_ids)

    return looked_up_ragged
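# Illustrative usage sketch (not from the original sources): because
# map_flat_values applies the gather only to the flat values of the ragged
# ids, the lookup preserves the ragged row structure and appends the embedding
# dimension. The weight and id values below are made up for the example.
import tensorflow as tf

weights = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])  # shape [3, 2]
ids = tf.ragged.constant([[0, 2], [1]])                       # shape [2, None]
result = tf.ragged.map_flat_values(tf.gather, weights, ids)
# result has shape [2, None, 2]:
# [[[0.0, 0.0], [2.0, 2.0]],
#  [[1.0, 1.0]]]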
def _elementwise_where_v2(condition, x, y):
  """Ragged version of tf.where_v2(condition, x, y)."""
  # Broadcast x, y, and condition to have the same shape.
  if not (condition.shape.is_fully_defined() and x.shape.is_fully_defined() and
          y.shape.is_fully_defined() and x.shape == y.shape and
          condition.shape == x.shape):
    shape_c = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(
        condition)
    shape_x = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(x)
    shape_y = ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(y)
    shape = ragged_tensor_shape.broadcast_dynamic_shape(
        shape_c, ragged_tensor_shape.broadcast_dynamic_shape(shape_x, shape_y))
    condition = ragged_tensor_shape.broadcast_to(condition, shape)
    x = ragged_tensor_shape.broadcast_to(x, shape)
    y = ragged_tensor_shape.broadcast_to(y, shape)

  condition_is_ragged = isinstance(condition, ragged_tensor.RaggedTensor)
  x_is_ragged = isinstance(x, ragged_tensor.RaggedTensor)
  y_is_ragged = isinstance(y, ragged_tensor.RaggedTensor)
  if not (condition_is_ragged or x_is_ragged or y_is_ragged):
    return array_ops.where_v2(condition, x, y)

  return ragged_functional_ops.map_flat_values(array_ops.where_v2, condition,
                                               x, y)
def call(self, inputs):
  inputs = self._preprocess(inputs)

  # If we're not doing any output processing, return right away.
  if self._output_mode is None:
    return inputs

  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  if ragged_tensor.is_ragged(inputs):
    indexed_data = ragged_functional_ops.map_flat_values(
        self._table.lookup, inputs)
  else:
    indexed_data = self._table.lookup(inputs)

  if self._output_mode == INT:
    # Once we have the dense tensor, we can return it if we weren't given a
    # fixed output sequence length. If we were, though, we have to dynamically
    # choose whether to pad or trim it based on each tensor.

    # We need to convert to dense if we have a ragged tensor.
    if ragged_tensor.is_ragged(indexed_data):
      dense_data = indexed_data.to_tensor(default_value=0)
    else:
      dense_data = indexed_data

    if self._output_sequence_length is None:
      return dense_data
    else:
      sequence_len = K.shape(dense_data)[1]
      pad_amt = self._output_sequence_length - sequence_len
      pad_fn = lambda: array_ops.pad(dense_data, [[0, 0], [0, pad_amt]])
      slice_fn = lambda: dense_data[:, :self._output_sequence_length]
      return control_flow_ops.cond(
          sequence_len < self._output_sequence_length,
          true_fn=pad_fn,
          false_fn=slice_fn)

  out_depth = self._max_tokens if self._pad_to_max else math_ops.cast(
      (self._get_table_size() + self._reserved_values), dtypes.int32)

  if self._output_mode == BINARY:
    bool_one_hot_data = array_ops.one_hot(
        indexed_data, depth=out_depth, on_value=True, off_value=False)
    reduced_bool_data = math_ops.reduce_any(bool_one_hot_data, axis=1)
    binary_data = math_ops.cast(reduced_bool_data, dtypes.int64)
    return binary_data

  one_hot_data = array_ops.one_hot(indexed_data, depth=out_depth)
  counts = math_ops.reduce_sum(one_hot_data, axis=1)
  if self._output_mode == COUNT:
    return math_ops.cast(counts, dtypes.int64)

  tf_idf_data = math_ops.multiply(counts, self._tf_idf_weights)
  if self._output_mode == TFIDF:
    return tf_idf_data

  # We can only get here if we didn't recognize the passed mode.
  raise ValueError("Unknown output mode %s" % self._output_mode)
def assertRaggedMapInnerValuesReturns(self, op, expected, args=(),
                                      kwargs=None):
  kwargs = kwargs or {}
  result = ragged_functional_ops.map_flat_values(op, *args, **kwargs)
  self.assertAllEqual(result, expected)
def assertRaggedMapInnerValuesReturns(self, op, expected, args=(),
                                      kwargs=None):
  kwargs = kwargs or {}
  result = ragged_functional_ops.map_flat_values(op, *args, **kwargs)
  self.assertRaggedEqual(result, expected)
def testRaggedMapFnPreservesUniformRowLength(self):
  # x and y are equal, except that x has uniform_row_length and y does not.
  x = ragged_tensor.RaggedTensor.from_uniform_row_length(
      ragged_factory_ops.constant([[1, 2], [3]]), uniform_row_length=2)
  y = ragged_factory_ops.constant([[[1, 2], [3]]])

  a = ragged_functional_ops.map_flat_values(math_ops.add, x, y)
  self.assertAllEqual(x.uniform_row_length, a.uniform_row_length)

  b = ragged_functional_ops.map_flat_values(math_ops.add, y, x)
  self.assertAllEqual(x.uniform_row_length, b.uniform_row_length)

  c = ragged_functional_ops.map_flat_values(math_ops.add_n, [x, x])
  self.assertAllEqual(x.uniform_row_length, c.uniform_row_length)

  d = ragged_functional_ops.map_flat_values(math_ops.add_n, [y, x, y])
  self.assertAllEqual(x.uniform_row_length, d.uniform_row_length)
def string_join(inputs: typing.List[ragged_tensor.RaggedOrDense],
                separator="",
                name=None):
  """RaggedTensor implementation for tf.strings.join."""
  if len(inputs) < 1:
    raise ValueError("tf.strings.join: expected at least one input.")
  with ops.name_scope(name, "RaggedStringJoin", inputs):
    return ragged_functional_ops.map_flat_values(string_ops.string_join,
                                                 inputs, separator)
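# Illustrative usage sketch (not from the original sources): joining the flat
# values elementwise keeps the ragged row partitions, so the result lines up
# row-for-row with the inputs. This assumes the implementation above is
# registered as the ragged dispatch for tf.strings.join, as it is in TF 2.x;
# the string values are made up for the example.
import tensorflow as tf

a = tf.ragged.constant([["a", "b"], ["c"]])
b = tf.ragged.constant([["1", "2"], ["3"]])
joined = tf.strings.join([a, b], separator="-")
# -> [[b'a-1', b'b-2'], [b'c-3']]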
def testRaggedTensorSplitsMismatchErrorAtRuntime(self):
  splits1 = array_ops.placeholder_with_default(
      constant_op.constant([0, 3, 3, 5], dtypes.int64), None)
  splits2 = array_ops.placeholder_with_default(
      constant_op.constant([0, 1, 3, 5], dtypes.int64), None)
  x = ragged_tensor.RaggedTensor.from_row_splits([3, 1, 4, 1, 5], splits1)
  y = ragged_tensor.RaggedTensor.from_row_splits([1, 2, 3, 4, 5], splits2)
  with self.assertRaisesRegexp(errors.InvalidArgumentError,
                               r'.*Inputs must have identical ragged splits'):
    self.evaluate(ragged_functional_ops.map_flat_values(math_ops.add, x, y))
def call(self, inputs):
  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  if ragged_tensor.is_ragged(inputs):
    indexed_data = ragged_functional_ops.map_flat_values(
        self._table.lookup, inputs)
  else:
    indexed_data = self._table.lookup(inputs)
  return indexed_data
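# Minimal sketch of the pattern above (a hypothetical standalone example, not
# the layer's actual table): lookup tables only accept dense tensors, so
# ragged inputs are routed through map_flat_values, which applies table.lookup
# to the flat values and reattaches the row partitions. Keys and values here
# are made up for the example.
import tensorflow as tf

keys = tf.constant(["a", "b", "c"])
values = tf.constant([1, 2, 3], dtype=tf.int64)
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values),
    default_value=tf.constant(-1, dtype=tf.int64))

tokens = tf.ragged.constant([["a", "c"], ["b"], ["z"]])
ids = tf.ragged.map_flat_values(table.lookup, tokens)
# -> [[1, 3], [2], [-1]]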
def _preprocess(self, inputs):
  if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
    if ragged_tensor.is_ragged(inputs):
      lowercase_inputs = ragged_functional_ops.map_flat_values(
          gen_string_ops.string_lower, inputs)
      # Depending on configuration, we may never touch the non-data tensor
      # in the ragged inputs tensor. If that is the case, and this is the
      # only layer in the keras model, running it will throw an error.
      # To get around this, we wrap the result in an identity.
      lowercase_inputs = array_ops.identity(lowercase_inputs)
    else:
      lowercase_inputs = gen_string_ops.string_lower(inputs)
    inputs = string_ops.regex_replace(lowercase_inputs, DEFAULT_STRIP_REGEX,
                                      "")
  elif callable(self._standardize):
    inputs = self._standardize(inputs)
  elif self._standardize is not None:
    raise ValueError(("%s is not a supported standardization. "
                      "TextVectorization supports the following options "
                      "for `standardize`: None, "
                      "'lower_and_strip_punctuation', or a "
                      "Callable.") % self._standardize)

  if self._split is not None:
    # If we are splitting, we validate that the 1st axis is of dimension 1 and
    # so can be squeezed out. We do this here instead of after splitting for
    # performance reasons - it's more expensive to squeeze a ragged tensor.
    if inputs.shape.ndims > 1:
      inputs = array_ops.squeeze(inputs, axis=-1)

    if self._split == SPLIT_ON_WHITESPACE:
      # This treats multiple whitespaces as one whitespace, and strips leading
      # and trailing whitespace.
      inputs = ragged_string_ops.string_split_v2(inputs)
    elif callable(self._split):
      inputs = self._split(inputs)
    else:
      raise ValueError(("%s is not a supported splitting. "
                        "TextVectorization supports the following options "
                        "for `split`: None, 'whitespace', or a Callable.") %
                       self._split)

  # Note that 'inputs' here can be either ragged or dense depending on the
  # configuration choices for this Layer. The strings.ngrams op, however, does
  # support both ragged and dense inputs.
  if self._ngrams is not None:
    inputs = ragged_string_ops.ngrams(
        inputs, ngram_width=self._ngrams, separator=" ")

  return inputs
def call(self, inputs):
  if isinstance(inputs, tf.SparseTensor):
    id_values = self._round_and_truncate(inputs.values)
    result = tf.SparseTensor(
        indices=inputs.indices,
        values=id_values,
        dense_shape=inputs.dense_shape,
    )
  elif ragged_tensor.is_ragged(inputs):
    result = ragged_functional_ops.map_flat_values(self._round_and_truncate,
                                                   inputs)
  else:
    result = self._round_and_truncate(inputs)
  return tf.cast(result, tf.int64)
def testGradient(self):
  if context.executing_eagerly():
    return
  # rt1.shape == rt2.shape == [2, (D2), (D3), 2].
  rt1 = ragged_factory_ops.constant(
      [[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]], ragged_rank=2)
  rt2 = ragged_factory_ops.constant(
      [[[[9.0, 8.0], [7.0, 6.0]], [[5.0, 4.0]]]], ragged_rank=2)
  rt = ragged_functional_ops.map_flat_values(math_ops.add, rt1, rt2 * 2.0)
  st = rt.to_sparse()

  g1, g2 = gradients_impl.gradients(st.values,
                                    [rt1.flat_values, rt2.flat_values])
  self.assertRaggedEqual(g1, [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
  self.assertRaggedEqual(g2, [[2.0, 2.0], [2.0, 2.0], [2.0, 2.0]])
def call(self, inputs):
  self._called = True
  inputs = self._preprocess(inputs)

  # If we're not doing any output processing, return right away.
  if self._output_mode is None:
    return inputs

  # The table lookup ops don't natively support ragged tensors, so if we have
  # a RT we need to use map_flat_values to look up every element.
  if ragged_tensor.is_ragged(inputs):
    indexed_data = ragged_functional_ops.map_flat_values(
        self._table.lookup, inputs)
  else:
    indexed_data = self._table.lookup(inputs)

  if self._output_mode == INT:
    # Once we have the dense tensor, we can return it if we weren't given a
    # fixed output sequence length. If we were, though, we have to dynamically
    # choose whether to pad or trim it based on each tensor.

    # We need to convert to dense if we have a ragged tensor.
    if ragged_tensor.is_ragged(indexed_data):
      dense_data = indexed_data.to_tensor(default_value=0)
    else:
      dense_data = indexed_data

    if self._output_sequence_length is None:
      dense_data.set_shape(tensor_shape.TensorShape((None, None)))
      return dense_data
    else:
      sequence_len = K.shape(dense_data)[1]
      pad_amt = self._output_sequence_length - sequence_len
      pad_fn = lambda: array_ops.pad(dense_data, [[0, 0], [0, pad_amt]])
      slice_fn = lambda: dense_data[:, :self._output_sequence_length]
      output_tensor = control_flow_ops.cond(
          sequence_len < self._output_sequence_length,
          true_fn=pad_fn,
          false_fn=slice_fn)
      output_tensor.set_shape(
          tensor_shape.TensorShape((None, self._output_sequence_length)))
      return output_tensor

  # If we're not returning integers here, we rely on the vectorization layer
  # to create the output.
  return self._vectorize_layer(indexed_data)
def call(self, inputs):
  # TODO(tanzheny): Add int support.
  str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
  if ragged_tensor.is_ragged(inputs):
    return ragged_functional_ops.map_flat_values(
        str_to_hash_bucket, inputs, num_buckets=self.num_bins, name='hash')
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    sparse_values = inputs.values
    sparse_hashed_values = str_to_hash_bucket(
        sparse_values, self.num_bins, name='hash')
    return sparse_tensor.SparseTensor(
        indices=inputs.indices,
        values=sparse_hashed_values,
        dense_shape=inputs.dense_shape)
  else:
    return str_to_hash_bucket(inputs, self.num_bins, name='hash')
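# Illustrative sketch (not part of the layer above): the same ragged pattern
# works with any elementwise string op. Here tf.strings.to_hash_bucket_fast
# stands in for the layer's configurable hash function; the exact bucket ids
# depend on the hash and are not shown.
import tensorflow as tf

words = tf.ragged.constant([["cat", "dog"], ["fish"]])
hashed = tf.ragged.map_flat_values(
    tf.strings.to_hash_bucket_fast, words, num_buckets=16)
# `hashed` has the same row structure as `words`, with int64 bucket ids.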
def testRaggedMapOnStructure(self):
  batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
  # [[10, 20, 30], [40], [50, 60, 70]]
  robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

  features = {'batman': batman, 'robin': robin}

  def _reduce_sum_from_all(f):
    return mo.reduce_sum(f['batman']) + mo.reduce_sum(f['robin'])

  output = ragged_map_ops.map_fn(
      fn=_reduce_sum_from_all,
      elems=features,
      dtype=dtypes.int32,
  )

  self.assertRaggedEqual(output, [66, 44, 198])
def call(self, inputs):
  if ragged_tensor.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        gen_math_ops.Bucketize, input=inputs, boundaries=self.bins)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    integer_buckets = gen_math_ops.Bucketize(
        input=inputs.values, boundaries=self.bins)
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=integer_buckets,
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    return gen_math_ops.Bucketize(input=inputs, boundaries=self.bins)
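# Illustrative sketch (not part of the layer above): map_flat_values also
# accepts ragged arguments passed by keyword, as the layer does with `input=`.
# tf.raw_ops.Bucketize stands in for the private gen_math_ops.Bucketize; the
# boundaries below are made up for the example.
import tensorflow as tf

rt = tf.ragged.constant([[0.5, 1.5], [2.5]])
buckets = tf.ragged.map_flat_values(
    tf.raw_ops.Bucketize, input=rt, boundaries=[1.0, 2.0])
# -> [[0, 1], [2]]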
def testRaggedMapOnStructure(self):
  batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
  # [[10, 20, 30], [40], [50, 60, 70]]
  robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)

  features = {'batman': batman, 'robin': robin}

  def _reduce_sum_from_all(f):
    return mo.reduce_sum(f['batman']) + mo.reduce_sum(f['robin'])

  output = ragged_map_ops.map_fn(
      fn=_reduce_sum_from_all,
      elems=features,
      dtype=dtypes.int32,
  )

  self.assertAllEqual(output, [66, 44, 198])
def embedding_lookup_ragged(embedding_weights,
                            ragged_ids,
                            partition_strategy="mod",
                            max_norm=None,
                            name=None):
  """Look up the ragged ids in a list of embedding tensors.

  Args:
    embedding_weights: A tensor representing the complete embedding tensor
      having the shape [e1, ...eM]
    ragged_ids: A 'RaggedTensor' with type 'int32' or 'int64' containing the
      ids to be looked up in 'embedding_weights' of shape [r0, ..rN]. Values
      must be in the range '[0, embedding_weights.shape[0]]'.
    partition_strategy: A string specifying the partitioning strategy.
    max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
      than this value.
    name: A name for the operation (optional).

  Returns:
    A ragged tensor of shape [r0, r1, ...rN, e1, ...eM].

  Raises:
    ValueError: If embedding_weights is empty or ragged_ids is not a
      RaggedTensor.
  """
  if embedding_weights is None:
    raise ValueError("The embedding weights must be specified.")
  if isinstance(embedding_weights, (list, tuple)) and not embedding_weights:
    raise ValueError("The embedding weights should not be empty.")
  if ragged_ids.dtype != dtypes.int32 and ragged_ids.dtype != dtypes.int64:
    raise ValueError("The values contained by the inputs have type "
                     f"{str(ragged_ids.dtype)}"
                     " and cannot be processed. All values"
                     " should be indices, either of type `int32` or `int64`.")

  with ops.name_scope(name, "embedding_lookup_ragged") as name:
    looked_up_ragged = ragged_functional_ops.map_flat_values(
        embedding_lookup,
        params=embedding_weights,
        ids=ragged_ids,
        partition_strategy=partition_strategy,
        max_norm=max_norm)

    return looked_up_ragged
def ragged_cumsum(x: ragged_tensor.Ragged,
                  axis: int = 0,
                  exclusive: bool = False,
                  reverse: bool = False,
                  name: typing.Optional[str] = None):
  """Calculate math_ops.cumsum for a RaggedTensor.

  Given a ragged tensor `x`, the `result` is a ragged tensor with the same
  shape. One can calculate the value of `result[i_1...i_k]` as follows:
  ```
  dense_result = tf.math.cumsum(rt.to_tensor(), axis=axis,
                                exclusive=exclusive, reverse=reverse)
  result[i_1...i_k] = dense_result[i_1...i_k]
  ```

  Args:
    x: the original ragged tensor to sum.
    axis: the axis along which to sum, can range -rank<=axis<rank.
    exclusive: is the sum exclusive or inclusive? If True, then result[0]=0.
      If False, then result[0]=x[0].
    reverse: If True, sum from back to front.
    name: the name of the op.

  Returns:
    the cumulative sum.
  """
  with ops.name_scope(name, 'RaggedCumSum', [x, axis, exclusive, reverse]):
    axis = array_ops.get_positive_axis(axis, x.shape.rank, ndims_name='rank')
    if axis == x.ragged_rank:
      last_rp = x._nested_row_partitions[-1]  # pylint: disable=protected-access
      return x.with_flat_values(
          _cumsum_flat_values_at_ragged_rank(
              last_rp, x.flat_values, exclusive=exclusive, reverse=reverse))
    elif axis > x.ragged_rank:
      new_axis = axis - x.ragged_rank
      cumsum_bound = functools.partial(
          math_ops.cumsum, axis=new_axis, exclusive=exclusive, reverse=reverse)
      return ragged_functional_ops.map_flat_values(cumsum_bound, x)
    else:
      dense_version = x.to_tensor()
      result = math_ops.cumsum(
          dense_version, axis, exclusive=exclusive, reverse=reverse, name=name)
      return ragged_tensor.RaggedTensor.from_tensor(
          result, lengths=x.nested_row_lengths())
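# Illustrative usage sketch (not from the original sources): assuming the
# implementation above is registered as the ragged dispatch for
# tf.math.cumsum (as in recent TF releases), summing along the innermost
# (ragged) axis accumulates within each row via the flat-values path.
# Example values are made up.
import tensorflow as tf

rt = tf.ragged.constant([[1, 2, 3], [4, 5]])
tf.math.cumsum(rt, axis=1)
# -> [[1, 3, 6], [4, 9]]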
def call(self, inputs):

  def _bucketize_op(bins):
    bins = [math_ops.cast(bins, dtypes.float32)]
    return lambda inputs: gen_boosted_trees_ops.BoostedTreesBucketize(  # pylint: disable=g-long-lambda
        float_values=[math_ops.cast(inputs, dtypes.float32)],
        bucket_boundaries=bins)[0]

  if tf_utils.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        _bucketize_op(array_ops.squeeze(self.bins)), inputs)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    integer_buckets = gen_boosted_trees_ops.BoostedTreesBucketize(
        float_values=[math_ops.cast(inputs.values, dtypes.float32)],
        bucket_boundaries=[
            math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)
        ])[0]
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=integer_buckets,
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    input_shape = inputs.get_shape()
    if any(dim is None for dim in input_shape.as_list()[1:]):
      raise NotImplementedError(
          "Discretization Layer requires known non-batch shape, "
          "found {}".format(input_shape))
    reshaped = array_ops.reshape(
        inputs,
        [-1, gen_math_ops.Prod(input=input_shape.as_list()[1:], axis=0)])
    return array_ops.reshape(
        control_flow_ops.vectorized_map(
            _bucketize_op(array_ops.squeeze(self.bins)), reshaped),
        array_ops.constant([-1] + input_shape.as_list()[1:]))
def _elementwise_where(condition, x, y):
  """Ragged version of tf.where(condition, x, y)."""
  condition_is_ragged = isinstance(condition, ragged_tensor.RaggedTensor)
  x_is_ragged = isinstance(x, ragged_tensor.RaggedTensor)
  y_is_ragged = isinstance(y, ragged_tensor.RaggedTensor)

  if not (condition_is_ragged or x_is_ragged or y_is_ragged):
    return array_ops.where(condition, x, y)

  elif condition_is_ragged and x_is_ragged and y_is_ragged:
    return ragged_functional_ops.map_flat_values(array_ops.where, condition,
                                                 x, y)
  elif not condition_is_ragged:
    # Concatenate x and y, and then use `gather` to assemble the selected rows.
    condition.shape.assert_has_rank(1)
    x_nrows = _nrows(x)
    x_and_y = ragged_concat_ops.concat([x, y], axis=0)
    indices = array_ops.where(condition, math_ops.range(x_nrows),
                              x_nrows + math_ops.range(_nrows(y)))
    return ragged_gather_ops.gather(x_and_y, indices)
  else:
    raise ValueError('Input shapes do not match.')
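# Illustrative sketch (not from the original sources): the rank-1 condition
# branch above is equivalent to concatenating x and y and gathering the
# selected row indices, as spelled out below with public TF ops. Tensor values
# are made up for the example.
import tensorflow as tf

condition = tf.constant([True, False, True])
x = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
y = tf.ragged.constant([[10], [20, 30], [40]])

x_nrows = x.nrows()
x_and_y = tf.concat([x, y], axis=0)
indices = tf.where(condition,
                   tf.range(x_nrows),
                   x_nrows + tf.range(y.nrows()))
tf.gather(x_and_y, indices)
# -> [[1, 2], [20, 30], [4, 5, 6]]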
def boolean_mask(data, mask, keepdims=False, name=None):
  """Applies a boolean mask to `data`.

  Returns a potentially ragged tensor that is formed by retaining the elements
  in `data` where the corresponding value in `mask` is `True`.

  If `keepdims` is true then outer dimensions (corresponding to the `mask`
  dimensions) are preserved, and:

  * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]`

     Where `j` is the `i`th `True` entry of `mask[a1...aA]`.

  If `keepdims` is false, then the outer dimensions are collapsed (similar to
  the behavior of `tf.boolean_mask`), and:

  * `output[i, b1...bB] = data[a1...aA, b1...bB]`

     Where `(a1...aA)` is the `i`th `True` entry of `mask`
     (in row-major order).

  Args:
    data: A potentially ragged tensor.
    mask: A potentially ragged boolean tensor. `mask`'s shape must be a prefix
      of `data`'s shape. `rank(mask)` must be known statically.
    keepdims: Whether to preserve the outer dimensions (`keepdims=True`) or
      flatten them (`keepdims=False`).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A potentially ragged tensor that is formed by retaining the elements in
    `data` where the corresponding value in `mask` is `True`.

    If `keepdims` is false:

    * `rank(output) = rank(data) - rank(mask) + 1`.
    * `output.ragged_rank = max(data.ragged_rank - rank(mask) + 1, 0)`.

    If `keepdims` is true:

    * `rank(output) = rank(data)`.
    * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`.

  Raises:
    ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is
      not a prefix of `data.shape`.

  #### Examples:
    ```python
    >>> # Aliases for True & False so data and mask line up.
    >>> T, F = (True, False)

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor. Flatten outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=False).tolist()
    [1, 3, 7]

    >>> tf.ragged.boolean_mask(  # Mask a 2D Tensor. Preserve outer dims.
    ...     data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     mask=[[T, F, T], [F, F, F], [T, F, F]],
    ...     keepdims=True).tolist()
    [[1, 3], [], [7]]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor. Flatten outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=False).tolist()
    [3, 5, 6]

    >>> tf.ragged.boolean_mask(  # Mask a 2D RaggedTensor. Preserve outer dims.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([[F, F, T], [F], [T, T]]),
    ...     keepdims=True).tolist()
    [[3], [], [5, 6]]

    >>> tf.ragged.boolean_mask(  # Mask rows of a 2D RaggedTensor.
    ...     tf.ragged.constant([[1, 2, 3], [4], [5, 6]]),
    ...     tf.ragged.constant([True, False, True]),
    ...     keepdims=True).tolist()
    [[1, 2, 3], [5, 6]]
    ```
  """
  with ops.name_scope(name, 'RaggedMask', [data, mask]):
    # Convert inputs to tensors.
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        mask, dtypes.bool, name='mask')
    row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes(
        data, mask, return_dtype=True)

    # Get static rank of mask.
    if mask.shape.ndims is None:
      raise ValueError('mask.shape.ndims must be known statically.')
    elif mask.shape.ndims == 0:
      raise ValueError('mask cannot be scalar.')

    # If mask is ragged, then recurse with a non-ragged mask.
    if ragged_tensor.is_ragged(mask):
      if not ragged_tensor.is_ragged(data):
        data = ragged_tensor.RaggedTensor.from_tensor(
            data,
            ragged_rank=mask.ragged_rank,
            row_splits_dtype=mask.row_splits.dtype)
      # Check that mask.nested_row_splits is a prefix of
      # data.nested_row_splits.
      splits_list = [
          mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank]
      ]
      with ops.control_dependencies(
          ragged_util.assert_splits_match(splits_list)):
        # Strip off ragged `splits` until `mask` is non-ragged. Keep the
        # splits that we strip off in `splits`, so we can add them back on
        # after we recursively mask the non-ragged data.
        splits = []
        while ragged_tensor.is_ragged(mask):
          if mask.shape.ndims > 2:
            splits.append(mask.row_splits)
          else:
            # Count the number of True mask values in each row to find the
            # lengths of the filtered rows; then convert to splits.
            int_mask = ragged_functional_ops.map_flat_values(
                math_ops.cast, mask, dtype=row_splits_dtype)
            masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
            splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
          mask = mask.values
          data = data.values

        # Recursively apply the nested non-ragged mask to the nested data.
        masked_values = boolean_mask(data, mask, keepdims)

        # Add the ragged `splits` back to the result.
        if keepdims:
          masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
              masked_values, splits, validate=False)

        return masked_values

    # If mask is non-ragged and has rank 1, and data is ragged, then build a
    # ragged tensor with the indicated rows.
    elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1:
      # Get the masked splits: first get the length of each row, then filter
      # out the rows that we are deleting, and convert that filtered set of
      # masks back to a splits tensor.
      lengths = data.row_lengths()
      masked_lengths = array_ops.boolean_mask(lengths, mask)
      masked_splits = ragged_util.lengths_to_splits(masked_lengths)

      # Get the masked values: first get row ids corresponding to each
      # value, then use tf.gather to build a boolean mask that's false for
      # values that come from rows that we are deleting, and use that mask to
      # construct the masked values tensor.
      segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits)
      segment_mask = array_ops.gather(mask, segment_ids)
      masked_values = boolean_mask(data.values, segment_mask, keepdims=False)

      return ragged_tensor.RaggedTensor.from_row_splits(
          masked_values, masked_splits, validate=False)

    # If mask is non-ragged and has rank>1, then convert it to be ragged,
    # with a ragged rank matching data.
    if ragged_tensor.is_ragged(data):
      mask = ragged_tensor.RaggedTensor.from_tensor(
          mask,
          ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1),
          row_splits_dtype=data.row_splits.dtype)
      return boolean_mask(data, mask, keepdims)

    # Otherwise, data and mask are both `Tensor`s.
    else:
      # Apply `boolean_mask` to get the masked values.
      masked_values = array_ops.boolean_mask(data, mask)

      if mask.shape.ndims >= 2 and keepdims:
        # Add the innermost ragged dimension. For each innermost cell, get the
        # number of values it contains. Then flatten that to get a list of
        # cell lengths, and convert it to splits. Finally, combine the splits
        # and values to get the innermost ragged tensor.
        masked_lengths = math_ops.count_nonzero(
            mask, axis=-1, dtype=row_splits_dtype)
        flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
            masked_values, flattened_masked_lengths, validate=False)

        # Wrap remaining ragged dimensions.
        if mask.shape.ndims > 2 and keepdims:
          mask_shape = array_ops.shape(mask, out_type=row_splits_dtype)
          split_size = math_ops.cumprod(mask_shape) + 1
          for dim in range(mask.shape.ndims - 3, -1, -1):
            elt_size = mask_shape[dim + 1]
            masked_splits = math_ops.range(split_size[dim]) * elt_size
            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                masked_values, masked_splits, validate=False)

      return masked_values
def _cast(input_tensor, dtype):
  return ragged_functional_ops.map_flat_values(math_ops.cast, input_tensor,
                                               dtype)
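# Illustrative sketch (not from the original sources): casting only the flat
# values leaves the row partitions untouched, so the result has the same
# ragged shape with a new dtype. Example values are made up.
import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [], [3]])
tf.ragged.map_flat_values(tf.cast, rt, dtype=tf.float32)
# -> [[1.0, 2.0], [], [3.0]]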