def convert_to_list(values, sparse_default_value=None):
  """Convert a TensorLike, CompositeTensor, or ndarray into a Python list."""
  if tf_utils.is_ragged(values):
    # There is a corner case when dealing with ragged tensors: if you get an
    # actual RaggedTensor (not a RaggedTensorValue) passed in non-eager mode,
    # you can't call to_list() on it without evaluating it first. However,
    # because we don't yet fully support composite tensors across Keras,
    # K.get_value() won't evaluate the tensor.
    # TODO(momernick): Get Keras to recognize composite tensors as Tensors
    # and then replace this with a call to K.get_value.
    if (isinstance(values, ragged_tensor.RaggedTensor) and
        not context.executing_eagerly()):
      values = K.get_session(values).run(values)
    values = values.to_list()

  if isinstance(values,
                (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
    if sparse_default_value is None:
      if dtypes.as_dtype(values.values.dtype) == dtypes.string:
        sparse_default_value = ''
      else:
        sparse_default_value = -1
    dense_tensor = sparse_ops.sparse_tensor_to_dense(
        values, default_value=sparse_default_value)
    values = K.get_value(dense_tensor)

  if isinstance(values, ops.Tensor):
    values = K.get_value(values)

  # We may get passed a ndarray or the code above may give us a ndarray.
  # In either case, we want to force it into a standard python list.
  if isinstance(values, np.ndarray):
    values = values.tolist()

  return values

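# Hedged usage sketch (added for illustration; not from the original source).
# Exercises the ragged, sparse, and dense branches of `convert_to_list` above,
# assuming TF 2.x eager execution and that the function's module imports are
# in scope.
import tensorflow as tf

ragged = tf.ragged.constant([[1, 2], [3]])
assert convert_to_list(ragged) == [[1, 2], [3]]

sparse = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 1]], values=[1, 2], dense_shape=[2, 2])
# Numeric sparse inputs are densified with a default fill value of -1.
assert convert_to_list(sparse) == [[1, -1], [-1, 2]]

assert convert_to_list(tf.constant([4, 5])) == [4, 5]
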
def call(self, inputs):
  bins = [math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)]

  def _bucketize_fn(inputs):
    return gen_boosted_trees_ops.BoostedTreesBucketize(
        float_values=[math_ops.cast(inputs, dtypes.float32)],
        bucket_boundaries=bins)[0]

  if tf_utils.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        _bucketize_fn, inputs)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=_bucketize_fn(inputs.values),
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    static_shape = inputs.get_shape()
    if any(dim is None for dim in static_shape.as_list()[1:]):
      raise NotImplementedError(
          "Discretization Layer requires known non-batch shape, "
          "found {}".format(static_shape))

    dynamic_shape = array_ops.shape_v2(inputs)
    # BoostedTreesBucketize only handles rank 1 inputs. We need to flatten
    # our inputs after batch size and vectorized_map over each sample.
    reshaped = array_ops.reshape(inputs, [dynamic_shape[0], -1])
    return array_ops.reshape(
        control_flow_ops.vectorized_map(_bucketize_fn, reshaped),
        dynamic_shape)

def call(self, inputs):
  if isinstance(inputs, (list, tuple, np.ndarray)):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)

  inputs = self._preprocess(inputs)

  # If we're not doing any output processing, return right away.
  if self._output_mode is None:
    return inputs

  lookup_data = self._index_lookup_layer(inputs)
  if self._output_mode == INT:
    # Maybe trim the output (NOOP if self._output_sequence_length is None).
    output_tensor = lookup_data[..., :self._output_sequence_length]

    output_shape = output_tensor.shape.as_list()
    output_shape[-1] = self._output_sequence_length

    # If it is a ragged tensor, convert it to dense with correct shape.
    if tf_utils.is_ragged(output_tensor):
      return output_tensor.to_tensor(default_value=0, shape=output_shape)

    if self._output_sequence_length is None:
      return output_tensor

    padding, _ = array_ops.required_space_to_batch_paddings(
        output_tensor.shape, output_shape)
    return array_ops.pad(output_tensor, padding)

  return lookup_data

def _process_single_input(self, inputs):
  # Converts integer inputs to string.
  if inputs.dtype.is_integer:
    if isinstance(inputs, sparse_tensor.SparseTensor):
      inputs = sparse_tensor.SparseTensor(
          indices=inputs.indices,
          values=string_ops.as_string(inputs.values),
          dense_shape=inputs.dense_shape)
    else:
      inputs = string_ops.as_string(inputs)
  str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
  if tf_utils.is_ragged(inputs):
    return ragged_functional_ops.map_flat_values(
        str_to_hash_bucket, inputs, num_buckets=self.num_bins, name='hash')
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    sparse_values = inputs.values
    sparse_hashed_values = str_to_hash_bucket(
        sparse_values, self.num_bins, name='hash')
    return sparse_tensor.SparseTensor(
        indices=inputs.indices,
        values=sparse_hashed_values,
        dense_shape=inputs.dense_shape)
  else:
    return str_to_hash_bucket(inputs, self.num_bins, name='hash')

def call(self, inputs):
  if isinstance(inputs, (list, tuple, np.ndarray)):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)

  if not self.max_tokens and self._vocab_size is None:
    raise ValueError("You must set the layer's vocabulary before calling it. "
                     "Either pass a `vocabulary` argument to the layer, or "
                     "call `layer.adapt(dataset)` with some sample data.")
  self._called = True
  if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
    inputs = math_ops.cast(inputs, dtypes.int64)
  lookup_result = self._table_handler.lookup(inputs)

  lookup_checks = []

  if self.num_oov_indices == 0 and not self.invert:
    if tf_utils.is_sparse(inputs):
      lookup_values = lookup_result.values
      input_values = inputs.values
    elif tf_utils.is_ragged(inputs):
      lookup_values = lookup_result.flat_values
      input_values = inputs.flat_values
    else:
      lookup_values = lookup_result
      input_values = inputs
    oov_indices = array_ops.where_v2(math_ops.equal(lookup_values, -1))
    oov_inputs = array_ops.gather_nd(input_values, oov_indices)
    msg = string_ops.string_format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = control_flow_ops.Assert(
        math_ops.equal(array_ops.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)

  with ops.control_dependencies(lookup_checks):
    if self.output_mode == INT:
      return array_ops.identity(lookup_result)

    multi_hot_output = (self.output_mode == MULTI_HOT)
    if self._vocab_size and not self.pad_to_max_tokens:
      out_depth = self._vocab_size
    else:
      out_depth = self.max_tokens
    if self.sparse:
      bincounts = category_encoding.sparse_bincount(
          lookup_result, out_depth, multi_hot_output)
    else:
      bincounts = category_encoding.dense_bincount(
          lookup_result, out_depth, multi_hot_output)

    if self.output_mode == TF_IDF:
      return math_ops.multiply(bincounts, self.tf_idf_weights)

    return bincounts

def _preprocess(self, inputs):
  if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
    if tf_utils.is_ragged(inputs):
      lowercase_inputs = ragged_functional_ops.map_flat_values(
          gen_string_ops.string_lower, inputs)
      # Depending on configuration, we may never touch the non-data tensor
      # in the ragged inputs tensor. If that is the case, and this is the
      # only layer in the keras model, running it will throw an error.
      # To get around this, we wrap the result in an identity.
      lowercase_inputs = array_ops.identity(lowercase_inputs)
    else:
      lowercase_inputs = gen_string_ops.string_lower(inputs)
    inputs = string_ops.regex_replace(lowercase_inputs, DEFAULT_STRIP_REGEX,
                                      "")
  elif callable(self._standardize):
    inputs = self._standardize(inputs)
  elif self._standardize is not None:
    raise ValueError(("%s is not a supported standardization. "
                      "TextVectorization supports the following options "
                      "for `standardize`: None, "
                      "'lower_and_strip_punctuation', or a Callable.") %
                     self._standardize)

  if self._split is not None:
    # If we are splitting, we validate that the 1st axis is of dimension 1 and
    # so can be squeezed out. We do this here instead of after splitting for
    # performance reasons - it's more expensive to squeeze a ragged tensor.
    if inputs.shape.ndims > 1:
      inputs = array_ops.squeeze(inputs, axis=-1)
    if self._split == SPLIT_ON_WHITESPACE:
      # This treats multiple whitespaces as one whitespace, and strips leading
      # and trailing whitespace.
      inputs = ragged_string_ops.string_split_v2(inputs)
    elif callable(self._split):
      inputs = self._split(inputs)
    else:
      raise ValueError(
          ("%s is not a supported splitting. "
           "TextVectorization supports the following options "
           "for `split`: None, 'whitespace', or a Callable.") % self._split)

  # Note that 'inputs' here can be either ragged or dense depending on the
  # configuration choices for this Layer. The strings.ngrams op, however,
  # does support both ragged and dense inputs.
  if self._ngrams is not None:
    inputs = ragged_string_ops.ngrams(
        inputs, ngram_width=self._ngrams, separator=" ")

  return inputs

def lookup(self, inputs):
  """Perform a table lookup."""
  # Sparse tensors don't play nicely with tensor conversion, so we handle
  # them before attempting to convert lists or arrays to tensors.
  if isinstance(
      inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
    return self._sparse_lookup(inputs)

  # Try to convert lists/arrays to tensors or RaggedTensors.
  inputs = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs)

  # Run the lookup operation on the converted tensor.
  if tf_utils.is_ragged(inputs):
    return self._ragged_lookup(inputs)
  else:
    return self._tensor_lookup(inputs)

def compute(self, values, accumulator=None):
  """Compute a step in this computation, returning a new accumulator."""
  if isinstance(values, sparse_tensor.SparseTensor):
    values = values.values
  if tf_utils.is_ragged(values):
    values = values.flat_values
  flattened_input = np.reshape(values, newshape=(-1, 1))
  summaries = [summarize(v, self.epsilon) for v in flattened_input.T]
  if accumulator is None:
    return self._create_accumulator(summaries)
  else:
    return self._create_accumulator([
        merge_summaries(prev_summ, summ, self.epsilon)
        for prev_summ, summ in zip(accumulator.summaries, summaries)
    ])

def call(self, inputs):
  if tf_utils.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        gen_math_ops.Bucketize, input=inputs, boundaries=self.bins)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    integer_buckets = gen_math_ops.Bucketize(
        input=inputs.values, boundaries=self.bins)
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=integer_buckets,
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    return gen_math_ops.Bucketize(input=inputs, boundaries=self.bins)

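# Hedged illustration (added; not from the original source) of the bucket
# semantics the `Bucketize` op above relies on: with boundaries [b0, b1, b2]
# a value maps to the index of the first boundary it falls below, giving
# len(boundaries) + 1 buckets. Shown via the public `tf.raw_ops` alias, which
# is assumed here to match `gen_math_ops.Bucketize`.
import tensorflow as tf

x = tf.constant([[-0.5, 0.5], [1.5, 2.5]])
buckets = tf.raw_ops.Bucketize(input=x, boundaries=[0., 1., 2.])
print(buckets.numpy())  # [[0 1]
                        #  [2 3]]
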
def call(self, inputs):
  if isinstance(inputs, (list, tuple, np.ndarray)):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)

  self._called = True
  inputs = self._preprocess(inputs)

  # If we're not doing any output processing, return right away.
  if self._output_mode is None:
    return inputs

  indexed_data = self._index_lookup_layer(inputs)

  if self._output_mode == INT:
    # Once we have the dense tensor, we can return it if we weren't given a
    # fixed output sequence length. If we were, though, we have to dynamically
    # choose whether to pad or trim it based on each tensor.

    # We need to convert to dense if we have a ragged tensor.
    if tf_utils.is_ragged(indexed_data):
      dense_data = indexed_data.to_tensor(default_value=0)
    else:
      dense_data = indexed_data

    if self._output_sequence_length is None:
      return dense_data
    else:
      sequence_len = K.shape(dense_data)[1]
      pad_amt = self._output_sequence_length - sequence_len
      pad_fn = lambda: array_ops.pad(dense_data, [[0, 0], [0, pad_amt]])
      slice_fn = lambda: dense_data[:, :self._output_sequence_length]
      output_tensor = control_flow_ops.cond(
          sequence_len < self._output_sequence_length,
          true_fn=pad_fn,
          false_fn=slice_fn)
      output_shape = output_tensor.shape.as_list()
      output_shape[-1] = self._output_sequence_length
      output_tensor.set_shape(tensor_shape.TensorShape(output_shape))
      return output_tensor

  # If we're not returning integers here, we rely on the vectorization layer
  # to create the output.
  return self._vectorize_layer(indexed_data)

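# Hedged sketch (added for illustration) of the pad-or-trim decision in the
# INT branch above: with a fixed output sequence length, a batch whose
# sequences are shorter is zero-padded and a longer one is sliced. Uses only
# the public TF API; the values are made up.
import tensorflow as tf

dense_data = tf.constant([[7, 8, 9]])  # batch of one, sequence_len == 3
output_sequence_length = 5
pad_amt = output_sequence_length - dense_data.shape[1]
padded = tf.pad(dense_data, [[0, 0], [0, pad_amt]])
print(padded.numpy())  # [[7 8 9 0 0]]

# With output_sequence_length = 2 the other branch would slice instead:
print(dense_data[:, :2].numpy())  # [[7 8]]
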
def lookup(self, inputs):
  """Perform a table lookup."""
  # Sparse tensors don't play nicely with tensor conversion, so we handle
  # them before attempting to convert lists or arrays to tensors.
  if isinstance(
      inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
    return self._sparse_lookup(inputs)

  if tf_utils.is_ragged(inputs):
    if isinstance(inputs, ragged_tensor_value.RaggedTensorValue):
      flat_values = ops.convert_to_tensor_v2_with_dispatch(
          value=inputs.flat_values, name="flat_values")
      inputs = ragged_tensor.RaggedTensor.from_nested_row_splits(
          flat_values, inputs.nested_row_splits, validate=False)
    return self._ragged_lookup(inputs)

  # For normal tensor inputs
  inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
  return self._tensor_lookup(inputs)

def call(self, inputs):

  def _bucketize_op(bins):
    bins = [math_ops.cast(bins, dtypes.float32)]
    return lambda inputs: gen_boosted_trees_ops.BoostedTreesBucketize(  # pylint: disable=g-long-lambda
        float_values=[math_ops.cast(inputs, dtypes.float32)],
        bucket_boundaries=bins)[0]

  if tf_utils.is_ragged(inputs):
    integer_buckets = ragged_functional_ops.map_flat_values(
        _bucketize_op(array_ops.squeeze(self.bins)), inputs)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # this can cause errors in Graph mode, so wrap the tensor in an identity.
    return array_ops.identity(integer_buckets)
  elif isinstance(inputs, sparse_tensor.SparseTensor):
    integer_buckets = gen_boosted_trees_ops.BoostedTreesBucketize(
        float_values=[math_ops.cast(inputs.values, dtypes.float32)],
        bucket_boundaries=[
            math_ops.cast(array_ops.squeeze(self.bins), dtypes.float32)
        ])[0]
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=integer_buckets,
        dense_shape=array_ops.identity(inputs.dense_shape))
  else:
    input_shape = inputs.get_shape()
    if any(dim is None for dim in input_shape.as_list()[1:]):
      raise NotImplementedError(
          "Discretization Layer requires known non-batch shape, "
          "found {}".format(input_shape))
    reshaped = array_ops.reshape(
        inputs,
        [-1, gen_math_ops.Prod(input=input_shape.as_list()[1:], axis=0)])
    return array_ops.reshape(
        control_flow_ops.vectorized_map(
            _bucketize_op(array_ops.squeeze(self.bins)), reshaped),
        array_ops.constant([-1] + input_shape.as_list()[1:]))

def call(self, inputs):
  inputs = [self._preprocess_input(inp) for inp in inputs]
  depth_tuple = self._depth_tuple if self.depth else (len(inputs),)
  ragged_out = sparse_out = False
  if any(tf_utils.is_ragged(inp) for inp in inputs):
    ragged_out = True
  elif any(isinstance(inp, sparse_tensor.SparseTensor) for inp in inputs):
    sparse_out = True

  outputs = []
  for depth in depth_tuple:
    if len(inputs) < depth:
      raise ValueError(
          'Number of inputs cannot be less than depth, got {} input tensors, '
          'and depth {}'.format(len(inputs), depth))
    for partial_inps in itertools.combinations(inputs, depth):
      partial_out = self.partial_crossing(
          partial_inps, ragged_out, sparse_out)
      outputs.append(partial_out)
  if sparse_out:
    return sparse_ops.sparse_concat_v2(axis=1, sp_inputs=outputs)
  return array_ops.concat(outputs, axis=1)

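# Hedged illustration (added; not from the original source) of how the
# depth/combinations loop above fans out crossings: with three inputs and
# depth 2, `itertools.combinations` yields every unordered pair, so three
# partial crossings are computed and concatenated along axis 1.
import itertools

inputs = ['a', 'b', 'c']
pairs = list(itertools.combinations(inputs, 2))
print(pairs)  # [('a', 'b'), ('a', 'c'), ('b', 'c')]
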
def __call__(self,
             y_true,
             y_pred,
             sample_weight=None,
             regularization_losses=None):
  """Computes the overall loss.

  Args:
    y_true: An arbitrary structure of Tensors representing the ground truth.
    y_pred: An arbitrary structure of Tensors representing a Model's outputs.
    sample_weight: An arbitrary structure of Tensors representing the
      per-sample loss weights. If one Tensor is passed, it is used for all
      losses. If multiple Tensors are passed, the structure should match
      `y_pred`.
    regularization_losses: Additional losses to be added to the total loss.

  Returns:
    Tuple of `(total_loss, per_output_loss_list)`
  """
  y_true = self._conform_to_outputs(y_pred, y_true)
  sample_weight = self._conform_to_outputs(y_pred, sample_weight)

  if not self._built:
    self.build(y_pred)

  y_pred = nest.flatten(y_pred)
  y_true = nest.flatten(y_true)
  sample_weight = nest.flatten(sample_weight)

  loss_values = []  # Used for gradient calculation.
  loss_metric_values = []  # Used for loss metric calculation.
  batch_dim = None
  zip_args = (y_true, y_pred, sample_weight, self._losses,
              self._loss_weights, self._per_output_metrics)
  for y_t, y_p, sw, loss_obj, loss_weight, metric_obj in zip(*zip_args):
    if y_t is None or loss_obj is None:  # Ok to have no loss for an output.
      continue

    y_t, y_p, sw = match_dtype_and_rank(y_t, y_p, sw)
    sw = apply_mask(y_p, sw, get_mask(y_p))
    loss_value = loss_obj(y_t, y_p, sample_weight=sw)

    loss_metric_value = loss_value
    # Correct for the `Mean` loss metrics counting each replica as a batch.
    if loss_obj.reduction == losses_utils.ReductionV2.SUM:
      loss_metric_value *= ds_context.get_strategy().num_replicas_in_sync

    if batch_dim is None:
      if tf_utils.is_ragged(y_t):
        batch_dim = y_t.nrows()
      else:
        batch_dim = array_ops.shape(y_t)[0]

    if metric_obj is not None:
      metric_obj.update_state(loss_metric_value, sample_weight=batch_dim)

    if loss_weight is not None:
      loss_value *= loss_weight
      loss_metric_value *= loss_weight

    if (loss_obj.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE or
        loss_obj.reduction == losses_utils.ReductionV2.AUTO):
      loss_value = losses_utils.scale_loss_for_distribution(loss_value)

    loss_values.append(loss_value)
    loss_metric_values.append(loss_metric_value)

  if regularization_losses:
    regularization_losses = losses_utils.cast_losses_to_common_dtype(
        regularization_losses)
    reg_loss = math_ops.add_n(regularization_losses)
    loss_metric_values.append(reg_loss)
    loss_values.append(losses_utils.scale_loss_for_distribution(reg_loss))

  if loss_values:
    loss_metric_values = losses_utils.cast_losses_to_common_dtype(
        loss_metric_values)
    total_loss_metric_value = math_ops.add_n(loss_metric_values)
    self._loss_metric.update_state(
        total_loss_metric_value, sample_weight=batch_dim)

    loss_values = losses_utils.cast_losses_to_common_dtype(loss_values)
    total_loss = math_ops.add_n(loss_values)
    return total_loss
  else:
    # Ok for a model to have no compiled loss.
    return array_ops.zeros(shape=())

def _create_keras_history_helper(tensors, processed_ops, created_layers):
  """Helper method for `create_keras_history`.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped
      in `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
  # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
  # Cannot be imported at top because of circular dependencies.
  # TODO(omalleyt): Resolve circular dependency.
  from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
  tensor_list = nest.flatten(tensors)
  sparse_ops = []
  ragged_tensors = []
  for tensor in tensor_list:
    if getattr(tensor, '_keras_history', None) is not None:
      continue
    if isinstance(
        tensor,
        (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      sparse_ops.append(tensor.op)
      continue
    if tf_utils.is_ragged(tensor):
      # Ragged tensors don't have an op property.
      ragged_tensors.append(tensor)
      continue
    op = tensor.op  # The Op that created this Tensor.
    if op not in processed_ops:
      # Recursively set `_keras_history`.
      op_inputs = list(op.inputs)
      constants = {}
      layer_inputs = []
      for i, op_input in enumerate(op_inputs):
        if uses_keras_history(op_input):
          layer_inputs.append(op_input)
        else:
          # Treat any value not originating from a `keras.Input` as
          # a constant. Variables cannot be supported.
          ds_with_session = (
              distribution_strategy_context.in_cross_replica_context() and
              not ops.executing_eagerly_outside_functions())
          using_xla = control_flow_util.GraphOrParentsInXlaContext(
              ops.get_default_graph())
          if ds_with_session or using_xla:
            # In Legacy Graph mode, evaluating here makes Session be
            # configured improperly. The downside of this is that saving
            # via `get_config` breaks, but SavedModel still works.
            constants[i] = op_input
          else:
            with ops.init_scope():
              if ops.executing_eagerly_outside_functions():
                constants[i] = backend.eval_in_eager_or_function(op_input)
              else:
                constants[i] = backend.function([], op_input)([])
      layer_inputs = unnest_if_single_tensor(layer_inputs)
      processed_ops, created_layers = _create_keras_history_helper(
          layer_inputs, processed_ops, created_layers)
      name = op.name
      node_def = op.node_def.SerializeToString()
      op_layer = base_layer.TensorFlowOpLayer(
          node_def, constants=constants, name=name)
      created_layers.append(op_layer)
      op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
          args=(layer_inputs,),
          kwargs={},
          outputs=op.outputs)
      processed_ops.update([op])
  if sparse_ops or ragged_tensors:
    lambda_example = """
weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
output = tf.keras.layers.Lambda(weights_mult)(input)
"""
    raise ValueError(
        'Tensorflow ops that generate ragged or sparse tensor '
        'outputs are currently not supported by Keras automatic '
        'op wrapping. Please wrap these ops in a Lambda layer: '
        '\n\n```\n{example}\n```\n'
        'Sparse ops encountered: {sparse_ops}\n'
        'Ragged tensors encountered: {ragged_tensors}\n'.format(
            example=lambda_example,
            sparse_ops=str(sparse_ops),
            ragged_tensors=str(ragged_tensors)))
  return processed_ops, created_layers

def test_is_ragged_return_true_for_ragged_tensor(self):
  tensor = ragged_tensor.RaggedTensor.from_row_splits(
      values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8])
  self.assertTrue(tf_utils.is_ragged(tensor))

def test_is_ragged_return_false_for_list(self):
  tensor = [1., 2., 3.]
  self.assertFalse(tf_utils.is_ragged(tensor))

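# For context, a minimal sketch of what the `tf_utils.is_ragged` predicate
# under test presumably checks (an assumption inferred from the usage above,
# not copied from the source): membership in the ragged tensor types, which
# is why a plain Python list returns False.
from tensorflow.python.ops.ragged import ragged_tensor, ragged_tensor_value


def is_ragged_sketch(tensor):
  """Returns True if `tensor` is a RaggedTensor or RaggedTensorValue."""
  return isinstance(
      tensor,
      (ragged_tensor.RaggedTensor, ragged_tensor_value.RaggedTensorValue))
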