def call(self, inputs):
  """Look up `inputs` in the layer's vocabulary table and encode the result.

  Args:
    inputs: A dense, sparse, or ragged tensor (or a Python list/tuple/ndarray,
      which is converted to a tensor first) of vocabulary keys.

  Returns:
    For `output_mode == INT`, the raw lookup indices. Otherwise a bincount
    encoding (sparse or dense per `self.sparse`), optionally multi-hot, and
    weighted by `self.tf_idf_weights` when `output_mode == TF_IDF`.

  Raises:
    ValueError: If called before a vocabulary has been set or adapted.
  """
  # Plain Python containers and numpy arrays must become tensors before the
  # table lookup can dispatch on them.
  if isinstance(inputs, (list, tuple, np.ndarray)):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
  if not self.max_tokens and self._vocab_size is None:
    raise ValueError("You must set the layer's vocabulary before calling it. "
                     "Either pass a `vocabulary` argument to the layer, or "
                     "call `layer.adapt(dataset)` with some sample data.")
  self._called = True
  # The table keys are int64; silently widen int32 inputs so lookups match.
  if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
    inputs = math_ops.cast(inputs, dtypes.int64)
  lookup_result = self._table_handler.lookup(inputs)

  lookup_checks = []

  # With no OOV bucket (and not inverting), any miss (index -1) is a user
  # error; build a graph-mode assertion that reports the offending values.
  if self.num_oov_indices == 0 and not self.invert:
    # Extract the flat value tensors regardless of composite-tensor kind so
    # the OOV scan below works uniformly.
    if tf_utils.is_sparse(inputs):
      lookup_values = lookup_result.values
      input_values = inputs.values
    elif tf_utils.is_ragged(inputs):
      lookup_values = lookup_result.flat_values
      input_values = inputs.flat_values
    else:
      lookup_values = lookup_result
      input_values = inputs
    # -1 is the table's miss sentinel; gather the original inputs at those
    # positions so the error message can show them.
    oov_indices = array_ops.where_v2(math_ops.equal(lookup_values, -1))
    oov_inputs = array_ops.gather_nd(input_values, oov_indices)
    msg = string_ops.string_format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = control_flow_ops.Assert(
        math_ops.equal(array_ops.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)

  # Tie the OOV assertion (if any) to every output so it runs in Graph mode.
  with ops.control_dependencies(lookup_checks):
    if self.output_mode == INT:
      # identity() keeps the control dependency attached to the returned
      # tensor.
      return array_ops.identity(lookup_result)

    multi_hot_output = (self.output_mode == MULTI_HOT)
    # Use the actual vocab size unless the caller asked for padded output,
    # in which case the encoding width is fixed at max_tokens.
    if self._vocab_size and not self.pad_to_max_tokens:
      out_depth = self._vocab_size
    else:
      out_depth = self.max_tokens
    if self.sparse:
      bincounts = category_encoding.sparse_bincount(lookup_result, out_depth,
                                                    multi_hot_output)
    else:
      bincounts = category_encoding.dense_bincount(lookup_result, out_depth,
                                                   multi_hot_output)
    if self.output_mode == TF_IDF:
      return math_ops.multiply(bincounts, self.tf_idf_weights)
    return bincounts
def call(self, inputs):
  """Map `inputs` to integer bucket indices using `self.bin_boundaries`.

  Handles dense, sparse, and ragged inputs, preserving the composite
  structure of the input in the output.
  """
  def _to_buckets(values):
    return gen_math_ops.Bucketize(
        input=values, boundaries=self.bin_boundaries)

  if tf_utils.is_sparse(inputs):
    # Rebuild the SparseTensor with bucketized values; identity on the
    # structural tensors keeps them as distinct graph ops.
    return sparse_tensor.SparseTensor(
        indices=array_ops.identity(inputs.indices),
        values=_to_buckets(inputs.values),
        dense_shape=array_ops.identity(inputs.dense_shape))

  if tf_utils.is_ragged(inputs):
    bucketed = ragged_functional_ops.map_flat_values(_to_buckets, inputs)
    # Ragged map_flat_values doesn't touch the non-values tensors in the
    # ragged composite tensor. If this op is the only op in a Keras model,
    # that can cause errors in Graph mode, so wrap the result in an identity.
    return array_ops.identity(bucketed)

  return _to_buckets(inputs)
def test_is_sparse_return_false_for_list(self):
  """A plain Python list must not be classified as sparse."""
  dense_input = [1., 2., 3.]
  self.assertFalse(tf_utils.is_sparse(dense_input))
def test_is_sparse_return_true_for_sparse_tensor_value(self):
  """A SparseTensorValue must be classified as sparse."""
  sparse_value = sparse_tensor.SparseTensorValue(
      indices=[[0, 0], [1, 2]],
      values=[1, 2],
      dense_shape=[3, 4])
  self.assertTrue(tf_utils.is_sparse(sparse_value))
def expand_dims(inputs, axis):
  """Insert a length-1 dimension at `axis`, dispatching on input sparseness."""
  expand = (sparse_ops.sparse_expand_dims
            if tf_utils.is_sparse(inputs)
            else array_ops.expand_dims)
  return expand(inputs, axis)