def test_dense_input_wrong_shape_fails(self):
    """Dense weights whose shape differs from the dense input must be rejected.

    The input is a 2x3 array while the weights are a 3x2 array, so
    `sparse_bincount` is expected to raise. The exception type and message
    depend on the execution mode (see the note below).
    """
    x = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
    weights = np.array([[3, 2], [5, 4], [4, 3]])
    # Note: Eager mode and graph mode throw different errors here. Graph mode
    # will fail with a ValueError from the shape checking logic, while Eager
    # will fail with an InvalidArgumentError from the kernel itself.
    if context.executing_eagerly():
        expected_error = errors.InvalidArgumentError
        expected_message = "must have the same shape"
    else:
        expected_error = ValueError
        expected_message = "both shapes must be equal"
    # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias
    # (removed in Python 3.12) and matches the other tests in this file.
    with self.assertRaisesRegex(expected_error, expected_message):
        self.evaluate(
            bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
def call(self, inputs, count_weights=None):
    """Encode `inputs` as TF-IDF, binary or count vectors of width `out_depth`.

    Args:
      inputs: Values to encode. Lists and numpy arrays are converted to
        tensors first; rank-1 inputs get an extra axis appended at position 1.
      count_weights: Optional weights forwarded to the bincount ops. Only
        accepted when the layer's output mode is COUNT.

    Returns:
      The TF-IDF tensor, a float `SparseTensor` (when `self._sparse` is set),
      or a dense bincount tensor, depending on the layer configuration.

    Raises:
      ValueError: If `count_weights` is passed with a non-COUNT output mode,
        or if sparse output is requested together with TF-IDF mode.
      RuntimeError: If `max_tokens` is None and the adapted element count is
        still zero.
    """
    if isinstance(inputs, (list, np.ndarray)):
        inputs = ops.convert_to_tensor_v2(inputs)
    if inputs.shape.rank == 1:
        inputs = array_ops.expand_dims(inputs, 1)

    weights_given = count_weights is not None
    if weights_given and self._output_mode != COUNT:
        raise ValueError(
            "count_weights is not used in `output_mode='tf-idf'`, "
            "or `output_mode='binary'`. Please pass a single input.")

    self._called = True

    # Output width: fixed by `max_tokens` when set, otherwise taken from the
    # layer's `num_elements` state.
    if self._max_tokens is not None:
        out_depth = self._max_tokens
    else:
        out_depth = K.get_value(self.num_elements)
        if out_depth == 0:
            raise RuntimeError(
                "If you construct a `CategoryEncoding` layer with "
                "`max_tokens=None`, you need to call `adapt()` "
                "on it before using it")

    if self._output_mode == TFIDF:
        if self._sparse:
            raise ValueError("`sparse=True` with `output_mode=tfidf` "
                             "is not supported.")
        # If the input is a sparse tensor, we densify it with the default
        # value of -1. Because -1 is ignored by one_hot, this effectively
        # drops the non-set positions from the output encoding.
        if isinstance(inputs, sparse_tensor.SparseTensor):
            inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1)
        one_hot = array_ops.one_hot(inputs, depth=out_depth)
        token_counts = math_ops.reduce_sum(one_hot, axis=1)
        weighted = math_ops.multiply(token_counts, self.tf_idf_weights)
        weighted.set_shape(tensor_shape.TensorShape((None, out_depth)))
        return weighted

    is_binary = (self._output_mode == BINARY)

    if self._sparse:
        sparse_counts = bincount_ops.sparse_bincount(
            inputs,
            weights=count_weights,
            minlength=out_depth,
            axis=-1,
            binary_output=is_binary)
        sparse_counts = math_ops.cast(sparse_counts, K.floatx())
        num_rows = array_ops.shape(sparse_counts)[0]
        # Rebuild the sparse tensor so its dense shape is exactly
        # [num_rows, out_depth].
        return sparse_tensor.SparseTensor(
            indices=sparse_counts.indices,
            values=sparse_counts.values,
            dense_shape=[num_rows, out_depth])

    dense_counts = bincount_ops.bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        dtype=K.floatx(),
        axis=-1,
        binary_output=is_binary)
    dense_counts.set_shape(tensor_shape.TensorShape((None, out_depth)))
    return dense_counts
def test_dense_input_ragged_weights_fails(self):
    """A dense input combined with ragged weights must raise a ValueError."""
    dense_values = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
    ragged_weights = ragged_factory_ops.constant(
        [[6, 0.5, 2], [14], [10, 0.25, 5, 3]])
    with self.assertRaisesRegex(ValueError, "must be a tf.Tensor"):
        counts = bincount_ops.sparse_bincount(
            dense_values, weights=ragged_weights, axis=-1)
        self.evaluate(counts)
def call(self, inputs):
    """Encode `inputs` as TF-IDF, binary or count vectors of width `out_depth`.

    Args:
      inputs: Values to encode; a `SparseTensor` is densified in TF-IDF mode.

    Returns:
      The TF-IDF tensor, the result of `sparse_bincount` (when `self._sparse`
      is set), or a dense int64 bincount tensor.
    """
    self._called = True

    # Output width: fixed by `max_tokens` when set, otherwise taken from the
    # layer's `num_elements` state.
    out_depth = (K.get_value(self.num_elements)
                 if self._max_tokens is None else self._max_tokens)

    if self._output_mode == TFIDF:
        # If the input is a sparse tensor, we densify it with the default
        # value of -1. Because -1 is ignored by one_hot, this effectively
        # drops the non-set positions from the output encoding.
        if isinstance(inputs, sparse_tensor.SparseTensor):
            inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1)
        one_hot = array_ops.one_hot(inputs, depth=out_depth)
        token_counts = math_ops.reduce_sum(one_hot, axis=1)
        weighted = math_ops.multiply(token_counts, self.tf_idf_weights)
        weighted.set_shape(tensor_shape.TensorShape((None, out_depth)))
        return weighted

    is_binary = (self._output_mode == BINARY)

    if self._sparse:
        return bincount_ops.sparse_bincount(
            inputs,
            minlength=out_depth,
            axis=-1,
            binary_output=is_binary)

    dense_counts = bincount_ops.bincount(
        inputs,
        minlength=out_depth,
        dtype=dtypes.int64,
        axis=-1,
        binary_output=is_binary)
    dense_counts.set_shape(tensor_shape.TensorShape((None, out_depth)))
    return dense_counts
def test_sparse_input_too_many_indices_fails(self):
    """Sparse weights built from an array with an extra nonzero must fail.

    The weights array has one more nonzero entry (row 0, column 1) than the
    input array, and an incompatible-shapes error is expected.
    """
    dense_input = np.array(
        [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
    dense_weights = np.array(
        [[3, 1, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
    sparse_input = sparse_ops.from_dense(dense_input)
    sparse_weights = sparse_ops.from_dense(dense_weights)
    with self.assertRaisesIncompatibleShapesError():
        counts = bincount_ops.sparse_bincount(
            sparse_input, weights=sparse_weights, axis=-1)
        self.evaluate(counts)
def test_sparse_input_wrong_indices_fails(self):
    """Sparse weights whose nonzero positions differ from the input must fail.

    Both source arrays contain the same number of nonzero entries, but in
    row 0 the second nonzero sits at column 2 for the input and at column 1
    for the weights. An InvalidArgumentError is expected.
    """
    x = sparse_ops.from_dense(
        np.array([[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
    weights = sparse_ops.from_dense(
        np.array([[3, 1, 0, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
    # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias
    # (removed in Python 3.12) and matches the other tests in this file.
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                "must have the same indices"):
        self.evaluate(
            bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
def test_ragged_input_different_shape_fails(self):
    """Ragged weights with different row lengths than the input must fail.

    The input rows have lengths 3, 1 and 4; the weight rows have lengths
    3, 0 and 4.
    """
    ragged_input = ragged_factory_ops.constant(
        [[6, 1, 2], [14], [10, 1, 5, 3]])
    ragged_weights = ragged_factory_ops.constant(
        [[6, 0.5, 2], [], [10, 0.25, 5, 3]])
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                "must have the same row splits"):
        counts = bincount_ops.sparse_bincount(
            ragged_input, weights=ragged_weights, axis=-1)
        self.evaluate(counts)
def test_ragged_input_sparse_weights_fails(self):
    """A ragged input combined with sparse weights must raise a ValueError."""
    ragged_input = ragged_factory_ops.constant(
        [[6, 1, 2], [14], [10, 1, 5, 3]])
    dense_weights = np.array(
        [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
    sparse_weights = sparse_ops.from_dense(dense_weights)
    with self.assertRaisesRegex(ValueError, "must be a RaggedTensor"):
        counts = bincount_ops.sparse_bincount(
            ragged_input, weights=sparse_weights, axis=-1)
        self.evaluate(counts)
def test_sparse_input_dense_weights_fails(self):
    """A sparse input combined with dense numpy weights must raise ValueError."""
    dense_input = np.array(
        [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
    sparse_input = sparse_ops.from_dense(dense_input)
    dense_weights = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
    with self.assertRaisesRegex(ValueError, "must be a SparseTensor"):
        counts = bincount_ops.sparse_bincount(
            sparse_input, weights=dense_weights, axis=-1)
        self.evaluate(counts)
def sparse_bincount(inputs, out_depth, multi_hot_output, count_weights=None):
    """Apply binary or count encoding to an input and return a sparse tensor.

    Args:
      inputs: Values passed to `bincount_ops.sparse_bincount`.
      out_depth: Used as both `minlength` and `maxlength` of the bincount and
        as the second dimension of the returned dense shape.
      multi_hot_output: Forwarded as `binary_output` to the bincount op.
      count_weights: Optional weights forwarded to the bincount op.

    Returns:
      A `SparseTensor` cast to the backend float type whose dense shape is
      `[batch_size, out_depth]`.
    """
    raw_counts = bincount_ops.sparse_bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        maxlength=out_depth,
        axis=-1,
        binary_output=multi_hot_output)
    float_counts = math_ops.cast(raw_counts, backend.floatx())
    # The batch dimension is read from the result at runtime; the depth is
    # pinned to `out_depth`.
    num_rows = array_ops.shape(float_counts)[0]
    return sparse_tensor.SparseTensor(
        indices=float_counts.indices,
        values=float_counts.values,
        dense_shape=[num_rows, out_depth])
def test_dense_input(self,
                     x,
                     expected_indices,
                     expected_values,
                     expected_shape,
                     minlength=None,
                     maxlength=None,
                     binary_output=False,
                     weights=None,
                     axis=-1):
    """Run `sparse_bincount` on a dense input and check the sparse result.

    The indices, values and dense shape of the result are compared against
    the expected values supplied by the caller.
    """
    bincount_kwargs = dict(
        weights=weights,
        minlength=minlength,
        maxlength=maxlength,
        binary_output=binary_output,
        axis=axis)
    result = bincount_ops.sparse_bincount(x, **bincount_kwargs)
    self.assertAllEqual(expected_indices, result.indices)
    self.assertAllEqual(expected_values, result.values)
    self.assertAllEqual(expected_shape, result.dense_shape)
def call(self, inputs, count_weights=None):
    """Encode `inputs` as TF-IDF, binary or count vectors of width `out_depth`.

    Args:
      inputs: Values to encode; a `SparseTensor` is densified in TF-IDF mode.
      count_weights: Optional weights forwarded to the bincount ops. Only
        accepted when the layer's output mode is COUNT.

    Returns:
      The TF-IDF tensor, a float-cast sparse bincount (when `self._sparse` is
      set), or a dense bincount tensor.

    Raises:
      ValueError: If `count_weights` is passed with a non-COUNT output mode.
    """
    weights_given = count_weights is not None
    if weights_given and self._output_mode != COUNT:
        raise ValueError(
            "count_weights is not used in `output_mode='tf-idf'`, "
            "or `output_mode='binary'`. Please pass a single input.")

    self._called = True

    # Output width: fixed by `max_tokens` when set, otherwise taken from the
    # layer's `num_elements` state.
    out_depth = (K.get_value(self.num_elements)
                 if self._max_tokens is None else self._max_tokens)

    if self._output_mode == TFIDF:
        # If the input is a sparse tensor, we densify it with the default
        # value of -1. Because -1 is ignored by one_hot, this effectively
        # drops the non-set positions from the output encoding.
        if isinstance(inputs, sparse_tensor.SparseTensor):
            inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1)
        one_hot = array_ops.one_hot(inputs, depth=out_depth)
        token_counts = math_ops.reduce_sum(one_hot, axis=1)
        weighted = math_ops.multiply(token_counts, self.tf_idf_weights)
        weighted.set_shape(tensor_shape.TensorShape((None, out_depth)))
        return weighted

    is_binary = (self._output_mode == BINARY)

    if self._sparse:
        sparse_counts = bincount_ops.sparse_bincount(
            inputs,
            weights=count_weights,
            minlength=out_depth,
            axis=-1,
            binary_output=is_binary)
        return math_ops.cast(sparse_counts, K.floatx())

    dense_counts = bincount_ops.bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        dtype=K.floatx(),
        axis=-1,
        binary_output=is_binary)
    dense_counts.set_shape(tensor_shape.TensorShape((None, out_depth)))
    return dense_counts
def test_ragged_input(self,
                      x,
                      expected_indices,
                      expected_values,
                      expected_shape,
                      maxlength=None,
                      minlength=None,
                      binary_output=False,
                      weights=None,
                      axis=-1):
    """Run `sparse_bincount` on a ragged input and check the sparse result.

    `x` (and `weights`, when given) are converted to ragged tensors before
    the call. The indices, values and dense shape of the result are compared
    against the expected values supplied by the caller.
    """
    ragged_values = ragged_factory_ops.constant(x)
    if weights is not None:
        ragged_weights = ragged_factory_ops.constant(weights)
    else:
        ragged_weights = None
    result = bincount_ops.sparse_bincount(
        ragged_values,
        weights=ragged_weights,
        minlength=minlength,
        maxlength=maxlength,
        binary_output=binary_output,
        axis=axis)
    self.assertAllEqual(expected_indices, result.indices)
    self.assertAllEqual(expected_values, result.values)
    self.assertAllEqual(expected_shape, result.dense_shape)
def test_sparse_input(self,
                      x,
                      expected_indices,
                      expected_values,
                      expected_shape,
                      maxlength=None,
                      minlength=None,
                      binary_output=False,
                      weights=None,
                      axis=-1):
    """Run `sparse_bincount` on a sparse input and check the sparse result.

    `x` (and `weights`, when given) are converted with
    `sparse_ops.from_dense` before the call. The indices, values and dense
    shape of the result are compared against the expected values supplied by
    the caller.
    """
    sparse_values = sparse_ops.from_dense(x)
    if weights is not None:
        sparse_weights = sparse_ops.from_dense(weights)
    else:
        sparse_weights = None
    result = bincount_ops.sparse_bincount(
        sparse_values,
        weights=sparse_weights,
        minlength=minlength,
        maxlength=maxlength,
        binary_output=binary_output,
        axis=axis)
    self.assertAllEqual(expected_indices, result.indices)
    self.assertAllEqual(expected_values, result.values)
    self.assertAllEqual(expected_shape, result.dense_shape)