예제 #1
0
 def test_dense_input_wrong_shape_fails(self):
   """Checks that dense weights shaped differently from the input fail.

   The input is a 2x3 array while the weights are a 3x2 array, so
   `sparse_bincount` is expected to raise.
   """
   x = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
   weights = np.array([[3, 2], [5, 4], [4, 3]])
   # Note: Eager mode and graph mode throw different errors here. Graph mode
   # will fail with a ValueError from the shape checking logic, while Eager
   # will fail with an InvalidArgumentError from the kernel itself.
   if context.executing_eagerly():
     expected_error = errors.InvalidArgumentError
     expected_regex = "must have the same shape"
   else:
     expected_error = ValueError
     expected_regex = "both shapes must be equal"
   # `assertRaisesRegex` is used instead of the deprecated `assertRaisesRegexp`
   # alias, which no longer exists in Python 3.12+.
   with self.assertRaisesRegex(expected_error, expected_regex):
     self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
예제 #2
0
    def call(self, inputs, count_weights=None):
        """Encodes `inputs` according to the layer's configured output mode.

        Lists and NumPy arrays are converted to tensors first, and rank-1
        inputs are expanded to rank 2 by adding an axis at position 1.

        Args:
          inputs: The data to encode. May be a list, a NumPy array, or an
            object exposing `.shape.rank`; a `SparseTensor` is densified in
            the `TFIDF` mode.
          count_weights: Optional weights forwarded to the bincount op. Only
            accepted when the output mode is `COUNT`.

        Returns:
          In `TFIDF` mode, the per-row token counts multiplied by
          `self.tf_idf_weights`, with static shape `(None, out_depth)`.
          Otherwise the bincount of `inputs` in `K.floatx()`: a
          `SparseTensor` with dense shape `[batch_size, out_depth]` when
          `self._sparse` is set, else the `bincount` result with static
          shape `(None, out_depth)`.

        Raises:
          ValueError: If `count_weights` is passed while the output mode is
            not `COUNT`, or if `self._sparse` is set in `TFIDF` mode.
          RuntimeError: If `self._max_tokens` is None and the value of
            `self.num_elements` is 0.
        """
        if isinstance(inputs, (list, np.ndarray)):
            inputs = ops.convert_to_tensor_v2(inputs)
        if inputs.shape.rank == 1:
            inputs = array_ops.expand_dims(inputs, 1)

        has_weights = count_weights is not None
        if has_weights and self._output_mode != COUNT:
            raise ValueError(
                "count_weights is not used in `output_mode='tf-idf'`, "
                "or `output_mode='binary'`. Please pass a single input.")
        self._called = True

        # Resolve the width of the encoded output.
        if self._max_tokens is not None:
            out_depth = self._max_tokens
        else:
            out_depth = K.get_value(self.num_elements)
            if out_depth == 0:
                raise RuntimeError(
                    "If you construct a `CategoryEncoding` layer with "
                    "`max_tokens=None`, you need to call `adapt()` "
                    "on it before using it")

        output_shape = (None, out_depth)

        if self._output_mode == TFIDF:
            if self._sparse:
                raise ValueError("`sparse=True` with `output_mode=tfidf` "
                                 "is not supported.")
            if isinstance(inputs, sparse_tensor.SparseTensor):
                # Densify sparse input with a default value of -1. Because -1
                # is ignored by one_hot, this effectively drops the non-set
                # positions from the output encoding.
                inputs = sparse_ops.sparse_tensor_to_dense(inputs,
                                                           default_value=-1)
            token_counts = math_ops.reduce_sum(
                array_ops.one_hot(inputs, depth=out_depth), axis=1)
            weighted_counts = math_ops.multiply(token_counts,
                                                self.tf_idf_weights)
            weighted_counts.set_shape(tensor_shape.TensorShape(output_shape))
            return weighted_counts

        binary_output = (self._output_mode == BINARY)

        if not self._sparse:
            dense_counts = bincount_ops.bincount(
                inputs,
                weights=count_weights,
                minlength=out_depth,
                dtype=K.floatx(),
                axis=-1,
                binary_output=binary_output)
            dense_counts.set_shape(tensor_shape.TensorShape(output_shape))
            return dense_counts

        sparse_counts = bincount_ops.sparse_bincount(
            inputs,
            weights=count_weights,
            minlength=out_depth,
            axis=-1,
            binary_output=binary_output)
        sparse_counts = math_ops.cast(sparse_counts, K.floatx())
        # Rebuild the sparse tensor so that its dense shape is pinned to
        # [batch_size, out_depth].
        batch_size = array_ops.shape(sparse_counts)[0]
        return sparse_tensor.SparseTensor(
            indices=sparse_counts.indices,
            values=sparse_counts.values,
            dense_shape=[batch_size, out_depth])
예제 #3
0
 def test_dense_input_ragged_weights_fails(self):
     """Checks that ragged weights paired with a dense input raise ValueError."""
     dense_input = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
     ragged_weights = ragged_factory_ops.constant(
         [[6, 0.5, 2], [14], [10, 0.25, 5, 3]])
     with self.assertRaisesRegex(ValueError, "must be a tf.Tensor"):
         # The op is built inside the context manager because the error may
         # surface at construction time rather than at evaluation time.
         output = bincount_ops.sparse_bincount(
             dense_input, weights=ragged_weights, axis=-1)
         self.evaluate(output)
예제 #4
0
  def call(self, inputs):
    """Encodes `inputs` according to the layer's configured output mode.

    Args:
      inputs: The data to encode. A `SparseTensor` is densified in the
        `TFIDF` mode.

    Returns:
      In `TFIDF` mode, the per-row token counts multiplied by
      `self.tf_idf_weights`, with static shape `(None, out_depth)`.
      Otherwise the bincount of `inputs`: the `sparse_bincount` result when
      `self._sparse` is set, else an int64 `bincount` result with static
      shape `(None, out_depth)`.
    """
    self._called = True
    # Fall back to the value of `num_elements` when no explicit token limit
    # was configured.
    out_depth = (
        K.get_value(self.num_elements)
        if self._max_tokens is None else self._max_tokens)
    output_shape = tensor_shape.TensorShape((None, out_depth))

    if self._output_mode == TFIDF:
      if isinstance(inputs, sparse_tensor.SparseTensor):
        # Densify sparse input with a default value of -1. Because -1 is
        # ignored by one_hot, this effectively drops the non-set positions
        # from the output encoding.
        inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1)
      token_counts = math_ops.reduce_sum(
          array_ops.one_hot(inputs, depth=out_depth), axis=1)
      weighted_counts = math_ops.multiply(token_counts, self.tf_idf_weights)
      weighted_counts.set_shape(output_shape)
      return weighted_counts

    binary_output = (self._output_mode == BINARY)
    if not self._sparse:
      dense_counts = bincount_ops.bincount(
          inputs,
          minlength=out_depth,
          dtype=dtypes.int64,
          axis=-1,
          binary_output=binary_output)
      dense_counts.set_shape(output_shape)
      return dense_counts

    return bincount_ops.sparse_bincount(
        inputs, minlength=out_depth, axis=-1, binary_output=binary_output)
예제 #5
0
 def test_sparse_input_too_many_indices_fails(self):
   """Checks that sparse weights with an extra non-zero entry are rejected.

   The weights array has one more non-zero value (row 0, column 1) than the
   input array it is paired with.
   """
   input_dense = np.array(
       [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
   weights_dense = np.array(
       [[3, 1, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
   sparse_input = sparse_ops.from_dense(input_dense)
   sparse_weights = sparse_ops.from_dense(weights_dense)
   with self.assertRaisesIncompatibleShapesError():
     output = bincount_ops.sparse_bincount(
         sparse_input, weights=sparse_weights, axis=-1)
     self.evaluate(output)
예제 #6
0
 def test_sparse_input_wrong_indices_fails(self):
   """Checks that sparse weights with mismatched indices are rejected.

   The input and the weights have the same number of non-zero values, but in
   row 0 they sit at different columns (0 and 2 versus 0 and 1).
   """
   x = sparse_ops.from_dense(
       np.array([[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
   weights = sparse_ops.from_dense(
       np.array([[3, 1, 0, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
   # `assertRaisesRegex` is used instead of the deprecated `assertRaisesRegexp`
   # alias, which no longer exists in Python 3.12+.
   with self.assertRaisesRegex(errors.InvalidArgumentError,
                               "must have the same indices"):
     self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
예제 #7
0
 def test_ragged_input_different_shape_fails(self):
     """Checks that ragged weights with different row lengths are rejected.

     The input rows have lengths 3, 1 and 4, while the weight rows have
     lengths 3, 0 and 4.
     """
     ragged_input = ragged_factory_ops.constant(
         [[6, 1, 2], [14], [10, 1, 5, 3]])
     ragged_weights = ragged_factory_ops.constant(
         [[6, 0.5, 2], [], [10, 0.25, 5, 3]])
     expected_message = "must have the same row splits"
     with self.assertRaisesRegex(errors.InvalidArgumentError,
                                 expected_message):
         output = bincount_ops.sparse_bincount(
             ragged_input, weights=ragged_weights, axis=-1)
         self.evaluate(output)
예제 #8
0
 def test_ragged_input_sparse_weights_fails(self):
     """Checks that sparse weights paired with a ragged input raise ValueError."""
     ragged_input = ragged_factory_ops.constant(
         [[6, 1, 2], [14], [10, 1, 5, 3]])
     weights_dense = np.array(
         [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
     sparse_weights = sparse_ops.from_dense(weights_dense)
     with self.assertRaisesRegex(ValueError, "must be a RaggedTensor"):
         output = bincount_ops.sparse_bincount(
             ragged_input, weights=sparse_weights, axis=-1)
         self.evaluate(output)
예제 #9
0
 def test_sparse_input_dense_weights_fails(self):
     """Checks that dense weights paired with a sparse input raise ValueError."""
     input_dense = np.array(
         [[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)
     sparse_input = sparse_ops.from_dense(input_dense)
     dense_weights = np.array([[3, 2, 1], [5, 4, 4]], dtype=np.int32)
     with self.assertRaisesRegex(ValueError, "must be a SparseTensor"):
         output = bincount_ops.sparse_bincount(
             sparse_input, weights=dense_weights, axis=-1)
         self.evaluate(output)
예제 #10
0
def sparse_bincount(inputs, out_depth, multi_hot_output, count_weights=None):
    """Apply binary or count encoding to an input and return a sparse tensor.

    Args:
      inputs: The values passed to `bincount_ops.sparse_bincount`.
      out_depth: Used as both `minlength` and `maxlength` of the bincount and
        as the second dimension of the returned dense shape.
      multi_hot_output: Forwarded as `binary_output` to the bincount op.
      count_weights: Optional weights forwarded to the bincount op.

    Returns:
      A `SparseTensor` holding the bincount indices and values cast to
      `backend.floatx()`, with dense shape `[batch_size, out_depth]`.
    """
    counts = bincount_ops.sparse_bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        maxlength=out_depth,
        axis=-1,
        binary_output=multi_hot_output)
    float_counts = math_ops.cast(counts, backend.floatx())
    # Rebuild the sparse tensor so that its dense shape is pinned to
    # [batch_size, out_depth].
    num_rows = array_ops.shape(float_counts)[0]
    return sparse_tensor.SparseTensor(
        indices=float_counts.indices,
        values=float_counts.values,
        dense_shape=[num_rows, out_depth])
예제 #11
0
 def test_dense_input(self, x, expected_indices, expected_values,
                      expected_shape, minlength=None, maxlength=None,
                      binary_output=False, weights=None, axis=-1):
     """Runs `sparse_bincount` on `x` and checks the sparse result.

     The indices, values and dense shape of the result are each compared
     against the corresponding `expected_*` argument.
     """
     actual = bincount_ops.sparse_bincount(
         x,
         weights=weights,
         minlength=minlength,
         maxlength=maxlength,
         binary_output=binary_output,
         axis=axis)
     self.assertAllEqual(expected_indices, actual.indices)
     self.assertAllEqual(expected_values, actual.values)
     self.assertAllEqual(expected_shape, actual.dense_shape)
예제 #12
0
    def call(self, inputs, count_weights=None):
        """Encodes `inputs` according to the layer's configured output mode.

        Args:
          inputs: The data to encode. A `SparseTensor` is densified in the
            `TFIDF` mode.
          count_weights: Optional weights forwarded to the bincount op. Only
            accepted when the output mode is `COUNT`.

        Returns:
          In `TFIDF` mode, the per-row token counts multiplied by
          `self.tf_idf_weights`, with static shape `(None, out_depth)`.
          Otherwise the bincount of `inputs` in `K.floatx()`: the cast
          `sparse_bincount` result when `self._sparse` is set, else the
          `bincount` result with static shape `(None, out_depth)`.

        Raises:
          ValueError: If `count_weights` is passed while the output mode is
            not `COUNT`.
        """
        has_weights = count_weights is not None
        if has_weights and self._output_mode != COUNT:
            raise ValueError(
                "count_weights is not used in `output_mode='tf-idf'`, "
                "or `output_mode='binary'`. Please pass a single input.")
        self._called = True

        # Fall back to the value of `num_elements` when no explicit token
        # limit was configured.
        out_depth = (K.get_value(self.num_elements)
                     if self._max_tokens is None else self._max_tokens)
        output_shape = tensor_shape.TensorShape((None, out_depth))

        if self._output_mode == TFIDF:
            if isinstance(inputs, sparse_tensor.SparseTensor):
                # Densify sparse input with a default value of -1. Because -1
                # is ignored by one_hot, this effectively drops the non-set
                # positions from the output encoding.
                inputs = sparse_ops.sparse_tensor_to_dense(inputs,
                                                           default_value=-1)
            token_counts = math_ops.reduce_sum(
                array_ops.one_hot(inputs, depth=out_depth), axis=1)
            weighted_counts = math_ops.multiply(token_counts,
                                                self.tf_idf_weights)
            weighted_counts.set_shape(output_shape)
            return weighted_counts

        binary_output = (self._output_mode == BINARY)
        if not self._sparse:
            dense_counts = bincount_ops.bincount(
                inputs,
                weights=count_weights,
                minlength=out_depth,
                dtype=K.floatx(),
                axis=-1,
                binary_output=binary_output)
            dense_counts.set_shape(output_shape)
            return dense_counts

        sparse_counts = bincount_ops.sparse_bincount(
            inputs,
            weights=count_weights,
            minlength=out_depth,
            axis=-1,
            binary_output=binary_output)
        return math_ops.cast(sparse_counts, K.floatx())
예제 #13
0
 def test_ragged_input(self, x, expected_indices, expected_values,
                       expected_shape, maxlength=None, minlength=None,
                       binary_output=False, weights=None, axis=-1):
   """Runs `sparse_bincount` on a ragged version of `x` and checks the result.

   `x` (and `weights`, when given) are converted with
   `ragged_factory_ops.constant` before the call. The indices, values and
   dense shape of the result are compared against the `expected_*` arguments.
   """
   ragged_input = ragged_factory_ops.constant(x)
   if weights is not None:
     ragged_weights = ragged_factory_ops.constant(weights)
   else:
     ragged_weights = None
   actual = bincount_ops.sparse_bincount(
       ragged_input,
       weights=ragged_weights,
       minlength=minlength,
       maxlength=maxlength,
       binary_output=binary_output,
       axis=axis)
   self.assertAllEqual(expected_indices, actual.indices)
   self.assertAllEqual(expected_values, actual.values)
   self.assertAllEqual(expected_shape, actual.dense_shape)
예제 #14
0
 def test_sparse_input(self, x, expected_indices, expected_values,
                       expected_shape, maxlength=None, minlength=None,
                       binary_output=False, weights=None, axis=-1):
   """Runs `sparse_bincount` on a sparse version of `x` and checks the result.

   `x` (and `weights`, when given) are converted with `sparse_ops.from_dense`
   before the call. The indices, values and dense shape of the result are
   compared against the `expected_*` arguments.
   """
   sparse_input = sparse_ops.from_dense(x)
   if weights is not None:
     sparse_weights = sparse_ops.from_dense(weights)
   else:
     sparse_weights = None
   actual = bincount_ops.sparse_bincount(
       sparse_input,
       weights=sparse_weights,
       minlength=minlength,
       maxlength=maxlength,
       binary_output=binary_output,
       axis=axis)
   self.assertAllEqual(expected_indices, actual.indices)
   self.assertAllEqual(expected_values, actual.values)
   self.assertAllEqual(expected_shape, actual.dense_shape)