def testMismatchedShapesExpandNonconcatDim(self):
    with self.session(use_gpu=False) as sess:
      sp_a = self._SparseTensor_3x3()
      sp_b = self._SparseTensor_3x5()
      sp_c = self._SparseTensor_3x2()
      sp_d = self._SparseTensor_2x3()
      for concat_dim0 in (-2, 0):
        for concat_dim1 in (-1, 1):
          sp_concat_dim0 = sparse_ops.sparse_concat(
              concat_dim0, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)
          sp_concat_dim1 = sparse_ops.sparse_concat(
              concat_dim1, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)

          sp_concat_dim0_out = self.evaluate(sp_concat_dim0)
          sp_concat_dim1_out = self.evaluate(sp_concat_dim1)

          self.assertAllEqual(sp_concat_dim0_out.indices,
                              [[0, 2], [1, 0], [2, 0], [2, 2], [4, 1], [5, 0],
                               [5, 3], [5, 4], [7, 0], [8, 0], [9, 1], [10, 0],
                               [10, 2]])
          self.assertAllEqual(sp_concat_dim0_out.values,
                              [1, 2, 3, 4, 1, 2, 1, 0, 1, 2, 1, 1, 2])
          self.assertAllEqual(sp_concat_dim0_out.dense_shape, [11, 5])

          self.assertAllEqual(sp_concat_dim1_out.indices,
                              [[0, 2], [0, 11], [1, 0], [1, 4], [1, 8], [1, 10],
                               [1, 12], [2, 0], [2, 2], [2, 3], [2, 6], [2, 7],
                               [2, 8]])
          self.assertAllEqual(sp_concat_dim1_out.values,
                              [1, 1, 2, 1, 1, 1, 2, 3, 4, 2, 1, 0, 2])
          self.assertAllEqual(sp_concat_dim1_out.dense_shape, [3, 13])
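The behavior checked above, as a minimal standalone sketch. This assumes the TF 2.x public API, where the flag is spelled expand_nonconcat_dims (the test above uses the older expand_nonconcat_dim spelling):

import tensorflow as tf

a = tf.sparse.SparseTensor(indices=[[0, 0]], values=[1], dense_shape=[1, 3])
b = tf.sparse.SparseTensor(indices=[[0, 1]], values=[2], dense_shape=[2, 5])

# Without expansion this would fail: the non-concat dims (3 vs. 5) disagree.
c = tf.sparse.concat(0, [a, b], expand_nonconcat_dims=True)
print(c.dense_shape)  # [3, 5]: rows stack; columns expand to the maximum, 5.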
Example #3
    def testMismatchedRank(self):
        with self.session(use_gpu=False):
            sp_a = self._SparseTensor_3x3()
            sp_e = self._SparseTensor_2x3x4()

            # Rank mismatches can be caught at shape-inference time
            for concat_dim in (-1, 1):
                with self.assertRaises(ValueError):
                    sparse_ops.sparse_concat(concat_dim, [sp_a, sp_e])
Example #5
    def testMismatchedRankExpandNonconcatDim(self):
        with self.session(use_gpu=False):
            sp_a = self._SparseTensor_3x3()
            sp_e = self._SparseTensor_2x3x4()

            # Rank mismatches should be caught at shape-inference time, even for
            # expand_nonconcat_dim=True.
            for concat_dim in (-1, 1):
                with self.assertRaises(ValueError):
                    sparse_ops.sparse_concat(concat_dim, [sp_a, sp_e],
                                             expand_nonconcat_dim=True)
Example #7
def _ParseSparse(data):
    """Concat sparse tensors together.

  Args:
    data: A dict of name -> Tensor.

  Returns:
    A single sparse tensor and a 1-D input spec Tensor.

  Raises:
    NotImplementedError:  Combining dense and sparse tensors is not
      supported.
    ValueError: If data contains non-string Tensors.
  """
    for k in sorted(data.keys()):
        if not isinstance(data[k], sparse_tensor.SparseTensor):
            raise NotImplementedError(
                'Features should be either all sparse or all dense.  Use a '
                'feature engineering function to convert some of them.')

    data_spec = [
        constants.DATA_CATEGORICAL
        if data[list(data.keys())[0]].dtype == dtypes.string
        else constants.DATA_FLOAT
    ]
    return sparse_ops.sparse_concat(1, list(data.values())), data_spec
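The core move in _ParseSparse, as a self-contained sketch using the TF 2.x API (the feature names here are illustrative, not from the original module): every feature column becomes a block of columns in one wide sparse matrix.

import tensorflow as tf

features = {
    "color": tf.sparse.SparseTensor([[0, 0], [1, 1]], ["red", "blue"], [2, 3]),
    "shape": tf.sparse.SparseTensor([[0, 1]], ["round"], [2, 2]),
}
wide = tf.sparse.concat(1, [features[k] for k in sorted(features)])
print(wide.dense_shape)  # [2, 5]: 3 "color" columns, then 2 "shape" columns.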
Example #8
    def testConcatDim0(self):
        with self.session(use_gpu=False) as sess:
            # concat(A, D):
            # [    1]
            # [2    ]
            # [3   4]
            # [  1  ]
            # [1   2]
            sp_a = self._SparseTensor_3x3()
            sp_d = self._SparseTensor_2x3()

            for concat_dim in (-2, 0):
                sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_d])

                self.assertEqual(sp_concat.indices.get_shape(), [7, 2])
                self.assertEqual(sp_concat.values.get_shape(), [7])
                self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

                concat_out = self.evaluate(sp_concat)

                self.assertAllEqual(
                    concat_out.indices,
                    [[0, 2], [1, 0], [2, 0], [2, 2], [3, 1], [4, 0], [4, 2]])
                self.assertAllEqual(concat_out.values,
                                    np.array([1, 2, 3, 4, 1, 1, 2]))
                self.assertAllEqual(concat_out.dense_shape, np.array([5, 3]))
Example #9
    def testConcat2(self):
        with self.session(use_gpu=False) as sess:
            # concat(A, B):
            # [    1          ]
            # [2       1      ]
            # [3   4 2     1 0]
            for sp_a in (self._SparseTensorValue_3x3(),
                         self._SparseTensor_3x3()):
                for sp_b in (self._SparseTensorValue_3x5(),
                             self._SparseTensor_3x5()):
                    for concat_dim in (-1, 1):
                        sp_concat = sparse_ops.sparse_concat(
                            concat_dim, [sp_a, sp_b])

                        self.assertEqual(sp_concat.indices.get_shape(), [8, 2])
                        self.assertEqual(sp_concat.values.get_shape(), [8])
                        self.assertEqual(sp_concat.dense_shape.get_shape(),
                                         [2])

                        concat_out = self.evaluate(sp_concat)

                        self.assertAllEqual(concat_out.indices,
                                            [[0, 2], [1, 0], [1, 4], [2, 0],
                                             [2, 2], [2, 3], [2, 6], [2, 7]])
                        self.assertAllEqual(concat_out.values,
                                            [1, 2, 1, 3, 4, 2, 1, 0])
                        self.assertAllEqual(concat_out.dense_shape, [3, 8])
Example #10
def _ParseSparse(data):
    """Concat sparse tensors together.

  A common use of sparse tensors is to treat strings as a sparse bit vector
  with a large number of features representing the presence of all possible
  values.  Here we convert these strings to integer indices in a sparse bit
  tensor.  In order to pack each incoming feature into a single sparse tensor,
  we add an offset to the converted indices to indicate that they came from
  different features in the source data.

  Args:
    data: A dict of name -> Tensor.

  Returns:
    A single sparse tensor with float values and a 1-D input spec Tensor.

  Raises:
    NotImplementedError:  Combining dense and sparse tensors is not yet
      supported.
    ValueError: If data contains non-string Tensors.
  """
    convert_ops = Load()

    # TODO(gilberth): Support mixed string/float sparse tensors.
    # We currently only support string (categorical) data if we're using sparse
    # tensors.
    for v in data.values():
        if v.dtype != dtypes.string:
            raise ValueError("Only sparse tensors of type string are supported.")

    # Sparse tensor indices have 63 bits to use for information. We use the
    # minimum number of these (MSBs) for the offset, and pack the rest with the
    # actual data.
    num_features = len(data)
    offset_bits = int(math.ceil(math.log(num_features, 2)))

    # We condense data to 26 bits, see sparse_values_to_indices.cc
    offset_increment = int(math.pow(2, 26 - offset_bits))
    offset = 0

    sparse_tensors = []
    keys = None
    for k in sorted(data.keys()):
        if k == graph_io.KEY_FEATURE_NAME:
            keys = data[k]
        elif isinstance(data[k], ops.SparseTensor):
            sparse_indices = data[k].indices
            sparse_values = data[k].values
            new_shape = array_ops.concat(
                0, [array_ops.slice(data[k].shape, [0], [1]),
                    [offset_increment]])

            new_indices, new_values = convert_ops.sparse_values_to_indices(
                sparse_indices, sparse_values, offset, offset_bits=offset_bits)
            sparse_tensors.append(
                ops.SparseTensor(indices=new_indices, values=new_values,
                                 shape=new_shape))
        else:
            # Convert dense to sparse.
            raise NotImplementedError("Dense to sparse conversion not implemented.")

    return (sparse_ops.sparse_concat(1, sparse_tensors), keys, [constants.DATA_CATEGORICAL])
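The offset arithmetic above, worked through for a hypothetical five-feature input: ceil(log2(5)) = 3 offset bits, so each feature gets a 2**(26 - 3) = 8388608-wide slice of the condensed 26-bit index space.

import math

num_features = 5
offset_bits = int(math.ceil(math.log(num_features, 2)))  # 3
offset_increment = int(math.pow(2, 26 - offset_bits))    # 8388608
offsets = [i * offset_increment for i in range(num_features)]
print(offset_bits, offset_increment, offsets)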
Example #11
def _SparseReduceSumSparseGrad(op, unused_output_indices_grad, out_grad,
                               unused_output_shape_grad):
    """
    Args:
    op: the SparseReorder op
    unused_output_indices_grad: the incoming gradients of the output indices
    out_grad: the incoming gradients of the output values

    Returns:
    Gradient for each of the 4 input tensors:
      (input_indices, input_values, input_shape, reduction_axes)
    The gradients for input_indices, reduction_axes and input_shape is None.
    """
    # sp_indices = op.inputs[0]
    # vals_shape = array_ops.shape(op.inputs[1])
    sp_shape = op.inputs[2]
    out_shape = op.outputs[2]

    output_shape_kept_dims = math_ops.to_int64(
        math_ops.reduced_shape(sp_shape, op.inputs[3]))
    sp_grad = sparse_tensor.SparseTensor(op.outputs[0], out_grad, out_shape)
    sp_grad = sparse_ops.sparse_reshape(sp_grad, output_shape_kept_dims)

    # TODO: replace hardcoded 128 with the dimension size inferred from sp_shape.
    # sp_tile = sparse_ops.sparse_concat(2, [sp_grad] * math_ops.to_int64(sp_shape)[2])
    sp_tile = sparse_ops.sparse_concat(
        2, [sp_grad] * 128)  # tile gradients along the 3rd axis

    # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
    return (None, sp_tile._values, None, None)
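The tiling idiom above, in isolation (TF 2.x API, with a small toy size standing in for the hardcoded 128): concatenating copies of a sparse tensor along the trailing axis replicates it along that axis.

import tensorflow as tf

g = tf.sparse.SparseTensor([[0, 0, 0]], [1.0], [2, 2, 1])
tiled = tf.sparse.concat(2, [g] * 3)
print(tiled.dense_shape)  # [2, 2, 3]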
Example #12
    def tensors_to_item(self, keys_to_tensors):
        """Maps the given dictionary of tensors to a concatenated list of keypoints.

    Args:
      keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

    Returns:
      [time, num_keypoints, 2] tensor of keypoint coordinates, in order [y, x].
          Whether the tensor is a SparseTensor or a dense Tensor is determined
          by the return_dense parameter. Empty positions in the sparse tensor
          are filled with -1.0 values.
    """
        coordinates = []
        for key in self._full_keys:
            value = keys_to_tensors[key]
            expanded_dims = array_ops.concat([
                math_ops.to_int64(array_ops.shape(value)),
                constant_op.constant([1], dtype=dtypes.int64)
            ], 0)
            coordinate = sparse_ops.sparse_reshape(value, expanded_dims)
            coordinates.append(coordinate)
        keypoints = sparse_ops.sparse_concat(2, coordinates)
        if self._return_dense:
            keypoints = sparse_ops.sparse_tensor_to_dense(
                keypoints, default_value=self._default_value)
        return keypoints
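The reshape-then-concat pattern above, in isolation (TF 2.x API, toy data; t.dense_shape stands in for the shape()/to_int64 combination in the original): each [time, num_keypoints] coordinate tensor gains a trailing unit axis so the y and x planes can be concatenated into [time, num_keypoints, 2].

import tensorflow as tf

y = tf.sparse.SparseTensor([[0, 0]], [0.5], [1, 2])   # y coordinates
x = tf.sparse.SparseTensor([[0, 0]], [0.25], [1, 2])  # x coordinates

one = tf.constant([1], dtype=tf.int64)
expanded = [tf.sparse.reshape(t, tf.concat([t.dense_shape, one], 0))
            for t in (y, x)]
keypoints = tf.sparse.concat(2, expanded)
print(keypoints.dense_shape)  # [1, 2, 2]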
Example #13
    def testConcat3(self):
        with self.session(use_gpu=False) as sess:
            # concat(A, B, C):
            # [    1              ]
            # [2       1       1  ]
            # [3   4 2     1 0 2  ]
            sp_a = self._SparseTensor_3x3()
            sp_b = self._SparseTensor_3x5()
            sp_c = self._SparseTensor_3x2()

            for concat_dim in (-1, 1):
                sp_concat = sparse_ops.sparse_concat(concat_dim,
                                                     [sp_a, sp_b, sp_c])

                self.assertEqual(sp_concat.indices.get_shape(), [10, 2])
                self.assertEqual(sp_concat.values.get_shape(), [10])
                self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

                concat_out = self.evaluate(sp_concat)

                self.assertAllEqual(concat_out.indices,
                                    [[0, 2], [1, 0], [1, 4], [1, 8], [2, 0],
                                     [2, 2], [2, 3], [2, 6], [2, 7], [2, 8]])
                self.assertAllEqual(concat_out.values,
                                    [1, 2, 1, 1, 3, 4, 2, 1, 0, 2])
                self.assertAllEqual(concat_out.dense_shape, [3, 10])
Example #15
    def testConcatNonNumeric(self):
        with self.session(use_gpu=False) as sess:
            # concat(A, B):
            # [    a          ]
            # [b       e      ]
            # [c   d f     g h]
            sp_a = self._SparseTensor_String3x3()
            sp_b = self._SparseTensor_String3x5()

            for concat_dim in (-1, 1):
                sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b])

                self.assertEqual(sp_concat.indices.get_shape(), [8, 2])
                self.assertEqual(sp_concat.values.get_shape(), [8])
                self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

                concat_out = self.evaluate(sp_concat)

                self.assertAllEqual(concat_out.indices,
                                    [[0, 2], [1, 0], [1, 4], [2, 0], [2, 2],
                                     [2, 3], [2, 6], [2, 7]])
                self.assertAllEqual(
                    concat_out.values,
                    [b"a", b"b", b"e", b"c", b"d", b"f", b"g", b"h"])
                self.assertAllEqual(concat_out.dense_shape, [3, 8])
Example #16
def concatenate(tensors, axis=-1, name="concat"):
    """Concatenates a list of tensors alongside the specified axis.
  Args:
      tensors: list of tensors to concatenate.
      axis: concatenation axis.
      name: str,
  Returns:
      A tensor.
  Example:
      >>>a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
      >>>b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
      >>>tf.keras.backend.concatenate((a, b), axis=-1)
      <tf.Tensor: shape=(3, 6), dtype=int32, numpy=
      array([[ 1,  2,  3, 10, 20, 30],
             [ 4,  5,  6, 40, 50, 60],
             [ 7,  8,  9, 70, 80, 90]], dtype=int32)>
  """
    if axis < 0:
        rank = K.ndim(tensors[0])
        if rank:
            axis %= rank
        else:
            axis = 0

    if all(K.is_sparse(x) for x in tensors):
        return sparse_ops.sparse_concat(axis, tensors, name=name)
    elif all(isinstance(x, ragged_tensor.RaggedTensor) for x in tensors):
        return array_ops.concat(tensors, axis, name=name)
    else:
        return array_ops.concat([K.to_dense(x) for x in tensors],
                                axis,
                                name=name)
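A hedged usage sketch for the sparse branch above (tf.keras.backend, toy inputs): when every input is sparse, concatenate stays sparse instead of densifying.

import tensorflow as tf
from tensorflow.keras import backend as K

a = tf.sparse.SparseTensor([[0, 0]], [1.0], [2, 2])
b = tf.sparse.SparseTensor([[1, 1]], [2.0], [2, 2])
out = K.concatenate([a, b], axis=1)
print(isinstance(out, tf.SparseTensor), out.dense_shape)  # True, [2 4]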
Example #18
 def batch_reduce_fn(state, value):
   padded_value = sparse_tensor.SparseTensor(
       indices=value.indices, values=value.values, dense_shape=padded_shape)
   reshaped_value = sparse_ops.sparse_reshape(
       padded_value,
       array_ops.concat(
           [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
   return sparse_ops.sparse_concat(0, [state, reshaped_value])
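How a reducer like this behaves, as a hedged sketch (TF 2.x API; padded_shape and the elements are made up for illustration): each rank-1 element is padded to a common width, given a leading unit axis, and appended to the accumulated batch along axis 0.

import tensorflow as tf

padded_shape = tf.constant([4], dtype=tf.int64)

def batch_reduce_fn(state, value):
    padded_value = tf.sparse.SparseTensor(value.indices, value.values,
                                          padded_shape)
    reshaped_value = tf.sparse.reshape(
        padded_value,
        tf.concat([tf.constant([1], tf.int64), padded_value.dense_shape], 0))
    return tf.sparse.concat(0, [state, reshaped_value])

state = tf.sparse.SparseTensor(tf.zeros([0, 2], tf.int64),
                               tf.constant([], tf.int32), [0, 4])
for i in (1, 2, 3):
    state = batch_reduce_fn(state, tf.sparse.SparseTensor([[0]], [i], [2]))
print(state.dense_shape)  # [3, 4]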
 def testConcatShape(self):
   # Test case for GitHub 21964.
   x = sparse_tensor.SparseTensor(
       indices=[[0, 0], [1, 1]], values=[1, 2], dense_shape=[2, 2])
   y = sparse_tensor.SparseTensor(
       indices=[[0, 0], [1, 1]], values=[1, 2], dense_shape=[2, 2])
   z = sparse_ops.sparse_concat(-1, [x, y])
   self.assertEqual(z.get_shape().as_list(), [2, 4])
Example #21
 def testSliceConcat(self):
   for sp_input in (self._SparseTensorValue_3x4x2(),
                    self._SparseTensor_3x4x2()):
     with self.cached_session(use_gpu=False):
       sparse_tensors = sparse_ops.sparse_split(
           sp_input=sp_input, num_split=2, axis=1)
       concat_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
       expected_output = self._SparseTensor_3x4x2()
       self.assertAllEqual(concat_tensor.indices.eval(),
                           expected_output.indices.eval())
 def testSliceConcat(self):
     for sp_input in (self._SparseTensorValue_3x4x2(),
                      self._SparseTensor_3x4x2()):
         for axis in (1, -2):
             sparse_tensors = sparse_ops.sparse_split(sp_input=sp_input,
                                                      num_split=2,
                                                      axis=axis)
             concat_tensor = self.evaluate(
                 sparse_ops.sparse_concat(1, sparse_tensors))
             expected_output = self._SparseTensor_3x4x2()
             self.assertAllEqual(concat_tensor.indices,
                                 expected_output.indices)
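The split/concat round trip these tests cover, sketched with the TF 2.x API on a toy tensor: splitting along an axis and concatenating the pieces back along the same axis reproduces the original indices.

import tensorflow as tf

sp = tf.sparse.SparseTensor([[0, 0], [1, 3]], [1.0, 2.0], [2, 4])
parts = tf.sparse.split(sp_input=sp, num_split=2, axis=1)
roundtrip = tf.sparse.concat(1, parts)
print(roundtrip.indices.numpy())  # [[0 0] [1 3]], matching sp.indices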
Example #24
    def testMismatchedShapes(self):
        with self.session(use_gpu=False) as sess:
            sp_a = self._SparseTensor_3x3()
            sp_b = self._SparseTensor_3x5()
            sp_c = self._SparseTensor_3x2()
            sp_d = self._SparseTensor_2x3()
            for concat_dim in (-1, 1):
                sp_concat = sparse_ops.sparse_concat(concat_dim,
                                                     [sp_a, sp_b, sp_c, sp_d])

                # Shape mismatches can only be caught when the op is run
                with self.assertRaisesOpError("Input shapes must match"):
                    sess.run(sp_concat)
  def testShapeInferenceUnknownShapes(self):
    with self.session(use_gpu=False):
      sp_inputs = [
          self._SparseTensor_UnknownShape(),
          self._SparseTensor_UnknownShape(val_shape=[3]),
          self._SparseTensor_UnknownShape(ind_shape=[1, 3]),
          self._SparseTensor_UnknownShape(shape_shape=[3])
      ]

      for concat_dim in (-2, 0):
        sp_concat = sparse_ops.sparse_concat(concat_dim, sp_inputs)

        self.assertEqual(sp_concat.indices.get_shape().as_list(), [None, 3])
        self.assertEqual(sp_concat.values.get_shape().as_list(), [None])
        self.assertEqual(sp_concat.dense_shape.get_shape(), [3])
def append_composite_tensor(target, to_append):
  """Helper function to append composite tensors to each other in the 0 axis.

  In order to support batching within a fit/evaluate/predict call, we need
  to be able to aggregate within a CompositeTensor. Unfortunately, the CT
  API currently does not make this easy - especially in V1 mode, where we're
  working with CompositeTensor Value objects that have no connection with the
  CompositeTensors that created them.

  Arguments:
    target: CompositeTensor or CompositeTensor value object that will be
      appended to.
    to_append: CompositeTensor or CompositeTensor value object to append to
      'target'.

  Returns:
    A CompositeTensor or CompositeTensor value object.

  Raises:
    RuntimeError: if concatenation is not possible.
  """
  if type(target) is not type(to_append):
    raise RuntimeError('Unable to concatenate %s and %s' %
                       (type(target), type(to_append)))

  # Perform type-specific concatenation.
  # TODO(b/125094323): This should be replaced by a simple call to
  # target.append() that should work on all of the below classes.

  # If we're seeing a CompositeTensor here, we know it's because we're in
  # Eager mode (or else we'd have evaluated the CT to a CT Value object
  # already). Therefore, it's safe to call concat() on it without evaluating
  # the result any further. If not - that is, if we're seeing a
  # SparseTensorValue or a RaggedTensorValue - we need to hand-update it
  # since we're outside of the graph anyways.
  if isinstance(target, sparse_tensor.SparseTensor):
    # We need to invoke the sparse version of concatenate here - tf.concat
    # won't work.
    return sparse_ops.sparse_concat(sp_inputs=[target, to_append], axis=0)
  elif isinstance(target, ragged_tensor.RaggedTensor):
    return ragged_concat_ops.concat([target, to_append], axis=0)
  elif isinstance(target, sparse_tensor.SparseTensorValue):
    return _append_sparse_tensor_value(target, to_append)
  elif isinstance(target, ragged_tensor_value.RaggedTensorValue):
    return _append_ragged_tensor_value(target, to_append)
  else:
    raise RuntimeError('Attempted to concatenate unsupported object %s.' %
                       type(target))
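A hedged usage sketch for the SparseTensor branch above (TF 2.x API, toy batches): two batches of sparse rows are stacked along axis 0, which is exactly what the sparse_concat call does.

import tensorflow as tf

batch_a = tf.sparse.SparseTensor([[0, 0]], [1.0], [1, 3])
batch_b = tf.sparse.SparseTensor([[0, 2]], [2.0], [1, 3])
combined = tf.sparse.concat(axis=0, sp_inputs=[batch_a, batch_b])
print(combined.dense_shape)  # [2, 3]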
Example #30
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments,
                                   num_outputs,
                                   trainable,
                                   weight_collections):
  """Creates an embedding lookup for all columns sharing a single weight."""
  for arg in embedding_lookup_arguments:
    assert arg.weight_tensor is None, (
        'Joint sums for weighted sparse columns are not supported. '
        'Please use weighted_sum_from_feature_columns instead.')
    assert arg.combiner == 'sum', (
        'Combiners other than sum are not supported for joint sums. '
        'Please use weighted_sum_from_feature_columns instead.')
  assert len(embedding_lookup_arguments) >= 1, (
      'At least one column must be in the model.')
  prev_size = 0
  sparse_tensors = []
  for a in embedding_lookup_arguments:
    t = a.input_tensor
    values = t.values + prev_size
    prev_size += a.vocab_size
    sparse_tensors.append(
        ops.SparseTensor(t.indices,
                         values,
                         t.shape))
  sparse_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
  with variable_scope.variable_scope(
      None, default_name='linear_weights', values=columns_to_tensors.values()):
    variable = contrib_variables.model_variable(
        name='weights',
        shape=[prev_size, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer,
        trainable=trainable,
        collections=weight_collections)
    if isinstance(variable, variables.Variable):
      variable = [variable]
    else:
      variable = variable._get_variable_list()  # pylint: disable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        sparse_tensor,
        sparse_weights=None,
        default_id=0,
        combiner='sum',
        name='_weights')
    return variable, predictions
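The joint-vocabulary trick above, in isolation (illustrative sketch, TF 2.x API): each column's ids are shifted by the running vocabulary size, so the concatenated sparse tensor indexes one shared embedding table.

import tensorflow as tf

cols = [  # (sparse ids, vocab size) per column; toy values
    (tf.sparse.SparseTensor([[0, 0]], [2], [1, 1]), 5),
    (tf.sparse.SparseTensor([[0, 0]], [1], [1, 1]), 3),
]
prev_size = 0
shifted = []
for ids, vocab_size in cols:
    shifted.append(tf.sparse.SparseTensor(ids.indices, ids.values + prev_size,
                                          ids.dense_shape))
    prev_size += vocab_size
joint = tf.sparse.concat(1, shifted)
print(joint.values.numpy())  # [2 6]: the second column's id 1 became 5 + 1.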
Example #32
def ParseDataTensorOrDict(data):
    """Return a tensor to use for input data.

  The incoming features can be a dict where keys are the string names of the
  columns, which we turn into a single 2-D tensor.

  Args:
    data: `Output` or `dict` of `Output` objects.

  Returns:
    A 2-D tensor for input to tensor_forest, a keys tensor for the
    tf.Examples if they exist, and a list of the type of each column
    (e.g. continuous float, categorical).
  """
    if isinstance(data, dict):
        # If there's at least one sparse tensor, everything has to be sparse.
        is_sparse = False
        for v in data.values():
            if isinstance(v, sparse_tensor.SparseTensor):
                is_sparse = True
                break

        categorical_types = (dtypes.string, dtypes.int32, dtypes.int64)
        data_spec = [
            constants.DATA_CATEGORICAL
            if data[k].dtype in categorical_types else constants.DATA_FLOAT
            for k in sorted(data.keys())
        ]
        data_spec = [constants.DATA_FLOAT] + data_spec
        features = []
        for k in sorted(data.keys()):
            if data[k].dtype == dtypes.string:
                convert_ops = Load()
                features.append(convert_ops.string_to_float(data[k]))
            elif data[k].dtype.is_integer:
                features.append(math_ops.to_float(data[k]))
            else:
                features.append(data[k])

        if is_sparse:
            return sparse_ops.sparse_concat(1, features), data_spec
        else:
            return array_ops.concat(1, features), data_spec
    else:
        return (data, [constants.DATA_FLOAT])
  def testConcatNoNonZeros(self):
    sp_a = self._SparseTensor_NoNonZeros((2, 3, 4))
    sp_b = self._SparseTensor_NoNonZeros((2, 7, 4))
    sp_c = self._SparseTensor_NoNonZeros((2, 5, 4))

    with self.session() as sess:
      concat_dim = 1
      sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b, sp_c])

      self.assertEqual(sp_concat.indices.get_shape(), [0, 3])
      self.assertEqual(sp_concat.values.get_shape(), [0])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

      concat_out = self.evaluate(sp_concat)

      self.assertEqual(concat_out.indices.shape, (0, 3))
      self.assertEqual(concat_out.values.shape, (0,))
      self.assertAllEqual(concat_out.dense_shape, [2, 15, 4])
Example #34
def concatenate(tensors, axis=-1):
    """Concatenates a list of tensors alongside the specified axis.
    Arguments:
      tensors: list of tensors to concatenate.
      axis: concatenation axis.
    Returns:
      A tensor.
    """
    if axis < 0:
        rank = ndim(tensors[0])
        if rank:
            axis %= rank
        else:
            axis = 0

    if py_all([is_sparse(x) for x in tensors]):
        return sparse_ops.sparse_concat(axis, tensors)
    else:
        return array_ops.concat([to_dense(x) for x in tensors], axis)
Example #36
def ParseDataTensorOrDict(data):
  """Return a tensor to use for input data.

  The incoming features can be a dict where keys are the string names of the
  columns, which we turn into a single 2-D tensor.

  Args:
    data: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A 2-D tensor for input to tensor_forest, a keys tensor for the
    tf.Examples if they exist, and a list of the type of each column
    (e.g. continuous float, categorical).
  """
  if isinstance(data, dict):
    # If there's at least one sparse tensor, everything has to be sparse.
    is_sparse = False
    for v in data.values():
      if isinstance(v, sparse_tensor.SparseTensor):
        is_sparse = True
        break

    categorical_types = (dtypes.string, dtypes.int32, dtypes.int64)
    data_spec = [constants.DATA_CATEGORICAL if
                 data[k].dtype in categorical_types else
                 constants.DATA_FLOAT for k in sorted(data.keys())]
    data_spec = [constants.DATA_FLOAT] + data_spec
    features = []
    for k in sorted(data.keys()):
      if data[k].dtype == dtypes.string:
        convert_ops = Load()
        features.append(convert_ops.string_to_float(data[k]))
      elif data[k].dtype.is_integer:
        features.append(math_ops.to_float(data[k]))
      else:
        features.append(data[k])

    if is_sparse:
      return sparse_ops.sparse_concat(1, features), data_spec
    else:
      return array_ops.concat_v2(features, 1), data_spec
  else:
    return (data, [constants.DATA_FLOAT])
  def testConcatSomeNoNonZeros(self):
    sp_a = self._SparseTensor_NoNonZeros((2, 7, 4))
    sp_b = self._SparseTensor_2x3x4()
    sp_c = self._SparseTensor_NoNonZeros((2, 5, 4))
    output_nnz = sp_b.indices.get_shape()[0]

    with self.session() as sess:
      concat_dim = 1
      sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b, sp_c])

      self.assertEqual(sp_concat.indices.get_shape(), [output_nnz, 3])
      self.assertEqual(sp_concat.values.get_shape(), [output_nnz])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

      concat_out = self.evaluate(sp_concat)

      self.assertAllEqual(concat_out.indices,
                          sp_b.indices + [0, sp_a.dense_shape[1], 0])
      self.assertAllEqual(concat_out.values, sp_b.values)
      self.assertAllEqual(concat_out.dense_shape, [2, 15, 4])
Example #38
 def _structuredRaggedSparseElement(self, structure, shapes, dtype,
                                    padded_shape):
   if structure is None:
     dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
     values = []
     for shape in shapes:
       dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
       sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
       padded_sparse = sparse_tensor.SparseTensor(sparse.indices,
                                                  sparse.values, dense_shape)
       reshaped_sparse = sparse_ops.sparse_reshape(
           padded_sparse,
           array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0))
       values.append(reshaped_sparse)
     return sparse_ops.sparse_concat(0, values)
   else:
     return tuple([
         self._structuredRaggedSparseElement(substructure, shapes, dtype,
                                             padded_shape)
         for substructure in structure
     ])
  def testConcat1(self):
    with self.session() as sess:
      # concat(A):
      # [    1]
      # [2    ]
      # [3   4]
      for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
        # Note that we ignore concat_dim in this case since we short-circuit the
        # single-input case in python.
        for concat_dim in (-2000, 1, 2000):
          sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a])

          self.assertEqual(sp_concat.indices.get_shape(), [4, 2])
          self.assertEqual(sp_concat.values.get_shape(), [4])
          self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

          concat_out = self.evaluate(sp_concat)

          self.assertAllEqual(concat_out.indices,
                              [[0, 2], [1, 0], [2, 0], [2, 2]])
          self.assertAllEqual(concat_out.values, [1, 2, 3, 4])
          self.assertAllEqual(concat_out.dense_shape, [3, 3])
 def _check(i):
   self.assertTrue(sparse_tensor.is_sparse(i))
   return sparse_ops.sparse_concat(0, [i, i])
Example #45
 def _check(i):
   self.assertTrue(isinstance(i, sparse_tensor.SparseTensor))
   return sparse_ops.sparse_concat(0, [i, i])
Example #46
def _ParseSparse(data):
    """Concat sparse tensors together.

  A common use of sparse tensors is to treat strings as a sparse bit vector
  with a large number of features representing the presence of all possible
  values.  Here we convert these strings to integer indices in a sparse bit
  tensor.  In order to pack each incoming feature into a single sparse tensor,
  we add an offset to the converted indices to indicate that they came from
  different features in the source data.

  Args:
    data: A dict of name -> Tensor.

  Returns:
    A single sparse tensor with float values and a 1-D input spec Tensor.

  Raises:
    NotImplementedError:  Combining dense and sparse tensors is not yet
      supported.
    ValueError: If data contains non-string Tensors.
  """
    convert_ops = Load()

    # Sparse tensor indices have 63 bits to use for information. We use the
    # minimum number of these (MSBs) for the offset, and pack the rest with the
    # actual data.
    num_features = len(data)
    offset_bits = int(math.ceil(math.log(num_features, 2)))

    # We condense data to 26 bits, see sparse_values_to_indices.cc
    offset_increment = int(math.pow(2, 26 - offset_bits))
    offset = 0

    sparse_tensors = []
    keys = None
    weights = None
    for k in sorted(data.keys()):
        if k == graph_io.KEY_FEATURE_NAME:
            keys = data[k]
        elif k == EXAMPLE_WEIGHT_NAME:
            weights = data[k]
        elif isinstance(data[k], ops.SparseTensor):
            # TODO(gilberth): Support mixed string/float sparse tensors.
            # We currently only support string (categorical) data if we're using
            # sparse tensors.
            if data[k].dtype != dtypes.string:
                raise ValueError(
                    'Only sparse tensors of type string are supported.')
            sparse_indices = data[k].indices
            sparse_values = data[k].values
            new_shape = array_ops.concat(
                0,
                [array_ops.slice(data[k].shape, [0], [1]), [offset_increment]])

            new_indices, new_values = convert_ops.sparse_values_to_indices(
                sparse_indices, sparse_values, offset, offset_bits=offset_bits)
            sparse_tensors.append(
                ops.SparseTensor(indices=new_indices,
                                 values=new_values,
                                 shape=new_shape))
        else:
            # Convert dense to sparse.
            raise NotImplementedError(
                'Dense to sparse conversion not implemented.')

    return (sparse_ops.sparse_concat(1, sparse_tensors), keys, weights,
            [constants.DATA_CATEGORICAL])
Example #47
def ParseDataTensorOrDict(data):
  """Return a tensor to use for input data.

  The incoming features can be a dict where keys are the string names of the
  columns, which we turn into a single 2-D tensor.

  Args:
    data: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A 2-D tensor for input to tensor_forest, a keys tensor for the
    tf.Examples if they exist, and a list of the type of each column
    (e.g. continuous float, categorical).
  """
  data_spec = TensorForestDataSpec()
  if isinstance(data, dict):
    dense_features_size = 0
    dense_features = []
    sparse_features = []
    for k in sorted(data.keys()):
      is_sparse = isinstance(data[k], sparse_tensor.SparseTensor)
      if is_sparse:
        # TODO(gilberth): support sparse continuous.
        if data[k].dtype == dtypes.float32:
          logging.info('TensorForest does not support sparse continuous.')
          continue
        elif data_spec.sparse.size() == 0:
          col_spec = data_spec.sparse.add()
          col_spec.original_type = DATA_CATEGORICAL
          col_spec.name = 'all_sparse'
          col_spec.size = -1
        sparse_features.append(
            sparse_tensor.SparseTensor(data[
                k].indices, CastToFloat(data[k].values), data[k].dense_shape))
      else:
        col_spec = data_spec.dense.add()

        col_spec.original_type = DTYPE_TO_FTYPE[data[k].dtype]
        col_spec.name = GetColumnName(k, len(dense_features))
        # the second dimension of get_shape should always be known.
        shape = data[k].get_shape()
        if len(shape) == 1:
          col_spec.size = 1
        else:
          col_spec.size = shape[1].value

        dense_features_size += col_spec.size
        dense_features.append(CastToFloat(data[k]))

    processed_dense_features = None
    processed_sparse_features = None
    if dense_features:
      processed_dense_features = array_ops.concat(dense_features, 1)
      data_spec.dense_features_size = dense_features_size
    if sparse_features:
      processed_sparse_features = sparse_ops.sparse_concat(1, sparse_features)
    logging.info(data_spec.SerializeToString())
    return processed_dense_features, processed_sparse_features, data_spec
  elif isinstance(data, sparse_tensor.SparseTensor):
    col_spec = data_spec.sparse.add()
    col_spec.name = 'sparse_features'
    col_spec.original_type = DTYPE_TO_FTYPE[data.dtype]
    col_spec.size = -1
    data_spec.dense_features_size = 0
    return None, data, data_spec
  else:
    data = ops.convert_to_tensor(data)
    col_spec = data_spec.dense.add()
    col_spec.name = 'dense_features'
    col_spec.original_type = DTYPE_TO_FTYPE[data.dtype]
    col_spec.size = data.get_shape()[1]
    data_spec.dense_features_size = col_spec.size
    return data, None, data_spec
Example #48
def ParseDataTensorOrDict(data):
  """Return a tensor to use for input data.

  The incoming features can be a dict where keys are the string names of the
  columns, which we turn into a single 2-D tensor.

  Args:
    data: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    A 2-D tensor for input to tensor_forest, a keys tensor for the
    tf.Examples if they exist, and a list of the type of each column
    (e.g. continuous float, categorical).
  """
  data_spec = TensorForestDataSpec()
  if isinstance(data, dict):
    dense_features_size = 0
    dense_features = []
    sparse_features = []
    for k in sorted(data.keys()):
      is_sparse = isinstance(data[k], sparse_tensor.SparseTensor)
      if is_sparse:
        # TODO(gilberth): support sparse categorical.
        if data[k].dtype == dtypes.string:
          logging.info('TensorForest does not support sparse categorical. '
                       'Transform it into a number with hash buckets.')
          continue
        elif data_spec.sparse.size() == 0:
          col_spec = data_spec.sparse.add()
          col_spec.original_type = DATA_FLOAT
          col_spec.name = 'all_sparse'
          col_spec.size = -1
        sparse_features.append(
            sparse_tensor.SparseTensor(data[
                k].indices, CastToFloat(data[k].values), data[k].dense_shape))
      else:
        col_spec = data_spec.dense.add()

        col_spec.original_type = DTYPE_TO_FTYPE[data[k].dtype]
        col_spec.name = k
        # the second dimension of get_shape should always be known.
        shape = data[k].get_shape()
        if len(shape) == 1:
          col_spec.size = 1
        else:
          col_spec.size = shape[1].value

        dense_features_size += col_spec.size
        x = array_ops.reshape(data[k], [-1, 1])
        dense_features.append(CastToFloat(x))

    processed_dense_features = None
    processed_sparse_features = None
    if dense_features:
      processed_dense_features = array_ops.concat(dense_features, 1)
      data_spec.dense_features_size = dense_features_size
    if sparse_features:
      processed_sparse_features = sparse_ops.sparse_concat(1, sparse_features)
    logging.info(data_spec.SerializeToString())
    return processed_dense_features, processed_sparse_features, data_spec
  elif isinstance(data, sparse_tensor.SparseTensor):
    col_spec = data_spec.sparse.add()
    col_spec.name = 'sparse_features'
    col_spec.original_type = DTYPE_TO_FTYPE[data.dtype]
    col_spec.size = -1
    data_spec.dense_features_size = 0
    return None, data, data_spec
  else:
    data = ops.convert_to_tensor(data)
    col_spec = data_spec.dense.add()
    col_spec.name = 'dense_features'
    col_spec.original_type = DTYPE_TO_FTYPE[data.dtype]
    col_spec.size = data.get_shape()[1]
    data_spec.dense_features_size = col_spec.size
    return data, None, data_spec
Example #49
 def batch_reduce_fn(state, value):
   return sparse_ops.sparse_concat(0, [state, value])