def testMismatchedShapesExpandNonconcatDim(self):
  with self.session(use_gpu=False) as sess:
    sp_a = self._SparseTensor_3x3()
    sp_b = self._SparseTensor_3x5()
    sp_c = self._SparseTensor_3x2()
    sp_d = self._SparseTensor_2x3()
    for concat_dim0 in (-2, 0):
      for concat_dim1 in (-1, 1):
        sp_concat_dim0 = sparse_ops.sparse_concat(
            concat_dim0, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)
        sp_concat_dim1 = sparse_ops.sparse_concat(
            concat_dim1, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)

        sp_concat_dim0_out = self.evaluate(sp_concat_dim0)
        sp_concat_dim1_out = self.evaluate(sp_concat_dim1)

        self.assertAllEqual(sp_concat_dim0_out.indices,
                            [[0, 2], [1, 0], [2, 0], [2, 2], [4, 1], [5, 0],
                             [5, 3], [5, 4], [7, 0], [8, 0], [9, 1], [10, 0],
                             [10, 2]])
        self.assertAllEqual(sp_concat_dim0_out.values,
                            [1, 2, 3, 4, 1, 2, 1, 0, 1, 2, 1, 1, 2])
        self.assertAllEqual(sp_concat_dim0_out.dense_shape, [11, 5])

        self.assertAllEqual(sp_concat_dim1_out.indices,
                            [[0, 2], [0, 11], [1, 0], [1, 4], [1, 8], [1, 10],
                             [1, 12], [2, 0], [2, 2], [2, 3], [2, 6], [2, 7],
                             [2, 8]])
        self.assertAllEqual(sp_concat_dim1_out.values,
                            [1, 1, 2, 1, 1, 1, 2, 3, 4, 2, 1, 0, 2])
        self.assertAllEqual(sp_concat_dim1_out.dense_shape, [3, 13])

def testMismatchedShapesExpandNonconcatDim(self):
  with self.session() as sess:
    sp_a = self._SparseTensor_3x3()
    sp_b = self._SparseTensor_3x5()
    sp_c = self._SparseTensor_3x2()
    sp_d = self._SparseTensor_2x3()
    for concat_dim0 in (-2, 0):
      for concat_dim1 in (-1, 1):
        sp_concat_dim0 = sparse_ops.sparse_concat(
            concat_dim0, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)
        sp_concat_dim1 = sparse_ops.sparse_concat(
            concat_dim1, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)

        sp_concat_dim0_out = self.evaluate(sp_concat_dim0)
        sp_concat_dim1_out = self.evaluate(sp_concat_dim1)

        self.assertAllEqual(sp_concat_dim0_out.indices,
                            [[0, 2], [1, 0], [2, 0], [2, 2], [4, 1], [5, 0],
                             [5, 3], [5, 4], [7, 0], [8, 0], [9, 1], [10, 0],
                             [10, 2]])
        self.assertAllEqual(sp_concat_dim0_out.values,
                            [1, 2, 3, 4, 1, 2, 1, 0, 1, 2, 1, 1, 2])
        self.assertAllEqual(sp_concat_dim0_out.dense_shape, [11, 5])

        self.assertAllEqual(sp_concat_dim1_out.indices,
                            [[0, 2], [0, 11], [1, 0], [1, 4], [1, 8], [1, 10],
                             [1, 12], [2, 0], [2, 2], [2, 3], [2, 6], [2, 7],
                             [2, 8]])
        self.assertAllEqual(sp_concat_dim1_out.values,
                            [1, 1, 2, 1, 1, 1, 2, 3, 4, 2, 1, 0, 2])
        self.assertAllEqual(sp_concat_dim1_out.dense_shape, [3, 13])

def testMismatchedRank(self):
  with self.session(use_gpu=False):
    sp_a = self._SparseTensor_3x3()
    sp_e = self._SparseTensor_2x3x4()

    # Rank mismatches can be caught at shape-inference time.
    for concat_dim in (-1, 1):
      with self.assertRaises(ValueError):
        sparse_ops.sparse_concat(concat_dim, [sp_a, sp_e])

def testMismatchedRankExpandNonconcatDim(self):
  with self.session(use_gpu=False):
    sp_a = self._SparseTensor_3x3()
    sp_e = self._SparseTensor_2x3x4()

    # Rank mismatches should be caught at shape-inference time, even for
    # expand_nonconcat_dim=True.
    for concat_dim in (-1, 1):
      with self.assertRaises(ValueError):
        sparse_ops.sparse_concat(
            concat_dim, [sp_a, sp_e], expand_nonconcat_dim=True)

def _ParseSparse(data): """Concat sparse tensors together. Args: data: A dict of name -> Tensor. Returns: A single sparse tensor and a 1-D input spec Tensor. Raises: NotImplementedError: Combining dense and sparse tensors is not supported. ValueError: If data contains non-string Tensors. """ for k in sorted(data.keys()): if not isinstance(data[k], sparse_tensor.SparseTensor): raise NotImplementedError( 'Features should be either all sparse or all dense. Use a ' 'feature engineering function to convert some of them.') data_spec = [ constants.DATA_CATEGORICAL if data[data.keys()[0]].dtype == dtypes.string else constants.DATA_FLOAT ] return sparse_ops.sparse_concat(1, data.values()), data_spec
def testConcatDim0(self):
  with self.session(use_gpu=False) as sess:
    # concat(A, D):
    # [    1]
    # [2    ]
    # [3   4]
    # [  1  ]
    # [1   2]
    sp_a = self._SparseTensor_3x3()
    sp_d = self._SparseTensor_2x3()

    for concat_dim in (-2, 0):
      sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_d])

      self.assertEqual(sp_concat.indices.get_shape(), [7, 2])
      self.assertEqual(sp_concat.values.get_shape(), [7])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

      concat_out = self.evaluate(sp_concat)

      self.assertAllEqual(
          concat_out.indices,
          [[0, 2], [1, 0], [2, 0], [2, 2], [3, 1], [4, 0], [4, 2]])
      self.assertAllEqual(concat_out.values, np.array([1, 2, 3, 4, 1, 1, 2]))
      self.assertAllEqual(concat_out.dense_shape, np.array([5, 3]))

def testConcat2(self):
  with self.session(use_gpu=False) as sess:
    # concat(A, B):
    # [    1          ]
    # [2       1      ]
    # [3   4 2     1 0]
    for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
      for sp_b in (self._SparseTensorValue_3x5(), self._SparseTensor_3x5()):
        for concat_dim in (-1, 1):
          sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b])

          self.assertEqual(sp_concat.indices.get_shape(), [8, 2])
          self.assertEqual(sp_concat.values.get_shape(), [8])
          self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

          concat_out = self.evaluate(sp_concat)

          self.assertAllEqual(concat_out.indices,
                              [[0, 2], [1, 0], [1, 4], [2, 0], [2, 2], [2, 3],
                               [2, 6], [2, 7]])
          self.assertAllEqual(concat_out.values, [1, 2, 1, 3, 4, 2, 1, 0])
          self.assertAllEqual(concat_out.dense_shape, [3, 8])

def _ParseSparse(data): """Concat sparse tensors together. A common use of sparse tensors is to treat strings as a sparse bit vector with a large number of features representing the presence of all possible values. Here we convert these strings to integer indices in a sparse bit tensor. In order to pack each incoming feature into a single sparse tensor, we add an offset to the converted indices to indicate that they came from different features in the source data. Args: data: A dict of name -> Tensor. Returns: A single sparse tensor with float values and a 1-D input spec Tensor. Raises: NotImplementedError: Combining dense and sparse tensors is not yet supported. ValueError: If data contains non-string Tensors. """ convert_ops = Load() # TODO(gilberth): Support mixed string/float sparse tensors. # We currently only support string (categorical) data if we're using sparse # tensors. for v in data.values(): if v.dtype != dtypes.string: raise ValueError("Only sparse tensors of type string are supported.") # Sparse tensor indices have 63 bits to use for information. We use the # minimum number of these (MSBs) for the offset, and pack the rest with the # actual data. num_features = len(data) offset_bits = int(math.ceil(math.log(num_features, 2))) # We condense data to 26 bits, see sparse_values_to_indices.cc offset_increment = int(math.pow(2, 26 - offset_bits)) offset = 0 sparse_tensors = [] keys = None for k in sorted(data.keys()): if k == graph_io.KEY_FEATURE_NAME: keys = data[k] elif isinstance(data[k], ops.SparseTensor): sparse_indices = data[k].indices sparse_values = data[k].values new_shape = array_ops.concat(0, [array_ops.slice(data[k].shape, [0], [1]), [offset_increment]]) new_indices, new_values = convert_ops.sparse_values_to_indices( sparse_indices, sparse_values, offset, offset_bits=offset_bits ) else: # Convert dense to sparse. raise NotImplementedError("Dense to sparse conversion not implemented.") sparse_tensors.append(ops.SparseTensor(indices=new_indices, values=new_values, shape=new_shape)) return (sparse_ops.sparse_concat(1, sparse_tensors), keys, [constants.DATA_CATEGORICAL])
def _SparseReduceSumSparseGrad(op, unused_output_indices_grad, out_grad,
                               unused_output_shape_grad):
  """Gradient for SparseReduceSumSparse.

  Args:
    op: the SparseReduceSumSparse op.
    unused_output_indices_grad: the incoming gradients of the output indices.
    out_grad: the incoming gradients of the output values.
    unused_output_shape_grad: the incoming gradients of the output shape.

  Returns:
    Gradient for each of the 4 input tensors:
      (input_indices, input_values, input_shape, reduction_axes)
    The gradients for input_indices, input_shape and reduction_axes are None.
  """
  sp_shape = op.inputs[2]
  out_shape = op.outputs[2]
  output_shape_kept_dims = math_ops.to_int64(
      math_ops.reduced_shape(sp_shape, op.inputs[3]))
  sp_grad = sparse_tensor.SparseTensor(op.outputs[0], out_grad, out_shape)
  sp_grad = sparse_ops.sparse_reshape(sp_grad, output_shape_kept_dims)
  # TODO: replace the hardcoded 128 with the dimension size inferred from
  # sp_shape.  The list repetition below needs a Python int, so a tensor such
  # as math_ops.to_int64(sp_shape)[2] cannot be used directly.
  sp_tile = sparse_ops.sparse_concat(2, [sp_grad] * 128)  # tile along 3rd axis
  # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
  return (None, sp_tile._values, None, None)  # pylint: disable=protected-access

def tensors_to_item(self, keys_to_tensors):
  """Maps the given dictionary of tensors to a concatenated list of keypoints.

  Args:
    keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

  Returns:
    [time, num_keypoints, 2] tensor of keypoint coordinates, in order [y, x].
    Whether the tensor is a SparseTensor or a dense Tensor is determined by
    the return_dense parameter.  Empty positions in the sparse tensor are
    filled with -1.0 values.
  """
  coordinates = []
  for key in self._full_keys:
    value = keys_to_tensors[key]
    expanded_dims = array_ops.concat([
        math_ops.to_int64(array_ops.shape(value)),
        constant_op.constant([1], dtype=dtypes.int64)
    ], 0)
    coordinate = sparse_ops.sparse_reshape(value, expanded_dims)
    coordinates.append(coordinate)
  keypoints = sparse_ops.sparse_concat(2, coordinates)
  if self._return_dense:
    keypoints = sparse_ops.sparse_tensor_to_dense(
        keypoints, default_value=self._default_value)
  return keypoints

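# A minimal sketch of the expand-then-concat pattern used above, written with
# public TF 2.x APIs (tf.sparse.concat is the exported name of
# sparse_ops.sparse_concat); the coordinate values are illustrative.
import tensorflow as tf

y = tf.sparse.from_dense([[0.5, 0.25]])    # [time=1, num_keypoints=2]
x = tf.sparse.from_dense([[0.125, 0.75]])

def _expand(sp):
  # Append a trailing unit dimension: [time, num_keypoints] -> [t, k, 1].
  new_shape = tf.concat(
      [tf.shape(sp, out_type=tf.int64), tf.constant([1], dtype=tf.int64)], 0)
  return tf.sparse.reshape(sp, new_shape)

keypoints = tf.sparse.concat(2, [_expand(y), _expand(x)])  # [1, 2, 2]
print(tf.sparse.to_dense(keypoints).numpy())  # [[[0.5 0.125], [0.25 0.75]]]
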
def testConcat3(self):
  with self.session(use_gpu=False) as sess:
    # concat(A, B, C):
    # [    1              ]
    # [2       1       1  ]
    # [3   4 2     1 0 2  ]
    sp_a = self._SparseTensor_3x3()
    sp_b = self._SparseTensor_3x5()
    sp_c = self._SparseTensor_3x2()

    for concat_dim in (-1, 1):
      sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b, sp_c])

      self.assertEqual(sp_concat.indices.get_shape(), [10, 2])
      self.assertEqual(sp_concat.values.get_shape(), [10])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

      concat_out = self.evaluate(sp_concat)

      self.assertAllEqual(concat_out.indices,
                          [[0, 2], [1, 0], [1, 4], [1, 8], [2, 0], [2, 2],
                           [2, 3], [2, 6], [2, 7], [2, 8]])
      self.assertAllEqual(concat_out.values, [1, 2, 1, 1, 3, 4, 2, 1, 0, 2])
      self.assertAllEqual(concat_out.dense_shape, [3, 10])

def testConcatNonNumeric(self):
  with self.session(use_gpu=False) as sess:
    # concat(A, B):
    # [    a          ]
    # [b       e      ]
    # [c   d f     g h]
    sp_a = self._SparseTensor_String3x3()
    sp_b = self._SparseTensor_String3x5()

    for concat_dim in (-1, 1):
      sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b])

      self.assertEqual(sp_concat.indices.get_shape(), [8, 2])
      self.assertEqual(sp_concat.values.get_shape(), [8])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

      concat_out = self.evaluate(sp_concat)

      self.assertAllEqual(concat_out.indices,
                          [[0, 2], [1, 0], [1, 4], [2, 0], [2, 2], [2, 3],
                           [2, 6], [2, 7]])
      self.assertAllEqual(concat_out.values,
                          [b"a", b"b", b"e", b"c", b"d", b"f", b"g", b"h"])
      self.assertAllEqual(concat_out.dense_shape, [3, 8])

def concatenate(tensors, axis=-1, name="concat"): """Concatenates a list of tensors alongside the specified axis. Args: tensors: list of tensors to concatenate. axis: concatenation axis. name: str, Returns: A tensor. Example: >>>a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) >>>b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]]) >>>tf.keras.backend.concatenate((a, b), axis=-1) <tf.Tensor: shape=(3, 6), dtype=int32, numpy= array([[ 1, 2, 3, 10, 20, 30], [ 4, 5, 6, 40, 50, 60], [ 7, 8, 9, 70, 80, 90]], dtype=int32)> """ if axis < 0: rank = K.ndim(tensors[0]) if rank: axis %= rank else: axis = 0 if all(K.is_sparse(x) for x in tensors): return sparse_ops.sparse_concat(axis, tensors, name=name) elif all(isinstance(x, ragged_tensor.RaggedTensor) for x in tensors): return array_ops.concat(tensors, axis, name=name) else: return array_ops.concat([K.to_dense(x) for x in tensors], axis, name=name)
def batch_reduce_fn(state, value):
  padded_value = sparse_tensor.SparseTensor(
      indices=value.indices, values=value.values, dense_shape=padded_shape)
  reshaped_value = sparse_ops.sparse_reshape(
      padded_value,
      array_ops.concat(
          [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
  return sparse_ops.sparse_concat(0, [state, reshaped_value])

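# A standalone sketch of the batching pattern above, using public TF 2.x
# APIs: pad each element to a common dense_shape, prepend a unit batch
# dimension, then concatenate along axis 0. Shapes and values are
# illustrative.
import tensorflow as tf

def _add_batch_dim(sp, padded_shape):
  padded = tf.SparseTensor(sp.indices, sp.values, padded_shape)
  return tf.sparse.reshape(
      padded, tf.concat([tf.constant([1], dtype=tf.int64), padded_shape], 0))

padded_shape = tf.constant([1, 3], dtype=tf.int64)
a = tf.sparse.from_dense([[1, 0, 0]])
b = tf.sparse.from_dense([[0, 2, 0]])
batch = tf.sparse.concat(0, [_add_batch_dim(a, padded_shape),
                             _add_batch_dim(b, padded_shape)])
print(batch.dense_shape.numpy())  # [2 1 3]
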
def testConcatShape(self):
  # Test case for GitHub 21964.
  x = sparse_tensor.SparseTensor(
      indices=[[0, 0], [1, 1]], values=[1, 2], dense_shape=[2, 2])
  y = sparse_tensor.SparseTensor(
      indices=[[0, 0], [1, 1]], values=[1, 2], dense_shape=[2, 2])
  z = sparse_ops.sparse_concat(-1, [x, y])
  self.assertEqual(z.get_shape().as_list(), [2, 4])

def testSliceConcat(self):
  for sp_input in (self._SparseTensorValue_3x4x2(),
                   self._SparseTensor_3x4x2()):
    with self.cached_session(use_gpu=False):
      sparse_tensors = sparse_ops.sparse_split(
          sp_input=sp_input, num_split=2, axis=1)
      concat_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
      expected_output = self._SparseTensor_3x4x2()
      self.assertAllEqual(concat_tensor.indices.eval(),
                          expected_output.indices.eval())

def testSliceConcat(self):
  for sp_input in (self._SparseTensorValue_3x4x2(),
                   self._SparseTensor_3x4x2()):
    with self.test_session(use_gpu=False):
      sparse_tensors = sparse_ops.sparse_split(
          sp_input=sp_input, num_split=2, axis=1)
      concat_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
      expected_output = self._SparseTensor_3x4x2()
      self.assertAllEqual(concat_tensor.indices.eval(),
                          expected_output.indices.eval())

def testSliceConcat(self):
  for sp_input in (self._SparseTensorValue_3x4x2(),
                   self._SparseTensor_3x4x2()):
    for axis in (1, -2):
      sparse_tensors = sparse_ops.sparse_split(
          sp_input=sp_input, num_split=2, axis=axis)
      concat_tensor = self.evaluate(
          sparse_ops.sparse_concat(1, sparse_tensors))
      expected_output = self._SparseTensor_3x4x2()
      self.assertAllEqual(concat_tensor.indices, expected_output.indices)

def testMismatchedShapes(self):
  with self.session(use_gpu=False) as sess:
    sp_a = self._SparseTensor_3x3()
    sp_b = self._SparseTensor_3x5()
    sp_c = self._SparseTensor_3x2()
    sp_d = self._SparseTensor_2x3()
    for concat_dim in (-1, 1):
      sp_concat = sparse_ops.sparse_concat(concat_dim,
                                           [sp_a, sp_b, sp_c, sp_d])

      # Shape mismatches can only be caught when the op is run.
      with self.assertRaisesOpError("Input shapes must match"):
        sess.run(sp_concat)

def testMismatchedShapes(self):
  with self.session(use_gpu=False) as sess:
    sp_a = self._SparseTensor_3x3()
    sp_b = self._SparseTensor_3x5()
    sp_c = self._SparseTensor_3x2()
    sp_d = self._SparseTensor_2x3()
    for concat_dim in (-1, 1):
      sp_concat = sparse_ops.sparse_concat(concat_dim,
                                           [sp_a, sp_b, sp_c, sp_d])

      # Shape mismatches can only be caught when the op is run.
      with self.assertRaisesOpError("Input shapes must match"):
        self.evaluate(sp_concat)

def testShapeInferenceUnknownShapes(self):
  with self.session(use_gpu=False):
    sp_inputs = [
        self._SparseTensor_UnknownShape(),
        self._SparseTensor_UnknownShape(val_shape=[3]),
        self._SparseTensor_UnknownShape(ind_shape=[1, 3]),
        self._SparseTensor_UnknownShape(shape_shape=[3])
    ]

    for concat_dim in (-2, 0):
      sp_concat = sparse_ops.sparse_concat(concat_dim, sp_inputs)

      self.assertEqual(sp_concat.indices.get_shape().as_list(), [None, 3])
      self.assertEqual(sp_concat.values.get_shape().as_list(), [None])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

def testShapeInferenceUnknownShapes(self):
  with self.session():
    sp_inputs = [
        self._SparseTensor_UnknownShape(),
        self._SparseTensor_UnknownShape(val_shape=[3]),
        self._SparseTensor_UnknownShape(ind_shape=[1, 3]),
        self._SparseTensor_UnknownShape(shape_shape=[3])
    ]

    for concat_dim in (-2, 0):
      sp_concat = sparse_ops.sparse_concat(concat_dim, sp_inputs)

      self.assertEqual(sp_concat.indices.get_shape().as_list(), [None, 3])
      self.assertEqual(sp_concat.values.get_shape().as_list(), [None])
      self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

def append_composite_tensor(target, to_append):
  """Helper function to append composite tensors to each other in the 0 axis.

  In order to support batching within a fit/evaluate/predict call, we need
  to be able to aggregate within a CompositeTensor. Unfortunately, the CT
  API currently does not make this easy - especially in V1 mode, where we're
  working with CompositeTensor Value objects that have no connection with
  the CompositeTensors that created them.

  Arguments:
    target: CompositeTensor or CompositeTensor value object that will be
      appended to.
    to_append: CompositeTensor or CompositeTensor value object to append to
      `target`.

  Returns:
    A CompositeTensor or CompositeTensor value object.

  Raises:
    RuntimeError: if concatenation is not possible.
  """
  if type(target) is not type(to_append):
    raise RuntimeError('Unable to concatenate %s and %s' %
                       (type(target), type(to_append)))

  # Perform type-specific concatenation.
  # TODO(b/125094323): This should be replaced by a simple call to
  # target.append() that should work on all of the below classes.

  # If we're seeing a CompositeTensor here, we know it's because we're in
  # Eager mode (or else we'd have evaluated the CT to a CT Value object
  # already). Therefore, it's safe to call concat() on it without evaluating
  # the result any further. If not - that is, if we're seeing a
  # SparseTensorValue or a RaggedTensorValue - we need to hand-update it
  # since we're outside of the graph anyways.
  if isinstance(target, sparse_tensor.SparseTensor):
    # We need to invoke the sparse version of concatenate here - tf.concat
    # won't work.
    return sparse_ops.sparse_concat(sp_inputs=[target, to_append], axis=0)
  elif isinstance(target, ragged_tensor.RaggedTensor):
    return ragged_concat_ops.concat([target, to_append], axis=0)
  elif isinstance(target, sparse_tensor.SparseTensorValue):
    return _append_sparse_tensor_value(target, to_append)
  elif isinstance(target, ragged_tensor_value.RaggedTensorValue):
    return _append_ragged_tensor_value(target, to_append)
  else:
    raise RuntimeError('Attempted to concatenate unsupported object %s.' %
                       type(target))

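# A hedged usage sketch of the SparseTensor branch above, via the public
# alias tf.sparse.concat; the inputs are illustrative.
import tensorflow as tf

target = tf.sparse.from_dense([[1, 0], [0, 2]])
to_append = tf.sparse.from_dense([[3, 0]])
combined = tf.sparse.concat(sp_inputs=[target, to_append], axis=0)
print(combined.dense_shape.numpy())          # [3 2]
print(tf.sparse.to_dense(combined).numpy())  # [[1 0], [0 2], [3 0]]
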
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments, num_outputs,
                                   trainable, weight_collections):
  """Creates an embedding lookup for all columns sharing a single weight."""
  for arg in embedding_lookup_arguments:
    assert arg.weight_tensor is None, (
        'Joint sums for weighted sparse columns are not supported. '
        'Please use weighted_sum_from_feature_columns instead.')
    assert arg.combiner == 'sum', (
        'Combiners other than sum are not supported for joint sums. '
        'Please use weighted_sum_from_feature_columns instead.')
  assert len(embedding_lookup_arguments) >= 1, (
      'At least one column must be in the model.')
  prev_size = 0
  sparse_tensors = []
  for a in embedding_lookup_arguments:
    t = a.input_tensor
    values = t.values + prev_size
    prev_size += a.vocab_size
    sparse_tensors.append(ops.SparseTensor(t.indices, values, t.shape))
  sparse_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
  with variable_scope.variable_scope(
      None, default_name='linear_weights',
      values=columns_to_tensors.values()):
    variable = contrib_variables.model_variable(
        name='weights',
        shape=[prev_size, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer,
        trainable=trainable,
        collections=weight_collections)
    if isinstance(variable, variables.Variable):
      variable = [variable]
    else:
      variable = variable._get_variable_list()  # pylint: disable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        sparse_tensor,
        sparse_weights=None,
        default_id=0,
        combiner='sum',
        name='_weights')
    return variable, predictions

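# A minimal sketch of the value-offset trick above, with made-up vocab sizes:
# each column's ids are shifted by the running vocabulary size so one shared
# weight matrix of prev_size rows serves every column.
import tensorflow as tf

# Feature A has vocab size 3 (ids 0-2); feature B has vocab size 2 (ids 0-1).
ids_a = tf.SparseTensor([[0, 0], [0, 1]],
                        tf.constant([0, 2], dtype=tf.int64), [1, 2])
ids_b = tf.SparseTensor([[0, 0]], tf.constant([1], dtype=tf.int64), [1, 1])
# Shift B's ids past A's vocabulary: joint ids now index a 5-row table.
ids_b = tf.SparseTensor(ids_b.indices, ids_b.values + 3, ids_b.dense_shape)
joint = tf.sparse.concat(1, [ids_a, ids_b])
print(tf.sparse.to_dense(joint).numpy())  # [[0 2 4]]
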
def ParseDataTensorOrDict(data): """Return a tensor to use for input data. The incoming features can be a dict where keys are the string names of the columns, which we turn into a single 2-D tensor. Args: data: `Output` or `dict` of `Output` objects. Returns: A 2-D tensor for input to tensor_forest, a keys tensor for the tf.Examples if they exist, and a list of the type of each column (e.g. continuous float, categorical). """ if isinstance(data, dict): # If there's at least one sparse tensor, everything has to be sparse. is_sparse = False for v in data.values(): if isinstance(v, sparse_tensor.SparseTensor): is_sparse = True break categorical_types = (dtypes.string, dtypes.int32, dtypes.int64) data_spec = [ constants.DATA_CATEGORICAL if data[k].dtype in categorical_types else constants.DATA_FLOAT for k in sorted(data.keys()) ] data_spec = [constants.DATA_FLOAT] + data_spec features = [] for k in sorted(data.keys()): if data[k].dtype == dtypes.string: convert_ops = Load() features.append(convert_ops.string_to_float(data[k])) elif data[k].dtype.is_integer: features.append(math_ops.to_float(data[k])) else: features.append(data[k]) if is_sparse: return sparse_ops.sparse_concat(1, features), data_spec else: return array_ops.concat(1, features), data_spec else: return (data, [constants.DATA_FLOAT])
def testConcatNoNonZeros(self):
  sp_a = self._SparseTensor_NoNonZeros((2, 3, 4))
  sp_b = self._SparseTensor_NoNonZeros((2, 7, 4))
  sp_c = self._SparseTensor_NoNonZeros((2, 5, 4))

  with self.session() as sess:
    concat_dim = 1
    sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b, sp_c])

    self.assertEqual(sp_concat.indices.get_shape(), [0, 3])
    self.assertEqual(sp_concat.values.get_shape(), [0])
    self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

    concat_out = self.evaluate(sp_concat)

    self.assertEqual(concat_out.indices.shape, (0, 3))
    self.assertEqual(concat_out.values.shape, (0,))
    self.assertAllEqual(concat_out.dense_shape, [2, 15, 4])

def concatenate(tensors, axis=-1):
  """Concatenates a list of tensors alongside the specified axis.

  Arguments:
      tensors: list of tensors to concatenate.
      axis: concatenation axis.

  Returns:
      A tensor.
  """
  if axis < 0:
    rank = ndim(tensors[0])
    if rank:
      axis %= rank
    else:
      axis = 0

  if py_all([is_sparse(x) for x in tensors]):
    return sparse_ops.sparse_concat(axis, tensors)
  else:
    return array_ops.concat([to_dense(x) for x in tensors], axis)

def ParseDataTensorOrDict(data): """Return a tensor to use for input data. The incoming features can be a dict where keys are the string names of the columns, which we turn into a single 2-D tensor. Args: data: `Tensor` or `dict` of `Tensor` objects. Returns: A 2-D tensor for input to tensor_forest, a keys tensor for the tf.Examples if they exist, and a list of the type of each column (e.g. continuous float, categorical). """ if isinstance(data, dict): # If there's at least one sparse tensor, everything has to be sparse. is_sparse = False for v in data.values(): if isinstance(v, sparse_tensor.SparseTensor): is_sparse = True break categorical_types = (dtypes.string, dtypes.int32, dtypes.int64) data_spec = [constants.DATA_CATEGORICAL if data[k].dtype in categorical_types else constants.DATA_FLOAT for k in sorted(data.keys())] data_spec = [constants.DATA_FLOAT] + data_spec features = [] for k in sorted(data.keys()): if data[k].dtype == dtypes.string: convert_ops = Load() features.append(convert_ops.string_to_float(data[k])) elif data[k].dtype.is_integer: features.append(math_ops.to_float(data[k])) else: features.append(data[k]) if is_sparse: return sparse_ops.sparse_concat(1, features), data_spec else: return array_ops.concat_v2(features, 1), data_spec else: return (data, [constants.DATA_FLOAT])
def testConcatSomeNoNonZeros(self):
  sp_a = self._SparseTensor_NoNonZeros((2, 7, 4))
  sp_b = self._SparseTensor_2x3x4()
  sp_c = self._SparseTensor_NoNonZeros((2, 5, 4))
  output_nnz = sp_b.indices.get_shape()[0]

  with self.session() as sess:
    concat_dim = 1
    sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a, sp_b, sp_c])

    self.assertEqual(sp_concat.indices.get_shape(), [output_nnz, 3])
    self.assertEqual(sp_concat.values.get_shape(), [output_nnz])
    self.assertEqual(sp_concat.dense_shape.get_shape(), [3])

    concat_out = self.evaluate(sp_concat)

    self.assertAllEqual(concat_out.indices,
                        sp_b.indices + [0, sp_a.dense_shape[1], 0])
    self.assertAllEqual(concat_out.values, sp_b.values)
    self.assertAllEqual(concat_out.dense_shape, [2, 15, 4])

def _structuredRaggedSparseElement(self, structure, shapes, dtype,
                                   padded_shape):
  if structure is None:
    dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
    values = []
    for shape in shapes:
      dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
      sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
      padded_sparse = sparse_tensor.SparseTensor(sparse.indices,
                                                 sparse.values, dense_shape)
      reshaped_sparse = sparse_ops.sparse_reshape(
          padded_sparse,
          array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0))
      values.append(reshaped_sparse)
    return sparse_ops.sparse_concat(0, values)
  else:
    return tuple([
        self._structuredRaggedSparseElement(substructure, shapes, dtype,
                                            padded_shape)
        for substructure in structure
    ])

def testConcat1(self):
  with self.session() as sess:
    # concat(A):
    # [    1]
    # [2    ]
    # [3   4]
    for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
      # Note that we ignore concat_dim in this case since we short-circuit
      # the single-input case in python.
      for concat_dim in (-2000, 1, 2000):
        sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a])

        self.assertEqual(sp_concat.indices.get_shape(), [4, 2])
        self.assertEqual(sp_concat.values.get_shape(), [4])
        self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

        concat_out = self.evaluate(sp_concat)

        self.assertAllEqual(concat_out.indices,
                            [[0, 2], [1, 0], [2, 0], [2, 2]])
        self.assertAllEqual(concat_out.values, [1, 2, 3, 4])
        self.assertAllEqual(concat_out.dense_shape, [3, 3])

def testConcat1(self):
  with self.session(use_gpu=False) as sess:
    # concat(A):
    # [    1]
    # [2    ]
    # [3   4]
    for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
      # Note that we ignore concat_dim in this case since we short-circuit
      # the single-input case in python.
      for concat_dim in (-2000, 1, 2000):
        sp_concat = sparse_ops.sparse_concat(concat_dim, [sp_a])

        self.assertEqual(sp_concat.indices.get_shape(), [4, 2])
        self.assertEqual(sp_concat.values.get_shape(), [4])
        self.assertEqual(sp_concat.dense_shape.get_shape(), [2])

        concat_out = self.evaluate(sp_concat)

        self.assertAllEqual(concat_out.indices,
                            [[0, 2], [1, 0], [2, 0], [2, 2]])
        self.assertAllEqual(concat_out.values, [1, 2, 3, 4])
        self.assertAllEqual(concat_out.dense_shape, [3, 3])

def _check(i):
  self.assertTrue(sparse_tensor.is_sparse(i))
  return sparse_ops.sparse_concat(0, [i, i])

def _check(i):
  self.assertTrue(isinstance(i, sparse_tensor.SparseTensor))
  return sparse_ops.sparse_concat(0, [i, i])

def _ParseSparse(data): """Concat sparse tensors together. A common use of sparse tensors is to treat strings as a sparse bit vector with a large number of features representing the presence of all possible values. Here we convert these strings to integer indices in a sparse bit tensor. In order to pack each incoming feature into a single sparse tensor, we add an offset to the converted indices to indicate that they came from different features in the source data. Args: data: A dict of name -> Tensor. Returns: A single sparse tensor with float values and a 1-D input spec Tensor. Raises: NotImplementedError: Combining dense and sparse tensors is not yet supported. ValueError: If data contains non-string Tensors. """ convert_ops = Load() # Sparse tensor indices have 63 bits to use for information. We use the # minimum number of these (MSBs) for the offset, and pack the rest with the # actual data. num_features = len(data) offset_bits = int(math.ceil(math.log(num_features, 2))) # We condense data to 26 bits, see sparse_values_to_indices.cc offset_increment = int(math.pow(2, 26 - offset_bits)) offset = 0 sparse_tensors = [] keys = None weights = None for k in sorted(data.keys()): if k == graph_io.KEY_FEATURE_NAME: keys = data[k] elif k == EXAMPLE_WEIGHT_NAME: weights = data[k] elif isinstance(data[k], ops.SparseTensor): # TODO(gilberth): Support mixed string/float sparse tensors. # We currently only support string (categorical) data if we're using # sparse tensors. if data[k].dtype != dtypes.string: raise ValueError( 'Only sparse tensors of type string are supported.') sparse_indices = data[k].indices sparse_values = data[k].values new_shape = array_ops.concat( 0, [array_ops.slice(data[k].shape, [0], [1]), [offset_increment]]) new_indices, new_values = convert_ops.sparse_values_to_indices( sparse_indices, sparse_values, offset, offset_bits=offset_bits) sparse_tensors.append( ops.SparseTensor(indices=new_indices, values=new_values, shape=new_shape)) else: # Convert dense to sparse. raise NotImplementedError( 'Dense to sparse conversion not implemented.') return (sparse_ops.sparse_concat(1, sparse_tensors), keys, weights, [constants.DATA_CATEGORICAL])
def ParseDataTensorOrDict(data): """Return a tensor to use for input data. The incoming features can be a dict where keys are the string names of the columns, which we turn into a single 2-D tensor. Args: data: `Tensor` or `dict` of `Tensor` objects. Returns: A 2-D tensor for input to tensor_forest, a keys tensor for the tf.Examples if they exist, and a list of the type of each column (e.g. continuous float, categorical). """ data_spec = TensorForestDataSpec() if isinstance(data, dict): dense_features_size = 0 dense_features = [] sparse_features = [] for k in sorted(data.keys()): is_sparse = isinstance(data[k], sparse_tensor.SparseTensor) if is_sparse: # TODO(gilberth): support sparse continuous. if data[k].dtype == dtypes.float32: logging.info('TensorForest does not support sparse continuous.') continue elif data_spec.sparse.size() == 0: col_spec = data_spec.sparse.add() col_spec.original_type = DATA_CATEGORICAL col_spec.name = 'all_sparse' col_spec.size = -1 sparse_features.append( sparse_tensor.SparseTensor(data[ k].indices, CastToFloat(data[k].values), data[k].dense_shape)) else: col_spec = data_spec.dense.add() col_spec.original_type = DTYPE_TO_FTYPE[data[k].dtype] col_spec.name = GetColumnName(k, len(dense_features)) # the second dimension of get_shape should always be known. shape = data[k].get_shape() if len(shape) == 1: col_spec.size = 1 else: col_spec.size = shape[1].value dense_features_size += col_spec.size dense_features.append(CastToFloat(data[k])) processed_dense_features = None processed_sparse_features = None if dense_features: processed_dense_features = array_ops.concat(dense_features, 1) data_spec.dense_features_size = dense_features_size if sparse_features: processed_sparse_features = sparse_ops.sparse_concat(1, sparse_features) logging.info(data_spec.SerializeToString()) return processed_dense_features, processed_sparse_features, data_spec elif isinstance(data, sparse_tensor.SparseTensor): col_spec = data_spec.sparse.add() col_spec.name = 'sparse_features' col_spec.original_type = DTYPE_TO_FTYPE[data.dtype] col_spec.size = -1 data_spec.dense_features_size = 0 return None, data, data_spec else: data = ops.convert_to_tensor(data) col_spec = data_spec.dense.add() col_spec.name = 'dense_features' col_spec.original_type = DTYPE_TO_FTYPE[data.dtype] col_spec.size = data.get_shape()[1] data_spec.dense_features_size = col_spec.size return data, None, data_spec
def ParseDataTensorOrDict(data): """Return a tensor to use for input data. The incoming features can be a dict where keys are the string names of the columns, which we turn into a single 2-D tensor. Args: data: `Tensor` or `dict` of `Tensor` objects. Returns: A 2-D tensor for input to tensor_forest, a keys tensor for the tf.Examples if they exist, and a list of the type of each column (e.g. continuous float, categorical). """ data_spec = TensorForestDataSpec() if isinstance(data, dict): dense_features_size = 0 dense_features = [] sparse_features = [] for k in sorted(data.keys()): is_sparse = isinstance(data[k], sparse_tensor.SparseTensor) if is_sparse: # TODO(gilberth): support sparse categorical. if data[k].dtype == dtypes.string: logging.info('TensorForest does not support sparse categorical. ' 'Transform it into a number with hash buckets.') continue elif data_spec.sparse.size() == 0: col_spec = data_spec.sparse.add() col_spec.original_type = DATA_FLOAT col_spec.name = 'all_sparse' col_spec.size = -1 sparse_features.append( sparse_tensor.SparseTensor(data[ k].indices, CastToFloat(data[k].values), data[k].dense_shape)) else: col_spec = data_spec.dense.add() col_spec.original_type = DTYPE_TO_FTYPE[data[k].dtype] col_spec.name = k # the second dimension of get_shape should always be known. shape = data[k].get_shape() if len(shape) == 1: col_spec.size = 1 else: col_spec.size = shape[1].value dense_features_size += col_spec.size x = array_ops.reshape(data[k], [-1, 1]) dense_features.append(CastToFloat(x)) processed_dense_features = None processed_sparse_features = None if dense_features: processed_dense_features = array_ops.concat(dense_features, 1) data_spec.dense_features_size = dense_features_size if sparse_features: processed_sparse_features = sparse_ops.sparse_concat(1, sparse_features) logging.info(data_spec.SerializeToString()) return processed_dense_features, processed_sparse_features, data_spec elif isinstance(data, sparse_tensor.SparseTensor): col_spec = data_spec.sparse.add() col_spec.name = 'sparse_features' col_spec.original_type = DTYPE_TO_FTYPE[data.dtype] col_spec.size = -1 data_spec.dense_features_size = 0 return None, data, data_spec else: data = ops.convert_to_tensor(data) col_spec = data_spec.dense.add() col_spec.name = 'dense_features' col_spec.original_type = DTYPE_TO_FTYPE[data.dtype] col_spec.size = data.get_shape()[1] data_spec.dense_features_size = col_spec.size return data, None, data_spec
def batch_reduce_fn(state, value):
  return sparse_ops.sparse_concat(0, [state, value])