def _PruneSparseTensor(unpruned, pruned_pattern):
  """Helper function to prune COO sparse tensor.

  Given two sparse tensors 'unpruned' and 'pruned_pattern', generates another
  sparse tensor with indices and values from 'unpruned' only if its indices
  also occur in pruned_pattern.

  Args:
    unpruned: COO matrix with unpruned indices
    pruned_pattern: COO matrix with pruned pattern.

  TODO(tabakg): This is far from optimal. Consider a C++ implementation.

  Returns:
    Indices, values, and dense_shape of the pruned matrix.
  """
  pruned_indices = sparse_ops.sparse_reshape(
      pruned_pattern, shape=(-1,)).indices[..., 0]
  unpruned_indices = sparse_ops.sparse_reshape(
      unpruned, shape=(-1,)).indices[..., 0]
  best_match = array_ops.searchsorted(unpruned_indices, pruned_indices)
  keep_indices = array_ops.gather(
      best_match,
      array_ops.where(
          math_ops.equal(
              array_ops.gather(unpruned_indices, best_match),
              pruned_indices)))
  return (array_ops.gather_nd(unpruned.indices, keep_indices),
          array_ops.gather_nd(unpruned.values, keep_indices),
          pruned_pattern.dense_shape)
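# Illustrative usage sketch for _PruneSparseTensor above (not part of the
# original module): it keeps only the entries of `unpruned` whose flattened
# positions also appear in `pruned_pattern`. Assumes TensorFlow is available,
# that the module aliases used above (sparse_ops, array_ops, math_ops) resolve
# to the corresponding TF ops, and that both inputs share a dense_shape and
# have canonically ordered indices.
def _prune_sparse_tensor_example():
  import tensorflow as tf
  unpruned = tf.SparseTensor(
      indices=[[0, 0], [1, 1], [2, 2]], values=[1., 2., 3.],
      dense_shape=[3, 3])
  pruned_pattern = tf.SparseTensor(
      indices=[[0, 0], [2, 2]], values=[0., 0.], dense_shape=[3, 3])
  indices, values, dense_shape = _PruneSparseTensor(unpruned, pruned_pattern)
  # Expected: indices == [[0, 0], [2, 2]], values == [1., 3.],
  # dense_shape == [3, 3].
  return indices, values, dense_shape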
def testFeedPartialShapes(self):
  with self.session(use_gpu=False):
    # Incorporate new rank into shape information if known
    sp_input = self._SparseTensorPlaceholder()
    sp_output = sparse_ops.sparse_reshape(sp_input, [2, 3, 5])
    self.assertListEqual(sp_output.indices.get_shape().as_list(), [None, 3])
    self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [3])

    # Incorporate known shape information about input indices in output
    # indices
    sp_input = self._SparseTensorPlaceholder()
    sp_input.indices.set_shape([5, None])
    sp_output = sparse_ops.sparse_reshape(sp_input, [2, 3, 5])
    self.assertListEqual(sp_output.indices.get_shape().as_list(), [5, 3])
    self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [3])

    # Even if new_shape has no shape information, we know the ranks of
    # output indices and shape
    sp_input = self._SparseTensorPlaceholder()
    sp_input.indices.set_shape([5, None])
    new_shape = array_ops.placeholder(dtypes.int64)
    sp_output = sparse_ops.sparse_reshape(sp_input, new_shape)
    self.assertListEqual(sp_output.indices.get_shape().as_list(), [5, None])
    self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [None])
def _get_sparse_tensors(self, inputs, weight_collections=None,
                        trainable=None):
  sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)
  id_tensor = sparse_tensors.id_tensor
  weight_tensor = sparse_tensors.weight_tensor

  # Expands final dimension, so that embeddings are not combined during
  # embedding lookup.
  check_id_rank = check_ops.assert_equal(
      array_ops.rank(id_tensor), 2,
      data=[
          'Column {} expected ID tensor of rank 2. '.format(self.name),
          'id_tensor shape: ', array_ops.shape(id_tensor)])
  with ops.control_dependencies([check_id_rank]):
    id_tensor = sparse_ops.sparse_reshape(
        id_tensor,
        shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
  if weight_tensor is not None:
    check_weight_rank = check_ops.assert_equal(
        array_ops.rank(weight_tensor), 2,
        data=[
            'Column {} expected weight tensor of rank 2.'.format(self.name),
            'weight_tensor shape:', array_ops.shape(weight_tensor)])
    with ops.control_dependencies([check_weight_rank]):
      weight_tensor = sparse_ops.sparse_reshape(
          weight_tensor,
          shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
  return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
def testFeedMismatchedSizesWithInferredDim(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1])
    with self.assertRaisesOpError("requested shape requires a multiple"):
      sess.run(sp_output, {sp_input: input_val})
def testFeedDenseReshapeSemantics(self):
  with self.session() as sess:
    # Compute a random rank-5 initial shape and new shape, randomly sparsify
    # it, and check that the output of SparseReshape has the same semantics
    # as a dense reshape.
    factors = np.array([2] * 4 + [3] * 4 + [5] * 4)  # 810k total elements
    orig_rank = np.random.randint(2, 7)
    orig_map = np.random.randint(orig_rank, size=factors.shape)
    orig_shape = [np.prod(factors[orig_map == d]) for d in range(orig_rank)]
    new_rank = np.random.randint(2, 7)
    new_map = np.random.randint(new_rank, size=factors.shape)
    new_shape = [np.prod(factors[new_map == d]) for d in range(new_rank)]

    orig_dense = np.random.uniform(size=orig_shape)
    orig_indices = np.transpose(np.nonzero(orig_dense < 0.5))
    orig_values = orig_dense[orig_dense < 0.5]

    new_dense = np.reshape(orig_dense, new_shape)
    new_indices = np.transpose(np.nonzero(new_dense < 0.5))
    new_values = new_dense[new_dense < 0.5]

    sp_input = self._SparseTensorPlaceholder()
    input_val = sparse_tensor.SparseTensorValue(orig_indices, orig_values,
                                                orig_shape)
    sp_output = sparse_ops.sparse_reshape(sp_input, new_shape)

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, new_indices)
    self.assertAllEqual(output_val.values, new_values)
    self.assertAllEqual(output_val.dense_shape, new_shape)
def dense_labels_to_sparse(dense, length):
  """Convert dense labels with sequence lengths to sparse tensor.

  Args:
    dense: tensor of shape [batch, max_length]
    length: int tensor of shape [batch] The length of each sequence in dense.

  Returns:
    tf.SparseTensor with values only for the valid elements of sequences.
  """
  flat_values = array_ops.reshape(dense, [-1])
  flat_indices = math_ops.range(
      array_ops.shape(flat_values, out_type=dtypes.int64)[0])
  mask = array_ops.sequence_mask(length, maxlen=array_ops.shape(dense)[1])
  flat_mask = array_ops.reshape(mask, [-1])
  indices = array_ops.expand_dims(
      array_ops.boolean_mask(flat_indices, flat_mask), 1)
  values = array_ops.boolean_mask(flat_values, flat_mask)
  sparse = sparse_tensor.SparseTensor(
      indices=indices,
      values=math_ops.cast(values, dtypes.int32),
      dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
  reshaped = sparse_ops.sparse_reshape(sparse, array_ops.shape(dense))
  max_length = math_ops.reduce_max(length)
  return sparse_tensor.SparseTensor(
      indices=reshaped.indices,
      values=reshaped.values,
      dense_shape=[
          math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
          math_ops.cast(max_length, dtypes.int64)
      ])
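# Illustrative usage sketch for dense_labels_to_sparse above (not part of the
# original module), assuming the module-level aliases resolve to TensorFlow:
# padded dense labels plus per-sequence lengths become a SparseTensor trimmed
# to the longest valid sequence.
def _dense_labels_to_sparse_example():
  import tensorflow as tf
  dense = tf.constant([[1, 2, 0, 0],
                       [3, 4, 5, 0]])  # padded labels, shape [batch=2, max_length=4]
  length = tf.constant([2, 3])         # valid length of each sequence
  sparse = dense_labels_to_sparse(dense, length)
  # Expected: sparse.values == [1, 2, 3, 4, 5] and sparse.dense_shape == [2, 3]
  # (the second dimension shrinks to max(length)).
  return sparse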
def tensors_to_item(self, keys_to_tensors):
  """Maps the given dictionary of tensors to a concatenated list of keypoints.

  Args:
    keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

  Returns:
    [time, num_keypoints, 2] tensor of keypoint coordinates, in order [y, x].
    Whether the tensor is a SparseTensor or a dense Tensor is determined by
    the return_dense parameter. Empty positions in the sparse tensor are
    filled with -1.0 values.
  """
  coordinates = []
  for key in self._full_keys:
    value = keys_to_tensors[key]
    expanded_dims = array_ops.concat([
        math_ops.to_int64(array_ops.shape(value)),
        constant_op.constant([1], dtype=dtypes.int64)
    ], 0)
    coordinate = sparse_ops.sparse_reshape(value, expanded_dims)
    coordinates.append(coordinate)
  keypoints = sparse_ops.sparse_concat(2, coordinates)
  if self._return_dense:
    keypoints = sparse_ops.sparse_tensor_to_dense(
        keypoints, default_value=self._default_value)
  return keypoints
def testFeedDenseReshapeSemantics(self):
  with self.session(use_gpu=False) as sess:
    # Compute a random rank-5 initial shape and new shape, randomly sparsify
    # it, and check that the output of SparseReshape has the same semantics
    # as a dense reshape.
    factors = np.array([2] * 4 + [3] * 4 + [5] * 4)  # 810k total elements
    orig_rank = np.random.randint(2, 7)
    orig_map = np.random.randint(orig_rank, size=factors.shape)
    orig_shape = [np.prod(factors[orig_map == d]) for d in range(orig_rank)]
    new_rank = np.random.randint(2, 7)
    new_map = np.random.randint(new_rank, size=factors.shape)
    new_shape = [np.prod(factors[new_map == d]) for d in range(new_rank)]

    orig_dense = np.random.uniform(size=orig_shape)
    orig_indices = np.transpose(np.nonzero(orig_dense < 0.5))
    orig_values = orig_dense[orig_dense < 0.5]

    new_dense = np.reshape(orig_dense, new_shape)
    new_indices = np.transpose(np.nonzero(new_dense < 0.5))
    new_values = new_dense[new_dense < 0.5]

    sp_input = self._SparseTensorPlaceholder()
    input_val = sparse_tensor.SparseTensorValue(orig_indices, orig_values,
                                                orig_shape)
    sp_output = sparse_ops.sparse_reshape(sp_input, new_shape)

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, new_indices)
    self.assertAllEqual(output_val.values, new_values)
    self.assertAllEqual(output_val.dense_shape, new_shape)
def testFeedMultipleInferredDims(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1, -1])
    with self.assertRaisesOpError("only one output dimension may be -1"):
      sess.run(sp_output, {sp_input: input_val})
def testFeedMismatchedSizesWithInferredDim(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1])
    with self.assertRaisesOpError("requested shape requires a multiple"):
      sess.run(sp_output, {sp_input: input_val})
def _SparseReduceSumSparseGrad(op, unused_output_indices_grad, out_grad,
                               unused_output_shape_grad):
  """
  Args:
    op: the SparseReduceSumSparse op
    unused_output_indices_grad: the incoming gradients of the output indices
    out_grad: the incoming gradients of the output values

  Returns:
    Gradient for each of the 4 input tensors:
      (input_indices, input_values, input_shape, reduction_axes)
    The gradients for input_indices, input_shape and reduction_axes are None.
  """
  # sp_indices = op.inputs[0]
  # vals_shape = array_ops.shape(op.inputs[1])
  sp_shape = op.inputs[2]
  out_shape = op.outputs[2]

  output_shape_kept_dims = math_ops.to_int64(
      math_ops.reduced_shape(sp_shape, op.inputs[3]))
  sp_grad = sparse_tensor.SparseTensor(op.outputs[0], out_grad, out_shape)
  sp_grad = sparse_ops.sparse_reshape(sp_grad, output_shape_kept_dims)

  # TODO: replace the hardcoded 128 with the dimension size inferred from
  # sp_shape, e.g.:
  # sp_tile = sparse_ops.sparse_concat(
  #     2, [sp_grad] * math_ops.to_int64(sp_shape)[2])
  # Tile gradients along the 3rd axis.
  sp_tile = sparse_ops.sparse_concat(2, [sp_grad] * 128)

  # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
  return (None, sp_tile._values, None, None)
def testFeedMultipleInferredDims(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1, -1])
    with self.assertRaisesOpError("only one output dimension may be -1"):
      sess.run(sp_output, {sp_input: input_val})
def batch_reduce_fn(state, value):
  padded_value = sparse_tensor.SparseTensor(
      indices=value.indices, values=value.values, dense_shape=padded_shape)
  reshaped_value = sparse_ops.sparse_reshape(
      padded_value,
      array_ops.concat(
          [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
  return sparse_ops.sparse_concat(0, [state, reshaped_value])
def testFeedMismatchedSizes(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, 7])
    with self.assertRaisesOpError(
        "Input to reshape is a tensor with 30 dense values"):
      sess.run(sp_output, {sp_input: input_val})
def testFeedMismatchedSizes(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [4, 7])
    with self.assertRaisesOpError(
        "Input to reshape is a tensor with 30 dense values"):
      sess.run(sp_output, {sp_input: input_val})
def testSameShape(self):
  with self.session(use_gpu=False) as sess:
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(input_val, [5, 6])

    output_val = sess.run(sp_output)
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testSameShape(self):
  with self.session() as sess:
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(input_val, [5, 6])

    output_val = self.evaluate(sp_output)
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testPropagatesFullyKnownDenseShapeWhenShapePartiallyKnown(self):
  sp_input = sparse_tensor.SparseTensor.from_value(
      self._SparseTensorValue_2x3x4())
  self.assertAllEqual((2, 3, 4), sp_input.shape)
  sp_output = sparse_ops.sparse_reshape(
      sp_input,
      shape=array_ops.concat(
          (constant_op.constant([2], dtype=dtypes.int64),
           array_ops.placeholder(dtype=dtypes.int64, shape=[1])),
          axis=0))
  self.assertAllEqual((2, 3 * 4), sp_output.shape)
def testFeedSameShape(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [5, 6])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testFeedSameShapeWithInferredDim(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [-1, 6])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testFeedSameShape(self):
  with self.test_session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [5, 6])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testUpRank(self):
  with self.session(use_gpu=False) as sess:
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(input_val, [2, 3, 5])

    output_val = sess.run(sp_output)
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4], [0, 2, 0],
                                  [1, 1, 0], [1, 1, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
def testUpRank(self):
  with self.session() as sess:
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(input_val, [2, 3, 5])

    output_val = self.evaluate(sp_output)
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4], [0, 2, 0],
                                  [1, 1, 0], [1, 1, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
def call(self, inputs):
  input_shape = array_ops.stack(
      (math_ops.reduce_prod(array_ops.shape(inputs)[:-1]),
       self.kernel.shape[0]))
  output_shape = array_ops.concat(
      (array_ops.shape(inputs)[:-1], [self.kernel.shape[1]]), -1)
  x = sparse_ops.sparse_reshape(inputs, input_shape)
  return array_ops.reshape(
      self.activation(
          sparse_ops.sparse_tensor_dense_matmul(x, self.kernel) + self.bias),
      output_shape)
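# Illustrative standalone sketch of the reshape trick used in call() above
# (not part of the original layer; the name sparse_dense_apply and its
# arguments are hypothetical). The leading dimensions of the sparse input are
# collapsed with tf.sparse.reshape so the sparse-dense matmul operates on a
# rank-2 operand, and the dense result is reshaped back afterwards.
def sparse_dense_apply(sp_inputs, kernel, bias):
  import tensorflow as tf
  in_shape = tf.cast(sp_inputs.dense_shape, tf.int32)
  flat_shape = tf.stack((tf.reduce_prod(in_shape[:-1]), tf.shape(kernel)[0]))
  output_shape = tf.concat((in_shape[:-1], [tf.shape(kernel)[1]]), -1)
  x = tf.sparse.reshape(sp_inputs, flat_shape)         # [prod(leading), in]
  y = tf.sparse.sparse_dense_matmul(x, kernel) + bias  # [prod(leading), out]
  return tf.reshape(y, output_shape)                   # [..., out]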
def testWorksWellWithTfShape(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    shape = array_ops.shape(sp_input)  # tf.shape generates int32 output
    sp_output = sparse_ops.sparse_reshape(sp_input, shape)

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testWorksWellWithTfShape(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    shape = array_ops.shape(sp_input)  # tf.shape generates int32 output
    sp_output = sparse_ops.sparse_reshape(sp_input, shape)

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices, input_val.indices)
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
def testFeedUpRankWithInferredDim(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [2, -1, 5])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4], [0, 2, 0],
                                  [1, 1, 0], [1, 1, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
def testFeedDownRankWithInferredDim(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_2x3x4()
    sp_output = sparse_ops.sparse_reshape(sp_input, [6, -1])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 1], [1, 0], [1, 2], [3, 3], [4, 1],
                                  [4, 3], [5, 2]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [6, 4])
def testFeedUpRankWithInferredDim(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [2, -1, 5])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4], [0, 2, 0],
                                  [1, 1, 0], [1, 1, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
def testFeedNewShapeSameRank(self):
  with self.session() as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [3, 10])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0], [0, 6], [0, 9], [1, 0], [2, 0],
                                  [2, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [3, 10])
def testFeedDownRankWithInferredDim(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_2x3x4()
    sp_output = sparse_ops.sparse_reshape(sp_input, [6, -1])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 1], [1, 0], [1, 2], [3, 3], [4, 1],
                                  [4, 3], [5, 2]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [6, 4])
def _maybe_reshape_input_tensor(tensor, column_name, output_rank):
  """Reshape the input tensor by the following rule.

  1. If `output_rank > input_rank + 1`, raise a `ValueError`.
  2. If `output_rank == input_rank + 1`, expand the tensor by one dimension.
  3. If `output_rank == input_rank`, do nothing.
  4. If `output_rank < input_rank`, flatten the inner dimensions of the tensor.

  Args:
    tensor: A Tensor or SparseTensor to be reshaped.
    column_name: A string name of the feature column for the tensor.
    output_rank: the desired rank of the tensor.

  Returns:
    A reshaped Tensor or SparseTensor.

  Raises:
    ValueError: if `output_rank > input_rank + 1` for the input tensor.
  """
  input_rank = tensor.get_shape().ndims

  if input_rank is None and isinstance(tensor, sparse_tensor_py.SparseTensor):
    # Try to get the rank of a sparse tensor by its dense_shape's shape.
    input_rank = tensor.dense_shape.get_shape().as_list()[0]

  if input_rank is None:
    raise ValueError(
        'Error while processing column {}. Rank of input Tensor '
        'can not be None.'.format(column_name))

  if output_rank > input_rank + 1:
    raise ValueError(
        'Error while processing column {}. Rank of input Tensor '
        '({}) should be the same as output_rank ({}). For '
        'example, sequence data should typically be 3 '
        'dimensional (rank 3) while non-sequence data is '
        'typically 2 dimensional (rank 2).'.format(column_name, input_rank,
                                                   output_rank))
  elif output_rank == input_rank + 1:
    # Expand the tensor's shape by 1 dimension.
    if isinstance(tensor, sparse_tensor_py.SparseTensor):
      output_shape = array_ops.concat([tensor.dense_shape, [1]], 0)
      return sparse_ops.sparse_reshape(tensor, output_shape)
    else:
      reshaped = array_ops.expand_dims(tensor, -1)
      # Try to calculate the new shape.
      static_shape = tensor.get_shape()
      if static_shape is not None and static_shape.dims is not None:
        reshaped.set_shape(static_shape.as_list() + [1])
      return reshaped
  elif output_rank < input_rank:
    return layers._inner_flatten(tensor, output_rank)  # pylint: disable=protected-access
  else:
    return tensor
def testFeedNewShapeSameRank(self):
  with self.session(use_gpu=False) as sess:
    sp_input = self._SparseTensorPlaceholder()
    input_val = self._SparseTensorValue_5x6()
    sp_output = sparse_ops.sparse_reshape(sp_input, [3, 10])

    output_val = sess.run(sp_output, {sp_input: input_val})
    self.assertAllEqual(output_val.indices,
                        np.array([[0, 0], [0, 6], [0, 9], [1, 0], [2, 0],
                                  [2, 1]]))
    self.assertAllEqual(output_val.values, input_val.values)
    self.assertAllEqual(output_val.dense_shape, [3, 10])
def _MakeAndReshapeTensor(self, tensor_class, original_shape, target_shape):
  if tensor_class == "sparse":
    ind = np.zeros([0, len(original_shape)]).astype(np.int64)
    val = np.array([]).astype(np.float64)
    shape = np.array(original_shape).astype(np.int64)
    sp_input = sparse_tensor.SparseTensorValue(ind, val, shape)
    sp_output = self.evaluate(
        sparse_ops.sparse_reshape(sp_input, target_shape))
    return sp_output.dense_shape
  else:
    dense_input = array_ops.zeros(original_shape)
    dense_output = self.evaluate(
        array_ops.reshape(dense_input, target_shape))
    return dense_output.shape
def tensors_to_item(self, keys_to_tensors):
  tensor = keys_to_tensors[self._tensor_key]
  shape = self._shape
  if self._shape_key:
    shape = keys_to_tensors[self._shape_key]
    if isinstance(shape, ops.SparseTensor):
      shape = sparse_ops.sparse_tensor_to_dense(shape)
  if isinstance(tensor, ops.SparseTensor):
    if shape is not None:
      tensor = sparse_ops.sparse_reshape(tensor, shape)
    tensor = sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
  else:
    if shape is not None:
      tensor = array_ops.reshape(tensor, shape)
  return tensor
def tensors_to_item(self, keys_to_tensors):
  tensor = keys_to_tensors[self._tensor_key]
  shape = self._shape
  if self._shape_keys:
    shape_dims = []
    for k in self._shape_keys:
      shape_dim = keys_to_tensors[k]
      if isinstance(shape_dim, ops.SparseTensor):
        shape_dim = sparse_ops.sparse_tensor_to_dense(shape_dim)
      shape_dims.append(shape_dim)
    shape = array_ops.squeeze(array_ops.pack(shape_dims))
  if isinstance(tensor, ops.SparseTensor):
    if shape is not None:
      tensor = sparse_ops.sparse_reshape(tensor, shape)
    tensor = sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
  else:
    if shape is not None:
      tensor = array_ops.reshape(tensor, shape)
  return tensor
def tensors_to_item(self, keys_to_tensors):
  tensor = keys_to_tensors[self._tensor_key]
  shape = self._shape
  if self._shape_keys:
    shape_dims = []
    for k in self._shape_keys:
      shape_dim = keys_to_tensors[k]
      if isinstance(shape_dim, sparse_tensor.SparseTensor):
        shape_dim = sparse_ops.sparse_tensor_to_dense(shape_dim)
      shape_dims.append(shape_dim)
    shape = array_ops.reshape(array_ops.stack(shape_dims), [-1])
  if isinstance(tensor, sparse_tensor.SparseTensor):
    if shape is not None:
      tensor = sparse_ops.sparse_reshape(tensor, shape)
    tensor = sparse_ops.sparse_tensor_to_dense(tensor, self._default_value)
  else:
    if shape is not None:
      tensor = array_ops.reshape(tensor, shape)
  return tensor
def reshape_first_ndims(tensor, first_ndims, new_shape):
  """Reshapes the first n dims of the input `tensor` to `new_shape`.

  Args:
    tensor: The input `Tensor`.
    first_ndims: An int denoting the first n dims.
    new_shape: A list of ints representing the new shape.

  Returns:
    A reshaped `Tensor`.
  """
  assert tensor.get_shape().ndims is None or tensor.get_shape(
  ).ndims >= first_ndims, (
      'Tensor shape is less than {} dims.'.format(first_ndims))
  new_shape = array_ops.concat(
      [new_shape, array_ops.shape(tensor)[first_ndims:]], 0)
  if isinstance(tensor, sparse_tensor.SparseTensor):
    return sparse_ops.sparse_reshape(tensor, new_shape)
  return array_ops.reshape(tensor, new_shape)
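# Illustrative usage sketch for reshape_first_ndims above (not part of the
# original module), assuming the module aliases resolve to TensorFlow: only
# the first `first_ndims` dimensions are reshaped; trailing dimensions are
# carried over unchanged.
def _reshape_first_ndims_example():
  import tensorflow as tf
  dense = tf.zeros([6, 4])
  # The leading dimension [6] is reshaped to [2, 3]; the trailing 4 is kept,
  # so the result has shape [2, 3, 4].
  return reshape_first_ndims(dense, first_ndims=1, new_shape=[2, 3])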
def _structuredRaggedSparseElement(self, structure, shapes, dtype,
                                   padded_shape):
  if structure is None:
    dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
    values = []
    for shape in shapes:
      dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
      sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
      padded_sparse = sparse_tensor.SparseTensor(sparse.indices,
                                                 sparse.values, dense_shape)
      reshaped_sparse = sparse_ops.sparse_reshape(
          padded_sparse,
          array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0))
      values.append(reshaped_sparse)
    return sparse_ops.sparse_concat(0, values)
  else:
    return tuple([
        self._structuredRaggedSparseElement(substructure, shapes, dtype,
                                            padded_shape)
        for substructure in structure
    ])
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as
  the vocabulary size is not necessarily a multiple of `P`.

  `embedding_weights` may be a `PartitionedVariable` as returned by using
  `tf.get_variable()` with a partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A list of `P` float tensors or values representing
      partitioned embedding tensors. Alternatively, a `PartitionedVariable`,
      created by partitioning along dimension 0. The total unpartitioned
      shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
      vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result
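# Illustrative usage sketch for safe_embedding_lookup_sparse above (not part
# of the original module); the weights and ids are hypothetical and shown only
# to illustrate shapes. Row 1 has no features and row 2 carries only an
# invalid id, so both rows fall back to the 0-vector because no default_id is
# given. The result has shape [3, 4].
def _safe_embedding_lookup_sparse_example():
  import tensorflow as tf
  embedding_weights = [tf.ones([10, 4])]  # vocab size 10, embedding dim 4
  sparse_ids = tf.SparseTensor(
      indices=[[0, 0], [0, 1], [2, 0]],
      values=tf.constant([3, 7, -1], dtype=tf.int64),
      dense_shape=[3, 2])
  return safe_embedding_lookup_sparse(embedding_weights, sparse_ids,
                                      combiner="mean")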
def reshape_fn(value):
  return sparse_ops.sparse_reshape(
      value,
      array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))
def safe_embedding_lookup_sparse(
    embedding_weights,
    sparse_ids,
    sparse_weights=None,
    combiner="mean",
    default_id=None,
    name="safe_embedding_lookup_sparse",
    partition_strategy=None,  # not used
    max_norm=None,
    return_trainable=False):
  """Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`.

  Lookup embedding results, accounting for empty features and invalid weights.

  Any IDs will be treated as valid, including non-positive IDs.
  Invalid weights (<= 0) are pruned from input weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is
  not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A single `dynamic_embedding.Variable` instance
      representing the complete embedding tensor.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    combined_embeddings:
      A dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
    trainable_wrap:
      A TrainableWrapper object used to fill the Optimizer's `var_list`.
      Only provided if `return_trainable` is True.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  if embedding_weights.key_dtype != sparse_ids.dtype:
    raise TypeError(
        "embedding_weights.key_dtype should be same with sparse_ids.dtype: "
        "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype))

  weights_dtype = sparse_weights.dtype if sparse_weights is not None else None
  if weights_dtype and embedding_weights.value_dtype != weights_dtype:
    raise TypeError(
        "embedding_weights.value_dtype should be same with sparse_weights.dtype"
        ": {} vs. {}".format(embedding_weights.value_dtype, weights_dtype))

  scope = variable_scope.get_variable_scope()
  full_name = scope.name + "/" + name if scope.name else name
  with ops.name_scope(full_name + "/"):
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    original_rank = (array_ops.size(original_shape)
                     if original_rank_dim is None else original_rank_dim)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid weights.
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    result, trainable_ = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=name + "/embedding_lookup_sparse",
        max_norm=max_norm,
        return_trainable=True)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name="where")

    # Reshape back from linear ids back into higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(math_ops.cast(original_shape, dtypes.int32), [0],
                            [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (tensor_shape.Dimension(original_rank_dim) -
             1).value).concatenate(result.get_shape()[1:]))
    return (final_result, trainable_) if return_trainable else final_result
def testProvideStaticallyMismatchedSizes(self):
  input_val = self._SparseTensorValue_5x6()
  sp_input = sparse_tensor.SparseTensor.from_value(input_val)
  with self.assertRaisesRegexp(ValueError, "Cannot reshape"):
    sparse_ops.sparse_reshape(sp_input, [4, 7])
def testStaticShapeInfoPreserved(self):
  sp_input = sparse_tensor.SparseTensor.from_value(
      self._SparseTensorValue_5x6())
  self.assertAllEqual((5, 6), sp_input.get_shape())
  sp_output = sparse_ops.sparse_reshape(sp_input, shape=(1, 5, 2, 3))
  self.assertAllEqual((1, 5, 2, 3), sp_output.get_shape())
def testStaticShapeInfoPreservedWithInferredDims(self):
  sp_input = sparse_tensor.SparseTensor.from_value(
      self._SparseTensorValue_2x3x4())
  self.assertAllEqual((2, 3, 4), sp_input.get_shape())
  sp_output = sparse_ops.sparse_reshape(sp_input, shape=(2, -1))
  self.assertAllEqual((2, 3 * 4), sp_output.get_shape())
def testRaisesIfMoreThanOneInferredDim(self):
  sp_input = sparse_tensor.SparseTensor.from_value(
      self._SparseTensorValue_2x3x4())
  with self.assertRaisesRegexp(ValueError, "At most one dimension can"):
    sparse_ops.sparse_reshape(sp_input, shape=(-1, 2, -1))
def testRaisesIfInferredShapeNotPossible(self):
  sp_input = sparse_tensor.SparseTensor.from_value(
      self._SparseTensorValue_2x3x4())
  with self.assertRaisesRegexp(ValueError, "Cannot reshape"):
    sparse_ops.sparse_reshape(sp_input, shape=(-1, 7))