def _construct_sparse_tensors_for_sparse_features(features, tensor_dict): """Merges SparseTensors of indices and values of SparseFeatures. Updates `tensor_dict`. For `SparseFeatures` in the values of `features` expects their `index_key`s and `index_value`s to be present in `tensor_dict` mapping to `SparseTensor`s. Removes those, constructs a single `SparseTensor` from them, and adds it to `tensor_dict` with the key from `features`. Args: features: A `dict` mapping feature keys to `SparseFeature` values. Values of other types will be ignored. tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Expected to contain keys of the `SparseFeature`s' `index_key`s and `value_key`s and mapping them to `SparseTensor`s. """ # Construct SparseTensors for SparseFeatures. for key in sorted(features.keys()): feature = features[key] if isinstance(feature, SparseFeature): sp_ids = tensor_dict[feature.index_key] sp_values = tensor_dict[feature.value_key] tensor_dict[key] = sparse_ops.sparse_merge( sp_ids, sp_values, feature.size, feature.already_sorted) # Remove tensors from dictionary that were only used to construct # SparseTensors for SparseFeature. for key in set(tensor_dict.keys()) - set(features.keys()): del tensor_dict[key]
def testInt32AndFloat32(self):
    """Merges int32 ids with float32 values and checks the evaluated result.

    The result is validated with `_AssertResultsSorted`.
    """
    num_ids = 50
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(
            dtypes.int32, dtypes.float32)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
        result = session.run(merged)
        self._AssertResultsSorted(result, num_ids)
def testInt64AndFloat32NonCanonicalOrder(self):
    """Merges int64 ids with float32 values using `already_sorted=True`.

    The evaluated result is validated with `_AssertResultsNotSorted`.
    """
    num_ids = 50
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(
            dtypes.int64, dtypes.float32)
        merged = sparse_ops.sparse_merge(
            sp_ids, sp_weights, num_ids, already_sorted=True)
        result = session.run(merged)
        self._AssertResultsNotSorted(result, num_ids)
def testInt64AndFloat32(self):
    """Merges int64 ids with float32 values and checks the evaluated result.

    The result is obtained through `self.evaluate` and validated with
    `_AssertResultsSorted`.
    """
    vocab_size = 50
    # The session object itself is never used (evaluation goes through
    # `self.evaluate`), so it is not bound to a name.
    with self.session(use_gpu=False):
        indices, values = self._SparseTensor_3x50(np.int64, np.float32)
        sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
        output = self.evaluate(sp_output)
        self._AssertResultsSorted(output, vocab_size)
def testInt64AndFloat64Shape(self):
    """Merges int64 ids with float64 values using a two-element `vocab_size`.

    Runs under `test_util.force_cpu()` and validates the evaluated result with
    `_AssertResultsSorted`.
    """
    vocab_dims = [50, 30]
    with test_util.force_cpu():
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float64)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, vocab_dims)
        result = self.evaluate(merged)
        self._AssertResultsSorted(result, vocab_dims)
def testInt64AndFloat64Shape(self):
    """Merges int64 ids with float64 values using a two-element `vocab_size`.

    Runs the merge in a CPU-only session and validates the result with
    `_AssertResultsSorted`.
    """
    vocab_dims = [50, 30]
    with self.session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float64)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, vocab_dims)
        result = session.run(merged)
        self._AssertResultsSorted(result, vocab_dims)
def testInt64AndFloat32(self):
    """Merges int64 ids with float32 values under `test_util.force_cpu()`.

    The evaluated result is validated with `_AssertResultsSorted`.
    """
    num_ids = 50
    with test_util.force_cpu():
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float32)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
        result = self.evaluate(merged)
        self._AssertResultsSorted(result, num_ids)
def testInt64AndFloat64(self):
    """Merges int64 ids with float64 values and checks the evaluated result.

    The result is validated with `_AssertResultsSorted`.
    """
    num_ids = 50
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(
            dtypes.int64, dtypes.float64)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
        result = session.run(merged)
        self._AssertResultsSorted(result, num_ids)
def testInt64AndFloat64(self):
    """Merges int64 ids with float64 values using `vocab_size=[50, 31]`.

    The evaluated result is validated with `_AssertResultsSorted`.
    """
    vocab_dims = [50, 31]
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float64)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, vocab_dims)
        result = session.run(merged)
        self._AssertResultsSorted(result, vocab_dims)
def testInt64AndFloat64Shape(self):
    """Merges int64 ids with float64 values using a two-element `vocab_size`.

    Runs under `test_util.force_cpu()` and validates the evaluated result with
    `_AssertResultsSorted`.
    """
    vocab_size = [50, 30]
    with test_util.force_cpu():
        indices, values = self._SparseTensor_3x50(np.int64, np.float64)
        sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
        # `force_cpu()` does not bind a session, so the previous `sess.run`
        # referenced an undefined name; evaluate through the test case instead.
        output = self.evaluate(sp_output)
        self._AssertResultsSorted(output, vocab_size)
def testInt64AndFloat64NonCanonicalOrder(self):
    """Merges int64 ids with float64 values using `already_sorted=True`.

    The evaluated result is validated with `_AssertResultsNotSorted`.
    """
    num_ids = 50
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float64)
        merged = sparse_ops.sparse_merge(
            sp_ids, sp_weights, num_ids, already_sorted=True)
        result = session.run(merged)
        self._AssertResultsNotSorted(result, num_ids)
def testInt64AndFloat32NonCanonicalOrder(self):
    """Merges int64 ids with float32 values using `already_sorted=True`.

    Runs under `test_util.force_cpu()` and validates the evaluated result with
    `_AssertResultsNotSorted`.
    """
    num_ids = 50
    with test_util.force_cpu():
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float32)
        merged = sparse_ops.sparse_merge(
            sp_ids, sp_weights, num_ids, already_sorted=True)
        result = self.evaluate(merged)
        self._AssertResultsNotSorted(result, num_ids)
def testInt64AndFloat64NonCanonicalOrder(self):
    """Merges with `already_sorted=True` and a tensor-valued `vocab_size`.

    The vocabulary size is passed to `sparse_merge` as an int64 constant
    tensor; the evaluated result is validated with `_AssertResultsNotSorted`
    against the plain Python integer.
    """
    num_ids = 50
    num_ids_tensor = constant_op.constant(num_ids, dtypes.int64)
    with self.session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(np.int64, np.float64)
        merged = sparse_ops.sparse_merge(
            sp_ids, sp_weights, num_ids_tensor, already_sorted=True)
        result = session.run(merged)
        self._AssertResultsNotSorted(result, num_ids)
def testInt32AndFloat32(self):
    """Checks sparse_merge for each pairing of value and tensor inputs.

    Both the ids and the weights are tried as the object returned by
    `_SparseTensorValue_3x50` and as `ops.SparseTensor.from_value` of it; each
    combination is validated with `_AssertResultsSorted`.
    """
    num_ids = 50
    ids_value, weights_value = self._SparseTensorValue_3x50(
        np.int32, np.float32)
    with self.test_session(use_gpu=False) as session:
        for sp_ids in (ids_value, ops.SparseTensor.from_value(ids_value)):
            weight_variants = (
                weights_value,
                ops.SparseTensor.from_value(weights_value),
            )
            for sp_weights in weight_variants:
                merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
                result = session.run(merged)
                self._AssertResultsSorted(result, num_ids)
def testInt64AndFloat64NonCanonicalOrder(self):
    """Merges with `already_sorted=True` and a tensor-valued `vocab_size`.

    Runs under `test_util.force_cpu()`. The vocabulary size is passed to
    `sparse_merge` as an int64 constant tensor; the evaluated result is
    validated with `_AssertResultsNotSorted` against the plain integer.
    """
    vocab_size = 50
    vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
    with test_util.force_cpu():
        indices, values = self._SparseTensor_3x50(np.int64, np.float64)
        sp_output = sparse_ops.sparse_merge(
            indices, values, vocab_size_tensor, already_sorted=True)
        # `force_cpu()` does not bind a session, so the previous `sess.run`
        # referenced an undefined name; evaluate through the test case instead.
        output = self.evaluate(sp_output)
        self._AssertResultsNotSorted(output, vocab_size)
def testInt32AndFloat32(self):
    """Checks sparse_merge for each pairing of value and tensor inputs.

    Runs under `test_util.force_cpu()`. Both the ids and the weights are tried
    as the object returned by `_SparseTensorValue_3x50` and as
    `sparse_tensor.SparseTensor.from_value` of it; each combination is
    validated with `_AssertResultsSorted`.
    """
    num_ids = 50
    ids_value, weights_value = self._SparseTensorValue_3x50(
        np.int32, np.float32)
    with test_util.force_cpu():
        id_variants = (
            ids_value,
            sparse_tensor.SparseTensor.from_value(ids_value),
        )
        for sp_ids in id_variants:
            weight_variants = (
                weights_value,
                sparse_tensor.SparseTensor.from_value(weights_value),
            )
            for sp_weights in weight_variants:
                merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
                result = self.evaluate(merged)
                self._AssertResultsSorted(result, num_ids)
def testInt64AndFloat64NonCanonicalOrder(self):
    """Merges with `already_sorted=True` and a tensor-valued `vocab_size`.

    The vocabulary size is passed to `sparse_merge` as an int64 constant
    tensor; the result is obtained through `self.evaluate` and validated with
    `_AssertResultsNotSorted` against the plain integer.
    """
    vocab_size = 50
    vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
    # The session object itself is never used (evaluation goes through
    # `self.evaluate`), so it is not bound to a name.
    with self.session(use_gpu=False):
        indices, values = self._SparseTensor_3x50(np.int64, np.float64)
        sp_output = sparse_ops.sparse_merge(
            indices, values, vocab_size_tensor, already_sorted=True)
        output = self.evaluate(sp_output)
        self._AssertResultsNotSorted(output, vocab_size)
def testInt64AndFloat64(self):
    """Merges int64 ids with float64 values and pins the exact result.

    Asserts the indices, values and shape of the evaluated merge output.
    """
    num_ids = 50
    expected_indices = [[0, 0], [1, 10], [1, 13], [1, 14], [2, 32], [2, 33]]
    expected_values = [-3, 1, 4, 1, 5, 9]
    with self.test_session(use_gpu=False) as session:
        sp_ids, sp_weights = self._SparseTensor_3x50(
            dtypes.int64, dtypes.float64)
        merged = sparse_ops.sparse_merge(sp_ids, sp_weights, num_ids)
        result = session.run(merged)
        self.assertAllEqual(result.indices, expected_indices)
        self.assertAllEqual(result.values, expected_values)
        self.assertAllEqual(result.shape, [3, num_ids])
def testShouldSetLastDimensionInDynamicShape(self):
    """Checks the merged tensor's static last dimension equals `vocab_size`.

    Both inputs get their `dense_shape` from a `placeholder_with_default`, and
    the test asserts that `merged.get_shape()[1]` is 5.
    """
    with ops.Graph().as_default():
        default_shape = constant_op.constant([2, 2], dtype=dtypes.int64)
        runtime_shape = array_ops.placeholder_with_default(
            default_shape, shape=[2])
        sp_ids = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1]],
            values=[1, 3],
            dense_shape=runtime_shape)
        sp_weights = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1]],
            values=[0.4, 0.7],
            dense_shape=runtime_shape)
        result = sparse_ops.sparse_merge(
            sp_ids=sp_ids, sp_values=sp_weights, vocab_size=5)
        self.assertEqual(5, result.get_shape()[1])
def testShouldSetLastDimensionInDynamicShape(self):
    """Checks that sparse_merge sets the last static dimension to `vocab_size`.

    The ids and values share a `dense_shape` fed through
    `placeholder_with_default`; the assertion is on `merged.get_shape()[1]`.
    """
    with ops.Graph().as_default():
        shape_default = constant_op.constant([2, 2], dtype=dtypes.int64)
        fed_shape = array_ops.placeholder_with_default(
            shape_default, shape=[2])
        coords = [[0, 0], [0, 1]]
        id_input = sparse_tensor.SparseTensor(
            indices=coords, values=[1, 3], dense_shape=fed_shape)
        value_input = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1]], values=[0.4, 0.7], dense_shape=fed_shape)
        merged_output = sparse_ops.sparse_merge(
            sp_ids=id_input, sp_values=value_input, vocab_size=5)
        self.assertEqual(5, merged_output.get_shape()[1])
def testInt64AndFloat64(self):
    """Merges int64 ids with float64 values and pins the exact result.

    Asserts the indices, values and shape of the evaluated merge output.
    """
    vocab = 50
    with self.test_session(use_gpu=False) as session:
        id_input, value_input = self._SparseTensor_3x50(
            dtypes.int64, dtypes.float64)
        evaluated = session.run(
            sparse_ops.sparse_merge(id_input, value_input, vocab))
        want_indices = [[0, 0], [1, 10], [1, 13], [1, 14], [2, 32], [2, 33]]
        self.assertAllEqual(evaluated.indices, want_indices)
        want_values = [-3, 1, 4, 1, 5, 9]
        self.assertAllEqual(evaluated.values, want_values)
        self.assertAllEqual(evaluated.shape, [3, vocab])
def _transform_feature(self, inputs):
    """Returns a dense `Tensor` representing the feature.

    When the wrapped categorical column supplies a weight tensor, ids and
    weights are merged with `sparse_merge`, sliced starting at `[0, 0]`, and
    scattered into a dense tensor. Otherwise the ids are densified, one-hot
    encoded and summed over axis -2 to give a multi-hot encoding.

    Args:
      inputs: A `_LazyBuilder` object to access inputs.

    Returns:
      Transformed feature `Tensor`.
    """
    # pylint: disable=protected-access
    sparse_pair = self.categorical_column._get_sparse_tensors(inputs)
    # pylint: enable=protected-access
    ids = sparse_pair.id_tensor
    weights = sparse_pair.weight_tensor

    if weights is None:
        dense_ids = sparse_ops.sparse_tensor_to_dense(ids, default_value=-1)
        # One hot must be float for tf.concat reasons since all other inputs
        # to input_layer are float32.
        one_hot = array_ops.one_hot(
            dense_ids,
            depth=self._variable_shape[-1],
            on_value=1.0,
            off_value=0.0)
        # Reduce to get a multi-hot per example.
        return math_ops.reduce_sum(one_hot, axis=[-2])

    # Weighted column: return the weights as a dense tensor.
    merged = sparse_ops.sparse_merge(
        sp_ids=ids,
        sp_values=weights,
        vocab_size=int(self._variable_shape[-1]))
    # Remove (?, -1) index by slicing from the origin.
    sliced = sparse_ops.sparse_slice(merged, [0, 0], merged.dense_shape)
    return array_ops.scatter_nd(
        sliced.indices, sliced.values, sliced.dense_shape)
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict): """Merges SparseTensors of indices and values of SparseFeatures. Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the values of `features` expects their `index_key`s and `index_value`s to be present in `tensor_dict` mapping to `SparseTensor`s. Constructs a single `SparseTensor` from them, and adds it to the result with the key from `features`. Copies other keys and values from `tensor_dict` with keys present in `features`. Args: features: A `dict` mapping feature keys to `SparseFeature` values. Values of other types will be ignored. tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Expected to contain keys of the `SparseFeature`s' `index_key`s and `value_key`s and mapping them to `SparseTensor`s. Returns: A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Similar to `tensor_dict` except each `SparseFeature`s in `features` results in a single `SparseTensor`. """ tensor_dict = dict(tensor_dict) # Do not modify argument passed in. # Construct SparseTensors for SparseFeatures. for key in sorted(features.keys()): feature = features[key] if isinstance(feature, SparseFeature): if isinstance(feature.index_key, str): sp_ids = tensor_dict[feature.index_key] else: sp_ids = [ tensor_dict[index_key] for index_key in feature.index_key ] sp_values = tensor_dict[feature.value_key] tensor_dict[key] = sparse_ops.sparse_merge( sp_ids, sp_values, vocab_size=feature.size, already_sorted=feature.already_sorted) # Remove tensors from dictionary that were only used to construct # SparseTensors for SparseFeature. for key in set(tensor_dict) - set(features): del tensor_dict[key] return tensor_dict
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict): """Merges SparseTensors of indices and values of SparseFeatures. Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the values of `features` expects their `index_key`s and `index_value`s to be present in `tensor_dict` mapping to `SparseTensor`s. Constructs a single `SparseTensor` from them, and adds it to the result with the key from `features`. Copies other keys and values from `tensor_dict` with keys present in `features`. Args: features: A `dict` mapping feature keys to `SparseFeature` values. Values of other types will be ignored. tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Expected to contain keys of the `SparseFeature`s' `index_key`s and `value_key`s and mapping them to `SparseTensor`s. Returns: A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Similar to `tensor_dict` except each `SparseFeature`s in `features` results in a single `SparseTensor`. """ tensor_dict = dict(tensor_dict) # Do not modify argument passed in. # Construct SparseTensors for SparseFeatures. for key in sorted(features.keys()): feature = features[key] if isinstance(feature, SparseFeature): if isinstance(feature.index_key, str): sp_ids = tensor_dict[feature.index_key] else: sp_ids = [tensor_dict[index_key] for index_key in feature.index_key] sp_values = tensor_dict[feature.value_key] tensor_dict[key] = sparse_ops.sparse_merge( sp_ids, sp_values, vocab_size=feature.size, already_sorted=feature.already_sorted) # Remove tensors from dictionary that were only used to construct # SparseTensors for SparseFeature. for key in set(tensor_dict) - set(features): del tensor_dict[key] return tensor_dict
def _construct_tensors_for_composite_features(features, tensor_dict):
    """Creates tensors for SparseFeatures and RaggedFeatures.

    Builds a new dict from `tensor_dict`; the argument itself is not modified.

    For each key in `features` whose value is a `SparseFeature`, the tensors
    under its `index_key`(s) and `value_key` are merged into one SparseTensor
    stored under that key. For each key whose value is a `RaggedFeature`, the
    tensor under its value key (the feature key when `value_key` is None) is
    wrapped with the feature's partitions into one RaggedTensor stored under
    that key. Any other key in `features` keeps its value from `tensor_dict`.
    Keys of `tensor_dict` that are not in `features` are dropped.

    Args:
      features: A `dict` mapping feature keys to `SparseFeature` or
        `RaggedFeature` values. Values of other types will be ignored.
      tensor_dict: A `dict` mapping feature keys to `Tensor`, `SparseTensor`,
        and `RaggedTensor` values. Expected to contain the keys referenced by
        the composite features.

    Returns:
      A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
      `RaggedTensor` values, with one composite tensor per `SparseFeature` or
      `RaggedFeature` in `features`.
    """
    result = dict(tensor_dict)  # Do not modify argument passed in.
    composites = {}

    for key in sorted(features.keys()):
        spec = features[key]

        if isinstance(spec, SparseFeature):
            # Construct SparseTensors for SparseFeatures.
            if isinstance(spec.index_key, str):
                sp_ids = result[spec.index_key]
            else:
                sp_ids = [result[index_key] for index_key in spec.index_key]
            sp_values = result[spec.value_key]
            composites[key] = sparse_ops.sparse_merge(
                sp_ids,
                sp_values,
                vocab_size=spec.size,
                already_sorted=spec.already_sorted)

        elif isinstance(spec, RaggedFeature):
            # Construct RaggedTensors for RaggedFeatures.
            value_key = spec.value_key if spec.value_key is not None else key
            ragged = result[value_key]

            if not isinstance(ragged, ragged_tensor.RaggedTensor):
                # We processed a single tf.Example.
                for partition in reversed(spec.partitions):
                    ragged = _add_ragged_partition(
                        ragged, partition, result, spec.row_splits_dtype,
                        spec.validate)
            else:
                # We processed a batch of tf.Example or tf.SequenceExample, or
                # a single tf.SequenceExample.
                outer_splits = None
                if ragged.ragged_rank > 1:
                    # A batch of SequenceExample effectively has two batch
                    # dimensions. Collapse them here and restore them below
                    # (using outer_splits).
                    outer_splits = ragged.row_splits
                    ragged = ragged.values
                for partition in reversed(spec.partitions):
                    ragged = _add_batched_ragged_partition(
                        ragged, partition, result, key, spec.validate,
                        outer_splits)
                if outer_splits is not None:
                    ragged = ragged_tensor.RaggedTensor.from_row_splits(
                        ragged, outer_splits, validate=spec.validate)

            composites[key] = ragged

    # Process updates after all composite tensors have been constructed (in
    # case multiple features use the same value_key, and one uses that key as
    # its feature key).
    result.update(composites)

    # Remove tensors from the dictionary that were only used to construct
    # tensors for SparseFeature or RaggedFeature.
    for key in set(result) - set(features):
        del result[key]
    return result
def _training_examples_and_variables():
    """Returns dictionaries for training examples and variables.

    Walks `linear_feature_columns` (de-duplicated, ordered by `key`) and, based
    on the column type, appends the column's tensor to the dense or sparse
    feature list and its first variable to the matching weights list.

    Returns:
      A pair `(examples, sdca_variables)` of dicts. `examples` has the keys
      `sparse_features`, `dense_features`, `example_labels`,
      `example_weights` and `example_ids`; `sdca_variables` has
      `sparse_features_weights` and `dense_features_weights`.

    Raises:
      ValueError: if a real-valued column has a `dimension` other than 1, or
        if a column type is not handled.
    """
    # pylint: disable=protected-access
    batch_size = targets.get_shape()[0]

    # Dense and sparse features, and their weights (variables), for SDCA.
    # TODO(sibyl-vie3Poto): Reshape variables stored as values in
    # column_to_variables dict as 1-dimensional tensors.
    dense_features = []
    sparse_features = []
    dense_features_weights = []
    sparse_features_weights = []

    ordered_columns = sorted(
        set(linear_feature_columns), key=lambda col: col.key)
    for column in ordered_columns:
        tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):
            # A real-valued column corresponds to a dense feature in SDCA.
            if column.dimension != 1:
                raise ValueError(
                    "Invalid column dimension %d for column %s. SDCAOptimizer "
                    "supports only 1-dimensional dense feature columns." %
                    (column.dimension, column.name))
            flattened = array_ops.reshape(tensor, shape=[-1])
            dense_features.append(flattened)
            # For real valued columns, the variables list contains exactly
            # one element.
            dense_features_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):
            # A bucketized column corresponds to a sparse feature in SDCA. It
            # is "sparsified" by converting it to a SparseTensor representing
            # the one-hot encoding of the bucketized feature.
            dense_buckets = column.to_dnn_input_layer(tensor)
            sparse_features.append(_dense_to_sparse_tensor(dense_buckets))
            # For bucketized columns, the variables list contains exactly one
            # element.
            sparse_features_weights.append(columns_to_variables[column][0])
        elif isinstance(column, (layers.feature_column._CrossedColumn,
                                 layers.feature_column._SparseColumn)):
            # Every id gets a weight of one.
            unit_weights = ops.SparseTensor(
                indices=tensor.indices,
                values=array_ops.ones_like(tensor.values),
                shape=tensor.shape)
            merged = sparse_ops.sparse_merge(
                tensor, unit_weights, column.length)
            sparse_features.append(math_ops.to_float(merged))
            sparse_features_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
            ids = column.id_tensor(tensor)
            weights = column.weight_tensor(tensor)
            merged = sparse_ops.sparse_merge(
                ids,
                weights,
                column.length,
                name="{}_sparse_merge".format(column.name))
            sparse_features.append(
                math_ops.to_float(
                    merged, name="{}_to_float".format(column.name)))
            sparse_features_weights.append(columns_to_variables[column][0])
        else:
            raise ValueError(
                "SDCAOptimizer does not support column type %s." %
                type(column).__name__)
    # pylint: enable=protected-access

    # Fall back to a weight of one per example when no weight column is set.
    if weight_column_name:
        example_weights = array_ops.reshape(
            features[weight_column_name], shape=[-1])
    else:
        example_weights = array_ops.ones([batch_size])
    example_ids = features[self._example_id_column]

    examples = {
        "sparse_features": sparse_features,
        "dense_features": dense_features,
        "example_labels": math_ops.to_float(
            array_ops.reshape(targets, shape=[-1])),
        "example_weights": example_weights,
        "example_ids": example_ids,
    }
    sdca_variables = {
        "sparse_features_weights": sparse_features_weights,
        "dense_features_weights": dense_features_weights,
    }
    return examples, sdca_variables
def _training_examples_and_variables():
    """Returns dictionaries for training examples and variables.

    Each column of `linear_feature_columns` (de-duplicated, ordered by `key`)
    contributes one tensor to the dense or the sparse feature list and its
    first variable to the corresponding weights list, depending on its type.

    Returns:
      A pair `(examples, sdca_variables)` of dicts. `examples` has the keys
      `sparse_features`, `dense_features`, `example_labels`,
      `example_weights` and `example_ids`; `sdca_variables` has
      `sparse_features_weights` and `dense_features_weights`.

    Raises:
      ValueError: if a real-valued column has a `dimension` other than 1, or
        if a column type is not handled.
    """
    batch_size = targets.get_shape()[0]

    # Lists of dense and sparse features and of their weights (variables) for
    # SDCA, filled in while iterating over the feature columns.
    # TODO(sibyl-vie3Poto): Reshape variables stored as values in
    # column_to_variables dict as 1-dimensional tensors.
    dense_inputs, sparse_inputs = [], []
    dense_vars, sparse_vars = [], []

    # pylint: disable=protected-access
    for column in sorted(set(linear_feature_columns), key=lambda c: c.key):
        column_tensor = features[column]

        if isinstance(column, layers.feature_column._RealValuedColumn):
            # A real-valued column corresponds to a dense feature in SDCA.
            if column.dimension != 1:
                raise ValueError(
                    "Invalid column dimension %d for column %s. SDCAOptimizer "
                    "supports only 1-dimensional dense feature columns." %
                    (column.dimension, column.name))
            dense_inputs.append(
                array_ops.reshape(column_tensor, shape=[-1]))
            # For real valued columns, the variables list contains exactly
            # one element.
            dense_vars.append(columns_to_variables[column][0])

        elif isinstance(column, layers.feature_column._BucketizedColumn):
            # A bucketized column corresponds to a sparse feature in SDCA. It
            # is "sparsified" by converting it to a SparseTensor representing
            # the one-hot encoding of the bucketized feature.
            bucket_dense = column.to_dnn_input_layer(column_tensor)
            bucket_sparse = _dense_to_sparse_tensor(bucket_dense)
            sparse_inputs.append(bucket_sparse)
            # For bucketized columns, the variables list contains exactly one
            # element.
            sparse_vars.append(columns_to_variables[column][0])

        elif isinstance(column, (layers.feature_column._CrossedColumn,
                                 layers.feature_column._SparseColumn)):
            # Pair every id with a weight of one before merging.
            ones_tensor = ops.SparseTensor(
                indices=column_tensor.indices,
                values=array_ops.ones_like(column_tensor.values),
                shape=column_tensor.shape)
            merged_tensor = sparse_ops.sparse_merge(
                column_tensor, ones_tensor, column.length)
            sparse_inputs.append(math_ops.to_float(merged_tensor))
            sparse_vars.append(columns_to_variables[column][0])

        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
            id_part = column.id_tensor(column_tensor)
            weight_part = column.weight_tensor(column_tensor)
            merged_tensor = sparse_ops.sparse_merge(
                id_part,
                weight_part,
                column.length,
                name="{}_sparse_merge".format(column.name))
            as_float = math_ops.to_float(
                merged_tensor, name="{}_to_float".format(column.name))
            sparse_inputs.append(as_float)
            sparse_vars.append(columns_to_variables[column][0])

        else:
            raise ValueError(
                "SDCAOptimizer does not support column type %s." %
                type(column).__name__)
    # pylint: enable=protected-access

    # Use the configured weight column when present, otherwise weight every
    # example with one.
    if weight_column_name:
        example_weights = array_ops.reshape(
            features[weight_column_name], shape=[-1])
    else:
        example_weights = array_ops.ones([batch_size])
    example_ids = features[self._example_id_column]

    labels = math_ops.to_float(array_ops.reshape(targets, shape=[-1]))
    examples = dict(
        sparse_features=sparse_inputs,
        dense_features=dense_inputs,
        example_labels=labels,
        example_weights=example_weights,
        example_ids=example_ids)
    sdca_variables = dict(
        sparse_features_weights=sparse_vars,
        dense_features_weights=dense_vars)
    return examples, sdca_variables