Example #1
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
  """Merges SparseTensors of indices and values of SparseFeatures.

  Updates `tensor_dict`. For `SparseFeatures` in the values of `features`
  expects their `index_key`s and `value_key`s to be present in `tensor_dict`
  mapping to `SparseTensor`s. Removes those, constructs a single `SparseTensor`
  from them, and adds it to `tensor_dict` with the key from `features`.

  Args:
    features: A `dict` mapping feature keys to `SparseFeature` values.
      Values of other types will be ignored.
    tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
      values. Expected to contain keys of the `SparseFeature`s' `index_key`s and
      `value_key`s, mapping them to `SparseTensor`s.
  """
  # Construct SparseTensors for SparseFeatures.
  for key in sorted(features.keys()):
    feature = features[key]
    if isinstance(feature, SparseFeature):
      sp_ids = tensor_dict[feature.index_key]
      sp_values = tensor_dict[feature.value_key]
      tensor_dict[key] = sparse_ops.sparse_merge(
          sp_ids,
          sp_values,
          feature.size,
          feature.already_sorted)
  # Remove tensors from dictionary that were only used to construct
  # SparseTensors for SparseFeature.
  for key in set(tensor_dict.keys()) - set(features.keys()):
    del tensor_dict[key]
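
A note on the op all of these examples revolve around: `sparse_merge` combines a `SparseTensor` of ids and a parallel `SparseTensor` of values into a single `SparseTensor` whose column indices are the id values and whose last dimension has size `vocab_size`. A minimal standalone sketch, assuming a TF 1.x environment where `tf.sparse_merge` is still available (it was deprecated and removed in TF 2.x):

# Minimal sketch of sparse_merge semantics (assumes tensorflow<2.0).
import tensorflow as tf

# Two rows; id values name vocabulary columns, weights supply the values.
ids = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                      values=[3, 10, 14],
                      dense_shape=[2, 2])
weights = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                          values=[1.0, 0.5, 2.0],
                          dense_shape=[2, 2])
merged = tf.sparse_merge(ids, weights, vocab_size=50)

with tf.Session() as sess:
    out = sess.run(merged)
    # out.indices      -> [[0, 3], [1, 10], [1, 14]]
    # out.values       -> [1.0, 0.5, 2.0]
    # out.dense_shape  -> [2, 50]
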
Example #2
  def testInt32AndFloat32(self):
    vocab_size = 50
    with self.test_session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(dtypes.int32, dtypes.float32)
      sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

      output = sess.run(sp_output)
      self._AssertResultsSorted(output, vocab_size)
Example #3
    def testInt64AndFloat32NonCanonicalOrder(self):
        vocab_size = 50
        with self.test_session(use_gpu=False) as sess:
            indices, values = self._SparseTensor_3x50(dtypes.int64, dtypes.float32)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size, already_sorted=True)

            output = sess.run(sp_output)
            self._AssertResultsNotSorted(output, vocab_size)
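
The `already_sorted=True` variants here and in later examples exercise an optimization flag: when set, `sparse_merge` trusts the caller that the input is already in canonical row-major order and skips the re-sort, so non-canonical input passes through unsorted. The `_SparseTensor_3x50` and `_AssertResults*` helpers are fixtures from TensorFlow's sparse_ops test suite and are not shown in these excerpts; a plausible sketch of what the sortedness assertion boils down to (hypothetical helper, NumPy only):

# Hypothetical sketch of a canonical-order check over output.indices.
import numpy as np

def is_canonically_ordered(indices):
    # indices: an [N, 2] array of (row, column) pairs; canonical order
    # is lexicographic, row-major.
    order = np.lexsort((indices[:, 1], indices[:, 0]))
    return bool(np.all(order == np.arange(len(indices))))
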
Example #4
    def testInt64AndFloat32(self):
        vocab_size = 50
        with self.session(use_gpu=False) as sess:
            indices, values = self._SparseTensor_3x50(np.int64, np.float32)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

            output = self.evaluate(sp_output)
            self._AssertResultsSorted(output, vocab_size)
Example #5
  def testInt64AndFloat64Shape(self):
    vocab_size = [50, 30]
    with test_util.force_cpu():
      indices, values = self._SparseTensor_3x50(np.int64, np.float64)
      sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

      output = self.evaluate(sp_output)
      self._AssertResultsSorted(output, vocab_size)
Example #6
  def testInt64AndFloat64Shape(self):
    vocab_size = [50, 30]
    with self.session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(np.int64, np.float64)
      sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

      output = sess.run(sp_output)
      self._AssertResultsSorted(output, vocab_size)
Example #7
    def testInt64AndFloat32(self):
        vocab_size = 50
        with test_util.force_cpu():
            indices, values = self._SparseTensor_3x50(np.int64, np.float32)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

            output = self.evaluate(sp_output)
            self._AssertResultsSorted(output, vocab_size)
Example #8
  def testInt64AndFloat64(self):
    vocab_size = 50
    with self.test_session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(dtypes.int64, dtypes.float64)
      sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

      output = sess.run(sp_output)
      self._AssertResultsSorted(output, vocab_size)
Example #9
    def testInt64AndFloat64(self):
        vocab_size = [50, 31]
        with self.test_session(use_gpu=False) as sess:
            indices, values = self._SparseTensor_3x50(np.int64, np.float64)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

            output = sess.run(sp_output)
            self._AssertResultsSorted(output, vocab_size)
Example #10
    def testInt64AndFloat64Shape(self):
        vocab_size = [50, 30]
        with test_util.force_cpu():
            indices, values = self._SparseTensor_3x50(np.int64, np.float64)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

            output = self.evaluate(sp_output)
            self._AssertResultsSorted(output, vocab_size)
Example #11
  def testInt64AndFloat64NonCanonicalOrder(self):
    vocab_size = 50
    with self.test_session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(np.int64, np.float64)
      sp_output = sparse_ops.sparse_merge(
          indices, values, vocab_size, already_sorted=True)

      output = sess.run(sp_output)
      self._AssertResultsNotSorted(output, vocab_size)
Example #12
  def testInt64AndFloat32NonCanonicalOrder(self):
    vocab_size = 50
    with test_util.force_cpu():
      indices, values = self._SparseTensor_3x50(np.int64, np.float32)
      sp_output = sparse_ops.sparse_merge(
          indices, values, vocab_size, already_sorted=True)

      output = self.evaluate(sp_output)
      self._AssertResultsNotSorted(output, vocab_size)
Example #13
  def testInt64AndFloat64NonCanonicalOrder(self):
    vocab_size = 50
    vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
    with self.session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(np.int64, np.float64)
      sp_output = sparse_ops.sparse_merge(
          indices, values, vocab_size_tensor, already_sorted=True)

      output = sess.run(sp_output)
      self._AssertResultsNotSorted(output, vocab_size)
Example #14
  def testInt32AndFloat32(self):
    vocab_size = 50
    indices_v, values_v = self._SparseTensorValue_3x50(np.int32, np.float32)
    with self.test_session(use_gpu=False) as sess:
      for indices in (indices_v, ops.SparseTensor.from_value(indices_v)):
        for values in (values_v, ops.SparseTensor.from_value(values_v)):
          sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

          output = sess.run(sp_output)
          self._AssertResultsSorted(output, vocab_size)
Example #15
    def testInt64AndFloat64NonCanonicalOrder(self):
        vocab_size = 50
        vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
        with test_util.force_cpu():
            indices, values = self._SparseTensor_3x50(np.int64, np.float64)
            sp_output = sparse_ops.sparse_merge(indices,
                                                values,
                                                vocab_size_tensor,
                                                already_sorted=True)

            output = self.evaluate(sp_output)
            self._AssertResultsNotSorted(output, vocab_size)
Example #16
  def testInt32AndFloat32(self):
    vocab_size = 50
    indices_v, values_v = self._SparseTensorValue_3x50(np.int32, np.float32)
    with test_util.force_cpu():
      for indices in (indices_v,
                      sparse_tensor.SparseTensor.from_value(indices_v)):
        for values in (values_v,
                       sparse_tensor.SparseTensor.from_value(values_v)):
          sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

          output = self.evaluate(sp_output)
          self._AssertResultsSorted(output, vocab_size)
Example #17
    def testInt64AndFloat64NonCanonicalOrder(self):
        vocab_size = 50
        vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
        with self.session(use_gpu=False) as sess:
            indices, values = self._SparseTensor_3x50(np.int64, np.float64)
            sp_output = sparse_ops.sparse_merge(indices,
                                                values,
                                                vocab_size_tensor,
                                                already_sorted=True)

            output = self.evaluate(sp_output)
            self._AssertResultsNotSorted(output, vocab_size)
Example #18
    def testInt64AndFloat64(self):
        vocab_size = 50
        with self.test_session(use_gpu=False) as sess:
            indices, values = self._SparseTensor_3x50(dtypes.int64,
                                                      dtypes.float64)
            sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

            output = sess.run(sp_output)
            self.assertAllEqual(
                output.indices,
                [[0, 0], [1, 10], [1, 13], [1, 14], [2, 32], [2, 33]])
            self.assertAllEqual(output.values, [-3, 1, 4, 1, 5, 9])
            self.assertAllEqual(output.shape, [3, vocab_size])
Example #19
    def testShouldSetLastDimensionInDynamicShape(self):
        with ops.Graph().as_default():
            shape = constant_op.constant([2, 2], dtype=dtypes.int64)
            dynamic_shape = array_ops.placeholder_with_default(shape,
                                                               shape=[2])
            ids = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1]],
                                             values=[1, 3],
                                             dense_shape=dynamic_shape)
            values = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1]],
                                                values=[0.4, 0.7],
                                                dense_shape=dynamic_shape)
            merged = sparse_ops.sparse_merge(sp_ids=ids,
                                             sp_values=values,
                                             vocab_size=5)
            self.assertEqual(5, merged.get_shape()[1])
Example #20
  def testShouldSetLastDimensionInDynamicShape(self):
    with ops.Graph().as_default():
      shape = constant_op.constant([2, 2], dtype=dtypes.int64)
      dynamic_shape = array_ops.placeholder_with_default(shape, shape=[2])
      ids = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1]],
          values=[1, 3],
          dense_shape=dynamic_shape)
      values = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1]],
          values=[0.4, 0.7],
          dense_shape=dynamic_shape)
      merged = sparse_ops.sparse_merge(
          sp_ids=ids, sp_values=values, vocab_size=5)
      self.assertEqual(5, merged.get_shape()[1])
Example #21
  def testInt64AndFloat64(self):
    vocab_size = 50
    with self.test_session(use_gpu=False) as sess:
      indices, values = self._SparseTensor_3x50(dtypes.int64, dtypes.float64)
      sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)

      output = sess.run(sp_output)
      self.assertAllEqual(
          output.indices,
          [[0, 0], [1, 10], [1, 13], [1, 14], [2, 32], [2, 33]])
      self.assertAllEqual(
          output.values,
          [-3, 1, 4, 1, 5, 9])
      self.assertAllEqual(
          output.shape,
          [3, vocab_size])
Example #22
  def _transform_feature(self, inputs):
    """Returns dense `Tensor` representing feature.

    Args:
      inputs: A `_LazyBuilder` object to access inputs.

    Returns:
      Transformed feature `Tensor`.

    Raises:
      ValueError: if input rank is not known at graph building time.
    """
    id_weight_pair = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
    id_tensor = id_weight_pair.id_tensor
    weight_tensor = id_weight_pair.weight_tensor

    # If the underlying column is weighted, return the input as a dense tensor.
    if weight_tensor is not None:
      weighted_column = sparse_ops.sparse_merge(
          sp_ids=id_tensor,
          sp_values=weight_tensor,
          vocab_size=int(self._variable_shape[-1]))
      # Remove (?, -1) index
      weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0],
                                                weighted_column.dense_shape)
      # return sparse_ops.sparse_tensor_to_dense(weighted_column)
      return array_ops.scatter_nd(weighted_column.indices,
                                  weighted_column.values,
                                  weighted_column.dense_shape)

    dense_id_tensor = sparse_ops.sparse_tensor_to_dense(
        id_tensor, default_value=-1)

    # One hot must be float for tf.concat reasons since all other inputs to
    # input_layer are float32.
    one_hot_id_tensor = array_ops.one_hot(
        dense_id_tensor,
        depth=self._variable_shape[-1],
        on_value=1.0,
        off_value=0.0)

    # Reduce to get a multi-hot per example.
    return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2])
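
Two things happen in the snippet above: the weighted branch merges ids and weights with `sparse_merge` and scatters the result into a dense tensor, while the unweighted branch densifies the ids with a `-1` default and builds a multi-hot encoding by one-hot expanding and summing. A standalone sketch of that multi-hot reduction (the id values are invented; `tf.one_hot` maps out-of-range ids such as `-1` to an all-zero row):

# Sketch of the one_hot + reduce_sum multi-hot trick used above.
import tensorflow as tf

dense_ids = tf.constant([[1, 3, -1],   # -1 is the sparse fill value
                         [0, -1, -1]])
one_hot = tf.one_hot(dense_ids, depth=5, on_value=1.0, off_value=0.0)
multi_hot = tf.reduce_sum(one_hot, axis=-2)
# Row 0 -> [0., 1., 0., 1., 0.]; row 1 -> [1., 0., 0., 0., 0.]
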
Example #23
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
    """Merges SparseTensors of indices and values of SparseFeatures.

    Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the
    values of `features` expects their `index_key`s and `value_key`s to be
    present in `tensor_dict` mapping to `SparseTensor`s. Constructs a single
    `SparseTensor` from them, and adds it to the result with the key from
    `features`. Copies other keys and values from `tensor_dict` with keys
    present in `features`.

    Args:
      features: A `dict` mapping feature keys to `SparseFeature` values.
        Values of other types will be ignored.
      tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
        values. Expected to contain keys of the `SparseFeature`s' `index_key`s
        and `value_key`s, mapping them to `SparseTensor`s.

    Returns:
      A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
      Similar to `tensor_dict` except each `SparseFeature` in `features`
      results in a single `SparseTensor`.
    """
    tensor_dict = dict(tensor_dict)  # Do not modify argument passed in.
    # Construct SparseTensors for SparseFeatures.
    for key in sorted(features.keys()):
        feature = features[key]
        if isinstance(feature, SparseFeature):
            if isinstance(feature.index_key, str):
                sp_ids = tensor_dict[feature.index_key]
            else:
                sp_ids = [
                    tensor_dict[index_key] for index_key in feature.index_key
                ]
            sp_values = tensor_dict[feature.value_key]
            tensor_dict[key] = sparse_ops.sparse_merge(
                sp_ids,
                sp_values,
                vocab_size=feature.size,
                already_sorted=feature.already_sorted)
    # Remove tensors from dictionary that were only used to construct
    # SparseTensors for SparseFeature.
    for key in set(tensor_dict) - set(features):
        del tensor_dict[key]
    return tensor_dict
Example #24
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
  """Merges SparseTensors of indices and values of SparseFeatures.

  Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the values
  of `features` expects their `index_key`s and `value_key`s to be present in
  `tensor_dict` mapping to `SparseTensor`s. Constructs a single `SparseTensor`
  from them, and adds it to the result with the key from `features`.
  Copies other keys and values from `tensor_dict` with keys present in
  `features`.

  Args:
    features: A `dict` mapping feature keys to `SparseFeature` values.
      Values of other types will be ignored.
    tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
      values. Expected to contain keys of the `SparseFeature`s' `index_key`s and
      `value_key`s, mapping them to `SparseTensor`s.
  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Similar
    to `tensor_dict` except each `SparseFeature` in `features` results in a
    single `SparseTensor`.
  """
  tensor_dict = dict(tensor_dict)  # Do not modify argument passed in.
  # Construct SparseTensors for SparseFeatures.
  for key in sorted(features.keys()):
    feature = features[key]
    if isinstance(feature, SparseFeature):
      if isinstance(feature.index_key, str):
        sp_ids = tensor_dict[feature.index_key]
      else:
        sp_ids = [tensor_dict[index_key] for index_key in feature.index_key]
      sp_values = tensor_dict[feature.value_key]
      tensor_dict[key] = sparse_ops.sparse_merge(
          sp_ids,
          sp_values,
          vocab_size=feature.size,
          already_sorted=feature.already_sorted)
  # Remove tensors from dictionary that were only used to construct
  # SparseTensors for SparseFeature.
  for key in set(tensor_dict) - set(features):
    del tensor_dict[key]
  return tensor_dict
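
Both versions of `_construct_sparse_tensors_for_sparse_features` back TensorFlow's `SparseFeature` parsing path. For orientation, a hedged end-to-end sketch of the public API this helper serves (`tf.io.SparseFeature` with `tf.io.parse_example`; the feature key names are invented):

# Illustrative SparseFeature parse; "ix", "val", and "sp" are made-up names.
import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    "ix": tf.train.Feature(int64_list=tf.train.Int64List(value=[3, 20])),
    "val": tf.train.Feature(float_list=tf.train.FloatList(value=[0.5, -1.0])),
}))
parsed = tf.io.parse_example(
    tf.constant([example.SerializeToString()]),
    {"sp": tf.io.SparseFeature(index_key="ix", value_key="val",
                               dtype=tf.float32, size=100)})
# parsed["sp"] is a SparseTensor with dense_shape [1, 100]: its column
# indices come from "ix" and its values from "val", which is exactly the
# merge these helpers perform.
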
Example #25
def _construct_tensors_for_composite_features(features, tensor_dict):
    """Creates tensors for SparseFeatures and RaggedFeatures.

    Constructs new dict based on `tensor_dict`.

    For each key in `features` whose value is a `SparseFeature`:

      * Looks up that SparseFeature's value_key and index_keys in tensor_dict.
      * Uses those tensors to construct a single SparseTensor.
      * Stores that SparseTensor in the output dict under the same key.

    For each key in `features` whose value is a `RaggedFeature`:

      * Looks up that RaggedFeature's value_key and partition keys in
        tensor_dict.
      * Uses those tensors to construct a single RaggedTensor.
      * Stores that RaggedTensor in the output dict under the same key.

    For any other key in `features`:

      * Copies that key and its value from tensor_dict to the output
        dictionary.

    Args:
      features: A `dict` mapping feature keys to `SparseFeature` or
        `RaggedFeature` values.  Values of other types will be ignored.
      tensor_dict: A `dict` mapping feature keys to `Tensor`, `SparseTensor`,
        and `RaggedTensor` values.  Expected to contain keys of the
        `SparseFeature`s' `index_key`s and `value_key`s, mapping them to
        `SparseTensor`s.

    Returns:
      A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
      `RaggedTensor` values. Similar to `tensor_dict` except each
      `SparseFeature` in `features` results in a single `SparseTensor`; and
      each `RaggedFeature` in `features` results in a single `RaggedTensor`.
    """
    tensor_dict = dict(tensor_dict)  # Do not modify argument passed in.
    updates = {}
    for key in sorted(features.keys()):
        feature = features[key]
        if isinstance(feature, SparseFeature):
            # Construct SparseTensors for SparseFeatures
            if isinstance(feature.index_key, str):
                sp_ids = tensor_dict[feature.index_key]
            else:
                sp_ids = [
                    tensor_dict[index_key] for index_key in feature.index_key
                ]
            sp_values = tensor_dict[feature.value_key]
            updates[key] = sparse_ops.sparse_merge(
                sp_ids,
                sp_values,
                vocab_size=feature.size,
                already_sorted=feature.already_sorted)
        elif isinstance(feature, RaggedFeature):
            # Construct RaggedTensors for RaggedFeatures.
            value_key = key if feature.value_key is None else feature.value_key
            rt = tensor_dict[value_key]
            if isinstance(rt, ragged_tensor.RaggedTensor):
                # We processed a batch of tf.Example or tf.SequenceExample, or a
                # single tf.SequenceExample.
                if rt.ragged_rank > 1:
                    # We're processing a batch of SequenceExample, and we
                    # effectively have two batch dimensions.  Collapse those batch
                    # dimensions here, and restore them below (using outer_splits).
                    outer_splits = rt.row_splits
                    rt = rt.values
                else:
                    outer_splits = None
                for partition in reversed(feature.partitions):
                    rt = _add_batched_ragged_partition(rt, partition,
                                                       tensor_dict, key,
                                                       feature.validate,
                                                       outer_splits)
                if outer_splits is not None:
                    rt = ragged_tensor.RaggedTensor.from_row_splits(
                        rt, outer_splits, validate=feature.validate)
            else:
                # We processed a single tf.Example.
                for partition in reversed(feature.partitions):
                    rt = _add_ragged_partition(rt, partition, tensor_dict,
                                               feature.row_splits_dtype,
                                               feature.validate)
            updates[key] = rt

    # Process updates after all composite tensors have been constructed (in case
    # multiple features use the same value_key, and one uses that key as its
    # feature key).
    tensor_dict.update(updates)

    # Remove tensors from dictionary that were only used to construct
    # tensors for SparseFeature or RaggedFeature.
    for key in set(tensor_dict) - set(features):
        del tensor_dict[key]
    return tensor_dict
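
The `RaggedFeature` branch assembles nested `RaggedTensor`s from a value tensor plus partition tensors, temporarily collapsing the two batch dimensions that arise for batched `SequenceExample`s. A hedged sketch of the public API this code path serves (`tf.io.RaggedFeature`, TF 2.x; the feature key names are invented):

# Illustrative RaggedFeature parse; "vals", "lens", "rt" are made-up names.
import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    "vals": tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 2, 3])),
    "lens": tf.train.Feature(int64_list=tf.train.Int64List(value=[2, 1])),
}))
parsed = tf.io.parse_example(
    tf.constant([example.SerializeToString()]),
    {"rt": tf.io.RaggedFeature(
        dtype=tf.int64, value_key="vals",
        partitions=[tf.io.RaggedFeature.RowLengths("lens")])})
# parsed["rt"] -> [[[1, 2], [3]]]: one batch row whose values are
# partitioned into inner rows of lengths 2 and 1.
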
Example #26
    def _training_examples_and_variables():
      """Returns dictionaries for training examples and variables."""
      batch_size = targets.get_shape()[0]

      # Iterate over all feature columns and create appropriate lists for dense
      # and sparse features as well as dense and sparse weights (variables) for
      # SDCA.
      # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
      # dict as 1-dimensional tensors.
      dense_features, sparse_features = [], []
      dense_features_weights, sparse_features_weights = [], []
      for column in sorted(set(linear_feature_columns), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column.
                      _RealValuedColumn):  # pylint: disable=protected-access
          # A real-valued column corresponds to a dense feature in SDCA.
          if column.dimension != 1:
            raise ValueError(
                "Invalid column dimension %d for column %s. SDCAOptimizer "
                "supports only 1-dimensional dense feature columns." %
                (column.dimension, column.name))

          dense_features.append(array_ops.reshape(transformed_tensor,
                                                  shape=[-1]))
          # For real valued columns, the variables list contains exactly one
          # element.
          dense_features_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column.
                        _BucketizedColumn):  # pylint: disable=protected-access
          # A bucketized column corresponds to a sparse feature in SDCA. The
          # bucketized feature is "sparsified" for SDCA by converting it to a
          # SparseTensor representing the one-hot encoding of the bucketized
          # feature.
          dense_bucket_tensor = column.to_dnn_input_layer(transformed_tensor)
          sparse_bucket_tensor = _dense_to_sparse_tensor(dense_bucket_tensor)
          sparse_features.append(sparse_bucket_tensor)
          # For bucketized columns, the variables list contains exactly one
          # element.
          sparse_features_weights.append(columns_to_variables[column][0])
        elif isinstance(column,
                        (layers.feature_column.
                         _CrossedColumn,  # pylint: disable=protected-access
                         layers.feature_column._SparseColumn
                        )):  # pylint: disable=protected-access
          weights_tensor = ops.SparseTensor(
              indices=transformed_tensor.indices,
              values=array_ops.ones_like(transformed_tensor.values),
              shape=transformed_tensor.shape)
          sparse_features_tensor = sparse_ops.sparse_merge(transformed_tensor,
                                                           weights_tensor,
                                                           column.length)
          sparse_features.append(math_ops.to_float(sparse_features_tensor))
          sparse_features_weights.append(columns_to_variables[column][0])
        elif isinstance(
            column,
            layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
          id_tensor = column.id_tensor(transformed_tensor)
          weight_tensor = column.weight_tensor(transformed_tensor)
          sparse_features_tensor = sparse_ops.sparse_merge(
              id_tensor, weight_tensor, column.length,
              name="{}_sparse_merge".format(column.name))
          sparse_features.append(math_ops.to_float(
              sparse_features_tensor, name="{}_to_float".format(column.name)))
          sparse_features_weights.append(columns_to_variables[column][0])
        else:
          raise ValueError("SDCAOptimizer does not support column type %s." %
                           type(column).__name__)

      example_weights = array_ops.reshape(
          features[weight_column_name],
          shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
      example_ids = features[self._example_id_column]
      examples = dict(
          sparse_features=sparse_features,
          dense_features=dense_features,
          example_labels=math_ops.to_float(
              array_ops.reshape(targets, shape=[-1])),
          example_weights=example_weights,
          example_ids=example_ids)
      sdca_variables = dict(sparse_features_weights=sparse_features_weights,
                            dense_features_weights=dense_features_weights)
      return examples, sdca_variables
Example #27
        def _training_examples_and_variables():
            """Returns dictionaries for training examples and variables."""
            batch_size = targets.get_shape()[0]

            # Iterate over all feature columns and create appropriate lists for dense
            # and sparse features as well as dense and sparse weights (variables) for
            # SDCA.
            # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
            # dict as 1-dimensional tensors.
            dense_features, sparse_features = [], []
            dense_features_weights, sparse_features_weights = [], []
            # pylint: disable=protected-access
            for column in sorted(set(linear_feature_columns),
                                 key=lambda x: x.key):
                transformed_tensor = features[column]
                if isinstance(column, layers.feature_column._RealValuedColumn):
                    # A real-valued column corresponds to a dense feature in SDCA.
                    if column.dimension != 1:
                        raise ValueError(
                            "Invalid column dimension %d for column %s. SDCAOptimizer "
                            "supports only 1-dimensional dense feature columns."
                            % (column.dimension, column.name))

                    dense_features.append(
                        array_ops.reshape(transformed_tensor, shape=[-1]))
                    # For real valued columns, the variables list contains exactly one
                    # element.
                    dense_features_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._BucketizedColumn):
                    # A bucketized column corresponds to a sparse feature in SDCA. The
                    # bucketized feature is "sparsified" for SDCA by converting it to a
                    # SparseTensor representing the one-hot encoding of the bucketized
                    # feature.
                    dense_bucket_tensor = column.to_dnn_input_layer(
                        transformed_tensor)
                    sparse_bucket_tensor = _dense_to_sparse_tensor(
                        dense_bucket_tensor)
                    sparse_features.append(sparse_bucket_tensor)
                    # For bucketized columns, the variables list contains exactly one
                    # element.
                    sparse_features_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column, (layers.feature_column._CrossedColumn,
                                         layers.feature_column._SparseColumn)):
                    weights_tensor = ops.SparseTensor(
                        indices=transformed_tensor.indices,
                        values=array_ops.ones_like(transformed_tensor.values),
                        shape=transformed_tensor.shape)
                    sparse_features_tensor = sparse_ops.sparse_merge(
                        transformed_tensor, weights_tensor, column.length)
                    sparse_features.append(
                        math_ops.to_float(sparse_features_tensor))
                    sparse_features_weights.append(
                        columns_to_variables[column][0])
                elif isinstance(column,
                                layers.feature_column._WeightedSparseColumn):
                    id_tensor = column.id_tensor(transformed_tensor)
                    weight_tensor = column.weight_tensor(transformed_tensor)
                    sparse_features_tensor = sparse_ops.sparse_merge(
                        id_tensor,
                        weight_tensor,
                        column.length,
                        name="{}_sparse_merge".format(column.name))
                    sparse_features.append(
                        math_ops.to_float(sparse_features_tensor,
                                          name="{}_to_float".format(
                                              column.name)))
                    sparse_features_weights.append(
                        columns_to_variables[column][0])
                else:
                    raise ValueError(
                        "SDCAOptimizer does not support column type %s." %
                        type(column).__name__)
            # pylint: enable=protected-access

            if weight_column_name:
                example_weights = array_ops.reshape(
                    features[weight_column_name], shape=[-1])
            else:
                example_weights = array_ops.ones([batch_size])
            example_ids = features[self._example_id_column]
            examples = dict(sparse_features=sparse_features,
                            dense_features=dense_features,
                            example_labels=math_ops.to_float(
                                array_ops.reshape(targets, shape=[-1])),
                            example_weights=example_weights,
                            example_ids=example_ids)
            sdca_variables = dict(
                sparse_features_weights=sparse_features_weights,
                dense_features_weights=dense_features_weights)
            return examples, sdca_variables
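
`_dense_to_sparse_tensor`, used for the bucketized columns above, is a private helper that does not appear in these excerpts. A plausible minimal implementation, assuming it simply records the nonzero entries of the dense one-hot bucket tensor:

# Hypothetical stand-in for the _dense_to_sparse_tensor helper.
import tensorflow as tf

def dense_to_sparse_tensor(dense):
    indices = tf.where(tf.not_equal(dense, 0))  # int64 [N, rank] indices
    return tf.SparseTensor(indices=indices,
                           values=tf.gather_nd(dense, indices),
                           dense_shape=tf.shape(dense, out_type=tf.int64))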