Example #1
    def call(self, features, cols_to_output_tensors=None):
        """Returns a dense tensor corresponding to the `feature_columns`.

    Args:
      features: A mapping from key to tensors. `FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at the
        'price' key in this dict. Values can be a `SparseTensor` or a `Tensor`,
        depending on the corresponding `FeatureColumn`.
      cols_to_output_tensors: If not `None`, this will be filled with a dict
        mapping feature columns to output tensors created.

    Returns:
      A `Tensor` which represents the input layer of a model. Its shape
      is `(batch_size, first_layer_dimension)` and its dtype is `float32`.
      `first_layer_dimension` is determined based on the given `feature_columns`.

    Raises:
      ValueError: If features are not a dictionary.
    """
        if not isinstance(features, dict):
            raise ValueError('We expected a dictionary here. Instead we got: ',
                             features)
        transformation_cache = fc.FeatureTransformationCache(features)
        output_tensors = []
        for column in self._feature_columns:
            with ops.name_scope(column.name):
                tensor = column.get_dense_tensor(transformation_cache,
                                                 self._state_manager)
                processed_tensors = self._process_dense_tensor(column, tensor)
                if cols_to_output_tensors is not None:
                    cols_to_output_tensors[column] = processed_tensors
                output_tensors.append(processed_tensors)
        return self._verify_and_concat_tensors(output_tensors)
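
This `call` method appears to come from a `DenseFeatures`-style layer. A minimal usage sketch, assuming TensorFlow 2.x where such a layer is exposed as `tf.keras.layers.DenseFeatures` (the column names 'price' and 'size' are illustrative):

import tensorflow as tf

price = tf.feature_column.numeric_column('price')
size = tf.feature_column.numeric_column('size')
layer = tf.keras.layers.DenseFeatures([price, size])

features = {'price': tf.constant([[1.0], [2.0]]),
            'size': tf.constant([[10.0], [20.0]])}
cols_to_output = {}  # filled with a {feature column: output tensor} mapping
dense = layer(features, cols_to_output_tensors=cols_to_output)
# dense has shape (2, 2): batch_size x the summed dimension of the columns.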
Example #2
def _get_sequence_dense_tensor_state(column, features):
    state_manager = fc._StateManagerImpl(fc_lib.DenseFeatures(column),
                                         trainable=True)
    column.create_state(state_manager)
    dense_tensor, lengths = column.get_sequence_dense_tensor(
        fc.FeatureTransformationCache(features), state_manager)
    return dense_tensor, lengths, state_manager
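
A hypothetical invocation of this helper, assuming `fc` aliases tensorflow.python.feature_column.feature_column_v2 and `fc_lib` exposes the Keras `DenseFeatures` layer, as in the surrounding test module; the column and feature names are illustrative:

import tensorflow as tf

token_ids = tf.feature_column.sequence_categorical_column_with_identity(
    'token_ids', num_buckets=10)
token_emb = tf.feature_column.embedding_column(token_ids, dimension=4)
features = {
    'token_ids': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[1, 2, 3],
        dense_shape=[2, 2]),
}
dense_tensor, lengths, state_manager = _get_sequence_dense_tensor_state(
    token_emb, features)
# dense_tensor: [2, 2, 4] embedding sequence; lengths: [2, 1]; state_manager
# owns the embedding variable created by column.create_state().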
Example #3
    def call(self, features, cols_to_output_tensors=None, training=None):
        """Returns a dense tensor corresponding to the `feature_columns`.

    Example usage:

    >>> t1 = tf.feature_column.embedding_column(
    ...    tf.feature_column.categorical_column_with_hash_bucket("t1", 2),
    ...    dimension=8)
    >>> t2 = tf.feature_column.numeric_column('t2')
    >>> feature_layer = tf.compat.v1.keras.layers.DenseFeatures([t1, t2])
    >>> features = {"t1": tf.constant(["a", "b"]), "t2": tf.constant([1, 2])}
    >>> dense_tensor = feature_layer(features, training=True)

    Args:
      features: A mapping from key to tensors. `FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at the
        'price' key in this dict. Values can be a `SparseTensor` or a `Tensor`,
        depending on the corresponding `FeatureColumn`.
      cols_to_output_tensors: If not `None`, this will be filled with a dict
        mapping feature columns to output tensors created.
      training: Python boolean or None, indicating whether the layer is being
        run in training mode. This argument is passed to the call method of any
        `FeatureColumn` that takes a `training` argument. For example, if a
        `FeatureColumn` performed dropout, the column could expose a `training`
        argument to control whether the dropout should be applied. If `None`,
        defaults to `tf.keras.backend.learning_phase()`.


    Returns:
      A `Tensor` which represents the input layer of a model. Its shape
      is `(batch_size, first_layer_dimension)` and its dtype is `float32`.
      `first_layer_dimension` is determined based on the given `feature_columns`.

    Raises:
      ValueError: If features are not a dictionary.
    """
        if training is None:
            training = backend.learning_phase()
        if not isinstance(features, dict):
            raise ValueError('We expected a dictionary here. Instead we got: ',
                             features)
        transformation_cache = fc.FeatureTransformationCache(features)
        output_tensors = []
        for column in self._feature_columns:
            with backend.name_scope(column.name):
                try:
                    tensor = column.get_dense_tensor(transformation_cache,
                                                     self._state_manager,
                                                     training=training)
                except TypeError:
                    tensor = column.get_dense_tensor(transformation_cache,
                                                     self._state_manager)
                processed_tensors = self._process_dense_tensor(column, tensor)
                if cols_to_output_tensors is not None:
                    cols_to_output_tensors[column] = processed_tensors
                output_tensors.append(processed_tensors)
        return self._verify_and_concat_tensors(output_tensors)
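
The try/except above degrades gracefully for columns whose `get_dense_tensor` does not accept a `training` keyword. A standalone sketch of that fallback pattern (the helper name is illustrative):

def _get_dense_tensor_maybe_training(column, cache, state_manager, training):
    # Prefer the training-aware signature; fall back for columns that only
    # implement the two-argument form.
    try:
        return column.get_dense_tensor(cache, state_manager, training=training)
    except TypeError:
        return column.get_dense_tensor(cache, state_manager)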
Example #4
    def call(self, features, training=None):
        """Returns sequence input corresponding to the `feature_columns`.

    Args:
      features: A dict mapping keys to tensors.
      training: Python boolean or None, indicating whether the layer is being
        run in training mode. This argument is passed to the call method of any
        `FeatureColumn` that takes a `training` argument. For example, if a
        `FeatureColumn` performed dropout, the column could expose a `training`
        argument to control whether the dropout should be applied. If `None`,
        defaults to `tf.keras.backend.learning_phase()`.


    Returns:
      An `(input_layer, sequence_length)` tuple where:
      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
          `T` is the maximum sequence length for this batch, which could differ
          from batch to batch. `D` is the sum of `num_elements` for all
          `feature_columns`.
      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
          length for each example.

    Raises:
      ValueError: If features are not a dictionary.
    """
        if not isinstance(features, dict):
            raise ValueError('We expected a dictionary here. Instead we got: ',
                             features)
        if training is None:
            training = backend.learning_phase()
        transformation_cache = fc.FeatureTransformationCache(features)
        output_tensors = []
        sequence_lengths = []

        for column in self._feature_columns:
            with backend.name_scope(column.name):
                try:
                    dense_tensor, sequence_length = column.get_sequence_dense_tensor(
                        transformation_cache,
                        self._state_manager,
                        training=training)
                except TypeError:
                    dense_tensor, sequence_length = column.get_sequence_dense_tensor(
                        transformation_cache, self._state_manager)
                # Flattens the final dimension to produce a 3D Tensor.
                output_tensors.append(
                    self._process_dense_tensor(column, dense_tensor))
                sequence_lengths.append(sequence_length)

        # Check and process sequence lengths.
        fc._verify_static_batch_size_equality(sequence_lengths,
                                              self._feature_columns)
        sequence_length = _assert_all_equal_and_return(sequence_lengths)

        return self._verify_and_concat_tensors(output_tensors), sequence_length
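
A minimal end-to-end sketch of this sequence layer, assuming TensorFlow 2.x where it is exposed as `tf.keras.experimental.SequenceFeatures` (feature and column names are illustrative):

import tensorflow as tf

tokens = tf.feature_column.sequence_categorical_column_with_identity(
    'tokens', num_buckets=1000)
token_emb = tf.feature_column.embedding_column(tokens, dimension=8)
sequence_layer = tf.keras.experimental.SequenceFeatures([token_emb])

features = {
    'tokens': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[3, 17, 42],
        dense_shape=[2, 2]),
}
sequence_input, sequence_length = sequence_layer(features)
# sequence_input: [batch_size, T, D] = [2, 2, 8]; sequence_length: [2, 1]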
Example #5
    def call(self, features):
        """Returns sequence input corresponding to the `feature_columns`.

    Args:
      features: A dict mapping keys to tensors.

    Returns:
      An `(input_layer, sequence_length)` tuple where:
      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
          `T` is the maximum sequence length for this batch, which could differ
          from batch to batch. `D` is the sum of `num_elements` for all
          `feature_columns`.
      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
          length for each example.

    Raises:
      ValueError: If features are not a dictionary.
    """
        if not isinstance(features, dict):
            raise ValueError('We expected a dictionary here. Instead we got: ',
                             features)
        transformation_cache = fc.FeatureTransformationCache(features)
        output_tensors = []
        sequence_lengths = []

        for column in self._feature_columns:
            with ops.name_scope(column.name):
                dense_tensor, sequence_length = column.get_sequence_dense_tensor(
                    transformation_cache, self._state_manager)
                # Flattens the final dimension to produce a 3D Tensor.
                output_tensors.append(
                    self._process_dense_tensor(column, dense_tensor))
                sequence_lengths.append(sequence_length)

        # Check and process sequence lengths.
        fc._verify_static_batch_size_equality(sequence_lengths,
                                              self._feature_columns)
        sequence_length = _assert_all_equal_and_return(sequence_lengths)

        return self._verify_and_concat_tensors(output_tensors), sequence_length
Example #6
    def call(self, features, cols_to_output_tensors=None):
        if not isinstance(features, dict):
            raise ValueError('We expected a dictionary here. Instead we got: ',
                             features)

        using_features = self.filter_not_used_features(features)
        transformation_cache = fc.FeatureTransformationCache(using_features)

        self.sparse_pulling_features = self.get_sparse_pulling_feature(
            using_features)

        pulled_mapping_values = self._state_manager.pull(
            self.sparse_pulling_features)

        output_tensors = []
        for column in self._feature_columns:
            if column.categorical_column.name not in pulled_mapping_values:
                raise ValueError("column not found in pulled_mapping_values")

            mapping_value = pulled_mapping_values[
                column.categorical_column.name]
            with ops.control_dependencies([mapping_value]):
                tensor = column.get_dense_tensor(transformation_cache,
                                                 self._state_manager)

            processed_tensors = self._process_dense_tensor(column, tensor)

            if cols_to_output_tensors is not None:
                cols_to_output_tensors[column] = processed_tensors

            output_tensors.append(processed_tensors)

        if self.is_concat:
            return self._verify_and_concat_tensors(output_tensors)
        else:
            return output_tensors
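
This variant relies on non-standard helpers (`filter_not_used_features`, `_state_manager.pull`), but the ordering trick it uses is standard TensorFlow: the embedding lookup is forced to wait for the parameter pull via a control dependency. A generic, hypothetical sketch of that pattern:

import tensorflow as tf

@tf.function
def lookup_after_pull(pull_fn, lookup_fn):
    # pull_fn returns an op/tensor representing the completed parameter pull;
    # lookup_fn performs the lookup that must observe the pulled values.
    pulled = pull_fn()
    with tf.control_dependencies([pulled]):
        return lookup_fn()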
Example #7
def _get_sparse_tensors(column, features):
    return column.get_sparse_tensors(fc.FeatureTransformationCache(features),
                                     None)
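
A hypothetical call of this helper, assuming `fc` aliases feature_column_v2 as in the source module; stateless categorical columns accept a `None` state manager:

import tensorflow as tf

color = tf.feature_column.categorical_column_with_vocabulary_list(
    'color', ['red', 'green', 'blue'])
features = {'color': tf.constant([['red'], ['blue']])}
id_weight_pair = _get_sparse_tensors(color, features)
# id_weight_pair.id_tensor is a SparseTensor holding vocabulary ids 0 and 2;
# id_weight_pair.weight_tensor is None for an unweighted column.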
Example #8
def _get_sequence_dense_tensor(column, features):
    return column.get_sequence_dense_tensor(
        fc.FeatureTransformationCache(features), None)
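
A hypothetical call of this helper; passing state_manager=None works for columns that keep no model state, e.g. an indicator column over a sequence categorical column (names below are illustrative):

import tensorflow as tf

tokens = tf.feature_column.sequence_categorical_column_with_identity(
    'tokens', num_buckets=4)
indicator = tf.feature_column.indicator_column(tokens)
features = {
    'tokens': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[1, 3, 2],
        dense_shape=[2, 2]),
}
dense_tensor, lengths = _get_sequence_dense_tensor(indicator, features)
# dense_tensor: [2, 2, 4] one-hot sequence; lengths: [2, 1]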
Example #9
def fc_fn(tensors):
    fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)
Example #10
    def get_train_step(self, state_manager, weight_column_name, loss_type,
                       feature_columns, features, targets, bias_var,
                       global_step):
        """Returns the training operation of an SdcaModel optimizer."""

        batch_size = array_ops.shape(targets)[0]
        cache = feature_column_v2.FeatureTransformationCache(features)

        # Iterate over all feature columns and create appropriate lists for dense
        # and sparse features as well as dense and sparse weights (variables) for
        # SDCA.
        dense_features, dense_feature_weights = [], []
        sparse_feature_with_values, sparse_feature_with_values_weights = [], []
        for column in sorted(feature_columns, key=lambda x: x.name):
            if isinstance(column, feature_column_v2.CategoricalColumn):
                id_weight_pair = column.get_sparse_tensors(
                    cache, state_manager)
                sparse_feature_with_values.append(
                    self._prune_and_unique_sparse_ids(id_weight_pair))
                # If a partitioner was used during variable creation, we will have a
                # list of Variables here larger than 1.
                sparse_feature_with_values_weights.append(
                    state_manager.get_variable(column, 'weights'))
            elif isinstance(column, feature_column_v2.DenseColumn):
                if column.variable_shape.ndims != 1:
                    raise ValueError(
                        'Column %s has rank %d, larger than 1.' %
                        (type(column).__name__, column.variable_shape.ndims))
                dense_features.append(
                    column.get_dense_tensor(cache, state_manager))
                # For real valued columns, the variables list contains exactly one
                # element.
                dense_feature_weights.append(
                    state_manager.get_variable(column, 'weights'))
            else:
                raise ValueError(
                    'LinearSDCA does not support column type %s.' %
                    type(column).__name__)

        # Add the bias column
        dense_features.append(array_ops.ones([batch_size, 1]))
        dense_feature_weights.append(bias_var)

        example_weights = array_ops.reshape(
            features[weight_column_name],
            shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
        example_ids = features[self._example_id_column]
        training_examples = dict(sparse_features=sparse_feature_with_values,
                                 dense_features=dense_features,
                                 example_labels=math_ops.to_float(
                                     array_ops.reshape(targets, shape=[-1])),
                                 example_weights=example_weights,
                                 example_ids=example_ids)
        training_variables = dict(
            sparse_features_weights=sparse_feature_with_values_weights,
            dense_features_weights=dense_feature_weights)
        sdca_model = sdca_ops._SDCAModel(  # pylint: disable=protected-access
            examples=training_examples,
            variables=training_variables,
            options=dict(
                symmetric_l1_regularization=self._symmetric_l1_regularization,
                symmetric_l2_regularization=self._symmetric_l2_regularization,
                adaptive=self._adaptive,
                num_loss_partitions=self._num_loss_partitions,
                num_table_shards=self._num_table_shards,
                loss_type=loss_type))
        train_op = sdca_model.minimize(global_step=global_step)
        return sdca_model, train_op
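
The loop above dispatches on column type to build SDCA's sparse and dense inputs. A minimal sketch of just that dispatch, assuming the same internal alias (from tensorflow.python.feature_column import feature_column_v2); the helper name is illustrative:

def _partition_columns(feature_columns):
    """Splits feature columns into SDCA's sparse (categorical) and dense groups."""
    dense, sparse = [], []
    for column in sorted(feature_columns, key=lambda c: c.name):
        if isinstance(column, feature_column_v2.CategoricalColumn):
            sparse.append(column)
        elif isinstance(column, feature_column_v2.DenseColumn):
            dense.append(column)
        else:
            raise ValueError('LinearSDCA does not support column type %s.' %
                             type(column).__name__)
    return dense, sparse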