Example #1
def encode_features(features,
                    feature_columns,
                    mode=tf_estimator.ModeKeys.TRAIN,
                    scope=None):
    """Returns dense tensors from features using feature columns.

  This function encodes the feature column transformation on the 'raw'
  `features`.


  Args:
    features: (dict) mapping feature names to feature values, possibly obtained
      from input_fn.
    feature_columns: (list)  list of feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    (dict) A mapping from columns to dense tensors.
  """
    # Having scope here for backward compatibility.
    del scope
    trainable = (mode == tf_estimator.ModeKeys.TRAIN)
    cols_to_tensors = {}

    # TODO: Ensure only v2 Feature Columns are used.
    if (hasattr(feature_column_lib, "is_feature_column_v2") and
            feature_column_lib.is_feature_column_v2(feature_columns)):
        dense_feature_columns = [
            col for col in feature_columns if not _is_sequence_column_v2(col)
        ]
        sequence_feature_columns = [
            col for col in feature_columns if _is_sequence_column_v2(col)
        ]

        if dense_feature_columns:
            dense_layer = tf.compat.v1.keras.layers.DenseFeatures(
                feature_columns=dense_feature_columns,
                name="encoding_layer",
                trainable=trainable)
            dense_layer(features, cols_to_output_tensors=cols_to_tensors)

        for col in sequence_feature_columns:
            sequence_feature_layer = tf.keras.experimental.SequenceFeatures(
                col)
            sequence_input, _ = sequence_feature_layer(features)
            cols_to_tensors[col] = sequence_input
    else:
        tf.compat.v1.feature_column.input_layer(
            features=features,
            feature_columns=feature_columns,
            trainable=trainable,
            cols_to_output_tensors=cols_to_tensors)

    return cols_to_tensors
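
A minimal usage sketch (not part of the original example; the feature and column names are illustrative, and `tf.estimator.ModeKeys` is assumed to be interchangeable with the `tf_estimator.ModeKeys` used in the signature). Each feature column ends up keyed to its dense encoding:

import tensorflow as tf

feature_columns = [
    tf.feature_column.numeric_column("age"),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            "country", ["US", "CA", "MX"])),
]
features = {
    "age": tf.constant([[25.0], [40.0]]),
    "country": tf.constant([["US"], ["MX"]]),
}
cols_to_tensors = encode_features(
    features, feature_columns, mode=tf.estimator.ModeKeys.PREDICT)
# The indicator column yields a [batch_size, 3] one-hot tensor and the numeric
# column a [batch_size, 1] tensor.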
Example #2
  def linear_logit_fn(features):
    """Linear model logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.

    Returns:
      A `Tensor` representing the logits.
    """
    if feature_column_lib.is_feature_column_v2(feature_columns):
      linear_model = feature_column_lib.LinearModel(
          feature_columns=feature_columns,
          units=units,
          sparse_combiner=sparse_combiner,
          name='linear_model')
      logits = linear_model(features)
      bias = linear_model.bias

      # We'd like to get all the non-bias variables associated with this
      # LinearModel.
      # TODO(rohanj): Figure out how to get shared embedding weights variable
      # here.
      variables = linear_model.variables
      variables.remove(bias)

      # Expand (potential) Partitioned variables
      bias = _get_expanded_variable_list([bias])
    else:
      linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
          feature_columns=feature_columns,
          units=units,
          sparse_combiner=sparse_combiner,
          name='linear_model')
      logits = linear_model(features)
      cols_to_vars = linear_model.cols_to_vars()
      bias = cols_to_vars.pop('bias')
      variables = cols_to_vars.values()
    variables = _get_expanded_variable_list(variables)

    if units > 1:
      summary.histogram('bias', bias)
    else:
      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
      # so we should provide a scalar summary.
      summary.scalar('bias', bias[0][0])
    summary.scalar('fraction_of_zero_weights',
                   _compute_fraction_of_zero(variables))
    return logits
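
The function above closes over `feature_columns`, `units`, and `sparse_combiner` from its enclosing scope, so it cannot run in isolation. As a hedged, standalone illustration of what it produces for dense inputs, here is a sketch that uses plain tf.keras.layers.Dense instead of the estimator-internal LinearModel (tensor values are made up):

import tensorflow as tf

linear = tf.keras.layers.Dense(units=1, name="linear_model")  # kernel + bias
x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
logits = linear(x)   # shape [2, 1], analogous to the logits returned above
bias = linear.bias   # the bias term that gets the scalar summary
non_bias = [v for v in linear.variables if v is not bias]  # kernel weights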
Example #3
def _validate_linear_sdca_optimizer_for_linear_classifier(
    feature_columns,
    n_classes,
    optimizer,
    sparse_combiner):
  """Helper function for the initialization of LinearClassifier."""
  if isinstance(optimizer, LinearSDCA):
    if sparse_combiner != 'sum':
      raise ValueError('sparse_combiner must be "sum" when optimizer '
                       'is a LinearSDCA object.')
    if not feature_column_lib.is_feature_column_v2(feature_columns):
      raise ValueError('V2 feature columns required when optimizer '
                       'is a LinearSDCA object.')
    if n_classes > 2:
      raise ValueError('LinearSDCA cannot be used in a multi-class setting.')
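
A short, hypothetical call sketch (it assumes `LinearSDCA` in the module above is the class exported as tf.estimator.experimental.LinearSDCA, and the column name is illustrative):

import tensorflow as tf

columns = [tf.feature_column.numeric_column("x")]
sdca = tf.estimator.experimental.LinearSDCA(example_id_column="example_id")

# Passes silently: v2 columns, binary classification, 'sum' combiner.
_validate_linear_sdca_optimizer_for_linear_classifier(
    feature_columns=columns, n_classes=2, optimizer=sdca, sparse_combiner='sum')

# Would raise ValueError("LinearSDCA cannot be used in a multi-class setting."):
# _validate_linear_sdca_optimizer_for_linear_classifier(
#     feature_columns=columns, n_classes=3, optimizer=sdca, sparse_combiner='sum')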
Example #4
def _validate_linear_sdca_optimizer_for_linear_regressor(
    feature_columns,
    label_dimension,
    optimizer,
    sparse_combiner):
  """Helper function for the initialization of LinearRegressor."""
  if isinstance(optimizer, LinearSDCA):
    if sparse_combiner != 'sum':
      raise ValueError('sparse_combiner must be "sum" when optimizer '
                       'is a LinearSDCA object.')
    if not feature_column_lib.is_feature_column_v2(feature_columns):
      raise ValueError('V2 feature columns required when optimizer '
                       'is a LinearSDCA object.')
    if label_dimension > 1:
      raise ValueError('LinearSDCA can only be used with one-dimensional '
                       'label.')
Example #5
def encode_features(features,
                    feature_columns,
                    mode=model_fn.ModeKeys.TRAIN,
                    scope=None):
  """Returns dense tensors from features using feature columns.

  This function encodes the feature column transformation on the 'raw'
  `features`.

  Args:
    features: (dict) mapping feature names to feature values, possibly obtained
      from input_fn.
    feature_columns: (list) list of feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    (dict) A mapping from columns to dense tensors.
  """
  # Having scope here for backward compatibility.
  del scope
  trainable = (mode == model_fn.ModeKeys.TRAIN)
  cols_to_tensors = {}

  if (hasattr(feature_column_lib, "is_feature_column_v2") and
      feature_column_lib.is_feature_column_v2(feature_columns)):
    dense_layer = feature_column_lib.DenseFeatures(
        feature_columns=feature_columns,
        name="encoding_layer",
        trainable=trainable)
    dense_layer(features, cols_to_output_tensors=cols_to_tensors)
  else:
    feature_column.input_layer(
        features=features,
        feature_columns=feature_columns,
        trainable=trainable,
        cols_to_output_tensors=cols_to_tensors)

  return cols_to_tensors
Example #6
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = feature_column_lib.DenseFeatures(
          feature_columns=feature_columns, name='input_layer')
    else:
      self._input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)

    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        hidden_layer = core_layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope,
            _scope=hidden_layer_scope)
        self._add_layer(hidden_layer, hidden_layer_scope.name)
        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = core_layers.Dropout(rate=self._dropout)
          self._add_layer(dropout_layer, dropout_layer.name)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_layer = normalization.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name='batchnorm_%d' % layer_id,
              _scope='batchnorm_%d' % layer_id)
          self._add_layer(batch_norm_layer, batch_norm_layer.name)
          self._batch_norm_layers.append(batch_norm_layer)

    with variable_scope.variable_scope('logits') as logits_scope:
      self._logits_layer = core_layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._add_layer(self._logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner
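
For orientation, a hedged, standalone Keras sketch of the layer pattern this constructor builds: Dense, then optional Dropout and BatchNormalization per hidden layer, followed by a linear logits layer. It deliberately omits the feature-column input layer and the variable-scope bookkeeping, so it is an approximation rather than the author's class:

import tensorflow as tf

def build_dnn(hidden_units, units, activation_fn=tf.nn.relu,
              dropout=None, batch_norm=False):
    layers = []
    for num_hidden_units in hidden_units:
        layers.append(
            tf.keras.layers.Dense(num_hidden_units, activation=activation_fn))
        if dropout is not None:
            layers.append(tf.keras.layers.Dropout(rate=dropout))
        if batch_norm:
            # Same momentum override as in the constructor above.
            layers.append(tf.keras.layers.BatchNormalization(momentum=0.999))
    layers.append(tf.keras.layers.Dense(units))  # logits layer, no activation
    return tf.keras.Sequential(layers)

model = build_dnn(hidden_units=[64, 32], units=1, dropout=0.1, batch_norm=True)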
Example #7
    def _input_layer_fn(features,
                        is_training,
                        scope_name="Phoenix/Input",
                        lengths_feature_name=None):

        with tf.compat.v1.variable_scope(scope_name):
            if problem_type == phoenix_spec_pb2.PhoenixSpec.CNN:
                # Sometimes we only get the image feature as a tensor.
                if not isinstance(features, dict):
                    return features, None
                return tf.cast(features[feature_columns[0].name],
                               dtype=tf.float32), None
            # DNN
            elif problem_type == phoenix_spec_pb2.PhoenixSpec.DNN:
                # To allow running a custom evaluation where multiple batches are
                # aggregated in a single metric_fn call, we need to define the
                # batch_size based on the input_fn, but DenseFeatures does not allow
                # this.
                if (len(feature_columns) == 1 and isinstance(
                        feature_columns[0],
                        type(tf.feature_column.numeric_column("x")))):
                    return tf.cast(features[feature_columns[0].name],
                                   dtype=tf.float32), None
                # All are TF1 feature columns
                elif all([
                        not feature_column_lib.is_feature_column_v2([fc])
                        for fc in feature_columns
                ]):
                    return tf.compat.v1.feature_column.input_layer(
                        features, feature_columns, trainable=is_training), None
                # Some are TF1 feature columns
                elif any([
                        not feature_column_lib.is_feature_column_v2([fc])
                        for fc in feature_columns
                ]):
                    fc_v1 = [
                        fc for fc in feature_columns
                        if not feature_column_lib.is_feature_column_v2([fc])
                    ]
                    fc_v2 = [
                        fc for fc in feature_columns
                        if feature_column_lib.is_feature_column_v2([fc])
                    ]
                    input_1 = tf.compat.v1.feature_column.input_layer(
                        features, fc_v1, trainable=is_training)
                    input_2 = tf.keras.layers.DenseFeatures(
                        fc_v2, name="input_layer_fc_v2",
                        trainable=is_training)(features)
                    return tf.concat([input_1, input_2], axis=1), None

                # None are TF1 feature columns
                else:
                    return tf.keras.layers.DenseFeatures(
                        feature_columns,
                        name="input_layer",
                        trainable=is_training)(features), None

            # RNN
            elif (problem_type
                  == phoenix_spec_pb2.PhoenixSpec.RNN_ALL_ACTIVATIONS
                  or problem_type
                  == phoenix_spec_pb2.PhoenixSpec.RNN_LAST_ACTIVATIONS):
                if lengths_feature_name:
                    return (tf.cast(features[feature_columns[0].name],
                                    dtype=tf.float32),
                            features[lengths_feature_name])
                elif (feature_columns[0].name in features and not isinstance(
                        features[feature_columns[0].name], tf.SparseTensor)):
                    return tf.cast(features[feature_columns[0].name],
                                   dtype=tf.float32), None
                else:
                    # IMPORTANT NOTE:
                    # When you use Keras layers with variables, always give them a name!
                    # If not, keras will add "_#" (e.g., dense_1 instead of dense).
                    # It will add the suffix even if the outer-scope is different.
                    # This is a surprising behavior.
                    # TODO(mazzawi): Contact the Keras team about this.
                    return tf.keras.experimental.SequenceFeatures(
                        feature_columns=feature_columns,
                        trainable=is_training,
                        name=scope_name)(features)
            else:
                raise ValueError("Unknown problem type")
Example #8
    def __init__(self,
                 units,
                 hidden_units,
                 feature_columns,
                 activation_fn,
                 dropout,
                 batch_norm,
                 name=None,
                 **kwargs):
        super(_DNNModelV2, self).__init__(name=name, **kwargs)

        # Add this name_scope for backward compatibility, as it was previously
        # used in variable_scope.
        with ops.name_scope(
                'input_from_feature_columns') as input_feature_column_scope:
            layer_name = input_feature_column_scope + 'input_layer'
            if feature_column_lib.is_feature_column_v2(feature_columns):
                self._input_layer = feature_column_lib.DenseFeatures(
                    feature_columns=feature_columns, name=layer_name)
            else:
                self._input_layer = feature_column.InputLayer(
                    feature_columns=feature_columns,
                    name=layer_name,
                    create_scope_now=False)

        self._add_layer(self._input_layer, self._input_layer.name)

        self._dropout = dropout
        self._batch_norm = batch_norm

        self._hidden_layers = []
        self._dropout_layers = []
        self._batch_norm_layers = []
        self._hidden_layer_scope_names = []
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with ops.name_scope('hiddenlayer_%d' %
                                layer_id) as hidden_layer_scope:
                # Get scope name without the trailing slash.
                hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
                hidden_layer = core_layers.Dense(
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_shared_name)
                self._add_layer(hidden_layer, hidden_shared_name)
                self._hidden_layer_scope_names.append(hidden_shared_name)
                self._hidden_layers.append(hidden_layer)
                if self._dropout is not None:
                    dropout_layer = core_layers.Dropout(rate=self._dropout)
                    self._add_layer(dropout_layer, dropout_layer.name)
                    self._dropout_layers.append(dropout_layer)
                if self._batch_norm:
                    batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
                    batch_norm_layer = normalization.BatchNormalization(
                        # The default momentum 0.99 actually crashes on certain
                        # problems, so here we use 0.999, which is the default of
                        # tf.contrib.layers.batch_norm.
                        momentum=0.999,
                        trainable=True,
                        name=batch_norm_name)
                    self._add_layer(batch_norm_layer, batch_norm_name)
                    self._batch_norm_layers.append(batch_norm_layer)

        with ops.name_scope('logits') as logits_scope:
            logits_shared_name = _name_from_scope_name(logits_scope)
            self._logits_layer = core_layers.Dense(
                units=units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_shared_name)
            self._add_layer(self._logits_layer, logits_shared_name)
            self._logits_scope_name = logits_shared_name
Example #9
def encode_listwise_features(features,
                             context_feature_columns,
                             example_feature_columns,
                             input_size=None,
                             mode=tf_estimator.ModeKeys.TRAIN,
                             scope=None):
    """Returns dense tensors from features using feature columns.

  Args:
    features: (dict) mapping feature names (str) to feature values (`tf.Tensor`
      or `tf.SparseTensor`), possibly obtained from input_fn. For context
      features, the tensors are 2-D, while for example features the tensors are
      3-D.
    context_feature_columns: (dict) context feature names to columns.
    example_feature_columns: (dict) example feature names to columns.
    input_size: (int) [DEPRECATED: Use without this argument.] number of
      examples per query. If this is None, input_size is inferred as the size
      of second dimension of the Tensor corresponding to one of the example
      feature columns.
    mode: (`estimator.ModeKeys`) Specifies if this is training, evaluation or
      inference. See `ModeKeys`.
    scope: (str) variable scope for the per column input layers.

  Returns:
    context_features: (dict) A mapping from context feature names to dense
    2-D tensors of shape [batch_size, ...].
    example_features: (dict) A mapping from example feature names to dense
    3-D tensors of shape [batch_size, input_size, ...].

  Raises:
    ValueError: If `input size` is not equal to 2nd dimension of example
    tensors.
  """
    context_features = {}
    if context_feature_columns:
        context_cols_to_tensors = encode_features(
            features, context_feature_columns.values(), mode=mode, scope=scope)
        context_features = {
            name: context_cols_to_tensors[col]
            for name, col in six.iteritems(context_feature_columns)
        }

    # Compute example_features. Note that the keys in `example_feature_columns`
    # dict can be different from the keys in the `features` dict. We only need to
    # reshape the per-example tensors in `features`. To obtain the keys for
    # per-example features, we use the parsing feature specs.
    example_features = {}
    if example_feature_columns:
        if feature_column_lib.is_feature_column_v2(
                example_feature_columns.values()):
            example_specs = tf.compat.v2.feature_column.make_parse_example_spec(
                example_feature_columns.values())
        else:
            example_specs = tf.compat.v1.feature_column.make_parse_example_spec(
                example_feature_columns.values())
        example_name = next(six.iterkeys(example_specs))
        batch_size = tf.shape(input=features[example_name])[0]
        if input_size is None:
            input_size = tf.shape(input=features[example_name])[1]
        # Reshape the first two dimensions [batch_size, input_size] to
        # [batch_size * input_size] so that the features can be encoded.
        reshaped_features = {}
        for name in example_specs:
            if name not in features:
                tf.compat.v1.logging.warn(
                    "Feature {} is not found.".format(name))
                continue
            try:
                reshaped_features[name] = utils.reshape_first_ndims(
                    features[name], 2, [batch_size * input_size])
            except Exception:
                raise ValueError(
                    "2nd dimension of tensor must be equal to input size: {}, "
                    "but found feature {} with shape {}.".format(
                        input_size, name, features[name].get_shape()))

        example_cols_to_tensors = encode_features(
            reshaped_features,
            example_feature_columns.values(),
            mode=mode,
            scope=scope)
        example_features = {
            name: utils.reshape_first_ndims(example_cols_to_tensors[col], 1,
                                            [batch_size, input_size])
            for name, col in six.iteritems(example_feature_columns)
        }

    return context_features, example_features
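
A hedged usage sketch for the function above (feature and column names are illustrative; `tf.estimator.ModeKeys` is assumed to be interchangeable with `tf_estimator.ModeKeys`). Context tensors are 2-D, example tensors are 3-D with shape [batch_size, list_size, ...]:

import tensorflow as tf

context_feature_columns = {
    "query_length": tf.feature_column.numeric_column("query_length")}
example_feature_columns = {
    "unigrams": tf.feature_column.numeric_column("unigrams", shape=(3,))}
features = {
    "query_length": tf.constant([[3.0], [1.0]]),  # [batch_size=2, 1]
    "unigrams": tf.random.uniform([2, 5, 3]),     # [batch_size=2, list_size=5, 3]
}
context_features, example_features = encode_listwise_features(
    features, context_feature_columns, example_feature_columns,
    mode=tf.estimator.ModeKeys.PREDICT)
# context_features["query_length"] -> dense tensor of shape [2, 1]
# example_features["unigrams"]     -> dense tensor of shape [2, 5, 3]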
Example #10
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
      layer_name = input_feature_column_scope + 'input_layer'
      if feature_column_lib.is_feature_column_v2(feature_columns):
        self._input_layer = dense_features_v2.DenseFeatures(
            feature_columns=feature_columns, name=layer_name)
      else:
        raise ValueError(
            'Received a feature column from TensorFlow v1, but this is a '
            'TensorFlow v2 Estimator. Please either use v2 feature columns '
            '(accessible via tf.feature_column.* in TF 2.x) with this '
            'Estimator, or switch to a v1 Estimator for use with v1 feature '
            'columns (accessible via tf.compat.v1.estimator.* and '
            'tf.compat.v1.feature_column.*, respectively).')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        # Get scope name without the trailing slash.
        hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
        hidden_layer = keras_core.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_shared_name)
        self._hidden_layer_scope_names.append(hidden_shared_name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = keras_core.Dropout(rate=self._dropout)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
          # TODO(scottzhu): Change back to use BatchNormalization when the
          # cleanup is done.
          batch_norm_layer = keras_norm.BatchNormalizationBase(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name=batch_norm_name)
          self._batch_norm_layers.append(batch_norm_layer)

    with ops.name_scope('logits') as logits_scope:
      logits_shared_name = _name_from_scope_name(logits_scope)
      self._logits_layer = keras_core.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_shared_name)
      self._logits_scope_name = logits_shared_name
Example #11
def _sdca_model_fn(features, labels, mode, head, feature_columns, optimizer):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: a `LinearSDCA` instance.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  assert feature_column_lib.is_feature_column_v2(feature_columns)
  # The _sdca_model_fn is used by _linear_model_fn with LinearSDCA optimizer in
  # both v1 and v2 Linear Estimators, and the only difference is to check head
  # type. Here we check the instance for both v1 and v2 Head to avoid duplicate
  # code. Later, v1 and v2 versions of _sdca_model_fn can be created if
  # necessary.
  if isinstance(head,
                (binary_class_head.BinaryClassHead,
                 head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss)):  # pylint: disable=protected-access
    loss_type = 'logistic_loss'
  elif isinstance(head, (regression_head.RegressionHead,
                         head_lib._RegressionHeadWithMeanSquaredErrorLoss)):  # pylint: disable=protected-access
    assert head.logits_dimension == 1
    loss_type = 'squared_loss'
  else:
    raise ValueError('Unsupported head type: {}'.format(head))

  linear_model = feature_column_lib.LinearModel(
      feature_columns=feature_columns, units=1, sparse_combiner='sum')
  logits = linear_model(features)

  bias = linear_model.bias

  # We'd like to get all the non-bias variables associated with this
  # LinearModel.
  # TODO(rohanj): Figure out how to get shared embedding weights variable
  # here.
  variables = linear_model.variables
  variables.remove(bias)

  # Expand (potential) Partitioned variables
  bias = _get_expanded_variable_list([bias])
  variables = _get_expanded_variable_list(variables)
  summary.scalar('bias', bias[0][0])
  summary.scalar('fraction_of_zero_weights',
                 _compute_fraction_of_zero(variables))

  if mode == ModeKeys.TRAIN:
    sdca_model, train_op = optimizer.get_train_step(
        linear_model.layer._state_manager,  # pylint: disable=protected-access
        head._weight_column,  # pylint: disable=protected-access
        loss_type,
        feature_columns,
        features,
        labels,
        linear_model.bias,
        training.get_global_step())

    update_weights_hook = _SDCAUpdateWeightsHook(sdca_model, train_op)

    model_fn_ops = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=lambda unused_loss_fn: train_op,
        logits=logits)
    return model_fn_ops._replace(training_chief_hooks=(
        model_fn_ops.training_chief_hooks + (update_weights_hook,)))
  else:
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits)
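
As a hedged orientation note (not part of the original module): this model_fn is what the canned linear estimators run internally when they are constructed with a LinearSDCA optimizer, roughly as in the sketch below (column and feature names are illustrative):

import tensorflow as tf

columns = [tf.feature_column.numeric_column("x")]
classifier = tf.estimator.LinearClassifier(
    feature_columns=columns,
    optimizer=tf.estimator.experimental.LinearSDCA(
        example_id_column="example_id"))
# The input_fn must then provide an "example_id" feature alongside "x"; the
# default sparse_combiner='sum' satisfies the LinearSDCA validation above.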