def _wide_and_deep_classifier(wide_columns, deep_columns, dnn_hidden_units,
                              learning_rate):
    """Build a simple keras wide and deep model.

    Args:
      wide_columns: Feature columns wrapped in indicator_column for wide
        (linear) part of the model.
      deep_columns: Feature columns for deep part of the model.
      dnn_hidden_units: [int], the layer sizes of the hidden DNN.
      learning_rate: [float], learning rate of the Adam optimizer.

    Returns:
      A compiled Wide and Deep Keras model (binary cross-entropy loss, Adam
      optimizer, BinaryAccuracy metric).
    """
    # Keras needs the feature definitions at compile time.
    # TODO(b/139081439): Automate generation of input layers from FeatureColumn.
    # Dense float features are fed as scalar float32 inputs.
    input_layers = {
        colname: tf.keras.layers.Input(name=colname,
                                       shape=(),
                                       dtype=tf.float32)
        for colname in features.transformed_names(
            features.DENSE_FLOAT_FEATURE_KEYS)
    }
    # Vocab, bucket and categorical features are all fed as scalar int32
    # inputs; they are registered in that order.
    for int_feature_keys in (features.VOCAB_FEATURE_KEYS,
                             features.BUCKET_FEATURE_KEYS,
                             features.CATEGORICAL_FEATURE_KEYS):
        input_layers.update({
            colname: tf.keras.layers.Input(name=colname,
                                           shape=(),
                                           dtype='int32')
            for colname in features.transformed_names(int_feature_keys)
        })

    # TODO(b/161952382): Replace with Keras premade models and
    # Keras preprocessing layers.
    deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
    # NOTE(review): the hidden layers are created without an `activation`
    # argument, so they use Dense's default (linear) activation -- confirm
    # that this is intended.
    for numnodes in dnn_hidden_units:
        deep = tf.keras.layers.Dense(numnodes)(deep)
    wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

    # A single sigmoid unit over the concatenated deep and wide outputs.
    output = tf.keras.layers.Dense(1, activation='sigmoid')(
        tf.keras.layers.concatenate([deep, wide]))
    # Drop the trailing unit dimension of the sigmoid output.
    output = tf.squeeze(output, -1)

    model = tf.keras.Model(input_layers, output)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras optimizers no longer accept.
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=[tf.keras.metrics.BinaryAccuracy()])
    model.summary(print_fn=logging.info)
    return model
def _build_keras_model(hidden_units, learning_rate):
    """Creates a DNN Keras model for classifying taxi data.

    Numeric columns are built for the transformed dense float features and
    passed as the deep columns; identity categorical columns are built for
    the transformed vocab, bucket and categorical features, wrapped in
    indicator columns, and passed as the wide columns.

    Args:
      hidden_units: [int], the layer sizes of the DNN (input layer first).
      learning_rate: [float], learning rate of the Adam optimizer.

    Returns:
      A keras Model.
    """

    def _identity_column(key, num_buckets):
        # Identity categorical column with default_value 0.
        return tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0)

    deep_columns = [
        tf.feature_column.numeric_column(name, shape=())
        for name in features.transformed_names(
            features.DENSE_FLOAT_FEATURE_KEYS)
    ]

    # Vocab features all share one bucket count: vocabulary size plus OOV.
    identity_columns = [
        _identity_column(name, features.VOCAB_SIZE + features.OOV_SIZE)
        for name in features.transformed_names(features.VOCAB_FEATURE_KEYS)
    ]
    # Bucket and categorical features each carry their own bucket count.
    identity_columns.extend(
        _identity_column(name, bucket_count)
        for name, bucket_count in zip(
            features.transformed_names(features.BUCKET_FEATURE_KEYS),
            features.BUCKET_FEATURE_BUCKET_COUNT))
    identity_columns.extend(
        _identity_column(name, bucket_count)
        for name, bucket_count in zip(
            features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
            features.CATEGORICAL_FEATURE_MAX_VALUES))

    wide_columns = [
        tf.feature_column.indicator_column(column)
        for column in identity_columns
    ]

    # TODO(b/140320729) Replace with premade wide_and_deep keras model
    return _wide_and_deep_classifier(
        wide_columns=wide_columns,
        deep_columns=deep_columns,
        dnn_hidden_units=hidden_units,
        learning_rate=learning_rate)
# Example no. 3
# 0
def _build_estimator(config, hidden_units=None, warm_start_from=None):
  """Build an estimator for predicting the tipping behavior of taxi riders.

  Args:
    config: tf.estimator.RunConfig defining the runtime environment for the
      estimator (including model_dir).
    hidden_units: [int], the layer sizes of the DNN (input layer first).
      When None or empty, [100, 70, 50, 25] is used.
    warm_start_from: Optional directory to warm start from.

  Returns:
    A tf.estimator.DNNLinearCombinedClassifier whose linear part uses the
    identity categorical columns (vocab, bucket and categorical features) and
    whose DNN part uses the numeric columns (dense float features).
  """
  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in features.transformed_names(features.DENSE_FLOAT_FEATURE_KEYS)
  ]

  # Vocab features all share one bucket count: vocabulary size plus OOV.
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          key,
          num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
          default_value=0)
      for key in features.transformed_names(features.VOCAB_FEATURE_KEYS)
  ]
  # Bucket and categorical features each carry their own bucket count.
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=num_buckets, default_value=0)
      for key, num_buckets in zip(
          features.transformed_names(features.BUCKET_FEATURE_KEYS),
          features.BUCKET_FEATURE_BUCKET_COUNT)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=num_buckets, default_value=0)
      for key, num_buckets in zip(
          features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
          features.CATEGORICAL_FEATURE_MAX_VALUES)
  ]

  return tf.estimator.DNNLinearCombinedClassifier(
      config=config,
      linear_feature_columns=categorical_columns,
      dnn_feature_columns=real_valued_columns,
      dnn_hidden_units=hidden_units or [100, 70, 50, 25],
      warm_start_from=warm_start_from)
# Example no. 4
# 0
 def testTransformedNames(self):
     """Checks that transformed_names maps "f1"/"cf" to "f1_xf"/"cf_xf"."""
     raw_names = ["f1", "cf"]
     expected_names = ["f1_xf", "cf_xf"]
     self.assertEqual(expected_names, features.transformed_names(raw_names))