def _wide_and_deep_classifier(wide_columns, deep_columns, dnn_hidden_units,
                              learning_rate):
    """Build a simple keras wide and deep model.

    One scalar Keras `Input` is declared per transformed feature name
    (float32 for the dense-float features, int32 for the vocab, bucket and
    categorical features). The deep tower is `DenseFeatures(deep_columns)`
    followed by one `Dense` layer per entry of `dnn_hidden_units`; the wide
    tower is `DenseFeatures(wide_columns)`. Both are concatenated and fed
    into a single sigmoid unit.

    Args:
      wide_columns: Feature columns wrapped in indicator_column for wide
        (linear) part of the model.
      deep_columns: Feature columns for deep part of the model.
      dnn_hidden_units: [int], the layer sizes of the hidden DNN.
      learning_rate: [float], learning rate of the Adam optimizer.

    Returns:
      A Wide and Deep Keras model, compiled with binary cross-entropy loss,
      the Adam optimizer and a BinaryAccuracy metric.
    """
    # Keras needs the feature definitions at compile time.
    # TODO(b/139081439): Automate generation of input layers from FeatureColumn.
    input_layers = {
        colname: tf.keras.layers.Input(
            name=colname, shape=(), dtype=tf.float32)
        for colname in features.transformed_names(
            features.DENSE_FLOAT_FEATURE_KEYS)
    }
    # The remaining feature groups are all declared as scalar int32 inputs.
    # They are added in the order vocab, bucket, categorical.
    for int_feature_keys in (features.VOCAB_FEATURE_KEYS,
                             features.BUCKET_FEATURE_KEYS,
                             features.CATEGORICAL_FEATURE_KEYS):
        input_layers.update({
            colname: tf.keras.layers.Input(
                name=colname, shape=(), dtype='int32')
            for colname in features.transformed_names(int_feature_keys)
        })

    # TODO(b/161952382): Replace with Keras premade models and
    # Keras preprocessing layers.
    deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
    for numnodes in dnn_hidden_units:
        # NOTE(review): no activation is passed, so these layers use Dense's
        # default activation -- confirm a purely linear deep tower is intended.
        deep = tf.keras.layers.Dense(numnodes)(deep)
    wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

    output = tf.keras.layers.Dense(1, activation='sigmoid')(
        tf.keras.layers.concatenate([deep, wide]))
    # Drop the trailing axis produced by the single-unit output layer.
    output = tf.squeeze(output, -1)

    model = tf.keras.Model(input_layers, output)
    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=[tf.keras.metrics.BinaryAccuracy()])
    model.summary(print_fn=logging.info)
    return model
def _build_keras_model(hidden_units, learning_rate):
    """Creates a DNN Keras model for classifying taxi data.

    Numeric columns are created for the transformed dense-float features and
    used as the deep columns. Identity categorical columns are created for
    the transformed vocab, bucket and categorical features, wrapped in
    indicator columns, and used as the wide columns. The model itself is
    built by `_wide_and_deep_classifier`.

    Args:
      hidden_units: [int], the layer sizes of the DNN (input layer first).
      learning_rate: [float], learning rate of the Adam optimizer.

    Returns:
      A keras Model.
    """
    dense_columns = []
    for name in features.transformed_names(features.DENSE_FLOAT_FEATURE_KEYS):
        dense_columns.append(tf.feature_column.numeric_column(name, shape=()))

    id_columns = []

    # Vocabulary features share one bucket count: vocab size plus OOV size.
    for name in features.transformed_names(features.VOCAB_FEATURE_KEYS):
        id_columns.append(
            tf.feature_column.categorical_column_with_identity(
                name,
                num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
                default_value=0))

    # Bucketized features: each key is paired with its own bucket count.
    bucket_specs = zip(
        features.transformed_names(features.BUCKET_FEATURE_KEYS),
        features.BUCKET_FEATURE_BUCKET_COUNT)
    for name, bucket_count in bucket_specs:
        id_columns.append(
            tf.feature_column.categorical_column_with_identity(
                name, num_buckets=bucket_count, default_value=0))

    # Categorical features: each key is paired with its configured max value.
    max_value_specs = zip(
        features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
        features.CATEGORICAL_FEATURE_MAX_VALUES)
    for name, max_value in max_value_specs:
        id_columns.append(
            tf.feature_column.categorical_column_with_identity(
                name, num_buckets=max_value, default_value=0))

    wide_columns = [
        tf.feature_column.indicator_column(id_column)
        for id_column in id_columns
    ]

    # TODO(b/140320729) Replace with premade wide_and_deep keras model
    return _wide_and_deep_classifier(
        wide_columns=wide_columns,
        deep_columns=dense_columns,
        dnn_hidden_units=hidden_units,
        learning_rate=learning_rate)
def _build_estimator(config, hidden_units=None, warm_start_from=None):
    """Build an estimator for predicting the tipping behavior of taxi riders.

    Args:
      config: tf.estimator.RunConfig defining the runtime environment for the
        estimator (including model_dir).
      hidden_units: [int], the layer sizes of the DNN (input layer first).
        When falsy (None or empty), [100, 70, 50, 25] is used.
      warm_start_from: Optional directory to warm start from.

    Returns:
      A tf.estimator.DNNLinearCombinedClassifier. Its linear part uses the
      identity categorical columns built from the vocab, bucket and
      categorical features; its DNN part uses the numeric columns built from
      the dense-float features.
    """
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in features.transformed_names(
            features.DENSE_FLOAT_FEATURE_KEYS)
    ]

    # Vocabulary features share one bucket count: vocab size plus OOV size.
    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(
            key,
            num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
            default_value=0)
        for key in features.transformed_names(features.VOCAB_FEATURE_KEYS)
    ]
    # Bucketized features: each key is paired with its own bucket count.
    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0)
        for key, num_buckets in zip(
            features.transformed_names(features.BUCKET_FEATURE_KEYS),
            features.BUCKET_FEATURE_BUCKET_COUNT)
    ]
    # Categorical features: each key is paired with its configured max value.
    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0)
        for key, num_buckets in zip(
            features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
            features.CATEGORICAL_FEATURE_MAX_VALUES)
    ]

    return tf.estimator.DNNLinearCombinedClassifier(
        config=config,
        linear_feature_columns=categorical_columns,
        dnn_feature_columns=real_valued_columns,
        dnn_hidden_units=hidden_units or [100, 70, 50, 25],
        warm_start_from=warm_start_from)
def testTransformedNames(self):
    """Checks that transformed_names returns each input name with `_xf` appended."""
    raw_names = ["f1", "cf"]
    expected_names = ["f1_xf", "cf_xf"]
    actual_names = features.transformed_names(raw_names)
    self.assertEqual(expected_names, actual_names)