import tensorflow as tf

# `features` below is the example's local feature-definition module (key
# lists, vocab/bucket sizes, and the transformed_name(s) helpers).


def _build_estimator(config, hidden_units=None, warm_start_from=None):
  """Build an estimator for predicting the tipping behavior of taxi riders.

  Args:
    config: tf.estimator.RunConfig defining the runtime environment for the
      estimator (including model_dir).
    hidden_units: [int], the layer sizes of the DNN (input layer first).
    warm_start_from: Optional directory to warm start from.

  Returns:
    The DNNLinearCombinedClassifier estimator that will be used for training
    and eval.
  """
  # Dense (real-valued) features feed the DNN side of the model.
  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in features.transformed_names(features.DENSE_FLOAT_FEATURE_KEYS)
  ]

  # Categorical features feed the linear side of the model.
  categorical_columns = []
  for key in features.transformed_names(features.VOCAB_FEATURE_KEYS):
    categorical_columns.append(
        tf.feature_column.categorical_column_with_identity(
            key,
            num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
            default_value=0))

  for key, num_buckets in zip(
      features.transformed_names(features.BUCKET_FEATURE_KEYS),
      features.BUCKET_FEATURE_BUCKET_COUNT):
    categorical_columns.append(
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0))

  for key, num_buckets in zip(
      features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
      features.CATEGORICAL_FEATURE_MAX_VALUES):
    categorical_columns.append(
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0))

  # Wide-and-deep model: linear part over the categorical columns, DNN part
  # over the dense columns. hidden_units overrides the default layer sizes.
  return tf.estimator.DNNLinearCombinedClassifier(
      config=config,
      n_classes=5,
      linear_feature_columns=categorical_columns,
      dnn_feature_columns=real_valued_columns,
      dnn_hidden_units=hidden_units or [100, 70, 50, 25],
      warm_start_from=warm_start_from)
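# A minimal usage sketch, assuming an Estimator-based training loop. The
# RunConfig values (model directory, checkpoint cadence) and the layer sizes
# here are illustrative assumptions, not part of the example above.
run_config = tf.estimator.RunConfig(
    model_dir='/tmp/taxi_model',  # assumed output directory
    save_checkpoints_steps=999)  # assumed checkpoint cadence

estimator = _build_estimator(
    config=run_config,
    hidden_units=[128, 64, 32],  # overrides the default [100, 70, 50, 25]
    warm_start_from=None)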
def testTransformedNames(self):
  names = ["f1", "cf"]
  self.assertEqual(["f1_xf", "cf_xf"], features.transformed_names(names))
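# The test above pins down the naming convention rather than the
# implementation. A minimal sketch of helpers consistent with it: the '_xf'
# suffix is exactly what the assertion checks; the bodies are an assumption.
def transformed_name(key):
  """Appends the transform suffix to one feature key: 'f1' -> 'f1_xf'."""
  return key + '_xf'


def transformed_names(keys):
  """Applies transformed_name to every key in a list."""
  return [transformed_name(key) for key in keys]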
import absl.logging
import kerastuner
import tensorflow as tf
import tensorflow_transform as tft


def _build_keras_model(
    hparams: kerastuner.HyperParameters,
    tf_transform_output: tft.TFTransformOutput) -> tf.keras.Model:
  """Creates a Keras wide-and-deep classifier model.

  Args:
    hparams: Holds HyperParameters for tuning.
    tf_transform_output: A TFTransformOutput.

  Returns:
    A Keras Model.
  """
  # Defines deep (real-valued) feature columns and their input layers.
  deep_columns = [
      tf.feature_column.numeric_column(
          key=features.transformed_name(key), shape=())
      for key in features.NUMERIC_FEATURE_KEYS
  ]
  input_layers = {
      column.key: tf.keras.layers.Input(
          name=column.key, shape=(), dtype=tf.float32)
      for column in deep_columns
  }

  # Defines wide (categorical) feature columns and their input layers.
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          key=features.transformed_name(key),
          num_buckets=tf_transform_output.num_buckets_for_transformed_feature(
              features.transformed_name(key)),
          default_value=0) for key in features.CATEGORICAL_FEATURE_KEYS
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(  # pylint: disable=g-complex-comprehension
          key,
          num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
          default_value=0)
      for key in features.transformed_names(features.VOCAB_FEATURE_KEYS)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(  # pylint: disable=g-complex-comprehension
          key, num_buckets=num_buckets, default_value=0)
      for key, num_buckets in zip(
          features.transformed_names(features.BUCKET_FEATURE_KEYS),
          features.BUCKET_FEATURE_BUCKET_COUNT)
  ]
  wide_columns = [
      tf.feature_column.indicator_column(categorical_column)
      for categorical_column in categorical_columns
  ]
  input_layers.update({
      column.categorical_column.key: tf.keras.layers.Input(
          name=column.categorical_column.key, shape=(), dtype=tf.int32)
      for column in wide_columns
  })

  # Builds the deep tower, sized by the tuner's hyperparameters, and the wide
  # (linear) tower over the one-hot categorical columns.
  deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
  for n in range(int(hparams.get('n_layers'))):
    deep = tf.keras.layers.Dense(
        units=hparams.get('n_units_' + str(n + 1)))(deep)
  wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

  # Binary head: a single sigmoid unit over the concatenated towers.
  output = tf.keras.layers.Dense(1, activation='sigmoid')(
      tf.keras.layers.concatenate([deep, wide]))
  output = tf.squeeze(output, -1)

  model = tf.keras.Model(input_layers, output)
  model.compile(
      loss='binary_crossentropy',
      optimizer=tf.keras.optimizers.Adam(
          learning_rate=hparams.get('learning_rate')),
      metrics=[
          tf.keras.metrics.TruePositives(name='tp'),
          tf.keras.metrics.FalsePositives(name='fp'),
          tf.keras.metrics.TrueNegatives(name='tn'),
          tf.keras.metrics.FalseNegatives(name='fn'),
          tf.keras.metrics.BinaryAccuracy(name='binary_accuracy'),
          tf.keras.metrics.Precision(name='precision'),
          tf.keras.metrics.Recall(name='recall'),
          tf.keras.metrics.AUC(name='auc'),
      ])
  model.summary(print_fn=absl.logging.info)
  return model
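# A minimal sketch of how the tuner side might drive this builder. The
# hyperparameter names match the hparams.get() calls above; the concrete
# values and the transform-output path are illustrative assumptions.
hp = kerastuner.HyperParameters()
hp.Fixed('learning_rate', 1e-3)
hp.Fixed('n_layers', 2)  # two hidden layers in the deep tower
hp.Fixed('n_units_1', 64)
hp.Fixed('n_units_2', 32)

tf_transform_output = tft.TFTransformOutput('/tmp/transform_output')  # assumed path
model = _build_keras_model(hp, tf_transform_output)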