예제 #1
0
def _build_estimator(config, hidden_units=None, warm_start_from=None):
    """Build an estimator for predicting the tipping behavior of taxi riders.

    Args:
      config: tf.estimator.RunConfig defining the runtime environment for the
        estimator (including model_dir).
      hidden_units: [int], the layer sizes of the DNN (input layer first).
        Defaults to [100, 70, 50, 25] when not provided.
      warm_start_from: Optional directory to warm start from.

    Returns:
      A tf.estimator.DNNLinearCombinedClassifier combining the real-valued
      (deep) columns and the categorical (wide/linear) columns.
    """
    # Deep part: one numeric column per transformed dense float feature.
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=()) for key in
        features.transformed_names(features.DENSE_FLOAT_FEATURE_KEYS)
    ]

    # Wide part: vocab features were integerized by Transform, so identity
    # columns must cover the learned vocabulary plus the OOV buckets.
    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(
            key,
            num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
            default_value=0)
        for key in features.transformed_names(features.VOCAB_FEATURE_KEYS)
    ]

    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0)
        for key, num_buckets in zip(
            features.transformed_names(features.BUCKET_FEATURE_KEYS),
            features.BUCKET_FEATURE_BUCKET_COUNT)
    ]

    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets, default_value=0)
        for key, num_buckets in zip(
            features.transformed_names(features.CATEGORICAL_FEATURE_KEYS),
            features.CATEGORICAL_FEATURE_MAX_VALUES)
    ]

    return tf.estimator.DNNLinearCombinedClassifier(
        config=config,
        n_classes=5,
        linear_feature_columns=categorical_columns,
        dnn_feature_columns=real_valued_columns,
        # Honor the caller-supplied layer sizes; previously this was
        # hard-coded, silently ignoring the hidden_units argument.
        dnn_hidden_units=hidden_units or [100, 70, 50, 25],
        warm_start_from=warm_start_from)
예제 #2
0
 def testTransformedNames(self):
     """transformed_names should suffix each raw feature key with '_xf'."""
     raw_names = ["f1", "cf"]
     result = features.transformed_names(raw_names)
     self.assertEqual(["f1_xf", "cf_xf"], result)
예제 #3
0
def _build_keras_model(
        hparams: kerastuner.HyperParameters,
        tf_transform_output: tft.TFTransformOutput) -> tf.keras.Model:
    """Creates a Keras WideDeep binary classifier model.

    Args:
      hparams: Holds HyperParameters for tuning; reads 'n_layers',
        'n_units_<i>' (1-based) and 'learning_rate'.
      tf_transform_output: A TFTransformOutput, used to look up the number of
        buckets produced by Transform for each categorical feature.

    Returns:
      A compiled keras Model with a single sigmoid output (binary
      cross-entropy loss, confusion-matrix/precision/recall/AUC metrics).
    """
    # Deep part: one numeric column (and a matching float input) per
    # transformed numeric feature.
    deep_columns = [
        tf.feature_column.numeric_column(key=features.transformed_name(key),
                                         shape=())
        for key in features.NUMERIC_FEATURE_KEYS
    ]

    input_layers = {
        column.key: tf.keras.layers.Input(name=column.key,
                                          shape=(),
                                          dtype=tf.float32)
        for column in deep_columns
    }

    # Wide part: identity columns over the integerized categorical features.
    # Bucket counts for the plain categorical features come from the
    # Transform graph's metadata.
    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(
            key=features.transformed_name(key),
            num_buckets=tf_transform_output.
            num_buckets_for_transformed_feature(
                features.transformed_name(key)),
            default_value=0) for key in features.CATEGORICAL_FEATURE_KEYS
    ]

    # Vocab features cover the learned vocabulary plus the OOV buckets.
    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(  # pylint: disable=g-complex-comprehension
            key,
            num_buckets=features.VOCAB_SIZE + features.OOV_SIZE,
            default_value=0)
        for key in features.transformed_names(features.VOCAB_FEATURE_KEYS)
    ]

    categorical_columns += [
        tf.feature_column.categorical_column_with_identity(  # pylint: disable=g-complex-comprehension
            key,
            num_buckets=num_buckets,
            default_value=0) for key, num_buckets in zip(
                features.transformed_names(features.BUCKET_FEATURE_KEYS),
                features.BUCKET_FEATURE_BUCKET_COUNT)
    ]

    # One-hot the identity columns so DenseFeatures can consume them.
    wide_columns = [
        tf.feature_column.indicator_column(categorical_column)
        for categorical_column in categorical_columns
    ]

    input_layers.update({
        column.categorical_column.key:
        tf.keras.layers.Input(name=column.categorical_column.key,
                              shape=(),
                              dtype=tf.int32)
        for column in wide_columns
    })

    # Build the deep tower with a tuned number/width of dense layers.
    deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
    for n in range(int(hparams.get('n_layers'))):
        deep = tf.keras.layers.Dense(units=hparams.get('n_units_' +
                                                       str(n + 1)))(deep)

    wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

    # Single sigmoid unit for binary classification; squeeze the trailing
    # dimension so labels of shape (batch,) match.
    output = tf.keras.layers.Dense(1, activation='sigmoid')(
        tf.keras.layers.concatenate([deep, wide]))
    output = tf.squeeze(output, -1)

    model = tf.keras.Model(input_layers, output)
    model.compile(
        loss='binary_crossentropy',
        # 'learning_rate' replaces the deprecated 'lr' keyword.
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hparams.get('learning_rate')),
        metrics=[
            tf.keras.metrics.TruePositives(name='tp'),
            tf.keras.metrics.FalsePositives(name='fp'),
            tf.keras.metrics.TrueNegatives(name='tn'),
            tf.keras.metrics.FalseNegatives(name='fn'),
            tf.keras.metrics.BinaryAccuracy(name='binary_accuracy'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ])
    model.summary(print_fn=absl.logging.info)

    return model