Example 1
from typing import Dict

def preprocessing_fn(inputs: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
    """tf.transform's callback function for preprocessing inputs.

    Args:
      inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
      Map from string feature key to transformed feature operations.
    """
    outputs = {}

    for key in features.ONE_HOT_FEATURES.keys():
        dim = features.ONE_HOT_FEATURES[key]
        int_value = tft.compute_and_apply_vocabulary(
            fill_in_missing(inputs[key]), top_k=dim + 1)
        outputs[features.transformed_name(key)] = convert_num_to_one_hot(
            int_value, num_labels=dim + 1)

    for key, bucket_count in features.BUCKET_FEATURES.items():
        temp_feature = tft.bucketize(
            convert_zip_code(fill_in_missing(inputs[key])),
            bucket_count,
            always_return_num_quantiles=False,
        )
        outputs[features.transformed_name(key)] = convert_num_to_one_hot(
            temp_feature, num_labels=bucket_count + 1)

    for key in features.TEXT_FEATURES.keys():
        outputs[features.transformed_name(key)] = fill_in_missing(inputs[key])

    outputs[features.transformed_name(features.LABEL_KEY)] = fill_in_missing(
        inputs[features.LABEL_KEY])

    return outputs
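Example 1 assumes module-level helpers that are not shown: fill_in_missing, convert_num_to_one_hot, and convert_zip_code. Below is a minimal sketch of the first two, assuming they follow the usual tf.transform pattern (sparse-to-dense fill, then one-hot reshape); convert_zip_code is dataset-specific and omitted:

import tensorflow as tf

def fill_in_missing(x):
    """Fills missing values in a SparseTensor and converts it to a dense tensor."""
    default_value = "" if x.dtype == tf.string else 0
    dense = tf.sparse.to_dense(
        tf.sparse.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
        default_value)
    return tf.squeeze(dense, axis=1)

def convert_num_to_one_hot(label_tensor, num_labels=2):
    """Converts integer category indices into a one-hot encoded feature."""
    one_hot_tensor = tf.one_hot(label_tensor, num_labels)
    return tf.reshape(one_hot_tensor, [-1, num_labels])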
Example 2
def _build_keras_model(tf_transform_output, hidden_units, learning_rate):
    """Creates a DNN Keras model for classifying taxi data.
  Args:
    hidden_units: [int], the layer sizes of the DNN (input layer first).
  Returns:
    A keras Model.
  """

    numeric_columns = [
        tf.feature_column.numeric_column(key=features.transformed_name(key),
                                         shape=())
        for key in features.NUMERIC_FEATURE_KEYS
    ]

    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(
            key=features.transformed_name(key),
            num_buckets=tf_transform_output.num_buckets_for_transformed_feature(
                features.transformed_name(key)),
            default_value=0) for key in features.CATEGORICAL_FEATURE_KEYS
    ]

    indicator_columns = [
        tf.feature_column.indicator_column(categorical_column)
        for categorical_column in categorical_columns
    ]

    model = _wide_and_deep_classifier(
        # TODO(b/139668410) replace with premade wide_and_deep keras model
        wide_columns=indicator_columns,
        deep_columns=numeric_columns,
        dnn_hidden_units=hidden_units,
        learning_rate=learning_rate)
    return model
Example 3
def _build_keras_model(hparams: kerastuner.HyperParameters, 
                       tf_transform_output: tft.TFTransformOutput) -> tf.keras.Model:
  """Creates a Keras WideDeep Classifier model.
  Args:
    hparams: Holds HyperParameters for tuning.
    tf_transform_output: A TFTransformOutput.
  Returns:
    A keras Model.
  """
  # Defines deep feature columns and input layers.
  deep_columns = [
      tf.feature_column.numeric_column(
          key=features.transformed_name(key), 
          shape=())
      for key in features.NUMERIC_FEATURE_KEYS
  ]
  
  input_layers = {
      column.key: tf.keras.layers.Input(name=column.key, shape=(), dtype=tf.float32)
      for column in deep_columns
  }    

  # Defines wide feature columns and input layers.
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          key=features.transformed_name(key), 
          num_buckets=tf_transform_output.num_buckets_for_transformed_feature(features.transformed_name(key)), 
          default_value=0)
      for key in features.CATEGORICAL_FEATURE_KEYS
  ]

  wide_columns = [
      tf.feature_column.indicator_column(categorical_column)
      for categorical_column in categorical_columns
  ]
    
  input_layers.update({
      column.categorical_column.key: tf.keras.layers.Input(name=column.categorical_column.key, shape=(), dtype=tf.int32)
      for column in wide_columns
  })

  # Build Keras model using hparams.
  deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
  for n in range(int(hparams.get('n_layers'))):
    deep = tf.keras.layers.Dense(units=hparams.get('n_units_' + str(n + 1)))(deep)

  wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

  output = tf.keras.layers.Dense(features.NUM_CLASSES, activation='softmax')(
               tf.keras.layers.concatenate([deep, wide]))

  model = tf.keras.Model(input_layers, output)
  model.compile(
      loss='sparse_categorical_crossentropy',
      optimizer=tf.keras.optimizers.Adam(learning_rate=hparams.get('learning_rate')),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
  model.summary(print_fn=absl.logging.info)

  return model    
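The hparams object consumed above must define 'learning_rate', 'n_layers', and 'n_units_<i>' entries. A sketch of a search space that satisfies those names (the value ranges are assumptions, not taken from the example):

def _get_hyperparameters() -> kerastuner.HyperParameters:
    """Returns a hyperparameter search space matching _build_keras_model."""
    hp = kerastuner.HyperParameters()
    # Names match what _build_keras_model reads; ranges are illustrative only.
    hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4], default=1e-3)
    hp.Int('n_layers', 1, 2, default=1)
    # Only tune unit counts for layers that actually exist.
    with hp.conditional_scope('n_layers', 1):
        hp.Int('n_units_1', min_value=8, max_value=128, step=8, default=8)
    with hp.conditional_scope('n_layers', 2):
        hp.Int('n_units_1', min_value=8, max_value=128, step=8, default=8)
        hp.Int('n_units_2', min_value=8, max_value=128, step=8, default=8)
    return hp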
Example 4
def _input_fn(file_pattern, tf_transform_output, batch_size=200):
    """Generates features and label for tuning/training.

  Args:
    file_pattern: input tfrecord file pattern.
    tf_transform_output: A TFTransformOutput.
    batch_size: int, the number of consecutive elements of the returned
      dataset to combine in a single batch.

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
    transformed_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy()
    )

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transformed_feature_spec,
        reader=_gzip_reader_fn,
        label_key=features.transformed_name(features.LABEL_KEY),
    )

    return dataset
Example 5
def _input_fn(filenames, tf_transform_output, batch_size=200):
    """Generates features and labels for training or evaluation.

  Args:
    filenames: [str] list of gzipped TFRecord files to read data from.
    tf_transform_output: A TFTransformOutput.
    batch_size: int First dimension size of the Tensors returned by input_fn

  Returns:
    A (features, indices) tuple where features is a dictionary of
      Tensors, and indices is a single Tensor of label indices.
  """
    transformed_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy())

    dataset = tf.data.experimental.make_batched_features_dataset(
        filenames,
        batch_size,
        transformed_feature_spec,
        reader=_gzip_reader_fn)

    transformed_features = tf.compat.v1.data.make_one_shot_iterator(
        dataset).get_next()
    # We pop the label because we do not want to use it as a feature while we're
    # training.
    return transformed_features, transformed_features.pop(
        features.transformed_name(features.LABEL_KEY))
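Both _input_fn variants above delegate file reading to a _gzip_reader_fn helper that is not shown; in the TFX examples it is a one-liner over gzipped TFRecord files:

def _gzip_reader_fn(filenames):
    """Small utility returning a record reader that can read gzipped files."""
    return tf.data.TFRecordDataset(filenames, compression_type='GZIP')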
Example 6
def preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
    outputs = {}
    for key in features.DENSE_FLOAT_FEATURE_KEYS:
        # Preserve this feature as a dense float, setting nan's to the mean.
        outputs[features.transformed_name(key)] = tft.scale_to_z_score(
            _fill_in_missing(inputs[key]))

    outputs[features.transformed_name(
        features.LABEL_KEY)] = inputs[features.LABEL_KEY]

    return outputs
Example 7
def preprocessing_fn(inputs):
  """Preprocesses Covertype Dataset."""

  outputs = {}

  # Scale numerical features
  for key in features.NUMERIC_FEATURE_KEYS:
    outputs[features.transformed_name(key)] = tft.scale_to_z_score(
        _fill_in_missing(inputs[key]))

  # Generate vocabularies and map categorical features
  for key in features.CATEGORICAL_FEATURE_KEYS:
    outputs[features.transformed_name(key)] = tft.compute_and_apply_vocabulary(
        x=_fill_in_missing(inputs[key]), num_oov_buckets=1, vocab_filename=key)

  # Convert Cover_Type to dense tensor
  outputs[features.transformed_name(features.LABEL_KEY)] = _fill_in_missing(
      inputs[features.LABEL_KEY])

  return outputs
Example 8
def serve_tf_examples_fn(serialized_tf_examples):
    """Returns the output to be used in the serving signature."""
    feature_spec = tf_transform_output.raw_feature_spec()
    feature_spec.pop(features.LABEL_KEY)
    parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)

    transformed_features = model.tft_layer(parsed_features)
    transformed_features.pop(
        features.transformed_name(features.LABEL_KEY), None)

    return model(transformed_features)
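serve_tf_examples_fn closes over model and tf_transform_output, so in a TFX run_fn it is typically exported as the model's serving signature. A sketch of the usual wiring (serving_model_dir is an assumed variable):

# Keep the transform graph reachable from the SavedModel.
model.tft_layer = tf_transform_output.transform_features_layer()

signatures = {
    'serving_default':
        tf.function(serve_tf_examples_fn).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name='examples')),
}
model.save(serving_model_dir, save_format='tf', signatures=signatures)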
Example 9
def preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
    outputs = {}
    for key in features.DENSE_FLOAT_FEATURE_KEYS:
        # Preserve this feature as a dense float, setting nan's to the mean.
        outputs[features.transformed_name(key)] = tft.scale_to_z_score(
            _fill_in_missing(inputs[key]))

    for key in features.VOCAB_FEATURE_KEYS:
        # Build a vocabulary for this feature.
        outputs[features.transformed_name(
            key)] = tft.compute_and_apply_vocabulary(
                _fill_in_missing(inputs[key]),
                top_k=features.VOCAB_SIZE,
                num_oov_buckets=features.OOV_SIZE)

    for key, num_buckets in zip(features.BUCKET_FEATURE_KEYS,
                                features.BUCKET_FEATURE_BUCKET_COUNT):
        outputs[features.transformed_name(key)] = tft.bucketize(
            _fill_in_missing(inputs[key]), num_buckets)

    for key in features.CATEGORICAL_FEATURE_KEYS:
        outputs[features.transformed_name(key)] = _fill_in_missing(inputs[key])

    # TODO(b/157064428): Support label transformation for Keras.
    # Do not apply label transformation as it will result in wrong evaluation.
    outputs[features.transformed_name(
        features.LABEL_KEY)] = inputs[features.LABEL_KEY]

    return outputs
Example 10
def _eval_input_receiver_fn(tf_transform_output, schema):
  """Build everything needed for the tf-model-analysis to run the model.

  Args:
    tf_transform_output: A TFTransformOutput.
    schema: the schema of the input data.

  Returns:
    EvalInputReceiver function, which contains:
      - Tensorflow graph which parses raw untransformed features, applies the
        tf-transform preprocessing operators.
      - Set of raw, untransformed features.
      - Label against which predictions will be compared.
  """
  # Notice that the inputs are raw features, not transformed features here.
  raw_feature_spec = _get_raw_feature_spec(schema)

  serialized_tf_example = tf.compat.v1.placeholder(
      dtype=tf.string, shape=[None], name='input_example_tensor')

  # Add a parse_example operator to the tensorflow graph, which will parse
  # raw, untransformed, tf examples.
  raw_features = tf.io.parse_example(
      serialized=serialized_tf_example, features=raw_feature_spec)

  # Now that we have our raw examples, process them through the tf-transform
  # function computed during the preprocessing step.
  transformed_features = tf_transform_output.transform_raw_features(
      raw_features)

  # The key name MUST be 'examples'.
  receiver_tensors = {'examples': serialized_tf_example}

  # NOTE: Model is driven by transformed features (since training works on the
  # materialized output of TFT), but slicing will happen on raw features.
  raw_features.update(transformed_features)

  return tfma.export.EvalInputReceiver(
      features=raw_features,
      receiver_tensors=receiver_tensors,
      labels=transformed_features[features.transformed_name(
          features.LABEL_KEY)])
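In Estimator-based pipelines this receiver function is handed to TFMA when exporting the evaluation model. A sketch of the call, assuming a trained estimator and an eval_model_dir:

tfma.export.export_eval_savedmodel(
    estimator=estimator,
    export_dir_base=eval_model_dir,
    eval_input_receiver_fn=lambda: _eval_input_receiver_fn(
        tf_transform_output, schema))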
Example 11
def _input_fn(file_pattern, data_accessor, tf_transform_output, batch_size=200):
  """Generates features and label for tuning/training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    tf_transform_output: A TFTransformOutput.
    batch_size: int, the number of consecutive elements of the returned
      dataset to combine in a single batch.

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  return data_accessor.tf_dataset_factory(
      file_pattern,
      dataset_options.TensorFlowDatasetOptions(
          batch_size=batch_size,
          label_key=features.transformed_name(features.LABEL_KEY)),
      tf_transform_output.transformed_metadata.schema)
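Inside a Trainer run_fn this variant is called once per split; fn_args is the standard FnArgs object handed to run_fn (the batch size below is an assumption):

tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(fn_args.train_files, fn_args.data_accessor,
                          tf_transform_output, batch_size=64)
eval_dataset = _input_fn(fn_args.eval_files, fn_args.data_accessor,
                         tf_transform_output, batch_size=64)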
Example 12
def get_model(show_summary: bool = True) -> tf.keras.models.Model:
    """Defines a wide-and-deep Keras model and returns it as a Keras object."""

    # one-hot categorical features
    input_features = []
    for key, dim in features.ONE_HOT_FEATURES.items():
        input_features.append(
            tf.keras.Input(
                shape=(dim + 1,), name=features.transformed_name(key)
            )
        )

    # adding bucketized features
    for key, dim in features.BUCKET_FEATURES.items():
        input_features.append(
            tf.keras.Input(
                shape=(dim + 1,), name=features.transformed_name(key)
            )
        )

    # adding text input features
    input_texts = []
    for key in features.TEXT_FEATURES.keys():
        input_texts.append(
            tf.keras.Input(
                shape=(1,), name=features.transformed_name(key), dtype=tf.string
            )
        )

    # embed text features
    MODULE_URL = "https://tfhub.dev/google/universal-sentence-encoder/4"
    embed = hub.KerasLayer(MODULE_URL)
    # Assumes a single text feature; flatten it before embedding.
    reshaped_narrative = tf.reshape(input_texts[0], [-1])
    embed_narrative = embed(reshaped_narrative)
    deep_ff = tf.keras.layers.Reshape((512,), input_shape=(1, 512))(
        embed_narrative
    )

    deep = tf.keras.layers.Dense(256, activation="relu")(deep_ff)
    deep = tf.keras.layers.Dense(64, activation="relu")(deep)
    deep = tf.keras.layers.Dense(16, activation="relu")(deep)

    wide_ff = tf.keras.layers.concatenate(input_features)
    wide = tf.keras.layers.Dense(16, activation="relu")(wide_ff)

    both = tf.keras.layers.concatenate([deep, wide])

    output = tf.keras.layers.Dense(1, activation="sigmoid")(both)

    inputs = input_features + input_texts

    keras_model = tf.keras.models.Model(inputs, output)
    keras_model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=constants.LEARNING_RATE
        ),
        loss="binary_crossentropy",
        metrics=[
            tf.keras.metrics.BinaryAccuracy(),
            tf.keras.metrics.TruePositives(),
        ],
    )
    if show_summary:
        keras_model.summary()

    return keras_model
Example 13
def testTransformedName(self):
    name = "a_name"
    expected = "a_name_xf"
    self.assertEqual(expected, features.transformed_name(name))
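The test pins down the contract of features.transformed_name: it appends an _xf suffix to the raw feature name. A minimal implementation consistent with the expected value:

def transformed_name(key: str) -> str:
    """Marks a feature name as post-transform by appending the '_xf' suffix."""
    return key + '_xf'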