Ejemplo n.º 1
0
        def preprocessing_fn(inputs):
            """Return the min and mean of columns 'x' and 'y', one copy per row.

            Each analyzer result is added to a zeros tensor shaped like its
            input column (presumably to broadcast the analyzer output to the
            column's shape -- confirm against the tft analyzer contract).

            Args:
              inputs: mapping that provides the 's', 'x' and 'y' columns.

            Returns:
              A dict with the keys 'x_min', 'x_mean', 'y_min' and 'y_mean'.
            """
            # These two calls are made only for their effect on the pipeline;
            # whatever they return is intentionally discarded.
            tft.vocabulary(inputs['s'])
            tft.bucketize(inputs['x'], 2, name='bucketize')

            outputs = {}

            x = inputs['x']
            outputs['x_min'] = tft.min(x, name='x') + tf.zeros_like(x)
            outputs['x_mean'] = tft.mean(x, name='x') + tf.zeros_like(x)

            y = inputs['y']
            outputs['y_min'] = tft.min(y, name='y') + tf.zeros_like(y)
            outputs['y_mean'] = tft.mean(y, name='y') + tf.zeros_like(y)

            return outputs
Ejemplo n.º 2
0
        def preprocessing_fn(inputs):
            """Integerize 's' and return min/mean copies of 'x' and 'y'.

            Each min/mean analyzer result is added to a zeros tensor shaped
            like its input column (presumably to broadcast the analyzer output
            to the column's shape -- confirm against the tft analyzer contract).

            Args:
              inputs: mapping that provides the 's', 'x' and 'y' columns.

            Returns:
              A dict with the keys 'integerized_s', 'x_min', 'x_mean', 'y_min'
              and 'y_mean'.
            """
            outputs = {
                'integerized_s': tft.compute_and_apply_vocabulary(inputs['s']),
            }

            # Called only for its effect on the pipeline; the return value is
            # intentionally discarded.
            tft.bucketize(inputs['x'], 2, name='bucketize')

            x = inputs['x']
            outputs['x_min'] = tft.min(x, name='x') + tf.zeros_like(x)
            outputs['x_mean'] = tft.mean(x, name='x') + tf.zeros_like(x)

            y = inputs['y']
            outputs['y_min'] = tft.min(y, name='y') + tf.zeros_like(y)
            outputs['y_mean'] = tft.mean(y, name='y') + tf.zeros_like(y)

            return outputs
Ejemplo n.º 3
0
        def preprocessing_fn(inputs):
            """Map five analyzers of column 'a' back onto the column with tft.map.

            Args:
              inputs: mapping that provides the 'a' column.

            Returns:
              A dict with the keys 'min', 'max', 'sum', 'size' and 'mean'.
            """
            def repeat(in_tensor, value):
                # Multiply `value` by a ones vector whose length is the first
                # dimension of `in_tensor`.
                num_rows = tf.shape(in_tensor)[0]
                return tf.ones([num_rows], value.dtype) * value

            column = inputs['a']
            # Kept as an ordered sequence so the analyzers are created in the
            # same order as the output keys are listed.
            analyzers = (
                ('min', tft.min),
                ('max', tft.max),
                ('sum', tft.sum),
                ('size', tft.size),
                ('mean', tft.mean),
            )

            outputs = {}
            for key, analyzer in analyzers:
                outputs[key] = tft.map(repeat, column, analyzer(column))
            return outputs
Ejemplo n.º 4
0
    def preprocessing_fn(inputs):
      """Return min/mean copies of 'x' and 'y' plus an integerized 's'.

      Each min/mean analyzer result is added to a zeros tensor shaped like its
      input column (presumably to broadcast the analyzer output to the column's
      shape -- confirm against the tft analyzer contract).

      Args:
        inputs: mapping that provides the 's', 'x', 'y' and 'label' columns.

      Returns:
        A dict with the keys 'x_min', 'x_mean', 'y_min', 'y_mean' and
        's_integerized'.
      """
      # These two calls are made only for their effect on the pipeline;
      # whatever they return is intentionally discarded.
      tft.vocabulary(inputs['s'], vocab_filename='vocab1')
      tft.bucketize(inputs['x'], 2, name='bucketize')

      outputs = {}

      x = inputs['x']
      outputs['x_min'] = tft.min(x, name='x') + tf.zeros_like(x)
      outputs['x_mean'] = tft.mean(x, name='x') + tf.zeros_like(x)

      y = inputs['y']
      outputs['y_min'] = tft.min(y, name='y') + tf.zeros_like(y)
      outputs['y_mean'] = tft.mean(y, name='y') + tf.zeros_like(y)

      # The vocabulary for 's' is computed with 'label' as the labels argument
      # and adjusted mutual information enabled.
      outputs['s_integerized'] = tft.compute_and_apply_vocabulary(
          inputs['s'],
          labels=inputs['label'],
          use_adjusted_mutual_info=True)

      return outputs
Ejemplo n.º 5
0
  def preprocessing_fn(inputs):
    """Preprocess the movielens example columns.

    The rated-movie scores are min-max scaled, and the genre and movie id
    columns are replaced by integer indexes looked up in vocabularies built
    with `tft.uniques`. Every other column in EXAMPLE_COLUMNS is passed through
    unchanged.

    Args:
      inputs: a `dict` that maps EXAMPLE_COLUMNS to the corresponding
        Tensor/SparseTensor.
    Returns:
      A `dict` that maps EXAMPLE_COLUMNS to the transformed Tensor/SparseTensor.
    """

    def scale_sparse_values(x, min_value, max_value):
      """Apply 0-1 normalization to the values of a SparseTensor.

      Args:
        x: the input sparse tensor.
        min_value: minimum value for x.values.
        max_value: maximum value for x.values.
      Returns:
        A sparse tensor with the indices and dense shape of x whose values are
        (x.values - min_value) / (max_value - min_value).
      """
      shifted = x.values - min_value
      value_range = max_value - min_value
      return tf.SparseTensor(
          indices=x.indices,
          values=shifted / value_range,
          dense_shape=x.dense_shape)

    def map_to_int(x, vocabulary_or_file):
      """Look up the integer index of every string in x.

      Args:
        x : a Tensor/SparseTensor of string.
        vocabulary_or_file: a Tensor/SparseTensor containing unique string
          values within x or a single value for the file where the vocabulary
          is stored.

      Returns:
        A Tensor/SparseTensor of indexes (int) of the same shape as x.
      """
      # TODO(b/62489180): Remove this workaround once TFT 0.2.0 is released.
      vocab_is_file = (
          hasattr(impl, '_asset_files_supported') and
          impl._asset_files_supported())  # pylint: disable=protected-access
      # Both tables are created with a single out-of-vocabulary bucket.
      if vocab_is_file:
        return tf.contrib.lookup.string_to_index_table_from_file(
            vocabulary_file=vocabulary_or_file, num_oov_buckets=1).lookup(x)
      return tf.contrib.lookup.string_to_index_table_from_tensor(
          mapping=vocabulary_or_file, num_oov_buckets=1).lookup(x)

    # Start from a pass-through of every column, then overwrite the ones that
    # are transformed below.
    outputs = dict((name, inputs[name]) for name in EXAMPLE_COLUMNS)

    scores = inputs[QUERY_RATED_MOVIE_SCORES]
    rating_max = tft.max(scores.values)
    rating_min = tft.min(scores.values)
    outputs[QUERY_RATED_MOVIE_SCORES] = scale_sparse_values(
        scores, rating_min, rating_max)

    # One vocabulary shared by all genre columns, one shared by all movie
    # columns.
    genre_values = tf.concat(
        [inputs[QUERY_RATED_GENRE_IDS].values,
         inputs[CANDIDATE_GENRE_IDS].values], 0)
    genre_vocab = tft.uniques(genre_values)

    movie_values = tf.concat(
        [inputs[QUERY_RATED_MOVIE_IDS].values,
         inputs[CANDIDATE_MOVIE_ID].values,
         inputs[RANKING_CANDIDATE_MOVIE_IDS].values], 0)
    movie_vocab = tft.uniques(movie_values)

    # Ordered so that the lookups are created in a fixed sequence.
    vocab_for_column = (
        (QUERY_RATED_GENRE_IDS, genre_vocab),
        (CANDIDATE_GENRE_IDS, genre_vocab),
        (QUERY_RATED_MOVIE_IDS, movie_vocab),
        (CANDIDATE_MOVIE_ID, movie_vocab),
        (RANKING_CANDIDATE_MOVIE_IDS, movie_vocab),
    )
    for column, vocab in vocab_for_column:
      outputs[column] = tft.apply_function(map_to_int, inputs[column], vocab)

    return outputs
Ejemplo n.º 6
0
 def preprocessing_fn(inputs):
     """Shift column 'x' by its minimum, then divide by the shifted maximum.

     Args:
       inputs: mapping that provides the 'x' column.

     Returns:
       A dict with the single key 'x_scaled'.
     """
     x = inputs['x']

     # Step 1: subtract the column minimum.
     x_min = tft.min(x)
     shifted = tft.map(lambda value, offset: value - offset, x, x_min)

     # Step 2: divide by the maximum of the shifted column.
     shifted_max = tft.max(shifted)
     unit_scaled = tft.map(
         lambda value, scale: value / scale, shifted, shifted_max)

     return {'x_scaled': unit_scaled}
Ejemplo n.º 7
0
 def min_fn(inputs):
     """Map `repeat` over column 'a' together with the column's tft.min.

     Args:
       inputs: mapping that provides the 'a' column.

     Returns:
       A dict with the single key 'min'.
     """
     column = inputs['a']
     column_min = tft.min(column)
     # `repeat` is not defined in this function; it comes from the enclosing
     # scope.
     return {'min': tft.map(repeat, column, column_min)}
Ejemplo n.º 8
0
    def preprocessing_fn(inputs):
        """Preprocess the movielens example columns.

        The rated-movie scores are min-max scaled, and the genre and movie id
        columns are replaced by integer indexes looked up in vocabularies
        built with `tft.uniques`. Every other column in EXAMPLE_COLUMNS is
        passed through unchanged.

        Args:
          inputs: a `dict` that maps EXAMPLE_COLUMNS to the corresponding
            Tensor/SparseTensor.
        Returns:
          A `dict` that maps EXAMPLE_COLUMNS to the transformed
          Tensor/SparseTensor.
        """

        def scale_sparse_values(x, min_value, max_value):
            """Apply 0-1 normalization to the values of a SparseTensor.

            Args:
              x: the input sparse tensor.
              min_value: minimum value for x.values.
              max_value: maximum value for x.values.
            Returns:
              A sparse tensor with the indices and dense shape of x whose
              values are (x.values - min_value) / (max_value - min_value).
            """
            numerator = x.values - min_value
            denominator = max_value - min_value
            return tf.SparseTensor(indices=x.indices,
                                   values=numerator / denominator,
                                   dense_shape=x.dense_shape)

        def map_to_int(x, vocabulary_or_file):
            """Look up the integer index of every string in x.

            Args:
              x : a Tensor/SparseTensor of string.
              vocabulary_or_file: a Tensor/SparseTensor containing unique
                string values within x or a single value for the file where
                the vocabulary is stored.

            Returns:
              A Tensor/SparseTensor of indexes (int) of the same shape as x.
            """
            # TODO(b/62489180): Remove this workaround once TFT 0.2.0 is released.
            from_file = (
                hasattr(impl, '_asset_files_supported')
                and impl._asset_files_supported())  # pylint: disable=protected-access
            # Both tables are created with a single out-of-vocabulary bucket.
            if from_file:
                index_table = tf.contrib.lookup.string_to_index_table_from_file(
                    vocabulary_file=vocabulary_or_file, num_oov_buckets=1)
                return index_table.lookup(x)
            index_table = tf.contrib.lookup.string_to_index_table_from_tensor(
                mapping=vocabulary_or_file, num_oov_buckets=1)
            return index_table.lookup(x)

        # Start from a pass-through of every column, then overwrite the ones
        # that are transformed below.
        transformed = dict(
            (column, inputs[column]) for column in EXAMPLE_COLUMNS)

        movie_scores = inputs[QUERY_RATED_MOVIE_SCORES]
        rating_max = tft.max(movie_scores.values)
        rating_min = tft.min(movie_scores.values)
        transformed[QUERY_RATED_MOVIE_SCORES] = scale_sparse_values(
            movie_scores, rating_min, rating_max)

        # One vocabulary shared by all genre columns, one shared by all movie
        # columns.
        all_genre_ids = tf.concat([
            inputs[QUERY_RATED_GENRE_IDS].values,
            inputs[CANDIDATE_GENRE_IDS].values,
        ], 0)
        genre_vocab = tft.uniques(all_genre_ids)

        all_movie_ids = tf.concat([
            inputs[QUERY_RATED_MOVIE_IDS].values,
            inputs[CANDIDATE_MOVIE_ID].values,
            inputs[RANKING_CANDIDATE_MOVIE_IDS].values,
        ], 0)
        movie_vocab = tft.uniques(all_movie_ids)

        # Ordered so that the lookups are created in a fixed sequence.
        lookups = [
            (QUERY_RATED_GENRE_IDS, genre_vocab),
            (CANDIDATE_GENRE_IDS, genre_vocab),
            (QUERY_RATED_MOVIE_IDS, movie_vocab),
            (CANDIDATE_MOVIE_ID, movie_vocab),
            (RANKING_CANDIDATE_MOVIE_IDS, movie_vocab),
        ]
        for column, vocab in lookups:
            transformed[column] = tft.apply_function(
                map_to_int, inputs[column], vocab)

        return transformed
Ejemplo n.º 9
0
 def apply(x):
     """Pass `x` and its `tft.min` to `_impute` and return the result.

     `_impute` is defined outside this function; what it does with the
     minimum is not visible here.
     """
     return _impute(x, tft.min(x))