def preprocessing_fn(inputs):
    """Compute min and mean features for the 'x' and 'y' columns.

    `tft.vocabulary` is also called on `inputs['s']` and `tft.bucketize` on
    `inputs['x']`; neither result appears in the returned features.

    Args:
        inputs: a mapping that provides the 's', 'x' and 'y' columns.

    Returns:
        A dict with the keys 'x_min', 'x_mean', 'y_min' and 'y_mean'. Each
        value is an analyzer result added to `tf.zeros_like` of its column.
    """

    def add_to_zeros(statistic, column):
        # Presumably this expands the statistic to the column's shape so it
        # can be emitted next to every row -- TODO confirm.
        return statistic + tf.zeros_like(column)

    # The outputs of these two calls are discarded.
    unused_vocabulary = tft.vocabulary(inputs['s'])
    x_column = inputs['x']
    unused_buckets = tft.bucketize(x_column, 2, name='bucketize')

    features = {}
    # Both analyzers of a column are deliberately given the same `name`.
    features['x_min'] = add_to_zeros(tft.min(x_column, name='x'), x_column)
    features['x_mean'] = add_to_zeros(tft.mean(x_column, name='x'), x_column)

    y_column = inputs['y']
    features['y_min'] = add_to_zeros(tft.min(y_column, name='y'), y_column)
    features['y_mean'] = add_to_zeros(tft.mean(y_column, name='y'), y_column)
    return features
def preprocessing_fn(inputs):
    """Integerize 's' and compute min and mean features for 'x' and 'y'.

    `tft.bucketize` is also called on `inputs['x']`; its result is not part
    of the returned features.

    Args:
        inputs: a mapping that provides the 's', 'x' and 'y' columns.

    Returns:
        A dict with the keys 'integerized_s', 'x_min', 'x_mean', 'y_min' and
        'y_mean'. The min/mean values are analyzer results added to
        `tf.zeros_like` of the source column.
    """
    features = {
        'integerized_s': tft.compute_and_apply_vocabulary(inputs['s']),
    }

    x_column = inputs['x']
    # The bucketize output is discarded.
    unused_buckets = tft.bucketize(x_column, 2, name='bucketize')

    # Both analyzers of a column are deliberately given the same `name`.
    x_zeros_min = tft.min(x_column, name='x') + tf.zeros_like(x_column)
    features['x_min'] = x_zeros_min
    x_zeros_mean = tft.mean(x_column, name='x') + tf.zeros_like(x_column)
    features['x_mean'] = x_zeros_mean

    y_column = inputs['y']
    y_zeros_min = tft.min(y_column, name='y') + tf.zeros_like(y_column)
    features['y_min'] = y_zeros_min
    y_zeros_mean = tft.mean(y_column, name='y') + tf.zeros_like(y_column)
    features['y_mean'] = y_zeros_mean
    return features
def preprocessing_fn(inputs):
    """Emit five statistics of column 'a', each passed through `repeat`.

    Args:
        inputs: a mapping that provides the 'a' column.

    Returns:
        A dict with the keys 'min', 'max', 'sum', 'size' and 'mean'. Each
        value is `tft.map(repeat, inputs['a'], <analyzer>(inputs['a']))`.
    """

    def repeat(in_tensor, value):
        """Return a 1-D tensor of `value`, sized by `in_tensor`'s first dim."""
        # The length comes from the first dimension of `in_tensor`; the
        # dtype comes from `value`.
        ones = tf.ones([tf.shape(in_tensor)[0]], value.dtype)
        return ones * value

    column = inputs['a']
    # Listed in the order the analyzers must be invoked.
    analyzers = (
        ('min', tft.min),
        ('max', tft.max),
        ('sum', tft.sum),
        ('size', tft.size),
        ('mean', tft.mean),
    )
    return {
        key: tft.map(repeat, column, analyzer(column))
        for key, analyzer in analyzers
    }
def preprocessing_fn(inputs):
    """Compute min/mean features for 'x' and 'y' and integerize 's'.

    `tft.vocabulary` (with `vocab_filename='vocab1'`) is also called on
    `inputs['s']` and `tft.bucketize` on `inputs['x']`; neither result is
    part of the returned features.

    Args:
        inputs: a mapping that provides the 's', 'x', 'y' and 'label'
            columns.

    Returns:
        A dict with the keys 'x_min', 'x_mean', 'y_min', 'y_mean' and
        's_integerized'.
    """
    s_column = inputs['s']
    # The outputs of these two calls are discarded.
    unused_vocabulary = tft.vocabulary(s_column, vocab_filename='vocab1')
    x_column = inputs['x']
    unused_buckets = tft.bucketize(x_column, 2, name='bucketize')

    # Both analyzers of a column are deliberately given the same `name`.
    x_min = tft.min(x_column, name='x') + tf.zeros_like(x_column)
    x_mean = tft.mean(x_column, name='x') + tf.zeros_like(x_column)

    y_column = inputs['y']
    y_min = tft.min(y_column, name='y') + tf.zeros_like(y_column)
    y_mean = tft.mean(y_column, name='y') + tf.zeros_like(y_column)

    # The vocabulary for this feature is computed against the 'label' column.
    s_integerized = tft.compute_and_apply_vocabulary(
        inputs['s'],
        labels=inputs['label'],
        use_adjusted_mutual_info=True)

    return {
        'x_min': x_min,
        'x_mean': x_mean,
        'y_min': y_min,
        'y_mean': y_mean,
        's_integerized': s_integerized,
    }
def preprocessing_fn(inputs):
    """User defined preprocessing function for movielens columns.

    Every column in EXAMPLE_COLUMNS is first copied through unchanged. The
    rated-movie scores are then replaced by their 0-1 normalized values, and
    the genre and movie id columns are replaced by integer indexes looked up
    in vocabularies built with `tft.uniques`.

    Args:
        inputs: a `dict` that maps EXAMPLE_COLUMNS to the corresponding
            Tensor/SparseTensor.

    Returns:
        A `dict` that maps EXAMPLE_COLUMNS to the transformed
        Tensor/SparseTensor.
    """

    def scale_sparse_values(x, min_value, max_value):
        """0-1 normalization of the values of a SparseTensor.

        Args:
            x: a input sparse tensor.
            min_value: minimum value for x.values.
            max_value: maximum value for x.values.

        Returns:
            A sparse tensor y such as that y.values is the result of 0-1
            normalization of x.values.
        """
        shifted_values = x.values - min_value
        # NOTE(review): this range is zero when min_value == max_value, so
        # the division below is then by zero -- confirm inputs rule it out.
        value_range = max_value - min_value
        return tf.SparseTensor(
            indices=x.indices,
            values=shifted_values / value_range,
            dense_shape=x.dense_shape)

    def map_to_int(x, vocabulary_or_file):
        """Maps string tensor into indexes using vocab.

        Args:
            x: a Tensor/SparseTensor of string.
            vocabulary_or_file: a Tensor/SparseTensor containing unique
                string values within x or a single value for the file where
                the vocabulary is stored.

        Returns:
            A Tensor/SparseTensor of indexes (int) of the same shape as x.
        """
        # TODO(b/62489180): Remove this workaround once TFT 0.2.0 is released.
        asset_files_supported = (
            hasattr(impl, '_asset_files_supported') and
            impl._asset_files_supported())  # pylint: disable=protected-access
        if asset_files_supported:
            table = tf.contrib.lookup.string_to_index_table_from_file(
                vocabulary_file=vocabulary_or_file, num_oov_buckets=1)
        else:
            table = tf.contrib.lookup.string_to_index_table_from_tensor(
                mapping=vocabulary_or_file, num_oov_buckets=1)
        return table.lookup(x)

    # Start from a pass-through of every example column.
    result = {}
    for column_name in EXAMPLE_COLUMNS:
        result[column_name] = inputs[column_name]

    # Analyzer order (max, then min) is kept as in the original code.
    scores = inputs[QUERY_RATED_MOVIE_SCORES]
    rating_max = tft.max(scores.values)
    rating_min = tft.min(scores.values)
    result[QUERY_RATED_MOVIE_SCORES] = scale_sparse_values(
        scores, rating_min, rating_max)

    # Columns that share a vocabulary, in the order they are concatenated.
    genre_id_columns = (QUERY_RATED_GENRE_IDS, CANDIDATE_GENRE_IDS)
    movie_id_columns = (
        QUERY_RATED_MOVIE_IDS,
        CANDIDATE_MOVIE_ID,
        RANKING_CANDIDATE_MOVIE_IDS,
    )
    genre_vocab = tft.uniques(tf.concat(
        [inputs[column_name].values for column_name in genre_id_columns], 0))
    movie_vocab = tft.uniques(tf.concat(
        [inputs[column_name].values for column_name in movie_id_columns], 0))

    for column_name in genre_id_columns:
        result[column_name] = tft.apply_function(
            map_to_int, inputs[column_name], genre_vocab)
    for column_name in movie_id_columns:
        result[column_name] = tft.apply_function(
            map_to_int, inputs[column_name], movie_vocab)
    return result
def preprocessing_fn(inputs):
    """Scale column 'x' by subtracting its min, then dividing by the new max.

    Args:
        inputs: a mapping that provides the 'x' column.

    Returns:
        A dict with the single key 'x_scaled'.
    """
    x_column = inputs['x']

    # Step 1: subtract the column minimum.
    x_min = tft.min(x_column)
    shifted = tft.map(lambda value, offset: value - offset, x_column, x_min)

    # Step 2: divide by the maximum of the shifted column.
    # NOTE(review): that maximum is zero when every 'x' value is equal, so
    # the division is then by zero -- confirm inputs rule it out.
    shifted_max = tft.max(shifted)
    scaled = tft.map(lambda value, span: value / span, shifted, shifted_max)

    return {'x_scaled': scaled}
def min_fn(inputs):
    """Emit the minimum of column 'a', passed through `repeat`.

    `repeat` is not defined in this function; it is resolved from the
    enclosing scope.

    Args:
        inputs: a mapping that provides the 'a' column.

    Returns:
        A dict with the single key 'min'.
    """
    column = inputs['a']
    column_min = tft.min(column)
    repeated_min = tft.map(repeat, column, column_min)
    return {'min': repeated_min}
def preprocessing_fn(inputs):
    """User defined preprocessing function for movielens columns.

    All EXAMPLE_COLUMNS are passed through. The rated-movie scores are then
    overwritten with 0-1 normalized values, and the genre and movie id
    columns are overwritten with integer indexes looked up in vocabularies
    computed by `tft.uniques`.

    Args:
        inputs: a `dict` that maps EXAMPLE_COLUMNS to the corresponding
            Tensor/SparseTensor.

    Returns:
        A `dict` that maps EXAMPLE_COLUMNS to the transformed
        Tensor/SparseTensor.
    """

    def scale_sparse_values(x, min_value, max_value):
        """0-1 normalization of the values of a SparseTensor.

        Args:
            x: a input sparse tensor.
            min_value: minimum value for x.values.
            max_value: maximum value for x.values.

        Returns:
            A sparse tensor y such as that y.values is the result of 0-1
            normalization of x.values.
        """
        numerator = x.values - min_value
        # NOTE(review): the denominator is zero when min_value equals
        # max_value -- confirm the data guarantees a non-empty range.
        denominator = max_value - min_value
        scaled_values = numerator / denominator
        return tf.SparseTensor(
            indices=x.indices,
            values=scaled_values,
            dense_shape=x.dense_shape)

    def map_to_int(x, vocabulary_or_file):
        """Maps string tensor into indexes using vocab.

        Args:
            x: a Tensor/SparseTensor of string.
            vocabulary_or_file: a Tensor/SparseTensor containing unique
                string values within x or a single value for the file where
                the vocabulary is stored.

        Returns:
            A Tensor/SparseTensor of indexes (int) of the same shape as x.
        """
        # TODO(b/62489180): Remove this workaround once TFT 0.2.0 is released.
        use_vocabulary_file = (
            hasattr(impl, '_asset_files_supported') and
            impl._asset_files_supported())  # pylint: disable=protected-access
        if not use_vocabulary_file:
            # Fall back to an in-graph vocabulary tensor.
            return tf.contrib.lookup.string_to_index_table_from_tensor(
                mapping=vocabulary_or_file, num_oov_buckets=1).lookup(x)
        return tf.contrib.lookup.string_to_index_table_from_file(
            vocabulary_file=vocabulary_or_file, num_oov_buckets=1).lookup(x)

    result = dict(
        (column_name, inputs[column_name]) for column_name in EXAMPLE_COLUMNS)

    # Analyzer order (max, then min) is kept as in the original code.
    rated_scores = inputs[QUERY_RATED_MOVIE_SCORES]
    rating_max = tft.max(rated_scores.values)
    rating_min = tft.min(rated_scores.values)
    result[QUERY_RATED_MOVIE_SCORES] = scale_sparse_values(
        rated_scores, rating_min, rating_max)

    # One vocabulary is shared by all genre columns, another by all movie
    # columns.
    genre_values = [
        inputs[QUERY_RATED_GENRE_IDS].values,
        inputs[CANDIDATE_GENRE_IDS].values,
    ]
    genre_vocab = tft.uniques(tf.concat(genre_values, 0))
    movie_values = [
        inputs[QUERY_RATED_MOVIE_IDS].values,
        inputs[CANDIDATE_MOVIE_ID].values,
        inputs[RANKING_CANDIDATE_MOVIE_IDS].values,
    ]
    movie_vocab = tft.uniques(tf.concat(movie_values, 0))

    # (column, vocabulary) pairs, in the order the lookups must be created.
    vocab_by_column = (
        (QUERY_RATED_GENRE_IDS, genre_vocab),
        (CANDIDATE_GENRE_IDS, genre_vocab),
        (QUERY_RATED_MOVIE_IDS, movie_vocab),
        (CANDIDATE_MOVIE_ID, movie_vocab),
        (RANKING_CANDIDATE_MOVIE_IDS, movie_vocab),
    )
    for column_name, vocab in vocab_by_column:
        result[column_name] = tft.apply_function(
            map_to_int, inputs[column_name], vocab)
    return result
def apply(x):
    """Return `_impute` applied to `x` and the `tft.min` of `x`.

    `_impute` is not defined in this function; it is resolved from the
    enclosing scope.

    Args:
        x: the value handed to both `tft.min` and `_impute`.

    Returns:
        Whatever `_impute(x, tft.min(x))` returns.
    """
    return _impute(x, tft.min(x))