def call(self, inputs):
    """Hash `inputs` into `self._num_bins` buckets.

    For a `SparseTensor` only the values are hashed; the indices and dense
    shape are carried over unchanged. Any other input is handed to the
    hashing op directly.

    Args:
        inputs: a `SparseTensor` or a value accepted by
            `string_to_hash_bucket_fast`.

    Returns:
        The hashed result, as a `SparseTensor` when `inputs` was one.
    """
    # TODO(tanzheny): Add ragged support.
    # TODO(tanzheny): Add int support.
    # string_to_hash_bucket_fast uses FarmHash as hash function.
    if not isinstance(inputs, sparse_tensor.SparseTensor):
        return string_ops.string_to_hash_bucket_fast(
            inputs, self._num_bins, name='lookup')

    hashed_values = string_ops.string_to_hash_bucket_fast(
        inputs.values, self._num_bins, name='lookup')
    return sparse_tensor.SparseTensor(
        indices=inputs.indices,
        values=hashed_values,
        dense_shape=inputs.dense_shape)
def testStringToOneHashBucketFast(self):
    """With a single bucket, every input string is expected in bucket 0."""
    with self.cached_session():
        strings = array_ops.placeholder(dtypes.string)
        buckets = string_ops.string_to_hash_bucket_fast(strings, 1)
        feed = {strings: ['a', 'b', 'c']}
        hashed = buckets.eval(feed_dict=feed)
        self.assertAllEqual([0, 0, 0], hashed)
def _transform_feature(self, inputs):
    """Hash the sparse feature stored under `self.key` into bucket ids.

    Args:
        inputs: object whose `get(self.key)` returns this column's input.

    Returns:
        A `SparseTensor` reusing the input's indices and dense shape, with
        values produced by hashing into `self.hash_bucket_size` buckets.

    Raises:
        ValueError: if the input is not a `SparseTensor`, if its dtype is
            neither string nor integer, or if the column dtype and tensor
            dtype disagree on being integer.
    """
    tensor = inputs.get(self.key)
    if not isinstance(tensor, sparse_tensor_lib.SparseTensor):
        raise ValueError('SparseColumn input must be a SparseTensor.')

    if not (tensor.dtype == dtypes.string or tensor.dtype.is_integer):
        raise ValueError('input tensors dtype must be string or integer. '
                         'dtype: {}, column_name: {}'.format(
                             tensor.dtype, self.key))

    if self.dtype.is_integer != tensor.dtype.is_integer:
        raise ValueError(
            'Column dtype and SparseTensors dtype must be compatible. '
            'key: {}, column dtype: {}, tensor dtype: {}'.format(
                self.key, self.dtype, tensor.dtype))

    # Integer values are stringified first because the hashing op is fed
    # strings in both cases.
    values = tensor.values
    if self.dtype != dtypes.string:
        values = string_ops.as_string(values)

    bucket_ids = string_ops.string_to_hash_bucket_fast(
        values, self.hash_bucket_size, name='lookup')
    return sparse_tensor_lib.SparseTensor(
        tensor.indices, bucket_ids, tensor.dense_shape)
def default_partition_fn(keys, shard_num):
    """Assign every key to a partition using a "mod" strategy.

    Args:
        keys: a tensor (or tensor-convertible value) holding the keys to be
            partitioned.
        shard_num: the number of partitions.

    Returns:
        A tensor of type `tf.int32` with the same shape as `keys`, holding
        the partition id of each key.
    """
    keys_op = ops.convert_to_tensor(keys, name="keys")
    gpu_mode = _pywrap_util_port.IsGoogleCudaEnabled()

    with ops.colocate_with(keys_op):
        if keys_op.dtype == dtypes.string:
            # String keys are hashed into `shard_num` buckets, then masked
            # to the low 31 bits before the cast to int32.
            hashed = string_ops.string_to_hash_bucket_fast(keys_op, shard_num)
            low_bits = constant_op.constant(0x7fffffff, dtypes.int64)
            ids = math_ops.cast(
                bitwise_ops.bitwise_and(hashed, low_bits), dtypes.int32)
        elif keys_op.dtype == dtypes.int64 and gpu_mode:
            # This path performs poorly in some multi-CPU scenarios, so it
            # is only taken when GPUs are available; otherwise the generic
            # path below is used.
            low_bits = constant_op.constant(0x7fffffff, dtypes.int64)
            masked = bitwise_ops.bitwise_and(keys_op, low_bits)
            keys_int32 = math_ops.cast(masked, dtypes.int32)
            shard_count = constant_op.constant(shard_num, dtypes.int32)
            remainder = math_ops.mod(keys_int32, shard_count)
            ids = math_ops.cast(remainder, dtype=dtypes.int32)
        else:
            remainder = math_ops.mod(keys_op, shard_num)
            ids = math_ops.cast(remainder, dtype=dtypes.int32)
    return ids
def _shard_indices(self, keys):
    """Return the int32 shard index of each key.

    String keys (per `self._key_dtype`) are hashed into `self._num_shards`
    buckets; all other keys are reduced modulo `self._num_shards`.
    """
    if self._key_dtype != dtypes.string:
        shard_ids = math_ops.mod(keys, self._num_shards)
    else:
        shard_ids = string_ops.string_to_hash_bucket_fast(
            keys, self._num_shards)
    return math_ops.cast(shard_ids, dtypes.int32)
def _transform_feature(self, inputs):
    """Convert the sparse input of this column into hashed bucket ids.

    Args:
        inputs: object whose `get(self.key)` returns this column's input.

    Returns:
        A `SparseTensor` with the same indices and dense shape as the input
        and values hashed into `self.hash_bucket_size` buckets.

    Raises:
        ValueError: if the input is not a `SparseTensor`, has a dtype other
            than string/integer, or its integer-ness differs from the
            column's dtype.
    """
    sparse_input = inputs.get(self.key)
    if not isinstance(sparse_input, sparse_tensor_lib.SparseTensor):
        raise ValueError('SparseColumn input must be a SparseTensor.')

    is_string = sparse_input.dtype == dtypes.string
    if not is_string and not sparse_input.dtype.is_integer:
        raise ValueError('input tensors dtype must be string or integer. '
                         'dtype: {}, column_name: {}'.format(
                             sparse_input.dtype, self.key))

    if self.dtype.is_integer != sparse_input.dtype.is_integer:
        raise ValueError(
            'Column dtype and SparseTensors dtype must be compatible. '
            'key: {}, column dtype: {}, tensor dtype: {}'.format(
                self.key, self.dtype, sparse_input.dtype))

    # Non-string columns are converted to strings before hashing.
    if self.dtype == dtypes.string:
        string_values = sparse_input.values
    else:
        string_values = string_ops.as_string(sparse_input.values)

    hashed_ids = string_ops.string_to_hash_bucket_fast(
        string_values, self.hash_bucket_size, name='lookup')
    return sparse_tensor_lib.SparseTensor(
        sparse_input.indices, hashed_ids, sparse_input.dense_shape)
def insert_transformed_feature(self, columns_to_tensors):
    """Handles sparse column to id conversion.

    Reads the sparse tensor stored under `self.name`, hashes its values
    into `self.bucket_size` buckets and stores the resulting sparse tensor
    back into `columns_to_tensors` under the key `self`.

    Args:
        columns_to_tensors: mapping that holds the raw tensor under
            `self.name`; it is updated in place.
    """
    raw = columns_to_tensors[self.name]
    hashed_ids = string_ops.string_to_hash_bucket_fast(
        raw.values, self.bucket_size, name=self.name + "_lookup")
    columns_to_tensors[self] = ops.SparseTensor(
        raw.indices, hashed_ids, raw.shape)
def input_fn(params):
    """Builds the input tensors for training or evaluation.

    Uses a dataset-based input pipeline that reads text lines from a file,
    so the entire data set does not have to be loaded in memory.

    NOTE(review): `filename` is not a parameter; it is presumably provided
    by an enclosing scope - confirm against the caller.

    Args:
        params (dict): Additional params; `params['batch_size']` is read
            when `FLAGS.use_tpu` is set.

    Returns:
        A tuple `(features, labels)` where `features` is a dictionary of
        Tensors and `labels` is a single int32 Tensor of label indices
        reshaped to `[batch_size]`.
    """
    batch_size = (
        params['batch_size'] if FLAGS.use_tpu else FLAGS.train_batch_size)
    should_shuffle = True

    dataset = tf.contrib.data.TextLineDataset([filename])
    dataset = dataset.cache().repeat(FLAGS.num_epochs)
    if should_shuffle:
        dataset = dataset.shuffle(batch_size * 10)
    dataset = dataset.batch(batch_size)

    rows = dataset.make_one_shot_iterator().get_next()

    # Parse the CSV File
    features = parse_csv(rows)
    label_table = tf.contrib.lookup.index_table_from_tensor(
        tf.constant(LABELS))
    label_tensor = label_table.lookup(features.pop(LABEL_COLUMN))

    # Convert categorical (string) values to integer indices: an int spec
    # means "hash into that many buckets", anything else is a vocabulary.
    for col, vals in CATEGORICAL_COLS:
        use_hashing = isinstance(vals, int)
        bucket_size = vals if use_hashing else len(vals)
        if use_hashing:
            indices = string_ops.string_to_hash_bucket_fast(
                features[col], bucket_size)
        else:
            vocab_table = tf.contrib.lookup.index_table_from_tensor(vals)
            indices = vocab_table.lookup(features[col])
        indices = tf.cast(indices, tf.int32)
        width = indices.get_shape().as_list()[1]
        features[col] = tf.reshape(indices, [batch_size, width])

    for feature in CONTINUOUS_COLS:
        as_float = tf.to_float(features[feature])
        width = as_float.get_shape().as_list()[1]
        features[feature] = tf.reshape(as_float, [batch_size, width])

    int_labels = tf.cast(label_tensor, tf.int32)
    labels = tf.reshape(int_labels, [batch_size])
    return features, labels
def call(self, inputs):
    """Hash `inputs` into `self._num_bins` buckets.

    Ragged inputs have the hashing op mapped over their flat values, sparse
    inputs have only their values hashed (indices and dense shape are
    reused), and every other input is hashed directly.

    Args:
        inputs: a ragged tensor, a `SparseTensor`, or a value accepted by
            `string_to_hash_bucket_fast`.

    Returns:
        The hashed result, in the same ragged/sparse/dense form as `inputs`.
    """
    # TODO(tanzheny): Add int support.
    # string_to_hash_bucket_fast uses FarmHash as hash function.
    if ragged_tensor.is_ragged(inputs):
        return ragged_functional_ops.map_flat_values(
            string_ops.string_to_hash_bucket_fast,
            inputs,
            num_buckets=self._num_bins,
            name='hash')

    if not isinstance(inputs, sparse_tensor.SparseTensor):
        return string_ops.string_to_hash_bucket_fast(
            inputs, self._num_bins, name='hash')

    hashed_values = string_ops.string_to_hash_bucket_fast(
        inputs.values, self._num_bins, name='hash')
    return sparse_tensor.SparseTensor(
        indices=inputs.indices,
        values=hashed_values,
        dense_shape=inputs.dense_shape)
def replace_oov_buckets(self, inputs, lookups):
    """Replace `-1` entries of `lookups` with a hashed OOV bucket id.

    When `self.num_oov_tokens` is at most 1, `lookups` is returned
    untouched. Otherwise each input is hashed into `self.num_oov_tokens`
    buckets (shifted by one when `self.reserve_zero` is set) and that value
    is used wherever `lookups` equals -1.

    Args:
        inputs: the original lookup keys; integer keys are converted to
            strings before hashing.
        lookups: the lookup results.

    Returns:
        `lookups` with the -1 entries replaced.
    """
    if self.num_oov_tokens <= 1:
        return lookups

    keys = string_ops.as_string(inputs) if inputs.dtype.is_integer else inputs
    oov_ids = string_ops.string_to_hash_bucket_fast(
        keys, num_buckets=self.num_oov_tokens)
    if self.reserve_zero:
        oov_ids = math_ops.add(oov_ids, 1)

    is_oov = math_ops.equal(lookups, -1)
    return array_ops.where(is_oov, oov_ids, lookups)
def get_indices(col, embedding_size, bucket_size):
    """Return the embedding lookup indices for feature column `col`.

    `col_type`, `features` and `shared_flag` are not parameters; they are
    read from the surrounding scope.

    Args:
        col: name of the feature column to index.
        embedding_size: value handed to the lookup table on the 'int' path.
        bucket_size: number of hash buckets on the hashing path.

    Returns:
        The index tensor for `features[col]`.
    """
    if col_type == 'int':
        # NOTE(review): `embedding_size` is passed as the table vocabulary
        # here; this looks like it may have been meant to be a vocabulary
        # list rather than an embedding dimension - confirm with the caller.
        vocab_table = tf.contrib.lookup.index_table_from_tensor(
            embedding_size)
        return vocab_table.lookup(features[col])

    return string_ops.string_to_hash_bucket_fast(
        features[col],
        bucket_size,
        name="deep_shared_hash_" + col + str(shared_flag))
def testStringToHashBucketsFast(self):
    """Checks the bucket ids of four single-letter strings with 10 buckets."""
    with self.cached_session():
        strings = array_ops.placeholder(dtypes.string)
        buckets = string_ops.string_to_hash_bucket_fast(strings, 10)
        hashed = buckets.eval(feed_dict={strings: ['a', 'b', 'c', 'd']})
        # Fingerprint64('a') -> 12917804110809363939 -> mod 10 -> 9
        # Fingerprint64('b') -> 11795596070477164822 -> mod 10 -> 2
        # Fingerprint64('c') -> 11430444447143000872 -> mod 10 -> 2
        # Fingerprint64('d') -> 4470636696479570465 -> mod 10 -> 5
        expected = [9, 2, 2, 5]
        self.assertAllEqual(expected, hashed)
def _transform_feature(self, inputs):
    """Split the feature under `self.key` and hash each token.

    The input is flattened, split on `self.category_delimiter`, and the
    resulting tokens are hashed into `self.num_buckets` buckets.

    Args:
        inputs: object whose `get(self.key)` returns this column's input.

    Returns:
        A `SparseTensor` with the split result's indices and dense shape
        and the hashed tokens as values.

    Raises:
        ValueError: if the split result is not a `SparseTensor`.
    """
    raw = inputs.get(self.key)
    flattened = array_ops.reshape(raw, (-1,))
    tokens = tf.string_split(flattened, self.category_delimiter)
    if not isinstance(tokens, sparse_tensor_lib.SparseTensor):
        raise ValueError('SparseColumn input must be a SparseTensor.')

    bucket_ids = string_ops.string_to_hash_bucket_fast(
        tokens.values, self.num_buckets, name='lookup')
    return sparse_tensor_lib.SparseTensor(
        tokens.indices, bucket_ids, tokens.dense_shape)
def _apply_transform(self, input_tensors, **kwargs):
    """Hashes the first input tensor into `self._num_buckets` buckets.

    Args:
        input_tensors: a list of Tensors; only the first element is used.
        **kwargs: additional keyword arguments, unused here.

    Returns:
        The hashed tensor wrapped by `self.return_type`.
    """
    source = input_tensors[0]
    hashed = string_ops.string_to_hash_bucket_fast(
        source, self._num_buckets, name=None)
    # pylint: disable=not-callable
    return self.return_type(hashed)
def _replace_oov_buckets(self, inputs, lookups):
    """Replace the default OOV value with one of the OOV bucket values.

    When `self.oov_tokens` is None, `lookups` is returned unchanged.
    Otherwise every position where `lookups` equals the table's default
    value is replaced by an entry gathered from `self.oov_tokens`: integer
    inputs pick the entry by floor-modulo, other inputs by hashing.

    Args:
        inputs: the original lookup keys.
        lookups: the lookup results.

    Returns:
        `lookups` with default-valued positions replaced by OOV values.
    """
    if self.oov_tokens is None:
        return lookups

    oov_count = self.oov_tokens.shape.num_elements()
    if not inputs.dtype.is_integer:
        bucket_ids = string_ops.string_to_hash_bucket_fast(
            inputs, num_buckets=oov_count)
    else:
        bucket_ids = math_ops.floormod(inputs, oov_count)

    replacements = array_ops.gather(self.oov_tokens, bucket_ids)
    is_oov = math_ops.equal(
        lookups, self.table._default_value)  # pylint: disable=protected-access
    return array_ops.where(is_oov, replacements, lookups)
def model_fn(features, labels, mode):
    """Model function whose local init op stores a hash of an asset path.

    `absolute_filepath` and `num_buckets` are not parameters; they are read
    from the surrounding scope.

    Args:
        features: unused.
        labels: unused.
        mode: forwarded to the returned `EstimatorSpec`.

    Returns:
        An `EstimatorSpec` whose scaffold assigns the hashed file path to
        the variable named 'some_var'.
    """
    del features, labels  # Unused.
    some_var = variables.Variable(0, name='some_var', dtype=dtypes.int64)

    # We verify the value of filepath_tensor is replaced with a path to the
    # saved model's assets directory by assigning a hash of filepath_tensor
    # to some_var.
    filepath_tensor = ops.convert_to_tensor(absolute_filepath)
    ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filepath_tensor)

    path_hash = string_ops.string_to_hash_bucket_fast(
        filepath_tensor, num_buckets)
    init_op = state_ops.assign(some_var, path_hash).op
    scaffold = monitored_session.Scaffold(local_init_op=init_op)

    step_increment = state_ops.assign_add(training.get_global_step(), 1)
    constant_loss = array_ops.identity(0)
    return model_fn_lib.EstimatorSpec(
        mode,
        scaffold=scaffold,
        train_op=step_increment,
        loss=constant_loss)
def test_with_assets(self):
    """Checks that an asset path is rewritten to the export's assets dir.

    A model is exported whose local init op stores a hash of the asset file
    path in 'some_var'. After loading the export and training one step,
    the variable must equal the hash of the path inside the exported
    assets directory.
    """
    filename = 'test_asset'
    tmpdir = tempfile.mkdtemp()
    absolute_filepath = os.path.join(tmpdir, filename)
    num_buckets = 1000
    # The payload is a bytes literal, so the file must be opened in binary
    # mode; text mode rejects bytes on Python 3.
    with open(absolute_filepath, 'wb') as f:
        f.write(b'test')

    def model_fn(features, labels, mode):
        _, _ = features, labels
        v = variables.Variable(0, name='some_var', dtype=dtypes.int64)
        # We verify the value of filepath_tensor is replaced with a path to
        # the saved model's assets directory by assigning a hash of
        # filepath_tensor to some_var.
        filepath_tensor = ops.convert_to_tensor(absolute_filepath)
        ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filepath_tensor)
        scaffold = monitored_session.Scaffold(
            local_init_op=state_ops.assign(
                v,
                string_ops.string_to_hash_bucket_fast(
                    filepath_tensor, num_buckets)).op)
        return model_fn_lib.EstimatorSpec(
            mode,
            scaffold=scaffold,
            train_op=state_ops.assign_add(training.get_global_step(), 1),
            loss=array_ops.identity(0))

    export_dir = self._export_estimator(predict=False, model_fn=model_fn)
    sme = saved_model_estimator.SavedModelEstimator(
        export_dir, self._get_tmp_dir())

    with self.session() as sess:
        expected_bucket = sess.run(
            string_ops.string_to_hash_bucket_fast(
                os.path.join(
                    export_dir, constants.ASSETS_DIRECTORY, filename),
                num_buckets))

    sme.train(dummy_input_fn, steps=1)
    self.assertEqual(expected_bucket, sme.get_variable_value('some_var'))
def _shard_indices(self, keys): if self._key_dtype == dtypes.string: indices = string_ops.string_to_hash_bucket_fast(keys, self._num_shards) else: indices = math_ops.mod(keys, self._num_shards) return math_ops.cast(indices, dtypes.int32)
def wide_and_deep(features=None, params=None):
    """Builds the logits, head and train-op function of a wide & deep model.

    The wide part (enabled by `params['WIDE']`) sums per-column weight
    lookups for hashed categorical and crossed columns plus a linear term
    over the continuous columns. The deep part (enabled by
    `params['DEEP']`) concatenates embeddings and continuous columns and
    feeds them through dense ReLU layers. When both are enabled their
    logits are added.

    Args:
        features: mapping from column name to input tensor.
        params: mapping with the keys 'WIDE', 'DEEP', 'WIDE_CATE_COLS'
            (iterable of `(col, size)`), 'WIDE_CROSS_COLS' (iterable of
            `(col1, col2, size)`), 'CONTINUOUS_COLS', 'DEEP_EMBEDDING_COLS'
            (iterable of `(col, vals, embedding_size, col_type)`),
            'DEEP_SHARED_EMBEDDING_COLS' (iterable of
            `(cols, vals, embedding_size, col_type, shared_flag)`),
            '_HIDDEN_UNITS', '_LINEAR_LEARNING_RATE' and
            '_DNN_LEARNING_RATE'.

    Returns:
        A tuple `(head, logits, train_op_fn)`; `logits` is None when
        neither part is enabled, and `train_op_fn(loss)` builds the
        training op.
    """
    WIDE_CATE_COLS = params['WIDE_CATE_COLS']
    CONTINUOUS_COLS = params['CONTINUOUS_COLS']
    DEEP_EMBEDDING_COLS = params['DEEP_EMBEDDING_COLS']
    WIDE_CROSS_COLS = params['WIDE_CROSS_COLS']
    DEEP_SHARED_EMBEDDING_COLS = params['DEEP_SHARED_EMBEDDING_COLS']
    _HIDDEN_UNITS = params['_HIDDEN_UNITS']
    _LINEAR_LEARNING_RATE = params['_LINEAR_LEARNING_RATE']
    _DNN_LEARNING_RATE = params['_DNN_LEARNING_RATE']

    # ---------------------------- wide part ----------------------------
    wide_logits = None
    linear_absolute_scope = None
    if params['WIDE']:
        wide_sum = []
        with variable_scope.variable_scope(
                'linear', values=tuple(six.itervalues(features))) as scope:
            linear_absolute_scope = scope.name

            for col, size in WIDE_CATE_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cate" + col)
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], size, name="wide_hash_" + col)
                wide_sum.append(
                    tf.nn.embedding_lookup(
                        w_wide, indices, name="wide_cat_lookup_" + col))

            for col1, col2, size in WIDE_CROSS_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cross" + col1 + '_' + col2)
                # The cross feature is the two raw strings joined by "_".
                cross_input = tf.string_join(
                    [features[col1], features[col2]], separator="_")
                indices = string_ops.string_to_hash_bucket_fast(
                    cross_input, size, name="wide_hash_" + col1 + '_' + col2)
                wide_sum.append(
                    tf.nn.embedding_lookup(
                        w_wide,
                        indices,
                        name="wide_cross_lookup_" + col1 + '_' + col2))

            w_wide = tf.get_variable(
                shape=[len(CONTINUOUS_COLS), 1],
                initializer=init_ops.zeros_initializer,
                trainable=True,
                name="Wide_Part_Weights_Continus")
            bias = tf.get_variable(
                shape=[1],
                initializer=init_ops.zeros_initializer,
                trainable=True,
                name="Wide_Part_Bias")
            x = tf.concat(
                [tf.expand_dims(tf.to_float(features[col]), -1)
                 for col in CONTINUOUS_COLS],
                1,
                name='continus_concat')
            continue_logits = tf.matmul(x, w_wide) + bias
            wide_logits = tf.reduce_sum(wide_sum, 0)
            wide_logits += continue_logits

    # ---------------------------- deep part ----------------------------
    deep_logits = None
    dnn_absolute_scope = None
    if params['DEEP']:
        with variable_scope.variable_scope(
                'Deep_model',
                values=tuple(six.itervalues(features))) as scope:
            dnn_absolute_scope = scope.name

            # Convert categorical (string) values to embeddings
            deep_sum = []
            for col, vals, embedding_size, col_type in DEEP_EMBEDDING_COLS:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_embedding_" + col)
                if col_type != 'int':
                    indices = string_ops.string_to_hash_bucket_fast(
                        features[col], bucket_size, name="deep_hash_" + col)
                else:
                    table = tf.contrib.lookup.index_table_from_tensor(vals)
                    indices = table.lookup(features[col])
                seq_emb = tf.nn.embedding_lookup(
                    embeddings, indices, name="deep_lookup_" + col)
                if col_type == 'seq':
                    print("test my seq:", col)
                    seq_emb = tf.reduce_mean(seq_emb, 1)
                deep_sum.append(seq_emb)

            for (cols, vals, embedding_size, col_type,
                 shared_flag) in DEEP_SHARED_EMBEDDING_COLS:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name=("deep_shared_embedding_" + '_'.join(cols) +
                          str(shared_flag)))
                for col in cols:
                    if col_type != 'int':
                        indices = string_ops.string_to_hash_bucket_fast(
                            features[col],
                            bucket_size,
                            name="deep_shared_hash_" + col + str(shared_flag))
                    else:
                        # The lookup table is built from the vocabulary
                        # `vals`, as in the non-shared branch above; it was
                        # previously built from the embedding dimension.
                        table = tf.contrib.lookup.index_table_from_tensor(
                            vals)
                        indices = table.lookup(features[col])
                    seq_emb = tf.nn.embedding_lookup(
                        embeddings,
                        indices,
                        name="deep_shared_lookup_" + col + str(shared_flag))
                    if col.endswith('seq'):
                        seq_emb = tf.reduce_mean(seq_emb, 1)
                    deep_sum.append(seq_emb)

            for col in CONTINUOUS_COLS:
                deep_sum.append(
                    tf.expand_dims(
                        tf.to_float(features[col]),
                        -1,
                        name='continuous_' + col))
            curr_layer = tf.concat(deep_sum, 1, name="deep_inputs_layer")

            # Build the DNN
            for index, layer_size in enumerate(_HIDDEN_UNITS):
                curr_layer = tf.layers.dense(
                    curr_layer,
                    layer_size,
                    activation=tf.nn.relu,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_hidden_layer" + str(index))
            deep_logits = tf.layers.dense(
                curr_layer, units=1, name="deep_logits")

    # ------------------------ head and optimizers -----------------------
    my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        loss_reduction=losses.Reduction.SUM)
    print(my_head.logits_dimension)

    if deep_logits is not None and wide_logits is not None:
        logits = deep_logits + wide_logits
    elif deep_logits is not None:
        logits = deep_logits
    else:
        logits = wide_logits

    dnn_optimizer = optimizers.get_optimizer_instance(
        'Adagrad', learning_rate=_DNN_LEARNING_RATE)

    def _linear_learning_rate(num_linear_feature_columns):
        # With no linear columns there is nothing to scale by; fall back to
        # the configured rate instead of dividing by zero.
        if not num_linear_feature_columns:
            return _LINEAR_LEARNING_RATE
        default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
        return min(_LINEAR_LEARNING_RATE, default_learning_rate)

    linear_optimizer = optimizers.get_optimizer_instance(
        'Ftrl', learning_rate=_linear_learning_rate(len(WIDE_CATE_COLS)))

    def _train_op_fn(loss):
        """Minimizes `loss` per enabled part, then increments global step."""
        train_ops = []
        global_step = training_util.get_global_step()
        if deep_logits is not None:
            train_ops.append(
                dnn_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_absolute_scope)))
        if wide_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=linear_absolute_scope)))
        train_op = control_flow_ops.group(*train_ops)
        # The step counter only advances after both optimizers have run.
        with ops.control_dependencies([train_op]):
            return state_ops.assign_add(global_step, 1).op

    return my_head, logits, _train_op_fn
def embedding_layer(features=None, params=None):
    """Builds the concatenated deep input layer from embeddings.

    Inside the 'Deep_model' variable scope, every column of
    `params['DEEP_EMBEDDING_COLS']` and
    `params['DEEP_SHARED_EMBEDDING_COLS']` is turned into an embedding and
    every column of `params['CONTINUOUS_COLS']` into a float tensor with a
    trailing dimension of size 1; all of them are concatenated on axis 1.

    Args:
        features: mapping from column name to input tensor.
        params: mapping with the keys 'CONTINUOUS_COLS',
            'DEEP_EMBEDDING_COLS' (iterable of
            `(col, vals, embedding_size, col_type)`) and
            'DEEP_SHARED_EMBEDDING_COLS' (iterable of
            `(cols, vals, embedding_size, col_type, shared_flag)`).

    Returns:
        The concatenated tensor named "deep_inputs_layer".
    """
    CONTINUOUS_COLS = params['CONTINUOUS_COLS']
    DEEP_EMBEDDING_COLS = params['DEEP_EMBEDDING_COLS']
    DEEP_SHARED_EMBEDDING_COLS = params['DEEP_SHARED_EMBEDDING_COLS']

    with variable_scope.variable_scope(
            'Deep_model', values=tuple(six.itervalues(features))):
        deep_sum = []
        for col, vals, embedding_size, col_type in DEEP_EMBEDDING_COLS:
            bucket_size = vals if isinstance(vals, int) else len(vals)
            embeddings = tf.get_variable(
                shape=[bucket_size, embedding_size],
                initializer=init_ops.glorot_uniform_initializer(),
                name="deep_embedding_" + col)
            if col_type != 'int':
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], bucket_size, name="deep_hash_" + col)
            else:
                table = tf.contrib.lookup.index_table_from_tensor(vals)
                indices = table.lookup(features[col])
            seq_emb = tf.nn.embedding_lookup(
                embeddings, indices, name="deep_lookup_" + col)
            if col_type == 'list':
                print("test my seq:", col)
                seq_emb = tf.reduce_mean(seq_emb, 1)
                print(seq_emb)
            deep_sum.append(seq_emb)

        for (cols, vals, embedding_size, col_type,
             shared_flag) in DEEP_SHARED_EMBEDDING_COLS:
            bucket_size = vals if isinstance(vals, int) else len(vals)
            embeddings = tf.get_variable(
                shape=[bucket_size, embedding_size],
                initializer=init_ops.glorot_uniform_initializer(),
                name=("deep_shared_embedding_" + '_'.join(cols) +
                      str(shared_flag)))
            for col in cols:
                if col_type != 'int':
                    indices = string_ops.string_to_hash_bucket_fast(
                        features[col],
                        bucket_size,
                        name="deep_shared_hash_" + col + str(shared_flag))
                else:
                    # The lookup table is built from the vocabulary `vals`,
                    # as in the non-shared branch above; it was previously
                    # built from the embedding dimension.
                    table = tf.contrib.lookup.index_table_from_tensor(vals)
                    indices = table.lookup(features[col])
                seq_emb = tf.nn.embedding_lookup(
                    embeddings,
                    indices,
                    name="deep_shared_lookup_" + col + str(shared_flag))
                if col.endswith('seq'):
                    print("into...")
                    seq_emb = tf.reduce_mean(seq_emb, 1)
                deep_sum.append(seq_emb)

        for col in CONTINUOUS_COLS:
            deep_sum.append(
                tf.expand_dims(
                    tf.to_float(features[col]), -1, name='continuous_' + col))
        curr_layer = tf.concat(deep_sum, 1, name="deep_inputs_layer")

    return curr_layer
def model_fn(mode,
             features,
             labels,
             embedding_size=8,
             hidden_units=[100, 70, 50, 20],
             learning_rate=0.1):
    """Creates a feed forward classification network.

    Args:
        mode (str): Mode running training, evaluation or prediction.
        features (dict): Dictionary of input feature Tensors. It is updated
            in place with the embedded / expanded tensors.
        labels (Tensor): Class label Tensor.
        embedding_size (int): Size of embeddings.
        hidden_units (list): Hidden units. The default list is never
            mutated by this function.
        learning_rate (float): Learning rate for the FTRL optimizer.

    Returns:
        For PREDICT a dict with 'predictions' and 'confidence'; for TRAIN a
        tuple `(train_op, global_step)`; for EVAL a dict with 'accuracy'
        and 'auroc' metrics.
    """
    label_values = tf.constant(LABELS)

    # Keep variance constant with changing embedding sizes.
    embed_initializer = tf.truncated_normal_initializer(
        stddev=(1.0 / tf.sqrt(float(embedding_size))))

    with tf.variable_scope('embeddings', initializer=embed_initializer):
        # Convert categorical (string) values to embeddings
        for col, vals in CATEGORICAL_COLS:
            bucket_size = vals if isinstance(vals, int) else len(vals)
            embeddings = tf.get_variable(
                col, shape=[bucket_size, embedding_size])
            if isinstance(vals, int):
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], bucket_size)
            else:
                table = tf.contrib.lookup.index_table_from_tensor(vals)
                indices = table.lookup(features[col])
            features[col] = tf.nn.embedding_lookup(embeddings, indices)

    for col in CONTINUOUS_COLS:
        # Give continuous columns an extra trivial dimension
        # So they can be concatenated with embedding tensors
        features[col] = tf.expand_dims(tf.to_float(features[col]), -1)

    # Concatenate the (now all dense) features.
    # We need to sort the tensors so that they end up in the same order for
    # prediction, evaluation, and training. Sorting the keys and indexing
    # works on both Python 2 and 3 (`iteritems` and subscripting `zip` do
    # not exist on Python 3).
    sorted_feature_tensors = [features[name] for name in sorted(features)]
    inputs = tf.concat(sorted_feature_tensors, 1)

    # Build the DNN
    curr_layer = inputs
    for layer_size in hidden_units:
        curr_layer = tf.layers.dense(
            curr_layer,
            layer_size,
            activation=tf.nn.relu,
            # This initializer prevents variance from exploding or vanishing
            # when compounded through different sized layers.
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
        )

    # Add the output layer
    logits = tf.layers.dense(
        curr_layer,
        len(LABELS),
        # Do not use ReLU on last layer
        activation=None,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer())

    if mode in (PREDICT, EVAL):
        probabilities = tf.nn.softmax(logits)
        predicted_indices = tf.argmax(probabilities, 1)

    if mode in (TRAIN, EVAL):
        # Convert the string label column to indices
        # Build a lookup table inside the graph
        table = tf.contrib.lookup.index_table_from_tensor(label_values)
        # Use the lookup table to convert string labels to ints
        label_indices = table.lookup(labels)
        # Make labels a vector
        label_indices_vector = tf.squeeze(label_indices)
        # global_step is necessary in eval to correctly load the step
        # of the checkpoint we are evaluating
        global_step = tf.train.get_or_create_global_step()

    if mode == PREDICT:
        # Convert predicted_indices back into strings
        return {
            'predictions': tf.gather(label_values, predicted_indices),
            'confidence': tf.reduce_max(probabilities, axis=1)
        }

    if mode == TRAIN:
        # Build training operation.
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=label_indices_vector))
        tf.summary.scalar('loss', cross_entropy)
        train_op = tf.train.FtrlOptimizer(
            learning_rate=learning_rate,
            l1_regularization_strength=3.0,
            l2_regularization_strength=10.0).minimize(
                cross_entropy, global_step=global_step)
        return train_op, global_step

    if mode == EVAL:
        # Return accuracy and area under ROC curve metrics
        # See https://en.wikipedia.org/wiki/Receiver_operating_characteristic
        # See https://www.kaggle.com/wiki/AreaUnderCurve
        labels_one_hot = tf.one_hot(
            label_indices_vector,
            depth=label_values.shape[0],
            on_value=True,
            off_value=False,
            dtype=tf.bool)
        return {
            'accuracy': tf.metrics.accuracy(label_indices, predicted_indices),
            'auroc': tf.metrics.auc(labels_one_hot, probabilities)
        }
def model_fn(mode,
             features,
             labels,
             embedding_size=8,
             hidden_units=[100, 70, 50, 20],
             learning_rate=0.1):
    """Creates a feed forward classification network.

    Args:
        mode (str): Mode running training, evaluation or prediction.
        features (dict): Dictionary of input feature Tensors. It is updated
            in place with the embedded / expanded tensors.
        labels (Tensor): Class label Tensor.
        embedding_size (int): Size of embeddings.
        hidden_units (list): Hidden units. The default list is never
            mutated by this function.
        learning_rate (float): Learning rate for the FTRL optimizer.

    Returns:
        For PREDICT a dict with 'predictions' and 'confidence'; for TRAIN a
        tuple `(train_op, global_step)`; for EVAL a dict with 'accuracy'
        and 'auroc' metrics.
    """
    label_values = tf.constant(LABELS)

    # Keep variance constant with changing embedding sizes.
    embed_initializer = tf.truncated_normal_initializer(
        stddev=(1.0 / tf.sqrt(float(embedding_size))))

    with tf.variable_scope('embeddings', initializer=embed_initializer):
        # Convert categorical (string) values to embeddings
        for col, vals in CATEGORICAL_COLS:
            bucket_size = vals if isinstance(vals, int) else len(vals)
            embeddings = tf.get_variable(
                col, shape=[bucket_size, embedding_size])
            if isinstance(vals, int):
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], bucket_size)
            else:
                table = tf.contrib.lookup.index_table_from_tensor(vals)
                indices = table.lookup(features[col])
            features[col] = tf.nn.embedding_lookup(embeddings, indices)

    for col in CONTINUOUS_COLS:
        # Give continuous columns an extra trivial dimension
        # So they can be concatenated with embedding tensors
        features[col] = tf.expand_dims(tf.to_float(features[col]), -1)

    # Concatenate the (now all dense) features.
    # We need to sort the tensors so that they end up in the same order for
    # prediction, evaluation, and training. Sorting the keys and indexing
    # works on both Python 2 and 3 (`iteritems` and subscripting `zip` do
    # not exist on Python 3).
    sorted_feature_tensors = [features[name] for name in sorted(features)]
    inputs = tf.concat(sorted_feature_tensors, 1)

    # Build the DNN
    curr_layer = inputs
    for layer_size in hidden_units:
        curr_layer = tf.layers.dense(
            curr_layer,
            layer_size,
            activation=tf.nn.relu,
            # This initializer prevents variance from exploding or vanishing
            # when compounded through different sized layers.
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
        )

    # Add the output layer
    logits = tf.layers.dense(
        curr_layer,
        len(LABELS),
        # Do not use ReLU on last layer
        activation=None,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer())

    if mode in (PREDICT, EVAL):
        probabilities = tf.nn.softmax(logits)
        predicted_indices = tf.argmax(probabilities, 1)

    if mode in (TRAIN, EVAL):
        # Convert the string label column to indices
        # Build a lookup table inside the graph
        table = tf.contrib.lookup.index_table_from_tensor(label_values)
        # Use the lookup table to convert string labels to ints
        label_indices = table.lookup(labels)
        # Make labels a vector
        label_indices_vector = tf.squeeze(label_indices)
        # global_step is necessary in eval to correctly load the step
        # of the checkpoint we are evaluating
        global_step = tf.contrib.framework.get_or_create_global_step()

    if mode == PREDICT:
        # Convert predicted_indices back into strings
        return {
            'predictions': tf.gather(label_values, predicted_indices),
            'confidence': tf.reduce_max(probabilities, axis=1)
        }

    if mode == TRAIN:
        # Build training operation.
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=label_indices_vector))
        tf.summary.scalar('loss', cross_entropy)
        train_op = tf.train.FtrlOptimizer(
            learning_rate=learning_rate,
            l1_regularization_strength=3.0,
            l2_regularization_strength=10.0).minimize(
                cross_entropy, global_step=global_step)
        return train_op, global_step

    if mode == EVAL:
        # Return accuracy and area under ROC curve metrics
        # See https://en.wikipedia.org/wiki/Receiver_operating_characteristic
        # See https://www.kaggle.com/wiki/AreaUnderCurve
        labels_one_hot = tf.one_hot(
            label_indices_vector,
            depth=label_values.shape[0],
            on_value=True,
            off_value=False,
            dtype=tf.bool)
        return {
            'accuracy': tf.metrics.accuracy(label_indices, predicted_indices),
            'auroc': tf.metrics.auc(labels_one_hot, probabilities)
        }
def model_fn(mode,
             features,
             labels,
             embedding_size=8,
             hidden_units=[100, 70, 50, 20],
             learning_rate=0.1):
    """Creates a feed forward classification network.

    Args:
        mode (string): Mode running training, evaluation or prediction.
        features (dict): Dictionary of input feature Tensors. It is updated
            in place with the embedded / converted tensors.
        labels (Tensor): Class label Tensor.
        embedding_size (int): Size of embeddings.
        hidden_units (list): Hidden units. The default list is never
            mutated by this function.
        learning_rate (float): Learning rate for the FTRL optimizer.

    Returns:
        For PREDICT a dict with 'predictions' and 'confidence'; for TRAIN a
        tuple `(train_op, global_step)`; for EVAL a dict with 'accuracy'
        and 'auroc' metrics.
    """
    label_values = tf.constant(LABELS)

    # Keep variance constant with changing embedding sizes.
    with tf.variable_scope(
            'embeddings',
            initializer=tf.truncated_normal_initializer(
                stddev=(1.0 / tf.sqrt(float(embedding_size))))):
        # Convert categorical (string) values to embeddings
        for col, bucket_size in CATEGORICAL_COLS:
            embeddings = tf.get_variable(
                col, shape=[bucket_size, embedding_size])
            indices = string_ops.string_to_hash_bucket_fast(
                features[col], bucket_size)
            features[col] = tf.squeeze(
                tf.nn.embedding_lookup(embeddings, indices), axis=[1])

    for feature in CONTINUOUS_COLS:
        features[feature] = tf.to_float(features[feature])

    # Concatenate the (now all dense) features.
    # We need to sort the tensors so that they end up in the same order for
    # prediction, evaluation, and training. Sorting the keys and indexing
    # works on both Python 2 and 3 (`iteritems` and subscripting `zip` do
    # not exist on Python 3).
    sorted_feature_tensors = [features[name] for name in sorted(features)]
    inputs = tf.concat(sorted_feature_tensors, 1)

    # Build the DNN. Each entry of `layers_shape` is the (in, out) size of
    # one layer; it is materialized as a list because its length is needed
    # below and Python 3's `zip` returns an iterator.
    layers_size = [inputs.get_shape()[1]] + list(hidden_units)
    layers_shape = list(zip(layers_size, layers_size[1:] + [len(LABELS)]))

    curr_layer = inputs
    # Set default initializer to variance_scaling_initializer
    # This initializer prevents variance from exploding or vanishing when
    # compounded through different sized layers.
    with tf.variable_scope(
            'dnn',
            initializer=tf.contrib.layers.variance_scaling_initializer()):
        # Creates the relu hidden layers
        last_layer = len(layers_shape) - 1
        for num, shape in enumerate(layers_shape):
            with tf.variable_scope('relu_{}'.format(num)):
                weights = tf.get_variable('weights', shape)
                biases = tf.get_variable(
                    'biases',
                    shape[1],
                    initializer=tf.zeros_initializer(tf.float32))
            activations = tf.matmul(curr_layer, weights) + biases
            # No ReLU on the final (output) layer.
            if num < last_layer:
                curr_layer = tf.nn.relu(activations)
            else:
                curr_layer = activations

    # Make predictions
    logits = curr_layer

    if mode in (PREDICT, EVAL):
        probabilities = tf.nn.softmax(logits)
        predicted_indices = tf.argmax(probabilities, 1)

    if mode in (TRAIN, EVAL):
        # Convert the string label column to indices
        # Build a lookup table inside the graph
        table = tf.contrib.lookup.string_to_index_table_from_tensor(
            label_values)
        # Use the lookup table to convert string labels to ints
        label_indices = table.lookup(labels)
        # Make labels a vector
        label_indices_vector = tf.squeeze(label_indices)
        # global_step is necessary in eval to correctly load the step
        # of the checkpoint we are evaluating
        global_step = tf.contrib.framework.get_or_create_global_step()

    if mode == PREDICT:
        # Convert predicted_indices back into strings
        return {
            'predictions': tf.gather(label_values, predicted_indices),
            'confidence': tf.reduce_max(probabilities, axis=1)
        }

    if mode == TRAIN:
        # Build training operation.
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=label_indices_vector))
        tf.summary.scalar('loss', cross_entropy)
        train_op = tf.train.FtrlOptimizer(
            learning_rate=learning_rate,
            l1_regularization_strength=3.0,
            l2_regularization_strength=10.0).minimize(
                cross_entropy, global_step=global_step)
        return train_op, global_step

    if mode == EVAL:
        # Return accuracy and area under ROC curve metrics
        # See https://en.wikipedia.org/wiki/Receiver_operating_characteristic
        # See https://www.kaggle.com/wiki/AreaUnderCurve
        # NOTE(review): the AUC is fed the argmax class indices rather than
        # probabilities; presumably only meaningful for two labels -
        # confirm whether probabilities were intended.
        return {
            'accuracy': tf.contrib.metrics.streaming_accuracy(
                predicted_indices, label_indices),
            'auroc': tf.contrib.metrics.streaming_auc(
                predicted_indices, label_indices)
        }