def call(self, features, mode, **kwargs):
    is_training = mode == ModeKeys.TRAIN
    # The Keras training.Model adds a name_scope with the name of the model
    # which modifies the constructed graph. Hence we add another name_scope
    # here which is the one before the training.Model one was applied.
    # TODO: Remove this in TF 2.0 (b/116728605)
    with ops.name_scope(name=_get_previous_name_scope()):
        # TODO: Remove dependence on variable scope for partitioning.
        with variable_scope.variable_scope(
                'input_from_feature_columns',
                partitioner=self._input_layer_partitioner,
                reuse=tf.AUTO_REUSE):
            net = self._input_layer(features)
        # Keep the input layer output so skip connections can refer back to it.
        net_collections = [net]
        for i in range(len(self._hidden_layers)):
            net = self._hidden_layers[i](net)
            if self._dropout is not None and is_training:
                net = self._dropout_layers[i](net, training=True)
            if self._batch_norm and is_training:
                net = self._batch_norm_layers[i](net, training=True)
            net_collections.append(net)
            if self._connected_mode == 'first_dense':
                # Concatenate each hidden layer output with the input layer output.
                net = tf.concat([net, net_collections[0]], axis=1)
            elif self._connected_mode == 'dense':
                # Concatenate with all previous layer outputs (DenseNet style).
                net = tf.concat(net_collections, axis=1)
            elif self._connected_mode == 'resnet':
                # Concatenate with the previous layer output only (ResNet style).
                net = tf.concat([net, net_collections[i]], axis=1)
            add_layer_summary(net, self._hidden_layer_scope_names[i])
        if self._connected_mode == 'last_dense':
            net = tf.concat(net_collections, axis=1)
        logits = self._logits_layer(net)
    add_layer_summary(logits, self._logits_scope_name)
    return logits
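
# Illustrative note (not part of the original model): how the connected modes
# above change the width of `net`, assuming a hypothetical input layer of
# width 8 and two hidden layers of 4 units each.
#
#   simple:       layer 0 -> 4,            layer 1 -> 4
#   first_dense:  layer 0 -> 4 + 8 = 12,   layer 1 -> 4 + 8 = 12
#   resnet:       layer 0 -> 4 + 8 = 12,   layer 1 -> 4 + 4 = 8
#   dense:        layer 0 -> 4 + 8 = 12,   layer 1 -> 4 + 4 + 8 = 16
#   last_dense:   hidden layers stay at 4; the final concat is 8 + 4 + 4 = 16
#
# In every mode the logits layer consumes whatever width `net` has after the
# last concatenation.
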
def _wide_deep_combined_model_fn(
        features, labels, mode, head,
        model_type,
        with_cnn=False,
        cnn_optimizer='Adagrad',
        linear_feature_columns=None,
        linear_optimizer='Ftrl',
        dnn_feature_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        dnn_connected_mode=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn (DNN, CNN, Linear).

    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size]; labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `Head` instance.
        model_type: one of `wide`, `deep`, `wide_deep`.
        with_cnn: Bool, set True to combine image input features using a CNN.
        cnn_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the CNN model. Defaults to the Adagrad
            optimizer.
        linear_feature_columns: An iterable containing all the feature columns
            used by the Linear model.
        linear_optimizer: String, `Optimizer` object, or callable that defines
            the optimizer to use for training the Linear model. Defaults to the
            Ftrl optimizer.
        dnn_feature_columns: An iterable containing all the feature columns used
            by the DNN model.
        dnn_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the DNN model. Defaults to the Adagrad
            optimizer.
        dnn_hidden_units: List of hidden units per DNN layer.
        dnn_connected_mode: List of connected modes, one per sub-DNN.
        input_layer_partitioner: Partitioner for input layer.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        `ModelFnOps`

    Raises:
        ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
            are empty at the same time, or `input_layer_partitioner` is missing,
            or features has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if with_cnn:
        try:
            # Separate the image feature from the features produced by input_fn.
            cnn_features = features.pop('image')
        except KeyError:
            raise ValueError('No image features found; image features must be '
                             'provided when with_cnn is True.')
    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))

    # Exponentially decayed learning rates, driven by the global step.
    global_step = tf.train.get_or_create_global_step()
    _LINEAR_LEARNING_RATE = tf.train.exponential_decay(
        _linear_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_linear_decay_rate,
        staircase=False)
    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_dnn_decay_rate,
        staircase=False)
    _CNN_LEARNING_RATE = tf.train.exponential_decay(
        _cnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_cnn_decay_rate,
        staircase=False)

    # Build DNN Logits.
    dnn_parent_scope = 'dnn'
    if model_type == 'wide' or not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        if model_type == 'wide_deep':
            check_no_sync_replicas_optimizer(dnn_optimizer)
        dnn_partitioner = tf.min_max_variable_partitioner(
            max_partitions=num_ps_replicas)
        with tf.variable_scope(
                dnn_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner):
            dnn_logit_fn = multidnn_logit_fn_builder(
                units=head.logits_dimension,
                hidden_units_list=dnn_hidden_units,
                connected_mode_list=dnn_connected_mode,
                feature_columns=dnn_feature_columns,
                input_layer_partitioner=input_layer_partitioner)
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    # Build Linear Logits.
    linear_parent_scope = 'linear'
    if model_type == 'deep' or not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = get_optimizer_instance(
            linear_optimizer, learning_rate=_LINEAR_LEARNING_RATE)
        check_no_sync_replicas_optimizer(linear_optimizer)
        with tf.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            add_layer_summary(linear_logits, scope.name)

    # Build CNN Logits.
    cnn_parent_scope = 'cnn'
    if not with_cnn:
        cnn_logits = None
    else:
        cnn_optimizer = get_optimizer_instance(
            cnn_optimizer, learning_rate=_CNN_LEARNING_RATE)
        with tf.variable_scope(
                cnn_parent_scope,
                values=(cnn_features,),
                partitioner=input_layer_partitioner) as scope:
            img_vec = Vgg16().build(cnn_features)
            cnn_logits = tf.layers.dense(
                img_vec,
                units=head.logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(),
                name=scope)
            add_layer_summary(cnn_logits, scope.name)

    # Combine logits and build full model.
    # Each component's logits have shape [batch_size, logits_dimension], e.g.
    # logits_dimension=1 for _BinaryLogisticHeadWithSigmoidCrossEntropyLoss.
    logits_combine = []
    for logits in [dnn_logits, linear_logits, cnn_logits]:
        if logits is not None:
            logits_combine.append(logits)
    logits = tf.add_n(logits_combine)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # With batch normalization, moving_mean and moving_variance must be
        # updated during training. By default the update ops are placed in
        # tf.GraphKeys.UPDATE_OPS, so they are added as a dependency of the
        # train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if dnn_logits is not None:
                train_ops.append(
                    dnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=dnn_parent_scope)))
            if linear_logits is not None:
                train_ops.append(
                    linear_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=linear_parent_scope)))
            if cnn_logits is not None:
                train_ops.append(
                    cnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=cnn_parent_scope)))
            # Group the per-component train ops; the grouped op finishes only
            # when all ops in `train_ops` have finished and has no output.
            train_op = tf.group(*train_ops)
            with tf.control_dependencies([train_op]):
                # Colocate the global step update with the global step variable.
                with tf.colocate_with(global_step):
                    return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
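
# A minimal usage sketch (not part of the original code): wiring the combined
# model_fn above into a TF 1.x `tf.estimator.Estimator`. The head, feature
# columns, and hyperparameter values below are hypothetical placeholders, and
# the module-level learning-rate/decay constants are assumed to be defined.
def _example_build_estimator(model_dir):
    feature_columns = [tf.feature_column.numeric_column('age')]
    head = tf.contrib.estimator.binary_classification_head()

    def model_fn(features, labels, mode, config):
        return _wide_deep_combined_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            model_type='wide_deep',
            linear_feature_columns=feature_columns,
            dnn_feature_columns=feature_columns,
            dnn_hidden_units=[[128, 64, 32]],  # one hidden-unit list per sub-DNN
            dnn_connected_mode=['simple'],     # one connected mode per sub-DNN
            config=config)

    return tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
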
def _dnn_logit_fn(features, mode, model_id, units, hidden_units, connected_mode,
                  feature_columns, input_layer_partitioner):
    """Deep Neural Network logit_fn.

    The activation function, dropout rate and batch-norm switch are read from
    the module-level constants `ACTIVATION_FN`, `DROPOUT` and `BATCH_NORM`.

    Args:
        features: This is the first item returned from the `input_fn` passed to
            `train`, `evaluate`, and `predict`. This should be a single `Tensor`
            or `dict` of same.
        mode: Optional. Specifies if this is training, evaluation or prediction.
            See `ModeKeys`.
        model_id: An int indicating the model index within the multi-DNN.
        units: An int indicating the dimension of the logit layer. In the
            MultiHead case, this should be the sum of all component Heads' logit
            dimensions.
        hidden_units: Iterable of integer number of hidden units per layer.
        connected_mode: one of {`simple`, `first_dense`, `last_dense`, `dense`,
            `resnet`} or a list of arbitrary connection index pairs.
            1. `simple`: plain DNN architecture.
            2. `first_dense`: add additional connections from the input layer to
               all hidden layers.
            3. `last_dense`: add additional connections from all previous layers
               to the last layer.
            4. `dense`: add additional connections between all layers, similar
               to DenseNet.
            5. `resnet`: add additional connections between adjacent layers,
               similar to ResNet.
            6. arbitrary connections list: add additional connections from
               layer_0 to layer_1 written as '0-1', e.g. ['0-1', '0-3', '1-2'].
               Indices start from zero (the input layer), the maximum index is
               len(hidden_units), and the smaller index comes first in each pair.
        feature_columns: Iterable of `feature_column._FeatureColumn` model
            inputs.
        input_layer_partitioner: Partitioner for input layer.

    Returns:
        A `Tensor` representing the logits, or a list of `Tensor`'s representing
        multiple logits in the MultiHead case.

    Raises:
        AssertionError: If connected_mode is a string but not one of `simple`,
            `first_dense`, `last_dense`, `dense` or `resnet`.
    """
    if isinstance(connected_mode, str):
        assert connected_mode in {
            'simple', 'first_dense', 'last_dense', 'dense', 'resnet'
        }, 'Invalid connected_mode: {}'.format(connected_mode)

    with tf.variable_scope('input_from_feature_columns',
                           values=tuple(features.values()),
                           partitioner=input_layer_partitioner,
                           reuse=tf.AUTO_REUSE):
        net = tf.feature_column.input_layer(features=features,
                                            feature_columns=feature_columns)
    input_layer = net

    if connected_mode == 'simple':
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    use_bias=True,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    bias_initializer=tf.zeros_initializer(),
                    kernel_regularizer=REG,
                    bias_regularizer=None,
                    activity_regularizer=None,
                    kernel_constraint=None,
                    bias_constraint=None,
                    trainable=True,
                    reuse=None,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    # rate=0.1 would drop out 10% of input units.
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'first_dense':
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                # Concatenate each hidden layer output with the input layer.
                net = tf.concat([net, input_layer], axis=1)
                add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'last_dense':
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                net_collections.append(net)
                add_layer_summary(net, hidden_layer_scope.name)
        # Concatenate the input layer and all hidden layer outputs along axis 1.
        net = tf.concat(net_collections, axis=1)

    elif connected_mode == 'dense':
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    # rate=0.1 would drop out 10% of input units.
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                net_collections.append(net)
                # Concatenate with all previous layer outputs (DenseNet style).
                net = tf.concat(net_collections, axis=1)
                add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'resnet':
        # Connect adjacent layers in turn: 0-1, 1-2, 2-3, ...
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = tf.layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                # Concatenate with the previous layer output only (ResNet style).
                net = tf.concat([net, net_collections[layer_id]], axis=1)
                net_collections.append(net)
                add_layer_summary(net, hidden_layer_scope.name)

    else:
        # Arbitrary connections, e.g. ['0-1', '0-3', '1-2'];
        # the smaller layer index comes first in each pair.
        connected_mode = [tuple(map(int, s.split('-'))) for s in connected_mode]
        # Map each layer index to the indices of the earlier layers it connects
        # to, e.g. {1: [0], 2: [1], 3: [0]}.
        connected_mapping = {}
        for i, j in connected_mode:
            if j not in connected_mapping:
                connected_mapping[j] = [i]
            else:
                connected_mapping[j].append(i)
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope(
                    'dnn_{}/hiddenlayer_{}'.format(model_id, layer_id),
                    values=(net,)) as hidden_layer_scope:
                net = tf.layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    # glorot_uniform is also called the Xavier uniform initializer.
                    kernel_initializer=tf.glorot_uniform_initializer(),
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                # Layers with no incoming skip connection just use the previous
                # layer output.
                connect_net_collections = [
                    early_net for idx, early_net in enumerate(net_collections)
                    if idx in connected_mapping.get(layer_id + 1, [])
                ]
                connect_net_collections.append(net)
                net = tf.concat(connect_net_collections, axis=1)
                net_collections.append(net)
                add_layer_summary(net, hidden_layer_scope.name)

    with tf.variable_scope('dnn_{}/logits'.format(model_id),
                           values=(net,)) as logits_scope:
        logits = tf.layers.dense(
            net,
            units=units,
            kernel_initializer=tf.glorot_uniform_initializer(),
            kernel_regularizer=REG,
            name=logits_scope)
        add_layer_summary(logits, logits_scope.name)
    return logits
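
# A minimal sketch (not part of the original code) of how the arbitrary
# connection strings handled above are turned into a target-to-sources
# mapping; the function name is illustrative only.
def _example_connection_mapping(connected_mode=('0-1', '0-3', '1-2')):
    """Returns {target_layer_index: [source_layer_indices]} for strings like '0-1'."""
    mapping = {}
    for pair in connected_mode:
        src, dst = (int(x) for x in pair.split('-'))
        mapping.setdefault(dst, []).append(src)
    return mapping

# _example_connection_mapping() returns a dict equal to {1: [0], 2: [1], 3: [0]}:
# hiddenlayer_0 (index 1) also sees the input layer (index 0), hiddenlayer_1
# (index 2) also sees hiddenlayer_0, and hiddenlayer_2 (index 3) also sees the
# input layer, mirroring connected_mapping in _dnn_logit_fn.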