def _dnn_model_fn(features, labels, mode, head, optimizer='Adagrad',
                  input_layer_partitioner=None, config=None):
    """Deep Neural Net model_fn.

    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `head_lib._Head` instance.
        optimizer: String, `tf.Optimizer` object, or callable that creates the
            optimizer to use for training. If not specified, will use the Adagrad
            optimizer with a default learning rate of 0.05.
        input_layer_partitioner: Partitioner for input layer. Defaults to
            `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        An `EstimatorSpec` instance.

    Raises:
        ValueError: If `features` has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError(
            'features should be a dictionary of `Tensor`s. '
            'Given type: {}'.format(type(features)))
    optimizer = get_optimizer_instance(optimizer, learning_rate=0.05)
    num_ps_replicas = config.num_ps_replicas if config else 0
    partitioner = tf.compat.v1.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with tf.compat.v1.variable_scope(
            'dnn',
            values=tuple(six.itervalues(features)),
            partitioner=partitioner):
        input_layer_partitioner = input_layer_partitioner or (
            tf.compat.v1.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=64 << 20))
        # `model_collections` and `feature_columns` are module-level settings.
        # Each logit tensor has shape [batch_size, logits_dimension] (num_classes).
        logits = []
        for idx, m in enumerate(model_collections):
            logits.append(
                _dnn_logit_fn(features, mode, idx + 1, head.logits_dimension,
                              m.hidden_units, m.connected_layers,
                              feature_columns, input_layer_partitioner))
        # Summing the per-model logit layers is equivalent to concatenating the
        # layers before the logit layer and applying a single logit layer.
        logits = tf.add_n(logits)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizer.minimize(
                loss, global_step=tf.compat.v1.train.get_global_step())

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
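

# A minimal usage sketch, assuming the module-level `model_collections` and
# `feature_columns` that `_dnn_model_fn` reads are already defined, and that a
# compatible `head` object (e.g. a binary classification head) is built
# elsewhere. Apart from `_dnn_model_fn`, the names below are illustrative
# assumptions, not part of this module.
import functools

import tensorflow as tf


def build_dnn_estimator(head, model_dir, run_config=None):
    """Wraps `_dnn_model_fn` into a `tf.estimator.Estimator`."""
    model_fn = functools.partial(
        _dnn_model_fn,
        head=head,
        optimizer='Adagrad',
        input_layer_partitioner=None)
    # The remaining arguments (features, labels, mode, config) are supplied by
    # the Estimator at call time.
    return tf.estimator.Estimator(
        model_fn=model_fn, model_dir=model_dir, config=run_config)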
def _wide_deep_combined_model_fn(
        features, labels, mode, head, model_type,
        with_cnn=False,
        cnn_optimizer='Adagrad',
        linear_feature_columns=None,
        linear_optimizer='Ftrl',
        dnn_feature_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        dnn_connected_mode=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn (DNN, CNN, Linear).

    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `Head` instance.
        model_type: One of `wide`, `deep`, `wide_deep`.
        with_cnn: Bool, set True to combine image input features using a CNN.
        cnn_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the CNN model. Defaults to the Adagrad
            optimizer.
        linear_feature_columns: An iterable containing all the feature columns
            used by the Linear model.
        linear_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the Linear model. Defaults to the Ftrl
            optimizer.
        dnn_feature_columns: An iterable containing all the feature columns used
            by the DNN model.
        dnn_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the DNN model. Defaults to the Adagrad
            optimizer.
        dnn_hidden_units: List of hidden units per DNN layer.
        dnn_connected_mode: List of connected modes.
        input_layer_partitioner: Partitioner for input layer.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        An `EstimatorSpec` instance.

    Raises:
        ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
            are empty at the same time, if `features` has the wrong type, or if
            `with_cnn` is True but no image feature is provided.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if with_cnn:
        try:
            # Separate the image feature produced by the input_fn.
            cnn_features = features.pop('image')
        except KeyError:
            raise ValueError('No image feature in input; image features must be '
                             'provided when with_cnn is True.')
    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))

    # Exponentially decayed learning rates. The initial learning rates,
    # `decay_steps` and the per-model decay rates are module-level settings.
    global_step = tf.Variable(0)
    _LINEAR_LEARNING_RATE = tf.train.exponential_decay(
        _linear_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_linear_decay_rate,
        staircase=False)
    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_dnn_decay_rate,
        staircase=False)
    _CNN_LEARNING_RATE = tf.train.exponential_decay(
        _cnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_cnn_decay_rate,
        staircase=False)

    # Build DNN logits.
    dnn_parent_scope = 'dnn'
    if model_type == 'wide' or not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        if model_type == 'wide_deep':
            check_no_sync_replicas_optimizer(dnn_optimizer)
        dnn_partitioner = tf.min_max_variable_partitioner(
            max_partitions=num_ps_replicas)
        with tf.variable_scope(
                dnn_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner):
            dnn_logit_fn = multidnn_logit_fn_builder(
                units=head.logits_dimension,
                hidden_units_list=dnn_hidden_units,
                connected_mode_list=dnn_connected_mode,
                feature_columns=dnn_feature_columns,
                input_layer_partitioner=input_layer_partitioner)
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    # Build Linear logits.
    linear_parent_scope = 'linear'
    if model_type == 'deep' or not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = get_optimizer_instance(
            linear_optimizer, learning_rate=_LINEAR_LEARNING_RATE)
        check_no_sync_replicas_optimizer(linear_optimizer)
        with tf.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            add_layer_summary(linear_logits, scope.name)

    # Build CNN logits.
    cnn_parent_scope = 'cnn'
    if not with_cnn:
        cnn_logits = None
    else:
        cnn_optimizer = get_optimizer_instance(
            cnn_optimizer, learning_rate=_CNN_LEARNING_RATE)
        with tf.variable_scope(
                cnn_parent_scope,
                values=(cnn_features,),
                partitioner=input_layer_partitioner) as scope:
            img_vec = Vgg16().build(cnn_features)
            cnn_logits = tf.layers.dense(
                img_vec,
                units=head.logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(),
                name=scope)
            add_layer_summary(cnn_logits, scope.name)

    # Combine logits and build the full model. With
    # _BinaryLogisticHeadWithSigmoidCrossEntropyLoss, logits_dimension is 1, so
    # each component contributes a [batch_size, 1] tensor.
    logits_combine = []
    for logits in [dnn_logits, linear_logits, cnn_logits]:
        if logits is not None:
            logits_combine.append(logits)
    logits = tf.add_n(logits_combine)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # With batch norm, moving_mean and moving_variance must be updated during
        # training. By default the update ops are placed in
        # tf.GraphKeys.UPDATE_OPS, so they are added as a dependency of the
        # train op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if dnn_logits is not None:
                train_ops.append(
                    dnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=dnn_parent_scope)))
            if linear_logits is not None:
                train_ops.append(
                    linear_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=linear_parent_scope)))
            if cnn_logits is not None:
                train_ops.append(
                    cnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=cnn_parent_scope)))
        # tf.group creates an op that finishes only after all of the grouped
        # train ops have finished; it has no output.
        train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            # Colocate the global_step increment with the global_step variable.
            with tf.colocate_with(global_step):
                return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
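

# Hypothetical wiring sketch for `_wide_deep_combined_model_fn` above. The
# feature columns, the `head`, and the connected-mode / hidden-unit values are
# illustrative assumptions, not part of this module; the module-level
# learning-rate settings (`_linear_init_learning_rate`,
# `_dnn_init_learning_rate`, `decay_steps`, ...) must also be defined for the
# model_fn to run.
import tensorflow as tf


def build_wide_deep_estimator(head, model_dir, run_config=None):
    """Builds a wide & deep Estimator on top of `_wide_deep_combined_model_fn`."""
    occupation = tf.feature_column.categorical_column_with_hash_bucket(
        'occupation', hash_bucket_size=1000)
    age = tf.feature_column.numeric_column('age')
    linear_columns = [occupation]
    dnn_columns = [age, tf.feature_column.embedding_column(occupation, dimension=8)]

    def model_fn(features, labels, mode, config):
        return _wide_deep_combined_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            model_type='wide_deep',
            linear_feature_columns=linear_columns,
            dnn_feature_columns=dnn_columns,
            dnn_hidden_units=[[128, 64]],   # assumed format: one list per sub-DNN
            dnn_connected_mode=['normal'],  # placeholder connected mode
            config=config)

    return tf.estimator.Estimator(
        model_fn=model_fn, model_dir=model_dir, config=run_config)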
def combined_model_fn(
        features, labels, mode, head,
        dnn_connected_mode=None,
        features_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        input_layer_partitioner=None,
        config=None):
    """Combined multi-DNN model_fn.

    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `Head` instance.
        dnn_connected_mode: List of connected modes.
        features_columns: An iterable containing all the feature columns used by
            the DNN models.
        dnn_optimizer: String, `Optimizer` object, or callable that defines the
            optimizer to use for training the DNN models. Defaults to the Adagrad
            optimizer.
        dnn_hidden_units: List of hidden units per DNN layer.
        input_layer_partitioner: Partitioner for input layer.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        An `EstimatorSpec` instance.

    Raises:
        ValueError: If `features` has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))

    # Exponentially decayed learning rate; `_dnn_init_learning_rate`,
    # `decay_steps` and `_dnn_decay_rate` are module-level settings.
    global_step = tf.Variable(0)
    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_dnn_decay_rate,
        staircase=False)

    dnn_parent_scope = 'dnn'
    dnn_optimizer = get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    dnn_partitioner = tf.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with tf.variable_scope(
            dnn_parent_scope,
            values=tuple(six.itervalues(features)),
            partitioner=dnn_partitioner):
        dnn_logit_fn = multidnn_logit_fn_builder(
            units=head.logits_dimension,
            hidden_units_list=dnn_hidden_units,
            feature_columns=features_columns,
            connected_mode_list=dnn_connected_mode,
            input_layer_partitioner=input_layer_partitioner)
        logits = dnn_logit_fn(features=features, mode=mode)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # With batch norm, moving_mean and moving_variance must be updated during
        # training. By default the update ops are placed in
        # tf.GraphKeys.UPDATE_OPS, so they are added as a dependency of the
        # train op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_ops.append(
                dnn_optimizer.minimize(
                    loss,
                    global_step=global_step,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_parent_scope)))
        # tf.group creates an op that finishes only after all of the grouped
        # train ops have finished; it has no output.
        train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            # Colocate the global_step increment with the global_step variable.
            with tf.colocate_with(global_step):
                return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
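

# Illustrative end-to-end usage of `combined_model_fn`. Everything here (the
# head, the single numeric column, the toy in-memory input_fn, the hidden-unit
# and connected-mode values) is an assumption made for the sketch; real
# pipelines would build these from the project's configuration.
import tensorflow as tf


def build_multidnn_estimator(head, feature_columns, model_dir):
    """Builds an Estimator on top of `combined_model_fn`."""
    def model_fn(features, labels, mode, config):
        return combined_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            dnn_connected_mode=['normal'],  # placeholder connected mode
            features_columns=feature_columns,
            dnn_hidden_units=[[128, 64]],   # assumed format: one list per sub-DNN
            config=config)

    return tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)


def toy_input_fn():
    """Tiny in-memory input_fn; real input pipelines would read from files."""
    features = {'age': [[25.0], [40.0], [31.0], [58.0]]}
    labels = [[0], [1], [0], [1]]
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.repeat().batch(2)


# Example call sites (commented out because the `head` construction depends on
# which estimator/head library version the project targets):
# estimator = build_multidnn_estimator(
#     head, [tf.feature_column.numeric_column('age')], '/tmp/multidnn')
# estimator.train(input_fn=toy_input_fn, steps=100)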