def build_dnn_model(mode, inputs, columns, config): """Compute logits of the dnn part (output distribution) Args: inputs: (dict) contains the inputs of the graph (features, labels...) this can be `tf.placeholder` or outputs of `tf.data` columns: (list) contains dnn feature columns config: (configparser) contains hyperparameters for model building Returns: logits: (tf.Tensor) output of the model """ features = inputs['features'] num_ps_replicas = config['dnn_model'].get('num_ps_replicas',0) input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # parse configurations units = int(config['dnn_model'].get('units', 1)) dnn_hidden_units = [int(n) for n in config['dnn_model'].get('hiden_units', '512,128,64').split(',')] dnn_activation_fn = tf.nn.relu if config['dnn_model'].get('activation_fn', None) is not None: dnn_activation_fn = eval(config['dnn_model']['activation_fn']) dnn_dropout = None if config['dnn_model'].get('dropout', None) is not None: dnn_dropout = float(config['dnn_model']['dropout']) batch_norm = False if config['dnn_model'].get('batch_norm', '').lower() == 'true': batch_norm = True # build dnn part dnn_logit_fn = dnn._dnn_logit_fn_builder( units=units, hidden_units=dnn_hidden_units, feature_columns=columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner ) dnn_logits = dnn_logit_fn(features=features, mode=mode) return dnn_logits
def _dnn_linear_combined_model_fn(features, labels, mode, head, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: dict of `Tensor`. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. linear_feature_columns: An iterable containing all the feature columns used by the Linear model. linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. dnn_hidden_units: List of hidden units per DNN layer. dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. input_layer_partitioner: Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: An `EstimatorSpec` instance. Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( 'Either linear_feature_columns or dnn_feature_columns must be defined.') num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): dnn_logit_fn = dnn._dnn_logit_fn_builder( # pylint: disable=protected-access units=head.logits_dimension, hidden_units=dnn_hidden_units, feature_columns=dnn_feature_columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner) dnn_logits = dnn_logit_fn(features=features, mode=mode) linear_parent_scope = 'linear' if not linear_feature_columns: linear_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_linear_learning_rate(len(linear_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: logit_fn = linear._linear_logit_fn_builder( # pylint: disable=protected-access units=head.logits_dimension, feature_columns=linear_feature_columns) linear_logits = logit_fn(features=features) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))) if linear_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): return distribute_lib.increment_var(global_step) return head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _dnn_linear_combined_model_fn(features, labels, mode, head, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: dict of `Tensor`. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. linear_feature_columns: An iterable containing all the feature columns used by the Linear model. linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. dnn_hidden_units: List of hidden units per DNN layer. dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. input_layer_partitioner: Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( 'Either linear_feature_columns or dnn_feature_columns must be defined.' ) num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = (partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple( six.itervalues(features)), partitioner=dnn_partitioner): dnn_logit_fn = dnn._dnn_logit_fn_builder( # pylint: disable=protected-access units=head.logits_dimension, hidden_units=dnn_hidden_units, feature_columns=dnn_feature_columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner) dnn_logits = dnn_logit_fn(features=features, mode=mode) linear_parent_scope = 'linear' if not linear_feature_columns: linear_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_linear_learning_rate(len(linear_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: logit_fn = linear._linear_logit_fn_builder( # pylint: disable=protected-access units=head.logits_dimension, feature_columns=linear_feature_columns) linear_logits = logit_fn(features=features) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize(loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))) if linear_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=linear_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1) return head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _dnn_linear_combined_model_fn( features, labels, mode, head, num_workers, opt, linear_feature_columns=None, dnn_feature_columns=None, dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # combined_optimizer = CombinedOptimizer(linear_feature_columns) # sync_optimizer = tf.train.SyncReplicasOptimizer(combined_optimizer, replicas_to_aggregate=num_workers, total_num_replicas=num_workers) dnn_parent_scope = 'dnn' linear_parent_scope = 'linear' dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): dnn_logit_fn = dnn._dnn_logit_fn_builder( units=head.logits_dimension, hidden_units=dnn_hidden_units, feature_columns=dnn_feature_columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner) dnn_logits = dnn_logit_fn(features=features, mode=mode) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: logit_fn = linear._linear_logit_fn_builder( units=head.logits_dimension, feature_columns=linear_feature_columns) linear_logits = logit_fn(features=features) logits = dnn_logits + linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" global_step = training_util.get_global_step() pairs = opt.compute_gradients(loss) train_op = opt.apply_gradients(pairs, global_step) # train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1) return head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def _base_model(features=None, parent_scope_name=None, mode=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( 'Either linear_feature_columns or dnn_feature_columns must be defined.' ) num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = (partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple( six.itervalues(features)), partitioner=dnn_partitioner): dnn_logit_fn = dnn._dnn_logit_fn_builder( # pylint: disable=protected-access units=1, hidden_units=dnn_hidden_units, feature_columns=dnn_feature_columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=None) dnn_logits = dnn_logit_fn(features=features, mode=mode) linear_parent_scope = 'linear' if not linear_feature_columns: linear_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_linear_learning_rate(len(linear_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: logit_fn = linear._linear_logit_fn_builder( # pylint: disable=protected-access units=1, feature_columns=linear_feature_columns) linear_logits = logit_fn(features=features) _add_layer_summary(linear_logits, scope.name) # Combine logits if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=parent_scope_name + '/' + dnn_parent_scope))) if linear_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=parent_scope_name + '/' + linear_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): return tf.no_op() return logits, _train_op_fn
def _deepfm_model_fn(features, labels, mode, head, fm_first_feature_columns=None, fm_second_feature_columns=None, embedding_size=None, field_size=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=nn.relu, dnn_dropout=None, input_layer_partitioner=None, config=None): """DNN and FM combined model_fn. Args: features: dict of `Tensor`. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. fm_first_feature_columns: An iterable containing order-1 feature columns used by the fm model. fm_second_feature_columns: An iterable containing order-2 feature columns used by the fm model. embedding_size: input field vectors can be of different sizes, their embeddings are of the same size. field_size: The number of order-2 feature columns. linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the FM model. Defaults to the Ftrl optimizer. dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. dnn_hidden_units: List of hidden units per DNN layer. dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. input_layer_partitioner: Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: An `EstimatorSpec` instance. Raises: ValueError: If both `fm_first_feature_columns` and `fm_second_feature_columns` and `dnn_features_columns` are em pty at the same time, or `input_layer_partitioner` is missing, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) if not fm_first_feature_columns and not dnn_feature_columns and not fm_second_feature_columns: raise ValueError( 'Either fm_first_feature_columns or dnn_feature_columns or fm_second_feature_columns must be defined.' ) num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Build DNN Logits. dnn_parent_scope = 'dnn' if not dnn_feature_columns: dnn_logits = None else: dnn_optimizer = optimizers.get_optimizer_instance( dnn_optimizer, learning_rate=_DNN_LEARNING_RATE) _check_no_sync_replicas_optimizer(dnn_optimizer) if not dnn_hidden_units: raise ValueError( 'dnn_hidden_units must be defined when dnn_feature_columns is ' 'specified.') dnn_partitioner = (partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple( six.itervalues(features)), partitioner=dnn_partitioner): dnn_logit_fn = dnn._dnn_logit_fn_builder( units=head.logits_dimension, hidden_units=dnn_hidden_units, feature_columns=dnn_feature_columns, activation_fn=dnn_activation_fn, dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner) dnn_logits = dnn_logit_fn(features=features, mode=mode) # Build FM Logits. fm_parent_scope = 'fm' def cal_fm_first_logits(): logit_fn = linear._linear_logit_fn_builder( units=head.logits_dimension, feature_columns=fm_first_feature_columns) fm_first_logits = logit_fn(features=features) _add_layer_summary(fm_first_logits, scope.name) return fm_first_logits def cal_fm_second_logits(): embeddings = tf.feature_column.input_layer( features=features, feature_columns=fm_second_feature_columns) embeddings = tf.reshape(embeddings, shape=[-1, field_size, embedding_size]) sum_square = tf.square(tf.reduce_sum(embeddings, 1)) square_sum = tf.reduce_sum(tf.square(embeddings), 1) fm_second_logits = 0.5 * tf.reduce_sum( tf.subtract(sum_square, square_sum), 1, keep_dims=True) _add_layer_summary(fm_second_logits, scope.name) return fm_second_logits if not fm_first_feature_columns and not fm_second_feature_columns: fm_first_logits = None fm_second_logits = None else: linear_optimizer = optimizers.get_optimizer_instance( linear_optimizer, learning_rate=_fm_learning_rate( len(fm_first_feature_columns) + len(fm_second_feature_columns))) _check_no_sync_replicas_optimizer(linear_optimizer) with variable_scope.variable_scope( fm_parent_scope, values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as scope: if not fm_first_feature_columns: fm_first_logits = None fm_second_logits = cal_fm_second_logits() elif not fm_second_feature_columns: fm_second_logits = None fm_first_logits = cal_fm_first_logits() else: fm_first_logits = cal_fm_first_logits() fm_second_logits = cal_fm_second_logits() def add_logits(logits, to_add_logits): if logits is None: return to_add_logits else: return logits + to_add_logits if to_add_logits is not None else logits # Combine logits and build full model. logits = None logits = add_logits(logits, dnn_logits) logits = add_logits(logits, fm_second_logits) logits = add_logits(logits, fm_first_logits) def _train_op_fn(loss): """Returns the op to optimize the loss.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( dnn_optimizer.minimize(loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))) if fm_first_logits is not None or fm_second_logits is not None: train_ops.append( linear_optimizer.minimize( loss, var_list=ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES, scope=fm_parent_scope))) train_op = control_flow_ops.group(*train_ops) with ops.control_dependencies([train_op]): return distribute_lib.increment_var(global_step) return head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)