import six
import tensorflow as tf

# `get_optimizer_instance` and `_dnn_logit_fn` are project-local helpers defined
# elsewhere in the source module.


def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  model_collections,
                  feature_columns,
                  optimizer='Adagrad',
                  input_layer_partitioner=None,
                  config=None):
            """Deep Neural Net model_fn.
            Args:
              features: dict of `Tensor`.
              labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
                dtype `int32` or `int64` in the range `[0, n_classes)`.
              mode: Defines whether this is training, evaluation or prediction.
                See `ModeKeys`.
              head: A `head_lib._Head` instance.
              model_collections: Iterable of sub-network definitions; each element
                provides `hidden_units` and `connected_layers` for one DNN whose
                logits are summed into the final logits.
              feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
              optimizer: String, `tf.Optimizer` object, or callable that creates the
                optimizer to use for training. If not specified, will use the Adagrad
                optimizer with a default learning rate of 0.05.
              input_layer_partitioner: Partitioner for input layer. Defaults
                to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
              config: `RunConfig` object to configure the runtime settings.
            Returns:
              An `EstimatorSpec` instance containing the predictions (a dict of
              `Tensor`s), the scalar loss, and the train op.
            Raises:
              ValueError: If features has the wrong type.
            """
            if not isinstance(features, dict):
                raise ValueError(
                    'features should be a dictionary of `Tensor`s. '
                    'Given type: {}'.format(type(features)))
            optimizer = get_optimizer_instance(optimizer, learning_rate=0.05)
            num_ps_replicas = config.num_ps_replicas if config else 0

            partitioner = tf.compat.v1.min_max_variable_partitioner(
                max_partitions=num_ps_replicas)
            with tf.compat.v1.variable_scope('dnn',
                                             values=tuple(
                                                 six.itervalues(features)),
                                             partitioner=partitioner):
                input_layer_partitioner = input_layer_partitioner or (
                    tf.compat.v1.min_max_variable_partitioner(
                        max_partitions=num_ps_replicas,
                        min_slice_size=64 << 20))
                # Each sub-network produces logits of shape (batch_size, num_classes).
                logits = []
                for idx, m in enumerate(model_collections):
                    logits.append(
                        _dnn_logit_fn(features, mode, idx + 1,
                                      head.logits_dimension, m.hidden_units,
                                      m.connected_layers, feature_columns,
                                      input_layer_partitioner))
                # Summing the per-network logit tensors is equivalent to concatenating
                # the layers before the logit layer and applying one shared logit layer.
                logits = tf.add_n(logits)

                def _train_op_fn(loss):
                    """Returns the op to optimize the loss."""
                    return optimizer.minimize(
                        loss, global_step=tf.compat.v1.train.get_global_step())

            return head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              train_op_fn=_train_op_fn,
                                              logits=logits)
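A minimal wiring sketch, added for illustration rather than taken from the source: the extra arguments are closed over so the resulting callable matches the (features, labels, mode, config) signature that tf.estimator.Estimator expects. The head, the feature columns, and the contents of model_collections are placeholders; their real formats are defined by the project-local _dnn_logit_fn.

import collections

import tensorflow as tf

# Hypothetical sub-network spec; _dnn_model_fn only reads `hidden_units` and
# `connected_layers`, whose exact format is up to _dnn_logit_fn.
DNNSpec = collections.namedtuple('DNNSpec', ['hidden_units', 'connected_layers'])
model_collections = [DNNSpec(hidden_units=[128, 64], connected_layers=None)]

feature_columns = [tf.feature_column.numeric_column('x', shape=(10,))]
# Any head compatible with the head_lib._Head interface works here;
# tf.estimator.BinaryClassHead is one option in recent tf.estimator releases.
head = tf.estimator.BinaryClassHead()


def model_fn(features, labels, mode, config):
    return _dnn_model_fn(features, labels, mode,
                         head=head,
                         model_collections=model_collections,
                         feature_columns=feature_columns,
                         config=config)


estimator = tf.estimator.Estimator(model_fn=model_fn)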
Example #2
def _wide_deep_combined_model_fn(
        features, labels, mode, head,
        model_type,
        with_cnn=False,
        cnn_optimizer='Adagrad',
        linear_feature_columns=None,
        linear_optimizer='Ftrl',
        dnn_feature_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        dnn_connected_mode=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn. (Dnn, Cnn, Linear)
    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
            `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`.
      head: A `Head` instance.
      model_type: one of `wide`, `deep`, `wide_deep`.
      with_cnn: Bool. Set to True to combine image input features using a CNN.
      cnn_optimizer: String, `Optimizer` object, or callable that defines the
        optimizer to use for training the CNN model. Defaults to the Adagrad
        optimizer.
      linear_feature_columns: An iterable containing all the feature columns used
          by the Linear model.
      linear_optimizer: String, `Optimizer` object, or callable that defines the
          optimizer to use for training the Linear model. Defaults to the Ftrl
          optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used by
        the DNN model.
      dnn_optimizer: String, `Optimizer` object, or callable that defines the
        optimizer to use for training the DNN model. Defaults to the Adagrad
        optimizer.
      dnn_hidden_units: List of hidden units per DNN layer.
      dnn_connected_mode: List of connected mode.
      input_layer_partitioner: Partitioner for input layer.
      config: `RunConfig` object to configure the runtime settings.
    Returns:
        An `EstimatorSpec` instance.
    Raises:
        ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
            are empty at the same time, or `input_layer_partitioner` is missing,
            or features has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if with_cnn:
        try:
            # Separate the image feature from the other input features.
            cnn_features = features.pop('image')
        except KeyError:
            raise ValueError(
                'No image feature found: an `image` key must be present in '
                '`features` when `with_cnn` is True.')
    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))
    # Exponentially decayed learning rates for the linear, DNN and CNN optimizers;
    # the *_init_learning_rate, decay_steps and *_decay_rate values are module-level
    # hyperparameters defined elsewhere in the source module.
    global_step = tf.Variable(0)
    _LINEAR_LEARNING_RATE = tf.train.exponential_decay(
        _linear_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_linear_decay_rate,
        staircase=False)
    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_dnn_decay_rate,
        staircase=False)
    _CNN_LEARNING_RATE = tf.train.exponential_decay(
        _cnn_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_cnn_decay_rate,
        staircase=False)

    # Build DNN Logits.
    dnn_parent_scope = 'dnn'
    if model_type == 'wide' or not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        if model_type == 'wide_deep':
            check_no_sync_replicas_optimizer(dnn_optimizer)
        dnn_partitioner = tf.min_max_variable_partitioner(max_partitions=num_ps_replicas)
        with tf.variable_scope(
                dnn_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner):
            dnn_logit_fn = multidnn_logit_fn_builder(
                units=head.logits_dimension,
                hidden_units_list=dnn_hidden_units,
                connected_mode_list=dnn_connected_mode,
                feature_columns=dnn_feature_columns,
                input_layer_partitioner=input_layer_partitioner
            )
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    # Build Linear Logits.
    linear_parent_scope = 'linear'
    if model_type == 'deep' or not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = get_optimizer_instance(linear_optimizer,
            learning_rate=_LINEAR_LEARNING_RATE)
        check_no_sync_replicas_optimizer(linear_optimizer)
        with tf.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            add_layer_summary(linear_logits, scope.name)

    # Build CNN Logits.
    cnn_parent_scope = 'cnn'
    if not with_cnn:
        cnn_logits = None
    else:
        cnn_optimizer = get_optimizer_instance(
            cnn_optimizer, learning_rate=_CNN_LEARNING_RATE)
        with tf.variable_scope(
                cnn_parent_scope,
                values=tuple([cnn_features]),
                partitioner=input_layer_partitioner) as scope:
            img_vec = Vgg16().build(cnn_features)
            cnn_logits = tf.layers.dense(
                img_vec,
                units=head.logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(),
                name=scope)
            add_layer_summary(cnn_logits, scope.name)

    # Combine logits and build full model.
    logits_combine = []
    # With a binary head (_BinaryLogisticHeadWithSigmoidCrossEntropyLoss) the
    # logits_dimension is 1, so each branch emits logits of shape [batch_size, 1].
    for logits in [dnn_logits, linear_logits, cnn_logits]:
        if logits is not None:
            logits_combine.append(logits)
    logits = tf.add_n(logits_combine)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # Batch normalization: during training, moving_mean and moving_variance must be
        # updated. The update ops are placed in tf.GraphKeys.UPDATE_OPS by default, so add
        # them as a dependency of the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if dnn_logits is not None:
                train_ops.append(
                    dnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=dnn_parent_scope)))
            if linear_logits is not None:
                train_ops.append(
                    linear_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=linear_parent_scope)))
            if cnn_logits is not None:
                train_ops.append(
                    cnn_optimizer.minimize(
                        loss,
                        global_step=global_step,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=cnn_parent_scope)))
            # Create an op that groups multiple ops. When this op finishes,
            # all ops in inputs have finished. This op has no output.
            train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            # Returns a context manager that specifies an op to colocate with.
            with tf.colocate_with(global_step):
                return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
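A usage sketch under assumed columns and hyperparameters: the column names, head, hidden units and connected modes below are placeholders, and the exact connected-mode format is defined by the project-local multidnn_logit_fn_builder.

import tensorflow as tf

wide_columns = [tf.feature_column.categorical_column_with_hash_bucket(
    'ad_id', hash_bucket_size=1000)]
deep_columns = [tf.feature_column.numeric_column('age')]
head = tf.estimator.BinaryClassHead()  # any head with logits_dimension == 1


def model_fn(features, labels, mode, config):
    return _wide_deep_combined_model_fn(
        features, labels, mode,
        head=head,
        model_type='wide_deep',         # build both the linear and the DNN branch
        with_cnn=False,                 # no `image` key in features, so keep the CNN branch off
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=[[128, 64]],   # one entry per sub-DNN
        dnn_connected_mode=['simple'],  # placeholder mode string
        config=config)


estimator = tf.estimator.Estimator(model_fn=model_fn)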
Example #3
def combined_model_fn(
        features, labels, mode, head,
        dnn_connected_mode=None,
        features_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn. (Dnn, Cnn, Linear)
    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
            `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`.
      head: A `Head` instance.
      model_type: one of `wide`, `deep`, `wide_deep`.
      linear_feature_columns: An iterable containing all the feature columns used
          by the Linear model.
      linear_optimizer: String, `Optimizer` object, or callable that defines the
          optimizer to use for training the Linear model. Defaults to the Ftrl
          optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used by
        the DNN model.
      dnn_optimizer: String, `Optimizer` object, or callable that defines the
        optimizer to use for training the DNN model. Defaults to the Adagrad
        optimizer.
      dnn_hidden_units: List of hidden units per DNN layer.
      dnn_connected_mode: List of connected mode.
      dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
          will use `tf.nn.relu`.
      dnn_dropout: When not `None`, the probability we will drop out a given DNN
          coordinate.
      dnn_batch_norm: Bool, add BN layer after each DNN layer
      input_layer_partitioner: Partitioner for input layer.
          config: `RunConfig` object to configure the runtime settings.
    Returns:
        `ModelFnOps`
    Raises:
        ValueError: If both `linear_feature_columns` and `dnn_features_columns`
            are empty at the same time, or `input_layer_partitioner` is missing,
            or features has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))

    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))
    # Exponentially decayed learning rate for the DNN optimizer; the initial rate and
    # decay settings are module-level hyperparameters defined elsewhere in the source module.
    global_step = tf.Variable(0)

    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=_dnn_decay_rate,
        staircase=False)

    dnn_parent_scope = 'dnn'
    dnn_optimizer = get_optimizer_instance(dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    dnn_partitioner = tf.min_max_variable_partitioner(max_partitions=num_ps_replicas)

    with tf.variable_scope(dnn_parent_scope,
                           values=tuple(six.itervalues(features)),
                           partitioner=dnn_partitioner):
        dnn_logit_fn = multidnn_logit_fn_builder(
            units=head.logits_dimension,
            hidden_units_list=dnn_hidden_units,
            feature_columns=features_columns,
            connected_mode_list=dnn_connected_mode,
            input_layer_partitioner=input_layer_partitioner
        )
        logits = dnn_logit_fn(features=features, mode=mode)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # Batch normalization: during training, moving_mean and moving_variance must be
        # updated. The update ops are placed in tf.GraphKeys.UPDATE_OPS by default, so add
        # them as a dependency of the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_ops.append(dnn_optimizer.minimize(
                    loss,
                    global_step=global_step,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_parent_scope)))

            # Create an op that groups multiple ops. When this op finishes,
            # all ops in inputs have finished. This op has no output.
            train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            # Returns a context manager that specifies an op to colocate with.
            with tf.colocate_with(global_step):
                return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
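All three model_fns share the same features/labels contract described in the docstrings: features is a dict of Tensors keyed by feature-column name and labels is an integer Tensor in [0, n_classes). A minimal input_fn sketch with toy data (the column name and shapes are assumptions):

import numpy as np
import tensorflow as tf


def train_input_fn(batch_size=32):
    # Toy data; the `age` key must match a column passed in features_columns.
    features = {'age': np.random.uniform(size=1000).astype(np.float32)}
    labels = np.random.randint(0, 2, size=1000).astype(np.int64)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.shuffle(1000).repeat().batch(batch_size)


# estimator.train(input_fn=train_input_fn, max_steps=1000)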