Example #1
 def call(self, features, mode, **kwargs):
     is_training = mode == ModeKeys.TRAIN
     # The Keras training.Model adds a name_scope with the name of the model
     # which modifies the constructed graph. Hence we add another name_scope
     # here which is the one before the training.Model one was applied.
     # TODO: Remove this in TF 2.0 (b/116728605)
     with ops.name_scope(name=_get_previous_name_scope()):
         # TODO: Remove dependence on variable scope for partitioning.
         with variable_scope.variable_scope(
                 'input_from_feature_columns',
                 partitioner=self._input_layer_partitioner,
                 reuse=tf.AUTO_REUSE):
             net = self._input_layer(features)
         net_collections = [net]  # start from the input layer output; avoid re-applying the input layer
         for i in range(len(self._hidden_layers)):
             net = self._hidden_layers[i](net)
             if self._dropout is not None and is_training:
                 net = self._dropout_layers[i](net, training=True)
             if self._batch_norm and is_training:
                 net = self._batch_norm_layers[i](net, training=True)
             net_collections.append(net)
             if self._connected_mode == 'first_dense':
                 net = tf.concat([net, net_collections[0]], axis=1)  # concat with the input layer output, not the layer object
             elif self._connected_mode == 'dense':
                 net = tf.concat(net_collections, axis=1)
             elif self._connected_mode == 'resnet':
                 net = tf.concat([net, net_collections[i]], axis=1)  # concat with the previous layer's output
             add_layer_summary(net, self._hidden_layer_scope_names[i])
         if self._connected_mode == 'last_dense':
             net = tf.concat(net_collections, axis=1)
         logits = self._logits_layer(net)
         add_layer_summary(logits, self._logits_scope_name)
         return logits
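
All three examples log per-layer statistics through an `add_layer_summary` helper that is not shown in these snippets. A minimal sketch of such a helper, modeled on the one in TensorFlow's canned estimators (the project's actual implementation may differ):

import tensorflow as tf

def add_layer_summary(value, tag):
    """Record the sparsity and activation distribution of a layer."""
    # The fraction of zero activations is a cheap indicator of dead ReLU units.
    tf.summary.scalar('{}/fraction_of_zero_values'.format(tag), tf.nn.zero_fraction(value))
    tf.summary.histogram('{}/activation'.format(tag), value)
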
Example #2
def _wide_deep_combined_model_fn(
        features, labels, mode, head,
        model_type,
        with_cnn=False,
        cnn_optimizer='Adagrad',
        linear_feature_columns=None,
        linear_optimizer='Ftrl',
        dnn_feature_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        dnn_connected_mode=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn. (Dnn, Cnn, Linear)
    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
            `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`.
      head: A `Head` instance.
      model_type: one of `wide_deep`, `deep`, `wide_deep`.
      with_cnn: Bool, set True to combine image input featrues using cnn.
      cnn_optimizer: String, `Optimizer` object, or callable that defines the
        optimizer to use for training the CNN model. Defaults to the Adagrad
        optimizer.
      linear_feature_columns: An iterable containing all the feature columns used
          by the Linear model.
      linear_optimizer: String, `Optimizer` object, or callable that defines the
          optimizer to use for training the Linear model. Defaults to the Ftrl
          optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used by
        the DNN model.
      dnn_optimizer: String, `Optimizer` object, or callable that defines the
        optimizer to use for training the DNN model. Defaults to the Adagrad
        optimizer.
      dnn_hidden_units: List of hidden units per DNN layer.
      dnn_connected_mode: List of connected mode.
      dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
          will use `tf.nn.relu`.
      dnn_dropout: When not `None`, the probability we will drop out a given DNN
          coordinate.
      dnn_batch_norm: Bool, add BN layer after each DNN layer
      input_layer_partitioner: Partitioner for input layer.
          config: `RunConfig` object to configure the runtime settings.
    Returns:
        `ModelFnOps`
    Raises:
        ValueError: If both `linear_feature_columns` and `dnn_features_columns`
            are empty at the same time, or `input_layer_partitioner` is missing,
            or features has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if with_cnn:
        try:
            cnn_features = features.pop('image')  # separate image feature from input_fn
        except KeyError:
            raise ValueError('No input image features; image features must be provided when with_cnn is True.')
    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))
    # Learning-rate decay schedules. The *_init_learning_rate, decay_steps and *_decay_rate
    # values are expected to be module-level constants.
    global_step = tf.train.get_or_create_global_step()  # use the real global step so the schedules actually decay
    _LINEAR_LEARNING_RATE = tf.train.exponential_decay(
        _linear_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_linear_decay_rate,
        staircase=False)
    _DNN_LEARNING_RATE = tf.train.exponential_decay(
        _dnn_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_dnn_decay_rate,
        staircase=False)
    _CNN_LEARNING_RATE = tf.train.exponential_decay(
        _cnn_init_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=_cnn_decay_rate,
        staircase=False)
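    # With staircase=False, tf.train.exponential_decay produces a smooth schedule:
    #     decayed_lr = init_lr * decay_rate ** (global_step / decay_steps)
    # e.g. init_lr=0.1, decay_rate=0.5, decay_steps=10000 halves the learning rate every 10k steps.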

    # Build DNN Logits.
    dnn_parent_scope = 'dnn'
    if model_type == 'wide' or not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        if model_type == 'wide_deep':
            check_no_sync_replicas_optimizer(dnn_optimizer)
        dnn_partitioner = tf.min_max_variable_partitioner(max_partitions=num_ps_replicas)
        with tf.variable_scope(
                dnn_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner):
            dnn_logit_fn = multidnn_logit_fn_builder(
                units=head.logits_dimension,
                hidden_units_list=dnn_hidden_units,
                connected_mode_list=dnn_connected_mode,
                feature_columns=dnn_feature_columns,
                input_layer_partitioner=input_layer_partitioner
            )
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    # Build Linear Logits.
    linear_parent_scope = 'linear'
    if model_type == 'deep' or not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = get_optimizer_instance(
            linear_optimizer, learning_rate=_LINEAR_LEARNING_RATE)
        check_no_sync_replicas_optimizer(linear_optimizer)
        with tf.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            add_layer_summary(linear_logits, scope.name)

    # Build CNN Logits.
    cnn_parent_scope = 'cnn'
    if not with_cnn:
        cnn_logits = None
    else:
        cnn_optimizer = get_optimizer_instance(
            cnn_optimizer, learning_rate=_CNN_LEARNING_RATE)
        with tf.variable_scope(
                cnn_parent_scope,
                values=tuple([cnn_features]),
                partitioner=input_layer_partitioner) as scope:
            img_vec = Vgg16().build(cnn_features)
            cnn_logits = tf.layers.dense(
                img_vec,
                units=head.logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(),
                name=scope)
            add_layer_summary(cnn_logits, scope.name)

    # Combine logits and build full model.
    logits_combine = []
    # _BinaryLogisticHeadWithSigmoidCrossEntropyLoss, logits_dimension=1
    for logits in [dnn_logits, linear_logits, cnn_logits]:  # shape: [batch_size, 1]
        if logits is not None:
            logits_combine.append(logits)
    logits = tf.add_n(logits_combine)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = tf.train.get_global_step()
        # BN, when training, the moving_mean and moving_variance need to be updated. By default the
        # update ops are placed in tf.GraphKeys.UPDATE_OPS, so they need to be added as a dependency to the train_op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if dnn_logits is not None:
                # global_step is not passed to minimize(); it is incremented exactly once
                # below, after the grouped train ops have run.
                train_ops.append(
                    dnn_optimizer.minimize(
                        loss,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=dnn_parent_scope)))
            if linear_logits is not None:
                train_ops.append(
                    linear_optimizer.minimize(
                        loss,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=linear_parent_scope)))
            if cnn_logits is not None:
                train_ops.append(
                    cnn_optimizer.minimize(
                        loss,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=cnn_parent_scope)))
            # Create an op that groups multiple ops. When this op finishes,
            # all ops in inputs have finished. This op has no output.
            train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            # Returns a context manager that specifies an op to colocate with.
            with tf.colocate_with(global_step):
                return tf.assign_add(global_step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
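
A model_fn like this is normally handed to `tf.estimator.Estimator` through a small closure that fixes everything except `features`, `labels`, `mode` and `config`. Below is a minimal sketch of that wiring, assuming a binary-classification head from `tf.contrib.estimator`; the column lists `wide_columns` / `deep_columns` and the hidden-unit sizes are placeholders, not values from this project:

import tensorflow as tf

def build_estimator(model_dir, wide_columns, deep_columns):
    # The head defines the loss, metrics and final EstimatorSpec; binary classification here.
    head = tf.contrib.estimator.binary_classification_head()

    def _model_fn(features, labels, mode, config):
        return _wide_deep_combined_model_fn(
            features=features, labels=labels, mode=mode, head=head,
            model_type='wide_deep',
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[[128, 64]],      # one hidden-unit list per DNN
            dnn_connected_mode=['simple'],     # one connected mode per DNN
            config=config)

    return tf.estimator.Estimator(model_fn=_model_fn, model_dir=model_dir)
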
Example #3
def _dnn_logit_fn(features, mode, model_id, units, hidden_units,
                  connected_mode, feature_columns, input_layer_partitioner):
    """Deep Neural Network logit_fn.

    Args:
        features: This is the first item returned from the `input_fn`
            passed to `train`, `evaluate`, and `predict`. This should be a
            single `Tensor` or `dict` of same.
        mode: Optional. Specifies if this is training, evaluation or prediction. See
            `ModeKeys`.
        model_id: An int indicating the index of this model among multiple DNNs.
        units: An int indicating the dimension of the logit layer.  In the
            MultiHead case, this should be the sum of all component Heads' logit
            dimensions.
        hidden_units: Iterable of integer number of hidden units per layer.
        connected_mode: One of {`simple`, `first_dense`, `last_dense`, `dense`, `resnet`}
            or an arbitrary list of connection index pairs.
            1. `simple`: plain DNN architecture.
            2. `first_dense`: add additional connections from the input layer to all hidden layers.
            3. `last_dense`: add additional connections from all previous layers to the last layer.
            4. `dense`: add additional connections between all layers, similar to DenseNet.
            5. `resnet`: add additional connections between adjacent layers, similar to ResNet.
            6. arbitrary connections list: add an additional connection from layer_i to layer_j as `i-j`,
                e.g. ['0-1', '0-3', '1-2']; indices start from zero (the input layer), the maximum
                index is len(hidden_units), and the smaller index comes first.
        feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
        input_layer_partitioner: Partitioner for input layer.

    Returns:
        A `Tensor` representing the logits, or a list of `Tensor`s representing
        multiple logits in the MultiHead case.

    Raises:
        AssertionError: If connected_mode is a string but not one of `simple`, `first_dense`,
            `last_dense`, `dense` or `resnet`.
    """

    if isinstance(connected_mode, str):
        assert connected_mode in {
            'simple', 'first_dense', 'last_dense', 'dense', 'resnet'
        }, ('Invalid connected_mode: {}'.format(connected_mode))
    with tf.variable_scope('input_from_feature_columns',
                           values=tuple(iter(features.values())),
                           partitioner=input_layer_partitioner,
                           reuse=tf.AUTO_REUSE):
        net = tf.feature_column.input_layer(features=features,
                                            feature_columns=feature_columns)
    input_layer = net
    if connected_mode == 'simple':
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    use_bias=True,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    bias_initializer=tf.zeros_initializer(),
                    kernel_regularizer=REG,
                    bias_regularizer=None,
                    activity_regularizer=None,
                    kernel_constraint=None,
                    bias_constraint=None,
                    trainable=True,
                    reuse=None,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(
                        net, rate=DROPOUT, training=True
                    )  # rate=0.1 would drop out 10% of input units.
                if BATCH_NORM:
                    # Pass `training` so BN updates its moving statistics only during training.
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
            add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'first_dense':
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                net = tf.concat([net, input_layer], axis=1)
            add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'last_dense':
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                net_collections.append(net)
            add_layer_summary(net, hidden_layer_scope.name)
        net = tf.concat(
            net_collections, axis=1
        )  # Concatenates the list of tensors `values` along dimension `axis`

    elif connected_mode == 'dense':
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(
                        net, rate=DROPOUT, training=True
                    )  # rate=0.1 would drop out 10% of input units.
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                net_collections.append(net)
                net = tf.concat(net_collections, axis=1)
            add_layer_summary(net, hidden_layer_scope.name)

    elif connected_mode == 'resnet':  # connect layers in turn 0-1; 1-2; 2-3;
        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = tf.layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                # Concatenate with the previous layer's output.
                net = tf.concat([net, net_collections[layer_id]], axis=1)
                net_collections.append(net)
            add_layer_summary(net, hidden_layer_scope.name)

    else:  # arbitrary connections, ['0-1','0-3','1-3'], small index layer first
        # Parse pairs like '0-1' into integer tuples.
        connected_mode = [tuple(map(int, s.split('-'))) for s in connected_mode]
        # Map each layer index to the indices of its earlier connected layers,
        # e.g. ['0-1', '0-3', '1-3'] -> {1: [0], 3: [0, 1]}.
        connected_mapping = {}
        for i, j in connected_mode:
            connected_mapping.setdefault(j, []).append(i)

        net_collections = [input_layer]
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with tf.variable_scope('dnn_{}/hiddenlayer_{}'.format(
                    model_id, layer_id),
                                   values=(net, )) as hidden_layer_scope:
                net = tf.layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=ACTIVATION_FN,
                    kernel_initializer=tf.glorot_uniform_initializer(
                    ),  # also called Xavier uniform initializer.
                    kernel_regularizer=REG,
                    name=hidden_layer_scope)
                if DROPOUT is not None and mode == tf.estimator.ModeKeys.TRAIN:
                    net = tf.layers.dropout(net, rate=DROPOUT, training=True)
                if BATCH_NORM:
                    net = tf.layers.batch_normalization(
                        net, training=(mode == tf.estimator.ModeKeys.TRAIN))
                connect_net_collections = [
                    net for idx, net in enumerate(net_collections)
                    if idx in connected_mapping.get(layer_id + 1, [])
                ]
                connect_net_collections.append(net)
                net = tf.concat(connect_net_collections, axis=1)
                net_collections.append(net)
            add_layer_summary(net, hidden_layer_scope.name)

    with tf.variable_scope('dnn_{}/logits'.format(model_id),
                           values=(net, )) as logits_scope:
        logits = tf.layers.dense(
            net,
            units=units,
            kernel_initializer=tf.glorot_uniform_initializer(),
            kernel_regularizer=REG,
            name=logits_scope)
    add_layer_summary(logits, logits_scope.name)
    return logits
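
The arbitrary-connection branch above turns strings such as '0-3' into a mapping from each layer index to the earlier layers whose outputs it should receive. Tracing that parsing step in plain Python makes the behaviour easier to follow; the connection list below is only an illustration:

connected_mode = ['0-1', '0-3', '1-3']  # input->hidden_1, input->hidden_3, hidden_1->hidden_3

pairs = [tuple(map(int, s.split('-'))) for s in connected_mode]
connected_mapping = {}
for i, j in pairs:
    connected_mapping.setdefault(j, []).append(i)

print(connected_mapping)  # {1: [0], 3: [0, 1]}
# With three hidden layers: hidden layer 1 (layer_id=0) additionally sees the input layer,
# hidden layer 2 (layer_id=1) gets no extra inputs, and hidden layer 3 (layer_id=2) sees the
# input layer plus hidden layer 1's output before its own output joins net_collections.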