def build_model(self, features, feature_columns, is_training):
  """See base class."""
  self._feature_columns = feature_columns
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=self._num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_scope(
      self._scope,
      values=features.values(),
      partitioner=partitioner) as scope:
    if self._joint_weights:
      logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=self._get_feature_columns(),
          num_outputs=self._num_label_columns,
          weight_collections=[self._scope],
          scope=scope)
    else:
      logits, _, _ = layers.weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=self._get_feature_columns(),
          num_outputs=self._num_label_columns,
          weight_collections=[self._scope],
          scope=scope)
  return logits
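Note: `build_model` is a method lifted out of its class, so the attributes it reads (`self._scope`, `self._num_ps_replicas`, `self._num_label_columns`, `self._joint_weights`, `self._get_feature_columns()`) are not shown. The following is a minimal, hypothetical skeleton written only to make those assumptions explicit; the class name and constructor are not from the original source.

class _LinearModelSkeleton(object):
  """Hypothetical holder for the attributes build_model relies on."""

  def __init__(self, num_label_columns, num_ps_replicas=0,
               joint_weights=False, scope="linear"):
    self._num_label_columns = num_label_columns  # output dimension of the logits
    self._num_ps_replicas = num_ps_replicas      # drives variable partitioning
    self._joint_weights = joint_weights          # use one joint weight variable?
    self._scope = scope                          # variable scope / weight collection name
    self._feature_columns = None                 # populated by build_model

  def _get_feature_columns(self):
    # build_model stores the columns it receives and reads them back here.
    return self._feature_columns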
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. The multiplier is used to
          scale the learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
      * fix_global_step_increment_bug: If True (the default), the global step is
          incremented once per training step rather than once per sub-optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
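For context, here is a minimal sketch of how a params dict for `_dnn_linear_combined_model_fn` might be assembled and handed to a `tf.contrib.learn` `Estimator`. The feature columns, hidden-unit sizes, and model_dir are hypothetical placeholders, the exact head constructor depends on the TF contrib version, and in practice the public `DNNLinearCombinedClassifier`/`DNNLinearCombinedRegressor` estimators build this dict for you.

from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib

# Hypothetical feature columns; real models define their own.
age = contrib_layers.real_valued_column("age")
country = contrib_layers.sparse_column_with_hash_bucket("country", hash_bucket_size=100)

params = {
    "head": head_lib.multi_class_head(n_classes=2),  # binary classification head
    "linear_feature_columns": [country],
    "dnn_feature_columns": [contrib_layers.embedding_column(country, dimension=8), age],
    "dnn_hidden_units": [64, 32],
    "dnn_dropout": 0.2,
}

estimator = contrib_learn.Estimator(
    model_fn=_dnn_linear_combined_model_fn,
    params=params,
    model_dir="/tmp/dnn_linear_combined")  # placeholder path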
Example #4
def _linear_classifier_model_fn(features, targets, mode, params):
  """Estimator's linear model_fn."""
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  feat_values = (features.values() if isinstance(features, dict)
                 else [features])
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      feat_values, "linear", partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
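The helpers `_centered_bias` and `_centered_bias_step` referenced above are defined elsewhere in the module. A rough sketch of what they typically do, assuming TF 1.x graph-mode APIs (a per-class bias variable, trained on its own against the same loss function); the optimizer and learning rate used for the bias step are assumptions.

import tensorflow as tf


def _centered_bias(num_label_columns):
  # One trainable bias per output column; added to the logits via nn.bias_add.
  return tf.Variable(
      tf.zeros([num_label_columns]),
      collections=["centered_bias", tf.GraphKeys.GLOBAL_VARIABLES],
      name="centered_bias_weight")


def _centered_bias_step(targets, loss_fn, num_label_columns):
  # Optimize only the centered-bias variable against the same loss_fn.
  centered_bias = tf.get_collection("centered_bias")
  batch_size = tf.shape(targets)[0]
  logits = tf.reshape(
      tf.tile(centered_bias[0], [batch_size]), [batch_size, num_label_columns])
  loss = tf.reduce_mean(loss_fn(logits, targets))
  return tf.train.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)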
Example #5
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use an FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope, values=features.values(), partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (_get_optimizer(optimizer).apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
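`_get_default_optimizer` above supplies the FTRL default mentioned in the docstring but is not shown. A plausible sketch, assuming a module-level cap on the default learning rate (the constant name and value are assumptions):

import math

import tensorflow as tf

_LEARNING_RATE = 0.2  # assumed cap on the default learning rate


def _get_default_optimizer(feature_columns):
  # Scale the learning rate down as the number of feature columns grows.
  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
  return tf.train.FtrlOptimizer(learning_rate=learning_rate)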
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
    """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. The multiplier is used to
          scale the learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
    head = params["head"]
    linear_feature_columns = params.get("linear_feature_columns")
    linear_optimizer = params.get("linear_optimizer") or "Ftrl"
    joint_linear_weights = params.get("joint_linear_weights")
    dnn_feature_columns = params.get("dnn_feature_columns")
    dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
    dnn_hidden_units = params.get("dnn_hidden_units")
    dnn_activation_fn = params.get("dnn_activation_fn")
    dnn_dropout = params.get("dnn_dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    input_layer_min_slice_size = (params.get("input_layer_min_slice_size")
                                  or 64 << 20)
    num_ps_replicas = config.num_ps_replicas if config else 0
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    if not linear_feature_columns and not dnn_feature_columns:
        raise ValueError(
            "Either linear_feature_columns or dnn_feature_columns must be defined."
        )

    features = _get_feature_dict(features)

    # Build DNN Logits.
    dnn_parent_scope = "dnn"

    if not dnn_feature_columns:
        dnn_logits = None
    else:
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=input_layer_min_slice_size))
        input_layer_scope = dnn_parent_scope + "/input_from_feature_columns"
        with variable_scope.variable_scope(
                input_layer_scope,
                values=features.values(),
                partitioner=input_layer_partitioner) as scope:
            net = layers.input_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=dnn_feature_columns,
                weight_collections=[dnn_parent_scope],
                scope=scope)

        hidden_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas))
        for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
            with variable_scope.variable_scope(
                    dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
                    values=[net],
                    partitioner=hidden_layer_partitioner) as scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=dnn_activation_fn,
                    variables_collections=[dnn_parent_scope],
                    scope=scope)
                if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
            # TODO(b/31209633): Consider adding summary before dropout.
            _add_hidden_layer_summary(net, scope.name)

        with variable_scope.variable_scope(
                dnn_parent_scope + "/logits",
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            dnn_logits = layers.fully_connected(
                net,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[dnn_parent_scope],
                scope=scope)
        _add_hidden_layer_summary(dnn_logits, scope.name)

    # Build Linear logits.
    linear_parent_scope = "linear"

    if not linear_feature_columns:
        linear_logits = None
    else:
        linear_partitioner = partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20)
        with variable_scope.variable_scope(
                linear_parent_scope,
                values=features.values(),
                partitioner=linear_partitioner) as scope:
            if joint_linear_weights:
                linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)
            else:
                linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)

    # Combine logits and build full model.
    if dnn_logits is not None and linear_logits is not None:
        logits = dnn_logits + linear_logits
    elif dnn_logits is not None:
        logits = dnn_logits
    else:
        logits = linear_logits

    def _make_training_op(training_loss):
        """Training op for the DNN linear combined model."""
        train_ops = []
        if dnn_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_DNN_LEARNING_RATE,
                    optimizer=_get_optimizer(dnn_optimizer),
                    gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                        embedding_lr_multipliers, dnn_parent_scope,
                        input_layer_scope),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(dnn_parent_scope),
                    name=dnn_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))
        if linear_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_linear_learning_rate(
                        len(linear_feature_columns)),
                    optimizer=_get_optimizer(linear_optimizer),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(linear_parent_scope),
                    name=linear_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))

        return control_flow_ops.group(*train_ops)

    return head.create_model_fn_ops(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_make_training_op,
        logits=logits)
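The learning-rate helpers used by `_make_training_op` in both combined model_fns (`_DNN_LEARNING_RATE`, `_linear_learning_rate`) live elsewhere in the module. A hedged sketch of what they typically look like; the constant values are assumptions.

import math

_DNN_LEARNING_RATE = 0.05    # assumed fixed learning rate for the DNN part
_LINEAR_LEARNING_RATE = 0.2  # assumed cap for the linear learning rate


def _linear_learning_rate(num_linear_feature_columns):
  # Shrink the linear learning rate as the number of columns grows,
  # but never exceed the fixed cap.
  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
  return min(_LINEAR_LEARNING_RATE, default_learning_rate)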
Example #7
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use an FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

    def _train_op_fn(loss):
      global_step = contrib_variables.get_global_step()
      my_vars = ops.get_collection(parent_scope)
      grads = gradients.gradients(loss, my_vars)
      if gradient_clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
      return (_get_optimizer(optimizer).apply_gradients(
          zip(grads, my_vars), global_step=global_step))

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
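`_get_optimizer` normalizes the optimizer argument, which the docstrings above allow to be a string, an `Optimizer` object, or a zero-argument callable. A minimal sketch of that dispatch, under the assumption that string names map to `tf.train` optimizer classes with fixed default learning rates:

import tensorflow as tf

# Assumed name -> factory mapping; the real module may support more names.
_OPTIMIZER_FACTORIES = {
    "Adagrad": lambda: tf.train.AdagradOptimizer(learning_rate=0.05),
    "Ftrl": lambda: tf.train.FtrlOptimizer(learning_rate=0.2),
    "SGD": lambda: tf.train.GradientDescentOptimizer(learning_rate=0.05),
}


def _get_optimizer(spec):
  if isinstance(spec, str):
    return _OPTIMIZER_FACTORIES[spec]()  # look up by name
  if callable(spec) and not isinstance(spec, tf.train.Optimizer):
    return spec()  # zero-argument factory
  return spec  # already an Optimizer instance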
Example #8
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
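The loss helpers `_log_loss_with_two_classes`, `_softmax_cross_entropy_loss`, and `_weighted_loss` used above are not shown. A hedged sketch of conventional TF 1.x definitions (per-example losses; exactly how the weighted variant normalizes is an assumption):

import tensorflow as tf


def _log_loss_with_two_classes(logits, targets):
  # Binary case: sigmoid cross-entropy on the single logit column.
  return tf.nn.sigmoid_cross_entropy_with_logits(
      labels=tf.to_float(tf.reshape(targets, [-1, 1])), logits=logits)


def _softmax_cross_entropy_loss(logits, targets):
  # Multi-class case: sparse softmax cross-entropy over integer class ids.
  return tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=tf.reshape(targets, [-1]), logits=logits)


def _weighted_loss(loss, weight_tensor):
  # Scale each example's loss by its weight, then reduce to a scalar.
  weighted = tf.reshape(loss, [-1]) * tf.reshape(weight_tensor, [-1])
  return tf.reduce_mean(weighted, name="loss")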
Example #9
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    head = params.get("head", None)
    if not head:
        # TODO(zakaria): Remove these params and make head mandatory
        head = head_lib._multi_class_head(  # pylint: disable=protected-access
            params.get("n_classes"),
            weight_column_name=params["weight_column_name"],
            enable_centered_bias=params.get("enable_centered_bias", False))

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))

    def _train_op_fn(loss):
        global_step = contrib_variables.get_global_step()
        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        return (optimizer.apply_gradients(zip(grads, my_vars),
                                          global_step=global_step))

    return head.head_ops(features, targets, mode, _train_op_fn, logits)
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
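`_get_feature_dict`, `_add_hidden_layer_summary`, and the `_add_layer_summary` used by the first combined model_fn are small utilities omitted from these excerpts. A hedged sketch of the usual definitions:

import tensorflow as tf


def _get_feature_dict(features):
  # model_fns accept either a single Tensor or a dict of Tensors;
  # normalize to a dict keyed by the empty string.
  if isinstance(features, dict):
    return features
  return {"": features}


def _add_hidden_layer_summary(value, tag):
  # Record activation sparsity and a histogram for each layer.
  tf.summary.scalar("%s/fraction_of_zero_values" % tag, tf.nn.zero_fraction(value))
  tf.summary.histogram("%s/activation" % tag, value)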
Example #11
def _linear_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn."""
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    feat_values = (features.values()
                   if isinstance(features, dict) else [features])
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(feat_values,
                                          "linear",
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
Example #12
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
Example #13
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  head = params.get("head", None)
  if not head:
    # TODO(zakaria): Remove these params and make head mandatory
    head = head_lib._multi_class_head(  # pylint: disable=protected-access
        params.get("n_classes"),
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params.get("enable_centered_bias", False))

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, targets, mode, _train_op_fn, logits)
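The prediction keys `_LOGISTIC`, `_PROBABILITIES`, and `_CLASSES` used by the classifier model_fns above are module-level string constants; the exact values below are assumptions consistent with how the prediction dict is consumed.

# Assumed module-level prediction-key constants.
_LOGISTIC = "logistic"            # sigmoid of the single binary logit
_PROBABILITIES = "probabilities"  # softmax over all classes
_CLASSES = "classes"              # argmax class id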