Example #1
 def build_model(self, features, feature_columns, is_training):
   """See base class."""
   self._feature_columns = feature_columns
   partitioner = partitioned_variables.min_max_variable_partitioner(
       max_partitions=self._num_ps_replicas,
       min_slice_size=64 << 20)
   with variable_scope.variable_scope(
       self._scope,
       values=features.values(),
       partitioner=partitioner) as scope:
     if self._joint_weights:
       logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
     else:
       logits, _, _ = layers.weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
   return logits
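A minimal standalone sketch of calling layers.weighted_sum_from_feature_columns directly, for reference. It assumes the TF 1.x tf.contrib.layers API; the column names and toy tensors are illustrative only and do not come from the project above.

import tensorflow as tf
from tensorflow.contrib import layers

# Two illustrative feature columns: one dense, one sparse.
age = layers.real_valued_column("age")
language = layers.sparse_column_with_hash_bucket("language", hash_bucket_size=20)

features = {
    "age": tf.constant([[25.0], [62.0]]),
    "language": tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                values=["en", "fr"],
                                dense_shape=[2, 1]),
}

# Returns the weighted sum over the columns (the logits), a dict mapping each
# feature column to its weight variable, and the bias variable.
logits, columns_to_variables, bias = layers.weighted_sum_from_feature_columns(
    columns_to_tensors=features,
    feature_columns=[age, language],
    num_outputs=1)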
Example #2
 def build_model(self, features, feature_columns, is_training):
   """See base class."""
   self._feature_columns = feature_columns
   partitioner = partitioned_variables.min_max_variable_partitioner(
       max_partitions=self._num_ps_replicas,
       min_slice_size=64 << 20)
   with variable_scope.variable_scope(
       self._scope,
       values=features.values(),
       partitioner=partitioner) as scope:
     if self._joint_weights:
       logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
     else:
       logits, _, _ = layers.weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
   return logits
Example #3
    def _get_train_ops(self, features, targets):
        """See base class."""
        self._validate_linear_feature_columns(features)
        if not isinstance(self._linear_optimizer,
                          sdca_optimizer.SDCAOptimizer):
            return super(LinearClassifier,
                         self)._get_train_ops(features, targets)

        # SDCA currently supports binary classification only.
        if self._target_column.num_label_columns > 2:
            raise ValueError(
                "SDCA does not currently support multi-class classification.")
        global_step = contrib_variables.get_global_step()
        assert global_step

        logits, columns_to_variables, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._linear_feature_columns,
            num_outputs=self._target_column.num_label_columns,
            weight_collections=[self._linear_weight_collection],
            name="linear")
        with ops.control_dependencies([self._centered_bias()]):
            loss = self._loss(logits, targets, features)
        logging_ops.scalar_summary("loss", loss)

        train_ops = self._linear_optimizer.get_train_step(
            self._linear_feature_columns,
            self._target_column.weight_column_name, "logistic_loss", features,
            targets, columns_to_variables, global_step)

        return train_ops, loss
Example #4
    def _get_train_ops(self, features, targets):
        """See base class."""
        if not isinstance(self._linear_optimizer,
                          sdca_optimizer.SDCAOptimizer):
            return super(LinearRegressor,
                         self)._get_train_ops(features, targets)
        global_step = contrib_variables.get_or_create_global_step()

        logits, columns_to_variables, bias = (
            layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=self._linear_feature_columns,
                num_outputs=self._target_column.num_label_columns,
                weight_collections=[self._linear_model.get_scope_name()],
                scope=self._linear_model.get_scope_name()))
        with ops.control_dependencies([self._centered_bias()]):
            loss = self._target_column.loss(logits, targets, features)
        logging_ops.scalar_summary("loss", loss)

        train_feature_columns = _maybe_add_bias_column(
            self._linear_feature_columns, features, bias, targets,
            self._enable_centered_bias, columns_to_variables)

        train_op = self._linear_optimizer.get_train_step(
            train_feature_columns, self._target_column.weight_column_name,
            self._loss_type(), features, targets, columns_to_variables,
            global_step)
        return train_op, loss
Example #5
  def _get_train_ops(self, features, targets):
    """See base class."""
    if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer):
      return super(LinearRegressor, self)._get_train_ops(features, targets)
    assert not self._joint_weights, ("_joint_weights is incompatible with"
                                     " SDCAOptimizer.")
    global_step = contrib_variables.get_or_create_global_step()

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._linear_feature_columns,
            num_outputs=self._head.logits_dimension,
            weight_collections=[self._linear_model.get_scope_name()],
            scope=self._linear_model.get_scope_name()))
    _add_bias_column(self._linear_feature_columns, features, bias, targets,
                     columns_to_variables)

    def _train_op_fn(unused_loss):
      sdca_model, train_op = self._linear_optimizer.get_train_step(
          columns_to_variables, self._weight_column_name,
          self._loss_type(), features, targets, global_step)
      return sdca_model.update_weights(train_op)

    model_fn_ops = self._head.head_ops(features, targets,
                                       estimator.ModeKeys.TRAIN, _train_op_fn,
                                       logits=logits)
    return model_fn_ops.training_op, model_fn_ops.loss
Example #6
    def _get_train_ops(self, features, targets):
        """See base class."""
        if not isinstance(self._linear_optimizer,
                          sdca_optimizer.SDCAOptimizer):
            return super(LinearRegressor,
                         self)._get_train_ops(features, targets)
        assert not self._joint_weights, ("_joint_weights is incompatible with"
                                         " SDCAOptimizer.")
        global_step = contrib_variables.get_or_create_global_step()

        logits, columns_to_variables, bias = (
            layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=self._linear_feature_columns,
                num_outputs=self._head.logits_dimension,
                weight_collections=[self._linear_model.get_scope_name()],
                scope=self._linear_model.get_scope_name()))
        _add_bias_column(self._linear_feature_columns, features, bias, targets,
                         columns_to_variables)

        def _train_op_fn(unused_loss):
            sdca_model, train_op = self._linear_optimizer.get_train_step(
                columns_to_variables, self._weight_column_name,
                self._loss_type(), features, targets, global_step)
            return sdca_model.update_weights(train_op)

        model_fn_ops = self._head.head_ops(features,
                                           targets,
                                           estimator.ModeKeys.TRAIN,
                                           _train_op_fn,
                                           logits=logits)
        return model_fn_ops.training_op, model_fn_ops.loss
Example #7
  def _get_train_ops(self, features, targets):
    """See base class."""
    if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer):
      return super(LinearRegressor, self)._get_train_ops(features, targets)
    assert not self._joint_weights, ("_joint_weights is incompatible with"
                                     " SDCAOptimizer.")
    global_step = contrib_variables.get_or_create_global_step()

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._linear_feature_columns,
            num_outputs=self._target_column.num_label_columns,
            weight_collections=[self._linear_model.get_scope_name()],
            scope=self._linear_model.get_scope_name()))
    with ops.control_dependencies([self._centered_bias()]):
      loss = self._target_column.loss(logits, targets, features)
      logging_ops.scalar_summary("loss", loss)

      _add_bias_column(self._linear_feature_columns, features, bias, targets,
                       columns_to_variables)

    train_op = self._linear_optimizer.get_train_step(
        columns_to_variables, self._target_column.weight_column_name,
        self._loss_type(), features, targets, global_step)
    return train_op, loss
Example #8
  def _get_train_ops(self, features, targets):
    """See base class."""
    self._validate_linear_feature_columns(features)
    if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer):
      return super(LinearClassifier, self)._get_train_ops(features, targets)

    # SDCA currently supports binary classification only.
    if self._target_column.num_label_columns > 2:
      raise ValueError(
          "SDCA does not currently support multi-class classification.")
    global_step = contrib_variables.get_global_step()
    assert global_step

    logits, columns_to_variables, _ = layers.weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=self._linear_feature_columns,
        num_outputs=self._target_column.num_label_columns,
        weight_collections=[self._linear_weight_collection],
        name="linear")
    with ops.control_dependencies([self._centered_bias()]):
      loss = self._loss(logits, targets, features)
    logging_ops.scalar_summary("loss", loss)

    train_ops = self._linear_optimizer.get_train_step(
        self._linear_feature_columns, self._target_column.weight_column_name,
        "logistic_loss", features, targets, columns_to_variables, global_step)

    return train_ops, loss
Example #9
 def _linear_logits(self, features):
   logits, _, _ = layers.weighted_sum_from_feature_columns(
       columns_to_tensors=features,
       feature_columns=self._get_linear_feature_columns(),
       num_outputs=self._num_label_columns(),
       weight_collections=[self._linear_weight_collection],
       name="linear")
   return logits
Example #10
    def build_model(self, features, feature_columns, is_training):
        """See base class."""
        features = self._get_feature_dict(features)
        self._feature_columns = feature_columns

        logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=self._get_feature_columns(),
            num_outputs=self._num_label_columns,
            weight_collections=[self._weight_collection_name],
            name="linear")
        return logits
Example #11
  def build_model(self, features, feature_columns, is_training):
    """See base class."""
    features = self._get_feature_dict(features)
    self._feature_columns = feature_columns

    logits, _, _ = layers.weighted_sum_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=self._get_feature_columns(),
        num_outputs=self._num_label_columns,
        weight_collections=[self._weight_collection_name],
        name="linear")
    return logits
Example #12
def sdca_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn."""
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    weight_column_name = params["weight_column_name"]
    loss_type = params["loss_type"]
    enable_centered_bias = params.get("enable_centered_bias", True)

    if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
        raise ValueError("Optimizer must be of type SDCAOptimizer")

    loss_fn = {
        "logistic_loss": _log_loss_with_two_classes,
        "hinge_loss": _hinge_loss,
    }[loss_type]

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1))

    train_feature_columns = _maybe_add_bias_column(feature_columns, features,
                                                   bias, targets,
                                                   enable_centered_bias,
                                                   columns_to_variables)

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")

    train_op = None
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()
        # TODO(zoy): Combine linear_feature_columns and columns_to_variables.
        train_op = optimizer.get_train_step(train_feature_columns,
                                            weight_column_name, loss_type,
                                            features, targets,
                                            columns_to_variables, global_step)

    predictions = {}
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, train_op
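In the prediction block above, the single binary logit is turned into two-class logits by prepending a column of zeros, so softmax over [0, x] reproduces [1 - sigmoid(x), sigmoid(x)]. A small standalone NumPy check of that identity (independent of the TF code above):

import numpy as np

x = np.array([-2.0, 0.0, 3.0])                        # single binary logits
two_class = np.stack([np.zeros_like(x), x], axis=1)   # prepend a zero column

softmax = np.exp(two_class) / np.exp(two_class).sum(axis=1, keepdims=True)
sigmoid = 1.0 / (1.0 + np.exp(-x))

# Column 1 of the softmax equals sigmoid(x); column 0 equals 1 - sigmoid(x).
assert np.allclose(softmax[:, 1], sigmoid)
assert np.allclose(softmax[:, 0], 1.0 - sigmoid)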
Example #13
def sdca_classifier_model_fn(features, targets, mode, params):
  """Estimator's linear model_fn."""
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  loss_type = params["loss_type"]
  enable_centered_bias = params.get("enable_centered_bias", True)

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  loss_fn = {
      "logistic_loss": _log_loss_with_two_classes,
      "hinge_loss": _hinge_loss,
  }[loss_type]

  logits, columns_to_variables, bias = (
      layers.weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=1))

  if enable_centered_bias:
    _add_bias_column(feature_columns, features, bias, targets,
                     columns_to_variables)

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_op = None
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()
    train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, features,
        targets, global_step)

  predictions = {}
  predictions[_LOGISTIC] = math_ops.sigmoid(logits)
  logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, train_op
Example #14
 def build_linear_logits(features,
                         linear_feature_columns,
                         num_ps_replicas,
                         logits_dimension,
                         linear_parent_scope):
     linear_partitioner = partitioned_variables.min_max_variable_partitioner(
             max_partitions=num_ps_replicas,
             min_slice_size=64 << 20)
     with variable_scope.variable_scope(
             linear_parent_scope,
             values=tuple(six.itervalues(features)),
             partitioner=linear_partitioner) as scope:
         linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
                 columns_to_tensors=features,
                 feature_columns=linear_feature_columns,
                 num_outputs=logits_dimension,
                 weight_collections=[linear_parent_scope],
                 scope=scope)
     return linear_logits
Example #15
def linear_layer(inputs, features, outputs, weight_collections, scope):
    """ Generates an output by multiplying each feature value by a weight
    Args:
        inputs: Input dictionary containing the data.
        features: List of columns to read.
        outputs: Number of outputs/classes.
        weight_collections: Collection where to add the trainable variables.
            By default added to GraphKeys.VARIABLES.
        scope: Name of the scope where to add the variables.
    Returns:
        Output of the linear model.
    """
    with tf.variable_scope(scope) as sc:
        logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=inputs,
            feature_columns=features,
            num_outputs=outputs,
            weight_collections=weight_collections,
            scope=sc)
        return logits
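A hypothetical call to the linear_layer helper above; the feature column, input tensor, and scope name are assumptions for illustration.

import tensorflow as tf
from tensorflow.contrib import layers

# Assumed inputs for illustration.
age = layers.real_valued_column("age")
inputs = {"age": tf.constant([[25.0], [62.0]])}

# Two outputs (e.g. a two-class problem); with weight_collections=None the
# weights land in the default variables collection, as the docstring notes.
logits = linear_layer(inputs, [age], outputs=2,
                      weight_collections=None, scope="linear")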
Example #16
def _get_linear_train_and_loss_ops(features, target, linear_feature_columns,
                                   target_column, linear_optimizer, loss_type,
                                   centered_bias, scope_name):
  """Returns train and loss ops for SDCAOptimizer."""
  global_step = contrib_variables.get_global_step()
  assert global_step

  logits, columns_to_variables, _ = layers.weighted_sum_from_feature_columns(
      columns_to_tensors=features,
      feature_columns=linear_feature_columns,
      num_outputs=target_column.num_label_columns,
      weight_collections=[scope_name],
      scope=scope_name)
  with ops.control_dependencies([centered_bias]):
    loss = target_column.loss(logits, target, features)
  logging_ops.scalar_summary("loss", loss)

  train_op = linear_optimizer.get_train_step(linear_feature_columns,
                                             target_column.weight_column_name,
                                             loss_type, features, target,
                                             columns_to_variables, global_step)
  return train_op, loss
Example #17
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
    """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
    head = params["head"]
    linear_feature_columns = params.get("linear_feature_columns")
    linear_optimizer = params.get("linear_optimizer") or "Ftrl"
    joint_linear_weights = params.get("joint_linear_weights")
    dnn_feature_columns = params.get("dnn_feature_columns")
    dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
    dnn_hidden_units = params.get("dnn_hidden_units")
    dnn_activation_fn = params.get("dnn_activation_fn")
    dnn_dropout = params.get("dnn_dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    input_layer_min_slice_size = (params.get("input_layer_min_slice_size")
                                  or 64 << 20)
    num_ps_replicas = config.num_ps_replicas if config else 0
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    if not linear_feature_columns and not dnn_feature_columns:
        raise ValueError(
            "Either linear_feature_columns or dnn_feature_columns must be defined."
        )

    features = _get_feature_dict(features)

    # Build DNN Logits.
    dnn_parent_scope = "dnn"

    if not dnn_feature_columns:
        dnn_logits = None
    else:
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=input_layer_min_slice_size))
        input_layer_scope = dnn_parent_scope + "/input_from_feature_columns"
        with variable_scope.variable_scope(
                input_layer_scope,
                values=features.values(),
                partitioner=input_layer_partitioner) as scope:
            net = layers.input_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=dnn_feature_columns,
                weight_collections=[dnn_parent_scope],
                scope=scope)

        hidden_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas))
        for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
            with variable_scope.variable_scope(
                    dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
                    values=[net],
                    partitioner=hidden_layer_partitioner) as scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=dnn_activation_fn,
                    variables_collections=[dnn_parent_scope],
                    scope=scope)
                if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
            # TODO(b/31209633): Consider adding summary before dropout.
            _add_hidden_layer_summary(net, scope.name)

        with variable_scope.variable_scope(
                dnn_parent_scope + "/logits",
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            dnn_logits = layers.fully_connected(
                net,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[dnn_parent_scope],
                scope=scope)
        _add_hidden_layer_summary(dnn_logits, scope.name)

    # Build Linear logits.
    linear_parent_scope = "linear"

    if not linear_feature_columns:
        linear_logits = None
    else:
        linear_partitioner = partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20)
        with variable_scope.variable_scope(
                linear_parent_scope,
                values=features.values(),
                partitioner=linear_partitioner) as scope:
            if joint_linear_weights:
                linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)
            else:
                linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)

    # Combine logits and build full model.
    if dnn_logits is not None and linear_logits is not None:
        logits = dnn_logits + linear_logits
    elif dnn_logits is not None:
        logits = dnn_logits
    else:
        logits = linear_logits

    def _make_training_op(training_loss):
        """Training op for the DNN linear combined model."""
        train_ops = []
        if dnn_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_DNN_LEARNING_RATE,
                    optimizer=_get_optimizer(dnn_optimizer),
                    gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                        embedding_lr_multipliers, dnn_parent_scope,
                        input_layer_scope),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(dnn_parent_scope),
                    name=dnn_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))
        if linear_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_linear_learning_rate(
                        len(linear_feature_columns)),
                    optimizer=_get_optimizer(linear_optimizer),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(linear_parent_scope),
                    name=linear_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))

        return control_flow_ops.group(*train_ops)

    return head.create_model_fn_ops(features,
                                    labels,
                                    mode,
                                    _make_training_op,
                                    logits=logits)
Example #18
def sdca_model_fn(features, labels, mode, params):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If the type of head is neither `_BinarySvmHead`, nor
      `_RegressionHead` nor `_MultiClassHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    assert head.logits_dimension == 1, ("SDCA only applies for "
                                        "logits_dimension=1.")
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(head))

  parent_scope = "linear"

  with variable_scope.variable_op_scope(
      features.values(), parent_scope) as scope:
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    global_step = contrib_variables.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                    weight_column_name,
                                                    loss_type, features,
                                                    labels, global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(
        training_chief_hooks=(model_fn_ops.training_chief_hooks +
                              [update_weights_hook]))
  return model_fn_ops
Example #19
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

    def _train_op_fn(loss):
      global_step = contrib_variables.get_global_step()
      my_vars = ops.get_collection(parent_scope)
      grads = gradients.gradients(loss, my_vars)
      if gradient_clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
      return (_get_optimizer(optimizer).apply_gradients(
          zip(grads, my_vars), global_step=global_step))

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
Example #20
def sdca_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn that uses the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * loss_type: A string. Must be either "logistic_loss" or "hinge_loss".
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  loss_type = params["loss_type"]
  update_weights_hook = params.get("update_weights_hook")

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  loss_fn = {
      "logistic_loss": _log_loss_with_two_classes,
      "hinge_loss": _hinge_loss,
  }[loss_type]

  logits, columns_to_variables, bias = (
      layers.weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=1))

  _add_bias_column(feature_columns, features, bias, targets,
                   columns_to_variables)

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_op = None
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                    weight_column_name,
                                                    loss_type, features,
                                                    targets, global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)

  predictions = {}
  predictions[_LOGISTIC] = math_ops.sigmoid(logits)
  logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, train_op
Example #21
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  head = params.get("head", None)
  if not head:
    # TODO(zakaria): Remove these params and make head mandatory
    head = head_lib._multi_class_head(  # pylint: disable=protected-access
        params.get("n_classes"),
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params.get("enable_centered_bias", False))

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, targets, mode, _train_op_fn, logits)
Example #22
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
Example #23
def sdca_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` with values in the set {0, 1}.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * l1_regularization: Global (across all examples) L1-regularization
          parameter.
      * l2_regularization: Global (across all examples) L2-regularization
          parameter.
      * num_loss_partitions: Number of partitions of the global loss function
          optimized by `SDCAOptimizer`.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If the type of head is not one of `_BinarySvmHead`,
      `_RegressionHead` or `_MultiClassHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  example_id_column = params["example_id_column"]
  l1_regularization = params["l1_regularization"]
  l2_regularization = params["l2_regularization"]
  num_loss_partitions = params["num_loss_partitions"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)
  partitioner = params["partitioner"]

  loss_type = None
  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(type(head)))

  assert head.logits_dimension == 1, (
      "SDCA only applies to logits_dimension=1.")

  # Update num_loss_partitions based on number of workers.
  n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas)
  optimizer = sdca_optimizer.SDCAOptimizer(
      example_id_column=example_id_column,
      num_loss_partitions=n_loss_partitions,
      symmetric_l1_regularization=l1_regularization,
      symmetric_l2_regularization=l2_regularization,
      partitioner=partitioner)

  parent_scope = "linear"

  with variable_scope.variable_scope(
      values=features.values(), name_or_scope=parent_scope,
      partitioner=partitioner) as scope:
    features = features.copy()
    features.update(layers.transform_features(features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    global_step = training_util.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, features, labels,
        global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(training_chief_hooks=(
        model_fn_ops.training_chief_hooks + [update_weights_hook]))
  return model_fn_ops
Example #24
def sdca_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` with values in the set {0, 1}.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * l1_regularization: Global (across all examples) L1-regularization
          parameter.
      * l2_regularization: Global (across all examples) L2-regularization
          parameter.
      * num_loss_partitions: Number of partitions of the global loss function
          optimized by `SDCAOptimizer`.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If the type of head is not one of `_BinarySvmHead`,
      `_RegressionHead` or `_MultiClassHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  example_id_column = params["example_id_column"]
  l1_regularization = params["l1_regularization"]
  l2_regularization = params["l2_regularization"]
  num_loss_partitions = params["num_loss_partitions"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)

  loss_type = None
  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(type(head)))

  assert head.logits_dimension == 1, (
      "SDCA only applies to logits_dimension=1.")

  # Update num_loss_partitions based on number of workers.
  n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas)
  optimizer = sdca_optimizer.SDCAOptimizer(
      example_id_column=example_id_column,
      num_loss_partitions=n_loss_partitions,
      symmetric_l1_regularization=l1_regularization,
      symmetric_l2_regularization=l2_regularization)

  parent_scope = "linear"

  with variable_scope.variable_op_scope(features.values(),
                                        parent_scope) as scope:
    features = features.copy()
    features.update(layers.transform_features(features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    global_step = contrib_variables.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, features, labels,
        global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(training_chief_hooks=(
        model_fn_ops.training_chief_hooks + [update_weights_hook]))
  return model_fn_ops
Example #25
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    head = params.get("head", None)
    if not head:
        # TODO(zakaria): Remove these params and make head mandatory
        head = head_lib._multi_class_head(  # pylint: disable=protected-access
            params.get("n_classes"),
            weight_column_name=params["weight_column_name"],
            enable_centered_bias=params.get("enable_centered_bias", False))

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))

    def _train_op_fn(loss):
        global_step = contrib_variables.get_global_step()
        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        return (optimizer.apply_gradients(zip(grads, my_vars),
                                          global_step=global_step))

    return head.head_ops(features, targets, mode, _train_op_fn, logits)
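
The `_train_op_fn` above clips gradients by their global norm before applying them. A minimal NumPy restatement of the standard global-norm clipping formula (an illustrative sketch, not TensorFlow's `clip_ops` implementation):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    """Scale every gradient by clip_norm / max(global_norm, clip_norm)."""
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]  # global norm = 13
clipped, norm = clip_by_global_norm(grads, clip_norm=1.0)
print(norm, clipped)  # 13.0, each gradient scaled down by 1/13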
Ejemplo n.º 26
0
def sdca_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn that uses the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * loss_type: A string. Must be either "logistic_loss" or "hinge_loss".
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    weight_column_name = params["weight_column_name"]
    loss_type = params.get("loss_type", None)
    update_weights_hook = params.get("update_weights_hook")

    if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
        raise ValueError("Optimizer must be of type SDCAOptimizer")

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1))

    _add_bias_column(feature_columns, features, bias, targets,
                     columns_to_variables)

    if loss_type == "hinge_loss":
        head = head_lib._binary_svm_head(  # pylint: disable=protected-access
            weight_column_name=weight_column_name,
            enable_centered_bias=False)
    else:
        head = head_lib._multi_class_head(  # pylint: disable=protected-access
            2,
            weight_column_name=weight_column_name,
            enable_centered_bias=False)

    def _train_op_fn(unused_loss):
        global_step = contrib_variables.get_global_step()
        sdca_model, train_op = optimizer.get_train_step(
            columns_to_variables, weight_column_name, loss_type, features,
            targets, global_step)
        if update_weights_hook is not None:
            update_weights_hook.set_parameters(sdca_model, train_op)
        return train_op

    return head.head_ops(features, targets, mode, _train_op_fn, logits)
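
The head selection above corresponds to the two loss types SDCA supports in this model_fn. A NumPy sketch of the two per-example losses, with hypothetical helper names for illustration (labels assumed to be in {0, 1}):

import numpy as np

def hinge_loss(logits, labels):
    signed = 2.0 * labels - 1.0          # map {0, 1} labels to {-1, +1}
    return np.maximum(0.0, 1.0 - signed * logits)

def logistic_loss(logits, labels):
    # Numerically stable sigmoid cross-entropy: max(x, 0) - x*y + log(1 + e^-|x|)
    return (np.maximum(logits, 0) - logits * labels
            + np.log1p(np.exp(-np.abs(logits))))

logits = np.array([2.0, -1.0, 0.5])
labels = np.array([1.0, 0.0, 1.0])
print(hinge_loss(logits, labels), logistic_loss(logits, labels))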
Ejemplo n.º 27
0
def sdca_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn that uses the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * loss_type: A string. Must be either "logistic_loss" or "hinge_loss".
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  loss_type = params.get("loss_type", None)
  update_weights_hook = params.get("update_weights_hook")

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  logits, columns_to_variables, bias = (
      layers.weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=1))

  _add_bias_column(feature_columns, features, bias, targets,
                   columns_to_variables)

  if loss_type == "hinge_loss":
    head = head_lib._binary_svm_head(  # pylint: disable=protected-access
        weight_column_name=weight_column_name,
        enable_centered_bias=False)
  else:
    head = head_lib._multi_class_head(  # pylint: disable=protected-access
        2,
        weight_column_name=weight_column_name,
        enable_centered_bias=False)
  def _train_op_fn(unused_loss):
    global_step = contrib_variables.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                    weight_column_name,
                                                    loss_type, features,
                                                    targets, global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  return head.head_ops(features, targets, mode, _train_op_fn, logits)
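
`_add_bias_column` above works by appending a constant feature with value 1.0, so the SDCA optimizer learns the bias as just another weight. A minimal NumPy sketch of that idea (the real helper also wires the new column into `columns_to_variables`):

import numpy as np

def add_bias_column(features):
    """Append a constant 1.0 column so the bias becomes an ordinary weight."""
    batch_size = features.shape[0]
    return np.concatenate([features, np.ones((batch_size, 1))], axis=1)

x = np.array([[0.2, 1.5], [3.0, -0.7]])
w = np.array([0.1, -0.3, 0.5])   # the last entry plays the role of the bias
logits = add_bias_column(x).dot(w)
print(logits)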
Ejemplo n.º 28
0
def _linear_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn."""
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)

    if not isinstance(features, dict):
        features = {"": features}

    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    feat_values = features.values()  # features is always a dict at this point
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(feat_values,
                                          "linear",
                                          partitioner=partitioner) as scope:
        logits, _, _ = (layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=num_label_columns,
            weight_collections=["linear"],
            scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
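
The prediction block above builds two-class logits as `[0, logit]`; the softmax of that pair equals the sigmoid of the original logit, which is why `_LOGISTIC` and `_PROBABILITIES` stay consistent. A quick NumPy check of the equivalence:

import numpy as np

logits = np.array([[1.2], [-0.4], [3.0]])
sigmoid = 1.0 / (1.0 + np.exp(-logits))

two_class = np.concatenate([np.zeros_like(logits), logits], axis=1)
exp = np.exp(two_class - two_class.max(axis=1, keepdims=True))
softmax = exp / exp.sum(axis=1, keepdims=True)

print(np.allclose(sigmoid[:, 0], softmax[:, 1]))  # True
print(softmax.argmax(axis=1))                     # predicted classes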
Ejemplo n.º 29
0
def sdca_model_fn(features, labels, mode, params):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_MultiClassHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If the type of head is neither `_BinarySvmHead`, nor
      `_RegressionHead` nor `_MultiClassHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  # pylint: disable=protected-access
  if isinstance(head, head_lib._BinarySvmHead):
    loss_type = "hinge_loss"
  elif isinstance(
      head, (head_lib._MultiClassHead, head_lib._BinaryLogisticHead)):
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(head))
  # pylint: enable=protected-access

  parent_scope = "linear"

  with variable_scope.variable_op_scope(
      features.values(), parent_scope) as scope:
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, labels,
                     columns_to_variables)

  def _train_op_fn(unused_loss):
    global_step = contrib_variables.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                    weight_column_name,
                                                    loss_type, features,
                                                    labels, global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.head_ops(features, labels, mode, _train_op_fn, logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(
        training_chief_hooks=(model_fn_ops.training_chief_hooks +
                              [update_weights_hook]))
  return model_fn_ops
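
The `isinstance` chain above is a dispatch from head type to the SDCA loss name. A sketch of the same mapping with plain stand-in classes (the real classes live in `head_lib`):

class BinarySvmHead(object): pass
class BinaryLogisticHead(object): pass
class RegressionHead(object): pass

def loss_type_for(head):
    """Return the SDCA loss name for a head, or raise for unsupported heads."""
    if isinstance(head, BinarySvmHead):
        return "hinge_loss"
    if isinstance(head, BinaryLogisticHead):
        return "logistic_loss"
    if isinstance(head, RegressionHead):
        return "squared_loss"
    raise ValueError("Unsupported head type: {}".format(head))

print(loss_type_for(BinaryLogisticHead()))  # logistic_loss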
Ejemplo n.º 30
0
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope, values=features.values(), partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (_get_optimizer(optimizer).apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
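
`min_max_variable_partitioner(max_partitions=num_ps_replicas, min_slice_size=64 << 20)` above only shards a variable once each slice would exceed roughly 64 MiB. A back-of-the-envelope sketch of that heuristic (an approximation for intuition, not the TensorFlow implementation):

def estimated_partitions(num_elements, bytes_per_element=4,
                         max_partitions=8, min_slice_size=64 << 20):
    """Approximate slice count: add partitions only past 64 MiB per slice."""
    total_bytes = num_elements * bytes_per_element
    return max(1, min(max_partitions, total_bytes // min_slice_size))

# An 80M-element float32 variable (~320 MB) -> 4 slices under this approximation.
print(estimated_partitions(10000000 * 8))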
Ejemplo n.º 31
0
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, the estimator will learn a
          centered bias variable for each class. The rest of the model structure
          learns the residual after the centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
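
With `enable_centered_bias`, a bias trained on the labels alone is added to the logits so the linear weights only need to fit the residual. A toy NumPy illustration, under the simplifying assumption that the bias has converged to the logit of the label base rate (which is what minimizing log loss over a constant bias gives):

import numpy as np

labels = np.array([1.0, 0.0, 1.0, 1.0])               # base rate 0.75
base_rate = labels.mean()
centered_bias = np.log(base_rate / (1.0 - base_rate))  # logit of the base rate

raw_logits = np.array([0.3, -0.2, 0.1, 0.4])  # what the linear part produces
logits = raw_logits + centered_bias           # the nn.bias_add step above
print(centered_bias, 1.0 / (1.0 + np.exp(-logits)))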
Ejemplo n.º 32
0
def _linear_classifier_model_fn(features, targets, mode, params):
  """Estimator's linear model_fn."""
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  feat_values = features.values()  # features is always a dict at this point
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      feat_values, "linear", partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
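
When `weight_column_name` is set above, per-example losses are reweighted before reduction. A small NumPy sketch of one common weighting scheme (a hypothetical `weighted_loss`; the real `_weighted_loss` helper may normalize differently):

import numpy as np

def weighted_loss(per_example_loss, weights):
    """Weight each example's loss, then average with a weighted mean."""
    weights = weights.reshape(-1)  # mirrors array_ops.reshape(..., shape=(-1,))
    return np.sum(per_example_loss * weights) / np.maximum(np.sum(weights), 1e-12)

loss = np.array([0.5, 1.0, 0.2])
weights = np.array([[1.0], [2.0], [0.0]])
print(weighted_loss(loss, weights))  # 0.833...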
Ejemplo n.º 33
0
def sdca_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn that uses the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * loss_type: A string. Must be either "logistic_loss" or "hinge_loss".
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    weight_column_name = params["weight_column_name"]
    loss_type = params["loss_type"]
    update_weights_hook = params.get("update_weights_hook")

    if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
        raise ValueError("Optimizer must be of type SDCAOptimizer")

    loss_fn = {
        "logistic_loss": _log_loss_with_two_classes,
        "hinge_loss": _hinge_loss,
    }[loss_type]

    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1))

    _add_bias_column(feature_columns, features, bias, targets,
                     columns_to_variables)

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_op = None
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()
        sdca_model, train_op = optimizer.get_train_step(
            columns_to_variables, weight_column_name, loss_type, features,
            targets, global_step)
        if update_weights_hook is not None:
            update_weights_hook.set_parameters(sdca_model, train_op)

    predictions = {}
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, train_op
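
The `loss_fn` selection above is a plain dict dispatch keyed by `loss_type`; an unsupported string fails fast with a `KeyError`. A compact sketch of the same pattern, with NumPy stand-ins for the two loss functions:

import numpy as np

# Stand-ins for _log_loss_with_two_classes and _hinge_loss (labels in {0, 1}).
loss_fns = {
    "logistic_loss": lambda logits, y: np.log1p(np.exp(-(2 * y - 1) * logits)),
    "hinge_loss": lambda logits, y: np.maximum(0.0, 1.0 - (2 * y - 1) * logits),
}

loss_type = "logistic_loss"
loss_fn = loss_fns[loss_type]   # an unknown loss_type raises KeyError here
print(loss_fn(np.array([2.0, -1.0]), np.array([1.0, 0.0])).mean())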
Ejemplo n.º 34
0
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
      * fix_global_step_increment_bug: A bool. If True (the default here), the
          global step is incremented exactly once per training step, after both
          train ops have run, instead of once per optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
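
Two details above are worth calling out: the wide and deep towers are combined by simply adding their logits, and with `fix_global_step_increment_bug` the global step is incremented exactly once after both train ops have run, rather than once per optimizer. A NumPy sketch of the logits combination only:

import numpy as np

dnn_logits = np.array([[0.8], [-0.3]])     # output of the DNN tower
linear_logits = np.array([[0.1], [0.4]])   # output of the linear tower

logits = dnn_logits + linear_logits         # combined wide-and-deep logits
probabilities = 1.0 / (1.0 + np.exp(-logits))
print(logits.ravel(), probabilities.ravel())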
Ejemplo n.º 35
0
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
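
The hidden-layer loop above is a plain feed-forward stack: each layer is a fully connected transform with the chosen activation, optionally followed by dropout during training. A minimal NumPy forward pass in the same spirit (random weights and a fixed keep probability; an illustrative sketch, not the contrib layers implementation):

import numpy as np

def forward(net, hidden_units, dropout=None, training=False, seed=0):
    rng = np.random.RandomState(seed)
    for num_hidden_units in hidden_units:
        w = rng.randn(net.shape[1], num_hidden_units) * 0.1
        net = np.maximum(net.dot(w), 0.0)          # fully_connected + relu
        if dropout is not None and training:
            keep_prob = 1.0 - dropout
            mask = rng.binomial(1, keep_prob, net.shape)
            net = net * mask / keep_prob           # inverted dropout
    return net

x = np.ones((2, 4))
print(forward(x, hidden_units=[8, 3], dropout=0.5, training=True).shape)  # (2, 3)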