Exemple #1
0
def poisson_regression_head(
    weight_column=None,
    label_dimension=1,
    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
    compute_full_loss=True,
    name=None):
  """Builds a `_Head` for poisson regression on top of `tf.nn.log_poisson_loss`.

  The head is a regression head whose loss function is the log-poisson loss
  and whose inverse link function is `exp`, i.e. a generalized linear model
  (see https://en.wikipedia.org/wiki/Generalized_linear_model).

  The loss is the weighted sum over all input dimensions: for labels of shape
  `[batch_size, label_dimension]` it is the weighted sum over both
  `batch_size` and `label_dimension`.

  Expected shapes:

  * `logits`: `[D0, D1, ... DN, label_dimension]`; in many applications this
    is `[batch_size, label_dimension]`.
  * `labels`: same as `logits`, namely `[D0, D1, ... DN, label_dimension]`.
    With `label_dimension=1`, `[D0, D1, ... DN]` is supported as well.
  * weights (only when `weight_column` is given): `[D0, D1, ... DN]`,
    `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN, label_dimension]`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` that identifies the feature holding
      example weights. The weights scale each example's loss, which allows
      examples to be down weighted or boosted during training.
    label_dimension: Number of regression labels per example, i.e. the size of
      the last dimension of the labels `Tensor` (typically of shape
      `[batch_size, label_dimension]`).
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Controls how
      the training loss is reduced over batch and label dimension. The default
      `SUM_OVER_BATCH_SIZE` is the weighted sum of losses divided by
      `batch size * label_dimension`. See `tf.losses.Reduction`.
    compute_full_loss: Whether the constant `log(z!)` term is included when
      computing the poisson loss. See `tf.nn.log_poisson_loss` for details.
    name: name of the head. When given, summary and metrics keys are suffixed
      by `"/" + name`, and it is also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for poisson regression.

  Raises:
    ValueError: If `label_dimension` or `loss_reduction` is invalid.
  """

  def _log_poisson_loss_fn(labels, logits):
    """Passes `logits` as the `log_input` of `nn.log_poisson_loss`."""
    return nn.log_poisson_loss(
        targets=labels,
        log_input=logits,
        compute_full_loss=compute_full_loss)

  head_kwargs = dict(
      weight_column=weight_column,
      label_dimension=label_dimension,
      loss_reduction=loss_reduction,
      loss_fn=_log_poisson_loss_fn,
      inverse_link_fn=math_ops.exp,
      name=name)
  # pylint: disable=protected-access
  return head_lib._regression_head_with_mean_squared_error_loss(**head_kwargs)
  # pylint: enable=protected-access
Exemple #2
0
def regression_head(weight_column=None,
                    label_dimension=1,
                    name=None):
  """Builds a regression `_Head` trained with the `mean_squared_error` loss.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` that identifies the feature holding
      example weights. The weights scale each example's loss, which allows
      examples to be down weighted or boosted during training.
    label_dimension: Number of regression labels per example, i.e. the size of
      the last dimension of the labels `Tensor` (typically of shape
      `[batch_size, label_dimension]`).
    name: name of the head. When given, summary and metrics keys are suffixed
      by `"/" + name`.

  Returns:
    An instance of `_Head` for linear regression.
  """
  # pylint: disable=protected-access
  head = head_lib._regression_head_with_mean_squared_error_loss(
      name=name,
      label_dimension=label_dimension,
      weight_column=weight_column)
  # pylint: enable=protected-access
  return head
Exemple #3
0
def poisson_regression_head(
    weight_column=None,
    label_dimension=1,
    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
    compute_full_loss=True,
    name=None):
  """Returns a poisson regression `_Head` backed by `tf.nn.log_poisson_loss`.

  This is a generalized linear model (see
  https://en.wikipedia.org/wiki/Generalized_linear_model): the regression head
  is given the log-poisson loss as its loss function and `exp` as its inverse
  link function.

  The loss is the weighted sum over all input dimensions. For labels shaped
  `[batch_size, label_dimension]` this means the weighted sum runs over both
  `batch_size` and `label_dimension`.

  `logits` are expected with shape `[D0, D1, ... DN, label_dimension]`, which
  in many applications is `[batch_size, label_dimension]`. `labels` must have
  the same shape; when `label_dimension=1`, `[D0, D1, ... DN]` is accepted
  too. If `weight_column` is specified, weights must have shape
  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
  `[D0, D1, ... DN, label_dimension]`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` naming the feature that carries
      example weights. Each example's loss is multiplied by its weight, so
      examples can be down weighted or boosted during training.
    label_dimension: Number of regression labels per example; the size of the
      last dimension of the labels `Tensor` (typically shaped
      `[batch_size, label_dimension]`).
    loss_reduction: One of `tf.losses.Reduction` except `NONE`; how training
      loss is reduced over batch and label dimension. Defaults to
      `SUM_OVER_BATCH_SIZE`, namely the weighted sum of losses divided by
      `batch size * label_dimension`. See `tf.losses.Reduction`.
    compute_full_loss: Whether to include the constant `log(z!)` term in the
      poisson loss. See `tf.nn.log_poisson_loss` for the full documentation.
    name: name of the head. If provided, summary and metrics keys are suffixed
      by `"/" + name`; it also serves as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for poisson regression.

  Raises:
    ValueError: If `label_dimension` or `loss_reduction` is invalid.
  """

  def _loss_fn(labels, logits):
    """Forwards to `nn.log_poisson_loss` with `logits` as `log_input`."""
    unreduced_loss = nn.log_poisson_loss(
        targets=labels, log_input=logits, compute_full_loss=compute_full_loss)
    return unreduced_loss

  # pylint: disable=protected-access
  build_head = head_lib._regression_head_with_mean_squared_error_loss
  # pylint: enable=protected-access
  return build_head(
      weight_column=weight_column,
      label_dimension=label_dimension,
      loss_reduction=loss_reduction,
      loss_fn=_loss_fn,
      inverse_link_fn=math_ops.exp,
      name=name)
def _create_regression_head(label_dimension, weight_column=None):
    """Creates a one-dimensional regression head with mean squared error loss.

    Args:
      label_dimension: Number of regression labels per example. Only `1` is
        accepted.
      weight_column: Optional weight column, forwarded unchanged to the head.

    Returns:
      The head returned by
      `head_lib._regression_head_with_mean_squared_error_loss`, configured
      with `losses.Reduction.SUM_OVER_BATCH_SIZE` loss reduction.

    Raises:
      ValueError: If `label_dimension` is not 1.
    """
    if label_dimension != 1:
        # The trailing space after the first literal keeps the two implicitly
        # concatenated sentences from running together in the message.
        raise ValueError('For now only 1 dimension regression is supported. '
                         'label_dimension given as {}'.format(label_dimension))
    # pylint: disable=protected-access
    return head_lib._regression_head_with_mean_squared_error_loss(
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
def _create_regression_head(label_dimension, weight_column=None):
  """Creates a one-dimensional regression head with mean squared error loss.

  Args:
    label_dimension: Number of regression labels per example. Only `1` is
      accepted.
    weight_column: Optional weight column, forwarded unchanged to the head.

  Returns:
    The head returned by
    `head_lib._regression_head_with_mean_squared_error_loss`, configured with
    `losses.Reduction.SUM_OVER_BATCH_SIZE` loss reduction.

  Raises:
    ValueError: If `label_dimension` is not 1.
  """
  if label_dimension != 1:
    # The trailing space after the first literal keeps the two implicitly
    # concatenated sentences from running together in the message.
    raise ValueError('For now only 1 dimension regression is supported. '
                     'label_dimension given as {}'.format(label_dimension))
  # pylint: disable=protected-access
  return head_lib._regression_head_with_mean_squared_error_loss(
      label_dimension=label_dimension,
      weight_column=weight_column,
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
Exemple #6
0
    def __init__(self,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_feature_key=None,
                 optimizer=None,
                 config=None,
                 partitioner=None):
        """Construct a `LinearClassifier` estimator object.

        Args:
          feature_columns: An iterable containing all the feature columns used
            by the model. All items in the set should be instances of classes
            derived from `FeatureColumn`.
          model_dir: Directory to save model parameters, graph and etc. This
            can also be used to load checkpoints from the directory into a
            estimator to continue training a previously saved model.
          n_classes: number of label classes. Default is binary
            classification. Note that class labels are integers representing
            the class index (i.e. values from 0 to n_classes-1). For arbitrary
            label values (e.g. string labels), convert to class indices first.
          weight_feature_key: A string defining feature column name
            representing weights. It is used to down weight or boost examples
            during training. It will be multiplied by the loss of the example.
          optimizer: The optimizer used to train the model. If specified, it
            should be either an instance of `tf.Optimizer` or the
            SDCAOptimizer. If `None`, the Ftrl optimizer will be used.
          config: `RunConfig` object to configure the runtime settings.
          partitioner: Optional. Partitioner for input layer.

        Returns:
          A `LinearClassifier` estimator.

        Raises:
          ValueError: if n_classes < 2.
        """
        # Enforce the documented contract up front; the regression head used
        # below is only given `n_classes` as its `label_dimension`.
        if n_classes < 2:
            raise ValueError(
                'n_classes must be at least 2, given: {}'.format(n_classes))
        super(LinearClassifier, self).__init__(
            model_fn=_linear_model_fn,
            model_dir=model_dir,
            config=config,
            params={
                # pylint: disable=protected-access
                # TODO(xiejw): Switch to the classifier head.
                'head':
                head_lib._regression_head_with_mean_squared_error_loss(
                    label_dimension=n_classes,
                    weight_feature_key=weight_feature_key),
                # pylint: enable=protected-access
                'feature_columns':
                feature_columns,
                'optimizer':
                optimizer,
                'partitioner':
                partitioner,
            })
Exemple #7
0
def regression_head(weight_column=None,
                    label_dimension=1,
                    loss_reduction=losses.Reduction.SUM,
                    loss_fn=None,
                    name=None):
  """Builds a regression `_Head` that uses the `mean_squared_error` loss.

  The loss is the weighted sum over all input dimensions: for labels of shape
  `[batch_size, label_dimension]` it is the weighted sum over both
  `batch_size` and `label_dimension`.

  Expected shapes:

  * `logits`: `[D0, D1, ... DN, label_dimension]`; in many applications this
    is `[batch_size, label_dimension]`.
  * `labels`: same as `logits`, namely `[D0, D1, ... DN, label_dimension]`.
    With `label_dimension=1`, `[D0, D1, ... DN]` is supported as well.
  * weights (only when `weight_column` is given): `[D0, D1, ... DN]`,
    `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN, label_dimension]`.

  A custom `loss_fn` is supported as well. It takes `(labels, logits)` or
  `(labels, logits, features)` as arguments and returns the unreduced loss
  with shape `[D0, D1, ... DN, label_dimension]`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` that identifies the feature holding
      example weights. The weights scale each example's loss, which allows
      examples to be down weighted or boosted during training.
    label_dimension: Number of regression labels per example, i.e. the size of
      the last dimension of the labels `Tensor` (typically of shape
      `[batch_size, label_dimension]`).
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Controls how
      the training loss is reduced over batch. Defaults to `SUM`.
    loss_fn: Optional loss function.
    name: name of the head. When given, summary and metrics keys are suffixed
      by `"/" + name`, and it is also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for linear regression.

  Raises:
    ValueError: If `label_dimension` or `loss_reduction` is invalid.
  """
  head_kwargs = dict(
      weight_column=weight_column,
      label_dimension=label_dimension,
      loss_reduction=loss_reduction,
      loss_fn=loss_fn,
      name=name)
  # pylint: disable=protected-access
  return head_lib._regression_head_with_mean_squared_error_loss(**head_kwargs)
  # pylint: enable=protected-access
Exemple #8
0
  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_feature_key=None,
               optimizer=None,
               config=None,
               partitioner=None):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_feature_key: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: The optimizer used to train the model. If specified, it should
        be either an instance of `tf.Optimizer` or the SDCAOptimizer. If `None`,
        the Ftrl optimizer will be used.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
    # Enforce the documented contract up front; the regression head used below
    # is only given `n_classes` as its `label_dimension`.
    if n_classes < 2:
      raise ValueError(
          'n_classes must be at least 2, given: {}'.format(n_classes))
    super(LinearClassifier, self).__init__(
        model_fn=_linear_model_fn,
        model_dir=model_dir,
        config=config,
        params={
            # pylint: disable=protected-access
            # TODO(xiejw): Switch to the classifier head.
            'head': head_lib._regression_head_with_mean_squared_error_loss(
                label_dimension=n_classes,
                weight_feature_key=weight_feature_key),
            # pylint: enable=protected-access
            'feature_columns': feature_columns,
            'optimizer': optimizer,
            'partitioner': partitioner,
        })
Exemple #9
0
  def __init__(self,
               feature_columns,
               model_dir=None,
               label_dimension=1,
               weight_column=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None):
    """Sets up a `LinearRegressor`.

    A mean-squared-error regression head is built from `label_dimension` and
    `weight_column`, then handed to the parent constructor through `params`
    together with the feature columns, optimizer and partitioner.

    Args:
      feature_columns: An iterable with every feature column the model uses.
        Each item should be an instance of a class derived from
        `FeatureColumn`.
      model_dir: Directory in which model parameters, graph and so on are
        saved. It can also be used to load checkpoints into an estimator in
        order to continue training a previously saved model.
      label_dimension: Number of regression targets per example, i.e. the size
        of the last dimension of the labels and logits `Tensor` objects
        (typically shaped `[batch_size, label_dimension]`).
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` naming the feature that carries
        example weights, used to down weight or boost examples during
        training by multiplying each example's loss. A string is used as the
        key to fetch the weight tensor from `features`. For a
        `_NumericColumn`, the raw tensor is fetched by `weight_column.key` and
        `weight_column.normalizer_fn` is then applied to obtain the weights.
      optimizer: An instance of `tf.Optimizer` used to train the model.
        Defaults to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.
    """
    # pylint: disable=protected-access
    regression_head = head_lib._regression_head_with_mean_squared_error_loss(
        label_dimension=label_dimension,
        weight_column=weight_column)
    # pylint: enable=protected-access
    model_params = {
        'head': regression_head,
        'feature_columns': feature_columns,
        'optimizer': optimizer,
        'partitioner': partitioner,
    }
    super(LinearRegressor, self).__init__(
        model_fn=_linear_model_fn,
        model_dir=model_dir,
        config=config,
        params=model_params)
Exemple #10
0
    def __init__(self,
                 feature_columns,
                 model_dir=None,
                 label_dimension=1,
                 weight_column=None,
                 optimizer='Ftrl',
                 config=None,
                 partitioner=None):
        """Sets up a `LinearRegressor`.

        A mean-squared-error regression head is built once from
        `label_dimension` and `weight_column`. The model function given to the
        parent constructor closes over that head and delegates to
        `_linear_model_fn`.

        Args:
          feature_columns: An iterable with every feature column the model
            uses. Each item should be an instance of a class derived from
            `FeatureColumn`.
          model_dir: Directory in which model parameters, graph and so on are
            saved. It can also be used to load checkpoints into an estimator
            in order to continue training a previously saved model.
          label_dimension: Number of regression targets per example, i.e. the
            size of the last dimension of the labels and logits `Tensor`
            objects (typically shaped `[batch_size, label_dimension]`).
          weight_column: A string or a `_NumericColumn` created by
            `tf.feature_column.numeric_column` naming the feature that carries
            example weights, used to down weight or boost examples during
            training by multiplying each example's loss. A string is used as
            the key to fetch the weight tensor from `features`. For a
            `_NumericColumn`, the raw tensor is fetched by `weight_column.key`
            and `weight_column.normalizer_fn` is then applied to obtain the
            weights.
          optimizer: An instance of `tf.Optimizer` used to train the model.
            Defaults to FTRL optimizer.
          config: `RunConfig` object to configure the runtime settings.
          partitioner: Optional. Partitioner for input layer.
        """
        # pylint: disable=protected-access
        regression_head = (
            head_lib._regression_head_with_mean_squared_error_loss(
                label_dimension=label_dimension,
                weight_column=weight_column))
        # pylint: enable=protected-access

        def _model_fn(features, labels, mode, config):
            """Delegates to `_linear_model_fn` with the head built above."""
            columns = tuple(feature_columns or [])
            return _linear_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=regression_head,
                feature_columns=columns,
                optimizer=optimizer,
                partitioner=partitioner,
                config=config)

        parent = super(LinearRegressor, self)
        parent.__init__(model_fn=_model_fn, model_dir=model_dir, config=config)
Exemple #11
0
 def _model_fn(features, labels, mode, config):
     """Runs `_dnn_model_fn` with a newly created regression head."""
     # pylint: disable=protected-access
     regression_head = head_lib._regression_head_with_mean_squared_error_loss(
         label_dimension=label_dimension,
         weight_feature_key=weight_feature_key)
     # pylint: enable=protected-access
     # An empty/None `feature_columns` is passed on as an empty tuple.
     columns = tuple(feature_columns or [])
     return _dnn_model_fn(
         features=features,
         labels=labels,
         mode=mode,
         head=regression_head,
         hidden_units=hidden_units,
         feature_columns=columns,
         optimizer=optimizer,
         activation_fn=activation_fn,
         dropout=dropout,
         input_layer_partitioner=input_layer_partitioner,
         config=config)
Exemple #12
0
 def _model_fn(features, labels, mode, config):
   """Calls `_dnn_model_fn` using a regression head created per invocation."""
   # pylint: disable=protected-access
   make_head = head_lib._regression_head_with_mean_squared_error_loss
   # pylint: enable=protected-access
   dnn_head = make_head(
       label_dimension=label_dimension,
       weight_feature_key=weight_feature_key)
   # An empty/None `feature_columns` is passed on as an empty tuple.
   dnn_columns = tuple(feature_columns or [])
   return _dnn_model_fn(
       features=features,
       labels=labels,
       mode=mode,
       head=dnn_head,
       hidden_units=hidden_units,
       feature_columns=dnn_columns,
       optimizer=optimizer,
       activation_fn=activation_fn,
       dropout=dropout,
       input_layer_partitioner=input_layer_partitioner,
       config=config)
 def _model_fn(features, labels, mode, config):
   """Calls `_dnn_linear_combined_model_fn` with a new regression head."""
   # pylint: disable=protected-access
   regression_head = head_lib._regression_head_with_mean_squared_error_loss(
       label_dimension=label_dimension)
   # pylint: enable=protected-access
   return _dnn_linear_combined_model_fn(
       features=features,
       labels=labels,
       mode=mode,
       head=regression_head,
       linear_feature_columns=linear_feature_columns,
       linear_optimizer=linear_optimizer,
       dnn_feature_columns=dnn_feature_columns,
       dnn_optimizer=dnn_optimizer,
       dnn_hidden_units=dnn_hidden_units,
       dnn_activation_fn=dnn_activation_fn,
       dnn_dropout=dnn_dropout,
       input_layer_partitioner=input_layer_partitioner,
       config=config)
Exemple #14
0
 def _model_fn(features, labels, mode, config):
     """Runs the combined DNN/linear model function with a regression head."""
     # pylint: disable=protected-access
     make_head = head_lib._regression_head_with_mean_squared_error_loss
     # pylint: enable=protected-access
     combined_head = make_head(label_dimension=label_dimension)
     return _dnn_linear_combined_model_fn(
         features=features,
         labels=labels,
         mode=mode,
         head=combined_head,
         linear_feature_columns=linear_feature_columns,
         linear_optimizer=linear_optimizer,
         dnn_feature_columns=dnn_feature_columns,
         dnn_optimizer=dnn_optimizer,
         dnn_hidden_units=dnn_hidden_units,
         dnn_activation_fn=dnn_activation_fn,
         dnn_dropout=dnn_dropout,
         input_layer_partitioner=input_layer_partitioner,
         config=config)
Exemple #15
0
    def __init__(self,
                 feature_columns,
                 model_dir=None,
                 label_dimension=1,
                 weight_feature_key=None,
                 optimizer=None,
                 config=None,
                 partitioner=None):
        """Sets up a `LinearRegressor`.

        A mean-squared-error regression head is built from `label_dimension`
        and `weight_feature_key`, then handed to the parent constructor
        through `params` together with the feature columns, optimizer and
        partitioner.

        Args:
          feature_columns: An iterable with every feature column the model
            uses. Each item should be an instance of a class derived from
            `FeatureColumn`.
          model_dir: Directory in which model parameters, graph and so on are
            saved. It can also be used to load checkpoints into an estimator
            in order to continue training a previously saved model.
          label_dimension: Number of regression targets per example, i.e. the
            size of the last dimension of the labels and logits `Tensor`
            objects (typically shaped `[batch_size, label_dimension]`).
          weight_feature_key: A string naming the feature column that holds
            example weights. The weights multiply each example's loss, which
            allows examples to be down weighted or boosted during training.
          optimizer: string, `tf.Optimizer` object, or callable that returns
            `tf.Optimizer`. Defines the optimizer to use for training. If
            `None`, will use the FTRL optimizer.
          config: `RunConfig` object to configure the runtime settings.
          partitioner: Optional. Partitioner for input layer.
        """
        # pylint: disable=protected-access
        regression_head = (
            head_lib._regression_head_with_mean_squared_error_loss(
                label_dimension=label_dimension,
                weight_feature_key=weight_feature_key))
        # pylint: enable=protected-access
        model_params = {
            'head': regression_head,
            'feature_columns': feature_columns,
            'optimizer': optimizer,
            'partitioner': partitioner,
        }
        super(LinearRegressor, self).__init__(
            model_fn=_linear_model_fn,
            model_dir=model_dir,
            config=config,
            params=model_params)
Exemple #16
0
  def __init__(self,
               model_dir=None,
               label_dimension=1,
               weight_column=None,
               optimizer='Ftrl',
               config=None,
               loss_reduction=losses.Reduction.SUM):
    """Sets up a `BaselineRegressor`.

    A mean-squared-error regression head is built once from `label_dimension`,
    `weight_column` and `loss_reduction`. The model function given to the
    parent constructor closes over that head and delegates to
    `_baseline_model_fn`.

    Args:
      model_dir: Directory in which model parameters, graph and so on are
        saved. It can also be used to load checkpoints into an estimator in
        order to continue training a previously saved model.
      label_dimension: Number of regression targets per example, i.e. the size
        of the last dimension of the labels and logits `Tensor` objects
        (typically shaped `[batch_size, label_dimension]`).
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` naming the feature that carries
        example weights. Each example's loss is multiplied by its weight.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. If not specified, will use
        `FtrlOptimizer` with a default learning rate of 0.3.
      config: `RunConfig` object to configure the runtime settings.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Controls how
        the training loss is reduced over batch. Defaults to `SUM`.

    Returns:
      A `BaselineRegressor` estimator.
    """
    # pylint: disable=protected-access
    regression_head = head_lib._regression_head_with_mean_squared_error_loss(
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    # pylint: enable=protected-access

    def _model_fn(features, labels, mode, config):
      """Delegates to `_baseline_model_fn` with the head built above."""
      return _baseline_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=regression_head,
          optimizer=optimizer,
          config=config)

    parent = super(BaselineRegressor, self)
    parent.__init__(model_fn=_model_fn, model_dir=model_dir, config=config)
Exemple #17
0
  def __init__(self,
               model_dir=None,
               label_dimension=1,
               weight_column=None,
               optimizer='Ftrl',
               config=None,
               loss_reduction=losses.Reduction.SUM):
    """Creates a `BaselineRegressor`.

    The constructor builds one mean-squared-error regression head from
    `label_dimension`, `weight_column` and `loss_reduction`, and registers a
    model function that forwards every call to `_baseline_model_fn` with that
    head and the given `optimizer`.

    Args:
      model_dir: Directory to which model parameters, graph and so on are
        saved. May also be used to load checkpoints into an estimator so that
        training of a previously saved model can continue.
      label_dimension: Number of regression targets per example; the size of
        the last dimension of the labels and logits `Tensor` objects
        (typically `[batch_size, label_dimension]`).
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining the feature column that
        represents weights. It is multiplied by the loss of the example.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. If not specified, will use
        `FtrlOptimizer` with a default learning rate of 0.3.
      config: `RunConfig` object to configure the runtime settings.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
        how to reduce training loss over batch. Defaults to `SUM`.

    Returns:
      A `BaselineRegressor` estimator.
    """
    # pylint: disable=protected-access
    make_head = head_lib._regression_head_with_mean_squared_error_loss
    # pylint: enable=protected-access
    baseline_head = make_head(
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Forwards to `_baseline_model_fn` using the shared head."""
      estimator_spec = _baseline_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=baseline_head,
          optimizer=optimizer,
          config=config)
      return estimator_spec

    super(BaselineRegressor, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)
Exemple #18
0
  def __init__(self,
               feature_columns,
               model_dir=None,
               label_dimension=1,
               weight_feature_key=None,
               optimizer=None,
               config=None,
               partitioner=None):
    """Creates a `LinearRegressor`.

    The constructor builds a mean-squared-error regression head from
    `label_dimension` and `weight_feature_key` and passes it, along with the
    feature columns, optimizer and partitioner, to the parent constructor as
    `params`.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. Every item should be an instance of a class derived from
        `FeatureColumn`.
      model_dir: Directory to which model parameters, graph and so on are
        saved. May also be used to load checkpoints into an estimator so that
        training of a previously saved model can continue.
      label_dimension: Number of regression targets per example; the size of
        the last dimension of the labels and logits `Tensor` objects
        (typically `[batch_size, label_dimension]`).
      weight_feature_key: A string defining the name of the feature column
        that represents weights. It is multiplied by the loss of the example
        and is used to down weight or boost examples during training.
      optimizer: string, `tf.Optimizer` object, or callable that returns
        `tf.Optimizer`. Defines the optimizer to use for training. If `None`,
        will use the FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.
    """
    # pylint: disable=protected-access
    make_head = head_lib._regression_head_with_mean_squared_error_loss
    # pylint: enable=protected-access
    linear_head = make_head(
        label_dimension=label_dimension,
        weight_feature_key=weight_feature_key)
    model_params = {
        'head': linear_head,
        'feature_columns': feature_columns,
        'optimizer': optimizer,
        'partitioner': partitioner,
    }
    super(LinearRegressor, self).__init__(
        model_fn=_linear_model_fn,
        model_dir=model_dir,
        config=config,
        params=model_params)
Exemple #19
0
    def __init__(
            self,
            feature_columns,
            model_dir=None,
            label_dimension=1,
            weight_column=None,
            optimizer='Ftrl',
            config=None,
            partitioner=None,
            warm_start_from=None,
            loss_reduction=losses.Reduction.SUM):
        """Constructs a `LinearRegressor`.

        A regression head is created up front from `label_dimension`,
        `weight_column` and `loss_reduction`. The model function handed to the
        parent constructor calls the shared `_linear_model_fn` and afterwards
        applies warm-starting when `warm_start_from` resolves to settings.

        Args:
          feature_columns: Iterable holding every feature column the model
            uses. Each item should be an instance of a class derived from
            `FeatureColumn`.
          model_dir: Directory in which model parameters, the graph, etc. are
            saved. It can also be used to load checkpoints into an estimator
            so that training of a previously saved model continues.
          label_dimension: Number of regression targets per example, i.e. the
            size of the last dimension of the labels and logits `Tensor`
            objects (typically shaped `[batch_size, label_dimension]`).
          weight_column: A string, or a `_NumericColumn` created by
            `tf.feature_column.numeric_column`, naming the feature column that
            holds example weights. Weights down-weight or boost examples
            during training and are multiplied by the example's loss. A string
            is used as the key for fetching the weight tensor from `features`;
            for a `_NumericColumn` the raw tensor is fetched by
            `weight_column.key` and `weight_column.normalizer_fn` is then
            applied to it.
          optimizer: An instance of `tf.Optimizer` used to train the model.
            Defaults to the FTRL optimizer.
          config: `RunConfig` object configuring the runtime settings.
          partitioner: Optional. Partitioner for the input layer.
          warm_start_from: Either a string filepath to a checkpoint to
            warm-start from, or a `WarmStartSettings` object that fully
            configures warm-starting. With a string filepath, all weights and
            biases are warm-started and vocabularies and Tensor names are
            assumed to be unchanged.
          loss_reduction: One of `tf.losses.Reduction` except `NONE`;
            describes how the training loss is reduced over the batch.
            Defaults to `SUM`.
        """
        # pylint: disable=protected-access
        mse_head = head_lib._regression_head_with_mean_squared_error_loss(
            loss_reduction=loss_reduction,
            weight_column=weight_column,
            label_dimension=label_dimension)
        # pylint: enable=protected-access

        def _model_fn(features, labels, mode, config):
            """Runs the shared `_linear_model_fn`, then warm-starts if asked."""
            # The column tuple is rebuilt on every invocation of the model
            # function; a falsy `feature_columns` becomes an empty tuple.
            columns = tuple(feature_columns or [])
            spec = _linear_model_fn(
                features=features,
                labels=labels,
                mode=mode,
                head=mse_head,
                feature_columns=columns,
                optimizer=optimizer,
                partitioner=partitioner,
                config=config)

            # Warm-start settings are resolved only after the shared model
            # function has been called.
            # pylint: disable=protected-access
            settings = warm_starting_util._get_default_warm_start_settings(
                warm_start_from)
            if settings:
                warm_starting_util._warm_start(settings)
            # pylint: enable=protected-access
            return spec

        super(LinearRegressor, self).__init__(
            model_fn=_model_fn, model_dir=model_dir, config=config)
Exemple #20
0
  def __init__(
      self,
      feature_columns,
      model_dir=None,
      label_dimension=1,
      weight_column=None,
      optimizer='Ftrl',
      config=None,
      partitioner=None,
      warm_start_from=None,
      loss_reduction=losses.Reduction.SUM):
    """Constructs a `LinearRegressor`.

    A regression head is created from `label_dimension`, `weight_column` and
    `loss_reduction`; the model function wraps the shared `_linear_model_fn`.
    `warm_start_from` is forwarded unchanged to the parent constructor.

    Args:
      feature_columns: Iterable holding every feature column the model uses.
        Each item should be an instance of a class derived from
        `FeatureColumn`.
      model_dir: Directory in which model parameters, the graph, etc. are
        saved. It can also be used to load checkpoints into an estimator so
        that training of a previously saved model continues.
      label_dimension: Number of regression targets per example, i.e. the size
        of the last dimension of the labels and logits `Tensor` objects
        (typically shaped `[batch_size, label_dimension]`).
      weight_column: A string, or a `_NumericColumn` created by
        `tf.feature_column.numeric_column`, naming the feature column that
        holds example weights. Weights down-weight or boost examples during
        training and are multiplied by the example's loss. A string is used as
        the key for fetching the weight tensor from `features`; for a
        `_NumericColumn` the raw tensor is fetched by `weight_column.key` and
        `weight_column.normalizer_fn` is then applied to it.
      optimizer: An instance of `tf.Optimizer` used to train the model.
        Defaults to the FTRL optimizer.
      config: `RunConfig` object configuring the runtime settings.
      partitioner: Optional. Partitioner for the input layer.
      warm_start_from: Either a string filepath to a checkpoint to warm-start
        from, or a `WarmStartSettings` object that fully configures
        warm-starting. With a string filepath, all weights and biases are
        warm-started and vocabularies and Tensor names are assumed to be
        unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`; describes how
        the training loss is reduced over the batch. Defaults to `SUM`.
    """
    # pylint: disable=protected-access
    regression_head = head_lib._regression_head_with_mean_squared_error_loss(
        loss_reduction=loss_reduction,
        weight_column=weight_column,
        label_dimension=label_dimension)
    # pylint: enable=protected-access

    def _model_fn(features, labels, mode, config):
      """Delegates to the shared `_linear_model_fn` with this head."""
      # The column tuple is rebuilt on every invocation of the model function;
      # a falsy `feature_columns` becomes an empty tuple.
      columns = tuple(feature_columns or [])
      spec = _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=regression_head,
          feature_columns=columns,
          optimizer=optimizer,
          partitioner=partitioner,
          config=config)
      return spec

    base = super(LinearRegressor, self)
    base.__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
Exemple #21
0
def logistic_regression_head(
    weight_column=None,
    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
    name=None):
  """Builds a `_Head` for logistic regression.

  The loss is `sigmoid_cross_entropy_with_logits`, the same loss used by
  `binary_classification_head`. Compared to `binary_classification_head`,
  this head:

  * Has no `label_vocabulary` support; labels must instead be floats in the
    range [0, 1].
  * Skips metrics that do not make sense here, such as AUC.
  * Returns only logits and predictions (`=tf.sigmoid(logits)`) in `PREDICT`
    mode, while `binary_classification_head` additionally returns
    probabilities, classes, and class_ids.
  * Uses `RegressionOutput` as its default export output, while
    `binary_classification_head` defaults to `PredictOutput`.

  `logits` are expected with shape `[D0, D1, ... DN, 1]`; in many
  applications this is `[batch_size, 1]`.

  `labels` must match `logits` in shape, i.e. `[D0, D1, ... DN]` or
  `[D0, D1, ... DN, 1]`.

  When `weight_column` is given, weights must have shape `[D0, D1, ... DN]`
  or `[D0, D1, ... DN, 1]`.

  The head is implemented as a generalized linear model, see
  https://en.wikipedia.org/wiki/Generalized_linear_model.

  Args:
    weight_column: A string, or a `_NumericColumn` created by
      `tf.feature_column.numeric_column`, naming the feature column that holds
      example weights. Weights down-weight or boost examples during training
      and are multiplied by the example's loss.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`; describes how
      the training loss is reduced over batch and label dimension. Defaults to
      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
      `batch size * label_dimension` (`label_dimension` is fixed to 1 for this
      head). See `tf.losses.Reduction`.
    name: Name of the head. When provided, summary and metrics keys are
      suffixed by `"/" + name`. It is also used as `name_scope` when creating
      ops.

  Returns:
    An instance of `_Head` for logistic regression.

  Raises:
    ValueError: If `loss_reduction` is invalid.
  """
  def _logistic_loss(labels, logits):
    """Sigmoid cross entropy computed on range-asserted labels."""
    # pylint: disable=protected-access
    checked_labels = head_lib._assert_range(
        labels, n_classes=2, message='Labels must be in range [0, 1]')
    # pylint: enable=protected-access
    return nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=checked_labels)

  # TODO(roumposg): Rename to _regression_head, since it supports loss_fn arg.
  head_factory = head_lib._regression_head_with_mean_squared_error_loss  # pylint:disable=protected-access
  return head_factory(
      name=name,
      label_dimension=1,
      weight_column=weight_column,
      loss_reduction=loss_reduction,
      loss_fn=_logistic_loss,
      inverse_link_fn=math_ops.sigmoid)