Example #1
def _parse_features_if_necessary(features, feature_columns):
    """Helper function to convert the input points into a usable format.

  Args:
    features: The input features.
    feature_columns: An optional iterable containing all the feature columns
      used by the model. All items in the set should be feature column instances
      that can be passed to `tf.compat.v1.feature_column.input_layer`. If this
      is None, all features will be used.

  Returns:
    If `features` is a dict of `k` features (optionally filtered by
    `feature_columns`), each of which is a vector of `n` scalars, the return
    value is a Tensor of shape `(n, k)` representing `n` input points, where the
    items in the `k` dimension are sorted lexicographically by `features` key.
    If `features` is not a dict, it is returned unmodified.
  """
    if not isinstance(features, dict):
        return features

    if feature_columns:
        return fc.input_layer(features, feature_columns)

    keys = sorted(features.keys())
    with ops.colocate_with(features[keys[0]]):
        return array_ops.concat([features[k] for k in keys], axis=1)
Example #2
def _parse_features_if_necessary(features, feature_columns):
  """Helper function to convert the input points into a usable format.

  Args:
    features: The input features.
    feature_columns: An optional iterable containing all the feature columns
      used by the model. All items in the set should be feature column instances
      that can be passed to `tf.feature_column.input_layer`. If this is None,
      all features will be used.

  Returns:
    If `features` is a dict of `k` features (optionally filtered by
    `feature_columns`), each of which is a vector of `n` scalars, the return
    value is a Tensor of shape `(n, k)` representing `n` input points, where the
    items in the `k` dimension are sorted lexicographically by `features` key.
    If `features` is not a dict, it is returned unmodified.
  """
  if not isinstance(features, dict):
    return features

  if feature_columns:
    return fc.input_layer(features, feature_columns)

  keys = sorted(features.keys())
  with ops.colocate_with(features[keys[0]]):
    return array_ops.concat([features[k] for k in keys], axis=1)
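A minimal standalone sketch (not part of the snippets above; it assumes eager TensorFlow 2.x and made-up feature names) of the dict branch when no feature columns are given: keys are sorted lexicographically and the per-key (n, 1) vectors are concatenated into an (n, k) tensor of input points.

import tensorflow as tf

# Hypothetical features dict: two named columns, each a vector of n = 3 scalars.
features = {
    "weight": tf.constant([[4.0], [5.0], [6.0]]),
    "height": tf.constant([[1.0], [2.0], [3.0]]),
}
keys = sorted(features)  # ['height', 'weight'], lexicographic order by key
points = tf.concat([features[k] for k in keys], axis=1)
print(points.shape)      # (3, 2), i.e. n = 3 input points with k = 2 dimensions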
Example #3
    def test_functional_input_layer_with_numpy_input_fn(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in input_layer
        price = fc.numeric_column('price')
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        # one_hot_body_style has 3 dims in input_layer.
        one_hot_body_style = fc.indicator_column(body_style)
        # embedded_body_style has 5 dims in input_layer.
        embedded_body_style = fc.embedding_column(body_style,
                                                  dimension=5,
                                                  initializer=_initializer)

        input_fn = numpy_io.numpy_input_fn(x={
            'price':
            np.array([11., 12., 13., 14.]),
            'body-style':
            np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
                                           batch_size=2,
                                           shuffle=False)
        features = input_fn()
        net = fc.input_layer(features,
                             [price, one_hot_body_style, embedded_body_style])
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

            # Each row is formed by concatenating `embedded_body_style`,
            # `one_hot_body_style`, and `price` in order.
            self.assertAllEqual([[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                                 [1., 2., 3., 4., 5., 1., 0., 0., 12]],
                                sess.run(net))

            coord.request_stop()
            coord.join(threads)
Example #4
  def test_functional_input_layer_with_numpy_input_fn(self):
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )
    def _initializer(shape, dtype, partition_info):
      del shape, dtype, partition_info
      return embedding_values

    # price has 1 dimension in input_layer
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    # one_hot_body_style has 3 dims in input_layer.
    one_hot_body_style = fc.indicator_column(body_style)
    # embedded_body_style has 5 dims in input_layer.
    embedded_body_style = fc.embedding_column(body_style, dimension=5,
                                              initializer=_initializer)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([11., 12., 13., 14.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.input_layer(features,
                         [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      # Each row is formed by concatenating `embedded_body_style`,
      # `one_hot_body_style`, and `price` in order.
      self.assertAllEqual(
          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
          sess.run(net))

      coord.request_stop()
      coord.join(threads)
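A small numpy sanity check (an addition, not part of the original test) of the expected rows: `input_layer` orders the columns by name (body-style_embedding, body-style_indicator, price), so each row is the 5-dim embedding, the 3-dim one-hot, then the price.

import numpy as np

# Vocabulary order is ['hardtop', 'wagon', 'sedan'], so 'sedan' has id 2.
embedding_values = np.array([[1., 2., 3., 4., 5.],
                             [6., 7., 8., 9., 10.],
                             [11., 12., 13., 14., 15.]])
one_hot = np.eye(3)
# First batch element: body-style 'sedan' (id 2), price 11.
row_0 = np.concatenate([embedding_values[2], one_hot[2], [11.]])
print(row_0)  # [11. 12. 13. 14. 15.  0.  0.  1. 11.]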
Example #5
  def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
      The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
      return (0,)
    with ops.Graph().as_default():
      parsed_features = (
          feature_column.make_parse_example_spec(
              self._exogenous_feature_columns))
      placeholder_features = parsing_ops.parse_example(
          serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string),
          features=parsed_features)
      embedded = feature_column.input_layer(
          features=placeholder_features,
          feature_columns=self._exogenous_feature_columns)
      return embedded.get_shape().as_list()[1:]
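A rough standalone illustration (assumed column names, graph mode via tf.compat.v1) of what the returned shape means: it is the per-example width of the concatenated exogenous columns, with the batch dimension stripped.

import tensorflow as tf

cols = [
    tf.compat.v1.feature_column.numeric_column("temperature"),       # width 1
    tf.compat.v1.feature_column.numeric_column("wind", shape=(2,)),  # width 2
]
with tf.Graph().as_default():
    spec = tf.compat.v1.feature_column.make_parse_example_spec(cols)
    parsed = tf.compat.v1.parse_example(
        serialized=tf.compat.v1.placeholder(shape=[None], dtype=tf.string),
        features=spec)
    embedded = tf.compat.v1.feature_column.input_layer(parsed, cols)
    print(embedded.get_shape().as_list()[1:])  # [3]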
Example #7
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns are sparse and use the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. The multiplier will be
          applied to the learning rate of the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
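The heart of the combined model is an elementwise sum of the two logit heads before they are handed to `head.create_model_fn_ops`; a tiny illustrative sketch with made-up numbers (not the estimator code itself):

import tensorflow as tf

# Hypothetical per-example logits from the two sub-models (batch of 2, 1 output).
dnn_logits = tf.constant([[0.25], [-1.5]])
linear_logits = tf.constant([[0.5], [0.5]])
logits = dnn_logits + linear_logits   # what the model_fn passes to the head
print(logits.numpy())                 # [[ 0.75] [-1.  ]]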
Example #8

def _dnn_tree_combined_model_fn(
        features,
        labels,
        mode,
        head,
        dnn_hidden_units,
        dnn_feature_columns,
        tree_learner_config,
        num_trees,
        tree_examples_per_layer,
        config=None,
        dnn_optimizer="Adagrad",
        dnn_activation_fn=nn.relu,
        dnn_dropout=None,
        dnn_input_layer_partitioner=None,
        dnn_input_layer_to_tree=True,
        dnn_steps_to_train=10000,
        predict_with_tree_only=False,
        tree_feature_columns=None,
        tree_center_bias=False,
        dnn_to_tree_distillation_param=None,
        use_core_versions=False,
        output_type=model.ModelBuilderOutputType.MODEL_FN_OPS,
        override_global_step_value=None):
    """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn
      computes the distillation loss from dnn_logits, tree_logits and a weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, the sigmoid/softmax cross entropy loss is used
      by default. When distillation is applied, `predict_with_tree_only` will
      be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec
      (new interface).
    override_global_step_value: If not None, the global step value will be
      reset to this value after training is done. This is particularly useful
      for hyperparameter tuning, which can't recognize early stopping due to
      the number of trees. If None, no override of the global step will happen.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
    if not isinstance(features, dict):
        raise ValueError("features should be a dictionary of `Tensor`s. "
                         "Given type: {}".format(type(features)))

    if not dnn_feature_columns:
        raise ValueError("dnn_feature_columns must be specified")

    if dnn_to_tree_distillation_param:
        if not predict_with_tree_only:
            logging.warning(
                "update predict_with_tree_only to True since distillation"
                "is specified.")
            predict_with_tree_only = True

    # Build DNN Logits.
    dnn_parent_scope = "dnn"
    dnn_partitioner = dnn_input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

    if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC
            and not use_core_versions):
        raise ValueError("You must use core versions with Estimator Spec")
    global_step = training_util.get_global_step()

    with variable_scope.variable_scope(dnn_parent_scope,
                                       values=tuple(six.itervalues(features)),
                                       partitioner=dnn_partitioner):

        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner) as input_layer_scope:
            if use_core_versions:
                input_layer = feature_column_lib.input_layer(
                    features=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope])
            else:
                input_layer = layers.input_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope],
                    scope=input_layer_scope)

        def dnn_logits_fn():
            """Builds the logits from the input layer."""
            previous_layer = input_layer
            for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
                with variable_scope.variable_scope(
                        "hiddenlayer_%d" % layer_id,
                        values=(previous_layer, )) as hidden_layer_scope:
                    net = layers.fully_connected(
                        previous_layer,
                        num_hidden_units,
                        activation_fn=dnn_activation_fn,
                        variables_collections=[dnn_parent_scope],
                        scope=hidden_layer_scope)
                    if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                        net = layers.dropout(net,
                                             keep_prob=(1.0 - dnn_dropout))
                _add_hidden_layer_summary(net, hidden_layer_scope.name)
                previous_layer = net
            with variable_scope.variable_scope(
                    "logits", values=(previous_layer, )) as logits_scope:
                dnn_logits = layers.fully_connected(
                    previous_layer,
                    head.logits_dimension,
                    activation_fn=None,
                    variables_collections=[dnn_parent_scope],
                    scope=logits_scope)
            _add_hidden_layer_summary(dnn_logits, logits_scope.name)
            return dnn_logits

        if predict_with_tree_only and mode == model_fn.ModeKeys.INFER:
            dnn_logits = array_ops.constant(0.0)
            dnn_train_op_fn = control_flow_ops.no_op
        elif predict_with_tree_only and mode == model_fn.ModeKeys.EVAL:
            dnn_logits = control_flow_ops.cond(
                global_step > dnn_steps_to_train,
                lambda: array_ops.constant(0.0), dnn_logits_fn)
            dnn_train_op_fn = control_flow_ops.no_op
        else:
            dnn_logits = dnn_logits_fn()

            def dnn_train_op_fn(loss):
                """Returns the op to optimize the loss."""
                return optimizers.optimize_loss(
                    loss=loss,
                    global_step=training_util.get_global_step(),
                    learning_rate=_DNN_LEARNING_RATE,
                    optimizer=_get_optimizer(dnn_optimizer),
                    name=dnn_parent_scope,
                    variables=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_parent_scope),
                    # Empty summaries to prevent optimizers from logging training_loss.
                    summaries=[])

    # Build Tree Logits.
    with ops.device(global_step.device):
        ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=0,
            tree_ensemble_config="",  # Initialize an empty ensemble.
            name="ensemble_model")

    tree_features = features.copy()
    if dnn_input_layer_to_tree:
        tree_features["dnn_input_layer"] = input_layer
        tree_feature_columns.append(
            layers.real_valued_column("dnn_input_layer"))
    gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
        is_chief=config.is_chief,
        num_ps_replicas=config.num_ps_replicas,
        ensemble_handle=ensemble_handle,
        center_bias=tree_center_bias,
        examples_per_layer=tree_examples_per_layer,
        learner_config=tree_learner_config,
        feature_columns=tree_feature_columns,
        logits_dimension=head.logits_dimension,
        features=tree_features,
        use_core_columns=use_core_versions)

    with ops.name_scope("gbdt"):
        predictions_dict = gbdt_model.predict(mode)
        tree_logits = predictions_dict["predictions"]

        def _tree_train_op_fn(loss):
            """Returns the op to optimize the loss."""
            if dnn_to_tree_distillation_param:
                loss_weight, loss_fn = dnn_to_tree_distillation_param
                # pylint: disable=protected-access
                if use_core_versions:
                    weight_tensor = head_lib._weight_tensor(
                        features, head._weight_column)
                else:
                    weight_tensor = head_lib._weight_tensor(
                        features, head.weight_column_name)
                # pylint: enable=protected-access
                dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

                if loss_fn is None:
                    # Create a default loss_fn similar to the loss_fn of
                    # multi_class_head, which was previously the default.
                    n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
                    loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
                        n_classes)

                dnn_to_tree_distillation_loss = loss_weight * loss_fn(
                    dnn_logits_fixed, tree_logits, weight_tensor)
                summary.scalar("dnn_to_tree_distillation_loss",
                               dnn_to_tree_distillation_loss)
                loss += dnn_to_tree_distillation_loss

            update_op = gbdt_model.train(loss, predictions_dict, labels)
            with ops.control_dependencies(
                [update_op]), (ops.colocate_with(global_step)):
                update_op = state_ops.assign_add(global_step, 1).op
                return update_op

    if predict_with_tree_only:
        if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
            tree_train_logits = tree_logits
        else:
            tree_train_logits = control_flow_ops.cond(
                global_step > dnn_steps_to_train, lambda: tree_logits,
                lambda: dnn_logits)
    else:
        tree_train_logits = dnn_logits + tree_logits

    def _no_train_op_fn(loss):
        """Returns a no-op."""
        del loss
        return control_flow_ops.no_op()

    if tree_center_bias:
        num_trees += 1
    finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

    if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
        model_fn_ops = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=_no_train_op_fn,
                                                logits=tree_train_logits)
        if mode != model_fn.ModeKeys.TRAIN:
            return model_fn_ops
        dnn_train_op = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=dnn_train_op_fn,
                                                logits=dnn_logits).train_op
        tree_train_op = head.create_model_fn_ops(
            features=tree_features,
            mode=mode,
            labels=labels,
            train_op_fn=_tree_train_op_fn,
            logits=tree_train_logits).train_op

        # Add the hooks
        model_fn_ops.training_hooks.extend([
            trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                        tree_train_op),
            trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                          finalized_trees,
                                          override_global_step_value)
        ])
        return model_fn_ops

    elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
        fusion_spec = head.create_estimator_spec(features=features,
                                                 mode=mode,
                                                 labels=labels,
                                                 train_op_fn=_no_train_op_fn,
                                                 logits=tree_train_logits)
        if mode != model_fn.ModeKeys.TRAIN:
            return fusion_spec
        dnn_spec = head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              train_op_fn=dnn_train_op_fn,
                                              logits=dnn_logits)
        tree_spec = head.create_estimator_spec(features=tree_features,
                                               mode=mode,
                                               labels=labels,
                                               train_op_fn=_tree_train_op_fn,
                                               logits=tree_train_logits)

        training_hooks = [
            trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                        tree_spec.train_op),
            trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                          finalized_trees,
                                          override_global_step_value)
        ]
        fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                           list(fusion_spec.training_hooks))
        return fusion_spec
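When `predict_with_tree_only` is set, the EVAL branch above selects the logits with a `cond` on the global step: the DNN output is used until `dnn_steps_to_train` is reached, then the tree output takes over. A hedged, self-contained sketch of that switch (made-up step values, eager TF 2.x):

import tensorflow as tf

dnn_steps_to_train = tf.constant(10000, dtype=tf.int64)

def pick_logits(global_step, dnn_logits, tree_logits):
    # Mirrors control_flow_ops.cond(global_step > dnn_steps_to_train, ...).
    return tf.cond(global_step > dnn_steps_to_train,
                   lambda: tree_logits,
                   lambda: dnn_logits)

print(pick_logits(tf.constant(5, tf.int64),
                  tf.constant([[1.0]]), tf.constant([[2.0]])).numpy())   # [[1.]]
print(pick_logits(tf.constant(20000, tf.int64),
                  tf.constant([[1.0]]), tf.constant([[2.0]])).numpy())   # [[2.]]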
Example #9
  def _process_exogenous_features(self, times, features):
    """Create a single vector from exogenous features.

    Args:
      times: A [batch size, window size] vector of times for this batch,
          primarily used to check the shape information of exogenous features.
      features: A dictionary of exogenous features corresponding to the columns
          in self._exogenous_feature_columns. Each value should have a shape
          prefixed by [batch size, window size].
    Returns:
      A Tensor with shape [batch size, window size, exogenous dimension], where
      the size of the exogenous dimension depends on the exogenous feature
      columns passed to the model's constructor.
    Raises:
      ValueError: If an exogenous feature has an unknown rank.
    """
    if self._exogenous_feature_columns:
      exogenous_features_single_batch_dimension = {}
      for name, tensor in features.items():
        if tensor.get_shape().ndims is None:
          # input_from_feature_columns does not support completely unknown
          # feature shapes, so we save on a bit of logic and provide a better
          # error message by checking that here.
          raise ValueError(
              ("Features with unknown rank are not supported. Got shape {} for "
               "feature {}.").format(tensor.get_shape(), name))
        tensor_shape_dynamic = array_ops.shape(tensor)
        tensor = array_ops.reshape(
            tensor,
            array_ops.concat([[tensor_shape_dynamic[0]
                               * tensor_shape_dynamic[1]],
                              tensor_shape_dynamic[2:]], axis=0))
        # Avoid shape warnings when embedding "scalar" exogenous features (those
        # with only batch and window dimensions); input_from_feature_columns
        # expects input ranks to match the embedded rank.
        if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string:
          exogenous_features_single_batch_dimension[name] = tensor[:, None]
        else:
          exogenous_features_single_batch_dimension[name] = tensor
      embedded_exogenous_features_single_batch_dimension = (
          feature_column.input_layer(
              features=exogenous_features_single_batch_dimension,
              feature_columns=self._exogenous_feature_columns,
              trainable=True))
      exogenous_regressors = array_ops.reshape(
          embedded_exogenous_features_single_batch_dimension,
          array_ops.concat(
              [
                  array_ops.shape(times), array_ops.shape(
                      embedded_exogenous_features_single_batch_dimension)[1:]
              ],
              axis=0))
      exogenous_regressors.set_shape(times.get_shape().concatenate(
          embedded_exogenous_features_single_batch_dimension.get_shape()[1:]))
      exogenous_regressors = math_ops.cast(
          exogenous_regressors, dtype=self.dtype)
    else:
      # Not having any exogenous features is a special case so that models can
      # avoid superfluous updates, which may not be free of side effects due to
      # bias terms in transformations.
      exogenous_regressors = None
    return exogenous_regressors
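A standalone sketch (made-up sizes; `tf.tile` stands in for the feature-column embedding) of the reshape round trip performed above: exogenous features arrive as [batch, window, ...], the batch and window axes are merged before embedding, and the result is reshaped back afterwards.

import tensorflow as tf

batch, window, width = 4, 7, 3
feature = tf.random.normal([batch, window])    # a "scalar" exogenous feature
flat = tf.reshape(feature, [batch * window])   # merge batch and window axes
flat = flat[:, None]                           # add the rank input_layer expects
embedded = tf.tile(flat, [1, width])           # stand-in for the embedded columns
regressors = tf.reshape(embedded, [batch, window, width])
print(regressors.shape)                        # (4, 7, 3)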
Example #10

def _dnn_tree_combined_model_fn(
    features,
    labels,
    mode,
    head,
    dnn_hidden_units,
    dnn_feature_columns,
    tree_learner_config,
    num_trees,
    tree_examples_per_layer,
    config=None,
    dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu,
    dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True,
    dnn_steps_to_train=10000,
    predict_with_tree_only=False,
    tree_feature_columns=None,
    tree_center_bias=False,
    dnn_to_tree_distillation_param=None,
    use_core_versions=False,
    output_type=model.ModelBuilderOutputType.MODEL_FN_OPS,
    override_global_step_value=None):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn
      computes the distillation loss from dnn_logits, tree_logits and a weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, the sigmoid/softmax cross entropy loss is used
      by default. When distillation is applied, `predict_with_tree_only` will
      be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec
      (new interface).
    override_global_step_value: If not None, the global step value will be
      reset to this value after training is done. This is particularly useful
      for hyperparameter tuning, which can't recognize early stopping due to
      the number of trees. If None, no override of the global step will happen.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  if dnn_to_tree_distillation_param:
    if not predict_with_tree_only:
      logging.warning("update predict_with_tree_only to True since distillation"
                      "is specified.")
      predict_with_tree_only = True

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

  if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and
      not use_core_versions):
    raise ValueError("You must use core versions with Estimator Spec")
  global_step = training_util.get_global_step()

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      if use_core_versions:
        input_layer = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope])
      else:
        input_layer = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope],
            scope=input_layer_scope)
    def dnn_logits_fn():
      """Builds the logits from the input layer."""
      previous_layer = input_layer
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(previous_layer,)) as hidden_layer_scope:
          net = layers.fully_connected(
              previous_layer,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
        _add_hidden_layer_summary(net, hidden_layer_scope.name)
        previous_layer = net
      with variable_scope.variable_scope(
          "logits", values=(previous_layer,)) as logits_scope:
        dnn_logits = layers.fully_connected(
            previous_layer,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=logits_scope)
      _add_hidden_layer_summary(dnn_logits, logits_scope.name)
      return dnn_logits
    if predict_with_tree_only and mode == model_fn.ModeKeys.INFER:
      dnn_logits = array_ops.constant(0.0)
      dnn_train_op_fn = control_flow_ops.no_op
    elif predict_with_tree_only and mode == model_fn.ModeKeys.EVAL:
      dnn_logits = control_flow_ops.cond(
          global_step > dnn_steps_to_train,
          lambda: array_ops.constant(0.0),
          dnn_logits_fn)
      dnn_train_op_fn = control_flow_ops.no_op
    else:
      dnn_logits = dnn_logits_fn()
      def dnn_train_op_fn(loss):
        """Returns the op to optimize the loss."""
        return optimizers.optimize_loss(
            loss=loss,
            global_step=training_util.get_global_step(),
            learning_rate=_DNN_LEARNING_RATE,
            optimizer=_get_optimizer(dnn_optimizer),
            name=dnn_parent_scope,
            variables=ops.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
            # Empty summaries to prevent optimizers from logging training_loss.
            summaries=[])

  # Build Tree Logits.
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features,
      use_core_columns=use_core_versions)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        # pylint: disable=protected-access
        if use_core_versions:
          weight_tensor = head_lib._weight_tensor(features, head._weight_column)
        else:
          weight_tensor = head_lib._weight_tensor(
              features, head.weight_column_name)
        # pylint: enable=protected-access
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # Create a default loss_fn similar to the loss_fn of
          # multi_class_head, which was previously the default.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        summary.scalar("dnn_to_tree_distillation_loss",
                       dnn_to_tree_distillation_loss)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  if predict_with_tree_only:
    if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
      tree_train_logits = tree_logits
    else:
      tree_train_logits = control_flow_ops.cond(
          global_step > dnn_steps_to_train,
          lambda: tree_logits,
          lambda: dnn_logits)
  else:
    tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
    model_fn_ops = head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_no_train_op_fn,
        logits=tree_train_logits)
    if mode != model_fn.ModeKeys.TRAIN:
      return model_fn_ops
    dnn_train_op = head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=dnn_train_op_fn,
        logits=dnn_logits).train_op
    tree_train_op = head.create_model_fn_ops(
        features=tree_features,
        mode=mode,
        labels=labels,
        train_op_fn=_tree_train_op_fn,
        logits=tree_train_logits).train_op

    # Add the hooks
    model_fn_ops.training_hooks.extend([
        trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                    tree_train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees,
                                      override_global_step_value)
    ])
    return model_fn_ops

  elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
    fusion_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_no_train_op_fn,
        logits=tree_train_logits)
    if mode != model_fn.ModeKeys.TRAIN:
      return fusion_spec
    dnn_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=dnn_train_op_fn,
        logits=dnn_logits)
    tree_spec = head.create_estimator_spec(
        features=tree_features,
        mode=mode,
        labels=labels,
        train_op_fn=_tree_train_op_fn,
        logits=tree_train_logits)

    training_hooks = [
        trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                    tree_spec.train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees,
                                      override_global_step_value)
    ]
    fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                       list(fusion_spec.training_hooks))
    return fusion_spec
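When `dnn_to_tree_distillation_param` is set, the tree loss gets an extra weighted term that pulls the tree logits toward the frozen DNN logits. A hedged sketch of that composition (made-up logits and weight; a plain sigmoid cross entropy stands in for the distillation loss_fn):

import tensorflow as tf

dnn_logits = tf.constant([[2.0], [-1.0]])
tree_logits = tf.constant([[0.5], [0.5]])
loss_weight = 0.1

dnn_logits_fixed = tf.stop_gradient(dnn_logits)   # freeze the DNN side
distillation = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.sigmoid(dnn_logits_fixed),      # soft targets from the DNN
        logits=tree_logits))
loss = 1.0                                        # stand-in for the head's loss
loss += loss_weight * distillation
print(float(loss))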
Example #11
def _dnn_model_fn(features, labels, mode, params, config=None):
    """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`. Note that a string containing the unqualified
          name of the op may also be provided, e.g., "relu", "tanh", or
          "sigmoid".
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. The multiplier will be
          applied to the learning rate of the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
    head = params["head"]
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    optimizer = params.get("optimizer") or "Adagrad"
    activation_fn = _get_activation_fn(params.get("activation_fn"))
    dropout = params.get("dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    input_layer_min_slice_size = (params.get("input_layer_min_slice_size")
                                  or 64 << 20)
    num_ps_replicas = config.num_ps_replicas if config else 0
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    features = _get_feature_dict(features)
    parent_scope = "dnn"

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope(parent_scope,
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=input_layer_min_slice_size))
        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as input_layer_scope:
            if all(
                    isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
                    for fc in feature_columns):
                net = layers.input_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=feature_columns,
                    weight_collections=[parent_scope],
                    scope=input_layer_scope)
            else:
                net = fc_core.input_layer(features=features,
                                          feature_columns=feature_columns,
                                          weight_collections=[parent_scope])

        for layer_id, num_hidden_units in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    "hiddenlayer_%d" % layer_id,
                    values=(net, )) as hidden_layer_scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=activation_fn,
                    variables_collections=[parent_scope],
                    scope=hidden_layer_scope)
                if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dropout))
            _add_hidden_layer_summary(net, hidden_layer_scope.name)

        with variable_scope.variable_scope("logits",
                                           values=(net, )) as logits_scope:
            logits = layers.fully_connected(
                net,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[parent_scope],
                scope=logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizers.optimize_loss(
                loss=loss,
                global_step=training_util.get_global_step(),
                learning_rate=_LEARNING_RATE,
                optimizer=_get_optimizer(optimizer),
                gradient_multipliers=(
                    dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                        embedding_lr_multipliers, parent_scope,
                        input_layer_scope.name)),
                clip_gradients=gradient_clip_norm,
                name=parent_scope,
                # Empty summaries to prevent optimizers from logging training_loss.
                summaries=[])

        return head.create_model_fn_ops(features=features,
                                        mode=mode,
                                        labels=labels,
                                        train_op_fn=_train_op_fn,
                                        logits=logits)
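For orientation, a rough Keras equivalent of the hidden-layer stack built above (our assumption; the original uses tf.contrib.layers, and the hyperparameters here are made up). Note that Keras `Dropout` takes the drop rate, whereas `layers.dropout` above takes `keep_prob`.

import tensorflow as tf

hidden_units = [128, 64]
dropout = 0.2
logits_dimension = 3

model = tf.keras.Sequential()
for num_hidden_units in hidden_units:
    model.add(tf.keras.layers.Dense(num_hidden_units, activation="relu"))
    if dropout is not None:
        model.add(tf.keras.layers.Dropout(dropout))   # active only in training
model.add(tf.keras.layers.Dense(logits_dimension, activation=None))  # linear logits
model.build(input_shape=(None, 10))   # 10 = assumed width of the input layer
print(model.output_shape)             # (None, 3)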
Example #12
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`. Note that a string containing the unqualified
          name of the op may also be provided, e.g., "relu", "tanh", or
          "sigmoid".
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = _get_activation_fn(params.get("activation_fn"))
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as input_layer_scope:
      if all([
          isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for fc in feature_columns
      ]):
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits",
        values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=(
              dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, parent_scope,
                  input_layer_scope.name)),
          clip_gradients=gradient_clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
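As a usage sketch, a `model_fn` of this form is typically handed to a `tf.contrib.learn.Estimator` together with a `params` dict carrying the keys documented above. The head constructor, feature column, and hyperparameter values below are illustrative assumptions (TF 1.x, with `_dnn_model_fn` importable from its module), not part of the example itself.

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib

params = {
    "head": head_lib.multi_class_head(n_classes=2),  # binary classification head
    "hidden_units": [64, 32],                        # two hidden layers
    "feature_columns": [tf.feature_column.numeric_column("price")],
    "optimizer": "Adagrad",                          # same default the model_fn falls back to
    "dropout": 0.5,
}

classifier = tf.contrib.learn.Estimator(model_fn=_dnn_model_fn, params=params)

Passing `"Adagrad"` as a string mirrors the docstring's default; the model_fn resolves it through `_get_optimizer` before handing it to `optimize_loss`.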
Example #13
  def _process_exogenous_features(self, times, features):
    """Create a single vector from exogenous features.

    Args:
      times: A [batch size, window size] vector of times for this batch,
          primarily used to check the shape information of exogenous features.
      features: A dictionary of exogenous features corresponding to the columns
          in self._exogenous_feature_columns. Each value should have a shape
          prefixed by [batch size, window size].
    Returns:
      A Tensor with shape [batch size, window size, exogenous dimension], where
      the size of the exogenous dimension depends on the exogenous feature
      columns passed to the model's constructor.
    Raises:
      ValueError: If an exogenous feature has an unknown rank.
    """
    if self._exogenous_feature_columns:
      exogenous_features_single_batch_dimension = {}
      for name, tensor in features.items():
        if tensor.get_shape().ndims is None:
          # input_from_feature_columns does not support completely unknown
          # feature shapes, so we save on a bit of logic and provide a better
          # error message by checking that here.
          raise ValueError(
              ("Features with unknown rank are not supported. Got shape {} for "
               "feature {}.").format(tensor.get_shape(), name))
        tensor_shape_dynamic = array_ops.shape(tensor)
        tensor = array_ops.reshape(
            tensor,
            array_ops.concat([[tensor_shape_dynamic[0]
                               * tensor_shape_dynamic[1]],
                              tensor_shape_dynamic[2:]], axis=0))
        # Avoid shape warnings when embedding "scalar" exogenous features (those
        # with only batch and window dimensions); input_from_feature_columns
        # expects input ranks to match the embedded rank.
        if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string:
          exogenous_features_single_batch_dimension[name] = tensor[:, None]
        else:
          exogenous_features_single_batch_dimension[name] = tensor
      embedded_exogenous_features_single_batch_dimension = (
          feature_column.input_layer(
              features=exogenous_features_single_batch_dimension,
              feature_columns=self._exogenous_feature_columns,
              trainable=True))
      exogenous_regressors = array_ops.reshape(
          embedded_exogenous_features_single_batch_dimension,
          array_ops.concat(
              [
                  array_ops.shape(times), array_ops.shape(
                      embedded_exogenous_features_single_batch_dimension)[1:]
              ],
              axis=0))
      exogenous_regressors.set_shape(times.get_shape().concatenate(
          embedded_exogenous_features_single_batch_dimension.get_shape()[1:]))
      exogenous_regressors = math_ops.cast(
          exogenous_regressors, dtype=self.dtype)
    else:
      # Not having any exogenous features is a special case so that models can
      # avoid superfluous updates, which may not be free of side effects due to
      # bias terms in transformations.
      exogenous_regressors = None
    return exogenous_regressors
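The core trick here, flattening the `[batch size, window size]` prefix so the feature-column machinery sees one example per `(batch, time)` pair and then restoring the window dimension afterwards, can be exercised in isolation. A minimal sketch, assuming a TF 1.x graph session and an illustrative numeric feature named `temperature` (neither is taken from the class above):

import numpy as np
import tensorflow.compat.v1 as tf

batch_size, window_size = 4, 3
exogenous_columns = [tf.feature_column.numeric_column("temperature")]

# A [batch, window] feature, flattened so each (batch, time) pair becomes one example.
temperature = tf.constant(
    np.random.rand(batch_size, window_size), dtype=tf.float32)
flat = tf.reshape(temperature, [-1])[:, None]             # [batch * window, 1]

embedded = tf.feature_column.input_layer(
    features={"temperature": flat},
    feature_columns=exogenous_columns)                    # [batch * window, 1]

# Restore the [batch, window, exogenous dimension] layout expected downstream.
regressors = tf.reshape(embedded, [batch_size, window_size, -1])

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(regressors).shape)                       # (4, 3, 1)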