def testInitFromPartitionVar(self):
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
      v1 = _create_partition_checkpoints(session, checkpoint_dir)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
          my1 = variable_scope.get_variable(
              name="my1",
              shape=[100, 100],
              initializer=init_ops.truncated_normal_initializer(0.5),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=8 << 10))
          my1_var_list = my1._get_variable_list()
        with variable_scope.variable_scope("some_other_scope"):
          my2 = variable_scope.get_variable(
              name="var1",
              shape=[100, 100],
              initializer=init_ops.truncated_normal_initializer(0.5),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=8 << 10))
          my2_var_list = my2._get_variable_list()

        checkpoint_utils.init_from_checkpoint(checkpoint_dir, {
            "scope/var1": "some_scope/my1",
            "scope/": "some_other_scope/"})

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
        my2_values = session.run(my2_var_list)
        self.assertAllEqual(my2_values, v1)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
          my1 = variable_scope.get_variable(
              name="my1",
              shape=[100, 100],
              initializer=init_ops.truncated_normal_initializer(0.5),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=8 << 10))
          my1_var_list = my1._get_variable_list()

        checkpoint_utils.init_from_checkpoint(checkpoint_dir,
                                              {"scope/var1": my1_var_list,})

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
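For reference, a rough sketch of the shard arithmetic behind the tests above, assuming only the documented behaviour of min_max_variable_partitioner (the helper below is not part of the original code): each row of a [100, 100] float32 variable is 400 bytes, so reaching at least 8 << 10 bytes per shard takes 21 rows, giving ceil(100 / 21) = 5 shards, which also happens to be the max_partitions cap.

import math

def expected_axis_shards(shape, axis, max_partitions, min_slice_size, bytes_per_element=4):
  # Bytes contributed by a single slice along `axis` (one 100-float row here is 400 bytes).
  num_elements = 1
  for dim in shape:
    num_elements *= dim
  bytes_per_slice = num_elements / shape[axis] * bytes_per_element
  # Pack enough slices into each shard to reach min_slice_size, then cap the shard count.
  slices_per_shard = max(1, int(math.ceil(min_slice_size / bytes_per_slice)))
  return min(max_partitions, int(math.ceil(shape[axis] / slices_per_shard)))

print(expected_axis_shards([100, 100], axis=0, max_partitions=5, min_slice_size=8 << 10))  # -> 5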
  def build_model(self, features, feature_columns, is_training):
    """See base class."""
    self._feature_columns = feature_columns

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        self._scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          features,
          self._get_feature_columns(),
          weight_collections=[self._scope],
          trainable=self._trainable,
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
      with variable_scope.variable_scope(
          self._scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._activation_fn,
            variables_collections=[self._scope],
            trainable=self._trainable,
            scope=scope)
        if self._dropout is not None and is_training:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - self._dropout))
      self._add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        self._scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      logits = layers.fully_connected(
          net,
          self._num_label_columns,
          activation_fn=None,
          variables_collections=[self._scope],
          trainable=self._trainable,
          scope=scope)
    self._add_hidden_layer_summary(logits, "logits")
    return logits
Example #3
 def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
                  sequence_feature_columns, context_feature_columns,
                  expected_logits):
   """Tests that the expected logits are calculated."""
   with ops.Graph().as_default():
     # Global step needed for MonitoredSession, which is in turn used to
     # explicitly set variable weights through a checkpoint.
     training_util.create_global_step()
     # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
     # the checkpoint naming is shared.
     with variable_scope.variable_scope('rnn'):
       input_layer_partitioner = (
           partitioned_variables.min_max_variable_partitioner(
               max_partitions=0, min_slice_size=64 << 20))
       logit_fn = rnn._rnn_logit_fn_builder(
           output_units=logits_dimension,
           rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
           sequence_feature_columns=sequence_feature_columns,
           context_feature_columns=context_feature_columns,
           input_layer_partitioner=input_layer_partitioner)
       # Features are constructed within this function, otherwise the Tensors
       # containing the features would be defined outside this graph.
       logits = logit_fn(features=features_fn(), mode=mode)
       with monitored_session.MonitoredTrainingSession(
           checkpoint_dir=self._model_dir) as sess:
         self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4)
 def build_model(self, features, feature_columns, is_training):
   """See base class."""
   self._feature_columns = feature_columns
   partitioner = partitioned_variables.min_max_variable_partitioner(
       max_partitions=self._num_ps_replicas,
       min_slice_size=64 << 20)
   with variable_scope.variable_scope(
       self._scope,
       values=features.values(),
       partitioner=partitioner) as scope:
     if self._joint_weights:
       logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
     else:
       logits, _, _ = layers.weighted_sum_from_feature_columns(
           columns_to_tensors=features,
           feature_columns=self._get_feature_columns(),
           num_outputs=self._num_label_columns,
           weight_collections=[self._scope],
           scope=scope)
   return logits
Example #5
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logit_fn = _linear_logit_fn_builder(
        units=head.logits_dimension, feature_columns=feature_columns)
    logits = logit_fn(features=features)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss,
          global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
Example #6
def _linear_model_fn(features, labels, mode, params, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or params are invalid.
  """
  head = params['head']
  feature_columns = tuple(params['feature_columns'])
  optimizer = optimizers.get_optimizer_instance(
      params.get('optimizer') or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = params.get('partitioner') or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss,
          global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
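This params-based model_fn is normally wrapped by a canned estimator; below is only a minimal sketch of handing it to tf.estimator.Estimator directly. my_head stands in for a hypothetical Head instance built elsewhere with the estimator head library, and none of this wiring appears in the original source.

import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column('x', shape=(4,))]
estimator = tf.estimator.Estimator(
    model_fn=_linear_model_fn,
    params={
        'head': my_head,                  # hypothetical Head instance, built elsewhere
        'feature_columns': feature_columns,
        'optimizer': 'Ftrl',              # resolved by optimizers.get_optimizer_instance
    },
    config=tf.estimator.RunConfig())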
Example #7
def _linear_model_fn(features, labels, mode, params, config):
    """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or params are invalid.
  """
    head = params['head']
    feature_columns = tuple(params['feature_columns'])
    optimizer = optimizers.get_optimizer_instance(
        params.get('optimizer') or _get_default_optimizer(feature_columns),
        learning_rate=_LEARNING_RATE)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = params.get('partitioner') or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    with variable_scope.variable_scope('linear',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):

        logits = feature_column_lib.linear_model(
            features=features,
            feature_columns=feature_columns,
            units=head.logits_dimension)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizer.minimize(
                loss, global_step=training_util.get_global_step())

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
 def _testMinMaxVariablePartitioner(self, max_partitions, axis, min_slice_size,
                                    var_name, var_shape, expected_axis_shards,
                                    expected_partitions):
   partitioner = partitioned_variables.min_max_variable_partitioner(
       max_partitions=max_partitions, axis=axis, min_slice_size=min_slice_size)
   with variable_scope.variable_scope("root", partitioner=partitioner):
     v0 = variable_scope.get_variable(
         var_name, dtype=dtypes.float32, shape=var_shape)
     v0_list = v0._get_variable_list()
     v0_part = v0._get_partitions()
     self.assertEqual(len(v0_list), expected_axis_shards)
     self.assertAllEqual(v0_part, expected_partitions)
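A hypothetical parameterization of this helper, not taken from the original test suite but chosen to be consistent with the partitioner's byte-based rule: a [64, 1024] float32 variable has 4096-byte rows, so min_slice_size=32 << 10 packs 8 rows per shard and yields 8 partitions along axis 0. Called from a test method it would look like:

    self._testMinMaxVariablePartitioner(
        max_partitions=100, axis=0, min_slice_size=32 << 10,
        var_name="hypothetical_var", var_shape=[64, 1024],
        expected_axis_shards=8, expected_partitions=[8, 1])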
Example #9
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
    """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    optimizer = optimizers.get_optimizer_instance(
        optimizer or _get_default_optimizer(feature_columns),
        learning_rate=_LEARNING_RATE)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    with variable_scope.variable_scope('linear',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):

        logit_fn = _linear_logit_fn_builder(units=head.logits_dimension,
                                            feature_columns=feature_columns)
        logits = logit_fn(features=features)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizer.minimize(
                loss, global_step=training_util.get_global_step())

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
Example #10
 def _testMinMaxVariablePartitioner(self, max_partitions, axis, min_slice_size,
                                    var_name, var_shape, expected_axis_shards,
                                    expected_partitions):
   partitioner = partitioned_variables.min_max_variable_partitioner(
       max_partitions=max_partitions, axis=axis, min_slice_size=min_slice_size)
   with variable_scope.variable_scope("root", partitioner=partitioner):
     v0 = variable_scope.get_variable(
         var_name, dtype=dtypes.float32, shape=var_shape)
     v0_list = v0._get_variable_list()
     v0_part = v0._get_partitions()
     self.assertEqual(len(v0_list), expected_axis_shards)
     self.assertAllEqual(v0_part, expected_partitions)
Example #11
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):

    optimizer = optimizers.get_optimizer_instance(optimizer,
                                                  learning_rate=_LEARNING_RATE)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope('dnn',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = input_layer_partitioner or (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas, min_slice_size=64 << 20))

        logit_fn = _dnn_logit_fn_builder(
            units=head.logits_dimension,
            hidden_units=hidden_units,
            feature_columns=feature_columns,
            activation_fn=activation_fn,
            dropout=dropout,
            input_layer_partitioner=input_layer_partitioner)
        logits = logit_fn(features=features, mode=mode)

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          optimizer=optimizer,
                                          logits=logits)
Example #12
 def build_model(self, features, feature_columns, is_training):
     """See base class."""
     self._feature_columns = feature_columns
     partitioner = partitioned_variables.min_max_variable_partitioner(
         max_partitions=self._num_ps_replicas, min_slice_size=64 << 20)
     with variable_scope.variable_op_scope(features.values(), "linear",
                                           partitioner) as scope:
         logits, _, _ = layers.weighted_sum_from_feature_columns(
             columns_to_tensors=features,
             feature_columns=self._get_feature_columns(),
             num_outputs=self._num_label_columns,
             weight_collections=[self._weight_collection_name],
             scope=scope)
     return logits
Example #13
 def get_user_input_from_feature_columns(self):
     input_layer_partitioner = (
         partitioned_variables.min_max_variable_partitioner(
             max_partitions=self.num_ps_replicas, min_slice_size=64 << 20))
     with variable_scope.variable_scope(
             "input_from_feature_columns",
             values=tuple(six.itervalues(self.features)),
             partitioner=input_layer_partitioner) as dnn_input_scope:
          self.dnn_net = layers.input_from_feature_columns(
              columns_to_tensors=self.features,
              feature_columns=self.dnn_features_columns,
              weight_collections=[self.dnn_parent_scope],
              scope=dnn_input_scope)
def _EmbeddingParamsAsPartitionedVariable(num_shards,
                                          vocab_size,
                                          dtype=dtypes.float32,
                                          shape=None):
  p, params, feed_dict = _EmbeddingParams(
      num_shards, vocab_size, dtype=dtype, shape=shape)
  shape = shape or [10]
  partitioned_variable = variable_scope.get_variable(
      "p",
      shape=[vocab_size] + shape,
      initializer=array_ops.concat([params[p_i.name] for p_i in p], 0),
      partitioner=partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_shards, min_slice_size=1))
  return p, partitioned_variable, params, feed_dict
  def build_model(self, features, feature_columns, is_training):
    """See base class."""
    features = self._get_feature_dict(features)
    self._feature_columns = feature_columns

    net = layers.input_from_feature_columns(
        features,
        self._get_feature_columns(),
        weight_collections=[self._weight_collection_name])
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
      with variable_scope.variable_op_scope(
          [net], "hiddenlayer_%d" % layer_id,
          partitioner=partitioned_variables.min_max_variable_partitioner(
              max_partitions=self._config.num_ps_replicas)) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._activation_fn,
            variables_collections=[self._weight_collection_name],
            scope=scope)
        if self._dropout is not None and is_training:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - self._dropout))
      self._add_hidden_layer_summary(net, scope.name)
    with variable_scope.variable_op_scope(
        [net], "dnn_logits",
        partitioner=partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._config.num_ps_replicas)) as scope:
      logits = layers.fully_connected(
          net,
          self._num_label_columns,
          activation_fn=None,
          variables_collections=[self._weight_collection_name],
          scope=scope)
    self._add_hidden_layer_summary(logits, "dnn_logits")
    return logits
 def _dnn_logits(self, features, is_training=False):
     net = layers.input_from_feature_columns(
         features, self._get_dnn_feature_columns(), weight_collections=[self._dnn_weight_collection]
     )
     for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
         with variable_scope.variable_op_scope(
             [net],
             "hiddenlayer_%d" % layer_id,
             partitioner=partitioned_variables.min_max_variable_partitioner(
                 max_partitions=self._config.num_ps_replicas
             ),
         ) as scope:
             net = layers.fully_connected(
                 net,
                 num_hidden_units,
                 activation_fn=self._dnn_activation_fn,
                 variables_collections=[self._dnn_weight_collection],
                 scope=scope,
             )
             if self._dnn_dropout is not None and is_training:
                 net = layers.dropout(net, keep_prob=(1.0 - self._dnn_dropout))
         self._add_hidden_layer_summary(net, scope.name)
     with variable_scope.variable_op_scope(
         [net],
         "dnn_logit",
         partitioner=partitioned_variables.min_max_variable_partitioner(max_partitions=self._config.num_ps_replicas),
     ) as scope:
         logit = layers.fully_connected(
             net,
             self._target_column.num_label_columns,
             activation_fn=None,
             variables_collections=[self._dnn_weight_collection],
             scope=scope,
         )
     self._add_hidden_layer_summary(logit, "dnn_logit")
     return logit
def _EmbeddingParamsAsPartitionedVariable(num_shards,
                                          vocab_size,
                                          dtype=dtypes.float32,
                                          shape=None,
                                          use_resource=False):
  p, params, feed_dict = _EmbeddingParams(
      num_shards, vocab_size, dtype=dtype, shape=shape)
  shape = shape or [10]
  partitioned_variable = variable_scope.get_variable(
      "p",
      shape=[vocab_size] + shape,
      initializer=array_ops.concat([params[p_i.name] for p_i in p], 0),
      partitioner=partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_shards, min_slice_size=1),
      use_resource=use_resource)
  return p, partitioned_variable, params, feed_dict
Example #18
 def build_dnn(self):
     dnn_partitioner = (
             partitioned_variables.min_max_variable_partitioner(
                     max_partitions=self.num_ps_replicas,
                     min_slice_size=64 << 20))
     with tf.variable_scope(self.dnn_parent_scope,
                            values=tuple(six.itervalues(self.features)),
                            partitioner=dnn_partitioner,
                            reuse=False):
         self.get_input_from_feature_columns()
         self.get_features()
         return self.build_dnn_logits(
                 self.dnn_features,
                 self.dnn_hidden_units,
                 self.logits_dimension,
                 self.dnn_parent_scope)
def _create_partition_checkpoints(sess, checkpoint_dir):
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    checkpoint_state_name = "checkpoint"
    v1 = variable_scope.get_variable(
        name="var1",
        shape=[100, 100],
        initializer=init_ops.truncated_normal_initializer(0.5),
        partitioner=partitioned_variables.min_max_variable_partitioner(
            max_partitions=5, axis=0, min_slice_size=8 << 10))
    sess.run(variables.global_variables_initializer())
    v1_value = sess.run(v1._get_variable_list())
    saver = saver_lib.Saver()
    saver.save(sess,
               checkpoint_prefix,
               global_step=0,
               latest_filename=checkpoint_state_name)
    return v1_value
Example #20
def build_dnn_model(mode, inputs, columns, config):
  """Compute logits of the dnn part (output distribution)
  Args:
      inputs: (dict) contains the inputs of the graph (features, labels...)
              this can be `tf.placeholder` or outputs of `tf.data`
      columns: (list) contains dnn feature columns
      config: (configparser) contains hyperparameters for model building
  Returns:
      logits: (tf.Tensor) output of the model
  """
  features = inputs['features']

  num_ps_replicas = config['dnn_model'].get('num_ps_replicas', 0)
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # parse configurations
  units = int(config['dnn_model'].get('units', 1))
  dnn_hidden_units = [int(n) for n in config['dnn_model'].get('hiden_units', '512,128,64').split(',')]
  dnn_activation_fn = tf.nn.relu
  if config['dnn_model'].get('activation_fn', None) is not None:
    dnn_activation_fn = eval(config['dnn_model']['activation_fn'])
  dnn_dropout = None
  if config['dnn_model'].get('dropout', None) is not None:
    dnn_dropout = float(config['dnn_model']['dropout'])
  batch_norm = False
  if config['dnn_model'].get('batch_norm', '').lower() == 'true':
    batch_norm = True
  
  # build dnn part
  dnn_logit_fn = dnn._dnn_logit_fn_builder(
      units=units,
      hidden_units=dnn_hidden_units,
      feature_columns=columns,
      activation_fn=dnn_activation_fn,
      dropout=dnn_dropout,
      input_layer_partitioner=input_layer_partitioner
      )

  dnn_logits = dnn_logit_fn(features=features, mode=mode) 

  return dnn_logits
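A minimal sketch of the configparser object build_dnn_model expects; the [dnn_model] keys mirror the ones read above (including the 'hiden_units' spelling), while inputs, columns and mode are hypothetical placeholders for the input pipeline and feature columns. None of this setup code is part of the original example.

import configparser
import tensorflow as tf

config = configparser.ConfigParser()
config['dnn_model'] = {
    'units': '1',
    'hiden_units': '128,64',      # key spelled exactly as the function reads it
    'activation_fn': 'tf.nn.relu',
    'dropout': '0.3',
    'batch_norm': 'false',
}
# 'num_ps_replicas' is optional: config['dnn_model'].get('num_ps_replicas', 0) falls back to 0.

# inputs = {'features': ...}; columns = [...]   # hypothetical, from the input pipeline
# logits = build_dnn_model(tf.estimator.ModeKeys.TRAIN, inputs, columns, config)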
def _create_partition_checkpoints(sess, checkpoint_dir):
  checkpoint_prefix = os.path.join(checkpoint_dir, "model")
  checkpoint_state_name = "checkpoint"
  v1 = variable_scope.get_variable(
      name="var1",
      shape=[100, 100],
      initializer=init_ops.truncated_normal_initializer(0.5),
      partitioner=partitioned_variables.min_max_variable_partitioner(
          max_partitions=5, axis=0, min_slice_size=8 << 10))
  sess.run(variables.global_variables_initializer())
  v1_value = sess.run(v1._get_variable_list())
  saver = saver_lib.Saver()
  saver.save(
      sess,
      checkpoint_prefix,
      global_step=0,
      latest_filename=checkpoint_state_name)
  return v1_value
Example #22
 def build_linear_logits(features,
                         linear_feature_columns,
                         num_ps_replicas,
                         logits_dimension,
                         linear_parent_scope):
     linear_partitioner = partitioned_variables.min_max_variable_partitioner(
             max_partitions=num_ps_replicas,
             min_slice_size=64 << 20)
     with variable_scope.variable_scope(
             linear_parent_scope,
             values=tuple(six.itervalues(features)),
             partitioner=linear_partitioner) as scope:
         linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
                 columns_to_tensors=features,
                 feature_columns=linear_feature_columns,
                 num_outputs=logits_dimension,
                 weight_collections=[linear_parent_scope],
                 scope=scope)
     return linear_logits
Example #23
    def build_model(self, features, feature_columns, is_training):
        """See base class."""
        features = self._get_feature_dict(features)
        self._feature_columns = feature_columns

        net = layers.input_from_feature_columns(
            features,
            self._get_feature_columns(),
            weight_collections=[self._weight_collection_name])
        for layer_id, num_hidden_units in enumerate(self._hidden_units):
            with variable_scope.variable_op_scope(
                [net],
                    "hiddenlayer_%d" % layer_id,
                    partitioner=partitioned_variables.min_max_variable_partitioner(
                        max_partitions=self._config.num_ps_replicas)) as scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=self._activation_fn,
                    variables_collections=[self._weight_collection_name],
                    scope=scope)
                if self._dropout is not None and is_training:
                    net = layers.dropout(net, keep_prob=(1.0 - self._dropout))
            self._add_hidden_layer_summary(net, scope.name)
        with variable_scope.variable_op_scope(
            [net],
                "dnn_logits",
                partitioner=partitioned_variables.min_max_variable_partitioner(
                    max_partitions=self._config.num_ps_replicas)) as scope:
            logits = layers.fully_connected(
                net,
                self._num_label_columns,
                activation_fn=None,
                variables_collections=[self._weight_collection_name],
                scope=scope)
        self._add_hidden_layer_summary(logits, "dnn_logits")
        return logits
  def testPartitionToOne(self):
    # For small variables there is only one partition.
    variable_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=2, min_slice_size=64 << 20)
    strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
        self.cluster_resolver, variable_partitioner)
    with strategy.scope():
      initializer = init_ops_v2.Constant([0] * 10)
      v1 = variables.Variable(
          initial_value=lambda: initializer(shape=(10,), dtype=dtypes.int64),
          shape=(10,),
          dtype=dtypes.int64)

      v2 = variables.Variable([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    self.assertIsInstance(v1, variables.Variable)
    self.assertNotIsInstance(v1, sharded_variable.ShardedVariable)
    self.assertRegex(v1.device, "/job:ps/replica:0/task:0")
    self.assertAllEqual(v1.read_value().numpy(), [0] * 10)

    self.assertIsInstance(v2, variables.Variable)
    self.assertNotIsInstance(v2, sharded_variable.ShardedVariable)
    self.assertRegex(v2.device, "/job:ps/replica:0/task:1")
    self.assertAllEqual(v2.read_value().numpy(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
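A back-of-the-envelope check, assuming only the partitioner's documented byte-based rule (this arithmetic is not part of the test): the 10-element int64 variables above are 80 bytes, far below min_slice_size, so the partitioner reports a single shard and the strategy creates plain Variables instead of ShardedVariables.

min_slice_size = 64 << 20                                      # 64 MiB per shard, as configured above
bytes_per_row = 8                                              # one int64 element per slice along axis 0
rows_per_shard = max(1, -(-min_slice_size // bytes_per_row))   # ceil division -> 8388608 rows per shard
axis_shards = min(2, -(-10 // rows_per_shard))                 # ceil(10 / 8388608) = 1 shard
assert axis_shards == 1                                        # hence a plain Variable, not a ShardedVariable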
Example #25
 def _dnn_logits(self, features, is_training=False):
     net = layers.input_from_feature_columns(
         features,
         self._get_dnn_feature_columns(),
         weight_collections=[self._dnn_weight_collection])
     for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
         op_scope = "hiddenlayer_%d" % layer_id
         with variable_scope.variable_op_scope(
             [net],
                 op_scope,
                 partitioner=partitioned_variables.min_max_variable_partitioner(
                     max_partitions=self._config.num_ps_replicas)):
             net = layers.fully_connected(
                 net,
                 num_hidden_units,
                 activation_fn=self._dnn_activation_fn,
                 variables_collections=[self._dnn_weight_collection],
                 scope=op_scope)
             if self._dnn_dropout is not None and is_training:
                 net = layers.dropout(net,
                                      keep_prob=(1.0 - self._dnn_dropout))
         self._add_hidden_layer_summary(net, op_scope)
     with variable_scope.variable_op_scope(
         [net],
             "dnn_logit",
             partitioner=partitioned_variables.min_max_variable_partitioner(
                 max_partitions=self._config.num_ps_replicas)):
         logit = layers.fully_connected(
             net,
             self._num_label_columns(),
             activation_fn=None,
             variables_collections=[self._dnn_weight_collection],
             scope="dnn_logit")
     self._add_hidden_layer_summary(logit, "dnn_logit")
     return logit
Example #26
def model_fn(features, labels, mode: tf.estimator.ModeKeys, config: RunConfig,
             params: user_params):
    print("--- model_fn in %s ---" % mode)
    num_ps_replicas = config.num_ps_replicas if config else 0
    if tf.executing_eagerly():
        partitioner = None
    else:
        partitioner = partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas)
    partitioner = None

    def is_training():
        return mode == tf.estimator.ModeKeys.TRAIN

    layers = list()
    with tf.variable_scope("rc",
                           partitioner=partitioner,
                           initializer=xavier_initializer()):
        feature_size = 0
        #params.procedure = ["wordembedding,contextual,bidaf,modeling,output"]
        for name in params.procedure:
            if name == "wordembedding":
                l = WordEmbedLayer(params, is_training())
            elif name == "contextual":
                l = (BiLstmLayer(params, feature_size, is_training()))
            elif name == "bidaf":
                l = (BiDafLayer(params, feature_size, is_training()))
            elif name == "dcn":
                l = (DcnLayer(params, feature_size, is_training()))
            elif name == "modeling":
                l = (ModelingLayer(params,
                                   feature_size,
                                   2,
                                   is_training(),
                                   name="m1"))
            elif name == "output":
                l = (OutputLayer(params, feature_size, is_training()))
            elif name == "output_att":
                l = (OutputLayerAttM2(params, feature_size, is_training()))
            elif name == "QAnetEmbedding":
                l = (QAnetEmbedding(params, feature_size, is_training()))
            elif name == "QaModelBlock":
                l = (QaModelBlock(params, feature_size, is_training()))
            elif name == "QAOutput":
                l = QAOutputLayer(params, feature_size, is_training())
            else:
                raise ValueError(
                    "unknow precedure, valid name is pcnn,mi_att,birnn")
            layers.append(l)
            feature_size = l.get_output_feature_size()

        head = SpanMatchHead(name="span_head")
        logits = features
        for layer in layers:
            logits = layer(logits)

    def train_op_fn(loss):
        global_step = training_util.get_global_step()
        #warmed up learning rate
        lr = tf.minimum(
            params.learning_rate, 0.001 / tf.log(999.) *
            tf.log(tf.cast(global_step, tf.float32) + 1))
        opt = tf.train.AdamOptimizer(learning_rate=lr,
                                     beta1=0.8,
                                     beta2=0.999,
                                     epsilon=1e-7)
        grads = opt.compute_gradients(loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.clip_norm)
        train_op = opt.apply_gradients(zip(capped_grads, variables),
                                       global_step=global_step)
        return train_op
        # return tf.train.AdamOptimizer(learning_rate=params.learning_rate) \
        #     .minimize(loss, global_step=training_util.get_global_step())

    if params.enable_ema and mode == tf.estimator.ModeKeys.TRAIN:
        ema = tf.train.ExponentialMovingAverage(decay=params.ema_decay)
        trained_var = tf.trainable_variables()
        ema_op = ema.apply(trained_var)
        # for var in tf.get_collection(key = "not_in_ema"):
        #     trained_var.remove(var)
        variables_to_restore = ema.variables_to_restore()
        #print( "in train-------------------------------------")
        #print(variables_to_restore)
        with tf.control_dependencies([ema_op]):
            logits = tf.identity(logits[0]), tf.identity(logits[1])

    #regularization_loss = None
    lamda = 3e-7
    regularization_loss = [
        lamda * x for x in tf.losses.get_regularization_losses()
    ]
    ps = None
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        ps = [labels[params.p1], labels[params.p2]]

    spec = head.create_estimator_spec(
        None,
        mode,
        logits,
        labels=ps,
        train_op_fn=train_op_fn,
        regularization_losses=regularization_loss,
        params=params)
    if params.enable_ema and mode == tf.estimator.ModeKeys.PREDICT:
        ema = tf.train.ExponentialMovingAverage(decay=params.ema_decay)
        variables_to_restore = ema.variables_to_restore()
        #print( "in prodict-------------------------------------")
        #print(variables_to_restore)
        scaffold = spec.scaffold
        scaffold._saver = tf.train.Saver(variables_to_restore)

    return spec
Example #27
def _dnn_classifier_model_fn(features, targets, mode, params):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  activation_fn = params["activation_fn"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  enable_centered_bias = params["enable_centered_bias"]
  num_ps_replicas = params["num_ps_replicas"]

  features = _get_feature_dict(features)
  parent_scope = "dnn"
  num_label_columns = 1 if n_classes == 2 else n_classes
  if n_classes == 2:
    loss_fn = loss_ops.sigmoid_cross_entropy
  else:
    loss_fn = loss_ops.sparse_softmax_cross_entropy

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(
            net,
            keep_prob=(1.0 - dropout))
    _add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      parent_scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        num_label_columns,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)
  _add_hidden_layer_summary(logits, scope.name)

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  if mode == estimator.ModeKeys.TRAIN:
    targets = _reshape_targets(targets)
    weight = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, targets, weight=weight)
    loss = _rescale_eval_loss(training_loss, weight)

    train_ops = [optimizers.optimize_loss(
        loss=training_loss,
        global_step=contrib_variables.get_global_step(),
        learning_rate=_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer),
        clip_gradients=gradient_clip_norm,
        name=parent_scope,
        # Empty summaries to prevent optimizers from logging the training_loss.
        summaries=[])]
    if enable_centered_bias:
      train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns))

    logging_ops.scalar_summary("loss", loss)

    return None, loss, control_flow_ops.group(*train_ops)

  elif mode == estimator.ModeKeys.EVAL:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    targets = _reshape_targets(targets)
    weight = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, targets, weight=weight)
    loss = _rescale_eval_loss(training_loss, weight)

    return predictions, loss, []

  else:  # mode == estimator.ModeKeys.INFER:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    return predictions, None, []
Example #28
def _dnn_model_fn(features, labels, mode, params, config=None):
    """Deep Neural Net model_fn.

    Args:
      features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
      labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
        dtype `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction.
        See `ModeKeys`.
      params: A dict of hyperparameters.
        The following hyperparameters are expected:
        * head: A `_Head` instance.
        * hidden_units: List of hidden units per layer.
        * feature_columns: An iterable containing all the feature columns used by
            the model.
        * optimizer: string, `Optimizer` object, or callable that defines the
            optimizer to use for training. If `None`, will use the Adagrad
            optimizer with a default learning rate of 0.05.
        * activation_fn: Activation function applied to each layer. If `None`,
            will use `tf.nn.relu`.
        * dropout: When not `None`, the probability we will drop out a given
            coordinate.
        * gradient_clip_norm: A float > 0. If provided, gradients are
            clipped to their global norm with this clipping ratio.
        * embedding_lr_multipliers: Optional. A dictionary from
            `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
            multiply with learning rate for the embedding variables.
        * input_layer_min_slice_size: Optional. The min slice size of input layer
            partitions. If not provided, will use the default of 64M.
      config: `RunConfig` object to configure the runtime settings.

    Returns:
      predictions: A dict of `Tensor` objects.
      loss: A scalar containing the loss of the step.
      train_op: The op for training.
    """
    head = params["head"]
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    optimizer = params.get("optimizer") or "Adagrad"
    activation_fn = params.get("activation_fn")
    dropout = params.get("dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    input_layer_min_slice_size = (params.get("input_layer_min_slice_size")
                                  or 64 << 20)
    num_ps_replicas = config.num_ps_replicas if config else 0
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    features = _get_feature_dict(features)
    parent_scope = "dnn"

    # Synthetic minority over-sampling technique
    # to overcome the lack of B and S signals in the training data
    if mode == model_fn.ModeKeys.TRAIN:
        sm = SMOTE(ratio=0.1, k=5, kind='regular', random_state=10)
        sess = tf.Session(config=tf.ConfigProto(operation_timeout_in_ms=500))
        with sess:
            features, labels = sm.fit_sample(features, labels.eval().ravel())

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope(parent_scope,
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=input_layer_min_slice_size))
        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as input_layer_scope:
            if all([
                    isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
                    for fc in feature_columns
            ]):
                net = layers.input_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=feature_columns,
                    weight_collections=[parent_scope],
                    scope=input_layer_scope)
            else:
                net = fc_core.input_layer(features=features,
                                          feature_columns=feature_columns,
                                          weight_collections=[parent_scope])

        for layer_id, num_hidden_units in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    "hiddenlayer_%d" % layer_id,
                    values=(net, )) as hidden_layer_scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=activation_fn,
                    variables_collections=[parent_scope],
                    scope=hidden_layer_scope)
                if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dropout))
            _add_hidden_layer_summary(net, hidden_layer_scope.name)

        with variable_scope.variable_scope("logits",
                                           values=(net, )) as logits_scope:
            logits = layers.fully_connected(
                net,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[parent_scope],
                scope=logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizers.optimize_loss(
                loss=loss,
                global_step=contrib_variables.get_global_step(),
                learning_rate=_LEARNING_RATE,
                optimizer=_get_optimizer(optimizer),
                gradient_multipliers=(
                    dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                        embedding_lr_multipliers, parent_scope,
                        input_layer_scope.name)),
                clip_gradients=gradient_clip_norm,
                name=parent_scope,
                # Empty summaries to prevent optimizers from logging training_loss.
                summaries=[])

        return head.create_model_fn_ops(features=features,
                                        mode=mode,
                                        labels=labels,
                                        train_op_fn=_train_op_fn,
                                        logits=logits)
Example #29
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

    def _train_op_fn(loss):
      global_step = contrib_variables.get_global_step()
      my_vars = ops.get_collection(parent_scope)
      grads = gradients.gradients(loss, my_vars)
      if gradient_clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
      return (_get_optimizer(optimizer).apply_gradients(
          zip(grads, my_vars), global_step=global_step))

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dnn_tree_combined_model_fn(
        features,
        labels,
        mode,
        head,
        dnn_hidden_units,
        dnn_feature_columns,
        tree_learner_config,
        num_trees,
        tree_examples_per_layer,
        config=None,
        dnn_optimizer="Adagrad",
        dnn_activation_fn=nn.relu,
        dnn_dropout=None,
        dnn_input_layer_partitioner=None,
        dnn_input_layer_to_tree=True,
        dnn_steps_to_train=10000,
        predict_with_tree_only=False,
        tree_feature_columns=None,
        tree_center_bias=False,
        dnn_to_tree_distillation_param=None,
        use_core_versions=False,
        output_type=model.ModelBuilderOutputType.MODEL_FN_OPS,
        override_global_step_value=None):
    """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      the training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn, for
      computing distillation loss, takes dnn_logits, tree_logits and weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, we will take the sigmoid/softmax cross entropy
      loss by default. When distillation is applied, `predict_with_tree_only`
      will be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec
      (new interface).
    override_global_step_value: If set, the global step value will be reset to
      this value once training is done. This is particularly useful for
      hyperparameter tuning, which can't recognize early stopping due to the
      number of trees. If None, no override of global step will happen.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
    if not isinstance(features, dict):
        raise ValueError("features should be a dictionary of `Tensor`s. "
                         "Given type: {}".format(type(features)))

    if not dnn_feature_columns:
        raise ValueError("dnn_feature_columns must be specified")

    if dnn_to_tree_distillation_param:
        if not predict_with_tree_only:
            logging.warning(
                "update predict_with_tree_only to True since distillation"
                "is specified.")
            predict_with_tree_only = True

    # Build DNN Logits.
    dnn_parent_scope = "dnn"
    dnn_partitioner = dnn_input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

    if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC
            and not use_core_versions):
        raise ValueError("You must use core versions with Estimator Spec")
    global_step = training_util.get_global_step()

    with variable_scope.variable_scope(dnn_parent_scope,
                                       values=tuple(six.itervalues(features)),
                                       partitioner=dnn_partitioner):

        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner) as input_layer_scope:
            if use_core_versions:
                input_layer = feature_column_lib.input_layer(
                    features=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope])
            else:
                input_layer = layers.input_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope],
                    scope=input_layer_scope)

        def dnn_logits_fn():
            """Builds the logits from the input layer."""
            previous_layer = input_layer
            for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
                with variable_scope.variable_scope(
                        "hiddenlayer_%d" % layer_id,
                        values=(previous_layer, )) as hidden_layer_scope:
                    net = layers.fully_connected(
                        previous_layer,
                        num_hidden_units,
                        activation_fn=dnn_activation_fn,
                        variables_collections=[dnn_parent_scope],
                        scope=hidden_layer_scope)
                    if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                        net = layers.dropout(net,
                                             keep_prob=(1.0 - dnn_dropout))
                _add_hidden_layer_summary(net, hidden_layer_scope.name)
                previous_layer = net
            with variable_scope.variable_scope(
                    "logits", values=(previous_layer, )) as logits_scope:
                dnn_logits = layers.fully_connected(
                    previous_layer,
                    head.logits_dimension,
                    activation_fn=None,
                    variables_collections=[dnn_parent_scope],
                    scope=logits_scope)
            _add_hidden_layer_summary(dnn_logits, logits_scope.name)
            return dnn_logits

        if predict_with_tree_only and mode == model_fn.ModeKeys.INFER:
            dnn_logits = array_ops.constant(0.0)
            dnn_train_op_fn = control_flow_ops.no_op
        elif predict_with_tree_only and mode == model_fn.ModeKeys.EVAL:
            dnn_logits = control_flow_ops.cond(
                global_step > dnn_steps_to_train,
                lambda: array_ops.constant(0.0), dnn_logits_fn)
            dnn_train_op_fn = control_flow_ops.no_op
        else:
            dnn_logits = dnn_logits_fn()

            def dnn_train_op_fn(loss):
                """Returns the op to optimize the loss."""
                return optimizers.optimize_loss(
                    loss=loss,
                    global_step=training_util.get_global_step(),
                    learning_rate=_DNN_LEARNING_RATE,
                    optimizer=_get_optimizer(dnn_optimizer),
                    name=dnn_parent_scope,
                    variables=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=dnn_parent_scope),
                    # Empty summaries to prevent optimizers from logging training_loss.
                    summaries=[])

    # Build Tree Logits.
    with ops.device(global_step.device):
        ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=0,
            tree_ensemble_config="",  # Initialize an empty ensemble.
            name="ensemble_model")

    tree_features = features.copy()
    if dnn_input_layer_to_tree:
        tree_features["dnn_input_layer"] = input_layer
        tree_feature_columns.append(
            layers.real_valued_column("dnn_input_layer"))
    gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
        is_chief=config.is_chief,
        num_ps_replicas=config.num_ps_replicas,
        ensemble_handle=ensemble_handle,
        center_bias=tree_center_bias,
        examples_per_layer=tree_examples_per_layer,
        learner_config=tree_learner_config,
        feature_columns=tree_feature_columns,
        logits_dimension=head.logits_dimension,
        features=tree_features,
        use_core_columns=use_core_versions)

    with ops.name_scope("gbdt"):
        predictions_dict = gbdt_model.predict(mode)
        tree_logits = predictions_dict["predictions"]

        def _tree_train_op_fn(loss):
            """Returns the op to optimize the loss."""
            if dnn_to_tree_distillation_param:
                loss_weight, loss_fn = dnn_to_tree_distillation_param
                # pylint: disable=protected-access
                if use_core_versions:
                    weight_tensor = head_lib._weight_tensor(
                        features, head._weight_column)
                else:
                    weight_tensor = head_lib._weight_tensor(
                        features, head.weight_column_name)
                # pylint: enable=protected-access
                dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

                if loss_fn is None:
                    # Create a loss_fn similar to the default loss_fn of the
                    # multi_class_head that was used previously.
                    n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
                    loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
                        n_classes)

                dnn_to_tree_distillation_loss = loss_weight * loss_fn(
                    dnn_logits_fixed, tree_logits, weight_tensor)
                summary.scalar("dnn_to_tree_distillation_loss",
                               dnn_to_tree_distillation_loss)
                loss += dnn_to_tree_distillation_loss

            update_op = gbdt_model.train(loss, predictions_dict, labels)
            with ops.control_dependencies(
                [update_op]), (ops.colocate_with(global_step)):
                update_op = state_ops.assign_add(global_step, 1).op
                return update_op

    if predict_with_tree_only:
        if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
            tree_train_logits = tree_logits
        else:
            tree_train_logits = control_flow_ops.cond(
                global_step > dnn_steps_to_train, lambda: tree_logits,
                lambda: dnn_logits)
    else:
        tree_train_logits = dnn_logits + tree_logits

    def _no_train_op_fn(loss):
        """Returns a no-op."""
        del loss
        return control_flow_ops.no_op()

    if tree_center_bias:
        num_trees += 1
    finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

    if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
        model_fn_ops = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=_no_train_op_fn,
                                                logits=tree_train_logits)
        if mode != model_fn.ModeKeys.TRAIN:
            return model_fn_ops
        dnn_train_op = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=dnn_train_op_fn,
                                                logits=dnn_logits).train_op
        tree_train_op = head.create_model_fn_ops(
            features=tree_features,
            mode=mode,
            labels=labels,
            train_op_fn=_tree_train_op_fn,
            logits=tree_train_logits).train_op

        # Add the hooks
        model_fn_ops.training_hooks.extend([
            trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                        tree_train_op),
            trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                          finalized_trees,
                                          override_global_step_value)
        ])
        return model_fn_ops

    elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
        fusion_spec = head.create_estimator_spec(features=features,
                                                 mode=mode,
                                                 labels=labels,
                                                 train_op_fn=_no_train_op_fn,
                                                 logits=tree_train_logits)
        if mode != model_fn.ModeKeys.TRAIN:
            return fusion_spec
        dnn_spec = head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              train_op_fn=dnn_train_op_fn,
                                              logits=dnn_logits)
        tree_spec = head.create_estimator_spec(features=tree_features,
                                               mode=mode,
                                               labels=labels,
                                               train_op_fn=_tree_train_op_fn,
                                               logits=tree_train_logits)

        training_hooks = [
            trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                        tree_spec.train_op),
            trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                          finalized_trees,
                                          override_global_step_value)
        ]
        fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                           list(fusion_spec.training_hooks))
        return fusion_spec
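A hedged sketch of how a caller might supply the `dnn_to_tree_distillation_param` described in the docstring above: a `(weight, loss_fn)` tuple whose `loss_fn` takes `(dnn_logits, tree_logits, weight_tensor)`. The mean-squared-error loss below is only an illustration; passing `(weight, None)` instead falls back to the default cross-entropy distillation loss.

import tensorflow as tf

def _mse_distillation_loss(dnn_logits, tree_logits, weight_tensor):
  # Unweighted in this illustration; a real loss_fn could use weight_tensor.
  del weight_tensor
  return tf.reduce_mean(tf.squared_difference(dnn_logits, tree_logits))

# Weight the distillation term by 0.5 relative to the main training loss.
dnn_to_tree_distillation_param = (0.5, _mse_distillation_loss)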
Example #31
def _dnn_model_fn(features, labels, mode, params):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = params.get("activation_fn")
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params.get("num_ps_replicas", 0)

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = layers.dropout(
            net,
            keep_prob=(1.0 - dropout))
    _add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      parent_scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        head.logits_dimension,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)
  _add_hidden_layer_summary(logits, scope.name)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    return optimizers.optimize_loss(
        loss=loss,
        global_step=contrib_variables.get_global_step(),
        learning_rate=_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer),
        clip_gradients=gradient_clip_norm,
        name=parent_scope,
        # Empty summaries to prevent optimizers from logging the training_loss.
        summaries=[])

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
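The `optimizers.optimize_loss` call in `_train_op_fn` above appears to be the utility that is publicly exposed as `tf.contrib.layers.optimize_loss`. A minimal standalone version of the same pattern, with illustrative defaults and a string optimizer name (assumption: TF 1.x with contrib available):

import tensorflow as tf

def make_train_op(loss, learning_rate=0.05, clip_norm=5.0):
  """Builds a train op that clips gradients to a global norm of `clip_norm`."""
  return tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.train.get_or_create_global_step(),
      learning_rate=learning_rate,
      optimizer="Adagrad",
      clip_gradients=clip_norm,
      # Empty summaries, mirroring the model_fn above.
      summaries=[])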
Example #32
def _dnn_classifier_model_fn(features, labels, mode, params):
    """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of label classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    activation_fn = params["activation_fn"]
    dropout = params["dropout"]
    gradient_clip_norm = params["gradient_clip_norm"]
    enable_centered_bias = params["enable_centered_bias"]
    num_ps_replicas = params["num_ps_replicas"]

    features = _get_feature_dict(features)
    parent_scope = "dnn"
    num_label_columns = 1 if n_classes == 2 else n_classes
    if n_classes == 2:
        loss_fn = loss_ops.sigmoid_cross_entropy
    else:
        loss_fn = loss_ops.sparse_softmax_cross_entropy

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))
    with variable_scope.variable_scope(
            parent_scope + "/input_from_feature_columns",
            values=features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                parent_scope + "/hiddenlayer_%d" % layer_id,
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(net,
                                         num_hidden_units,
                                         activation_fn=activation_fn,
                                         variables_collections=[parent_scope],
                                         scope=scope)
            if dropout is not None and mode == estimator.ModeKeys.TRAIN:
                net = layers.dropout(net, keep_prob=(1.0 - dropout))
        _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
            parent_scope + "/logits",
            values=[net],
            partitioner=hidden_layer_partitioner) as scope:
        logits = layers.fully_connected(net,
                                        num_label_columns,
                                        activation_fn=None,
                                        variables_collections=[parent_scope],
                                        scope=scope)
    _add_hidden_layer_summary(logits, scope.name)

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    if mode == estimator.ModeKeys.TRAIN:
        labels = _reshape_labels(labels)
        weights = _get_weight_tensor(features, weight_column_name)
        training_loss = loss_fn(logits, labels, weights=weights)
        loss = _rescale_eval_loss(training_loss, weights)

        train_ops = [
            optimizers.optimize_loss(
                loss=training_loss,
                global_step=contrib_variables.get_global_step(),
                learning_rate=_LEARNING_RATE,
                optimizer=_get_optimizer(optimizer),
                clip_gradients=gradient_clip_norm,
                name=parent_scope,
                # Empty summaries to prevent optimizers from logging the training_loss.
                summaries=[])
        ]
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(labels, loss_fn, num_label_columns))

        summary.scalar("loss", loss)

        return None, loss, control_flow_ops.group(*train_ops)

    elif mode == estimator.ModeKeys.EVAL:
        predictions = _predictions(logits=logits, n_classes=n_classes)

        labels = _reshape_labels(labels)
        weights = _get_weight_tensor(features, weight_column_name)
        training_loss = loss_fn(logits, labels, weights=weights)
        loss = _rescale_eval_loss(training_loss, weights)

        return predictions, loss, []

    else:  # mode == estimator.ModeKeys.INFER:
        predictions = _predictions(logits=logits, n_classes=n_classes)

        return predictions, None, []
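A hedged sketch of the centered-bias idea the docstring describes: a single per-class bias variable is added on top of the logits, so the rest of the network only has to learn the residual. The helper below is illustrative and not the `_centered_bias` used above.

import tensorflow as tf

def add_centered_bias(logits, num_label_columns):
  centered_bias = tf.get_variable(
      "centered_bias",
      shape=[num_label_columns],
      initializer=tf.zeros_initializer(),
      trainable=True)
  return tf.nn.bias_add(logits, centered_bias)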
Example #33
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None,
                  use_tpu=False,
                  batch_norm=False):
    """Deep Neural Net model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))

    optimizer = optimizers.get_optimizer_instance(optimizer,
                                                  learning_rate=_LEARNING_RATE)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = (None if use_tpu else
                   partitioned_variables.min_max_variable_partitioner(
                       max_partitions=num_ps_replicas))
    with variable_scope.variable_scope('dnn',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = input_layer_partitioner or (
            None
            if use_tpu else partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas, min_slice_size=64 << 20))

        logit_fn = dnn_logit_fn_builder(
            units=head.logits_dimension,
            hidden_units=hidden_units,
            feature_columns=feature_columns,
            activation_fn=activation_fn,
            dropout=dropout,
            input_layer_partitioner=input_layer_partitioner,
            batch_norm=batch_norm)
        logits = logit_fn(features=features, mode=mode)

        if use_tpu:
            return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
                features=features,
                mode=mode,
                labels=labels,
                optimizer=optimizer,
                logits=logits)
        else:
            return head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              optimizer=optimizer,
                                              logits=logits)
def _dnn_tree_combined_model_fn(
    features, labels, mode, head, dnn_hidden_units,
    dnn_feature_columns, tree_learner_config, num_trees,
    tree_examples_per_layer,
    config=None, dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
    tree_feature_columns=None,
    tree_center_bias=True):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      the training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train the DNN for before switching
      to GBDT.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      input_layer = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits",
        values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES,
              scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_no_train_op_fn,
      logits=tree_train_logits)
  dnn_train_op = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_dnn_train_op_fn,
      logits=dnn_logits).train_op
  tree_train_op = head.create_model_fn_ops(
      features=tree_features,
      mode=mode,
      labels=labels,
      train_op_fn=_tree_train_op_fn,
      logits=tree_train_logits).train_op

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  model_fn_ops.training_hooks.extend([
      trainer_hooks.SwitchTrainOp(
          dnn_train_op, dnn_steps_to_train, tree_train_op),
      trainer_hooks.StopAfterNTrees(
          num_trees, attempted_trees, finalized_trees)])

  return model_fn_ops
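In both combined model_fns above, the returned ops use `_no_train_op_fn`, so the actual optimization is driven entirely by the training hooks: `SwitchTrainOp` presumably runs the DNN train op for the first `dnn_steps_to_train` steps and the tree train op afterwards. A rough, hypothetical sketch of that switching idea as a plain `SessionRunHook` (not the actual `trainer_hooks` implementation):

import tensorflow as tf

class NaiveSwitchTrainOpHook(tf.train.SessionRunHook):
  """Runs `first_op` until the global step reaches `switch_step`, then `second_op`."""

  def __init__(self, first_op, switch_step, second_op):
    self._first_op = first_op
    self._switch_step = switch_step
    self._second_op = second_op

  def begin(self):
    self._global_step_tensor = tf.train.get_global_step()

  def before_run(self, run_context):
    # Fetch the current global step alongside the regular run.
    return tf.train.SessionRunArgs(self._global_step_tensor)

  def after_run(self, run_context, run_values):
    step = run_values.results
    train_op = self._first_op if step < self._switch_step else self._second_op
    run_context.session.run(train_op)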
Example #35
  def __call__(self, shape, dtype, axis=0):
    return partitioned_variables.min_max_variable_partitioner(
        max_partitions=self._max_shards,
        axis=axis,
        min_slice_size=self._min_shard_bytes,
        bytes_per_string_element=self._bytes_per_string)(shape, dtype)
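For context on the recurring `min_slice_size=64 << 20` setting: 64 << 20 is 64 MiB, so `min_max_variable_partitioner` only shards a variable across parameter servers when every shard would still hold at least 64 MiB. A small TF 1.x graph-mode sketch with toy sizes, using the public `tf.min_max_variable_partitioner` alias (all values here are illustrative):

import tensorflow as tf

partitioner = tf.min_max_variable_partitioner(
    max_partitions=4, axis=0, min_slice_size=16 << 10)  # at least 16 KiB per shard
with tf.variable_scope("partition_demo", partitioner=partitioner):
  weights = tf.get_variable("weights", shape=[2048, 8], dtype=tf.float32)

# `weights` is a PartitionedVariable: 2048 * 8 * 4 bytes = 64 KiB in total, so it
# is split into 4 shards of shape [512, 8] along axis 0.
print([p.get_shape().as_list() for p in weights])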
Example #36
def _dnn_model_fn(
    features, labels, mode, head, hidden_units, feature_columns,
    optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features,
          feature_columns=feature_columns)

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        'logits',
        values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss,
          global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
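A hedged usage sketch for the core-API model_fn above: bind the extra arguments with `functools.partial` and hand the result to `tf.estimator.Estimator`. The head comes from `tf.contrib.estimator` here, and the feature column and hidden-unit values are illustrative only.

import functools
import tensorflow as tf

model_fn = functools.partial(
    _dnn_model_fn,
    head=tf.contrib.estimator.binary_classification_head(),
    hidden_units=[128, 64],
    feature_columns=[tf.feature_column.numeric_column("x", shape=[10])])
estimator = tf.estimator.Estimator(model_fn=model_fn)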
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents if this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, set up the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
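A hedged sketch of a `params` dict matching the hyperparameters this model_fn reads; the values are illustrative, and the roughly-2%-of-`n_classes` rule of thumb for `n_samples` comes from the docstring above.

import tensorflow as tf

params = {
    "hidden_units": [512, 256],
    "feature_columns": [
        tf.contrib.layers.real_valued_column("x", dimension=64)],
    "n_classes": 10000,
    "n_samples": 200,          # roughly 2% of n_classes
    "n_labels": 5,
    "top_k": 10,
    "optimizer": None,         # falls back to Adagrad per the docstring
    "dropout": None,
    "gradient_clip_norm": None,
    "num_ps_replicas": 0,
}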
Example #38
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None,
                  tpu_estimator_spec=False):
  """Deep Neural Net model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    tpu_estimator_spec: Whether to return a `_TPUEstimatorSpec` or a
      `model_fn.EstimatorSpec` instance.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    logit_fn = _dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)

    if tpu_estimator_spec:
      return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
          features=features,
          mode=mode,
          labels=labels,
          optimizer=optimizer,
          logits=logits)
    else:
      return head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          optimizer=optimizer,
          logits=logits)
Example #39
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):
    """Deep Neural Net model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If features has the wrong type.
  """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    #optimizer = optimizers.get_optimizer_instance(
    #    optimizer, learning_rate=_LEARNING_RATE)
    if isinstance(optimizer, six.string_types):
        optimizer = optimizers.get_optimizer_instance(
            **{
                'optimizer': optimizer,
                'learning_rate': _LEARNING_RATE
            })
    else:
        optimizer = optimizers.get_optimizer_instance(**optimizer)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope('dnn',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = input_layer_partitioner or (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas, min_slice_size=64 << 20))

        logit_fn = _dnn_logit_fn_builder(
            units=head.logits_dimension,
            hidden_units=hidden_units,
            feature_columns=feature_columns,
            activation_fn=activation_fn,
            dropout=dropout,
            input_layer_partitioner=input_layer_partitioner)
        logits = logit_fn(features=features, mode=mode)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizer.minimize(
                loss, global_step=training_util.get_global_step())

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
      will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):

      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          feature_columns=linear_feature_columns)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      return distribute_lib.increment_var(global_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
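
The `_train_op_fn` above splits training between the two optimizers purely by variable scope: each `minimize` call only sees the trainable variables created under its own parent scope. A minimal, self-contained sketch of that collection lookup (illustrative names, not part of the model_fn):

import tensorflow as tf

# Each optimizer in _train_op_fn only receives the trainable variables whose
# names start with its parent scope ('dnn' or 'linear').
with tf.Graph().as_default():
  with tf.variable_scope('dnn'):
    tf.get_variable('w', shape=[3, 2])
  with tf.variable_scope('linear'):
    tf.get_variable('w', shape=[3, 1])
  dnn_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='dnn')
  linear_vars = tf.get_collection(
      tf.GraphKeys.TRAINABLE_VARIABLES, scope='linear')
  print([v.name for v in dnn_vars])     # ['dnn/w:0']
  print([v.name for v in linear_vars])  # ['linear/w:0']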
Example #41
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):
    """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
    optimizer = optimizers.get_optimizer_instance(optimizer,
                                                  learning_rate=_LEARNING_RATE)
    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope('dnn',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):
        input_layer_partitioner = input_layer_partitioner or (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas, min_slice_size=64 << 20))
        with variable_scope.variable_scope(
                'input_from_feature_columns',
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner):
            net = feature_column_lib.input_layer(
                features=features, feature_columns=feature_columns)

        for layer_id, num_hidden_units in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    'hiddenlayer_%d' % layer_id,
                    values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_layer_scope)
                if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = core_layers.dropout(net, rate=dropout, training=True)
            _add_hidden_layer_summary(net, hidden_layer_scope.name)

        with variable_scope.variable_scope('logits',
                                           values=(net, )) as logits_scope:
            logits = core_layers.dense(
                net,
                units=head.logits_dimension,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizer.minimize(
                loss, global_step=training_util.get_global_step())

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
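
For reference, a standalone sketch of the hidden-layer loop above, feeding a plain placeholder instead of the feature-column input layer; the function name, layer sizes, and dropout rate are illustrative, not library defaults:

import tensorflow as tf

def build_dnn_logits(net, hidden_units, logits_dimension,
                     dropout=None, is_training=False):
  # Stack of dense layers mirroring the loop in _dnn_model_fn, with dropout
  # applied only in training mode.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    net = tf.layers.dense(
        net,
        units=num_hidden_units,
        activation=tf.nn.relu,
        kernel_initializer=tf.glorot_uniform_initializer(),
        name='hiddenlayer_%d' % layer_id)
    if dropout is not None and is_training:
      net = tf.layers.dropout(net, rate=dropout, training=True)
  # Final linear projection to the logits dimension.
  return tf.layers.dense(net, units=logits_dimension, activation=None,
                         name='logits')

features = tf.placeholder(tf.float32, [None, 8])
logits = build_dnn_logits(features, hidden_units=[16, 8],
                          logits_dimension=1, dropout=0.2, is_training=True)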
Example #42
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
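
A quick numeric check (plain NumPy) of why the binary case above concatenates a zeros column before taking the softmax: softmax([0, z]) reproduces [1 - sigmoid(z), sigmoid(z)], so `_PROBABILITIES` stays consistent with the `_LOGISTIC` output:

import numpy as np

z = 1.3
sigmoid = 1.0 / (1.0 + np.exp(-z))
softmax = np.exp([0.0, z]) / np.sum(np.exp([0.0, z]))
print(np.allclose(softmax, [1.0 - sigmoid, sigmoid]))  # True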
Example #43
def _rnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  rnn_cell_fn,
                  sequence_feature_columns,
                  context_feature_columns,
                  optimizer='Adagrad',
                  input_layer_partitioner=None,
                  config=None):
  """Recurrent Neural Net model_fn.

  Args:
    features: dict of `Tensor` and `SparseTensor` objects returned from
      `input_fn`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
      returns an object of type `tf.nn.rnn_cell.RNNCell`.
    sequence_feature_columns: Iterable containing `FeatureColumn`s that
      represent sequential model inputs.
    context_feature_columns: Iterable containing `FeatureColumn`s that
      represent model inputs not associated with a specific timestep.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05 and gradient clip norm of
      5.0.
    input_layer_partitioner: Partitioner for input layer. Defaults
      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or optimizer is invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  # If user does not provide an optimizer instance, use the optimizer specified
  # by the string with default learning rate and gradient clipping.
  if not isinstance(optimizer, optimizer_lib.Optimizer):
    optimizer = optimizers.get_optimizer_instance(
        optimizer, learning_rate=_DEFAULT_LEARNING_RATE)
    optimizer = extenders.clip_gradients_by_norm(optimizer, _DEFAULT_CLIP_NORM)

  num_ps_replicas = config.num_ps_replicas if config else 0
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'rnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    logit_fn = _rnn_logit_fn_builder(
        output_units=head.logits_dimension,
        rnn_cell_fn=rnn_cell_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss,
          global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
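
The optimizer above is wrapped by `extenders.clip_gradients_by_norm`. A rough, hand-rolled sketch of that clipping step (an illustration of the idea, not that helper's actual implementation):

import tensorflow as tf

def minimize_with_global_norm_clipping(optimizer, loss, clip_norm,
                                       global_step=None):
  # Compute gradients, clip them jointly by global norm, then apply.
  grads_and_vars = optimizer.compute_gradients(loss)
  grads, variables = zip(*grads_and_vars)
  clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm)
  return optimizer.apply_gradients(
      zip(clipped_grads, variables), global_step=global_step)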
Example #44
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  head = params.get("head", None)
  if not head:
    # TODO(zakaria): Remove these params and make head mandatory
    head = head_lib._multi_class_head(  # pylint: disable=protected-access
        params.get("n_classes"),
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params.get("enable_centered_bias", False))

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, targets, mode, _train_op_fn, logits)
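
An illustrative `params` dict for the model_fn above. The keys follow the docstring; the feature column and optimizer are placeholders, and `head` is left unset so the model_fn falls back to building its default multi-class head:

import tensorflow as tf
from tensorflow.contrib import layers

params = {
    'feature_columns': [layers.real_valued_column('age')],
    'n_classes': 2,
    'weight_column_name': None,
    'optimizer': tf.train.FtrlOptimizer(learning_rate=0.1),
    'gradient_clip_norm': 5.0,
    'num_ps_replicas': 0,
    'joint_weights': False,
}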
Example #45
def _dnn_linear_combined_model_fn(
    features, labels, mode, head,
    linear_feature_columns=None, linear_optimizer='Ftrl',
    dnn_feature_columns=None, dnn_optimizer='Adagrad', 
    dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
      will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if isinstance(dnn_optimizer, six.string_types):
      dnn_optimizer = optimizers.get_optimizer_instance(
          dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    else:
      dnn_optimizer = optimizers.get_optimizer_instance(dnn_optimizer)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):

      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    if isinstance(linear_optimizer, six.string_types):
      linear_optimizer = optimizers.get_optimizer_instance(
          linear_optimizer,
          learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    else:
      linear_optimizer = optimizers.get_optimizer_instance(linear_optimizer)
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          feature_columns=linear_feature_columns)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
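
Both towers above resolve their optimizers through `optimizers.get_optimizer_instance`. A small sketch of how that helper behaves, assuming the TF 1.x private module path these examples rely on (string names need an explicit learning rate; `Optimizer` instances pass through unchanged):

import tensorflow as tf
from tensorflow.python.estimator.canned import optimizers  # assumed TF 1.x path

# A string name is looked up and built with the given learning rate.
adagrad = optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
# An existing Optimizer instance is returned as-is.
adam = optimizers.get_optimizer_instance(tf.train.AdamOptimizer(0.001))
print(type(adagrad).__name__, type(adam).__name__)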
Example #46
def _dnn_model_fn(features, labels, mode, params):
    """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * embedding_lr_multipliers: Optional. A dictionary from
        `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
        multiply with learning rate for the embedding variables.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
    head = params["head"]
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    optimizer = params.get("optimizer") or "Adagrad"
    activation_fn = params.get("activation_fn")
    dropout = params.get("dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    num_ps_replicas = params.get("num_ps_replicas", 0)
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    features = _get_feature_dict(features)
    parent_scope = "dnn"

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))
    input_layer_scope = parent_scope + "/input_from_feature_columns"
    with variable_scope.variable_scope(
            input_layer_scope,
            values=list(six.itervalues(features)),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                parent_scope + "/hiddenlayer_%d" % layer_id,
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(net,
                                         num_hidden_units,
                                         activation_fn=activation_fn,
                                         variables_collections=[parent_scope],
                                         scope=scope)
            if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                net = layers.dropout(net, keep_prob=(1.0 - dropout))
        _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
            parent_scope + "/logits",
            values=[net],
            partitioner=hidden_layer_partitioner) as scope:
        logits = layers.fully_connected(net,
                                        head.logits_dimension,
                                        activation_fn=None,
                                        variables_collections=[parent_scope],
                                        scope=scope)
    _add_hidden_layer_summary(logits, scope.name)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        return optimizers.optimize_loss(
            loss=loss,
            global_step=contrib_variables.get_global_step(),
            learning_rate=_LEARNING_RATE,
            optimizer=_get_optimizer(optimizer),
            gradient_multipliers=(
                dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                    embedding_lr_multipliers, parent_scope,
                    input_layer_scope)),
            clip_gradients=gradient_clip_norm,
            name=parent_scope,
            # Empty summaries to prevent optimizers from logging the training_loss.
            summaries=[])

    return head.head_ops(features, labels, mode, _train_op_fn, logits)
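
The `_train_op_fn` above delegates to the contrib `optimize_loss` helper. A toy, self-contained sketch of that call pattern (the real call also passes `gradient_multipliers`, a `name` scope, and collected variables):

import tensorflow as tf
from tensorflow.contrib.layers import optimize_loss

with tf.Graph().as_default():
  # Placeholder quadratic loss over a single scalar variable.
  w = tf.get_variable('w', shape=[], initializer=tf.zeros_initializer())
  loss = tf.square(w - 3.0)
  train_op = optimize_loss(
      loss=loss,
      global_step=tf.train.get_or_create_global_step(),
      learning_rate=0.05,
      optimizer='Adagrad',
      clip_gradients=5.0,
      summaries=[])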
Example #47
  def testInitFromPartitionVar(self):
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
      v1 = _create_partition_checkpoints(session, checkpoint_dir)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
          my1 = variable_scope.get_variable(
              name="my1",
              shape=[100, 100],
              initializer=init_ops.zeros_initializer(),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=8 << 10))
          my1_var_list = my1._get_variable_list()
        # Create another variable with different partitions than the variable in
        # the checkpoint.
        with variable_scope.variable_scope("some_other_scope"):
          my2 = variable_scope.get_variable(
              name="var1",
              shape=[100, 100],
              initializer=init_ops.zeros_initializer(),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=16 << 10))
          my2_var_list = my2._get_variable_list()

        checkpoint_utils.init_from_checkpoint(checkpoint_dir, {
            "scope/var1": "some_scope/my1",
            "scope/": "some_other_scope/"})

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
        my2_values = session.run(my2_var_list)
        # Verify we created different number of partitions.
        self.assertNotEquals(len(my2_values), len(v1))
        # Verify the values were correctly initialized in spite of the
        # different partitioning.
        full_my2_values = np.concatenate(my2_values, axis=0)
        full_v1_values = np.concatenate(v1, axis=0)
        self.assertAllEqual(full_my2_values, full_v1_values)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope("some_scope"):
          my1 = variable_scope.get_variable(
              name="my1",
              shape=[100, 100],
              initializer=init_ops.truncated_normal_initializer(0.5),
              partitioner=partitioned_variables.min_max_variable_partitioner(
                  max_partitions=5, axis=0, min_slice_size=8 << 10))
          my1_var_list = my1._get_variable_list()

        checkpoint_utils.init_from_checkpoint(checkpoint_dir,
                                              {"scope/var1": my1_var_list,})

        session.run(variables.global_variables_initializer())
        my1_values = session.run(my1_var_list)
        self.assertAllEqual(my1_values, v1)
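
For context, a standalone sketch of how `min_max_variable_partitioner`, used throughout these examples and in the test above, shards a variable along axis 0; the resulting shard count follows from the slice-size arithmetic rather than being fixed:

import tensorflow as tf

with tf.Graph().as_default():
  partitioner = tf.min_max_variable_partitioner(
      max_partitions=5, axis=0, min_slice_size=8 << 10)  # 8 KiB slices
  with tf.variable_scope('scope', partitioner=partitioner):
    var1 = tf.get_variable(
        'var1', shape=[100, 100], initializer=tf.zeros_initializer())
  # var1 is a PartitionedVariable; its shards are the per-slice variables
  # that init_from_checkpoint maps onto the checkpointed partitions.
  print(len(var1._get_variable_list()))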
Example #48
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
    """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
    head = params["head"]
    linear_feature_columns = params.get("linear_feature_columns")
    linear_optimizer = params.get("linear_optimizer") or "Ftrl"
    joint_linear_weights = params.get("joint_linear_weights")
    dnn_feature_columns = params.get("dnn_feature_columns")
    dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
    dnn_hidden_units = params.get("dnn_hidden_units")
    dnn_activation_fn = params.get("dnn_activation_fn")
    dnn_dropout = params.get("dnn_dropout")
    gradient_clip_norm = params.get("gradient_clip_norm")
    input_layer_min_slice_size = (params.get("input_layer_min_slice_size")
                                  or 64 << 20)
    num_ps_replicas = config.num_ps_replicas if config else 0
    embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

    if not linear_feature_columns and not dnn_feature_columns:
        raise ValueError(
            "Either linear_feature_columns or dnn_feature_columns must be defined."
        )

    features = _get_feature_dict(features)

    # Build DNN Logits.
    dnn_parent_scope = "dnn"

    if not dnn_feature_columns:
        dnn_logits = None
    else:
        input_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=input_layer_min_slice_size))
        input_layer_scope = dnn_parent_scope + "/input_from_feature_columns"
        with variable_scope.variable_scope(
                input_layer_scope,
                values=features.values(),
                partitioner=input_layer_partitioner) as scope:
            net = layers.input_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=dnn_feature_columns,
                weight_collections=[dnn_parent_scope],
                scope=scope)

        hidden_layer_partitioner = (
            partitioned_variables.min_max_variable_partitioner(
                max_partitions=num_ps_replicas))
        for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
            with variable_scope.variable_scope(
                    dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
                    values=[net],
                    partitioner=hidden_layer_partitioner) as scope:
                net = layers.fully_connected(
                    net,
                    num_hidden_units,
                    activation_fn=dnn_activation_fn,
                    variables_collections=[dnn_parent_scope],
                    scope=scope)
                if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
            # TODO(b/31209633): Consider adding summary before dropout.
            _add_hidden_layer_summary(net, scope.name)

        with variable_scope.variable_scope(
                dnn_parent_scope + "/logits",
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            dnn_logits = layers.fully_connected(
                net,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[dnn_parent_scope],
                scope=scope)
        _add_hidden_layer_summary(dnn_logits, scope.name)

    # Build Linear logits.
    linear_parent_scope = "linear"

    if not linear_feature_columns:
        linear_logits = None
    else:
        linear_partitioner = partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20)
        with variable_scope.variable_scope(
                linear_parent_scope,
                values=features.values(),
                partitioner=linear_partitioner) as scope:
            if joint_linear_weights:
                linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)
            else:
                linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=linear_feature_columns,
                    num_outputs=head.logits_dimension,
                    weight_collections=[linear_parent_scope],
                    scope=scope)

    # Combine logits and build full model.
    if dnn_logits is not None and linear_logits is not None:
        logits = dnn_logits + linear_logits
    elif dnn_logits is not None:
        logits = dnn_logits
    else:
        logits = linear_logits

    def _make_training_op(training_loss):
        """Training op for the DNN linear combined model."""
        train_ops = []
        if dnn_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_DNN_LEARNING_RATE,
                    optimizer=_get_optimizer(dnn_optimizer),
                    gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                        embedding_lr_multipliers, dnn_parent_scope,
                        input_layer_scope),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(dnn_parent_scope),
                    name=dnn_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))
        if linear_logits is not None:
            train_ops.append(
                optimizers.optimize_loss(
                    loss=training_loss,
                    global_step=contrib_variables.get_global_step(),
                    learning_rate=_linear_learning_rate(
                        len(linear_feature_columns)),
                    optimizer=_get_optimizer(linear_optimizer),
                    clip_gradients=gradient_clip_norm,
                    variables=ops.get_collection(linear_parent_scope),
                    name=linear_parent_scope,
                    # Empty summaries, because head already logs "loss" summary.
                    summaries=[]))

        return control_flow_ops.group(*train_ops)

    return head.create_model_fn_ops(features,
                                    labels,
                                    mode,
                                    _make_training_op,
                                    logits=logits)
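
A note on the "64M" default mentioned in the docstring above: throughout these examples it is written as a bit shift, as the short check below confirms:

# 64 << 20 is 64 * 2**20 bytes, i.e. 64 MiB.
min_slice_size = 64 << 20
print(min_slice_size)                      # 67108864
print(min_slice_size == 64 * 1024 * 1024)  # True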
Example #49
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
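
The `fix_global_step_increment_bug` path above increments the global step exactly once, after the grouped train op has run, instead of letting each tower's `optimize_loss` bump it separately. A minimal sketch of that pattern with a stand-in train op:

import tensorflow as tf

with tf.Graph().as_default():
  global_step = tf.train.get_or_create_global_step()
  train_op = tf.no_op()  # stand-in for control_flow_ops.group(*train_ops)
  with tf.control_dependencies([train_op]):
    with tf.colocate_with(global_step):
      increment_op = tf.assign_add(global_step, 1).op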
Example #50
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope, values=features.values(), partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (_get_optimizer(optimizer).apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
Example #51
def _dnn_tree_combined_model_fn(features,
                                labels,
                                mode,
                                head,
                                dnn_hidden_units,
                                dnn_feature_columns,
                                tree_learner_config,
                                num_trees,
                                tree_examples_per_layer,
                                config=None,
                                dnn_optimizer="Adagrad",
                                dnn_activation_fn=nn.relu,
                                dnn_dropout=None,
                                dnn_input_layer_partitioner=None,
                                dnn_input_layer_to_tree=True,
                                dnn_steps_to_train=10000,
                                tree_feature_columns=None,
                                tree_center_bias=False,
                                use_core_versions=False):
    """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
    if not isinstance(features, dict):
        raise ValueError("features should be a dictionary of `Tensor`s. "
                         "Given type: {}".format(type(features)))

    if not dnn_feature_columns:
        raise ValueError("dnn_feature_columns must be specified")

    # Build DNN Logits.
    dnn_parent_scope = "dnn"
    dnn_partitioner = dnn_input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

    with variable_scope.variable_scope(dnn_parent_scope,
                                       values=tuple(six.itervalues(features)),
                                       partitioner=dnn_partitioner):

        with variable_scope.variable_scope(
                "input_from_feature_columns",
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner) as input_layer_scope:
            if use_core_versions:
                input_layer = feature_column_lib.input_layer(
                    features=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope])
            else:
                input_layer = layers.input_from_feature_columns(
                    columns_to_tensors=features,
                    feature_columns=dnn_feature_columns,
                    weight_collections=[dnn_parent_scope],
                    scope=input_layer_scope)
        previous_layer = input_layer
        for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
            with variable_scope.variable_scope(
                    "hiddenlayer_%d" % layer_id,
                    values=(previous_layer, )) as hidden_layer_scope:
                net = layers.fully_connected(
                    previous_layer,
                    num_hidden_units,
                    activation_fn=dnn_activation_fn,
                    variables_collections=[dnn_parent_scope],
                    scope=hidden_layer_scope)
                if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
            _add_hidden_layer_summary(net, hidden_layer_scope.name)
            previous_layer = net
        with variable_scope.variable_scope(
                "logits", values=(previous_layer, )) as logits_scope:
            dnn_logits = layers.fully_connected(
                previous_layer,
                head.logits_dimension,
                activation_fn=None,
                variables_collections=[dnn_parent_scope],
                scope=logits_scope)
        _add_hidden_layer_summary(dnn_logits, logits_scope.name)

        def _dnn_train_op_fn(loss):
            """Returns the op to optimize the loss."""
            return optimizers.optimize_loss(
                loss=loss,
                global_step=training_util.get_global_step(),
                learning_rate=_DNN_LEARNING_RATE,
                optimizer=_get_optimizer(dnn_optimizer),
                name=dnn_parent_scope,
                variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES,
                                             scope=dnn_parent_scope),
                # Empty summaries to prevent optimizers from logging training_loss.
                summaries=[])

    # Build Tree Logits.
    global_step = training_util.get_global_step()
    with ops.device(global_step.device):
        ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=0,
            tree_ensemble_config="",  # Initialize an empty ensemble.
            name="ensemble_model")

    tree_features = features.copy()
    if dnn_input_layer_to_tree:
        tree_features["dnn_input_layer"] = input_layer
        tree_feature_columns.append(
            layers.real_valued_column("dnn_input_layer"))
    gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
        is_chief=config.is_chief,
        num_ps_replicas=config.num_ps_replicas,
        ensemble_handle=ensemble_handle,
        center_bias=tree_center_bias,
        examples_per_layer=tree_examples_per_layer,
        learner_config=tree_learner_config,
        feature_columns=tree_feature_columns,
        logits_dimension=head.logits_dimension,
        features=tree_features)

    with ops.name_scope("gbdt"):
        predictions_dict = gbdt_model.predict(mode)
        tree_logits = predictions_dict["predictions"]

        def _tree_train_op_fn(loss):
            """Returns the op to optimize the loss."""
            update_op = gbdt_model.train(loss, predictions_dict, labels)
            with ops.control_dependencies(
                [update_op]), (ops.colocate_with(global_step)):
                update_op = state_ops.assign_add(global_step, 1).op
                return update_op

    tree_train_logits = dnn_logits + tree_logits

    def _no_train_op_fn(loss):
        """Returns a no-op."""
        del loss
        return control_flow_ops.no_op()

    if use_core_versions:
        model_fn_ops = head.create_estimator_spec(features=features,
                                                  mode=mode,
                                                  labels=labels,
                                                  train_op_fn=_no_train_op_fn,
                                                  logits=tree_train_logits)
        dnn_train_op = head.create_estimator_spec(features=features,
                                                  mode=mode,
                                                  labels=labels,
                                                  train_op_fn=_dnn_train_op_fn,
                                                  logits=dnn_logits)
        dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
            dnn_train_op).train_op

        tree_train_op = head.create_estimator_spec(
            features=tree_features,
            mode=mode,
            labels=labels,
            train_op_fn=_tree_train_op_fn,
            logits=tree_train_logits)
        tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
            tree_train_op).train_op

        model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(
            model_fn_ops)
    else:
        model_fn_ops = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=_no_train_op_fn,
                                                logits=tree_train_logits)
        dnn_train_op = head.create_model_fn_ops(features=features,
                                                mode=mode,
                                                labels=labels,
                                                train_op_fn=_dnn_train_op_fn,
                                                logits=dnn_logits).train_op
        tree_train_op = head.create_model_fn_ops(
            features=tree_features,
            mode=mode,
            labels=labels,
            train_op_fn=_tree_train_op_fn,
            logits=tree_train_logits).train_op

    if tree_center_bias:
        num_trees += 1
    finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

    model_fn_ops.training_hooks.extend([
        trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                    tree_train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ])

    return model_fn_ops
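
Because the combined prediction is simply `dnn_logits + tree_logits`, a loss computed on the summed logits would backpropagate into both sub-models at once. The sketch below is my own illustration, assuming a TF 1.x-compatible runtime via `tensorflow.compat.v1`; it shows that both addends receive the same gradient, which is why the model_fn above builds separate train ops and alternates between them with `SwitchTrainOp` instead of optimizing the sum jointly.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

dnn_logits = tf.Variable([[0.5]], name="dnn_logits")
tree_logits = tf.Variable([[1.5]], name="tree_logits")
combined = dnn_logits + tree_logits
loss = tf.reduce_sum(tf.square(combined - 1.0))

# Gradients of the loss w.r.t. both addends are identical.
grads = tf.gradients(loss, [dnn_logits, tree_logits])
with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  print(session.run(grads))  # each gradient is [[2.0]]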
Example #52
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    head = params.get("head", None)
    if not head:
        # TODO(zakaria): Remove these params and make head mandatory
        head = head_lib._multi_class_head(  # pylint: disable=protected-access
            params.get("n_classes"),
            weight_column_name=params["weight_column_name"],
            enable_centered_bias=params.get("enable_centered_bias", False))

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)

    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=head.logits_dimension,
                weight_collections=[parent_scope],
                scope=scope))

    def _train_op_fn(loss):
        global_step = contrib_variables.get_global_step()
        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        return (optimizer.apply_gradients(zip(grads, my_vars),
                                          global_step=global_step))

    return head.head_ops(features, targets, mode, _train_op_fn, logits)
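
The `gradient_clip_norm` path in `_train_op_fn` above relies on `clip_by_global_norm`, which rescales all gradients jointly so their combined norm never exceeds the given ratio. A minimal sketch, assuming a TF 1.x-compatible runtime; the variable values and learning rate are placeholders.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

w = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(w))  # gradient is [6, 8], global norm 10
grads = tf.gradients(loss, [w])
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(
    zip(clipped, [w]))

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  print(session.run([clipped, global_norm]))  # gradients rescaled to norm 5
  session.run(train_op)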
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True, a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires that all columns be sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
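
`_make_training_op` above runs one optimizer per model component over the same loss but over disjoint variable sets, then fuses the resulting ops into a single train op with `group`. A standalone sketch of that pattern, assuming a TF 1.x-compatible runtime; the toy loss and learning rates are placeholders.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

linear_w = tf.Variable(1.0, name="linear_w")
dnn_w = tf.Variable(2.0, name="dnn_w")
loss = tf.square(3.0 * linear_w + dnn_w - 1.0)

# Each optimizer only touches its own variable set, as in the model_fn above.
linear_op = tf.train.FtrlOptimizer(0.2).minimize(loss, var_list=[linear_w])
dnn_op = tf.train.AdagradOptimizer(0.05).minimize(loss, var_list=[dnn_w])
train_op = tf.group(linear_op, dnn_op)

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  session.run(train_op)  # one step updates both parts of the model
  print(session.run([linear_w, dnn_w]))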
def _dnn_tree_combined_model_fn(
    features,
    labels,
    mode,
    head,
    dnn_hidden_units,
    dnn_feature_columns,
    tree_learner_config,
    num_trees,
    tree_examples_per_layer,
    config=None,
    dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu,
    dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True,
    dnn_steps_to_train=10000,
    predict_with_tree_only=False,
    tree_feature_columns=None,
    tree_center_bias=False,
    dnn_to_tree_distillation_param=None,
    use_core_versions=False,
    output_type=model.ModelBuilderOutputType.MODEL_FN_OPS):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      the training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train the DNN for before switching
      to the GBDT.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn, for
      computing distillation loss, takes dnn_logits, tree_logits and weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, we will take the sigmoid/softmax cross entropy
      loss by default. When distillation is applied, `predict_with_tree_only`
      will be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to return a `ModelFnOps` object
      (`ModelBuilderOutputType.MODEL_FN_OPS`) or an `EstimatorSpec`
      (`ModelBuilderOutputType.ESTIMATOR_SPEC`).

  Returns:
    A `ModelFnOps` or `EstimatorSpec` object, depending on `output_type`.

  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  if dnn_to_tree_distillation_param:
    if not predict_with_tree_only:
      logging.warning("Update predict_with_tree_only to True since distillation "
                      "is specified.")
      predict_with_tree_only = True

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

  if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and
      not use_core_versions):
    raise ValueError("You must use core versions with Estimator Spec")

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      if use_core_versions:
        input_layer = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope])
      else:
        input_layer = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope],
            scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits", values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features,
      use_core_columns=use_core_versions)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
            features, head.weight_column_name)
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # Create a default loss_fn similar to the loss_fn that
          # multi_class_head used previously.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        summary.scalar("dnn_to_tree_distillation_loss",
                       dnn_to_tree_distillation_loss)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  if predict_with_tree_only:
    if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
      tree_train_logits = tree_logits
    else:
      tree_train_logits = control_flow_ops.cond(
          global_step > dnn_steps_to_train,
          lambda: tree_logits,
          lambda: dnn_logits)
  else:
    tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
    if use_core_versions:
      model_fn_ops = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits)
      dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          dnn_train_op).train_op

      tree_train_op = head.create_estimator_spec(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits)
      tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          tree_train_op).train_op

      model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(
          model_fn_ops)
    else:
      model_fn_ops = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits).train_op
      tree_train_op = head.create_model_fn_ops(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits).train_op

    # Add the hooks
    model_fn_ops.training_hooks.extend([
        trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                    tree_train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ])
    return model_fn_ops

  elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
    fusion_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_no_train_op_fn,
        logits=tree_train_logits)
    dnn_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_dnn_train_op_fn,
        logits=dnn_logits)
    tree_spec = head.create_estimator_spec(
        features=tree_features,
        mode=mode,
        labels=labels,
        train_op_fn=_tree_train_op_fn,
        logits=tree_train_logits)

    training_hooks = [
        trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                    tree_spec.train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ]
    fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                       list(fusion_spec.training_hooks))
    return fusion_spec
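
The distillation branch of `_tree_train_op_fn` above treats the DNN logits as a fixed teacher (via `stop_gradient`) and adds a weighted cross-entropy term between the teacher's probabilities and the tree logits to the tree's training loss. A generic sketch of that idea, assuming a TF 1.x-compatible runtime; it does not reproduce the exact contrib `distillation_loss` helper, and all values are placeholders.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

dnn_logits = tf.Variable([[2.0], [-1.0]], name="teacher_logits")
tree_logits = tf.Variable([[0.0], [0.0]], name="student_logits")
loss_weight = 0.1

# Freeze the teacher so the distillation term only trains the tree path.
teacher_probs = tf.sigmoid(tf.stop_gradient(dnn_logits))
distillation = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        labels=teacher_probs, logits=tree_logits))
base_loss = tf.reduce_mean(tf.square(tree_logits))  # stand-in for the tree loss
total_loss = base_loss + loss_weight * distillation

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  print(session.run([base_loss, distillation, total_loss]))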
Example #55
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
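
For the binary case above, a zero column is prepended to the logits before `softmax`, so the two-class probabilities line up with the sigmoid output. A quick NumPy check of that identity (my own illustration; the logit values are arbitrary).

import numpy as np

def sigmoid(z):
  return 1.0 / (1.0 + np.exp(-z))

def softmax(x):
  e = np.exp(x - np.max(x, axis=1, keepdims=True))
  return e / np.sum(e, axis=1, keepdims=True)

z = np.array([[-2.0], [0.3], [4.0]])
two_class_logits = np.concatenate([np.zeros_like(z), z], axis=1)
expected = np.concatenate([1.0 - sigmoid(z), sigmoid(z)], axis=1)
print(np.allclose(softmax(two_class_logits), expected))  # True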
Example #56
def _linear_classifier_model_fn(features, targets, mode, params):
  """Estimator's linear model_fn."""
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  feat_values = (features.values() if isinstance(features, dict)
                 else [features])
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      feat_values, "linear", partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
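
When `weight_column_name` is set, the per-example losses are scaled by the weight tensor before reduction, so heavily weighted examples pull harder on the parameters. The `_weighted_loss` helper itself is not reproduced in these examples; the NumPy sketch below shows one common convention (a weighted mean) with placeholder values.

import numpy as np

per_example_loss = np.array([0.2, 1.0, 0.5])
weights = np.array([1.0, 3.0, 0.0])  # a zero weight drops the example entirely

weighted_mean_loss = np.sum(per_example_loss * weights) / np.sum(weights)
print(weighted_mean_loss)  # 0.8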
Example #57
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = params.get("activation_fn")
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as input_layer_scope:
      if all([
          isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for fc in feature_columns
      ]):
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits",
        values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=contrib_variables.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=(
              dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, parent_scope,
                  input_layer_scope.name)),
          clip_gradients=gradient_clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
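
Both the input layer and the hidden layers above are sharded with `min_max_variable_partitioner`, which only splits a variable when every slice would still hold at least `min_slice_size` bytes, and never creates more than `max_partitions` shards. A small probe of that behavior, assuming a TF 1.x-compatible runtime; the shape, scope name, and partition limits are arbitrary.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

partitioner = tf.min_max_variable_partitioner(
    max_partitions=8, min_slice_size=64 << 20)  # 64MB slices, as above
with tf.variable_scope("input_from_feature_columns", partitioner=partitioner):
  # A [2**21, 16] float32 variable is roughly 128MB in total.
  embedding = tf.get_variable("weights", shape=[2 ** 21, 16])

print(len(list(embedding)))  # number of shards chosen by the partitioner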
  def __init__(self,  # _joint_linear_weights pylint: disable=invalid-name
               model_dir=None,
               weight_column_name=None,
               linear_feature_columns=None,
               linear_optimizer=None,
               _joint_linear_weights=False,
               dnn_feature_columns=None,
               dnn_optimizer=None,
               dnn_hidden_units=None,
               dnn_activation_fn=nn.relu,
               dnn_dropout=None,
               gradient_clip_norm=None,
               enable_centered_bias=False,
               label_dimension=1,
               config=None,
               feature_engineering_fn=None,
               embedding_lr_multipliers=None,
               input_layer_min_slice_size=None,
               fix_global_step_increment_bug=False):
    """Initializes a DNNLinearCombinedRegressor instance.

    Note: New users must set `fix_global_step_increment_bug=True` when creating
    an estimator.

    Args:
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. If `None`, will use a FTRL optimizer.
      _joint_linear_weights: If True, a single (possibly partitioned) variable
        will be used to store the linear model weights. It's faster, but
        requires that all columns are sparse and have the 'sum' combiner.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. If `None`, will use an Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      gradient_clip_norm: A float > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      config: RunConfig object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.
      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
        a `float` multiplier. Multiplier will be used to multiply with
        learning rate for the embedding variables.
      input_layer_min_slice_size: Optional. The min slice size of input layer
        partitions. If not provided, will use the default of 64M.
      fix_global_step_increment_bug: If `False`, the estimator needs two fit
        steps to optimize both linear and dnn parts. If `True`, this bug is
        fixed. New users must set this to `True`, but the default value is
        `False` for backwards compatibility.

    Raises:
      ValueError: If both linear_feature_columns and dnn_features_columns are
        empty at the same time.
    """
    linear_feature_columns = linear_feature_columns or []
    dnn_feature_columns = dnn_feature_columns or []
    self._feature_columns = linear_feature_columns + dnn_feature_columns
    if not self._feature_columns:
      raise ValueError("Either linear_feature_columns or dnn_feature_columns "
                       "must be defined.")

    # TODO(b/35922130): Replace with `input_layer_partitioner` arg.
    input_layer_partitioner = None
    if input_layer_min_slice_size is not None:
      input_layer_partitioner = (
          partitioned_variables.min_max_variable_partitioner(
              max_partitions=config.num_ps_replicas if config else 0,
              min_slice_size=input_layer_min_slice_size))

    head = head_lib.regression_head(
        weight_column_name=weight_column_name,
        label_dimension=label_dimension,
        enable_centered_bias=enable_centered_bias)
    super(DNNLinearCombinedRegressor, self).__init__(
        model_fn=_dnn_linear_combined_model_fn,
        model_dir=model_dir,
        config=config,
        params={
            "head": head,
            "linear_feature_columns": linear_feature_columns,
            "linear_optimizer": linear_optimizer,
            "joint_linear_weights": _joint_linear_weights,
            "dnn_feature_columns": dnn_feature_columns,
            "dnn_optimizer": dnn_optimizer,
            "dnn_hidden_units": dnn_hidden_units,
            "dnn_activation_fn": dnn_activation_fn,
            "dnn_dropout": dnn_dropout,
            "gradient_clip_norm": gradient_clip_norm,
            "embedding_lr_multipliers": embedding_lr_multipliers,
            "input_layer_partitioner": input_layer_partitioner,
            "fix_global_step_increment_bug": fix_global_step_increment_bug,
        },
        feature_engineering_fn=feature_engineering_fn)
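
A hedged usage sketch for the constructor above: instantiating the contrib `DNNLinearCombinedRegressor` with one shared real-valued column. The column name and hyperparameter values are placeholders, and the imports assume a TF 1.x installation where `tf.contrib` is still available.

from tensorflow.contrib import layers
from tensorflow.contrib.learn import DNNLinearCombinedRegressor

age = layers.real_valued_column("age")
regressor = DNNLinearCombinedRegressor(
    linear_feature_columns=[age],
    dnn_feature_columns=[age],
    dnn_hidden_units=[32, 16],
    dnn_dropout=0.1,
    fix_global_step_increment_bug=True)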
Example #59
def _linear_model_fn(features,
                     labels,
                     mode,
                     head,
                     feature_columns,
                     optimizer,
                     partitioner,
                     config,
                     sparse_combiner='sum'):
    """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.
    sparse_combiner: A string specifying how to reduce if a categorical column
      is multivalent.  One of "mean", "sqrtn", and "sum".

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))

    num_ps_replicas = config.num_ps_replicas if config else 0

    partitioner = partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    with variable_scope.variable_scope('linear',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=partitioner):

        if isinstance(optimizer, LinearSDCA):
            assert sparse_combiner == 'sum'
            return _sdca_model_fn(features, labels, mode, head,
                                  feature_columns, optimizer)

        else:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=feature_columns,
                sparse_combiner=sparse_combiner,
            )
            logits = logit_fn(features=features)

            optimizer = optimizers.get_optimizer_instance(
                optimizer or _get_default_optimizer(feature_columns),
                learning_rate=_LEARNING_RATE)

            return head.create_estimator_spec(features=features,
                                              mode=mode,
                                              labels=labels,
                                              optimizer=optimizer,
                                              logits=logits)
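
The canned `tf.estimator.LinearClassifier` drives a linear model_fn of this shape. A hedged end-to-end sketch, assuming a TF 1.x-compatible runtime; the feature column, toy data, and step counts are placeholders.

import tensorflow.compat.v1 as tf

x = tf.feature_column.numeric_column("x")
classifier = tf.estimator.LinearClassifier(feature_columns=[x], optimizer="Ftrl")

def input_fn():
  features = {"x": tf.constant([[0.0], [1.0], [2.0], [3.0]])}
  labels = tf.constant([[0], [0], [1], [1]])
  return features, labels

classifier.train(input_fn=input_fn, steps=20)
print(classifier.evaluate(input_fn=input_fn, steps=1)["accuracy"])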