Example #1
  def testBiasOnly(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when it's the only feature present.

      All of the instances in this input only have the bias feature, and
      1/4 of the labels are positive. This means that the expected weight
      for the bias should be close to the average label, i.e. 0.25.

      Returns:
        Training data for the test.
      """
      num_examples = 40
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          # place_holder is an empty column which is always 0 (absent), because
          # LinearClassifier requires at least one column.
          'place_holder':
              constant_op.constant([[0.0]] * num_examples),
      }, constant_op.constant(
          [1 if i % 4 == 0 else 0 for i in range(num_examples)])

    place_holder = feature_column_v2.numeric_column_v2('place_holder')
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[place_holder], optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=100)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.25, err=0.1)
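A quick cross-check of the docstring's arithmetic: for a bias-only model under squared loss, the optimal bias is the mean label. A minimal numpy sketch (not part of the original test):

import numpy as np

labels = np.array([1 if i % 4 == 0 else 0 for i in range(40)])
# The least-squares-optimal constant prediction is the label mean.
print(labels.mean())  # 0.25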
Example #2
  def testUnknownBatchSize(self):
    """Tests LinearRegressor with LinearSDCA and unknown batch size."""

    def input_fn():
      # Similar to testBiasOnly, but uses placeholder_with_default in order
      # to leave the static batch size unspecified.
      return {
          'example_id':
              array_ops.placeholder_with_default(
                  constant_op.constant(['0', '1']),
                  shape=[None]),
          # always_zero is an empty column which is always 0 (absent), because
          # LinearClassifier requires at least one column.
          'always_zero':
              array_ops.placeholder_with_default(
                  constant_op.constant([[0.0]] * 2),
                  shape=[None, 1]),
      }, array_ops.placeholder_with_default(constant_op.constant([0.0, 1.0]),
                                            shape=[None])

    always_zero = feature_column_v2.numeric_column_v2('always_zero')
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[always_zero], optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=100)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.5, err=0.1)
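Here the expected bias of 0.5 is again just the label mean ((0.0 + 1.0) / 2). The shape=[None] arguments are what leave the static batch dimension unknown; a minimal TF1-style illustration of that effect (assumes a graph-mode setup, not part of the test):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

t = tf.placeholder_with_default(tf.constant([0.0, 1.0]), shape=[None])
# The batch dimension is not statically known: prints (?,) or (None,)
# depending on the TF version.
print(t.shape)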
Example #3
  def testRealValuedLinearFeatures(self):
    """Tests LinearRegressor with LinearSDCA and real valued features."""
    x = [[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]]
    weights = [[3.0], [-1.2], [0.5]]
    y = np.dot(x, weights)

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'x': constant_op.constant(x),
          'weights': constant_op.constant([[10.0], [10.0], [10.0]])
      }, constant_op.constant(y)

    x_column = feature_column_v2.numeric_column_v2('x', shape=3)
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[x_column],
        weight_column='weights',
        optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=20)
    loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss, 0.01)
    self.assertIn('linear/linear_model/x/weights',
                  regressor.get_variable_names())
    regressor_weights = regressor.get_variable_value(
        'linear/linear_model/x/weights')
    self.assertAllClose(
        [w[0] for w in weights], regressor_weights.flatten(), rtol=0.1)
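Since y is generated as an exact, noise-free linear function of x, the target weights are recoverable in closed form; a numpy cross-check of the values the test asserts against (not part of the original test):

import numpy as np

x = np.array([[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]])
weights = np.array([[3.0], [-1.2], [0.5]])
y = x.dot(weights)
# x is square and non-singular, so the generating weights are the unique
# exact solution of the linear system.
print(np.linalg.solve(x, y).flatten())  # [ 3.  -1.2  0.5]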
Example #4
  def testBiasAndOtherColumns(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when there are other features present.

      1/2 of the instances in this input have feature 'a', the rest have
      feature 'b', and we expect the bias to be added to each instance as well.
      0.4 of all instances that have feature 'a' are positive, and 0.2 of all
      instances that have feature 'b' are positive. The labels in the dataset
      are ordered to appear shuffled, since SDCA expects shuffled data and
      converges faster with this pseudo-random ordering.
      If the bias were not regularized, we would expect the weights to be:
      bias: 0.3
      a: 0.1
      b: -0.1
      But with bias regularization the optimal values are:
      bias: 0.2
      a: 0.2
      b: 0.0

      Returns:
        The test dataset.
      """
      num_examples = 200
      half = num_examples // 2
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          'a':
              constant_op.constant([[1]] * half + [[0]] * half),
          'b':
              constant_op.constant([[0]] * half + [[1]] * half),
      }, constant_op.constant(
          [[x]
           for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * (half // 10) +
           [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * (half // 10)])

    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[
            feature_column_v2.numeric_column_v2('a'),
            feature_column_v2.numeric_column_v2('b')
        ],
        optimizer=optimizer)

    regressor.train(input_fn=input_fn, steps=200)

    variable_names = regressor.get_variable_names()
    self.assertIn('linear/linear_model/bias_weights', variable_names)
    self.assertIn('linear/linear_model/a/weights', variable_names)
    self.assertIn('linear/linear_model/b/weights', variable_names)
    # TODO(b/29339026): Change the expected results to expect a centered bias.
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.2, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/a/weights')[0], 0.2, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/b/weights')[0], 0.0, err=0.05)
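The regularized optimum in the docstring can be sanity-checked without running SDCA: since a + b = 1 on every row, the bias column is collinear with a + b, the least-squares problem has many interpolating solutions, and an L2 penalty selects the one with minimum norm. A hedged numpy sketch (it ignores the exact SDCA regularization scaling):

import numpy as np

# Effective design matrix: one row per group, columns [bias, a, b], with
# group mean labels 0.4 (feature a) and 0.2 (feature b).
A = np.array([[1.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
y = np.array([0.4, 0.2])
# pinv yields the minimum-norm least-squares solution, which is what a
# small symmetric L2 penalty pushes the weights toward.
print(np.linalg.pinv(A).dot(y))  # [0.2 0.2 0. ]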
Example #5
    def test_key_should_be_in_features(self):
        def input_fn():
            return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        estimator.train(input_fn=input_fn, steps=1)

        # 'y' is not among the input features, so forwarding it should fail;
        # the regexp below matches the library's error message verbatim.
        estimator = extenders.forward_features(estimator, 'y')
        with self.assertRaisesRegexp(ValueError,
                                     'keys should be exist in features'):
            next(estimator.predict(input_fn=input_fn))
Example #6
    def test_forwarded_feature_should_be_a_sparse_tensor(self):
        input_fn = self.make_dummy_input_fn()

        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        estimator.train(input_fn=input_fn, steps=1)

        estimator = extenders.forward_features(estimator,
                                               sparse_default_values={
                                                   'id': 0,
                                                   'sparse_id': 0
                                               })
        with self.assertRaisesRegexp(
                ValueError, 'Feature .* is expected to be a `SparseTensor`.'):
            next(estimator.predict(input_fn=input_fn))
Example #7
    def test_forward_list(self):
        def input_fn():
            return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        estimator.train(input_fn=input_fn, steps=1)

        self.assertNotIn('id', next(estimator.predict(input_fn=input_fn)))
        estimator = extenders.forward_features(estimator, ['x', 'id'])
        predictions = next(estimator.predict(input_fn=input_fn))
        self.assertIn('id', predictions)
        self.assertIn('x', predictions)
        self.assertEqual(101, predictions['id'])
        self.assertEqual(3., predictions['x'])
Example #8
  def testBiasAndOtherColumnsFabricatedCentered(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when there are other features present.

      1/2 of the instances in this input have feature 'a', the rest have
      feature 'b', and we expect the bias to be added to each instance as well.
      0.1 of all instances that have feature 'a' have a label of 1, and 0.1 of
      all instances that have feature 'b' have a label of -1.
      We can expect the weights to be:
      bias: 0.0
      a: 0.1
      b: -0.1

      Returns:
        The test dataset.
      """
      num_examples = 200
      half = num_examples // 2
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          'a':
              constant_op.constant([[1]] * half + [[0]] * half),
          'b':
              constant_op.constant([[0]] * half + [[1]] * half),
      }, constant_op.constant([[1 if x % 10 == 0 else 0] for x in range(half)] +
                              [[-1 if x % 10 == 0 else 0] for x in range(half)])

    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[
            feature_column_v2.numeric_column_v2('a'),
            feature_column_v2.numeric_column_v2('b')
        ],
        optimizer=optimizer)

    regressor.train(input_fn=input_fn, steps=100)

    variable_names = regressor.get_variable_names()
    self.assertIn('linear/linear_model/bias_weights', variable_names)
    self.assertIn('linear/linear_model/a/weights', variable_names)
    self.assertIn('linear/linear_model/b/weights', variable_names)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.0, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/a/weights')[0], 0.1, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/b/weights')[0], -0.1, err=0.05)
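The same minimum-norm cross-check as in testBiasAndOtherColumns applies with the centered group means 0.1 and -0.1 (a sketch reusing the design matrix from that example):

import numpy as np

A = np.array([[1.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
print(np.linalg.pinv(A).dot([0.1, -0.1]))  # [ 0.   0.1 -0.1]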
Example #9
    def test_forwarded_feature_should_not_be_a_sparse_tensor(self):
        def input_fn():
            return {
                'x': [[3.], [5.]],
                'id':
                sparse_tensor.SparseTensor(values=['1', '2'],
                                           indices=[[0, 0], [1, 0]],
                                           dense_shape=[2, 1])
            }, [[1.], [2.]]

        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        estimator.train(input_fn=input_fn, steps=1)

        estimator = extenders.forward_features(estimator)
        with self.assertRaisesRegexp(ValueError,
                                     'Feature .* should be a Tensor.*'):
            next(estimator.predict(input_fn=input_fn))
Example #10
    def test_forward_keys(self):

        input_fn = self.make_dummy_input_fn()
        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        estimator.train(input_fn=input_fn, steps=1)

        forwarded_keys = ['id', 'sparse_id']

        for key in forwarded_keys:
            self.assertNotIn(key, next(estimator.predict(input_fn=input_fn)))

        estimator = extenders.forward_features(
            estimator, forwarded_keys, sparse_default_values={'sparse_id': 1})

        # Per-example sums of the forwarded columns, interleaved as
        # (id, sparse_id) for each of the two predicted examples.
        expected_results = [101, 2, 102, 5]
        predictions = estimator.predict(input_fn=input_fn)
        for _ in range(2):
            prediction = next(predictions)
            for key in forwarded_keys:
                self.assertIn(key, prediction)
                self.assertEqual(expected_results.pop(0), sum(prediction[key]))
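make_dummy_input_fn is defined elsewhere in the test class. From expected_results above, the per-example sums must be 101 and 102 for 'id' and 2 and 5 for 'sparse_id' (after densifying with default value 1); a hypothetical reconstruction consistent with those constraints, not the original helper:

    def make_dummy_input_fn(self):
        def _input_fn():
            return {
                'x': [[3.], [5.]],
                'id': [[101], [102]],
                # Densified with sparse_default_values={'sparse_id': 1}, the
                # rows become [1, 1] and [4, 1], with sums 2 and 5.
                'sparse_id': sparse_tensor.SparseTensor(
                    values=[1, 4], indices=[[0, 0], [1, 0]],
                    dense_shape=[2, 2]),
            }, [[1.], [2.]]
        return _input_fn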
Example #11
  def testPartitionedVariables(self):
    """Tests LinearRegressor with LinearSDCA with partitioned variables."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([0.6, 0.8, 0.3]),
          'sq_footage':
              constant_op.constant([[900.0], [700.0], [600.0]]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[3.0], [5.0], [7.0]])
      }, constant_op.constant([[1.55], [-1.25], [-3.0]])

    price = feature_column_v2.numeric_column_v2('price')
    sq_footage_bucket = feature_column_v2.bucketized_column_v2(
        feature_column_v2.numeric_column_v2('sq_footage'),
        boundaries=[650.0, 800.0])
    country = feature_column_v2.categorical_column_with_hash_bucket_v2(
        'country', hash_bucket_size=5)
    sq_footage_country = feature_column_v2.crossed_column_v2(
        [sq_footage_bucket, 'country'], hash_bucket_size=10)
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)

    regressor = linear.LinearRegressor(
        feature_columns=[price, sq_footage_bucket, country, sq_footage_country],
        weight_column='weights',
        partitioner=partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0),
        optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=20)
    loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss, 0.05)
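fixed_size_partitioner(num_shards=2, axis=0) splits each of the linear model's weight variables into two shards along the first dimension; the 10-bucket sq_footage_country weights, for instance, end up stored as two 5-row partitions, while training and evaluation behave as if the variables were whole.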
Example #12
    def test_forward_in_exported(self):
        def serving_input_fn():
            features_ph = {
                'x': array_ops.placeholder(dtypes.float32, [None]),
                'id': array_ops.placeholder(dtypes.int32, [None])
            }
            features = {
                key: array_ops.expand_dims(tensor, -1)
                for key, tensor in features_ph.items()
            }
            return estimator_lib.export.ServingInputReceiver(
                features, features_ph)

        def input_fn():
            return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

        # create estimator
        feature_columns = [fc.numeric_column('x')]
        estimator = linear.LinearRegressor(feature_columns)
        estimator.train(input_fn=input_fn, steps=1)
        estimator = extenders.forward_features(estimator, 'id')

        # export saved model
        export_dir, tmpdir = self._export_estimator(estimator,
                                                    serving_input_fn)

        # restore model
        predict_fn = from_saved_model(export_dir, signature_def_key='predict')
        predictions = predict_fn({'x': [3], 'id': [101]})

        # verify that 'id' exists in predictions
        self.assertIn('id', predictions)
        self.assertEqual(101, predictions['id'])

        # Clean up.
        gfile.DeleteRecursively(tmpdir)
Example #13
    def test_key_should_be_list_of_string(self):
        estimator = linear.LinearRegressor([fc.numeric_column('x')])
        with self.assertRaisesRegexp(TypeError, 'should be a string'):
            extenders.forward_features(estimator, ['x', estimator])
Example #14
def _linear_regressor_fn(*args, **kwargs):
  return linear.LinearRegressor(*args, **kwargs)
Example #15
  def testSparseFeaturesWithL1Reg(self):
    """Tests LinearRegressor with LinearSDCA and sparse features."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([[0.4], [0.6], [0.3]]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[10.0], [10.0], [10.0]])
      }, constant_op.constant([[1.4], [-0.8], [2.6]])

    price = feature_column_v2.numeric_column_v2('price')
    country = feature_column_v2.categorical_column_with_hash_bucket_v2(
        'country', hash_bucket_size=5)
    # Regressor with no L1 regularization.
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[price, country],
        weight_column='weights',
        optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=20)
    no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    variable_names = regressor.get_variable_names()
    self.assertIn('linear/linear_model/price/weights', variable_names)
    self.assertIn('linear/linear_model/country/weights', variable_names)
    no_l1_reg_weights = {
        'linear/linear_model/price/weights': regressor.get_variable_value(
            'linear/linear_model/price/weights'),
        'linear/linear_model/country/weights': regressor.get_variable_value(
            'linear/linear_model/country/weights'),
    }

    # Regressor with L1 regularization.
    optimizer = linear.LinearSDCA(
        example_id_column='example_id',
        symmetric_l1_regularization=1.0,
        symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[price, country],
        weight_column='weights',
        optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=20)
    l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    l1_reg_weights = {
        'linear/linear_model/price/weights': regressor.get_variable_value(
            'linear/linear_model/price/weights'),
        'linear/linear_model/country/weights': regressor.get_variable_value(
            'linear/linear_model/country/weights'),
    }

    # Unregularized loss is lower when there is no L1 regularization.
    self.assertLess(no_l1_reg_loss, l1_reg_loss)
    self.assertLess(no_l1_reg_loss, 0.05)

    # But weights returned by the regressor with L1 regularization have smaller
    # L1 norm.
    l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0
    for var_name in sorted(l1_reg_weights):
      l1_reg_weights_norm += sum(
          np.absolute(l1_reg_weights[var_name].flatten()))
      no_l1_reg_weights_norm += sum(
          np.absolute(no_l1_reg_weights[var_name].flatten()))
      print('Var name: %s, value: %s' %
            (var_name, no_l1_reg_weights[var_name].flatten()))
    self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)
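The qualitative effect asserted above, that L1 shrinks the weights' overall L1 norm, comes from the soft-thresholding (proximal) step that SDCA-style solvers use to handle an L1 term; a generic numpy illustration of that operator (not the internal TF implementation):

import numpy as np

def soft_threshold(w, l1):
    # Proximal operator of the L1 penalty: shrink every coordinate toward
    # zero and clamp anything below the threshold to exactly zero.
    return np.sign(w) * np.maximum(np.abs(w) - l1, 0.0)

w = np.array([0.8, -0.05, 0.3])
print(soft_threshold(w, 0.1))  # [ 0.7 -0.   0.2]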