Example 1
0
  def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self):
    """LinearClassifier with LinearSDCA with OOV features (-1 IDs)."""

    # Vocabulary column without OOV buckets; an out-of-vocabulary value in the
    # input maps to ID -1. The column is weighted by the 'price' feature.
    country = feature_column_v2.categorical_column_with_vocabulary_list_v2(
        'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN'])
    country_weighted_by_price = (
        feature_column_v2.weighted_categorical_column_v2(country, 'price'))

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': sparse_tensor.SparseTensor(
              values=[2., 3., 1.],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 5]),
          'country': sparse_tensor.SparseTensor(
              # 'GB' is out of the vocabulary.
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 5]),
      }
      return features, constant_op.constant([[1], [0], [1]])

    sdca = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    est = linear.LinearClassifier(
        feature_columns=[country_weighted_by_price], optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 2
0
  def testCrossedFeatures(self):
    """Tests LinearClassifier with LinearSDCA and crossed features."""

    # Cross of two sparse string columns, hashed into 100 buckets.
    country_language = feature_column_v2.crossed_column_v2(
        ['language', 'country'], hash_bucket_size=100)

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2', '3']),
          'language': sparse_tensor.SparseTensor(
              values=['english', 'italian', 'spanish'],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 1]),
          'country': sparse_tensor.SparseTensor(
              values=['US', 'IT', 'MX'],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 1]),
      }
      return features, constant_op.constant([[0], [0], [1]])

    sdca = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    est = linear.LinearClassifier(
        feature_columns=[country_language], optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 3
0
  def testSparseFeatures(self):
    """Tests LinearClassifier with LinearSDCA and sparse features."""

    # Sparse string feature hashed into 5 buckets.
    country = feature_column_v2.categorical_column_with_hash_bucket_v2(
        'country', hash_bucket_size=5)

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2', '3']),
          'country': sparse_tensor.SparseTensor(
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 0], [2, 0]],
              dense_shape=[3, 5]),
          'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
      }
      return features, constant_op.constant([[1], [0], [1]])

    sdca = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    est = linear.LinearClassifier(
        feature_columns=[country],
        weight_column='weights',
        optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 4
0
  def testBucketizedFeatures(self):
    """Tests LinearClassifier with LinearSDCA and bucketized features."""

    # Two continuous inputs, each bucketized before being fed to the model.
    price_bucket = feature_column_v2.bucketized_column(
        feature_column_v2.numeric_column('price'), boundaries=[500.0, 700.0])
    sq_footage_bucket = feature_column_v2.bucketized_column(
        feature_column_v2.numeric_column('sq_footage'), boundaries=[650.0])

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([[600.0], [1000.0], [400.0]]),
          'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]),
          'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
      }
      return features, constant_op.constant([[1], [0], [1]])

    sdca = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    est = linear.LinearClassifier(
        feature_columns=[price_bucket, sq_footage_bucket],
        weight_column='weights',
        optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 5
0
        def _test_metric_fn(metric_fn):
            """Train/evaluate a tiny classifier extended with metric_fn.

            Expects the extended estimator to report metric 'two' == 2.0.
            """
            input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
            base = linear.LinearClassifier([fc.numeric_column('x')])
            extended = extenders.add_metrics(base, metric_fn)
            extended.train(input_fn=input_fn)
            self.assertEqual(2., extended.evaluate(input_fn=input_fn)['two'])
Example 6
0
    def test_should_error_out_for_not_recognized_args(self):
        """add_metrics should reject a metric_fn with unsupported arg names."""
        estimator = linear.LinearClassifier([fc.numeric_column('x')])

        def bad_metric_fn(features, not_recognized):
            # Only the argument names matter; this body is never executed.
            _, _ = features, not_recognized
            return {}

        # The unsupported argument name should appear in the error message.
        with self.assertRaisesRegexp(ValueError, 'not_recognized'):
            estimator = extenders.add_metrics(estimator, bad_metric_fn)
Example 7
0
    def test_all_args_are_optional(self):
        """A metric_fn that takes no arguments at all is accepted."""
        input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
        base = linear.LinearClassifier([fc.numeric_column('x')])

        def zero_arg_metric_fn():
            return {'two': metrics_lib.mean(constant_op.constant([2.]))}

        estimator = extenders.add_metrics(base, zero_arg_metric_fn)
        estimator.train(input_fn=input_fn)
        self.assertEqual(2., estimator.evaluate(input_fn=input_fn)['two'])
Example 8
0
    def test_overrides_existing_metrics(self):
        """A metric_fn entry with an existing key replaces the built-in metric."""
        input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
        base = linear.LinearClassifier([fc.numeric_column('x')])
        base.train(input_fn=input_fn)
        # Sanity check: the built-in 'auc' is not already the sentinel value.
        self.assertNotEqual(2., base.evaluate(input_fn=input_fn)['auc'])

        def metric_fn():
            return {'auc': metrics_lib.mean(constant_op.constant([2.]))}

        extended = extenders.add_metrics(base, metric_fn)
        self.assertEqual(2., extended.evaluate(input_fn=input_fn)['auc'])
Example 9
0
    def test_forward_in_exported_sparse(self):
        """forward_features should round-trip a sparse key through SavedModel."""
        features_columns = [
            fc.indicator_column(
                fc.categorical_column_with_vocabulary_list('x', range(10)))
        ]

        classifier = linear.LinearClassifier(feature_columns=features_columns)

        def train_input_fn():
            # Single-element dataset; 'labels' is separated out by _split below.
            dataset = dataset_ops.Dataset.from_tensors({
                'x':
                sparse_tensor.SparseTensor(values=[1, 2, 3],
                                           indices=[[0, 0], [1, 0], [1, 1]],
                                           dense_shape=[2, 2]),
                'labels': [[0], [1]]
            })

            def _split(x):
                # Pop labels out of the feature dict to form (features, labels).
                labels = x.pop('labels')
                return x, labels

            dataset = dataset.map(_split)
            return dataset

        classifier.train(train_input_fn, max_steps=1)

        # Forward the sparse feature 'x' into the prediction output; missing
        # sparse entries are filled with the default value 0.
        classifier = extenders.forward_features(classifier,
                                                keys=['x'],
                                                sparse_default_values={'x': 0})

        def serving_input_fn():
            # Serving accepts a dense int placeholder and converts it to the
            # sparse form the model was trained with.
            features_ph = array_ops.placeholder(dtype=dtypes.int32,
                                                name='x',
                                                shape=[None])
            features = {'x': layers.dense_to_sparse(features_ph)}
            return estimator_lib.export.ServingInputReceiver(
                features, {'x': features_ph})

        export_dir, tmpdir = self._export_estimator(classifier,
                                                    serving_input_fn)
        prediction_fn = from_saved_model(export_dir,
                                         signature_def_key='predict')

        features = (0, 2)
        prediction = prediction_fn({'x': features})

        # The forwarded input must appear unchanged in the prediction output.
        self.assertIn('x', prediction)
        self.assertEqual(features, tuple(prediction['x']))
        gfile.DeleteRecursively(tmpdir)
Example 10
0
    def test_all_supported_args_in_different_order(self):
        """metric_fn args may appear in any order, not just the canonical one."""
        input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
        base = linear.LinearClassifier([fc.numeric_column('x')])

        def metric_fn(labels, config, features, predictions):
            # Every supported argument must be bound by name regardless of
            # its position in the signature.
            self.assertIn('x', features)
            self.assertIsNotNone(labels)
            self.assertIn('logistic', predictions)
            self.assertTrue(isinstance(config, estimator_lib.RunConfig))
            return {}

        estimator = extenders.add_metrics(base, metric_fn)
        estimator.train(input_fn=input_fn)
        estimator.evaluate(input_fn=input_fn)
Example 11
0
        def _test_metric_fn(metric_fn):
            """Check metric_fn adds 'mean_x' while keeping built-in metrics."""
            input_fn = get_input_fn(x=np.arange(4)[:, None, None],
                                    y=np.ones(4)[:, None])
            config = run_config.RunConfig(log_step_count_steps=1)
            base = linear.LinearClassifier([fc.numeric_column('x')],
                                           config=config)
            extended = extenders.add_metrics(base, metric_fn)
            extended.train(input_fn=input_fn)
            metrics = extended.evaluate(input_fn=input_fn)
            # x takes the values 0..3, so its mean is 1.5.
            self.assertIn('mean_x', metrics)
            self.assertEqual(1.5, metrics['mean_x'])
            # assert that it keeps original estimators metrics
            self.assertIn('auc', metrics)
Example 12
0
    def test_should_add_metrics(self):
        """add_metrics exposes the new metric alongside the built-in ones."""
        input_fn = get_input_fn(x=np.arange(4)[:, None, None],
                                y=np.ones(4)[:, None])
        base = linear.LinearClassifier([fc.numeric_column('x')])

        def metric_fn(features):
            return {'mean_x': metrics_lib.mean(features['x'])}

        estimator = extenders.add_metrics(base, metric_fn)
        estimator.train(input_fn=input_fn)
        metrics = estimator.evaluate(input_fn=input_fn)
        # x takes the values 0..3, so its mean is 1.5.
        self.assertIn('mean_x', metrics)
        self.assertEqual(1.5, metrics['mean_x'])
        # assert that it keeps original estimators metrics
        self.assertIn('auc', metrics)
Example 13
0
  def testPartitionedVariables(self):
    """Tests LinearClassifier with LinearSDCA with partitioned variables."""

    # Feature columns: one dense numeric, one bucketized, one hashed sparse,
    # and a cross of the bucketized and sparse columns.
    price = feature_column_v2.numeric_column_v2('price')
    sq_footage_bucket = feature_column_v2.bucketized_column_v2(
        feature_column_v2.numeric_column_v2('sq_footage'),
        boundaries=[650.0, 800.0])
    country = feature_column_v2.categorical_column_with_hash_bucket_v2(
        'country', hash_bucket_size=5)
    sq_footage_country = feature_column_v2.crossed_column_v2(
        [sq_footage_bucket, 'country'], hash_bucket_size=10)

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([[0.6], [0.8], [0.3]]),
          'sq_footage': constant_op.constant([[900.0], [700.0], [600.0]]),
          'country': sparse_tensor.SparseTensor(
              values=['IT', 'US', 'GB'],
              indices=[[0, 0], [1, 3], [2, 1]],
              dense_shape=[3, 5]),
          'weights': constant_op.constant([[3.0], [1.0], [1.0]]),
      }
      return features, constant_op.constant([[1], [0], [1]])

    sdca = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)

    # Shard the model variables across two fixed-size partitions along axis 0.
    est = linear.LinearClassifier(
        feature_columns=[price, sq_footage_bucket, country, sq_footage_country],
        weight_column='weights',
        partitioner=partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0),
        optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 14
0
  def testRealValuedFeatures(self):
    """Tests LinearClassifier with LinearSDCA and real valued features."""

    maintenance_cost = feature_column_v2.numeric_column_v2('maintenance_cost')
    sq_footage = feature_column_v2.numeric_column_v2('sq_footage')

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2']),
          'maintenance_cost': constant_op.constant([[500.0], [200.0]]),
          'sq_footage': constant_op.constant([[800.0], [600.0]]),
          'weights': constant_op.constant([[1.0], [1.0]]),
      }
      return features, constant_op.constant([[0], [1]])

    sdca = linear.LinearSDCA(example_id_column='example_id')
    est = linear.LinearClassifier(
        feature_columns=[maintenance_cost, sq_footage],
        weight_column='weights',
        optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 15
0
  def testRealValuedFeatureWithHigherDimension(self):
    """Tests LinearSDCA with real valued features of higher dimension."""

    # Same data as testRealValuedFeatures, but the two 1-dimensional dense
    # features are packed into a single 2-dimensional feature.
    dense_feature = feature_column_v2.numeric_column('dense_feature', shape=2)

    def _input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2']),
          'dense_feature': constant_op.constant([[500.0, 800.0],
                                                 [200.0, 600.0]]),
      }
      return features, constant_op.constant([[0], [1]])

    sdca = linear.LinearSDCA(example_id_column='example_id')
    est = linear.LinearClassifier(
        feature_columns=[dense_feature], optimizer=sdca)
    est.train(input_fn=_input_fn, steps=100)
    self.assertLess(est.evaluate(input_fn=_input_fn, steps=1)['loss'], 0.2)
Example 16
0
def _linear_classifier_fn(*args, **kwargs):
  """Factory that forwards all arguments to `linear.LinearClassifier`."""
  return linear.LinearClassifier(*args, **kwargs)