Example #1
0
    def testCrossedFeatures(self):
        """Trains an SDCA-optimized LinearClassifier on a single crossed column.

        The sparse string features 'language' and 'country' are crossed into
        100 hash buckets. After 100 training steps, the loss evaluated on the
        same three examples must be below 0.2.
        """
        def input_fn():
            # Tensors are built inside input_fn, in the same order as the
            # feature dict lists them.
            features = {}
            features['example_id'] = tf.constant(['1', '2', '3'])
            features['language'] = tf.sparse.SparseTensor(
                values=['english', 'italian', 'spanish'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
            features['country'] = tf.sparse.SparseTensor(
                values=['US', 'IT', 'MX'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
            labels = tf.constant([[0], [0], [1]])
            return features, labels

        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        language_x_country = tf.feature_column.crossed_column(
            ['language', 'country'], hash_bucket_size=100)
        model = linear.LinearClassifierV2(
            feature_columns=[language_x_country], optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #2
0
    def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self):
        """LinearClassifier with LinearSDCA with OOV features (-1 IDs).

        'country' is a vocabulary-list column weighted by the sparse 'price'
        feature. The third example uses 'GB', which is not in the vocabulary
        list, and the column is created without any OOV bucket argument.
        Training for 100 steps must bring the evaluation loss below 0.2.
        """
        def input_fn():
            example_ids = tf.constant(['1', '2', '3'])
            prices = tf.sparse.SparseTensor(
                values=[2., 3., 1.],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
            # 'GB' is out of the vocabulary.
            countries = tf.sparse.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
            labels = tf.constant([[1], [0], [1]])
            features = {
                'example_id': example_ids,
                'price': prices,
                'country': countries,
            }
            return features, labels

        vocab_country = (
            tf.feature_column.categorical_column_with_vocabulary_list(
                'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN']))
        price_weighted_country = tf.feature_column.weighted_categorical_column(
            vocab_country, 'price')
        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        model = linear.LinearClassifierV2(
            feature_columns=[price_weighted_country], optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #3
0
    def testBucketizedFeatures(self):
        """Trains an SDCA LinearClassifier on two bucketized numeric columns.

        'price' is bucketized at 500 and 700, 'sq_footage' at 650, and every
        example carries a weight of 1.0 through the 'weights' column. After
        100 training steps the evaluation loss must be below 0.2.
        """
        def input_fn():
            features = {}
            features['example_id'] = constant_op.constant(['1', '2', '3'])
            features['price'] = constant_op.constant(
                [[600.0], [1000.0], [400.0]])
            features['sq_footage'] = constant_op.constant(
                [[1000.0], [600.0], [700.0]])
            features['weights'] = constant_op.constant([[1.0], [1.0], [1.0]])
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        raw_price = feature_column_lib.numeric_column('price')
        bucketed_price = feature_column_lib.bucketized_column(
            raw_price, boundaries=[500.0, 700.0])
        raw_sq_footage = feature_column_lib.numeric_column('sq_footage')
        bucketed_sq_footage = feature_column_lib.bucketized_column(
            raw_sq_footage, boundaries=[650.0])

        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        model = linear.LinearClassifierV2(
            feature_columns=[bucketed_price, bucketed_sq_footage],
            weight_column='weights',
            optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #4
0
    def testWeightedSparseFeatures(self):
        """Trains an SDCA LinearClassifier on a weighted sparse column.

        'country' is hashed into 5 buckets and weighted by the sparse 'price'
        feature. After 100 training steps the evaluation loss must be
        below 0.2.
        """
        def input_fn():
            example_ids = constant_op.constant(['1', '2', '3'])
            prices = sparse_tensor.SparseTensor(
                values=[2., 3., 1.],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
            countries = sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
            labels = constant_op.constant([[1], [0], [1]])
            features = {
                'example_id': example_ids,
                'price': prices,
                'country': countries,
            }
            return features, labels

        hashed_country = feature_column_lib.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        price_weighted_country = feature_column_lib.weighted_categorical_column(
            hashed_country, 'price')
        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        model = linear.LinearClassifierV2(
            feature_columns=[price_weighted_country], optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #5
0
        def _test_metric_fn(metric_fn):
            """Checks that `metric_fn` yields a 'two' metric equal to 2.0.

            A LinearClassifierV2 over the numeric column 'x' is wrapped with
            `extenders.add_metrics`, trained and evaluated on the same input,
            and the evaluation result is inspected.
            """
            data_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
            base_estimator = linear.LinearClassifierV2(
                [fc.numeric_column('x')])
            extended_estimator = extenders.add_metrics(base_estimator,
                                                       metric_fn)

            extended_estimator.train(input_fn=data_fn)
            eval_results = extended_estimator.evaluate(input_fn=data_fn)
            self.assertEqual(2., eval_results['two'])
Example #6
0
    def test_should_error_out_for_not_recognized_args(self):
        """add_metrics must reject a metric_fn with an unsupported argument.

        The metric function declares a parameter named 'not_recognized'; the
        test expects `extenders.add_metrics` to raise a ValueError whose
        message mentions that parameter name.
        """
        estimator = linear.LinearClassifierV2([fc.numeric_column('x')])

        def metric_fn(features, not_recognized):
            # Only the signature matters for this test; the arguments are
            # deliberately unused.
            del features, not_recognized
            return {}

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
        # which was removed from unittest in Python 3.12. The call's result is
        # not bound because the call is expected to raise.
        with self.assertRaisesRegex(ValueError, 'not_recognized'):
            extenders.add_metrics(estimator, metric_fn)
Example #7
0
    def test_all_supported_args(self):
        """add_metrics accepts a metric_fn taking every supported argument.

        The metric function declares `features`, `predictions`, `labels` and
        `config`, and checks each value it receives. The wrapped estimator is
        then trained and evaluated so the metric function gets exercised.
        """
        input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
        estimator = linear.LinearClassifierV2([fc.numeric_column('x')])

        def metric_fn(features, predictions, labels, config):
            self.assertIn('x', features)
            self.assertIsNotNone(labels)
            self.assertIn('logistic', predictions)
            # assertIsInstance reports the actual type on failure, unlike
            # assertTrue(isinstance(...)), which only reports "False is not
            # true".
            self.assertIsInstance(config, run_config.RunConfig)
            return {}

        estimator = extenders.add_metrics(estimator, metric_fn)

        estimator.train(input_fn=input_fn)
        estimator.evaluate(input_fn=input_fn)
Example #8
0
        def _test_metric_fn(metric_fn):
            """Checks that `metric_fn` adds 'mean_x' alongside built-in metrics.

            The estimator is fed x = 0..3 with all-ones labels, so the
            expected 'mean_x' value is 1.5. The 'auc' metric must still be
            present in the evaluation results after `add_metrics` is applied.
            """
            x_values = np.arange(4)[:, None, None]
            y_values = np.ones(4)[:, None]
            data_fn = get_input_fn(x=x_values, y=y_values)
            step_logging_config = run_config.RunConfig(log_step_count_steps=1)
            base_estimator = linear.LinearClassifierV2(
                [fc.numeric_column('x')], config=step_logging_config)

            extended_estimator = extenders.add_metrics(base_estimator,
                                                       metric_fn)

            extended_estimator.train(input_fn=data_fn)
            eval_results = extended_estimator.evaluate(input_fn=data_fn)
            self.assertIn('mean_x', eval_results)
            self.assertEqual(1.5, eval_results['mean_x'])
            # The metrics of the original estimator must be kept as well.
            self.assertIn('auc', eval_results)
Example #9
0
    def testRealValuedFeatureWithHigherDimension(self):
        """Trains an SDCA LinearClassifier on one 2-dimensional dense feature.

        After 100 training steps the loss evaluated on the same two examples
        must be below 0.2.
        """

        # Same data as the input_fn in testRealValuedFeatures, except that the
        # two 1-dimensional dense features are packed into a single
        # 2-dimensional feature.
        def input_fn():
            features = {}
            features['example_id'] = tf.constant(['1', '2'])
            features['dense_feature'] = tf.constant(
                [[500.0, 800.0], [200.0, 600.0]])
            labels = tf.constant([[0], [1]])
            return features, labels

        two_dim_column = tf.feature_column.numeric_column('dense_feature',
                                                          shape=2)
        sdca = linear.LinearSDCA(example_id_column='example_id')
        model = linear.LinearClassifierV2(feature_columns=[two_dim_column],
                                          optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #10
0
    def testRealValuedFeatures(self):
        """Trains an SDCA LinearClassifier on two real-valued features.

        'maintenance_cost' and 'sq_footage' are plain numeric columns and each
        example has weight 1.0 through the 'weights' column. After 100
        training steps the evaluation loss must be below 0.2.
        """
        def input_fn():
            features = {}
            features['example_id'] = tf.constant(['1', '2'])
            features['maintenance_cost'] = tf.constant([[500.0], [200.0]])
            features['sq_footage'] = tf.constant([[800.0], [600.0]])
            features['weights'] = tf.constant([[1.0], [1.0]])
            labels = tf.constant([[0], [1]])
            return features, labels

        numeric_columns = [
            tf.feature_column.numeric_column('maintenance_cost'),
            tf.feature_column.numeric_column('sq_footage'),
        ]
        sdca = linear.LinearSDCA(example_id_column='example_id')
        model = linear.LinearClassifierV2(
            feature_columns=numeric_columns,
            weight_column='weights',
            optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #11
0
    def testPartitionedVariables(self):
        """Trains an SDCA LinearClassifier whose variables are partitioned.

        The model combines a numeric column, a bucketized column, a hashed
        categorical column and a cross of the latter two, and is built with a
        fixed-size partitioner (2 shards along axis 0). After 100 training
        steps the evaluation loss must be below 0.2.
        """
        def input_fn():
            features = {}
            features['example_id'] = constant_op.constant(['1', '2', '3'])
            features['price'] = constant_op.constant([[0.6], [0.8], [0.3]])
            features['sq_footage'] = constant_op.constant(
                [[900.0], [700.0], [600.0]])
            features['country'] = sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5])
            features['weights'] = constant_op.constant([[3.0], [1.0], [1.0]])
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_column = feature_column_lib.numeric_column('price')
        raw_sq_footage = feature_column_lib.numeric_column('sq_footage')
        bucketed_sq_footage = feature_column_lib.bucketized_column(
            raw_sq_footage, boundaries=[650.0, 800.0])
        hashed_country = feature_column_lib.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        # The cross takes the raw 'country' feature key, not the hashed column.
        sq_footage_x_country = feature_column_lib.crossed_column(
            [bucketed_sq_footage, 'country'], hash_bucket_size=10)

        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        two_shard_partitioner = partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0)

        model = linear.LinearClassifierV2(
            feature_columns=[
                price_column,
                bucketed_sq_footage,
                hashed_country,
                sq_footage_x_country,
            ],
            weight_column='weights',
            partitioner=two_shard_partitioner,
            optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #12
0
    def testSparseFeatures(self):
        """Trains an SDCA LinearClassifier on a single sparse feature.

        'country' is hashed into 5 buckets and each example has weight 1.0
        through the 'weights' column. After 100 training steps the evaluation
        loss must be below 0.2.
        """
        def input_fn():
            features = {}
            features['example_id'] = tf.constant(['1', '2', '3'])
            features['country'] = tf.sparse.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 5])
            features['weights'] = tf.constant([[1.0], [1.0], [1.0]])
            labels = tf.constant([[1], [0], [1]])
            return features, labels

        hashed_country = tf.feature_column.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        sdca = linear.LinearSDCA(example_id_column='example_id',
                                 symmetric_l2_regularization=0.01)
        model = linear.LinearClassifierV2(feature_columns=[hashed_country],
                                          weight_column='weights',
                                          optimizer=sdca)

        model.train(input_fn=input_fn, steps=100)
        eval_metrics = model.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.2)
Example #13
0
def _linear_classifier_fn(*args, **kwargs):
    """Builds a `linear.LinearClassifierV2`, forwarding all arguments as-is."""
    classifier = linear.LinearClassifierV2(*args, **kwargs)
    return classifier