コード例 #1
0
    def testBucketizedFeatures(self):
        """Checks that SDCALogisticClassifier trains on bucketized columns.

        'price' and 'sq_footage' are real-valued inputs wrapped in bucketized
        columns. After 50 training steps, the accuracy evaluated on the same
        three examples must exceed 0.9.
        """
        def _input_fn():
            # 'price' is supplied as a rank-1 tensor, whereas 'sq_footage' and
            # 'weights' are supplied as rank-2 column vectors.
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([600.0, 1000.0, 400.0]),
                'sq_footage': constant_op.constant(
                    [[1000.0], [600.0], [700.0]]),
                'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        with self._single_threaded_test_session():
            raw_price = feature_column_lib.real_valued_column('price')
            bucketized_price = feature_column_lib.bucketized_column(
                raw_price, boundaries=[500.0, 700.0])
            raw_sq_footage = feature_column_lib.real_valued_column(
                'sq_footage')
            bucketized_sq_footage = feature_column_lib.bucketized_column(
                raw_sq_footage, boundaries=[650.0])

            estimator = sdca_estimator.SDCALogisticClassifier(
                example_id_column='example_id',
                feature_columns=[bucketized_price, bucketized_sq_footage],
                weight_column_name='weights',
                l2_regularization=1.0)
            estimator.fit(input_fn=_input_fn, steps=50)

            # Evaluation reuses the training examples.
            results = estimator.evaluate(input_fn=_input_fn, steps=1)
            self.assertGreater(results['accuracy'], 0.9)
コード例 #2
0
    def testSparseFeaturesWithDuplicates(self):
        """Checks SDCALogisticClassifier on sparse inputs with repeated entries.

        Each of the two examples lists the same (row, 0) sparse entry five
        times for both 'age' and 'gender'. After 50 training steps, the loss
        evaluated on the same data must be below 0.060.
        """
        def _input_fn():
            # Five copies of index [0, 0] followed by five copies of [1, 0].
            duplicated_indices = [[0, 0]] * 5 + [[1, 0]] * 5
            example_ids = constant_op.constant(['1', '2'])
            age_tensor = sparse_tensor.SparseTensor(
                indices=duplicated_indices,
                values=['20-29'] * 5 + ['31-40'] * 5,
                dense_shape=[2, 1])
            gender_tensor = sparse_tensor.SparseTensor(
                indices=duplicated_indices,
                values=['m'] * 5 + ['f'] * 5,
                dense_shape=[2, 1])
            features = {
                'example_id': example_ids,
                'age': age_tensor,
                'gender': gender_tensor,
            }
            labels = constant_op.constant([[1], [0]])
            return features, labels

        with self._single_threaded_test_session():
            age_column = feature_column_lib.sparse_column_with_hash_bucket(
                'age', hash_bucket_size=10)
            gender_column = feature_column_lib.sparse_column_with_hash_bucket(
                'gender', hash_bucket_size=10)

            estimator = sdca_estimator.SDCALogisticClassifier(
                example_id_column='example_id',
                feature_columns=[age_column, gender_column])
            estimator.fit(input_fn=_input_fn, steps=50)

            results = estimator.evaluate(input_fn=_input_fn, steps=1)
            self.assertLess(results['loss'], 0.060)
コード例 #3
0
    def testCrossedFeatures(self):
        """Checks SDCALogisticClassifier with a single crossed feature column.

        The only feature column handed to the classifier is the cross of the
        hashed 'language' and 'country' sparse columns. After 10 training
        steps, the accuracy evaluated on the same three examples must exceed
        0.9.
        """
        def _input_fn():
            # One sparse entry per example, all located in column 0.
            one_entry_per_row = [[0, 0], [1, 0], [2, 0]]
            example_ids = constant_op.constant(['1', '2', '3'])
            language_tensor = sparse_tensor.SparseTensor(
                indices=one_entry_per_row,
                values=['english', 'italian', 'spanish'],
                dense_shape=[3, 1])
            country_tensor = sparse_tensor.SparseTensor(
                indices=one_entry_per_row,
                values=['US', 'IT', 'MX'],
                dense_shape=[3, 1])
            features = {
                'example_id': example_ids,
                'language': language_tensor,
                'country': country_tensor,
            }
            labels = constant_op.constant([[0], [0], [1]])
            return features, labels

        with self._single_threaded_test_session():
            language_column = feature_column_lib.sparse_column_with_hash_bucket(
                'language', hash_bucket_size=5)
            country_column = feature_column_lib.sparse_column_with_hash_bucket(
                'country', hash_bucket_size=5)
            language_x_country = feature_column_lib.crossed_column(
                [language_column, country_column], hash_bucket_size=10)

            estimator = sdca_estimator.SDCALogisticClassifier(
                example_id_column='example_id',
                feature_columns=[language_x_country])
            estimator.fit(input_fn=_input_fn, steps=10)

            results = estimator.evaluate(input_fn=_input_fn, steps=1)
            self.assertGreater(results['accuracy'], 0.9)
コード例 #4
0
    def testWeightedSparseFeatures(self):
        """Checks SDCALogisticClassifier with a weighted sparse column.

        The hashed 'country' sparse column is weighted by the sparse 'price'
        tensor. After 50 training steps, the accuracy evaluated on the same
        three examples must exceed 0.9.
        """
        def _input_fn():
            # 'price' and 'country' use the same indices and dense shape, so
            # each country id is paired with exactly one weight.
            shared_indices = [[0, 0], [1, 0], [2, 0]]
            shared_shape = [3, 5]
            example_ids = constant_op.constant(['1', '2', '3'])
            price_tensor = sparse_tensor.SparseTensor(
                indices=shared_indices,
                values=[2., 3., 1.],
                dense_shape=shared_shape)
            country_tensor = sparse_tensor.SparseTensor(
                indices=shared_indices,
                values=['IT', 'US', 'GB'],
                dense_shape=shared_shape)
            features = {
                'example_id': example_ids,
                'price': price_tensor,
                'country': country_tensor,
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        with self._single_threaded_test_session():
            country_column = feature_column_lib.sparse_column_with_hash_bucket(
                'country', hash_bucket_size=5)
            price_weighted_country = feature_column_lib.weighted_sparse_column(
                country_column, 'price')

            estimator = sdca_estimator.SDCALogisticClassifier(
                example_id_column='example_id',
                feature_columns=[price_weighted_country])
            estimator.fit(input_fn=_input_fn, steps=50)

            results = estimator.evaluate(input_fn=_input_fn, steps=1)
            self.assertGreater(results['accuracy'], 0.9)
コード例 #5
0
    def testSparseFeatures(self):
        """Checks SDCALogisticClassifier with a dense and a sparse feature.

        A real-valued 'price' column is combined with a hashed sparse
        'country' column, using 'weights' as the example-weight column. After
        50 training steps, the accuracy evaluated on the same three examples
        must exceed 0.9.
        """
        def _input_fn():
            example_ids = constant_op.constant(['1', '2', '3'])
            price_tensor = constant_op.constant([[0.4], [0.6], [0.3]])
            # Each example holds one country, placed in a different column of
            # the 3x5 sparse tensor.
            country_tensor = sparse_tensor.SparseTensor(
                indices=[[0, 0], [1, 3], [2, 1]],
                values=['IT', 'US', 'GB'],
                dense_shape=[3, 5])
            weight_tensor = constant_op.constant([[1.0], [1.0], [1.0]])
            features = {
                'example_id': example_ids,
                'price': price_tensor,
                'country': country_tensor,
                'weights': weight_tensor,
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_column = feature_column_lib.real_valued_column('price')
        country_column = feature_column_lib.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[price_column, country_column],
            weight_column_name='weights')
        estimator.fit(input_fn=_input_fn, steps=50)

        results = estimator.evaluate(input_fn=_input_fn, steps=1)
        self.assertGreater(results['accuracy'], 0.9)
コード例 #6
0
    def testMixedFeatures(self):
        """Checks SDCALogisticClassifier with several column kinds at once.

        The classifier receives a real-valued column, a bucketized column, a
        hashed sparse column and a cross of the latter two, with 'weights' as
        the example-weight column. After 50 training steps, the accuracy
        evaluated on the same three examples must exceed 0.9.
        """
        def _input_fn():
            example_ids = constant_op.constant(['1', '2', '3'])
            price_tensor = constant_op.constant([[0.6], [0.8], [0.3]])
            # 'sq_footage' is supplied as a rank-1 tensor.
            sq_footage_tensor = constant_op.constant([900.0, 700.0, 600.0])
            country_tensor = sparse_tensor.SparseTensor(
                indices=[[0, 0], [1, 3], [2, 1]],
                values=['IT', 'US', 'GB'],
                dense_shape=[3, 5])
            # The first example carries three times the weight of the others.
            weight_tensor = constant_op.constant([[3.0], [1.0], [1.0]])
            features = {
                'example_id': example_ids,
                'price': price_tensor,
                'sq_footage': sq_footage_tensor,
                'country': country_tensor,
                'weights': weight_tensor,
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_column = feature_column_lib.real_valued_column('price')
        raw_sq_footage = feature_column_lib.real_valued_column('sq_footage')
        bucketized_sq_footage = feature_column_lib.bucketized_column(
            raw_sq_footage, boundaries=[650.0, 800.0])
        country_column = feature_column_lib.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        sq_footage_x_country = feature_column_lib.crossed_column(
            [bucketized_sq_footage, country_column], hash_bucket_size=10)

        all_columns = [
            price_column,
            bucketized_sq_footage,
            country_column,
            sq_footage_x_country,
        ]
        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=all_columns,
            weight_column_name='weights')
        estimator.fit(input_fn=_input_fn, steps=50)

        results = estimator.evaluate(input_fn=_input_fn, steps=1)
        self.assertGreater(results['accuracy'], 0.9)
コード例 #7
0
    def testRealValuedFeatureWithHigherDimension(self):
        """Checks SDCALogisticClassifier with a 2-dimensional dense feature.

        After 100 training steps, the loss evaluated on the same two examples
        must be below 0.05.
        """

        # Same data as the input function of testRealValuedFeatures, except
        # that the two 1-dimensional dense features are packed into a single
        # 2-dimensional feature.
        def _input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2']),
                'dense_feature': constant_op.constant(
                    [[500.0, 800.0], [200.0, 600.0]]),
            }
            labels = constant_op.constant([[0], [1]])
            return features, labels

        dense_column = feature_column_lib.real_valued_column(
            'dense_feature', dimension=2)
        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id', feature_columns=[dense_column])
        estimator.fit(input_fn=_input_fn, steps=100)

        results = estimator.evaluate(input_fn=_input_fn, steps=1)
        self.assertLess(results['loss'], 0.05)
コード例 #8
0
    def testRealValuedFeatures(self):
        """Checks SDCALogisticClassifier with two real-valued features.

        'maintenance_cost' and 'sq_footage' are used as plain real-valued
        columns, with 'weights' as the example-weight column. After 100
        training steps, the loss evaluated on the same two examples must be
        below 0.05.
        """
        def _input_fn():
            # 'maintenance_cost' is supplied as a rank-1 tensor, whereas
            # 'sq_footage' and 'weights' are rank-2 column vectors.
            features = {
                'example_id': constant_op.constant(['1', '2']),
                'maintenance_cost': constant_op.constant([500.0, 200.0]),
                'sq_footage': constant_op.constant([[800.0], [600.0]]),
                'weights': constant_op.constant([[1.0], [1.0]]),
            }
            labels = constant_op.constant([[0], [1]])
            return features, labels

        cost_column = feature_column_lib.real_valued_column('maintenance_cost')
        sq_footage_column = feature_column_lib.real_valued_column('sq_footage')

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[cost_column, sq_footage_column],
            weight_column_name='weights')
        estimator.fit(input_fn=_input_fn, steps=100)

        results = estimator.evaluate(input_fn=_input_fn, steps=1)
        self.assertLess(results['loss'], 0.05)