def testBucketizedFeatures(self):
    """Checks SDCALogisticClassifier accuracy with bucketized columns only.

    Fits on three unit-weighted examples whose 'price' and 'sq_footage'
    values are bucketized, then expects accuracy above 0.9 when evaluating
    on the same inputs.
    """

    def make_inputs():
        # Build features first and labels second so ops are created in a
        # fixed order.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([600.0, 1000.0, 400.0]),
            'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]),
            'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    with self._single_threaded_test_session():
        raw_price = feature_column_lib.real_valued_column('price')
        price_buckets = feature_column_lib.bucketized_column(
            raw_price, boundaries=[500.0, 700.0])
        raw_sq_footage = feature_column_lib.real_valued_column('sq_footage')
        sq_footage_buckets = feature_column_lib.bucketized_column(
            raw_sq_footage, boundaries=[650.0])

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[price_buckets, sq_footage_buckets],
            weight_column_name='weights',
            l2_regularization=1.0)
        estimator.fit(input_fn=make_inputs, steps=50)

        eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
        accuracy = eval_results['accuracy']
        self.assertGreater(accuracy, 0.9)
def testSparseFeaturesWithDuplicates(self):
    """Checks SDCALogisticClassifier loss when sparse entries are duplicated.

    Each of the two examples carries the same (row, column) entry five times
    in both the 'age' and 'gender' sparse features. After fitting, the
    evaluation loss is expected to be below 0.060.
    """

    def make_inputs():
        # Five identical entries for example 0, then five for example 1.
        repeated_indices = [[0, 0]] * 5 + [[1, 0]] * 5
        age_tensor = sparse_tensor.SparseTensor(
            values=['20-29'] * 5 + ['31-40'] * 5,
            indices=repeated_indices,
            dense_shape=[2, 1])
        gender_tensor = sparse_tensor.SparseTensor(
            values=['m'] * 5 + ['f'] * 5,
            indices=repeated_indices,
            dense_shape=[2, 1])
        features = {
            'example_id': constant_op.constant(['1', '2']),
            'age': age_tensor,
            'gender': gender_tensor,
        }
        labels = constant_op.constant([[1], [0]])
        return features, labels

    with self._single_threaded_test_session():
        age_column = feature_column_lib.sparse_column_with_hash_bucket(
            'age', hash_bucket_size=10)
        gender_column = feature_column_lib.sparse_column_with_hash_bucket(
            'gender', hash_bucket_size=10)

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[age_column, gender_column])
        estimator.fit(input_fn=make_inputs, steps=50)

        eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
        self.assertLess(eval_results['loss'], 0.060)
def testCrossedFeatures(self):
    """Checks SDCALogisticClassifier accuracy on a single crossed column.

    The only feature column given to the classifier is the cross of the
    hashed 'language' and 'country' sparse columns. After 10 fit steps the
    evaluation accuracy is expected to exceed 0.9.
    """

    def make_inputs():
        # One entry per example, all in column 0.
        one_per_row = [[0, 0], [1, 0], [2, 0]]
        language_tensor = sparse_tensor.SparseTensor(
            values=['english', 'italian', 'spanish'],
            indices=one_per_row,
            dense_shape=[3, 1])
        country_tensor = sparse_tensor.SparseTensor(
            values=['US', 'IT', 'MX'],
            indices=one_per_row,
            dense_shape=[3, 1])
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'language': language_tensor,
            'country': country_tensor,
        }
        labels = constant_op.constant([[0], [0], [1]])
        return features, labels

    with self._single_threaded_test_session():
        language_column = feature_column_lib.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=5)
        country_column = feature_column_lib.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        crossed = feature_column_lib.crossed_column(
            [language_column, country_column], hash_bucket_size=10)

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[crossed])
        estimator.fit(input_fn=make_inputs, steps=10)

        eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
        accuracy = eval_results['accuracy']
        self.assertGreater(accuracy, 0.9)
def testWeightedSparseFeatures(self):
    """Checks SDCALogisticClassifier accuracy on a weighted sparse column.

    The hashed 'country' sparse column is weighted by the 'price' sparse
    feature and is the only feature column supplied. After fitting, the
    evaluation accuracy is expected to exceed 0.9.
    """

    def make_inputs():
        # 'price' and 'country' share the same indices and dense shape.
        entry_indices = [[0, 0], [1, 0], [2, 0]]
        shape = [3, 5]
        price_tensor = sparse_tensor.SparseTensor(
            values=[2., 3., 1.], indices=entry_indices, dense_shape=shape)
        country_tensor = sparse_tensor.SparseTensor(
            values=['IT', 'US', 'GB'],
            indices=entry_indices,
            dense_shape=shape)
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': price_tensor,
            'country': country_tensor,
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    with self._single_threaded_test_session():
        country_column = feature_column_lib.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        weighted_country = feature_column_lib.weighted_sparse_column(
            country_column, 'price')

        estimator = sdca_estimator.SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[weighted_country])
        estimator.fit(input_fn=make_inputs, steps=50)

        eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
        accuracy = eval_results['accuracy']
        self.assertGreater(accuracy, 0.9)
def testSparseFeatures(self):
    """Checks SDCALogisticClassifier accuracy with a sparse feature column.

    Combines a real-valued 'price' column with a hashed 'country' sparse
    column on three unit-weighted examples. After fitting, the evaluation
    accuracy is expected to exceed 0.9.
    """

    def make_inputs():
        country_tensor = sparse_tensor.SparseTensor(
            values=['IT', 'US', 'GB'],
            indices=[[0, 0], [1, 3], [2, 1]],
            dense_shape=[3, 5])
        # Dict order is kept so tensors are created in a fixed order.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([[0.4], [0.6], [0.3]]),
            'country': country_tensor,
            'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    price_column = feature_column_lib.real_valued_column('price')
    country_column = feature_column_lib.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)

    estimator = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=[price_column, country_column],
        weight_column_name='weights')
    estimator.fit(input_fn=make_inputs, steps=50)

    eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
    accuracy = eval_results['accuracy']
    self.assertGreater(accuracy, 0.9)
def testMixedFeatures(self):
    """Checks SDCALogisticClassifier accuracy with several column kinds.

    Uses a real-valued column, a bucketized column, a hashed sparse column
    and the cross of the latter two, with per-example weights read from
    'weights'. After fitting, the evaluation accuracy is expected to exceed
    0.9.
    """

    def make_inputs():
        country_tensor = sparse_tensor.SparseTensor(
            values=['IT', 'US', 'GB'],
            indices=[[0, 0], [1, 3], [2, 1]],
            dense_shape=[3, 5])
        # Dict order is kept so tensors are created in a fixed order.
        features = {
            'example_id': constant_op.constant(['1', '2', '3']),
            'price': constant_op.constant([[0.6], [0.8], [0.3]]),
            'sq_footage': constant_op.constant([900.0, 700.0, 600.0]),
            'country': country_tensor,
            'weights': constant_op.constant([[3.0], [1.0], [1.0]]),
        }
        labels = constant_op.constant([[1], [0], [1]])
        return features, labels

    price_column = feature_column_lib.real_valued_column('price')
    raw_sq_footage = feature_column_lib.real_valued_column('sq_footage')
    sq_footage_buckets = feature_column_lib.bucketized_column(
        raw_sq_footage, boundaries=[650.0, 800.0])
    country_column = feature_column_lib.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    sq_footage_x_country = feature_column_lib.crossed_column(
        [sq_footage_buckets, country_column], hash_bucket_size=10)

    all_columns = [
        price_column,
        sq_footage_buckets,
        country_column,
        sq_footage_x_country,
    ]
    estimator = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=all_columns,
        weight_column_name='weights')
    estimator.fit(input_fn=make_inputs, steps=50)

    eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
    accuracy = eval_results['accuracy']
    self.assertGreater(accuracy, 0.9)
def testRealValuedFeatureWithHigherDimension(self):
    """Checks SDCALogisticClassifier loss on a 2-dimensional dense feature.

    After 100 fit steps on two examples, the evaluation loss is expected to
    be below 0.05.
    """

    # The feature values match those in testRealValuedFeatures, except that
    # its two 1-dimensional dense features are packed into one 2-dimensional
    # feature here.
    def make_inputs():
        features = {
            'example_id': constant_op.constant(['1', '2']),
            'dense_feature': constant_op.constant(
                [[500.0, 800.0], [200.0, 600.0]]),
        }
        labels = constant_op.constant([[0], [1]])
        return features, labels

    dense_column = feature_column_lib.real_valued_column(
        'dense_feature', dimension=2)

    estimator = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=[dense_column])
    estimator.fit(input_fn=make_inputs, steps=100)

    eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
    self.assertLess(eval_results['loss'], 0.05)
def testRealValuedFeatures(self):
    """Checks SDCALogisticClassifier loss with real-valued feature columns.

    Fits on two unit-weighted examples described by 'maintenance_cost' and
    'sq_footage', then expects the evaluation loss to be below 0.05.
    """

    def make_inputs():
        # Dict order is kept so tensors are created in a fixed order.
        features = {
            'example_id': constant_op.constant(['1', '2']),
            'maintenance_cost': constant_op.constant([500.0, 200.0]),
            'sq_footage': constant_op.constant([[800.0], [600.0]]),
            'weights': constant_op.constant([[1.0], [1.0]]),
        }
        labels = constant_op.constant([[0], [1]])
        return features, labels

    cost_column = feature_column_lib.real_valued_column('maintenance_cost')
    sq_footage_column = feature_column_lib.real_valued_column('sq_footage')

    estimator = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=[cost_column, sq_footage_column],
        weight_column_name='weights')
    estimator.fit(input_fn=make_inputs, steps=100)

    eval_results = estimator.evaluate(input_fn=make_inputs, steps=1)
    self.assertLess(eval_results['loss'], 0.05)