Esempio n. 1
0
  def testCrossedFeatures(self):
    """Checks SDCALogisticClassifier trained on a single crossed column.

    The only feature column handed to the classifier is the cross of the
    hashed 'language' and 'country' sparse columns. After 10 training steps,
    accuracy evaluated on the same three examples must exceed 0.9.
    """

    def _input_fn():
      # Tensors are built in the same order the feature dict lists them.
      example_ids = constant_op.constant(['1', '2', '3'])
      languages = sparse_tensor.SparseTensor(
          values=['english', 'italian', 'spanish'],
          indices=[[0, 0], [1, 0], [2, 0]],
          dense_shape=[3, 1])
      countries = sparse_tensor.SparseTensor(
          values=['US', 'IT', 'MX'],
          indices=[[0, 0], [1, 0], [2, 0]],
          dense_shape=[3, 1])
      labels = constant_op.constant([[0], [0], [1]])
      features = {
          'example_id': example_ids,
          'language': languages,
          'country': countries,
      }
      return features, labels

    with self._single_threaded_test_session():
      language_column = feature_column_lib.sparse_column_with_hash_bucket(
          'language', hash_bucket_size=5)
      country_column = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      # Only the cross is used as a feature; the two base columns are not
      # passed to the classifier on their own.
      crossed = feature_column_lib.crossed_column(
          [language_column, country_column], hash_bucket_size=10)
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id', feature_columns=[crossed])
      estimator.fit(input_fn=_input_fn, steps=10)
      accuracy = estimator.evaluate(input_fn=_input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Esempio n. 2
0
  def testBucketizedFeatures(self):
    """Checks SDCALogisticClassifier trained on bucketized dense features.

    'price' and 'sq_footage' are real valued inputs that are bucketized
    before being given to the classifier; every example carries a weight of
    1.0. After 50 training steps, accuracy evaluated on the same three
    examples must exceed 0.9.
    """

    def _input_fn():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      # 'price' is fed as a flat vector while 'sq_footage' is a column
      # matrix, exactly as the two literals below are written.
      features['price'] = constant_op.constant([600.0, 1000.0, 400.0])
      features['sq_footage'] = constant_op.constant(
          [[1000.0], [600.0], [700.0]])
      features['weights'] = constant_op.constant([[1.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    with self._single_threaded_test_session():
      raw_price = feature_column_lib.real_valued_column('price')
      bucketized_price = feature_column_lib.bucketized_column(
          raw_price, boundaries=[500.0, 700.0])
      raw_sq_footage = feature_column_lib.real_valued_column('sq_footage')
      bucketized_sq_footage = feature_column_lib.bucketized_column(
          raw_sq_footage, boundaries=[650.0])
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[bucketized_price, bucketized_sq_footage],
          weight_column_name='weights',
          l2_regularization=1.0)
      estimator.fit(input_fn=_input_fn, steps=50)
      accuracy = estimator.evaluate(input_fn=_input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Esempio n. 3
0
  def testSparseFeaturesWithDuplicates(self):
    """Checks SDCALogisticClassifier when sparse entries are repeated.

    Each of the two examples lists the same sparse value five times at the
    same index, for both the 'age' and the 'gender' feature. After 50
    training steps, the evaluated loss must be below 0.060.
    """

    def _input_fn():
      example_ids = constant_op.constant(['1', '2'])
      # Five copies of [0, 0] followed by five copies of [1, 0]; a fresh list
      # is built for every SparseTensor below.
      ages = sparse_tensor.SparseTensor(
          values=['20-29'] * 5 + ['31-40'] * 5,
          indices=[[row, 0] for row in (0, 1) for _ in range(5)],
          dense_shape=[2, 1])
      genders = sparse_tensor.SparseTensor(
          values=['m'] * 5 + ['f'] * 5,
          indices=[[row, 0] for row in (0, 1) for _ in range(5)],
          dense_shape=[2, 1])
      labels = constant_op.constant([[1], [0]])
      features = {
          'example_id': example_ids,
          'age': ages,
          'gender': genders,
      }
      return features, labels

    with self._single_threaded_test_session():
      age_column = feature_column_lib.sparse_column_with_hash_bucket(
          'age', hash_bucket_size=10)
      gender_column = feature_column_lib.sparse_column_with_hash_bucket(
          'gender', hash_bucket_size=10)
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[age_column, gender_column])
      estimator.fit(input_fn=_input_fn, steps=50)
      loss = estimator.evaluate(input_fn=_input_fn, steps=1)['loss']
      self.assertLess(loss, 0.060)
Esempio n. 4
0
  def testWeightedSparseFeatures(self):
    """Checks SDCALogisticClassifier trained on a weighted sparse column.

    The hashed 'country' sparse column is weighted by the sparse 'price'
    feature, and that weighted column is the only feature column used.
    After 50 training steps, accuracy evaluated on the same three examples
    must exceed 0.9.
    """

    def _input_fn():
      example_ids = constant_op.constant(['1', '2', '3'])
      # 'price' and 'country' use identical indices and dense shape.
      prices = sparse_tensor.SparseTensor(
          values=[2., 3., 1.],
          indices=[[0, 0], [1, 0], [2, 0]],
          dense_shape=[3, 5])
      countries = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 0], [2, 0]],
          dense_shape=[3, 5])
      labels = constant_op.constant([[1], [0], [1]])
      features = {
          'example_id': example_ids,
          'price': prices,
          'country': countries,
      }
      return features, labels

    with self._single_threaded_test_session():
      country_column = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      weighted_country = feature_column_lib.weighted_sparse_column(
          country_column, 'price')
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id', feature_columns=[weighted_country])
      estimator.fit(input_fn=_input_fn, steps=50)
      accuracy = estimator.evaluate(input_fn=_input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Esempio n. 5
0
  def testRealValuedFeatureWithHigherDimension(self):
    """Checks SDCALogisticClassifier on a 2-dimensional real valued column.

    After 100 training steps, the loss evaluated on the same two examples
    must be below 0.05.
    """

    # The input mirrors the one in testRealValuedFeatures, except that its two
    # 1-dimensional dense features are merged into one 2-dimensional feature.
    def _input_fn():
      example_ids = constant_op.constant(['1', '2'])
      dense_values = constant_op.constant([[500.0, 800.0], [200.0, 600.0]])
      labels = constant_op.constant([[0], [1]])
      features = {
          'example_id': example_ids,
          'dense_feature': dense_values,
      }
      return features, labels

    with self._single_threaded_test_session():
      dense_column = feature_column_lib.real_valued_column(
          'dense_feature', dimension=2)
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id', feature_columns=[dense_column])
      estimator.fit(input_fn=_input_fn, steps=100)
      metrics = estimator.evaluate(input_fn=_input_fn, steps=1)
      self.assertLess(metrics['loss'], 0.05)
Esempio n. 6
0
  def testPartitionedMixedFeatures(self):
    """Checks SDCALogisticClassifier on mixed features with a partitioner.

    The classifier receives a real valued column, a bucketized column, a
    hashed sparse column and a cross of the latter two, together with example
    weights and a fixed-size partitioner using 2 shards along axis 0. After
    50 training steps, accuracy evaluated on the same three examples must
    exceed 0.9.
    """

    def _input_fn():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['price'] = constant_op.constant([[0.6], [0.8], [0.3]])
      features['sq_footage'] = constant_op.constant([900.0, 700.0, 600.0])
      features['country'] = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 3], [2, 1]],
          dense_shape=[3, 5])
      # The first example is weighted three times as much as the others.
      features['weights'] = constant_op.constant([[3.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    with self._single_threaded_test_session():
      price_column = feature_column_lib.real_valued_column('price')
      raw_sq_footage = feature_column_lib.real_valued_column('sq_footage')
      bucketized_sq_footage = feature_column_lib.bucketized_column(
          raw_sq_footage, boundaries=[650.0, 800.0])
      country_column = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      sq_footage_x_country = feature_column_lib.crossed_column(
          [bucketized_sq_footage, country_column], hash_bucket_size=10)
      all_columns = [
          price_column,
          bucketized_sq_footage,
          country_column,
          sq_footage_x_country,
      ]
      two_shard_partitioner = partitioned_variables.fixed_size_partitioner(
          num_shards=2, axis=0)
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=all_columns,
          weight_column_name='weights',
          partitioner=two_shard_partitioner)
      estimator.fit(input_fn=_input_fn, steps=50)
      accuracy = estimator.evaluate(input_fn=_input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Esempio n. 7
0
  def testRealValuedFeatures(self):
    """Checks SDCALogisticClassifier trained on real valued features.

    Two 1-dimensional dense features, 'maintenance_cost' and 'sq_footage',
    are used with per-example weights of 1.0. After 100 training steps, the
    loss evaluated on the same two examples must be below 0.05.
    """

    def _input_fn():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2'])
      # 'maintenance_cost' is fed as a flat vector while 'sq_footage' is a
      # column matrix, exactly as the two literals below are written.
      features['maintenance_cost'] = constant_op.constant([500.0, 200.0])
      features['sq_footage'] = constant_op.constant([[800.0], [600.0]])
      features['weights'] = constant_op.constant([[1.0], [1.0]])
      labels = constant_op.constant([[0], [1]])
      return features, labels

    with self._single_threaded_test_session():
      maintenance_cost_column = feature_column_lib.real_valued_column(
          'maintenance_cost')
      sq_footage_column = feature_column_lib.real_valued_column('sq_footage')
      estimator = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[maintenance_cost_column, sq_footage_column],
          weight_column_name='weights')
      estimator.fit(input_fn=_input_fn, steps=100)
      metrics = estimator.evaluate(input_fn=_input_fn, steps=1)
      self.assertLess(metrics['loss'], 0.05)