Example #1
0
    def testJointLinearModel(self):
        """Checks that the evaluation loss shrinks with more training.

        The estimator is fit for 1000 steps and evaluated, then `fit` is
        called again with 2000 steps and it is evaluated once more. The second
        loss must be lower than the first and also below 0.01.
        """
        def input_fn():
            """Returns a single example with two sparse string features."""
            age_ids = sparse_tensor.SparseTensor(
                values=['1'], indices=[[0, 0]], dense_shape=[1, 1])
            language_ids = sparse_tensor.SparseTensor(
                values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
            labels = constant_op.constant([[1]])
            return {'age': age_ids, 'language': language_ids}, labels

        def current_loss(estimator):
            """Evaluates `estimator` for one step and returns its loss."""
            return estimator.evaluate(input_fn=input_fn, steps=1)['loss']

        language = feature_column.sparse_column_with_hash_bucket(
            'language', 100)
        age = feature_column.sparse_column_with_hash_bucket('age', 2)

        classifier = _joint_linear_estimator(
            head_lib.multi_class_head(n_classes=2),
            feature_columns=[age, language])

        classifier.fit(input_fn=input_fn, steps=1000)
        loss_after_first_fit = current_loss(classifier)

        classifier.fit(input_fn=input_fn, steps=2000)
        loss_after_second_fit = current_loss(classifier)

        self.assertLess(loss_after_second_fit, loss_after_first_fit)
        self.assertLess(loss_after_second_fit, 0.01)
Example #2
0
  def testCrossedFeatures(self):
    """Trains SDCALogisticClassifier on a language x country crossed column.

    Only the crossed column is handed to the classifier; after 10 training
    steps the evaluation accuracy must exceed 0.9.
    """

    def input_fn():
      """Returns three examples, each with one language and one country."""
      one_value_per_row = [[0, 0], [1, 0], [2, 0]]
      example_ids = constant_op.constant(['1', '2', '3'])
      languages = sparse_tensor.SparseTensor(
          values=['english', 'italian', 'spanish'],
          indices=one_value_per_row,
          dense_shape=[3, 1])
      countries = sparse_tensor.SparseTensor(
          values=['US', 'IT', 'MX'],
          indices=one_value_per_row,
          dense_shape=[3, 1])
      labels = constant_op.constant([[0], [0], [1]])
      features = {
          'example_id': example_ids,
          'language': languages,
          'country': countries,
      }
      return features, labels

    with self._single_threaded_test_session():
      language_col = feature_column_lib.sparse_column_with_hash_bucket(
          'language', hash_bucket_size=5)
      country_col = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      crossed_col = feature_column_lib.crossed_column(
          [language_col, country_col], hash_bucket_size=10)

      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id', feature_columns=[crossed_col])
      classifier.fit(input_fn=input_fn, steps=10)

      accuracy = classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Example #3
0
  def testSparseFeaturesWithDuplicates(self):
    """Trains SDCALogisticClassifier on sparse features with repeated entries.

    Each example lists the same (row, column) index five times for both
    features. After 50 training steps the evaluation loss must be below 0.060.
    """

    def input_fn():
      """Returns two examples whose sparse entries are each repeated 5 times."""
      # Five copies of index [0, 0] followed by five copies of [1, 0].
      repeated_indices = [[0, 0]] * 5 + [[1, 0]] * 5
      example_ids = constant_op.constant(['1', '2'])
      age_values = sparse_tensor.SparseTensor(
          values=['20-29'] * 5 + ['31-40'] * 5,
          indices=repeated_indices,
          dense_shape=[2, 1])
      gender_values = sparse_tensor.SparseTensor(
          values=['m'] * 5 + ['f'] * 5,
          indices=repeated_indices,
          dense_shape=[2, 1])
      labels = constant_op.constant([[1], [0]])
      features = {
          'example_id': example_ids,
          'age': age_values,
          'gender': gender_values,
      }
      return features, labels

    with self._single_threaded_test_session():
      age_col = feature_column_lib.sparse_column_with_hash_bucket(
          'age', hash_bucket_size=10)
      gender_col = feature_column_lib.sparse_column_with_hash_bucket(
          'gender', hash_bucket_size=10)

      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[age_col, gender_col])
      classifier.fit(input_fn=input_fn, steps=50)

      loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
      self.assertLess(loss, 0.060)
Example #4
0
    def testSparseFeatures(self):
        """Checks the SVM classifier on a hashed sparse feature.

        The model uses a real-valued `price` column together with a hashed
        `country` column. After 30 training steps the evaluation accuracy is
        expected to equal 1.0 to three decimal places.
        """
        def input_fn():
            """Returns three examples with a price and a country each."""
            example_ids = constant_op.constant(['1', '2', '3'])
            prices = constant_op.constant([[0.8], [0.6], [0.3]])
            countries = sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1])
            labels = constant_op.constant([[0], [1], [1]])
            features = {
                'example_id': example_ids,
                'price': prices,
                'country': countries,
            }
            return features, labels

        price_col = feature_column.real_valued_column('price')
        country_col = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)

        svm_classifier = svm.SVM(
            feature_columns=[price_col, country_col],
            example_id_column='example_id',
            l1_regularization=0.0,
            l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)

        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        self.assertAlmostEqual(metrics['accuracy'], 1.0, places=3)
Example #5
0
  def testWeightedSparseFeatures(self):
    """Trains SDCALogisticClassifier on a weighted sparse column.

    The hashed `country` column is weighted by the sparse `price` feature.
    After 50 training steps the evaluation accuracy must exceed 0.9.
    """

    def input_fn():
      """Returns three examples with matching price and country entries."""
      first_column_of_each_row = [[0, 0], [1, 0], [2, 0]]
      example_ids = constant_op.constant(['1', '2', '3'])
      prices = sparse_tensor.SparseTensor(
          values=[2., 3., 1.],
          indices=first_column_of_each_row,
          dense_shape=[3, 5])
      countries = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=first_column_of_each_row,
          dense_shape=[3, 5])
      labels = constant_op.constant([[1], [0], [1]])
      features = {
          'example_id': example_ids,
          'price': prices,
          'country': countries,
      }
      return features, labels

    with self._single_threaded_test_session():
      country_col = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      weighted_country_col = feature_column_lib.weighted_sparse_column(
          country_col, 'price')

      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[weighted_country_col])
      classifier.fit(input_fn=input_fn, steps=50)

      accuracy = classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Example #6
0
    def testExport(self):
        """Tests export model for servo.

        Fits a DebugClassifier for 5 steps and exports it into a fresh
        temporary directory. The test passes when the export call completes
        without raising; the directory is removed afterwards so that repeated
        runs do not leave exported models behind.
        """
        # Imported locally so the block does not rely on a file-level import.
        import shutil

        def input_fn():
            """Returns one example with a dense age and a sparse language."""
            return {
                'age':
                constant_op.constant([1]),
                'language':
                sparse_tensor.SparseTensor(values=['english'],
                                           indices=[[0, 0]],
                                           dense_shape=[1, 1])
            }, constant_op.constant([[1]])

        language = feature_column.sparse_column_with_hash_bucket(
            'language', 100)
        # These columns are only used below to parse serialized examples at
        # export time; the classifier itself is constructed without them.
        feature_columns = [
            feature_column.real_valued_column('age'),
            feature_column.embedding_column(language, dimension=1)
        ]

        classifier = debug.DebugClassifier(config=run_config.RunConfig(
            tf_random_seed=1))
        classifier.fit(input_fn=input_fn, steps=5)

        def default_input_fn(unused_estimator, examples):
            """Parses `examples` according to `feature_columns`."""
            return feature_column_ops.parse_feature_columns_from_examples(
                examples, feature_columns)

        export_dir = tempfile.mkdtemp()
        try:
            classifier.export(export_dir, input_fn=default_input_fn)
        finally:
            # Always clean up, even when the export raises.
            shutil.rmtree(export_dir, ignore_errors=True)
Example #7
0
  def testMixedFeaturesArbitraryWeightsPartitioned(self):
    """Trains a partitioned SDCALinearRegressor on a mix of feature types.

    The model combines a real-valued column, a bucketized column, a hashed
    sparse column and a cross of the latter two, with per-example weights and
    a fixed-size partitioner using two shards. After 20 training steps the
    evaluation loss must be below 0.05.
    """

    def input_fn():
      """Returns three weighted examples with real-valued targets."""
      example_ids = constant_op.constant(['1', '2', '3'])
      prices = constant_op.constant([[0.6], [0.8], [0.3]])
      square_feet = constant_op.constant([[900.0], [700.0], [600.0]])
      countries = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 3], [2, 1]],
          dense_shape=[3, 5])
      example_weights = constant_op.constant([[3.0], [5.0], [7.0]])
      targets = constant_op.constant([[1.55], [-1.25], [-3.0]])
      features = {
          'example_id': example_ids,
          'price': prices,
          'sq_footage': square_feet,
          'country': countries,
          'weights': example_weights,
      }
      return features, targets

    with self._single_threaded_test_session():
      price_col = feature_column_lib.real_valued_column('price')
      sq_footage_col = feature_column_lib.real_valued_column('sq_footage')
      sq_footage_bucket_col = feature_column_lib.bucketized_column(
          sq_footage_col, boundaries=[650.0, 800.0])
      country_col = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      sq_footage_country_col = feature_column_lib.crossed_column(
          [sq_footage_bucket_col, country_col], hash_bucket_size=10)

      two_shards = partitioned_variables.fixed_size_partitioner(
          num_shards=2, axis=0)
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id',
          feature_columns=[
              price_col,
              sq_footage_bucket_col,
              country_col,
              sq_footage_country_col,
          ],
          l2_regularization=1.0,
          weight_column_name='weights',
          partitioner=two_shards)
      regressor.fit(input_fn=input_fn, steps=20)

      metrics = regressor.evaluate(input_fn=input_fn, steps=1)
      self.assertLess(metrics['loss'], 0.05)
Example #8
0
  def testPartitionedMixedFeatures(self):
    """Trains a partitioned SDCALogisticClassifier on a mix of feature types.

    The model combines a real-valued column, a bucketized column, a hashed
    sparse column and a cross of the latter two, with per-example weights and
    a fixed-size partitioner using two shards. After 50 training steps the
    evaluation accuracy must exceed 0.9.
    """

    def input_fn():
      """Returns three weighted examples with binary labels."""
      example_ids = constant_op.constant(['1', '2', '3'])
      prices = constant_op.constant([[0.6], [0.8], [0.3]])
      # Note: unlike `price`, this feature is supplied as a flat vector.
      square_feet = constant_op.constant([900.0, 700.0, 600.0])
      countries = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 3], [2, 1]],
          dense_shape=[3, 5])
      example_weights = constant_op.constant([[3.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      features = {
          'example_id': example_ids,
          'price': prices,
          'sq_footage': square_feet,
          'country': countries,
          'weights': example_weights,
      }
      return features, labels

    with self._single_threaded_test_session():
      price_col = feature_column_lib.real_valued_column('price')
      sq_footage_col = feature_column_lib.real_valued_column('sq_footage')
      sq_footage_bucket_col = feature_column_lib.bucketized_column(
          sq_footage_col, boundaries=[650.0, 800.0])
      country_col = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      sq_footage_country_col = feature_column_lib.crossed_column(
          [sq_footage_bucket_col, country_col], hash_bucket_size=10)

      two_shards = partitioned_variables.fixed_size_partitioner(
          num_shards=2, axis=0)
      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[
              price_col,
              sq_footage_bucket_col,
              country_col,
              sq_footage_country_col,
          ],
          weight_column_name='weights',
          partitioner=two_shards)
      classifier.fit(input_fn=input_fn, steps=50)

      accuracy = classifier.evaluate(input_fn=input_fn, steps=1)['accuracy']
      self.assertGreater(accuracy, 0.9)
Example #9
0
    def testMixedFeatures(self):
        """Checks the SVM classifier on a mix of feature types.

        The model combines a real-valued column, a bucketized column, a
        hashed sparse column and a cross of the latter two, with per-example
        weights. After 30 training steps the evaluation accuracy is expected
        to equal 1.0 to three decimal places.
        """
        def input_fn():
            """Returns three weighted examples with binary labels."""
            example_ids = constant_op.constant(['1', '2', '3'])
            # Note: unlike `sq_footage`, this feature is a flat vector.
            prices = constant_op.constant([0.6, 0.8, 0.3])
            square_feet = constant_op.constant([[900.0], [700.0], [600.0]])
            countries = sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5])
            example_weights = constant_op.constant([[3.0], [1.0], [1.0]])
            labels = constant_op.constant([[1], [0], [1]])
            features = {
                'example_id': example_ids,
                'price': prices,
                'sq_footage': square_feet,
                'country': countries,
                'weights': example_weights,
            }
            return features, labels

        price_col = feature_column.real_valued_column('price')
        sq_footage_col = feature_column.real_valued_column('sq_footage')
        sq_footage_bucket_col = feature_column.bucketized_column(
            sq_footage_col, boundaries=[650.0, 800.0])
        country_col = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        sq_footage_country_col = feature_column.crossed_column(
            [sq_footage_bucket_col, country_col], hash_bucket_size=10)

        svm_classifier = svm.SVM(
            feature_columns=[
                price_col,
                sq_footage_bucket_col,
                country_col,
                sq_footage_country_col,
            ],
            example_id_column='example_id',
            weight_column_name='weights',
            l1_regularization=0.1,
            l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)

        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        self.assertAlmostEqual(metrics['accuracy'], 1.0, places=3)
Example #10
0
  def testSdcaOptimizerSparseFeaturesWithL1Reg(self):
    """SDCALinearRegressor works with sparse features and L1 regularization.

    Two regressors are trained for 20 steps on the same input: one with the
    default L1 setting and one with `l1_regularization=1.0`. The test checks
    that the regressor without the explicit L1 term reaches the lower
    evaluation loss (below 0.05), while the regressor trained with L1
    regularization ends up with the smaller L1 norm over its `price` and
    `country` weights.
    """

    def input_fn():
      """Returns three equally weighted examples with real-valued targets."""
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([0.4, 0.6, 0.3]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[10.0], [10.0], [10.0]])
      }, constant_op.constant([[1.4], [-0.8], [2.6]])

    # Names of the model variables whose magnitudes are compared below.
    weight_var_names = ('linear/price/weight', 'linear/country/weights')

    with self._single_threaded_test_session():
      price = feature_column_lib.real_valued_column('price')
      country = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)

      def train_and_evaluate(**regularization):
        """Fits a fresh regressor and returns it with its evaluation loss."""
        regressor = sdca_estimator.SDCALinearRegressor(
            example_id_column='example_id',
            feature_columns=[price, country],
            weight_column_name='weights',
            **regularization)
        regressor.fit(input_fn=input_fn, steps=20)
        loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
        return regressor, loss

      def weights_l1_norm(regressor):
        """Sums the absolute values of all compared weight variables."""
        return sum(
            np.sum(np.absolute(regressor.get_variable_value(name)))
            for name in weight_var_names)

      # Regressor with no L1 regularization.
      no_l1_regressor, no_l1_reg_loss = train_and_evaluate()
      variable_names = no_l1_regressor.get_variable_names()
      for name in weight_var_names:
        self.assertIn(name, variable_names)
      no_l1_reg_weights_norm = weights_l1_norm(no_l1_regressor)

      # Regressor with L1 regularization.
      l1_regressor, l1_reg_loss = train_and_evaluate(l1_regularization=1.0)
      l1_reg_weights_norm = weights_l1_norm(l1_regressor)

      # Unregularized loss is lower when there is no L1 regularization.
      self.assertLess(no_l1_reg_loss, l1_reg_loss)
      self.assertLess(no_l1_reg_loss, 0.05)

      # But weights returned by the regressor with L1 regularization have
      # smaller L1 norm.
      self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)