def testBiasAndOtherColumns(self):
        """SDCALinearRegressor has valid bias weight with other columns present."""

        def input_fn():
            """Test dataset probing the bias weight alongside other features.

            Half of the instances in this input have feature 'a', the rest
            have feature 'b', and we expect the bias to be added to each
            instance as well. 0.4 of all instances that have feature 'a' are
            positive, and 0.2 of all instances that have feature 'b' are
            positive. The labels in the dataset are ordered to appear shuffled
            since SDCA expects shuffled data, and converges faster with this
            pseudo-random ordering.

            If the bias was centered we would expect the weights to be:
              bias: 0.3
              a: 0.1
              b: -0.1
            Until b/29339026 is resolved, the bias gets regularized with the
            same global value as the other columns, and so the expected
            weights get shifted and are:
              bias: 0.2
              a: 0.2
              b: 0.0

            Returns:
              The test dataset as a (features, labels) tuple.
            """
            num_examples = 200
            # Floor division: num_examples is even, so this is exact, and it
            # avoids the float round-trip of int(num_examples / 2).
            half = num_examples // 2
            return {
                'example_id':
                constant_op.constant([str(x + 1)
                                      for x in range(num_examples)]),
                'a':
                constant_op.constant([[1]] * half + [[0]] * half),
                'b':
                constant_op.constant([[0]] * half + [[1]] * half),
            }, constant_op.constant(
                [[x] for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * (half // 10) +
                 [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * (half // 10)])

        with self._single_threaded_test_session():
            regressor = sdca_estimator.SDCALinearRegressor(
                example_id_column='example_id',
                feature_columns=[
                    feature_column_lib.real_valued_column('a'),
                    feature_column_lib.real_valued_column('b')
                ])

            regressor.fit(input_fn=input_fn, steps=200)

            variable_names = regressor.get_variable_names()
            self.assertIn('linear/bias_weight', variable_names)
            self.assertIn('linear/a/weight', variable_names)
            self.assertIn('linear/b/weight', variable_names)
            # TODO(b/29339026): Change the expected results to expect a centered bias.
            self.assertNear(
                regressor.get_variable_value('linear/bias_weight')[0],
                0.2,
                err=0.05)
            self.assertNear(regressor.get_variable_value('linear/a/weight')[0],
                            0.2,
                            err=0.05)
            self.assertNear(regressor.get_variable_value('linear/b/weight')[0],
                            0.0,
                            err=0.05)
    def testSdcaOptimizerSparseFeaturesWithL1Reg(self):
        """SDCALinearRegressor works with sparse features and L1 regularization."""

        def input_fn():
            """Returns a small weighted dataset with one dense and one sparse column."""
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                constant_op.constant([0.4, 0.6, 0.3]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
                'weights':
                constant_op.constant([[10.0], [10.0], [10.0]])
            }, constant_op.constant([[1.4], [-0.8], [2.6]])

        with self._single_threaded_test_session():
            price = feature_column_lib.real_valued_column('price')
            country = feature_column_lib.sparse_column_with_hash_bucket(
                'country', hash_bucket_size=5)
            # Regressor with no L1 regularization.
            regressor = sdca_estimator.SDCALinearRegressor(
                example_id_column='example_id',
                feature_columns=[price, country],
                weight_column_name='weights')
            regressor.fit(input_fn=input_fn, steps=20)
            no_l1_reg_loss = regressor.evaluate(input_fn=input_fn,
                                                steps=1)['loss']
            variable_names = regressor.get_variable_names()
            self.assertIn('linear/price/weight', variable_names)
            self.assertIn('linear/country/weights', variable_names)
            no_l1_reg_weights = {
                'linear/price/weight':
                regressor.get_variable_value('linear/price/weight'),
                'linear/country/weights':
                regressor.get_variable_value('linear/country/weights'),
            }

            # Regressor with L1 regularization.
            regressor = sdca_estimator.SDCALinearRegressor(
                example_id_column='example_id',
                feature_columns=[price, country],
                l1_regularization=1.0,
                weight_column_name='weights')
            regressor.fit(input_fn=input_fn, steps=20)
            l1_reg_loss = regressor.evaluate(input_fn=input_fn,
                                             steps=1)['loss']
            l1_reg_weights = {
                'linear/price/weight':
                regressor.get_variable_value('linear/price/weight'),
                'linear/country/weights':
                regressor.get_variable_value('linear/country/weights'),
            }

            # Unregularized loss is lower when there is no L1 regularization.
            self.assertLess(no_l1_reg_loss, l1_reg_loss)
            self.assertLess(no_l1_reg_loss, 0.05)

            # But weights returned by the regressor with L1 regularization have
            # smaller L1 norm.
            l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0
            for var_name in sorted(l1_reg_weights):
                # Sum the |w| entries at C speed via NumPy rather than a
                # Python-level sum over a flattened array.
                l1_reg_weights_norm += np.abs(l1_reg_weights[var_name]).sum()
                no_l1_reg_weights_norm += np.abs(
                    no_l1_reg_weights[var_name]).sum()
            self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)