Esempio n. 1
0
  def testOutOfRangeSparseFeatures(self):
    """Expects running the train op to raise an 'indices' error.

    Variables are built with make_variable_dict(0, 0) while the examples use
    feature ids 0 and 1 -- presumably that puts the ids out of range for the
    variables (confirm against make_variable_dict).
    """
    # Two unit-weight examples with labels 0 and 1.
    weights = [1.0, 1.0]
    protos = [
        make_example_proto({'age': [0], 'gender': [0]}, 0),
        make_example_proto({'age': [1], 'gender': [1]}, 1),
    ]
    with self._single_threaded_test_session():
      example_dict = make_example_dict(protos, weights)
      variable_dict = make_variable_dict(0, 0)
      sdca_options = {
          'symmetric_l2_regularization': 1,
          'symmetric_l1_regularization': 0,
          'loss_type': 'logistic_loss',
      }

      model = SdcaModel(example_dict, variable_dict, sdca_options)
      variables_lib.global_variables_initializer().run()
      step = model.minimize()
      # Only running the op is inside the assertion; building it is not.
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   'indices.*'):
        step.run()
Esempio n. 2
0
    def testL2Regularization(self):
        """Squared loss with L2=16: expects predictions at 1/5 of the label."""
        # Two pairs of identical examples, one pair per label.
        first_label, second_label = -10.0, 14.0
        protos = [
            make_example_proto({"age": [0], "gender": [0]}, first_label),
            make_example_proto({"age": [0], "gender": [0]}, first_label),
            make_example_proto({"age": [1], "gender": [1]}, second_label),
            make_example_proto({"age": [1], "gender": [1]}, second_label),
        ]
        weights = [1.0, 1.0, 1.0, 1.0]
        with self._single_threaded_test_session():
            example_dict = make_example_dict(protos, weights)
            variable_dict = make_variable_dict(1, 1)
            sdca_options = {
                "symmetric_l2_regularization": 16,
                "symmetric_l1_regularization": 0,
                "loss_type": "squared_loss",
            }

            model = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(example_dict)

            step = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # Minimizing the regularized loss
            #   (label - 2 * weight)^2 + L2 * 16 * weight^2
            # is expected to leave each prediction at 1/5 of its label.
            low = first_label / 5.0
            high = second_label / 5.0
            self.assertAllClose([low, low, high, high], predictions.eval(), rtol=0.01)
Esempio n. 3
0
    def testSimple(self):
        """Squared loss with L2=1: checks predictions and the duality gap."""
        protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
        ]
        weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            example_dict = make_example_dict(protos, weights)
            variable_dict = make_variable_dict(1, 1)
            sdca_options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "squared_loss",
            }

            model = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(example_dict)
            step = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # Minimizing (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 is
            # expected to give predictions equal to 2/3 of the labels.
            expected = [-20.0 / 3.0, 28.0 / 3.0]
            self.assertAllClose(expected, predictions.eval(), rtol=0.005)
            # The gap is only approximate, so at convergence it may even be
            # slightly negative; just require it to be close to 0.0.
            gap = model.approximate_duality_gap()
            self.assertAllClose(0.0, gap.eval(), atol=1e-2)
Esempio n. 4
0
    def testL1Regularization(self):
        """Squared loss with L1=4.0 and L2=1.0: checks predictions and loss."""
        protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
        ]
        weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            example_dict = make_example_dict(protos, weights)
            variable_dict = make_variable_dict(1, 1)
            sdca_options = {
                "symmetric_l2_regularization": 1.0,
                "symmetric_l1_regularization": 4.0,
                "loss_type": "squared_loss",
            }
            model = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            prediction = model.predictions(example_dict)
            loss = model.regularized_loss(example_dict)

            step = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # Regularized loss being minimized:
            #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
            # NOTE(review): an earlier revision of this comment quoted 48/5 for
            # the second prediction, whereas the assertion checks 20/3 --
            # confirm the intended optimum.
            expected_predictions = [-4.0, 20.0 / 3.0]
            self.assertAllClose(expected_predictions, prediction.eval(), rtol=0.08)

            # Expected loss: the regularized loss above, summed over the
            # examples after plugging in the optimal weights.
            expected_loss = 308.0 / 6.0
            self.assertAllClose(expected_loss, loss.eval(), atol=0.01)
Esempio n. 5
0
  def testLinearFeatureValues(self):
    """Checks squared-loss predictions after a single minimize() run.

    The examples pass a third argument (-2.0 / 2.0) to make_example_proto --
    presumably the feature value; confirm against that helper.
    """
    protos = [
        make_example_proto({'age': [0], 'gender': [0]}, -10.0, -2.0),
        make_example_proto({'age': [1], 'gender': [1]}, 14.0, 2.0),
    ]
    weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      example_dict = make_example_dict(protos, weights)

      variable_dict = make_variable_dict(1, 1)
      sdca_options = {
          'symmetric_l2_regularization': 0.5,
          'symmetric_l1_regularization': 0,
          'loss_type': 'squared_loss',
          'prior': 0.0,
      }
      # The initializer runs before the model is constructed in this test.
      tf.initialize_all_variables().run()
      model = SdcaModel(CONTAINER, example_dict, variable_dict, sdca_options)
      prediction = model.predictions(example_dict)

      step = model.minimize()
      step.run()

      # Minimizing (label - 2 * 2 * weight)^2 / 2 + L2 * 2 * weight^2 is
      # expected to give predictions equal to 8/9 of the labels.
      expected = [-10.0 * 8 / 9, 14.0 * 8 / 9]
      self.assertAllClose(expected, prediction.eval(), rtol=0.07)
Esempio n. 6
0
 def testDuplicateExampleIds(self):
   """Expects training to fail when every example has the same id.

   Predictions are checked to be [0.5, 0.5] both before and after the
   failing training step.
   """
   # One negative (label 0) and one positive (label 1) example.
   protos = [
       make_example_proto({'age': [0], 'gender': [0]}, 0),
       make_example_proto({'age': [1], 'gender': [1]}, 1),
   ]
   weights = [1.0, 1.0]
   with self._single_threaded_test_session():
     example_dict = make_example_dict(protos, weights)
     # Overwrite every id with the same string to force a collision.
     original_ids = example_dict['example_ids']
     example_dict['example_ids'] = ['duplicate_id' for _ in original_ids]
     variable_dict = make_variable_dict(1, 1)
     sdca_options = {
         'symmetric_l2_regularization': 0.5,
         'symmetric_l1_regularization': 0,
         'loss_type': 'logistic_loss',
     }
     tf.initialize_all_variables().run()
     model = SdcaModel(CONTAINER, example_dict, variable_dict, sdca_options)
     untrained = [0.5, 0.5]
     self.assertAllClose(untrained, model.predictions(example_dict).eval())
     with self.assertRaisesOpError('Detected 1 duplicates in example_ids'):
       model.minimize().run()
     # The failed step must leave the predictions where they started.
     self.assertAllClose(untrained, model.predictions(example_dict).eval())
Esempio n. 7
0
    def testDenseFeatures(self):
        """Squared loss on dense features with L2=1: checks predictions and loss."""
        with self._single_threaded_test_session():
            examples = make_dense_examples_dict(
                dense_feature_values=[[-2.0, 0.0], [0.0, 2.0]],
                weights=[1.0, 1.0],
                labels=[-10.0, 14.0])
            variables = make_dense_variable_dict(2, 2)
            options = dict(symmetric_l2_regularization=1,
                           symmetric_l1_regularization=0,
                           loss_type='squared_loss')
            lr = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = lr.predictions(examples)

            train_op = lr.minimize()
            # `range` rather than the Python 2-only `xrange`, so the loop
            # works on either interpreter and matches the sibling tests.
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # Predictions should be 4/5 of label due to minimizing regularized loss:
            #   (label - 2 * weight)^2 / 2 + L2 * weight^2
            self.assertAllClose([-10.0 * 4 / 5, 14.0 * 4 / 5],
                                predictions.eval(),
                                rtol=0.01)

            # The regularized loss is evaluated after training has finished.
            loss = lr.regularized_loss(examples)
            self.assertAllClose(148.0 / 10.0, loss.eval(), atol=0.01)
Esempio n. 8
0
  def testOutOfRangeSparseFeatures(self):
    """Checks that training raises InvalidArgumentError matching 'indices.*'.

    The variables come from make_variable_dict(0, 0); the examples reference
    feature ids 0 and 1. NOTE(review): presumably (0, 0) creates variables
    too small for those ids -- verify against make_variable_dict.
    """
    labelled_features = [
        ({'age': [0], 'gender': [0]}, 0),
        ({'age': [1], 'gender': [1]}, 1),
    ]
    protos = [
        make_example_proto(features, label)
        for features, label in labelled_features
    ]
    unit_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      example_dict = make_example_dict(protos, unit_weights)
      variable_dict = make_variable_dict(0, 0)
      sdca_options = dict(
          loss_type='logistic_loss',
          symmetric_l1_regularization=0,
          symmetric_l2_regularization=1)

      model = SdcaModel(example_dict, variable_dict, sdca_options)
      variables_lib.global_variables_initializer().run()
      train_step = model.minimize()
      # The error is expected when the op runs, not when it is built.
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   'indices.*'):
        train_step.run()
Esempio n. 9
0
  def testDenseFeaturesWithArbitraryWeights(self):
    """Squared loss on dense features with example weights 20.0 and 10.0."""
    with self._single_threaded_test_session():
      example_dict, variable_dict = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
          weights=[20.0, 10.0],
          labels=[10.0, -5.0])
      sdca_options = {
          'symmetric_l2_regularization': 5.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'squared_loss',
      }
      model = SdcaModel(example_dict, variable_dict, sdca_options)
      tf.initialize_all_variables().run()
      predictions = model.predictions(example_dict)

      step = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        step.run()

      # With s_1, s_2 denoting the *example* weights, the loss for these
      # features is
      #   1/2 s_1 (label_1-w_1)^2 + 1/2 s_2 (label_2-w_2)^2
      #   + \lambda/2 (w_1^2 + w_2^2)
      # whose optimal (variable) weights are
      #   w_1* = label_1 \cdot s_1/(\lambda + s_1) = 8.0
      #   w_2* = label_2 \cdot s_2/(\lambda + s_2) = -10/3.
      # The unnormalized regularized loss is then
      #   s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9,
      # and the actual loss is further normalized by the sum of the example
      # weights.
      expected_predictions = [8.0, -10.0/3]
      self.assertAllClose(expected_predictions, predictions.eval(), rtol=0.01)
      loss = model.regularized_loss(example_dict)
      expected_loss = 2175.0 / 270.0
      self.assertAllClose(expected_loss, loss.eval(), atol=0.01)
Esempio n. 10
0
    def testFeatureValues(self):
        """Squared loss with example weights 5.0 / 3.0 and a third proto argument.

        NOTE(review): the third make_example_proto argument (-2.0 / 2.0) is
        presumably the feature value -- confirm against that helper.
        """
        protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0, -2.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0, 2.0),
        ]
        weights = [5.0, 3.0]
        with self._single_threaded_test_session():
            example_dict = make_example_dict(protos, weights)

            variable_dict = make_variable_dict(1, 1)
            sdca_options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "squared_loss",
            }

            model = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(example_dict)

            step = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # Four (sparse) variable weights are learned: w_1, w_2 for age and
            # w_3, w_4 for gender. With labels y_1, y_2 and *example* weights
            # s_1, s_2, the loss for the given feature values is
            #   s_1/2(y_1 + 2w_1 + 2w_3)^2 + s_2/2(y_2 - 2w_2 - 2w_4)^2
            #   + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2)
            # and its optimum satisfies
            #   w_1* = w_3* = -2.0 s_1 y_1/(\lambda + 8 s_1)
            #   w_2* = w_4* = 2 \cdot s_2 y_2/(\lambda + 8 s_2).
            # Equivalently, regularization and example weights scale each
            # prediction to 8 \cdot s_i /(\lambda + 8 \cdot s_i) of its label.
            expected = [-10 * 40.0 / 41.0, 14.0 * 24 / 25.0]
            self.assertAllClose(expected, predictions.eval(), atol=0.01)
Esempio n. 11
0
  def testL1Regularization(self):
    """Trains with squared loss, L1=4.0 and L2=1.0; checks predictions and loss."""
    labelled_features = [
        ({'age': [0], 'gender': [0]}, -10.0),
        ({'age': [1], 'gender': [1]}, 14.0),
    ]
    protos = [
        make_example_proto(features, label)
        for features, label in labelled_features
    ]
    unit_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      example_dict = make_example_dict(protos, unit_weights)
      variable_dict = make_variable_dict(1, 1)
      sdca_options = dict(
          loss_type='squared_loss',
          symmetric_l1_regularization=4.0,
          symmetric_l2_regularization=1.0)
      model = SdcaModel(example_dict, variable_dict, sdca_options)
      tf.initialize_all_variables().run()
      prediction = model.predictions(example_dict)
      loss = model.regularized_loss(example_dict)

      train_step = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_step.run()

      # Regularized loss being minimized:
      #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
      # NOTE(review): an earlier revision of this comment quoted 48/5 for the
      # second prediction, whereas the assertion checks 20/3 -- confirm the
      # intended optimum.
      self.assertAllClose([-4.0, 20.0 / 3.0], prediction.eval(), rtol=0.08)

      # Expected loss: the regularized loss above, summed over the examples
      # after plugging in the optimal weights.
      self.assertAllClose(308.0 / 6.0, loss.eval(), atol=0.01)
Esempio n. 12
0
  def testDenseFeaturesWithDefaultWeights(self):
    """Squared loss on dense features with unit example weights and L2=1.0."""
    with self._single_threaded_test_session():
      # The nesting of dense_features_values is kept exactly as written.
      example_dict, variable_dict = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0], [0.0]], [0.0, 1.0]],
          weights=[1.0, 1.0],
          labels=[10.0, -5.0])
      sdca_options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'squared_loss',
      }
      model = SdcaModel(example_dict, variable_dict, sdca_options)
      tf.initialize_all_variables().run()
      predictions = model.predictions(example_dict)

      step = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        step.run()

      # Loss for these features:
      #   1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2)
      # Differentiating with respect to w_1, w_2 gives the optimum
      #   w_1* = label_1/(\lambda + 1) = 10/2, w_2* = label_2/(\lambda + 1) = -5/2
      # so the unnormalized regularized loss is
      #   1/2(10-5)^2 + 1/2(5-5/2)^2 + 1/2(5^2 + (5/2)^2) = 125.0/4,
      # which is further normalized by the sum of the example weights.
      expected_predictions = [5.0, -2.5]
      self.assertAllClose(expected_predictions, predictions.eval(), rtol=0.01)
      loss = model.regularized_loss(example_dict)
      expected_loss = 125.0 / 8.0
      self.assertAllClose(expected_loss, loss.eval(), atol=0.01)
Esempio n. 13
0
  def testSimple(self):
    """Squared loss with L2=1 after 20 steps: checks predictions and gap."""
    # Setup test data
    example_protos = [
        make_example_proto(
            {'age': [0],
             'gender': [0]}, -10.0),
        make_example_proto(
            {'age': [1],
             'gender': [1]}, 14.0),
    ]
    example_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(symmetric_l2_regularization=1,
                     symmetric_l1_regularization=0,
                     loss_type='squared_loss')

      lr = SdcaModel(CONTAINER, examples, variables, options)
      tf.initialize_all_variables().run()
      predictions = lr.predictions(examples)

      # Build the training op once and re-run it: calling minimize() inside
      # the loop would add a fresh set of ops to the graph every iteration.
      train_op = lr.minimize()
      # `range` rather than the Python 2-only `xrange`, matching the
      # sibling tests.
      for _ in range(20):
        train_op.run()

      # Predictions should be 2/3 of label due to minimizing regularized loss:
      #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2
      self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0],
                          predictions.eval(),
                          rtol=0.005)
      # The approximate duality gap is compared against 0.01 with both a
      # relative and an absolute tolerance of 1e-2.
      self.assertAllClose(0.01,
                          lr.approximate_duality_gap().eval(),
                          rtol=1e-2,
                          atol=1e-2)
Esempio n. 14
0
  def testDenseFeatures(self):
    """Squared loss on dense features after 20 steps: predictions and loss."""
    with self._single_threaded_test_session():
      examples = make_dense_examples_dict(
          dense_feature_values=[[-2.0, 0.0], [0.0, 2.0]],
          weights=[1.0, 1.0],
          labels=[-10.0, 14.0])
      variables = make_dense_variable_dict(2, 2)
      options = dict(symmetric_l2_regularization=1,
                     symmetric_l1_regularization=0,
                     loss_type='squared_loss')
      lr = SdcaModel(CONTAINER, examples, variables, options)
      tf.initialize_all_variables().run()
      predictions = lr.predictions(examples)

      # Build the training op once and re-run it: calling minimize() inside
      # the loop would add a fresh set of ops to the graph every iteration.
      train_op = lr.minimize()
      # `range` rather than the Python 2-only `xrange`, matching the
      # sibling tests.
      for _ in range(20):
        train_op.run()

      # Predictions should be 4/5 of label due to minimizing regularized loss:
      #   (label - 2 * weight)^2 / 2 + L2 * weight^2
      self.assertAllClose([-10.0 * 4 / 5, 14.0 * 4 / 5],
                          predictions.eval(),
                          rtol=0.01)

      # The regularized loss is evaluated after training has finished.
      loss = lr.regularized_loss(examples)
      self.assertAllClose(148.0 / 10.0, loss.eval(), atol=0.01)
Esempio n. 15
0
    def testDenseFeaturesSeparableWithinMargins(self):
        """Hinge loss on two points, (1.0, 0.5) and (1.0, -0.5), with L2=1.0."""
        with self._single_threaded_test_session():
            example_dict, variable_dict = make_dense_examples_and_variables_dicts(
                dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]],
                weights=[1.0, 1.0],
                labels=[1.0, 0.0],
            )
            sdca_options = {
                "symmetric_l2_regularization": 1.0,
                "symmetric_l1_regularization": 0,
                "loss_type": "hinge_loss",
            }
            svm = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = svm.predictions(example_dict)
            binary_predictions = get_binary_predictions_for_hinge(predictions)

            step = svm.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # The two points can be separated by the x-axis but lie within
            # the margins, so there is unregularized loss (1/2 per example).
            # The optimal weights here are w_1~=0.0 and w_2~=1.0, giving an
            # L2 loss of ~0.25.
            self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())
            plain_loss = svm.unregularized_loss(example_dict)
            total_loss = svm.regularized_loss(example_dict)
            self.assertAllClose(0.5, plain_loss.eval(), atol=0.02)
            self.assertAllClose(0.75, total_loss.eval(), atol=0.02)
Esempio n. 16
0
    def testDenseFeaturesWeightedExamples(self):
        """Hinge loss on two dense examples weighted 3.0 and 1.0, with L2=1.0."""
        with self._single_threaded_test_session():
            example_dict, variable_dict = make_dense_examples_and_variables_dicts(
                dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]],
                weights=[3.0, 1.0],
                labels=[1.0, 0.0],
            )
            sdca_options = {
                "symmetric_l2_regularization": 1.0,
                "symmetric_l1_regularization": 0,
                "loss_type": "hinge_loss",
            }
            svm = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = svm.predictions(example_dict)
            binary_predictions = get_binary_predictions_for_hinge(predictions)
            step = svm.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            # (1.0, 0.5) carries more weight than (1.0, -0.5), so the model
            # tries to widen the margin from (1.0, 0.5), and regularization
            # leaves (1.0, -0.5) inside the margin. For these points and
            # example weights the optimal weights are w_1~=0.4 and w_2~=1.2,
            # giving an L2 loss of 0.5 * 0.25 * 0.25 * 1.6 = 0.2. The binary
            # predictions stay correct, but the boundary ends up much closer
            # to the 2nd point than to the first.
            self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())
            plain_loss = svm.unregularized_loss(example_dict)
            total_loss = svm.regularized_loss(example_dict)
            self.assertAllClose(0.2, plain_loss.eval(), atol=0.02)
            self.assertAllClose(0.4, total_loss.eval(), atol=0.02)
Esempio n. 17
0
    def testDenseFeaturesWithArbitraryWeights(self):
        """Trains squared loss (L2=5.0) on dense examples weighted 20.0 and 10.0."""
        dense_values = [[[1.0, 0.0], [0.0, 1.0]]]
        with self._single_threaded_test_session():
            example_dict, variable_dict = make_dense_examples_and_variables_dicts(
                dense_features_values=dense_values,
                weights=[20.0, 10.0],
                labels=[10.0, -5.0],
            )
            sdca_options = dict(
                loss_type="squared_loss",
                symmetric_l1_regularization=0,
                symmetric_l2_regularization=5.0,
            )
            model = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(example_dict)

            train_step = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_step.run()

            # With s_1, s_2 denoting the *example* weights, the loss for
            # these features is
            #   1/2 s_1 (label_1-w_1)^2 + 1/2 s_2 (label_2-w_2)^2
            #   + \lambda/2 (w_1^2 + w_2^2)
            # whose optimal (variable) weights are
            #   w_1* = label_1 \cdot s_1/(\lambda + s_1) = 8.0
            #   w_2* = label_2 \cdot s_2/(\lambda + s_2) = -10/3.
            # The unnormalized regularized loss is then
            #   s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2)
            #   = 2175.0/9,
            # and the actual loss is further normalized by the sum of the
            # example weights.
            self.assertAllClose([8.0, -10.0 / 3], predictions.eval(), rtol=0.01)
            loss = model.regularized_loss(example_dict)
            self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
Esempio n. 18
0
 def testNoWeightedExamples(self):
   """Expects training to fail when every example weight is 0.0.

   Predictions are checked to be [0.5, 0.5] both before and after the
   failing training step.
   """
   # One negative (label 0) and one positive (label 1) example.
   protos = [
       make_example_proto({'age': [0], 'gender': [0]}, 0),
       make_example_proto({'age': [1], 'gender': [1]}, 1),
   ]
   # Both example weights are zeroed out.
   weights = [0.0, 0.0]
   with self._single_threaded_test_session():
     example_dict = make_example_dict(protos, weights)
     variable_dict = make_variable_dict(1, 1)
     sdca_options = {
         'symmetric_l2_regularization': 0.5,
         'symmetric_l1_regularization': 0,
         'loss_type': 'logistic_loss',
     }
     tf.initialize_all_variables().run()
     model = SdcaModel(CONTAINER, example_dict, variable_dict, sdca_options)
     untrained = [0.5, 0.5]
     self.assertAllClose(untrained, model.predictions(example_dict).eval())
     with self.assertRaisesOpError(
         'No weighted examples in 2 training examples'):
       model.minimize().run()
     # The failed step must leave the predictions where they started.
     self.assertAllClose(untrained, model.predictions(example_dict).eval())
Esempio n. 19
0
    def testDenseFeaturesPerfectlySeparable(self):
        """Hinge loss on two points, (1.0, 1.0) and (1.0, -1.0), with L2=1.0."""
        with self._single_threaded_test_session():
            example_dict, variable_dict = make_dense_examples_and_variables_dicts(
                dense_features_values=[[1.0, 1.0], [1.0, -1.0]],
                weights=[1.0, 1.0],
                labels=[1.0, 0.0],
            )
            sdca_options = {
                "symmetric_l2_regularization": 1.0,
                "symmetric_l1_regularization": 0,
                "loss_type": "hinge_loss",
            }
            svm = SdcaModel(example_dict, variable_dict, sdca_options)
            tf.initialize_all_variables().run()
            predictions = svm.predictions(example_dict)
            binary_predictions = get_binary_predictions_for_hinge(predictions)

            step = svm.minimize()
            for _ in range(_MAX_ITERATIONS):
                step.run()

            self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())

            # The two points are perfectly separable by the x-axis (the SVM's
            # functional margin is >=1), so the unregularized loss is ~0.0
            # and only the l2-regularization contributes. For these
            # datapoints w_1~=0.0 and w_2~=1.0, i.e. an l2 loss of ~0.25.
            plain_loss = svm.unregularized_loss(example_dict)
            total_loss = svm.regularized_loss(example_dict)
            self.assertAllClose(0.0, plain_loss.eval(), atol=0.02)
            self.assertAllClose(0.25, total_loss.eval(), atol=0.02)
Esempio n. 20
0
  def testFractionalExampleLabel(self):
    """Expects logistic-loss training to reject labels 0.1 and 0.9.

    The check is repeated for every entry of _SHARD_NUMBERS.
    """
    # Both labels are fractional (0.1 and 0.9).
    protos = [
        make_example_proto({'age': [0], 'gender': [0]}, 0.1),
        make_example_proto({'age': [1], 'gender': [1]}, 0.9),
    ]
    weights = [1.0, 1.0]
    for shard_count in _SHARD_NUMBERS:
      with self._single_threaded_test_session():
        example_dict = make_example_dict(protos, weights)
        variable_dict = make_variable_dict(1, 1)
        sdca_options = {
            'symmetric_l2_regularization': 1,
            'symmetric_l1_regularization': 0,
            'num_table_shards': shard_count,
            'loss_type': 'logistic_loss',
        }

        model = SdcaModel(example_dict, variable_dict, sdca_options)
        variables_lib.global_variables_initializer().run()
        with self.assertRaisesOpError(
            'Only labels of 0.0 or 1.0 are supported right now.'):
          model.minimize().run()
Esempio n. 21
0
    def testFractionalLogisticExample(self):
        """Expects logistic-loss training to reject a fractional label (0.1)."""
        # One example labelled 0.1 (fractional) and one labelled 1.
        protos = [
            make_example_proto({'age': [0], 'gender': [0]}, 0.1),
            make_example_proto({'age': [1], 'gender': [1]}, 1),
        ]
        weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            example_dict = make_example_dict(protos, weights)
            variable_dict = make_variable_dict(1, 1)
            sdca_options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'loss_type': 'logistic_loss',
            }

            model = SdcaModel(CONTAINER, example_dict, variable_dict,
                              sdca_options)
            tf.initialize_all_variables().run()
            with self.assertRaisesOpError(
                    'Only labels of 0.0 or 1.0 are supported right now.'):
                model.minimize().run()
Esempio n. 22
0
  def testFractionalExampleLabel(self):
    """Expects logistic-loss training to reject a fractional label (0.1).

    The check is repeated for every entry of _SHARD_NUMBERS.
    """
    # One example labelled 0.1 (fractional) and one labelled 1.
    labelled_features = [
        ({'age': [0], 'gender': [0]}, 0.1),
        ({'age': [1], 'gender': [1]}, 1),
    ]
    protos = [
        make_example_proto(features, label)
        for features, label in labelled_features
    ]
    unit_weights = [1.0, 1.0]
    for shard_count in _SHARD_NUMBERS:
      with self._single_threaded_test_session():
        example_dict = make_example_dict(protos, unit_weights)
        variable_dict = make_variable_dict(1, 1)
        sdca_options = dict(
            loss_type='logistic_loss',
            num_table_shards=shard_count,
            symmetric_l1_regularization=0,
            symmetric_l2_regularization=1)

        model = SdcaModel(example_dict, variable_dict, sdca_options)
        variables_lib.global_variables_initializer().run()
        with self.assertRaisesOpError(
            'Only labels of 0.0 or 1.0 are supported right now.'):
          model.minimize().run()
Esempio n. 23
0
  def testDistributedSimple(self):
    """Runs logistic-loss training from several threads at once.

    For every combination of _SHARD_NUMBERS and _NUM_LOSS_PARTITIONS, starts
    num_loss_partitions threads that each run the same train op
    _MAX_ITERATIONS times, then checks the losses, the binary predictions and
    the approximate duality gap.
    """
    # Setup test data
    example_protos = [
        make_example_proto({'age': [0],
                            'gender': [0]}, 0),
        make_example_proto({'age': [1],
                            'gender': [1]}, 1),
    ]
    example_weights = [1.0, 1.0]
    for num_shards in _SHARD_NUMBERS:
      for num_loss_partitions in _NUM_LOSS_PARTITIONS:
        with self._single_threaded_test_session():
          examples = make_example_dict(example_protos, example_weights)
          variables = make_variable_dict(1, 1)
          options = dict(
              symmetric_l2_regularization=1,
              symmetric_l1_regularization=0,
              loss_type='logistic_loss',
              num_table_shards=num_shards,
              num_loss_partitions=num_loss_partitions)

          lr = SdcaModel(examples, variables, options)
          tf.initialize_all_variables().run()
          unregularized_loss = lr.unregularized_loss(examples)
          loss = lr.regularized_loss(examples)
          predictions = lr.predictions(examples)
          # Before any training step both losses are expected to be 0.693147.
          self.assertAllClose(0.693147, unregularized_loss.eval())
          self.assertAllClose(0.693147, loss.eval())

          # A single train op is built here and shared by all worker threads.
          train_op = lr.minimize()

          def Minimize():
            # Each thread enters its own session context before running
            # the shared train op.
            with self._single_threaded_test_session():
              for _ in range(_MAX_ITERATIONS):
                train_op.run()

          # One worker thread per loss partition; each is started as soon as
          # it is created.
          threads = []
          for _ in range(num_loss_partitions):
            threads.append(Thread(target=Minimize))
            threads[-1].start()

          # Wait for every worker to finish before updating the weights.
          for t in threads:
            t.join()
          lr.update_weights(train_op).run()

          # The high tolerance in unregularized_loss comparisons is due to the
          # fact that it's possible to trade off unregularized_loss vs.
          # regularization and still have a sum that is quite close to the
          # optimal regularized_loss value.  SDCA's duality gap only ensures
          # that the regularized_loss is within 0.01 of optimal.
          # 0.525457 is the optimal regularized_loss.
          # 0.411608 is the unregularized_loss at that optimum.
          self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
          self.assertAllClose(0.525457, loss.eval(), atol=0.01)
          predicted_labels = get_binary_predictions_for_logistic(predictions)
          self.assertAllEqual([0, 1], predicted_labels.eval())
          self.assertTrue(lr.approximate_duality_gap().eval() < 0.02)
Esempio n. 24
0
    def testSomeUnweightedExamples(self):
        """Logistic-loss training where two of four examples have weight 0.0.

        Repeated for every entry of _SHARD_NUMBERS. The author's note says the
        results should match testSimple.
        """
        # Examples 0 and 2 carry weight 1.0 ("will be used"); examples 1 and 3
        # carry weight 0.0 ("will be ignored").
        protos = [
            make_example_proto({'age': [0], 'gender': [0]}, 0),
            make_example_proto({'age': [1], 'gender': [0]}, 0),
            make_example_proto({'age': [1], 'gender': [1]}, 1),
            make_example_proto({'age': [1], 'gender': [0]}, 1),
        ]
        weights = [1.0, 0.0, 1.0, 0.0]
        for shard_count in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                example_dict = make_example_dict(protos, weights)
                variable_dict = make_variable_dict(1, 1)
                sdca_options = {
                    'symmetric_l2_regularization': 1,
                    'symmetric_l1_regularization': 0,
                    'num_table_shards': shard_count,
                    'loss_type': 'logistic_loss',
                }

                model = SdcaModel(example_dict, variable_dict, sdca_options)
                variables_lib.global_variables_initializer().run()
                plain_loss = model.unregularized_loss(example_dict)
                total_loss = model.regularized_loss(example_dict)
                predictions = model.predictions(example_dict)
                step = model.minimize()
                for _ in range(_MAX_ITERATIONS):
                    step.run()
                model.update_weights(step).run()

                self.assertAllClose(0.411608, plain_loss.eval(), atol=0.05)
                self.assertAllClose(0.525457, total_loss.eval(), atol=0.01)
                labels = get_binary_predictions_for_logistic(predictions)
                self.assertAllClose([0, 1, 1, 1], labels.eval())
                # The approximate duality gap is compared against 0.01 with
                # relative and absolute tolerances of 1e-2.
                gap = model.approximate_duality_gap()
                self.assertAllClose(0.01, gap.eval(), rtol=1e-2, atol=1e-2)
Esempio n. 25
0
  def testDenseFeaturesSeparableWithinMargins(self):
    # Hinge loss on two dense examples that are separable but lie inside the
    # margin.
    with self._single_threaded_test_session():
      feature_values = [[[1.0, 0.5], [1.0, -0.5]]]
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=feature_values,
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'hinge_loss',
      }
      sdca = SdcaModel(examples, variables, options)
      variables_lib.global_variables_initializer().run()
      predictions = sdca.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      minimize_op = sdca.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      sdca.update_weights(minimize_op).run()

      # The x-axis separates (1.0, 0.5) from (1.0, -0.5), but both points sit
      # inside the margins, so each one contributes an unregularized loss of
      # 1/2. The optimal weights for this data are w_1~=0.0 and w_2~=1.0,
      # which gives an L2 loss of ~0.25.
      self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())
      unregularized_loss = sdca.unregularized_loss(examples)
      regularized_loss = sdca.regularized_loss(examples)
      self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
Esempio n. 26
0
    def testImbalancedWithExampleWeights(self):
        # One negative and one positive example; the negative example is
        # weighted 3.0 and the positive one 1.0.
        negative_proto = make_example_proto({"age": [0], "gender": [0]}, 0)
        positive_proto = make_example_proto({"age": [1], "gender": [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        example_weights = [3.0, 1.0]
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(1, 1)
                options = {
                    "symmetric_l2_regularization": 1,
                    "symmetric_l1_regularization": 0,
                    "num_table_shards": num_shards,
                    "loss_type": "logistic_loss",
                }

                model = SdcaModel(examples, variables, options)
                tf.global_variables_initializer().run()
                unregularized_loss = model.unregularized_loss(examples)
                regularized_loss = model.regularized_loss(examples)
                predictions = model.predictions(examples)
                minimize_op = model.minimize()
                for _ in range(_MAX_ITERATIONS):
                    minimize_op.run()
                model.update_weights(minimize_op).run()

                self.assertAllClose(
                    0.284860, unregularized_loss.eval(), atol=0.08)
                self.assertAllClose(
                    0.408044, regularized_loss.eval(), atol=0.012)
                predicted_labels = get_binary_predictions_for_logistic(
                    predictions)
                self.assertAllEqual([0, 1], predicted_labels.eval())
                duality_gap = model.approximate_duality_gap().eval()
                self.assertAllClose(0.0, duality_gap, rtol=2e-2, atol=1e-2)
Esempio n. 27
0
    def testInstancesOfOneClassOnly(self):
        # One negative example plus one positive example whose weight is 0.0,
        # so only the negative class takes part in training.
        negative_proto = make_example_proto({"age": [0], "gender": [0]}, 0)
        # Shares gender with the instance above.
        positive_proto = make_example_proto({"age": [1], "gender": [0]}, 1)
        example_protos = [negative_proto, positive_proto]
        # Second example "omitted" from training.
        example_weights = [1.0, 0.0]
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(1, 1)
                options = {
                    "symmetric_l2_regularization": 1,
                    "symmetric_l1_regularization": 0,
                    "num_table_shards": num_shards,
                    "loss_type": "logistic_loss",
                }

                model = SdcaModel(examples, variables, options)
                tf.global_variables_initializer().run()
                unregularized_loss = model.unregularized_loss(examples)
                regularized_loss = model.regularized_loss(examples)
                predictions = model.predictions(examples)
                minimize_op = model.minimize()
                for _ in range(_MAX_ITERATIONS):
                    minimize_op.run()
                model.update_weights(minimize_op).run()
                self.assertAllClose(
                    0.411608, unregularized_loss.eval(), atol=0.05)
                self.assertAllClose(
                    0.525457, regularized_loss.eval(), atol=0.01)
                predicted_labels = get_binary_predictions_for_logistic(
                    predictions)
                # Both examples are expected to be classified as negative.
                self.assertAllEqual([0, 0], predicted_labels.eval())
                duality_gap = model.approximate_duality_gap().eval()
                self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 28
0
    def testInstancesOfOneClassOnly(self):
        # One negative example plus one positive example whose weight is 0.0,
        # so only the negative class takes part in training.
        negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
        # Shares gender with the instance above.
        positive_proto = make_example_proto({'age': [1], 'gender': [0]}, 1)
        example_protos = [negative_proto, positive_proto]
        # Second example "omitted" from training.
        example_weights = [1.0, 0.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'loss_type': 'logistic_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            self.assertAllClose(
                0.411608, unregularized_loss.eval(), rtol=0.12)
            self.assertAllClose(0.525457, regularized_loss.eval(), atol=0.01)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            # Both examples are expected to be classified as negative.
            self.assertAllEqual([0, 0], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 29
0
    def testNoWeightedExamples(self):
        # One negative and one positive example, both with a weight of 0.0:
        # a minimize step must leave the predictions at 0.5 and evaluating
        # the duality gap must raise an op error.
        negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
        positive_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        # Zeroed out example weights.
        example_weights = [0.0, 0.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'loss_type': 'logistic_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            expected_predictions = [0.5, 0.5]
            self.assertAllClose(expected_predictions,
                                model.predictions(examples).eval())
            model.minimize().run()
            # Same expectation after the training step as before it.
            self.assertAllClose(expected_predictions,
                                model.predictions(examples).eval())
            expected_error = (
                'No examples found or all examples have zero weight.')
            with self.assertRaisesOpError(expected_error):
                model.approximate_duality_gap().eval()
Esempio n. 30
0
  def testDenseFeaturesPerfectlySeparable(self):
    # Hinge loss on two dense examples that are perfectly separable.
    with self._single_threaded_test_session():
      feature_values = [[1.0, 1.0], [1.0, -1.0]]
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=feature_values,
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'hinge_loss',
      }
      sdca = SdcaModel(examples, variables, options)
      variables_lib.global_variables_initializer().run()
      predictions = sdca.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      minimize_op = sdca.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      sdca.update_weights(minimize_op).run()

      self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())

      # The x-axis perfectly separates (1.0, 1.0) from (1.0, -1.0) (that is,
      # the SVM's functional margin is >=1), so the unregularized loss is
      # ~0.0 and the only loss left comes from l2-regularization. For these
      # datapoints w_1~=0.0 and w_2~=1.0, which means an l2 loss of ~0.25.
      unregularized_loss = sdca.unregularized_loss(examples)
      regularized_loss = sdca.regularized_loss(examples)
      self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
Esempio n. 31
0
    def testImbalancedWithExampleWeights(self):
        # One negative and one positive example; the negative example is
        # weighted 3.0 and the positive one 1.0.
        negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
        positive_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        example_weights = [3.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'loss_type': 'logistic_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            self.assertAllClose(
                0.284860, unregularized_loss.eval(), rtol=0.08)
            self.assertAllClose(0.408044, regularized_loss.eval(), atol=0.012)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 1], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 32
0
  def testDenseFeaturesWeightedExamples(self):
    # Hinge loss on two dense examples with unequal example weights
    # (3.0 vs 1.0).
    with self._single_threaded_test_session():
      feature_values = [[[1.0], [1.0]], [[0.5], [-0.5]]]
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=feature_values,
          weights=[3.0, 1.0],
          labels=[1.0, 0.0])
      options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'hinge_loss',
      }
      sdca = SdcaModel(examples, variables, options)
      variables_lib.global_variables_initializer().run()
      predictions = sdca.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)
      minimize_op = sdca.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      sdca.update_weights(minimize_op).run()

      # Point (1.0, 0.5) has a higher weight than (1.0, -0.5), so the model
      # tries to increase the margin from (1.0, 0.5). Because of
      # regularization, (1.0, -0.5) ends up within the margin. For these
      # points and example weights the optimal weights are w_1~=0.4 and
      # w_2~=1.2, which give an L2 loss of
      # 0.5 * 0.25 * (0.4^2 + 1.2^2) = 0.5 * 0.25 * 1.6 = 0.2. The binary
      # predictions are correct, but the boundary is much closer to the 2nd
      # point than to the first one.
      self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())
      unregularized_loss = sdca.unregularized_loss(examples)
      regularized_loss = sdca.regularized_loss(examples)
      self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)
Esempio n. 33
0
 def testOutOfRangeDenseFeatures(self):
     # Running the train op must raise InvalidArgumentError when the dense
     # weights variable is smaller than the dense features it is used with.
     with self._single_threaded_test_session():
         feature_values = [[[1.0, 0.0], [0.0, 1.0]]]
         examples, variables = make_dense_examples_and_variables_dicts(
             dense_features_values=feature_values,
             weights=[20.0, 10.0],
             labels=[1.0, 0.0])
         # Replace with a variable of size 1 instead of 2.
         undersized_weights = tf.Variable(tf.zeros([1], dtype=tf.float32))
         variables["dense_features_weights"] = [undersized_weights]
         options = {
             "symmetric_l2_regularization": 1.0,
             "symmetric_l1_regularization": 0,
             "loss_type": "logistic_loss",
         }
         model = SdcaModel(examples, variables, options)
         tf.initialize_all_variables().run()
         minimize_op = model.minimize()
         expected_error = "More dense features than we have parameters for.*"
         with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                      expected_error):
             minimize_op.run()
Esempio n. 34
0
    def testImbalanced(self):
        # Three negative examples and one positive example, all with the same
        # weight. Each entry is a (sparse features, label) pair.
        labeled_features = [
            ({'age': [0], 'gender': [0]}, 0),
            ({'age': [2], 'gender': [0]}, 0),
            ({'age': [3], 'gender': [0]}, 0),
            ({'age': [1], 'gender': [1]}, 1),
        ]
        example_protos = [
            make_example_proto(features, label)
            for features, label in labeled_features
        ]
        example_weights = [1.0, 1.0, 1.0, 1.0]
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(3, 1)
                options = {
                    'symmetric_l2_regularization': 1,
                    'symmetric_l1_regularization': 0,
                    'num_table_shards': num_shards,
                    'loss_type': 'logistic_loss',
                }

                model = SdcaModel(examples, variables, options)
                variables_lib.global_variables_initializer().run()
                unregularized_loss = model.unregularized_loss(examples)
                regularized_loss = model.regularized_loss(examples)
                predictions = model.predictions(examples)
                minimize_op = model.minimize()
                for _ in range(_MAX_ITERATIONS):
                    minimize_op.run()
                model.update_weights(minimize_op).run()

                expected_unregularized = 0.226487 + 0.102902
                self.assertAllClose(
                    expected_unregularized,
                    unregularized_loss.eval(),
                    atol=0.08)
                expected_regularized = 0.328394 + 0.131364
                self.assertAllClose(
                    expected_regularized, regularized_loss.eval(), atol=0.01)
                predicted_labels = get_binary_predictions_for_logistic(
                    predictions)
                self.assertAllEqual([0, 0, 0, 1], predicted_labels.eval())
                duality_gap = model.approximate_duality_gap().eval()
                self.assertAllClose(0.0, duality_gap, rtol=2e-2, atol=1e-2)
Esempio n. 35
0
    def testFractionalExampleLabel(self):
        # One mostly-negative example (label 0.1) and one positive example;
        # running minimize is expected to raise an op error about the label.
        fractional_proto = make_example_proto({"age": [0], "gender": [0]}, 0.1)
        positive_proto = make_example_proto({"age": [1], "gender": [1]}, 1)
        example_protos = [fractional_proto, positive_proto]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            expected_error = "Only labels of 0.0 or 1.0 are supported right now."
            with self.assertRaisesOpError(expected_error):
                model.minimize().run()
Esempio n. 36
0
  def testDistributedSimple(self):
    # Trains the two-example logistic problem with several concurrent
    # minimize loops: for every (num_shards, num_loss_partitions) combination,
    # num_loss_partitions threads each run the same train op
    # _MAX_ITERATIONS times.
    # Setup test data
    example_protos = [
        make_example_proto({'age': [0],
                            'gender': [0]}, 0),
        make_example_proto({'age': [1],
                            'gender': [1]}, 1),
    ]
    example_weights = [1.0, 1.0]
    for num_shards in _SHARD_NUMBERS:
      for num_loss_partitions in _NUM_LOSS_PARTITIONS:
        with self._single_threaded_test_session():
          examples = make_example_dict(example_protos, example_weights)
          variables = make_variable_dict(1, 1)
          options = dict(
              symmetric_l2_regularization=1,
              symmetric_l1_regularization=0,
              loss_type='logistic_loss',
              num_table_shards=num_shards,
              num_loss_partitions=num_loss_partitions)

          lr = SdcaModel(examples, variables, options)
          tf.global_variables_initializer().run()
          unregularized_loss = lr.unregularized_loss(examples)
          loss = lr.regularized_loss(examples)
          predictions = lr.predictions(examples)
          # Losses before any training step has run.
          self.assertAllClose(0.693147, unregularized_loss.eval())
          self.assertAllClose(0.693147, loss.eval())

          train_op = lr.minimize()

          def Minimize():
            # Thread body: enters its own session context and runs the shared
            # train op _MAX_ITERATIONS times.
            with self._single_threaded_test_session():
              for _ in range(_MAX_ITERATIONS):
                train_op.run()

          # One thread per loss partition, started as soon as it is created.
          threads = []
          for _ in range(num_loss_partitions):
            threads.append(Thread(target=Minimize))
            threads[-1].start()

          # Wait for every training thread before updating the weights, so
          # the closure over train_op never outlives this loop iteration.
          for t in threads:
            t.join()
          lr.update_weights(train_op).run()

          # The high tolerance in unregularized_loss comparisons is due to the
          # fact that it's possible to trade off unregularized_loss vs.
          # regularization and still have a sum that is quite close to the
          # optimal regularized_loss value.  SDCA's duality gap only ensures
          # that the regularized_loss is within 0.01 of optimal.
          # 0.525457 is the optimal regularized_loss.
          # 0.411608 is the unregularized_loss at that optimum.
          self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
          self.assertAllClose(0.525457, loss.eval(), atol=0.01)
          predicted_labels = get_binary_predictions_for_logistic(predictions)
          self.assertAllEqual([0, 1], predicted_labels.eval())
          # assertLess reports the actual gap on failure, unlike
          # assertTrue(gap < 0.02) which only reports "False is not true".
          self.assertLess(lr.approximate_duality_gap().eval(), 0.02)
Esempio n. 37
0
    def testOutOfRangeSparseFeatures(self):
        # The variables are created with make_variable_dict(0, 0) while the
        # examples use feature indices 0 and 1; running the train op is
        # expected to raise InvalidArgumentError.
        first_proto = make_example_proto({"age": [0], "gender": [0]}, 0)
        second_proto = make_example_proto({"age": [1], "gender": [1]}, 1)
        example_protos = [first_proto, second_proto]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(0, 0)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            minimize_op = model.minimize()
            expected_error = "Found sparse feature indices out.*"
            with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                         expected_error):
                minimize_op.run()
Esempio n. 38
0
    def testL2Regularization(self):
        # Squared loss with an L2 of 16 on two pairs of identical examples.
        low_label = -10.0
        high_label = 14.0
        # Each entry is a (sparse features, label) pair.
        labeled_features = [
            # 2 identical examples
            ({'age': [0], 'gender': [0]}, low_label),
            ({'age': [0], 'gender': [0]}, low_label),
            # 2 more identical examples
            ({'age': [1], 'gender': [1]}, high_label),
            ({'age': [1], 'gender': [1]}, high_label),
        ]
        example_protos = [
            make_example_proto(features, label)
            for features, label in labeled_features
        ]
        example_weights = [1.0, 1.0, 1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 16,
                'symmetric_l1_regularization': 0,
                'loss_type': 'squared_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)

            minimize_op = model.minimize()
            for _ in xrange(_MAX_ITERATIONS):
                minimize_op.run()

            # Predictions should be 1/5 of label due to minimizing regularized
            # loss:
            #   (label - 2 * weight)^2 + L2 * 16 * weight^2
            expected_low = low_label / 5.0
            expected_high = high_label / 5.0
            expected_predictions = [
                expected_low, expected_low, expected_high, expected_high
            ]
            self.assertAllClose(
                expected_predictions, predictions.eval(), rtol=0.01)
Esempio n. 39
0
    def testSimple(self):
        # Logistic loss on one negative and one positive example with equal
        # weights.
        negative_proto = make_example_proto({"age": [0], "gender": [0]}, 0)
        positive_proto = make_example_proto({"age": [1], "gender": [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # Losses before any training step has run.
            self.assertAllClose(0.693147, unregularized_loss.eval())
            self.assertAllClose(0.693147, regularized_loss.eval())
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            # The tolerance on unregularized_loss is high because
            # unregularized_loss can be traded off against regularization
            # while the sum stays quite close to the optimal regularized_loss
            # value. SDCA's duality gap only ensures that the
            # regularized_loss is within 0.01 of optimal.
            # 0.525457 is the optimal regularized_loss.
            # 0.411608 is the unregularized_loss at that optimum.
            self.assertAllClose(
                0.411608, unregularized_loss.eval(), rtol=0.11)
            self.assertAllClose(0.525457, regularized_loss.eval(), atol=0.01)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 1], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 40
0
    def testSimpleNoL2(self):
        # Same data and checks as testSimple, but with
        # symmetric_l2_regularization set to 0. The algorithm should behave
        # as if we have an L2 of 1 in optimization but 0 in regularized_loss.
        negative_proto = make_example_proto({"age": [0], "gender": [0]}, 0)
        positive_proto = make_example_proto({"age": [1], "gender": [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 0,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # Losses before any training step has run.
            self.assertAllClose(0.693147, unregularized_loss.eval())
            self.assertAllClose(0.693147, regularized_loss.eval())
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            self.assertAllClose(
                0.411608, unregularized_loss.eval(), rtol=0.11)
            self.assertAllClose(0.371705, regularized_loss.eval(), atol=0.01)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 1], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 41
0
    def testSomeUnweightedExamples(self):
        # Four examples are supplied, but two of them carry a weight of 0.0,
        # so the results should be the same as in testSimple.
        # Each entry is a (sparse features, label) pair.
        labeled_features = [
            ({"age": [0], "gender": [0]}, 0),  # Will be used.
            ({"age": [1], "gender": [0]}, 0),  # Will be ignored.
            ({"age": [1], "gender": [1]}, 1),  # Will be used.
            ({"age": [1], "gender": [0]}, 1),  # Will be ignored.
        ]
        example_protos = [
            make_example_proto(features, label)
            for features, label in labeled_features
        ]
        example_weights = [1.0, 0.0, 1.0, 0.0]
        with self._single_threaded_test_session():
            # Only examples 0 and 2 have a non-zero weight.
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            self.assertAllClose(
                0.411608, unregularized_loss.eval(), rtol=0.12)
            self.assertAllClose(0.525457, regularized_loss.eval(), atol=0.01)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllClose([0, 1, 1, 1], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 42
0
 def testImbalancedWithExampleWeights(self):
   # Setup test data with 1 negative and 1 positive example; the negative
   # example is weighted 3.0 and the positive one 1.0.
   example_protos = [
       make_example_proto(
           {'age': [0],
            'gender': [0]}, 0),
       make_example_proto(
           {'age': [1],
            'gender': [1]}, 1),
   ]
   example_weights = [3.0, 1.0]
   with self._single_threaded_test_session():
     examples = make_example_dict(example_protos, example_weights)
     variables = make_variable_dict(1, 1)
     options = dict(symmetric_l2_regularization=0.25,
                    symmetric_l1_regularization=0,
                    loss_type='logistic_loss')
     # NOTE(review): the initializer runs before SdcaModel is constructed, so
     # it only covers variables that exist at this point - presumably the
     # model creates none of its own here; confirm before reordering.
     tf.initialize_all_variables().run()
     lr = SdcaModel(CONTAINER, examples, variables, options)
     unregularized_loss = lr.unregularized_loss(examples)
     loss = lr.regularized_loss(examples)
     prediction = lr.predictions(examples)
     # A single minimize step is run before the losses are checked.
     lr.minimize().run()
     self.assertAllClose(0.266189, unregularized_loss.eval(),
                         rtol=3e-2, atol=3e-2)
     self.assertAllClose(0.571912, loss.eval(), rtol=3e-2, atol=3e-2)
     # Threshold the predictions at 0.5 to get 0/1 labels as float32.
     predicted_labels = tf.cast(
         tf.greater_equal(prediction,
                          tf.ones_like(prediction) * 0.5), tf.float32)
     self.assertAllEqual([0, 1], predicted_labels.eval())
Esempio n. 43
0
    def testImbalanced(self):
        # Three negative examples and one positive example, all with the same
        # weight. Each entry is a (sparse features, label) pair.
        labeled_features = [
            ({"age": [0], "gender": [0]}, 0),
            ({"age": [2], "gender": [0]}, 0),
            ({"age": [3], "gender": [0]}, 0),
            ({"age": [1], "gender": [1]}, 1),
        ]
        example_protos = [
            make_example_proto(features, label)
            for features, label in labeled_features
        ]
        example_weights = [1.0, 1.0, 1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(3, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "loss_type": "logistic_loss",
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            predictions = model.predictions(examples)
            # minimize() is invoked anew on each of the 5 passes.
            for _ in xrange(5):
                model.minimize().run()
            expected_unregularized = 0.226487 + 0.102902
            self.assertAllClose(
                expected_unregularized, unregularized_loss.eval(), rtol=0.08)
            expected_regularized = 0.328394 + 0.131364
            self.assertAllClose(
                expected_regularized, regularized_loss.eval(), atol=0.01)
            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 0, 0, 1], predicted_labels.eval())
            duality_gap = model.approximate_duality_gap().eval()
            self.assertAllClose(0.01, duality_gap, rtol=1e-2, atol=1e-2)
Esempio n. 44
0
 def testInstancesOfOneClassOnly(self):
   # Setup test data with 1 positive (ignored), and 1 negative example.
   # The positive example is ignored because its weight below is 0.0.
   example_protos = [
       make_example_proto(
           {'age': [0],
            'gender': [0]}, 0),
       make_example_proto(
           {'age': [1],
            'gender': [0]}, 1),  # Shares gender with the instance above.
   ]
   example_weights = [1.0, 0.0]  # Second example "omitted" from training.
   with self._single_threaded_test_session():
     examples = make_example_dict(example_protos, example_weights)
     variables = make_variable_dict(1, 1)
     options = dict(symmetric_l2_regularization=0.25,
                    symmetric_l1_regularization=0,
                    loss_type='logistic_loss')
     # NOTE(review): the initializer runs before SdcaModel is constructed, so
     # it only covers variables that exist at this point - presumably the
     # model creates none of its own here; confirm before reordering.
     tf.initialize_all_variables().run()
     lr = SdcaModel(CONTAINER, examples, variables, options)
     unregularized_loss = lr.unregularized_loss(examples)
     loss = lr.regularized_loss(examples)
     prediction = lr.predictions(examples)
     # A single minimize step is run before the losses are checked.
     lr.minimize().run()
     self.assertAllClose(0.395226,
                         unregularized_loss.eval(),
                         rtol=3e-2,
                         atol=3e-2)
     self.assertAllClose(0.460781, loss.eval(), rtol=3e-2, atol=3e-2)
     # Threshold the predictions at 0.5 to get 0/1 labels as float32; both
     # examples are expected to come out as 0.
     predicted_labels = tf.cast(
         tf.greater_equal(prediction,
                          tf.ones_like(prediction) * 0.5), tf.float32)
     self.assertAllEqual([0, 0], predicted_labels.eval())
Esempio n. 45
0
    def testHingeDenseFeaturesSeparableWithinMargins(self):
        """Hinge-loss SDCA on dense points that are separable but lie within the margins."""
        with self._single_threaded_test_session():
            examples = make_dense_examples_dict(
                dense_feature_values=[[1.0, 1.0], [0.5, -0.5]],
                weights=[1.0, 1.0],
                labels=[1.0, 0.0])
            variables = make_dense_variable_dict(2, 2)
            options = {
                'symmetric_l2_regularization': 1.0,
                'symmetric_l1_regularization': 0,
                'loss_type': 'hinge_loss',
            }
            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)
            binary_predictions = get_binary_predictions_for_hinge(predictions)

            # Five training steps, each one run through a freshly built op.
            for _ in xrange(5):
                train_step = model.minimize()
                train_step.run()

            # The datapoints (1.0, 0.5) and (1.0, -0.5) can be separated by the
            # x-axis, yet both sit inside the margins, which leaves an
            # unregularized loss of 1/2 per example. The optimal weights for them
            # are w_1~=0.0 and w_2~=1.0, giving an L2 loss of ~0.25.
            self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
            self.assertAllClose([1.0, 0.0], binary_predictions.eval())

            # The loss tensors are only built once training has finished.
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
            self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
Esempio n. 46
0
 def testSimpleLogistic(self):
   """Checks logistic SDCA losses before and after one training step."""
   # Setup test data: two examples with distinct age/gender features.
   first_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
   second_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
   example_protos = [first_proto, second_proto]
   example_weights = [1.0, 1.0]
   tolerances = {'rtol': 3e-2, 'atol': 3e-2}
   with self._single_threaded_test_session():
     examples = make_example_dict(example_protos, example_weights)
     variables = make_variable_dict(1, 1)
     options = {
         'symmetric_l2_regularization': 0.5,
         'symmetric_l1_regularization': 0,
         'loss_type': 'logistic_loss',
         'prior': 0.0,
     }
     tf.initialize_all_variables().run()
     model = SdcaModel(CONTAINER, examples, variables, options)
     unregularized_loss = model.unregularized_loss(examples)
     regularized_loss = model.regularized_loss(examples)
     prediction = model.predictions(examples)

     # Prior to any training step both losses are expected to be 0.693147.
     self.assertAllClose(0.693147, unregularized_loss.eval())
     self.assertAllClose(0.693147, regularized_loss.eval())

     # Exactly one run of the minimization op.
     train_step = model.minimize()
     train_step.run()

     self.assertAllClose(0.395226, unregularized_loss.eval(), **tolerances)
     self.assertAllClose(0.657446, regularized_loss.eval(), **tolerances)

     # Threshold the predictions at 0.5 to obtain hard 0/1 labels.
     threshold = tf.ones_like(prediction) * 0.5
     at_or_above = tf.greater_equal(prediction, threshold)
     predicted_labels = tf.cast(at_or_above, tf.float32)
     self.assertAllEqual([0, 1], predicted_labels.eval())
Esempio n. 47
0
  def testImbalancedWithExampleWeights(self):
    """Logistic SDCA where the two examples carry unequal example weights."""
    # Setup test data with 1 positive, and 1 negative example.
    negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
    positive_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
    example_protos = [negative_proto, positive_proto]
    example_weights = [3.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = {
          'symmetric_l2_regularization': 1,
          'symmetric_l1_regularization': 0,
          'loss_type': 'logistic_loss',
      }

      model = SdcaModel(CONTAINER, examples, variables, options)
      tf.initialize_all_variables().run()
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      predictions = model.predictions(examples)

      # Five training steps, each one run through a freshly built op.
      for _ in xrange(5):
        train_step = model.minimize()
        train_step.run()

      self.assertAllClose(0.284860, unregularized_loss.eval(), rtol=0.08)
      self.assertAllClose(0.408044, regularized_loss.eval(), atol=0.012)
      predicted_labels = get_binary_predictions_for_logistic(predictions)
      self.assertAllEqual([0, 1], predicted_labels.eval())

      duality_gap = model.approximate_duality_gap()
      self.assertAllClose(0.01, duality_gap.eval(), rtol=1e-2, atol=1e-2)
Esempio n. 48
0
  def testInstancesOfOneClassOnly(self):
    """Logistic SDCA when only the negative example carries weight."""
    # One negative and one positive example; the positive one receives a zero
    # example weight below, so it is effectively ignored during training.
    negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
    # Shares gender with the instance above.
    positive_proto = make_example_proto({'age': [1], 'gender': [0]}, 1)
    example_protos = [negative_proto, positive_proto]
    example_weights = [1.0, 0.0]  # Second example "omitted" from training.
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = {
          'symmetric_l2_regularization': 1,
          'symmetric_l1_regularization': 0,
          'loss_type': 'logistic_loss',
      }

      model = SdcaModel(CONTAINER, examples, variables, options)
      tf.initialize_all_variables().run()
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      predictions = model.predictions(examples)

      # Five training steps, each one run through a freshly built op.
      for _ in xrange(5):
        train_step = model.minimize()
        train_step.run()

      self.assertAllClose(0.411608, unregularized_loss.eval(), rtol=0.12)
      self.assertAllClose(0.525457, regularized_loss.eval(), atol=0.01)
      predicted_labels = get_binary_predictions_for_logistic(predictions)
      self.assertAllEqual([0, 0], predicted_labels.eval())

      duality_gap = model.approximate_duality_gap()
      self.assertAllClose(0.01, duality_gap.eval(), rtol=1e-2, atol=1e-2)
Esempio n. 49
0
    def testFeatureValues(self):
        """Squared-loss SDCA with explicit feature values and example weights."""
        # Setup test data
        first_proto = make_example_proto({'age': [0], 'gender': [0]}, -10.0, -2.0)
        second_proto = make_example_proto({'age': [1], 'gender': [1]}, 14.0, 2.0)
        example_protos = [first_proto, second_proto]
        example_weights = [5.0, 3.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)

            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'loss_type': 'squared_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)

            # A single training op, run _MAX_ITERATIONS times.
            train_op = model.minimize()
            for _ in xrange(_MAX_ITERATIONS):
                train_op.run()

            # Four (sparse) variable weights are learned: two for age and two for
            # gender. Write w_1, w_2 for the age weights, w_3, w_4 for the gender
            # weights, y_1, y_2 for the labels of examples 1 and 2, and s_1, s_2
            # for the corresponding *example* weights. With the feature values
            # above, the loss function is:
            # s_1/2(y_1 + 2w_1 + 2w_3)^2 + s_2/2(y_2 - 2w_2 - 2w_4)^2
            # + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2). At the optimum one can
            # verify that:
            # w_1* = w_3* = -2.0 s_1 y_1/(\lambda + 8 s_1) and
            # w_2* = w_4* = 2 \cdot s_2 y_2/(\lambda + 8 s_2). In other words,
            # regularization and example weights shrink each prediction to
            # 8 \cdot s_i /(\lambda + 8 \cdot s_i) of its label.
            expected_predictions = [-10 * 40.0 / 41.0, 14.0 * 24 / 25.0]
            self.assertAllClose(
                expected_predictions, predictions.eval(), atol=0.01)
Esempio n. 50
0
  def testLinearDenseFeatures(self):
    """Squared-loss SDCA on two dense features after a single training step."""
    with self._single_threaded_test_session():
      # Two dense feature columns, each holding one value per example.
      first_feature = tf.convert_to_tensor([-2.0, 0.0], dtype=tf.float32)
      second_feature = tf.convert_to_tensor([0.0, 2.0], dtype=tf.float32)
      examples = {
          'sparse_features': [],
          'dense_features': [first_feature, second_feature],
          'example_weights': [1.0, 1.0],
          'example_labels': [-10.0, 14.0],
          'example_ids': ['%d' % i for i in xrange(2)],
      }

      # Zero-initialized state: one weight per dense feature, a dual value per
      # example, and a scalar primal loss.
      first_weight = tf.Variable(tf.zeros([1], dtype=tf.float32))
      second_weight = tf.Variable(tf.zeros([1], dtype=tf.float32))
      dual = tf.Variable(tf.zeros([2], dtype=tf.float32))
      primal_loss = tf.Variable(tf.zeros([], dtype=tf.float64))
      variables = {
          'sparse_features_weights': [],
          'dense_features_weights': [first_weight, second_weight],
          'dual': dual,
          'primal_loss': primal_loss,
      }

      options = {
          'symmetric_l2_regularization': 0.5,
          'symmetric_l1_regularization': 0,
          'loss_type': 'squared_loss',
          'prior': 0.0,
      }
      tf.initialize_all_variables().run()
      model = SdcaModel(CONTAINER, examples, variables, options)
      prediction = model.predictions(examples)

      # Exactly one run of the minimization op.
      train_step = model.minimize()
      train_step.run()

      # Minimizing the regularized loss
      #   (label - 2 * weight)^2 / 2 + L2 * weight^2
      # should leave each prediction at 4/5 of its label.
      expected_predictions = [-10.0 * 4 / 5, 14.0 * 4 / 5]
      self.assertAllClose(expected_predictions, prediction.eval(), rtol=0.01)

      # The regularized loss tensor is only built once training has finished.
      regularized_loss = model.regularized_loss(examples)
      expected_loss = (4.0 + 7.84 + 16.0 + 31.36) / 2
      self.assertAllClose(expected_loss, regularized_loss.eval(), rtol=0.01)
Esempio n. 51
0
    def testSimpleNoL2(self):
        """Logistic SDCA with the L2 strength set to zero, for each shard count."""
        # Same as test above (so comments from above apply) but without an L2.
        # The algorithm should behave as if we have an L2 of 1 in optimization but
        # 0 in regularized_loss.
        first_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
        second_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
        example_protos = [first_proto, second_proto]
        example_weights = [1.0, 1.0]
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(1, 1)
                options = {
                    'symmetric_l2_regularization': 0,
                    'symmetric_l1_regularization': 0,
                    'num_table_shards': num_shards,
                    'loss_type': 'logistic_loss',
                }

                model = SdcaModel(examples, variables, options)
                variables_lib.global_variables_initializer().run()
                unregularized_loss = model.unregularized_loss(examples)
                regularized_loss = model.regularized_loss(examples)
                predictions = model.predictions(examples)

                # Prior to training both losses are expected to be 0.693147.
                self.assertAllClose(0.693147, unregularized_loss.eval())
                self.assertAllClose(0.693147, regularized_loss.eval())

                # A single training op, run _MAX_ITERATIONS times, followed by
                # the weight update.
                train_op = model.minimize()
                for _ in range(_MAX_ITERATIONS):
                    train_op.run()
                model.update_weights(train_op).run()

                # With neither an L1 nor an L2 term, the regularized and the
                # unregularized loss should be exactly the same.
                self.assertAllClose(
                    0.40244, unregularized_loss.eval(), atol=0.01)
                self.assertAllClose(
                    0.40244, regularized_loss.eval(), atol=0.01)
                predicted_labels = get_binary_predictions_for_logistic(
                    predictions)
                self.assertAllEqual([0, 1], predicted_labels.eval())

                duality_gap = model.approximate_duality_gap()
                self.assertAllClose(
                    0.01, duality_gap.eval(), rtol=1e-2, atol=1e-2)
Esempio n. 52
0
  def testSimple(self):
    """Poisson-loss SDCA on two heavily weighted sparse examples."""
    # Setup test data
    first_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
    second_proto = make_example_proto({'age': [1], 'gender': [1]}, 2)
    example_protos = [first_proto, second_proto]
    example_weights = [100.0, 100.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'poisson_loss',
      }
      model = SdcaModel(examples, variables, options)
      variables_lib.global_variables_initializer().run()

      # The weights default to zero before minimization, so regularization
      # contributes nothing yet; the only loss is the unregularized one, which
      # is 1 for each example.
      predictions = model.predictions(examples)
      self.assertAllClose([1.0, 1.0], predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      approximate_duality_gap = model.approximate_duality_gap()
      self.assertAllClose(1.0, unregularized_loss.eval())
      self.assertAllClose(1.0, regularized_loss.eval())

      # Four sparse weights are learned: w1, w2 for age and w3, w4 for gender.
      # Minimization leads to:
      # w1=w3=-1.96487, argmin of 100*(exp(2*w)-2*w*0)+w**2.
      # w2=w4=0.345708, argmin of 100*(exp(2*w)-2*w*2)+w**2.
      # The resulting unregularized loss is .3167, and .3366 with
      # regularization.
      train_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      model.update_weights(train_op).run()

      expected_predictions = [0.0196, 1.9965]
      self.assertAllClose(expected_predictions, predictions.eval(), atol=1e-4)
      self.assertAllClose(0.3167, unregularized_loss.eval(), atol=1e-4)
      self.assertAllClose(0.3366, regularized_loss.eval(), atol=1e-4)
      self.assertAllClose(0., approximate_duality_gap.eval(), atol=1e-6)
Esempio n. 53
0
  def testPartitionedPrimals(self):
    """Logistic SDCA with partitioned variables, for each shard count."""
    # Setup test data
    first_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
    second_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
    example_protos = [first_proto, second_proto]
    example_weights = [1.0, 1.0]
    for num_shards in _SHARD_NUMBERS:
      with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1, partitioned=True)
        options = {
            'symmetric_l2_regularization': 1,
            'symmetric_l1_regularization': 0,
            'num_table_shards': num_shards,
            'loss_type': 'logistic_loss',
        }

        model = SdcaModel(examples, variables, options)
        variables_lib.global_variables_initializer().run()
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        predictions = model.predictions(examples)

        # Prior to training both losses are expected to be 0.693147.
        self.assertAllClose(0.693147, unregularized_loss.eval())
        self.assertAllClose(0.693147, regularized_loss.eval())

        # A single training op, run _MAX_ITERATIONS times, followed by the
        # weight update.
        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
          train_op.run()
        model.update_weights(train_op).run()

        # The unregularized_loss comparison uses a high tolerance because
        # unregularized_loss can be traded off against regularization while
        # their sum stays quite close to the optimal regularized_loss value.
        # SDCA's duality gap only ensures that the regularized_loss is within
        # 0.01 of optimal.
        # 0.525457 is the optimal regularized_loss.
        # 0.411608 is the unregularized_loss at that optimum.
        self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
        self.assertAllClose(0.525457, regularized_loss.eval(), atol=0.01)
        predicted_labels = get_binary_predictions_for_logistic(predictions)
        self.assertAllEqual([0, 1], predicted_labels.eval())

        duality_gap = model.approximate_duality_gap()
        self.assertAllClose(0.01, duality_gap.eval(), rtol=1e-2, atol=1e-2)
Esempio n. 54
0
 def testOutOfRangeDenseFeatures(self):
     """Training must fail when a dense weight variable is too small."""
     with self._single_threaded_test_session():
         examples, variables = make_dense_examples_and_variables_dicts(
             dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
             weights=[20.0, 10.0],
             labels=[1.0, 0.0])
         # Swap in a weight variable of size 1 instead of 2.
         undersized_weights = tf.Variable(tf.zeros([1], dtype=tf.float32))
         variables['dense_features_weights'] = [undersized_weights]
         options = {
             'symmetric_l2_regularization': 1.0,
             'symmetric_l1_regularization': 0,
             'loss_type': 'logistic_loss',
         }
         model = SdcaModel(examples, variables, options)
         tf.global_variables_initializer().run()
         train_op = model.minimize()
         # Running the training op is what is expected to raise.
         expected_message = 'More dense features than we have parameters for.*'
         with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                      expected_message):
             train_op.run()
Esempio n. 55
0
 def testNoWeightedExamples(self):
     """Training must fail when every example weight is zero."""
     # Setup test data with 1 positive, and 1 negative example.
     negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
     positive_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
     example_protos = [negative_proto, positive_proto]
     # Zeroed out example weights.
     example_weights = [0.0, 0.0]
     untrained_predictions = [0.5, 0.5]
     with self.test_session(use_gpu=False):
         examples = make_example_dict(example_protos, example_weights)
         variables = make_variable_dict(examples, 1, 1)
         options = {
             'symmetric_l2_regularization': 0.5,
             'symmetric_l1_regularization': 0,
             'loss_type': 'logistic_loss',
         }
         tf.initialize_all_variables().run()
         model = SdcaModel(examples, variables, options)

         predictions_before = model.predictions(examples)
         self.assertAllClose(untrained_predictions, predictions_before.eval())

         with self.assertRaisesOpError(
                 'No weighted examples in 2 training examples'):
             model.minimize().run()

         # The predictions are expected to be the same after the failed step.
         predictions_after = model.predictions(examples)
         self.assertAllClose(untrained_predictions, predictions_after.eval())
Esempio n. 56
0
    def testDuplicateExampleIds(self):
        """Training must fail when two examples share the same example id."""
        # Setup test data with 1 positive, and 1 negative example.
        negative_proto = make_example_proto({'age': [0], 'gender': [0]}, 0)
        positive_proto = make_example_proto({'age': [1], 'gender': [1]}, 1)
        example_protos = [negative_proto, positive_proto]
        example_weights = [1.0, 1.0]
        untrained_predictions = [0.5, 0.5]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            # Overwrite every example id with the same string.
            original_ids = examples['example_ids']
            examples['example_ids'] = ['duplicate_id' for _ in original_ids]
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 0.5,
                'symmetric_l1_regularization': 0,
                'loss_type': 'logistic_loss',
            }

            model = SdcaModel(CONTAINER, examples, variables, options)
            tf.initialize_all_variables().run()

            predictions_before = model.predictions(examples)
            self.assertAllClose(untrained_predictions, predictions_before.eval())

            with self.assertRaisesOpError(
                    'Detected 1 duplicates in example_ids'):
                model.minimize().run()

            # The predictions are expected to be the same after the failed step.
            predictions_after = model.predictions(examples)
            self.assertAllClose(untrained_predictions, predictions_after.eval())
Esempio n. 57
0
  def testSparseRandom(self):
    """Checks the duality gap after four training runs on random sparse data."""
    num_examples = 1000
    dim = 20
    # Number of non-zero features per example.
    non_zeros = 10
    # Setup test data.
    with self._single_threaded_test_session():
      examples, variables = make_random_examples_and_variables_dicts(
          num_examples, dim, non_zeros)
      options = {
          'symmetric_l2_regularization': .1,
          'symmetric_l1_regularization': 0,
          'num_table_shards': 1,
          'adaptive': False,
          'loss_type': 'logistic_loss',
      }

      model = SdcaModel(examples, variables, options)
      variables_lib.global_variables_initializer().run()

      # A single training op, run four times, followed by the weight update.
      train_op = model.minimize()
      for _ in range(4):
        train_op.run()
      model.update_weights(train_op).run()

      # Duality gap is 1.4e-5.
      # It would be 0.01 without shuffling and 0.02 with adaptive sampling.
      duality_gap = model.approximate_duality_gap()
      self.assertNear(0.0, duality_gap.eval(), err=1e-3)