# Imports assumed by these snippets (contrib-era TensorFlow 1.x paths). The
# surrounding test classes and a few module-level helpers (e.g.
# _linearly_inseparable_binary_input_fn) are elided in this collection.
import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin

from tensorflow.contrib import layers
from tensorflow.contrib.kernel_methods.python import kernel_estimators
from tensorflow.contrib.kernel_methods.python.mappers import dense_kernel_mapper
from tensorflow.contrib.kernel_methods.python.mappers.random_fourier_features import RandomFourierFeatureMapper
from tensorflow.contrib.learn.python.learn.estimators import test_data
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops


  def testGoodKernelApproximationAmortized(self):
    # Parameters.
    num_points = 20
    input_dim = 5
    mapped_dim = 5000
    stddev = 5.0

    # TODO(sibyl-vie3Poto): Reduce test's running time before moving to third_party. One
    # possible way to speed the test up is to compute both the approximate and
    # the exact kernel matrix directly using matrix operations instead of
    # computing the values for each pair of points separately.
    points_shape = [1, input_dim]
    points = [
        random_ops.random_uniform(shape=points_shape, maxval=1.0)
        for _ in xrange(num_points)
    ]

    normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
    total_absolute_error = 0.0
    with self.cached_session():
      rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
      # Cache mappings so that they are not computed multiple times.
      cached_mappings = dict((point, rffm.map(point))
                             for point in normalized_points)
      for x in normalized_points:
        mapped_x = cached_mappings[x]
        for y in normalized_points:
          mapped_y = cached_mappings[y]
          exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
          approx_kernel_value = _inner_product(mapped_x, mapped_y)
          abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
          total_absolute_error += abs_error
      self.assertAllClose(
          [[0.0]],
          total_absolute_error.eval() / (num_points * num_points),
          atol=0.02)
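
# The mapper tests reference two module-level helpers that are elided from
# these snippets. A minimal sketch of plausible definitions, assuming
# row-vector inputs of shape [1, dim] (the originals may differ in detail):
def _inner_product(x, y):
  # Inner product of two row vectors, returned as a [1, 1] tensor.
  return math_ops.matmul(x, y, transpose_b=True)


def _compute_exact_rbf_kernel(x, y, stddev):
  # Exact RBF kernel value: exp(-||x - y||^2 / (2 * stddev^2)).
  diff = math_ops.subtract(x, y)
  diff_squared_norm = _inner_product(diff, diff)
  return math_ops.exp(-diff_squared_norm / (2 * stddev * stddev))
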
  def testInvalidInputShape(self):
    x = constant_op.constant([[2.0, 1.0]])

    with self.test_session():
      rffm = RandomFourierFeatureMapper(3, 10)
      with self.assertRaisesWithPredicateMatch(
          dense_kernel_mapper.InvalidShapeError,
          r'Invalid dimension: expected 3 input features, got 2 instead.'):
        rffm.map(x)
  def testSameOmegaReused(self):
    x = constant_op.constant([[2.0, 1.0, 0.0]])

    with self.test_session():
      rffm = RandomFourierFeatureMapper(3, 100)
      mapped_x = rffm.map(x)
      mapped_x_copy = rffm.map(x)
      # Two different evaluations of tensors output by map on the same input
      # are identical because the same parameters are used for the mappings.
      self.assertAllClose(mapped_x.eval(), mapped_x_copy.eval(), atol=0.001)
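
# For intuition, a minimal NumPy sketch of the random Fourier feature
# construction the mapper implements (Rahimi & Recht): draw a fixed matrix
# omega with entries from N(0, 1/stddev**2) and map x to concatenated
# cosine/sine features; the inner product of two mapped vectors then
# approximates the RBF kernel. The names below are illustrative and not the
# mapper's actual internals.
def _reference_rff_map(x, omega):
  # x: [input_dim], omega: [input_dim, num_frequencies].
  projection = np.dot(x, omega)
  return np.concatenate([np.cos(projection),
                         np.sin(projection)]) / np.sqrt(omega.shape[1])

# Usage sketch; reusing one fixed omega across calls is what makes repeated
# mappings of the same input identical (cf. testSameOmegaReused above):
#   rng = np.random.RandomState(0)
#   stddev = 3.0
#   omega = rng.normal(scale=1.0 / stddev, size=(3, 1000))
#   x, y = rng.rand(3), rng.rand(3)
#   approx = _reference_rff_map(x, omega).dot(_reference_rff_map(y, omega))
#   exact = np.exp(-np.sum((x - y)**2) / (2 * stddev**2))  # approx ~= exact
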
  def testMappedShape(self):
    x1 = constant_op.constant([[2.0, 1.0, 0.0]])
    x2 = constant_op.constant([[1.0, -1.0, 2.0], [-1.0, 10.0, 1.0],
                               [4.0, -2.0, -1.0]])

    with self.test_session():
      rffm = RandomFourierFeatureMapper(3, 10, 1.0)
      mapped_x1 = rffm.map(x1)
      mapped_x2 = rffm.map(x2)
      self.assertEqual([1, 10], mapped_x1.get_shape())
      self.assertEqual([3, 10], mapped_x2.get_shape())
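
  # Note: map preserves the batch dimension and replaces the trailing feature
  # dimension, so an input of shape [batch, input_dim] maps to
  # [batch, output_dim] ([1, 3] -> [1, 10] and [3, 3] -> [3, 10] above).
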
  def testBadKernelApproximation(self):
    x = constant_op.constant([[2.0, 1.0, 0.0]])
    y = constant_op.constant([[1.0, -1.0, 2.0]])
    stddev = 3.0

    with self.test_session():
      # The mapped dimension is fairly small, so the kernel approximation is
      # very rough.
      rffm = RandomFourierFeatureMapper(3, 100, stddev, seed=0)
      mapped_x = rffm.map(x)
      mapped_y = rffm.map(y)
      exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
      approx_kernel_value = _inner_product(mapped_x, mapped_y)
      self.assertAllClose(
          exact_kernel_value.eval(), approx_kernel_value.eval(), atol=0.2)
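
  # The contrast with testGoodKernelApproximationAmortized above is the Monte
  # Carlo rate: the standard error of a random-feature kernel estimate shrinks
  # roughly like 1/sqrt(output_dim), so output_dim=100 calls for the loose
  # atol=0.2 while output_dim=5000 supports the much tighter atol=0.02.
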
    def testMulticlassDataWithAndWithoutKernels(self):
        """Tests classifier w/ and w/o kernels on multiclass data."""
        feature_column = layers.real_valued_column('feature', dimension=4)

        # Metrics for linear classifier (no kernels).
        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[feature_column], n_classes=3)
        linear_classifier.fit(input_fn=test_data.iris_input_multiclass_fn,
                              steps=50)
        linear_metrics = linear_classifier.evaluate(
            input_fn=test_data.iris_input_multiclass_fn, steps=1)
        linear_loss = linear_metrics['loss']
        linear_accuracy = linear_metrics['accuracy']

        # Using kernel mappers makes it possible to discover non-linearities in
        # the data (via RBF kernel approximation), which reduces the loss and
        # increases the accuracy.
        kernel_mappers = {
            feature_column: [
                RandomFourierFeatureMapper(input_dim=4,
                                           output_dim=50,
                                           stddev=1.0,
                                           name='rffm')
            ]
        }
        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], n_classes=3, kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(
            input_fn=test_data.iris_input_multiclass_fn, steps=50)
        kernel_linear_metrics = kernel_linear_classifier.evaluate(
            input_fn=test_data.iris_input_multiclass_fn, steps=1)
        kernel_linear_loss = kernel_linear_metrics['loss']
        kernel_linear_accuracy = kernel_linear_metrics['accuracy']
        self.assertLess(kernel_linear_loss, linear_loss)
        self.assertGreater(kernel_linear_accuracy, linear_accuracy)
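
    # Note: the kernelized classifier above is constructed with
    # feature_columns=[] because the columns to transform are supplied through
    # the kernel_mappers dict; the mapped column (4 -> 50 dimensions here)
    # then serves as the model's feature in place of the raw column.
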
  def testTwoMapperObjects(self):
    x = constant_op.constant([[2.0, 1.0, 0.0]])
    y = constant_op.constant([[1.0, -1.0, 2.0]])
    stddev = 3.0

    with self.test_session():
      # The mapped dimension is fairly small, so the kernel approximation is
      # very rough.
      rffm1 = RandomFourierFeatureMapper(3, 100, stddev)
      rffm2 = RandomFourierFeatureMapper(3, 100, stddev)
      mapped_x1 = rffm1.map(x)
      mapped_y1 = rffm1.map(y)
      mapped_x2 = rffm2.map(x)
      mapped_y2 = rffm2.map(y)

      approx_kernel_value1 = _inner_product(mapped_x1, mapped_y1)
      approx_kernel_value2 = _inner_product(mapped_x2, mapped_y2)
      self.assertAllClose(
          approx_kernel_value1.eval(), approx_kernel_value2.eval(), atol=0.01)
    def testLinearlyInseparableBinaryDataWithAndWithoutKernels(self):
        """Tests classifier w/ and w/o kernels on non-linearly-separable data."""
        multi_dim_feature = layers.real_valued_column('multi_dim_feature',
                                                      dimension=2)

        # The data points are not linearly separable, so there will be at least
        # one misclassified sample (accuracy < 0.8). In fact, the loss is
        # minimized for w1=w2=0.0, in which case each example incurs a log-loss
        # of ln(2). The overall (average) loss should then be ln(2) and the
        # logits should be approximately 0.0 for each sample.
        logreg_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[multi_dim_feature])
        logreg_classifier.fit(input_fn=_linearly_inseparable_binary_input_fn,
                              steps=50)
        logreg_metrics = logreg_classifier.evaluate(
            input_fn=_linearly_inseparable_binary_input_fn, steps=1)
        logreg_loss = logreg_metrics['loss']
        logreg_accuracy = logreg_metrics['accuracy']
        logreg_predictions = logreg_classifier.predict(
            input_fn=_linearly_inseparable_binary_input_fn, as_iterable=False)
        self.assertAlmostEqual(logreg_loss, np.log(2), places=3)
        self.assertLess(logreg_accuracy, 0.8)
        self.assertAllClose(logreg_predictions['logits'],
                            [[0.0], [0.0], [0.0], [0.0]])

        # Using kernel mappers makes it possible to discover non-linearities in
        # the data. Mapping the data to a higher-dimensional feature space via
        # approximate RBF kernels substantially reduces the loss and leads to
        # perfect classification accuracy.
        kernel_mappers = {
            multi_dim_feature:
            [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }
        kernelized_logreg_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], kernel_mappers=kernel_mappers)
        kernelized_logreg_classifier.fit(
            input_fn=_linearly_inseparable_binary_input_fn, steps=50)
        kernelized_logreg_metrics = kernelized_logreg_classifier.evaluate(
            input_fn=_linearly_inseparable_binary_input_fn, steps=1)
        kernelized_logreg_loss = kernelized_logreg_metrics['loss']
        kernelized_logreg_accuracy = kernelized_logreg_metrics['accuracy']
        self.assertLess(kernelized_logreg_loss, 0.2)
        self.assertEqual(kernelized_logreg_accuracy, 1.0)
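
    # A quick numeric check of the ln(2) claim in the comment above (an
    # illustrative aside, not part of the original test): with zero weights
    # and bias the logit is 0.0 for every example, so each example's log-loss
    # is -log(sigmoid(0)) = -log(0.5) = ln(2), regardless of its label.
    def _checkZeroLogitLoss(self):
      zero_logit_loss = -np.log(1.0 / (1.0 + np.exp(-0.0)))
      self.assertAlmostEqual(zero_logit_loss, np.log(2.0), places=7)
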
    def testVariablesWithAndWithoutKernels(self):
        """Tests variables w/ and w/o kernel."""
        multi_dim_feature = layers.real_valued_column('multi_dim_feature',
                                                      dimension=2)

        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[multi_dim_feature])
        linear_classifier.fit(input_fn=_linearly_inseparable_binary_input_fn,
                              steps=50)
        linear_variables = linear_classifier.get_variable_names()
        self.assertIn('linear/multi_dim_feature/weight', linear_variables)
        self.assertIn('linear/bias_weight', linear_variables)
        linear_weights = linear_classifier.get_variable_value(
            'linear/multi_dim_feature/weight')
        linear_bias = linear_classifier.get_variable_value(
            'linear/bias_weight')

        kernel_mappers = {
            multi_dim_feature:
            [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }
        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(
            input_fn=_linearly_inseparable_binary_input_fn, steps=50)
        kernel_linear_variables = kernel_linear_classifier.get_variable_names()
        self.assertIn('linear/multi_dim_feature_MAPPED/weight',
                      kernel_linear_variables)
        self.assertIn('linear/bias_weight', kernel_linear_variables)
        kernel_linear_weights = kernel_linear_classifier.get_variable_value(
            'linear/multi_dim_feature_MAPPED/weight')
        kernel_linear_bias = kernel_linear_classifier.get_variable_value(
            'linear/bias_weight')

        # The feature column used for linear classification (no kernels) has
        # dimension 2, so the model learns a 2-dimensional weight vector (plus
        # a scalar bias). In the kernelized model, the features are mapped to a
        # 30-dimensional feature space, so the weight variable likewise has
        # dimension 30.
        self.assertEqual(2, len(linear_weights))
        self.assertEqual(1, len(linear_bias))
        self.assertEqual(30, len(kernel_linear_weights))
        self.assertEqual(1, len(kernel_linear_bias))
    def testClassifierWithAndWithoutKernelsNoRealValuedColumns(self):
        """Tests kernels have no effect for non-real valued columns ."""
        def input_fn():
            return {
                'price':
                constant_op.constant([[0.4], [0.6], [0.3]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
            }, constant_op.constant([[1], [0], [1]])

        price = layers.real_valued_column('price')
        country = layers.sparse_column_with_hash_bucket('country',
                                                        hash_bucket_size=5)

        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[price, country])
        linear_classifier.fit(input_fn=input_fn, steps=100)
        linear_metrics = linear_classifier.evaluate(input_fn=input_fn, steps=1)
        linear_loss = linear_metrics['loss']
        linear_accuracy = linear_metrics['accuracy']

        kernel_mappers = {
            country: [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }

        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[price, country], kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(input_fn=input_fn, steps=100)
        kernel_linear_metrics = kernel_linear_classifier.evaluate(
            input_fn=input_fn, steps=1)
        kernel_linear_loss = kernel_linear_metrics['loss']
        kernel_linear_accuracy = kernel_linear_metrics['accuracy']

        # The kernel mapping is applied to a non-real-valued feature column and so
        # it should have no effect on the model. The loss and accuracy of the
        # "kernelized" model should match the loss and accuracy of the initial model
        # (without kernels).
        self.assertAlmostEqual(linear_loss, kernel_linear_loss, delta=0.01)
        self.assertAlmostEqual(linear_accuracy,
                               kernel_linear_accuracy,
                               delta=0.01)