Example #1
    def testMultiDimensionalRealValuedFeaturesWithL2Regularization(self):
        """Tests SVM with multi-dimensional real features and L2 regularization."""

        # This is identical to the setup in testRealValuedFeaturesWithL2Regularization,
        # except that the two [3, 1] tensors (dense features) have been replaced
        # by a single [3, 2] tensor (dense feature).
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'multi_dim_feature':
                constant_op.constant([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]]),
            }, constant_op.constant([[1], [0], [1]])

        multi_dim_feature = feature_column.real_valued_column(
            'multi_dim_feature', dimension=2)
        svm_classifier = svm.SVM(feature_columns=[multi_dim_feature],
                                 example_id_column='example_id',
                                 l1_regularization=0.0,
                                 l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']
        self.assertLess(loss, 0.1)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
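
As a quick side check (not part of the original test), the single [3, 2] feature here is just the two [3, 1] features from testRealValuedFeaturesWithL2Regularization stacked column-wise, exactly as the comment in input_fn says. A minimal NumPy sketch:

import numpy as np

multi_dim = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
feature1 = np.array([[0.5], [1.0], [1.0]])
feature2 = np.array([[1.0], [-1.0], [0.5]])
# The [3, 2] tensor is the column-wise concatenation of the two [3, 1] tensors.
print(np.array_equal(multi_dim, np.hstack([feature1, feature2])))  # True
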
Example #2
    def testBucketizedFeatures(self):
        """Tests SVM classifier with bucketized features."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[600.0], [800.0], [400.0]]),
                'sq_footage': constant_op.constant([[1000.0], [800.0],
                                                    [500.0]]),
                'weights': constant_op.constant([[1.0], [1.0], [1.0]])
            }, constant_op.constant([[1], [0], [1]])

        price_bucket = feature_column.bucketized_column(
            feature_column.real_valued_column('price'),
            boundaries=[500.0, 700.0])
        sq_footage_bucket = feature_column.bucketized_column(
            feature_column.real_valued_column('sq_footage'),
            boundaries=[650.0])

        svm_classifier = svm.SVM(
            feature_columns=[price_bucket, sq_footage_bucket],
            example_id_column='example_id',
            l1_regularization=0.1,
            l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        accuracy = svm_classifier.evaluate(input_fn=input_fn,
                                           steps=1)['accuracy']
        self.assertAlmostEqual(accuracy, 1.0, places=3)
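
For intuition, bucketized_column assigns each raw value to a bucket index determined by the boundaries, and the SVM then learns one weight per bucket. A minimal sketch of the same mapping using np.digitize (an assumed equivalence for illustration, not TF's implementation):

import numpy as np

# boundaries=[500.0, 700.0] yields buckets (-inf, 500), [500, 700), [700, inf).
print(np.digitize([600.0, 800.0, 400.0], bins=[500.0, 700.0]))  # [1 2 0]
# boundaries=[650.0] yields buckets (-inf, 650) and [650, inf).
print(np.digitize([1000.0, 800.0, 500.0], bins=[650.0]))        # [1 1 0]
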
Example #3
    def testRealValuedFeaturesWithL2Regularization(self):
        """Tests SVM classifier with real valued features and L2 regularization."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.5], [1.0], [1.0]]),
                'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
            }, constant_op.constant([[1], [0], [1]])

        feature1 = feature_column.real_valued_column('feature1')
        feature2 = feature_column.real_valued_column('feature2')
        svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                                 example_id_column='example_id',
                                 l1_regularization=0.0,
                                 l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']
        # The points are in general separable. Moreover, if there were no
        # regularization, the margin inequalities would also be satisfied (for
        # instance by w1=1.0, w2=5.0). Due to regularization, smaller weights are
        # chosen, which results in a small but non-zero unregularized loss. Still,
        # all the predictions are correct, resulting in perfect accuracy.
        self.assertLess(loss, 0.1)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
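
A minimal NumPy check of the comment's claim (outside the test itself): with the example weights w1=1.0, w2=5.0, every point satisfies the margin inequality, so the unregularized hinge loss would be exactly zero:

import numpy as np

X = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])  # rows of (feature1, feature2)
y = np.array([1, -1, 1])                             # labels {1, 0} mapped to {+1, -1}
w = np.array([1.0, 5.0])

margins = y * (X @ w)                       # y_i * w^T x_i
print(margins)                              # [5.5 4.  3.5], all >= 1
print(np.maximum(0.0, 1 - margins).mean())  # 0.0 unregularized hinge loss
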
Example #4
    def testSparseFeatures(self):
        """Tests SVM classifier with (hashed) sparse features."""
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                constant_op.constant([[0.8], [0.6], [0.3]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 0], [2, 0]],
                                           dense_shape=[3, 1]),
            }, constant_op.constant([[0], [1], [1]])

        price = feature_column.real_valued_column('price')
        country = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        svm_classifier = svm.SVM(feature_columns=[price, country],
                                 example_id_column='example_id',
                                 l1_regularization=0.0,
                                 l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        accuracy = svm_classifier.evaluate(input_fn=input_fn,
                                           steps=1)['accuracy']
        self.assertAlmostEqual(accuracy, 1.0, places=3)
Example #5
    def testRealValuedFeaturesWithBigL1Regularization(self):
        """Tests SVM classifier with real valued features and L2 regularization."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.5], [1.0], [1.0]]),
                'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
            }, constant_op.constant([[1], [0], [1]])

        feature1 = feature_column.real_valued_column('feature1')
        feature2 = feature_column.real_valued_column('feature2')
        svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                                 example_id_column='example_id',
                                 l1_regularization=3.0,
                                 l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']

        # When the L1 regularization parameter is large, the loss due to
        # regularization outweighs the unregularized loss. In this case, the
        # classifier favors very small weights (here, exactly 0), resulting in
        # both a large unregularized loss and poor accuracy.
        self.assertAlmostEqual(loss, 1.0, places=3)
        self.assertAlmostEqual(accuracy, 1 / 3, places=3)
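
A small numeric sketch of the reasoning above (an illustration, not the estimator's internals): with all weights driven to zero, every margin is 0, the per-example hinge loss is max(0, 1 - 0) = 1.0, and a constant "class 0" prediction matches only one of the three labels:

import numpy as np

y = np.array([1, -1, 1])   # labels {1, 0} mapped to {+1, -1}
margins = np.zeros(3)      # w = 0 implies y_i * w^T x_i = 0 for every example
print(np.maximum(0.0, 1 - margins).mean())  # 1.0, matching the loss assertion
print((y == -1).mean())                     # 1/3, matching the accuracy assertion
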
Example #6
    def testRealValuedFeaturesWithMildL1Regularization(self):
        """Tests SVM classifier with real valued features and L2 regularization."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.5], [1.0], [1.0]]),
                'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
            }, constant_op.constant([[1], [0], [1]])

        feature1 = feature_column.real_valued_column('feature1')
        feature2 = feature_column.real_valued_column('feature2')
        svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                                 example_id_column='example_id',
                                 l1_regularization=0.5,
                                 l2_regularization=1.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']

        # Adding a small amount of L1 regularization favors even smaller weights.
        # This results in a somewhat moderate unregularized loss (bigger than the
        # one with no L1 regularization). Still, since the L1 term is small, all
        # the predictions are correct, resulting in perfect accuracy.
        self.assertGreater(loss, 0.1)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
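
To illustrate the mechanism in the comment above (a sketch under the assumption that L1 pressure mainly shrinks the weight vector): scaling separating weights down eventually pushes margins below 1, creating nonzero hinge loss, while the score signs, and hence the accuracy, stay the same:

import numpy as np

X = np.array([[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
y = np.array([1, -1, 1])
w = np.array([1.0, 5.0])  # hypothetical separating weights from the earlier comment

for scale in (1.0, 0.2, 0.05):
    scores = X @ (scale * w)
    hinge = np.maximum(0.0, 1 - y * scores).mean()  # grows as the weights shrink
    accuracy = (np.sign(scores) == y).mean()        # stays 1.0 throughout
    print(scale, hinge, accuracy)
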
Example #7
    def testMixedFeatures(self):
        """Tests SVM classifier with a mix of features."""
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                constant_op.constant([[0.6], [0.8], [0.3]]),
                'sq_footage':
                constant_op.constant([[900.0], [700.0], [600.0]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
                'weights':
                constant_op.constant([[3.0], [1.0], [1.0]])
            }, constant_op.constant([[1], [0], [1]])

        price = feature_column.real_valued_column('price')
        sq_footage_bucket = feature_column.bucketized_column(
            feature_column.real_valued_column('sq_footage'),
            boundaries=[650.0, 800.0])
        country = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        sq_footage_country = feature_column.crossed_column(
            [sq_footage_bucket, country], hash_bucket_size=10)
        svm_classifier = svm.SVM(feature_columns=[
            price, sq_footage_bucket, country, sq_footage_country
        ],
                                 example_id_column='example_id',
                                 weight_column_name='weights',
                                 l1_regularization=0.1,
                                 l2_regularization=1.0)

        svm_classifier.fit(input_fn=input_fn, steps=30)
        accuracy = svm_classifier.evaluate(input_fn=input_fn,
                                           steps=1)['accuracy']
        self.assertAlmostEqual(accuracy, 1.0, places=3)
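
For intuition about crossed_column (a conceptual stand-in only; TF uses its own fingerprint hashing, not Python's hash): the cross pairs the sq_footage bucket id with the country value and hashes the pair into one of hash_bucket_size ids, letting the model learn a weight per combination:

def cross_bucket(sq_footage_bucket_id, country, hash_bucket_size=10):
    # Conceptual stand-in for crossed_column's bucketing, not TF's actual hashing.
    return hash((sq_footage_bucket_id, country)) % hash_bucket_size

print(cross_bucket(2, 'IT'))  # some id in [0, 10)
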
Example #8
    def testRealValuedFeaturesPerfectlySeparable(self):
        """Tests SVM classifier with real valued features."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.0], [1.0], [3.0]]),
                'feature2': constant_op.constant([[1.0], [-1.2], [1.0]]),
            }, constant_op.constant([[1], [0], [1]])

        feature1 = feature_column.real_valued_column('feature1')
        feature2 = feature_column.real_valued_column('feature2')
        svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                                 example_id_column='example_id',
                                 l1_regularization=0.0,
                                 l2_regularization=0.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']
        # The points are not only separable, but there exist weights (for instance
        # w1=0.0, w2=1.0) that satisfy the margin inequalities (y_i * w^T x_i >= 1).
        # The unregularized loss should therefore be 0.0.
        self.assertAlmostEqual(loss, 0.0, places=3)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
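
A minimal NumPy verification of the comment (outside the test itself): with w1=0.0, w2=1.0 all three margin inequalities hold, so the hinge loss is exactly zero:

import numpy as np

X = np.array([[0.0, 1.0], [1.0, -1.2], [3.0, 1.0]])
y = np.array([1, -1, 1])
w = np.array([0.0, 1.0])
print(y * (X @ w))  # [1.  1.2 1. ], every y_i * w^T x_i >= 1, so loss is 0.0
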
Example #9
    feature_cols = continuous_cols.copy()
    feature_cols.update(id_col)
    # Converts the label column into a constant Tensor. (Reconstructed from the
    # comments and the parallel code in Example #10; assumes the enclosing
    # function's dataframe argument is named df and 'Survived' is the label.)
    label = tf.constant(df['Survived'].values)
    # Returns the feature columns and the label.
    return feature_cols, label


try:
    shutil.rmtree('svm/')
except OSError:
    pass

svm_model = svm.SVM(example_id_column="PassengerId",
                    feature_columns=[
                        tf.contrib.layers.real_valued_column(k)
                        for k in list(train)
                        if k not in ['Survived', 'PassengerId']
                    ],
                    model_dir="svm/")
svm_model.fit(input_fn=train_input_fn, steps=100)
svm_pred = list(svm_model.predict_classes(input_fn=predict_input_fn))

if TEST:
    target_names = ['Not Survived', 'Survived']
    print("SVM Report")
    print(
        classification_report(test['Survived'],
                              svm_pred,
                              target_names=target_names))

    print("SVM Confusion Matrix")
Example #10
        for k in CATEGORICAL_COLUMNS
    }
    # Merges the two dictionaries into one. (dict(a.items() + b.items()) is
    # Python 2 only; update() works under both Python 2 and 3.)
    feature_cols = dict(continuous_cols)
    feature_cols.update(categorical_cols)
    # Adds the example id column required by the SVM estimator's SDCA optimizer.
    feature_cols['example_id'] = tf.constant(
        [str(i + 1) for i in range(df['age'].size)])
    # Converts the label column into a constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    # Returns the feature columns and the label.
    return feature_cols, label
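
The snippet above begins mid-function. A plausible sketch of the omitted opening of input_fn, following the familiar tf.contrib.learn tutorial pattern (CONTINUOUS_COLUMNS and CATEGORICAL_COLUMNS are assumed to be lists of column names defined earlier in the original script):

def input_fn(df):
    # Maps each continuous feature column name to a constant Tensor of its values.
    continuous_cols = {
        k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS
    }
    # Maps each categorical feature column name to a SparseTensor of its values.
    categorical_cols = {
        k: tf.SparseTensor(indices=[[i, 0] for i in range(df[k].size)],
                           values=df[k].values,
                           dense_shape=[df[k].size, 1])
        for k in CATEGORICAL_COLUMNS
    }
    # ...then continues as shown above: merge the dicts, add 'example_id',
    # build the label Tensor, and return (feature_cols, label).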


def train_input_fn():
    return input_fn(df_train)


def eval_input_fn():
    return input_fn(df_test)


model_dir = '../svm_model_dir'

model = svm.SVM(example_id_column='example_id',
                feature_columns=FEATURE_COLUMNS,
                model_dir=model_dir)
model.fit(input_fn=train_input_fn, steps=100)
results = model.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
    print("%s: %s" % (key, results[key]))