예제 #1
0
    def testCrossedFeatures(self):
        """Trains LinearClassifier with LinearSDCA on a single crossed column.

        The input holds three examples, each with one sparse string value
        for 'language' and one for 'country'. The only feature column given
        to the classifier is the cross of those two features hashed into 100
        buckets. After 100 training steps the evaluation loss is required to
        be below 0.2.
        """
        def input_fn():
            def one_value_per_row(strings):
                # Row i holds strings[i] at column 0; dense shape is [n, 1].
                return sparse_tensor.SparseTensor(
                    values=strings,
                    indices=[[row, 0] for row in range(len(strings))],
                    dense_shape=[len(strings), 1])

            features = {}
            features['example_id'] = constant_op.constant(['1', '2', '3'])
            features['language'] = one_value_per_row(
                ['english', 'italian', 'spanish'])
            features['country'] = one_value_per_row(['US', 'IT', 'MX'])
            labels = constant_op.constant([[0], [0], [1]])
            return features, labels

        language_x_country = feature_column_lib.crossed_column(
            ['language', 'country'], hash_bucket_size=100)
        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.01)
        estimator = linear.LinearClassifierV2(
            feature_columns=[language_x_country], optimizer=sdca)

        estimator.train(input_fn=input_fn, steps=100)
        metrics = estimator.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(metrics['loss'], 0.2)
예제 #2
0
    def testPartitionedVariables(self):
        """Trains LinearClassifier with LinearSDCA and a variable partitioner.

        Uses a mix of feature columns (numeric 'price', bucketized
        'sq_footage', hashed 'country', and the cross of the bucketized
        square footage with 'country'), per-example weights taken from the
        'weights' feature, and a fixed-size partitioner created with
        num_shards=2 and axis=0. After 100 training steps the evaluation
        loss is required to be below 0.2.
        """
        def input_fn():
            # Entries are created in the same order as the feature keys below.
            features = {}
            features['example_id'] = constant_op.constant(['1', '2', '3'])
            features['price'] = constant_op.constant([[0.6], [0.8], [0.3]])
            features['sq_footage'] = constant_op.constant(
                [[900.0], [700.0], [600.0]])
            features['country'] = sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5])
            features['weights'] = constant_op.constant([[3.0], [1.0], [1.0]])
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_col = feature_column_lib.numeric_column('price')
        sq_footage_numeric = feature_column_lib.numeric_column('sq_footage')
        sq_footage_bucket_col = feature_column_lib.bucketized_column(
            sq_footage_numeric, boundaries=[650.0, 800.0])
        country_col = feature_column_lib.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        # The cross refers to 'country' by feature key, not by country_col.
        sq_footage_x_country = feature_column_lib.crossed_column(
            [sq_footage_bucket_col, 'country'], hash_bucket_size=10)
        columns = [
            price_col,
            sq_footage_bucket_col,
            country_col,
            sq_footage_x_country,
        ]

        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.01)
        two_shards = partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0)

        estimator = linear.LinearClassifierV2(
            feature_columns=columns,
            weight_column='weights',
            partitioner=two_shards,
            optimizer=sdca)
        estimator.train(input_fn=input_fn, steps=100)
        metrics = estimator.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(metrics['loss'], 0.2)
    def test_sequential_model_with_crossed_column(self):
        """Smoke-tests a Keras Sequential model fed by a crossed column.

        Builds a DenseFeatures layer from a bucketized 'age' column and an
        indicator column wrapping the cross of the age buckets with a
        vocabulary-based 'thal' column, stacks three Dense layers on top,
        and runs fit / evaluate / predict on random data. The test makes no
        assertions on the results; it passes if none of the calls raise.
        """
        feature_columns = []
        age_buckets = fc.bucketized_column(
            fc.numeric_column('age'),
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
        feature_columns.append(age_buckets)

        # Categorical column; it is only used as an input to the cross below
        # and is not added to feature_columns on its own.
        thal = fc.categorical_column_with_vocabulary_list(
            'thal', ['fixed', 'normal', 'reversible'])

        # Cross age buckets with thal, then wrap the cross in an indicator
        # column before handing it to DenseFeatures.
        crossed_feature = fc.crossed_column([age_buckets, thal],
                                            hash_bucket_size=1000)
        crossed_feature = fc.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature)

        feature_layer = df.DenseFeatures(feature_columns)

        model = keras.models.Sequential([
            feature_layer,
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        age_data = np.random.randint(10, 100, size=100)
        thal_data = np.random.choice(['fixed', 'normal', 'reversible'],
                                     size=100)
        inp_x = {'age': age_data, 'thal': thal_data}
        # randint's upper bound is exclusive: (0, 2) yields labels in {0, 1}.
        # The previous (0, 1) produced all-zero labels, so the binary
        # classifier was only ever trained on a single class.
        inp_y = np.random.randint(0, 2, size=100)
        ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        # NOTE(review): fit is called twice, presumably on purpose to check
        # that the model can be fit again after a first fit -- confirm.
        model.fit(ds, epochs=1)
        model.fit(ds, epochs=1)
        model.evaluate(ds)
        model.predict(ds)
예제 #4
0
    def testMixedFeaturesArbitraryWeights(self):
        """Trains LinearRegressor with LinearSDCA on mixed, weighted features.

        The feature columns are numeric 'price', bucketized 'sq_footage',
        hashed 'country', and the cross of the bucketized square footage
        with 'country'. Per-example weights (3.0, 5.0, 7.0) are read from
        the 'weights' feature. After 20 training steps the evaluation loss
        is required to be below 0.05.
        """
        def input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                # 'price' is passed as a flat list here, unlike 'sq_footage'.
                'price': constant_op.constant([0.6, 0.8, 0.3]),
                'sq_footage': constant_op.constant(
                    [[900.0], [700.0], [600.0]]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [5.0], [7.0]]),
            }
            targets = constant_op.constant([[1.55], [-1.25], [-3.0]])
            return features, targets

        price_col = feature_column_lib.numeric_column('price')
        sq_footage_numeric = feature_column_lib.numeric_column('sq_footage')
        sq_footage_bucket_col = feature_column_lib.bucketized_column(
            sq_footage_numeric, boundaries=[650.0, 800.0])
        country_col = feature_column_lib.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        # The cross refers to 'country' by feature key, not by country_col.
        sq_footage_x_country = feature_column_lib.crossed_column(
            [sq_footage_bucket_col, 'country'], hash_bucket_size=10)
        columns = [
            price_col,
            sq_footage_bucket_col,
            country_col,
            sq_footage_x_country,
        ]

        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.1)
        estimator = linear.LinearRegressorV2(
            feature_columns=columns,
            weight_column='weights',
            optimizer=sdca)

        estimator.train(input_fn=input_fn, steps=20)
        metrics = estimator.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(metrics['loss'], 0.05)