Beispiel #1
0
    def testBucketizedFeatures(self):
        """Checks LinearClassifierV2 trained with LinearSDCA on bucketized columns.

        The 'price' and 'sq_footage' numeric features are bucketized, the
        classifier is trained for 100 steps on three examples, and the loss
        reported by a one-step evaluation must be below 0.2.
        """
        def build_inputs():
            # Produces the (features, labels) pair consumed by train/evaluate.
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[600.0], [1000.0], [400.0]]),
                'sq_footage': constant_op.constant(
                    [[1000.0], [600.0], [700.0]]),
                'weights': constant_op.constant([[1.0], [1.0], [1.0]]),
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_column = feature_column_lib.numeric_column('price')
        bucketized_price = feature_column_lib.bucketized_column(
            price_column, boundaries=[500.0, 700.0])
        sq_footage_column = feature_column_lib.numeric_column('sq_footage')
        bucketized_sq_footage = feature_column_lib.bucketized_column(
            sq_footage_column, boundaries=[650.0])

        # 'example_id' names the feature the SDCA optimizer reads example ids
        # from; it matches the key emitted by build_inputs.
        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.01)
        model = linear.LinearClassifierV2(
            feature_columns=[bucketized_price, bucketized_sq_footage],
            weight_column='weights',
            optimizer=sdca)

        model.train(input_fn=build_inputs, steps=100)
        metrics = model.evaluate(input_fn=build_inputs, steps=1)
        self.assertLess(metrics['loss'], 0.2)
  def test_linear_model_numpy_input_fn(self):
    """Checks fc.linear_model predictions on features fed by numpy_input_fn.

    The column weights and bias are overwritten with known values, so each
    prediction is the sum of one bucket weight, one vocabulary weight and
    the bias. Only the first batch (two rows, since batch_size=2 and
    shuffle=False) is evaluated.
    """
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
      try:
        bias = self._get_linear_model_bias()
        price_buckets_var = self._get_linear_model_column_var(price_buckets)
        body_style_var = self._get_linear_model_column_var(body_style)

        # One weight per price bucket (three boundaries -> four buckets) and
        # one per vocabulary entry.
        sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
        sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
        sess.run(bias.assign([5.]))

        # Row 0: price -1 and 'sedan'; row 1: price 2 and 'hardtop'.
        self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
      finally:
        # Stop and join the queue-runner threads even when an assertion or a
        # session call above fails, so they do not outlive this test.
        coord.request_stop()
        coord.join(threads)
Beispiel #3
0
 def setUp(self):
   """Creates the regression head and the bucketized columns used by tests."""
   self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
   # One bucketized float32 column per feature, named 'f_0', 'f_1', ...;
   # the columns are collected in a set, as in the original fixture.
   bucketized = set()
   for index in range(NUM_FEATURES):
     numeric = feature_column.numeric_column(
         'f_%d' % index, dtype=dtypes.float32)
     bucketized.add(
         feature_column.bucketized_column(numeric, BUCKET_BOUNDARIES))
   self._feature_columns = bucketized
Beispiel #4
0
  def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    """Trains an in-memory boosted-trees classifier on mixed column types.

    The model receives a numeric column, a bucketized column and an indicator
    column over a vocabulary, is trained for a single tree of depth 5, and
    must reach an accuracy of 1.0 on the training data.
    """
    vocab_column = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    one_hot_column = feature_column.indicator_column(vocab_column)
    bucket_source = feature_column.numeric_column('f_1', dtype=dtypes.float32)
    bucket_column = feature_column.bucketized_column(
        bucket_source, BUCKET_BOUNDARIES)
    raw_column = feature_column.numeric_column('f_2', dtype=dtypes.float32)

    targets = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    raw_features = {
        'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        'f_1': np.array([1, 1, 1, 1, 1]),
        'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
    }
    # The same input function serves both training and evaluation; all five
    # rows fit in a single unshuffled batch.
    data_fn = numpy_io.numpy_input_fn(
        x=raw_features,
        y=targets,
        num_epochs=None,
        batch_size=5,
        shuffle=False)

    estimator = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=data_fn,
        feature_columns=[raw_column, bucket_column, one_hot_column],
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        estimator.model_dir,
        global_step=5,
        finalized_trees=1,
        attempted_layers=5)

    metrics = estimator.evaluate(input_fn=data_fn, steps=1)
    self.assertAllClose(metrics['accuracy'], 1.0)
Beispiel #5
0
    def test_linear_model_impl_numpy_input_fn(self):
        """Checks Keras linear-model predictions on numpy_input_fn features.

        The column weights and bias are overwritten with known values, so
        each prediction is the sum of one bucket weight, one vocabulary
        weight and the bias. Only the first batch (two rows, since
        batch_size=2 and shuffle=False) is evaluated.
        """
        price = fc.numeric_column('price')
        price_buckets = fc.bucketized_column(price,
                                             boundaries=[
                                                 0.,
                                                 10.,
                                                 100.,
                                             ])
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

        input_fn = numpy_io.numpy_input_fn(
            x={
                'price': np.array([-1., 2., 13., 104.]),
                'body-style': np.array(
                    ['sedan', 'hardtop', 'wagon', 'sedan']),
            },
            batch_size=2,
            shuffle=False)
        features = input_fn()
        net = self._get_keras_linear_model_predictions(
            features, [price_buckets, body_style])
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
            try:
                bias = self._get_linear_model_bias()
                price_buckets_var = self._get_linear_model_column_var(
                    price_buckets)
                body_style_var = self._get_linear_model_column_var(body_style)

                # One weight per price bucket (three boundaries -> four
                # buckets) and one per vocabulary entry.
                sess.run(
                    price_buckets_var.assign(
                        [[10.], [100.], [1000.], [10000.]]))
                sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
                sess.run(bias.assign([5.]))

                # Row 0: price -1 and 'sedan'; row 1: price 2 and 'hardtop'.
                self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]],
                                    sess.run(net))
            finally:
                # Stop and join the queue-runner threads even when an
                # assertion or a session call above fails, so they do not
                # outlive this test.
                coord.request_stop()
                coord.join(threads)
Beispiel #6
0
    def testPartitionedVariables(self):
        """Checks LinearClassifierV2 with LinearSDCA and partitioned variables.

        The model mixes a numeric, a bucketized, a hashed categorical and a
        crossed column, with variables split into two shards along axis 0.
        After 100 training steps the one-step evaluation loss must be below
        0.2.
        """
        def build_inputs():
            # Produces the (features, labels) pair consumed by train/evaluate.
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[0.6], [0.8], [0.3]]),
                'sq_footage': constant_op.constant(
                    [[900.0], [700.0], [600.0]]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [1.0], [1.0]]),
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_column = feature_column_lib.numeric_column('price')
        bucketized_sq_footage = feature_column_lib.bucketized_column(
            feature_column_lib.numeric_column('sq_footage'),
            boundaries=[650.0, 800.0])
        country_column = (
            feature_column_lib.categorical_column_with_hash_bucket(
                'country', hash_bucket_size=5))
        # The cross combines the bucketized column with the raw 'country' key.
        sq_footage_x_country = feature_column_lib.crossed_column(
            [bucketized_sq_footage, 'country'], hash_bucket_size=10)

        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.01)
        two_shards = partitioned_variables.fixed_size_partitioner(
            num_shards=2, axis=0)

        model = linear.LinearClassifierV2(
            feature_columns=[
                price_column,
                bucketized_sq_footage,
                country_column,
                sq_footage_x_country,
            ],
            weight_column='weights',
            partitioner=two_shards,
            optimizer=sdca)
        model.train(input_fn=build_inputs, steps=100)
        metrics = model.evaluate(input_fn=build_inputs, steps=1)
        self.assertLess(metrics['loss'], 0.2)
    def test_sequential_model_with_crossed_column(self):
        """Smoke-tests a Sequential model fed by DenseFeatures with a cross.

        The feature layer combines a bucketized 'age' column with an
        indicator over the cross of the age buckets and the 'thal'
        vocabulary column. The model is fit, evaluated and used for
        prediction on random data; no output values are asserted.
        """
        feature_columns = []
        age_buckets = fc.bucketized_column(
            fc.numeric_column('age'),
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
        feature_columns.append(age_buckets)

        # Categorical column that is only used through the cross below.
        thal = fc.categorical_column_with_vocabulary_list(
            'thal', ['fixed', 'normal', 'reversible'])

        # The cross is wrapped in an indicator column before being handed to
        # the dense feature layer.
        crossed_feature = fc.crossed_column([age_buckets, thal],
                                            hash_bucket_size=1000)
        crossed_feature = fc.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature)

        feature_layer = df.DenseFeatures(feature_columns)

        model = keras.models.Sequential([
            feature_layer,
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        age_data = np.random.randint(10, 100, size=100)
        thal_data = np.random.choice(['fixed', 'normal', 'reversible'],
                                     size=100)
        inp_x = {'age': age_data, 'thal': thal_data}
        # randint's upper bound is exclusive: (0, 2) draws labels from {0, 1}.
        # The previous (0, 1) produced only zeros, so the positive class was
        # never exercised.
        inp_y = np.random.randint(0, 2, size=100)
        ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        # NOTE(review): fit is called twice in a row; presumably this checks
        # that a second fit on an already-built model works - confirm.
        model.fit(ds, epochs=1)
        model.fit(ds, epochs=1)
        model.evaluate(ds)
        model.predict(ds)
Beispiel #8
0
    def testMixedFeaturesArbitraryWeights(self):
        """Checks LinearRegressorV2 with LinearSDCA on mixed, weighted inputs.

        The model mixes a numeric, a bucketized, a hashed categorical and a
        crossed column, and the three examples carry weights 3, 5 and 7.
        After 20 training steps the one-step evaluation loss must be below
        0.05.
        """
        def build_inputs():
            # Produces the (features, labels) pair consumed by train/evaluate.
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([0.6, 0.8, 0.3]),
                'sq_footage': constant_op.constant(
                    [[900.0], [700.0], [600.0]]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [5.0], [7.0]]),
            }
            labels = constant_op.constant([[1.55], [-1.25], [-3.0]])
            return features, labels

        price_column = feature_column_lib.numeric_column('price')
        bucketized_sq_footage = feature_column_lib.bucketized_column(
            feature_column_lib.numeric_column('sq_footage'),
            boundaries=[650.0, 800.0])
        country_column = (
            feature_column_lib.categorical_column_with_hash_bucket(
                'country', hash_bucket_size=5))
        # The cross combines the bucketized column with the raw 'country' key.
        sq_footage_x_country = feature_column_lib.crossed_column(
            [bucketized_sq_footage, 'country'], hash_bucket_size=10)

        sdca = linear.LinearSDCA(
            example_id_column='example_id',
            symmetric_l2_regularization=0.1)
        model = linear.LinearRegressorV2(
            feature_columns=[
                price_column,
                bucketized_sq_footage,
                country_column,
                sq_footage_x_country,
            ],
            weight_column='weights',
            optimizer=sdca)

        model.train(input_fn=build_inputs, steps=20)
        metrics = model.evaluate(input_fn=build_inputs, steps=1)
        self.assertLess(metrics['loss'], 0.05)