Example no. 1
  def testBucketizedFeatures(self):
    """Tests LinearClassifier with LinearSDCA and bucketized features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'price': constant_op.constant([[600.0], [1000.0], [400.0]]),
          'sq_footage': constant_op.constant([[1000.0], [600.0], [700.0]]),
          'weights': constant_op.constant([[1.0], [1.0], [1.0]])
      }, constant_op.constant([[1], [0], [1]])

    price_bucket = feature_column_v2.bucketized_column(
        feature_column_v2.numeric_column('price'),
        boundaries=[500.0, 700.0])
    sq_footage_bucket = feature_column_v2.bucketized_column(
        feature_column_v2.numeric_column('sq_footage'), boundaries=[650.0])
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    classifier = linear.LinearClassifier(
        feature_columns=[price_bucket, sq_footage_bucket],
        weight_column='weights',
        optimizer=optimizer)
    classifier.train(input_fn=input_fn, steps=100)
    loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss, 0.2)
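
The boundaries [500.0, 700.0] split prices into three buckets: (-inf, 500), [500, 700), and [700, inf), so the linear model sees each example as a one-hot vector over those buckets. A minimal numpy sketch of the same assignment (np.digitize uses the same left-closed boundary rule as bucketized_column):

import numpy as np

# Bucket assignment for the three prices in input_fn above.
prices = np.array([600.0, 1000.0, 400.0])
buckets = np.digitize(prices, bins=[500.0, 700.0])
print(buckets)  # [1 2 0] -> one-hot rows of length 3 feed the linear model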
Example no. 2
    def test_trace_features_layer(self):
        columns = [feature_column_v2.numeric_column('x')]
        model = sequential.Sequential(
            [feature_column_v2.DenseFeatures(columns)])
        model_input = {'x': constant_op.constant([[1.]])}
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({'output_1': [[1.]]}, fn({'x': [[1.]]}))

        columns = [
            feature_column_v2.numeric_column('x'),
            feature_column_v2.numeric_column('y')
        ]
        model = sequential.Sequential(
            [feature_column_v2.DenseFeatures(columns)])
        model_input = {
            'x': constant_op.constant([[1.]]),
            'y': constant_op.constant([[2.]])
        }
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({'output_1': [[1., 2.]]},
                            fn({
                                'x': [[1.]],
                                'y': [[2.]]
                            }))
Example no. 3
  def testBiasAndOtherColumns(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when there are other features present.

      1/2 of the instances in this input have feature 'a', the rest have
      feature 'b', and we expect the bias to be added to each instance as well.
      0.4 of all instances that have feature 'a' are positive, and 0.2 of all
      instances that have feature 'b' are positive. The labels in the dataset
      are ordered to appear shuffled since SDCA expects shuffled data, and
      converges faster with this pseudo-random ordering.
      If the bias were not regularized, we would expect the weights to be:
      bias: 0.3
      a: 0.1
      b: -0.1
      But with bias regularization, the optimal values are:
      bias: 0.2
      a: 0.2
      b: 0.0
      Returns:
        The test dataset.
      """
      num_examples = 200
      half = int(num_examples / 2)
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          'a':
              constant_op.constant([[1]] * int(half) + [[0]] * int(half)),
          'b':
              constant_op.constant([[0]] * int(half) + [[1]] * int(half)),
      }, constant_op.constant(
          [[x]
           for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * int(half / 10) +
           [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * int(half / 10)])

    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[
            feature_column_v2.numeric_column('a'),
            feature_column_v2.numeric_column('b')
        ],
        optimizer=optimizer)

    regressor.train(input_fn=input_fn, steps=200)

    variable_names = regressor.get_variable_names()
    self.assertIn('linear/linear_model/bias_weights', variable_names)
    self.assertIn('linear/linear_model/a/weights', variable_names)
    self.assertIn('linear/linear_model/b/weights', variable_names)
    # TODO(b/29339026): Change the expected results to expect a centered bias.
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.2, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/a/weights')[0], 0.2, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/b/weights')[0], 0.0, err=0.05)
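
The docstring's regularized optimum can be sanity-checked outside TensorFlow. This is a minimal sketch assuming SDCA's squared-loss objective behaves like ridge regression with lambda equal to symmetric_l2_regularization; that equivalence is an approximation, not the exact SDCA formulation:

import numpy as np

# Feature vectors are [bias, a, b]: half the data is [1, 1, 0] with mean
# label 0.4, the other half [1, 0, 1] with mean label 0.2.
XtX_n = np.array([[1.0, 0.5, 0.5],
                  [0.5, 0.5, 0.0],
                  [0.5, 0.0, 0.5]])
Xty_n = np.array([0.3, 0.2, 0.1])
lam = 0.1
w = np.linalg.solve(XtX_n + lam * np.eye(3), Xty_n)
print(w)  # ~[0.19, 0.18, 0.01], within the test's 0.05 tolerance of (0.2, 0.2, 0.0)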
Example no. 4
  def test_raises_if_duplicate_name(self):
    with self.assertRaisesRegex(
        ValueError, 'Duplicate feature column name found for columns'):
      df.DenseFeatures(feature_columns=[
          fc.numeric_column('a'),
          fc.numeric_column('a')
      ])(features={'a': [[0]]})
Example no. 5
  def test_multi_column(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2')
    with ops.Graph().as_default():
      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
      net = df.DenseFeatures([price1, price2])(features)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))
Example no. 6
 def test_static_batch_size_mismatch(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': [[1.], [5.], [7.]],  # batchsize = 3
         'price2': [[3.], [4.]]  # batchsize = 2
     }
     with self.assertRaisesRegex(
         ValueError,
         r'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
       df.DenseFeatures([price1, price2])(features)
Example no. 7
 def test_runtime_batch_size_mismatch(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
         'price2': [[3.], [4.]]  # batchsize = 2
     }
     net = df.DenseFeatures([price1, price2])(features)
     with _initialized_session() as sess:
       with self.assertRaisesRegex(errors.OpError,
                                   'Dimensions of inputs should match'):
         sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})
Example no. 8
 def test_subset_of_static_batch_size_mismatch(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   price3 = fc.numeric_column('price3')
   with ops.Graph().as_default():
     features = {
         'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
         'price2': [[3.], [4.]],  # batchsize = 2
         'price3': [[3.], [4.], [5.]]  # batchsize = 3
     }
     with self.assertRaisesRegex(
         ValueError,
         r'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
       df.DenseFeatures([price1, price2, price3])(features)
Example no. 9
    def _test_complete_flow(self,
                            train_input_fn,
                            eval_input_fn,
                            predict_input_fn,
                            input_dimension,
                            label_dimension,
                            batch_size,
                            dnn_optimizer='Adagrad',
                            linear_optimizer='Ftrl'):
        linear_feature_columns = [
            feature_column_v2.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column_v2.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        est = dnn_linear_combined.DNNLinearCombinedEstimatorV2(
            head=regression_head.RegressionHead(
                label_dimension=label_dimension),
            linear_feature_columns=linear_feature_columns,
            dnn_feature_columns=dnn_feature_columns,
            dnn_hidden_units=(2, 2),
            model_dir=self._model_dir,
            dnn_optimizer=dnn_optimizer,
            linear_optimizer=linear_optimizer)

        # Train
        num_steps = 10
        est.train(train_input_fn, steps=num_steps)

        # Evaluate
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        # Predict
        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in est.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        # Export
        feature_spec = feature_column_v2.make_parse_example_spec_v2(
            feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                            serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
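
The train/eval/predict input functions are supplied by the callers, which are not shown here. A hedged sketch of compatible ones, assuming input_dimension == label_dimension and using the v1 numpy_input_fn helper (the data layout is hypothetical):

import numpy as np
import tensorflow as tf

def make_input_fns(dimension, batch_size):
  # Labels equal the features, so the combined model can fit y = x.
  data = np.linspace(0., 2., batch_size * dimension,
                     dtype=np.float32).reshape(batch_size, dimension)
  train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=data, batch_size=batch_size, num_epochs=None,
      shuffle=True)
  eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
  predict_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)
  return train_input_fn, eval_input_fn, predict_input_fn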
Example no. 10
  def testBiasAndOtherColumnsFabricatedCentered(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when there are other features present.

      1/2 of the instances in this input have feature 'a', the rest have
      feature 'b', and we expect the bias to be added to each instance as well.
      0.1 of all instances that have feature 'a' have a label of 1, and 0.1 of
      all instances that have feature 'b' have a label of -1.
      We can expect the weights to be:
      bias: 0.0
      a: 0.1
      b: -0.1
      Returns:
        The test dataset.
      """
      num_examples = 200
      half = int(num_examples / 2)
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          'a':
              constant_op.constant([[1]] * int(half) + [[0]] * int(half)),
          'b':
              constant_op.constant([[0]] * int(half) + [[1]] * int(half)),
      }, constant_op.constant([[1 if x % 10 == 0 else 0] for x in range(half)] +
                              [[-1 if x % 10 == 0 else 0] for x in range(half)])

    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[
            feature_column_v2.numeric_column('a'),
            feature_column_v2.numeric_column('b')
        ],
        optimizer=optimizer)

    regressor.train(input_fn=input_fn, steps=100)

    variable_names = regressor.get_variable_names()
    self.assertIn('linear/linear_model/bias_weights', variable_names)
    self.assertIn('linear/linear_model/a/weights', variable_names)
    self.assertIn('linear/linear_model/b/weights', variable_names)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.0, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/a/weights')[0], 0.1, err=0.05)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/b/weights')[0], -0.1, err=0.05)
Example no. 11
 def test_dense_features_layer(self, cycles):
   columns = [feature_column_v2.numeric_column("x"),
              feature_column_v2.numeric_column("y")]
   layer = feature_column_v2.DenseFeatures(columns)
   model = sequential.Sequential([layer])
   model_input = {"x": constant_op.constant([[1.]]),
                  "y": constant_op.constant([[2.]])}
   self.assertAllClose([[1., 2.]], model.predict(model_input))
   loaded = self.cycle(model, cycles)
   output, = loaded._default_save_signature(model_input).values()
   self.assertAllClose([[1., 2.]], output)
   signature_output, = loaded.signatures["serving_default"](
       **model_input).values()
   self.assertAllClose([[1., 2.]], signature_output)
Example no. 12
  def test_cols_to_output_tensors(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2')
    with ops.Graph().as_default():
      cols_dict = {}
      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
      dense_features = df.DenseFeatures([price1, price2])
      net = dense_features(features, cols_dict)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2.], [5., 6.]],
                          self.evaluate(cols_dict[price1]))
      self.assertAllClose([[3.], [4.]], self.evaluate(cols_dict[price2]))
      self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))
Example no. 13
 def test_does_not_support_dict_columns(self):
   with self.assertRaisesRegex(
       ValueError, 'Expected feature_columns to be iterable, found dict.'):
     df.DenseFeatures(feature_columns={'a': fc.numeric_column('a')})(
         features={
             'a': [[0]]
         })
Example no. 14
  def testUnknownBatchSize(self):
    """Tests LinearRegressor with LinearSDCA and unknown batch size."""

    def input_fn():
      # Similar to testBiasOnly, but uses placeholder_with_default in order
      # to leave the static batch size unspecified.
      return {
          'example_id':
              array_ops.placeholder_with_default(
                  constant_op.constant(['0', '1']),
                  shape=[None]),
          # always_zero is an empty column which is always 0 (absent), because
          # LinearRegressor requires at least one feature column.
          'always_zero':
              array_ops.placeholder_with_default(
                  constant_op.constant([[0.0]] * 2),
                  shape=[None, 1]),
      }, array_ops.placeholder_with_default(
              constant_op.constant([0.0, 1.0]),
              shape=[None])

    always_zero = feature_column_v2.numeric_column('always_zero')
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[always_zero], optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=100)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.5, err=0.1)
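
placeholder_with_default pins the dtype and rank via its default value, while shape=[None] or [None, 1] leaves the static batch size unknown, which is what this test exercises. A minimal graph-mode sketch (assumes TF 2.x with the v1 compatibility layer):

import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x = tf.compat.v1.placeholder_with_default(
    tf.constant([[0.0], [0.0]]), shape=[None, 1])
print(x.shape)  # (None, 1): batch dimension left unspecified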
Example no. 15
  def testBiasOnly(self):
    """Tests LinearRegressor with LinearSDCA and validates bias weight."""

    def input_fn():
      """Testing the bias weight when it's the only feature present.

      All of the instances in this input only have the bias feature, and
      1/4 of the labels are positive. This means that the expected weight for
      the bias should be close to the average label, i.e., 0.25.
      Returns:
        Training data for the test.
      """
      num_examples = 40
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          # place_holder is an empty column which is always 0 (absent), because
          # LinearRegressor requires at least one feature column.
          'place_holder':
              constant_op.constant([[0.0]] * num_examples),
      }, constant_op.constant(
          [1 if i % 4 == 0 else 0 for i in range(num_examples)])

    place_holder = feature_column_v2.numeric_column('place_holder')
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[place_holder], optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=100)
    self.assertNear(regressor.get_variable_value(
        'linear/linear_model/bias_weights')[0], 0.25, err=0.1)
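
The expected value follows directly from the label construction: a bias-only least-squares model converges to the mean label, and one of every four labels is 1. A one-line numpy check:

import numpy as np

labels = np.array([1 if i % 4 == 0 else 0 for i in range(40)])
print(labels.mean())  # 0.25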
Example no. 16
  def testRealValuedLinearFeatures(self):
    """Tests LinearRegressor with LinearSDCA and real valued features."""
    x = [[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]]
    weights = [[3.0], [-1.2], [0.5]]
    y = np.dot(x, weights)

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'x': constant_op.constant(x),
          'weights': constant_op.constant([[10.0], [10.0], [10.0]])
      }, constant_op.constant(y)

    x_column = feature_column_v2.numeric_column('x', shape=3)
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.1)
    regressor = linear.LinearRegressor(
        feature_columns=[x_column],
        weight_column='weights',
        optimizer=optimizer)
    regressor.train(input_fn=input_fn, steps=20)
    loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss, 0.01)
    self.assertIn('linear/linear_model/x/weights',
                  regressor.get_variable_names())
    regressor_weights = regressor.get_variable_value(
        'linear/linear_model/x/weights')
    self.assertAllClose(
        [w[0] for w in weights], regressor_weights.flatten(), rtol=0.1)
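
Because the labels are generated as an exact linear function of x, ordinary least squares recovers the generating weights exactly; the test then asserts the SDCA-trained weights land near the same values. A quick numpy check:

import numpy as np

x = np.array([[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]])
w = np.array([[3.0], [-1.2], [0.5]])
y = x @ w
w_hat, *_ = np.linalg.lstsq(x, y, rcond=None)
print(w_hat.ravel())  # [ 3.  -1.2   0.5]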
Example no. 17
 def test_runtime_batch_size_matches(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
         'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
     }
     net = df.DenseFeatures([price1, price2])(features)
     with _initialized_session() as sess:
       sess.run(
           net,
           feed_dict={
               features['price1']: [[1.], [5.]],
               features['price2']: [[1.], [5.]],
           })
Example no. 18
    def _test_complete_flow(self, train_input_fn, eval_input_fn,
                            predict_input_fn, input_dimension, label_dimension,
                            prediction_length):
        feature_columns = [
            feature_column_lib.numeric_column('x', shape=(input_dimension, ))
        ]
        est = _baseline_estimator_fn(label_dimension=label_dimension,
                                     model_dir=self._model_dir)

        # TRAIN
        # learn y = x
        est.train(train_input_fn, steps=200)

        # EVALUATE
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

        # PREDICT
        predictions = np.array(
            [x['predictions'] for x in est.predict(predict_input_fn)])
        self.assertAllEqual((prediction_length, label_dimension),
                            predictions.shape)

        # EXPORT
        feature_spec = feature_column_lib.make_parse_example_spec_v2(
            feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                            serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example no. 19
    def test_saving_with_dense_features(self):
        cols = [
            feature_column_v2.numeric_column('a'),
            feature_column_v2.indicator_column(
                feature_column_v2.categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(1, ), name='a'),
            'b': keras.layers.Input(shape=(1, ), name='b', dtype='string')
        }

        fc_layer = feature_column_v2.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer=keras.optimizers.RMSprop(lr=0.0001),
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype('str')

        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        self.assertLen(loaded_model.predict({
            'a': inputs_a,
            'b': inputs_b
        }), 10)
Example no. 20
    def test_with_1d_unknown_shape_sparse_tensor(self):
        embedding_values = (
            (1., 2.),  # id 0
            (6., 7.),  # id 1
            (11., 12.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 2 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=2,
                                               initializer=_initializer)

        # Provides inputs with unknown static shape: dense tensors for price
        # and country, and a sparse tensor for body-style.
        features = {
            'price': array_ops.placeholder(dtypes.float32),
            'body-style': array_ops.sparse_placeholder(dtypes.string),
            # This is dense tensor for the categorical_column.
            'country': array_ops.placeholder(dtypes.string),
        }
        self.assertIsNone(features['price'].shape.ndims)
        self.assertIsNone(features['body-style'].get_shape().ndims)
        self.assertIsNone(features['country'].shape.ndims)

        price_data = np.array([11., 12.])
        body_style_data = sparse_tensor.SparseTensorValue(indices=((0, ),
                                                                   (1, )),
                                                          values=('sedan',
                                                                  'hardtop'),
                                                          dense_shape=(2, ))
        country_data = np.array([['US'], ['CA']])

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 2, net.shape[1])
        with _initialized_session() as sess:

            # Each row is formed by concatenating `one_hot_body_style`,
            # `embedded_country`, and `price`, in that order.
            self.assertAllEqual(
                [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
                sess.run(net,
                         feed_dict={
                             features['price']: price_data,
                             features['body-style']: body_style_data,
                             features['country']: country_data
                         }))
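
The expected rows can be assembled by hand: DenseFeatures concatenates its columns sorted by name (the column-order test in Example no. 22 exercises the same rule), so each row is body-style's one-hot (3 dims), then country's embedding (2 dims), then price (1 dim). A numpy sketch for the first row:

import numpy as np

one_hot_sedan = np.array([0., 0., 1.])  # 'sedan' in ['hardtop', 'wagon', 'sedan']
embed_us = np.array([1., 2.])           # embedding row for id 0 ('US')
price = np.array([11.])
print(np.concatenate([one_hot_sedan, embed_us, price]))  # [ 0.  0.  1.  1.  2. 11.]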
Example no. 21
  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size):
    feature_columns = [feature_column_v2.numeric_column('x', shape=(input_dimension,))]

    est = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = feature_column_v2.make_parse_example_spec_v2(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example no. 22
    def test_column_order(self):
        price_a = fc.numeric_column('price_a')
        price_b = fc.numeric_column('price_b')
        with ops.Graph().as_default():
            features = {
                'price_a': [[1.]],
                'price_b': [[3.]],
            }
            net1 = df.DenseFeatures([price_a, price_b])(features)
            net2 = df.DenseFeatures([price_b, price_a])(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[1., 3.]], self.evaluate(net1))
            self.assertAllClose([[1., 3.]], self.evaluate(net2))
Example no. 23
    def test_from_config(self, units, sparse_combiner, trainable, name):
        cols = [
            fc.numeric_column('a'),
            fc.categorical_column_with_vocabulary_list('b',
                                                       vocabulary_list=('1',
                                                                        '2',
                                                                        '3')),
            fc.categorical_column_with_hash_bucket(key='c', hash_bucket_size=3)
        ]
        orig_layer = fc._LinearModelLayer(cols,
                                          units=units,
                                          sparse_combiner=sparse_combiner,
                                          trainable=trainable,
                                          name=name)
        config = orig_layer.get_config()

        new_layer = fc._LinearModelLayer.from_config(config)

        self.assertEqual(new_layer.name, orig_layer.name)
        self.assertEqual(new_layer._units, units)
        self.assertEqual(new_layer._sparse_combiner, sparse_combiner)
        self.assertEqual(new_layer.trainable, trainable)
        self.assertLen(new_layer._feature_columns, 3)
        self.assertEqual(new_layer._feature_columns[0].name, 'a')
        self.assertEqual(new_layer._feature_columns[1].vocabulary_list,
                         ('1', '2', '3'))
        self.assertEqual(new_layer._feature_columns[2].num_buckets, 3)
Example no. 24
    def test_from_config(self, trainable, name):
        cols = [
            fc.numeric_column('a'),
            fc.embedding_column(fc.categorical_column_with_vocabulary_list(
                'b', vocabulary_list=['1', '2', '3']),
                                dimension=2),
            fc.indicator_column(
                fc.categorical_column_with_hash_bucket(key='c',
                                                       hash_bucket_size=3))
        ]
        orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name)
        config = orig_layer.get_config()

        new_layer = df.DenseFeatures.from_config(config)

        self.assertEqual(new_layer.name, orig_layer.name)
        self.assertEqual(new_layer.trainable, trainable)
        self.assertLen(new_layer._feature_columns, 3)
        self.assertEqual(new_layer._feature_columns[0].name, 'a')
        self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0)
        self.assertEqual(new_layer._feature_columns[1].categorical_column.name,
                         'b')
        self.assertIsInstance(new_layer._feature_columns[0], cols[0].__class__)
        self.assertIsInstance(new_layer._feature_columns[1], cols[1].__class__)
        self.assertIsInstance(new_layer._feature_columns[2], cols[2].__class__)
Example no. 25
def embedding_varlen(batch_size, max_length):
  """Benchmark a variable-length embedding."""
  # Data and constants.
  max_value = 25.0
  bins = np.arange(1.0, max_value)
  data = fc_bm.create_data(
      max_length, batch_size * NUM_REPEATS, 100000, dtype=float)

  # Keras implementation
  model = keras.Sequential()
  model.add(keras.Input(shape=(max_length,), name="data", dtype=dt.float32))
  model.add(discretization.Discretization(bins))

  # FC implementation
  fc = fcv2.bucketized_column(
      fcv2.numeric_column("data"), boundaries=list(bins))

  # Wrap the FC implementation in a tf.function for a fair comparison
  @tf_function()
  def fc_fn(tensors):
    fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)

  # Benchmark runs
  keras_data = {"data": data.to_tensor(default_value=0.0)}
  k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)

  fc_data = {"data": data.to_tensor(default_value=0.0)}
  fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

  return k_avg_time, fc_avg_time
Example no. 26
  def _test_parsed_sequence_example(
      self, col_name, col_fn, col_arg, shape, values):
    """Helper function to check that each FeatureColumn parses correctly.

    Args:
      col_name: string, name to give to the feature column. Should match
        the name that the column will parse out of the features dict.
      col_fn: function used to create the feature column. For example,
        sequence_numeric_column.
      col_arg: second arg that the target feature column is expecting.
      shape: the expected dense_shape of the feature after parsing into
        a SparseTensor.
      values: the expected values at index [0, 2, 6] of the feature
        after parsing into a SparseTensor.
    """
    example = _make_sequence_example()
    columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
        col_fn(col_name, col_arg)
    ]
    context, seq_features = parsing_ops.parse_single_sequence_example(
        example.SerializeToString(),
        context_features=fc.make_parse_example_spec_v2(columns[:2]),
        sequence_features=fc.make_parse_example_spec_v2(columns[2:]))

    with self.cached_session() as sess:
      ctx_result, seq_result = sess.run([context, seq_features])
      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
      self.assertEqual(
          list(seq_result[col_name].values[[0, 2, 6]]), values)
      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
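
_make_sequence_example is defined elsewhere and not shown. For the context assertions above to hold, it must serialize a SequenceExample whose context carries int_ctx=5 and float_ctx=123.6; a hedged sketch of that part (the sequence feature lists depend on the column under test and are omitted here):

import tensorflow as tf

example = tf.train.SequenceExample()
example.context.feature['int_ctx'].int64_list.value.append(5)
example.context.feature['float_ctx'].float_list.value.append(123.6)
serialized = example.SerializeToString()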
Example no. 28
  def test_compute_output_shape(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2', shape=4)
    with ops.Graph().as_default():
      features = {
          'price1': [[1., 2.], [5., 6.]],
          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
      }
      dense_features = df.DenseFeatures([price1, price2])
      self.assertEqual((None, 6), dense_features.compute_output_shape((None,)))
      net = dense_features(features)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]],
                          self.evaluate(net))
Example no. 29
 def test_raises_if_shape_mismatch(self):
   price = fc.numeric_column('price', shape=2)
   with ops.Graph().as_default():
     features = {'price': [[1.], [5.]]}
     with self.assertRaisesRegex(
         Exception,
         r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
       df.DenseFeatures([price])(features)
Example no. 30
    def test_bare_column(self):
        with ops.Graph().as_default():
            features = {'a': [0.]}
            net = df.DenseFeatures(fc.numeric_column('a'))(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[0.]], self.evaluate(net))
Example no. 31
    def test_with_1d_sparse_tensor(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=5,
                                               initializer=_initializer)

        with ops.Graph().as_default():
            # Provides 1-D tensors: dense for price and country, and a sparse
            # tensor for body-style.
            features = {
                'price':
                constant_op.constant([
                    11.,
                    12.,
                ]),
                'body-style':
                sparse_tensor.SparseTensor(indices=((0, ), (1, )),
                                           values=('sedan', 'hardtop'),
                                           dense_shape=(2, )),
                # This is dense tensor for the categorical_column.
                'country':
                constant_op.constant(['CA', 'US']),
            }
            self.assertEqual(1, features['price'].shape.ndims)
            self.assertEqual(1,
                             features['body-style'].dense_shape.get_shape()[0])
            self.assertEqual(1, features['country'].shape.ndims)

            net = df.DenseFeatures(
                [price, one_hot_body_style, embedded_country])(features)
            self.assertEqual(1 + 3 + 5, net.shape[1])
            with _initialized_session() as sess:

                # Each row is formed by concatenating `one_hot_body_style`,
                # `embedded_country`, and `price`, in that order.
                self.assertAllEqual(
                    [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                     [1., 0., 0., 1., 2., 3., 4., 5., 12.]], sess.run(net))
Example no. 32
  def test_trace_features_layer(self):
    columns = [feature_column_v2.numeric_column('x')]
    model = sequential.Sequential(
        [feature_column_v2.DenseFeatures(columns)])
    model_input = {'x': constant_op.constant([[1.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1.]]}, fn({'x': [[1.]]}))

    columns = [feature_column_v2.numeric_column('x'),
               feature_column_v2.numeric_column('y')]
    model = sequential.Sequential(
        [feature_column_v2.DenseFeatures(columns)])
    model_input = {'x': constant_op.constant([[1.]]),
                   'y': constant_op.constant([[2.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1., 2.]]},
                        fn({'x': [[1.]], 'y': [[2.]]}))
Example no. 33
 def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
                              predict_input_fn, input_dimension,
                              label_dimension, batch_size, fc_impl):
   del fc_impl
   linear_feature_columns = [
       feature_column_v2.numeric_column('x', shape=(input_dimension,))
   ]
   dnn_feature_columns = [
       feature_column.numeric_column('x', shape=(input_dimension,))
   ]
   feature_columns = linear_feature_columns + dnn_feature_columns
   feature_spec = feature_column.make_parse_example_spec(feature_columns)
   self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                   feature_spec, train_input_fn, eval_input_fn,
                                   predict_input_fn, input_dimension,
                                   label_dimension, batch_size)
Example no. 34
  def _build_feature_columns(self):
    col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
    ctx_cols = [
        fc.embedding_column(col, dimension=10),
        fc.numeric_column('float_ctx')
    ]

    identity_col = sfc.sequence_categorical_column_with_identity(
        'int_list', num_buckets=10)
    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
        'bytes_list', hash_bucket_size=100)
    seq_cols = [
        fc.embedding_column(identity_col, dimension=10),
        fc.embedding_column(bucket_col, dimension=20)
    ]

    return ctx_cols, seq_cols