コード例 #1
0
 def test_does_not_support_dict_columns(self):
     """Passing a dict as `feature_columns` must raise a ValueError."""
     expected_error = 'Expected feature_columns to be iterable, found dict.'
     with self.assertRaisesRegexp(ValueError, expected_error):
         layer = df.DenseFeatures(
             feature_columns={'a': fc.numeric_column('a')})
         layer(features={'a': [[0]]})
コード例 #2
0
 def test_raises_if_duplicate_name(self):
     """Two columns that share the name 'a' must raise a ValueError."""
     expected_error = 'Duplicate feature column name found for columns'
     with self.assertRaisesRegexp(ValueError, expected_error):
         duplicated_columns = [
             fc.numeric_column('a'),
             fc.numeric_column('a'),
         ]
         layer = df.DenseFeatures(feature_columns=duplicated_columns)
         layer(features={'a': [[0]]})
コード例 #3
0
    def test_bare_column(self):
        """A single column passed without a wrapping list is accepted.

        The layer output for the feature `a = [0.]` is expected to be
        `[[0.]]`.
        """
        with ops.Graph().as_default():
            features = {'a': [0.]}
            net = df.DenseFeatures(fc.numeric_column('a'))(features)

            # Initialize variables and lookup tables before evaluating.
            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[0.]], self.evaluate(net))
コード例 #4
0
 def test_raises_if_shape_mismatch(self):
     """One value per row cannot feed a column declared with shape=2."""
     price = fc.numeric_column('price', shape=2)
     expected_error = (
         r'Cannot reshape a tensor with 2 elements to shape \[2,2\]')
     with ops.Graph().as_default():
         features = {'price': [[1.], [5.]]}
         with self.assertRaisesRegexp(Exception, expected_error):
             layer = df.DenseFeatures([price])
             layer(features)
コード例 #5
0
    def test_column_generator(self):
        """Feature columns may be supplied as a generator expression.

        One numeric column is generated per key of `features`; the layer
        output is expected to hold both values in a single row.
        """
        with ops.Graph().as_default():
            features = {'a': [0.], 'b': [1.]}
            # Deliberately a generator (not a list) to exercise that input.
            columns = (fc.numeric_column(key) for key in features)
            net = df.DenseFeatures(columns)(features)

            # Initialize variables and lookup tables before evaluating.
            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[0., 1.]], self.evaluate(net))
コード例 #6
0
    def test_reshaping(self):
        """A column of shape [1, 2] yields two values per output row."""
        price = fc.numeric_column('price', shape=[1, 2])
        expected_rows = [[1., 2.], [5., 6.]]
        with ops.Graph().as_default():
            features = {'price': [[[1., 2.]], [[5., 6.]]]}
            layer = df.DenseFeatures([price])
            net = layer(features)

            init_variables = variables_lib.global_variables_initializer()
            self.evaluate(init_variables)
            init_tables = lookup_ops.tables_initializer()
            self.evaluate(init_tables)

            self.assertAllClose(expected_rows, self.evaluate(net))
コード例 #7
0
    def test_with_1d_sparse_tensor(self):
        """Rank-1 dense and sparse inputs are accepted by DenseFeatures.

        Feeds a rank-1 numeric tensor, a rank-1 SparseTensor and a rank-1
        dense string tensor, then checks the concatenated output width
        (1 + 3 + 5) and the exact output values.
        """
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            # Ignore the requested shape/dtype and return the fixed table.
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=5,
                                               initializer=_initializer)

        # Provides 1-dim tensor and dense tensor.
        features = {
            'price':
            constant_op.constant([
                11.,
                12.,
            ]),
            'body-style':
            sparse_tensor.SparseTensor(indices=((0, ), (1, )),
                                       values=('sedan', 'hardtop'),
                                       dense_shape=(2, )),
            # This is dense tensor for the categorical_column.
            'country':
            constant_op.constant(['CA', 'US']),
        }
        self.assertEqual(1, features['price'].shape.ndims)
        self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
        self.assertEqual(1, features['country'].shape.ndims)

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with _initialized_session() as sess:

            # Per the expected values below, each row holds
            # `one_hot_body_style` (3 values), then `embedded_country`
            # (5 values), then `price` (1 value). This differs from the order
            # the columns were passed in; presumably the layer orders columns
            # by name -- TODO confirm.
            self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                                 [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
                                sess.run(net))
コード例 #8
0
 def test_fails_for_categorical_column(self):
     """A bare categorical column must be rejected by DenseFeatures."""
     animal = fc.categorical_column_with_identity('animal', num_buckets=4)
     with ops.Graph().as_default():
         animal_ids = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1]],
                                                 values=[1, 2],
                                                 dense_shape=[1, 2])
         features = {'animal': animal_ids}
         with self.assertRaisesRegexp(Exception, 'must be a .*DenseColumn'):
             layer = df.DenseFeatures([animal])
             layer(features)
コード例 #9
0
    def test_multi_column(self):
        """Outputs of two numeric columns are concatenated per row."""
        price1 = fc.numeric_column('price1', shape=2)
        price2 = fc.numeric_column('price2')
        expected_rows = [[1., 2., 3.], [5., 6., 4.]]
        with ops.Graph().as_default():
            features = {
                'price1': [[1., 2.], [5., 6.]],
                'price2': [[3.], [4.]],
            }
            layer = df.DenseFeatures([price1, price2])
            net = layer(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose(expected_rows, self.evaluate(net))
コード例 #10
0
    def test_dense_feature_with_training_arg(self):
        """The `training` argument reaches columns that accept it.

        `price2` is patched so its transformed value is scaled by 10 when
        `training` is truthy and by 20 otherwise; the layer output is
        checked in both modes.
        """
        price1 = fc.numeric_column('price1', shape=2)
        price2 = fc.numeric_column('price2')

        # Replacement methods for `price2`, simulating a column whose
        # behavior depends on the mode.
        def _mode_aware_dense_tensor(transformation_cache,
                                     state_manager,
                                     training=None):
            return transformation_cache.get(price2,
                                            state_manager,
                                            training=training)

        def _mode_aware_transform(transformation_cache,
                                  state_manager,
                                  training=None):
            raw_value = transformation_cache.get(price2.key,
                                                 state_manager,
                                                 training=training)
            scale = 10.0 if training else 20.0
            return raw_value * scale

        price2.get_dense_tensor = _mode_aware_dense_tensor
        price2.transform_feature = _mode_aware_transform

        with ops.Graph().as_default():
            features = {
                'price1': [[1., 2.], [5., 6.]],
                'price2': [[3.], [4.]],
            }
            train_layer = df.DenseFeatures([price1, price2])
            train_mode = train_layer(features, training=True)
            predict_layer = df.DenseFeatures([price1, price2])
            predict_mode = predict_layer(features, training=False)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            train_result = self.evaluate(train_mode)
            self.assertAllClose([[1., 2., 30.], [5., 6., 40.]], train_result)
            predict_result = self.evaluate(predict_mode)
            self.assertAllClose([[1., 2., 60.], [5., 6., 80.]],
                                predict_result)
コード例 #11
0
 def test_static_batch_size_mismatch(self):
     """Statically known batch sizes that differ must raise a ValueError."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     expected_error = (
         r'Batch size \(first dimension\) of each feature must be same.')
     with ops.Graph().as_default():
         three_rows = [[1.], [5.], [7.]]  # batchsize = 3
         two_rows = [[3.], [4.]]  # batchsize = 2
         features = {'price1': three_rows, 'price2': two_rows}
         with self.assertRaisesRegexp(ValueError, expected_error):
             layer = df.DenseFeatures([price1, price2])
             layer(features)
コード例 #12
0
    def test_with_rank_0_feature(self):
        """Rank-0 features are rejected, both statically and at run time."""
        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')
        rank_0_error = 'Feature .* cannot have rank 0'

        # Case 1: the rank is known statically to be 0 -> ValueError.
        static_features = {'price': constant_op.constant(0)}
        self.assertEqual(0, static_features['price'].shape.ndims)
        with self.assertRaisesRegexp(ValueError, rank_0_error):
            df.DenseFeatures([price])(static_features)

        # Case 2: the rank is only known once a value is fed -> op error
        # when the session runs.
        price_placeholder = array_ops.placeholder(dtypes.float32)
        dynamic_features = {'price': price_placeholder}
        net = df.DenseFeatures([price])(dynamic_features)
        self.assertEqual(1, net.shape[1])
        with _initialized_session() as sess:
            with self.assertRaisesOpError(rank_0_error):
                sess.run(net, feed_dict={price_placeholder: np.array(1)})
コード例 #13
0
    def test_multiple_layers_with_same_embedding_column(self):
        """Two layers built from one embedding column own separate variables.

        Checks that exactly two global variables exist and that their names
        are scoped under `dense_features` and `dense_features_1`.
        """
        some_sparse_column = fc.categorical_column_with_hash_bucket(
            'sparse_feature', hash_bucket_size=5)
        some_embedding_column = fc.embedding_column(some_sparse_column,
                                                    dimension=10)
        expected_var_names = [
            'dense_features/sparse_feature_embedding/embedding_weights:0',
            'dense_features_1/sparse_feature_embedding/embedding_weights:0'
        ]

        with ops.Graph().as_default():
            features = {'sparse_feature': [['a'], ['x']]}
            all_cols = [some_embedding_column]
            # Build and call two independent layers over the same column.
            for _ in range(2):
                df.DenseFeatures(all_cols)(features)

            created_names = [
                variable.name for variable in ops.get_collection(
                    ops.GraphKeys.GLOBAL_VARIABLES)
            ]
            # Make sure that 2 variables get created in this case.
            self.assertEqual(2, len(created_names))
            self.assertItemsEqual(expected_var_names, created_names)
コード例 #14
0
 def test_runtime_batch_size_mismatch(self):
     """A batch-size mismatch visible only at run time raises an OpError.

     `price1` is a placeholder fed three rows while `price2` has two
     static rows.
     """
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
         price1_input = array_ops.placeholder(dtype=dtypes.int64)
         features = {
             'price1': price1_input,  # batchsize = 3 (fed below)
             'price2': [[3.], [4.]],  # batchsize = 2
         }
         net = df.DenseFeatures([price1, price2])(features)
         with _initialized_session() as sess:
             with self.assertRaisesRegexp(
                     errors.OpError, 'Dimensions of inputs should match'):
                 fed_values = {price1_input: [[1.], [5.], [7.]]}
                 sess.run(net, feed_dict=fed_values)
コード例 #15
0
    def test_feature_column_dense_features_gradient(self):
        """Gradients through DenseFeatures match the expected values.

        In eager mode, doubles the layer output and checks both that output
        and the indices/values of the gradient returned for the first
        variable.
        """
        with context.eager_mode():
            sparse_input = sparse_tensor.SparseTensor(
                indices=((0, 0), (1, 0), (2, 0)),
                values=(0, 1, 2),
                dense_shape=(3, 3))

            # Create feature columns (categorical and embedding).
            categorical_column = fc.categorical_column_with_identity(
                key='a', num_buckets=3)
            embedding_dimension = 2

            def _embedding_column_initializer(shape,
                                              dtype,
                                              partition_info=None):
                # All arguments are unused; a fixed table is returned.
                del shape, dtype, partition_info
                return (
                    (1, 0),  # id 0
                    (0, 1),  # id 1
                    (1, 1))  # id 2

            embedding_column = fc.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_embedding_column_initializer)

            dense_features = df.DenseFeatures([embedding_column])
            features = {'a': sparse_input}

            def scale_matrix():
                return 2 * dense_features(features)

            # Sanity check: Verify that scale_matrix returns the correct output.
            self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())

            # Check that the returned gradient is correct.
            grads_and_vars = backprop.implicit_grad(scale_matrix)()
            indexed_slice = grads_and_vars[0][0]
            gradient = indexed_slice.values

            self.assertAllEqual([0, 1, 2], indexed_slice.indices)
            self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
コード例 #16
0
    def test_reuses_variables(self):
        """Calling the same DenseFeatures layer twice creates no new variables.

        In eager mode, the layer is invoked once, its variables are recorded,
        and it is invoked again on the same features. The variable list is
        then re-read from the layer to confirm it still holds exactly the one
        variable created by the first call.
        """
        with context.eager_mode():
            sparse_input = sparse_tensor.SparseTensor(indices=((0, 0), (1, 0),
                                                               (2, 0)),
                                                      values=(0, 1, 2),
                                                      dense_shape=(3, 3))

            # Create feature columns (categorical and embedding).
            categorical_column = fc.categorical_column_with_identity(
                key='a', num_buckets=3)
            embedding_dimension = 2

            def _embedding_column_initializer(shape,
                                              dtype,
                                              partition_info=None):
                del shape  # unused
                del dtype  # unused
                del partition_info  # unused
                embedding_values = (
                    (1, 0),  # id 0
                    (0, 1),  # id 1
                    (1, 1))  # id 2
                return embedding_values

            embedding_column = fc.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_embedding_column_initializer)

            dense_features = df.DenseFeatures([embedding_column])
            features = {'a': sparse_input}

            inputs = dense_features(features)
            variables = dense_features.variables

            # Sanity check: test that the inputs are correct.
            self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)

            # Check that only one variable was created.
            self.assertEqual(1, len(variables))

            # Check that invoking dense_features on the same features does not
            # create additional variables. Re-read the property after the
            # second call: `variables` above was captured before it, so
            # checking that snapshot again could not detect a new variable.
            _ = dense_features(features)
            self.assertEqual(1, len(dense_features.variables))
            self.assertEqual(variables[0], dense_features.variables[0])
コード例 #17
0
 def test_runtime_batch_size_matches(self):
     """Placeholders fed equal batch sizes run without raising."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
         # Both placeholders are fed two rows below (batchsize = 2).
         price1_input = array_ops.placeholder(dtype=dtypes.int64)
         price2_input = array_ops.placeholder(dtype=dtypes.int64)
         features = {'price1': price1_input, 'price2': price2_input}
         net = df.DenseFeatures([price1, price2])(features)
         with _initialized_session() as sess:
             fed_values = {
                 price1_input: [[1.], [5.]],
                 price2_input: [[1.], [5.]],
             }
             sess.run(net, feed_dict=fed_values)
コード例 #18
0
    def test_cols_to_output_tensors(self):
        """The dict passed as second argument receives per-column outputs.

        After the call, `cols_dict` is expected to map each column to its
        own tensor, in addition to the concatenated layer output.
        """
        price1 = fc.numeric_column('price1', shape=2)
        price2 = fc.numeric_column('price2')
        with ops.Graph().as_default():
            cols_dict = {}
            features = {
                'price1': [[1., 2.], [5., 6.]],
                'price2': [[3.], [4.]],
            }
            dense_features = df.DenseFeatures([price1, price2])
            net = dense_features(features, cols_dict)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            price1_output = self.evaluate(cols_dict[price1])
            self.assertAllClose([[1., 2.], [5., 6.]], price1_output)
            price2_output = self.evaluate(cols_dict[price2])
            self.assertAllClose([[3.], [4.]], price2_output)
            combined_output = self.evaluate(net)
            self.assertAllClose([[1., 2., 3.], [5., 6., 4.]],
                                combined_output)
コード例 #19
0
    def DISABLED_test_train_with_dense_features_v2(self):
        """Train an estimator built from a Keras model using DenseFeatures v2.

        Builds one embedding column and one Keras `Input` per feature, trains
        the converted estimator for a few steps, then checks the names found
        in the layer's checkpoint dependencies.
        """
        feature_dict = {
            'sex': np.int64([1, 1, 1, 1, 0]),
            'cp': np.int64([0, 3, 3, 2, 1]),
            'slope': np.int64([3, 2, 0, 3, 1]),
        }
        label = np.int64([0, 1, 0, 0, 0])
        train_input_fn = numpy_io.numpy_input_fn(x=feature_dict,
                                                 y=label,
                                                 num_epochs=1,
                                                 shuffle=False)
        feature_columns = []
        input_features = {}
        for feature_name, data_array in feature_dict.items():
            categorical = tf.feature_column.categorical_column_with_identity(
                key=feature_name,
                num_buckets=np.size(np.unique(data_array)))
            feature_columns.append(
                tf.feature_column.embedding_column(categorical, dimension=3))
            input_features[feature_name] = keras.layers.Input(
                name=feature_name,
                shape=(np.size(np.unique(data_array)), ),
                dtype=tf.dtypes.int64)

        features_layer = dense_features_v2.DenseFeatures(feature_columns)
        hidden = features_layer(input_features)
        hidden = keras.layers.Dense(16, activation='relu')(hidden)
        logits = keras.layers.Dense(1, activation='linear')(hidden)
        model = keras.Model(inputs=input_features, outputs=logits)

        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        estimator_model = keras_lib.model_to_estimator(keras_model=model)
        estimator_model.train(input_fn=train_input_fn, steps=5)
        # We assert that we find the embedding_weights variables in the
        # dependencies for the DenseFeatures layer.
        dependency_names = [
            dependency.name
            for dependency in features_layer._checkpoint_dependencies
        ]
        self.assertNotIn('embedding_weights', dependency_names)
        for expected_name in ('cp_embedding/embedding_weights',
                              'sex_embedding/embedding_weights',
                              'slope_embedding/embedding_weights'):
            self.assertIn(expected_name, dependency_names)
コード例 #20
0
    def test_compute_output_shape(self):
        """`compute_output_shape` reports the summed column widths (2 + 4)."""
        price1 = fc.numeric_column('price1', shape=2)
        price2 = fc.numeric_column('price2', shape=4)
        expected_rows = [
            [1., 2., 3., 4., 5., 6.],
            [5., 6., 7., 8., 9., 10.],
        ]
        with ops.Graph().as_default():
            features = {
                'price1': [[1., 2.], [5., 6.]],
                'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
            }
            dense_features = df.DenseFeatures([price1, price2])
            reported_shape = dense_features.compute_output_shape((None, ))
            self.assertEqual((None, 6), reported_shape)
            net = dense_features(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose(expected_rows, self.evaluate(net))
コード例 #21
0
 def test_linear_model_with_feature_column(self):
   """A linear model over an indicator column learns per-symbol targets.

   Each symbol is given a target value plus small uniform noise; after
   fitting, the kernel is compared to those targets within atol=0.01.
   """
   vocab_list = ['alpha', 'beta', 'gamma']
   vocab_val = [0.4, 0.6, 0.9]
   data = np.random.choice(vocab_list, size=256)
   y = np.zeros_like(data, dtype=np.float32)
   for symbol, target in zip(vocab_list, vocab_val):
     matches = np.where(data == symbol)
     noise = np.random.uniform(low=-0.01, high=0.01, size=matches[0].shape)
     y[matches] = target + noise
   cat_column = feature_column_v2.categorical_column_with_vocabulary_list(
       key='symbol', vocabulary_list=vocab_list)
   ind_column = feature_column_v2.indicator_column(cat_column)
   dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
   linear_model = linear.LinearModel(
       use_bias=False, kernel_initializer='zeros')
   combined = keras.Sequential([dense_feature_layer, linear_model])
   opt = gradient_descent.SGD(learning_rate=0.1)
   combined.compile(opt, 'mse', [])
   combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
   learned_kernel = combined.layers[1].dense_layers[0].kernel.numpy()
   self.assertAllClose([[0.4], [0.6], [0.9]], learned_kernel, atol=0.01)
コード例 #22
0
 def test_wide_deep_model_with_single_feature_column(self):
     """A wide-deep model stacked on DenseFeatures compiles and fits.

     Only checks that `fit` completes; no assertion is made on the
     learned weights.
     """
     vocab_list = ['alpha', 'beta', 'gamma']
     vocab_val = [0.4, 0.6, 0.9]
     data = np.random.choice(vocab_list, size=256)
     y = np.zeros_like(data, dtype=np.float32)
     for symbol, target in zip(vocab_list, vocab_val):
         matches = np.where(data == symbol)
         noise = np.random.uniform(low=-0.01,
                                   high=0.01,
                                   size=matches[0].shape)
         y[matches] = target + noise
     cat_column = fc.categorical_column_with_vocabulary_list(
         key='symbol', vocabulary_list=vocab_list)
     ind_column = fc.indicator_column(cat_column)
     dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
     linear_model = linear.LinearModel(use_bias=False,
                                       kernel_initializer='zeros')
     dnn_model = sequential.Sequential([core.Dense(units=1)])
     wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
     stacked_layers = [dense_feature_layer, wide_deep_model]
     combined = sequential.Sequential(stacked_layers)
     opt = gradient_descent.SGD(learning_rate=0.1)
     combined.compile(opt,
                      'mse', [],
                      run_eagerly=testing_utils.should_run_eagerly())
     combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
コード例 #23
0
ファイル: dnn.py プロジェクト: shaosimon/estimator
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               batch_norm,
               name=None,
               **kwargs):
    """Creates the input, hidden, dropout, batch-norm and logits layers.

    Args:
      units: Number of units of the final (logits) dense layer.
      hidden_units: Iterable of unit counts; one hidden dense layer is
        created per entry, in order.
      feature_columns: Feature columns handed to the `DenseFeatures` input
        layer. Must pass `feature_column_lib.is_feature_column_v2`.
      activation_fn: Activation passed to every hidden dense layer.
      dropout: When not None, used as the `rate` of a dropout layer created
        for each hidden layer.
      batch_norm: When truthy, a batch-normalization layer is created for
        each hidden layer.
      name: Optional name forwarded to the parent constructor.
      **kwargs: Extra keyword arguments forwarded to the parent constructor.

    Raises:
      ValueError: If `feature_columns` does not pass the v2 feature column
        check.
    """
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
      if not feature_column_lib.is_feature_column_v2(feature_columns):
        raise ValueError(
            'Received a feature column from TensorFlow v1, but this is a '
            'TensorFlow v2 Estimator. Please either use v2 feature columns '
            '(accessible via tf.feature_column.* in TF 2.x) with this '
            'Estimator, or switch to a v1 Estimator for use with v1 feature '
            'columns (accessible via tf.compat.v1.estimator.* and '
            'tf.compat.v1.feature_column.*, respectively).')
      layer_name = input_feature_column_scope + 'input_layer'
      self._input_layer = dense_features_v2.DenseFeatures(
          feature_columns=feature_columns, name=layer_name)

    self._dropout = dropout
    self._batch_norm = batch_norm

    # Parallel per-hidden-layer lists; the dropout and batch-norm lists are
    # only populated when the corresponding option is enabled.
    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        # Get scope name without the trailing slash.
        hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
        hidden_layer = keras_core.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_shared_name)
        self._hidden_layer_scope_names.append(hidden_shared_name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = keras_core.Dropout(rate=self._dropout)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
          # TODO(scottzhu): Change back to use BatchNormalization when the
          # cleanup is done.
          batch_norm_layer = keras_norm.BatchNormalizationBase(
              # The default momentum 0.99 actually crashes on certain
              # problem, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name=batch_norm_name)
          self._batch_norm_layers.append(batch_norm_layer)

    with ops.name_scope('logits') as logits_scope:
      logits_shared_name = _name_from_scope_name(logits_scope)
      self._logits_layer = keras_core.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_shared_name)
      self._logits_scope_name = logits_shared_name
コード例 #24
0
 def test_retrieving_input(self):
     """The layer output for a single numeric feature can be evaluated."""
     dense_features = df.DenseFeatures(fc.numeric_column('a'))
     layer_output = dense_features({'a': [0.]})
     self.assertAllClose([[0.]], self.evaluate(layer_output))
コード例 #25
0
 def test_raises_if_empty_feature_columns(self):
     """An empty `feature_columns` list must raise a ValueError."""
     expected_error = 'feature_columns must not be empty'
     with self.assertRaisesRegexp(ValueError, expected_error):
         layer = df.DenseFeatures(feature_columns=[])
         layer(features={})