    def test_train_premade_widedeep_model_with_feature_layers(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
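        # Build a noisy regression target: each vocabulary item maps to its
        # value in vocab_val plus small uniform noise.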
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = tf.feature_column.indicator_column(cat_column)
        # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed.
        # emb_column = feature_column.embedding_column(cat_column, dimension=5)
        keras_input = keras.layers.Input(name='symbol',
                                         shape=3,
                                         dtype=tf.dtypes.string)

        # Build the linear part with a feature layer.
        linear_feature_layer = dense_features.DenseFeatures([ind_column])
        linear_model = linear.LinearModel(units=1,
                                          name='Linear',
                                          kernel_initializer='zeros')
        combined_linear = keras.Sequential(
            [linear_feature_layer, linear_model])

        # Build the DNN part with a feature layer.
        dnn_feature_layer = dense_features.DenseFeatures([ind_column])
        dense_layer = keras.layers.Dense(units=1,
                                         name='DNNDense',
                                         kernel_initializer='zeros')
        combined_dnn = keras.Sequential([dnn_feature_layer, dense_layer])

        # Build and compile the wide-deep model.
        wide_deep_model = wide_deep.WideDeepModel(combined_linear,
                                                  combined_dnn)
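        # _set_inputs is a private Keras API; it declares the input structure so
        # the model is built before being converted to an Estimator.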
        wide_deep_model._set_inputs({'symbol': keras_input})
        sgd_opt = gradient_descent.SGD(0.1)
        adam_opt = adam.Adam(0.1)
        wide_deep_model.compile([sgd_opt, adam_opt], 'mse', ['mse'])

        # Build the estimator.
        train_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                 y=y,
                                                 num_epochs=20,
                                                 shuffle=False)
        eval_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                y=y,
                                                num_epochs=20,
                                                shuffle=False)
        est = keras_lib.model_to_estimator(keras_model=wide_deep_model,
                                           config=self._config,
                                           checkpoint_format='saver')

        before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        est.train(input_fn=train_input_fn, steps=20)
        after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        self.assertLess(after_eval_results['loss'],
                        before_eval_results['loss'])
        self.assertLess(after_eval_results['loss'], 0.1)
Example #2
    def test_multiple_layers_with_same_shared_embedding_column(self):
        categorical_column_a = fc.categorical_column_with_identity(
            key='aaa', num_buckets=3)
        categorical_column_b = fc.categorical_column_with_identity(
            key='bbb', num_buckets=3)
        embedding_dimension = 2
        embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
            [categorical_column_b, categorical_column_a],
            dimension=embedding_dimension)

        with ops.Graph().as_default():
            features = {
                'aaa':
                sparse_tensor.SparseTensor(indices=((0, 0), (1, 0), (1, 1)),
                                           values=(0, 1, 0),
                                           dense_shape=(2, 2)),
                'bbb':
                sparse_tensor.SparseTensor(indices=((0, 0), (1, 0), (1, 1)),
                                           values=(1, 2, 1),
                                           dense_shape=(2, 2)),
            }
            all_cols = [embedding_column_a, embedding_column_b]
            df.DenseFeatures(all_cols)(features)
            df.DenseFeatures(all_cols)(features)
            # Make sure that only 1 variable gets created in this case.
            self.assertEqual(
                1, len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
            self.assertItemsEqual(['aaa_bbb_shared_embedding:0'], [
                v.name
                for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
            ])
Example #3
    def test_with_1d_unknown_shape_sparse_tensor(self):
        embedding_values = (
            (1., 2.),  # id 0
            (6., 7.),  # id 1
            (11., 12.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 2 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=2,
                                               initializer=_initializer)

        # Provide placeholders whose shapes are unknown until run time.
        features = {
            'price': array_ops.placeholder(dtypes.float32),
            'body-style': array_ops.sparse_placeholder(dtypes.string),
            # This is a dense tensor for the categorical column.
            'country': array_ops.placeholder(dtypes.string),
        }
        self.assertIsNone(features['price'].shape.ndims)
        self.assertIsNone(features['body-style'].get_shape().ndims)
        self.assertIsNone(features['country'].shape.ndims)

        price_data = np.array([11., 12.])
        body_style_data = sparse_tensor.SparseTensorValue(indices=((0, ),
                                                                   (1, )),
                                                          values=('sedan',
                                                                  'hardtop'),
                                                          dense_shape=(2, ))
        country_data = np.array([['US'], ['CA']])

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 2, net.shape[1])
        with _initialized_session() as sess:

            # Each row is formed by concatenating `one_hot_body_style`,
            # `embedded_country`, and `price`, in that order.
            self.assertAllEqual(
                [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
                sess.run(net,
                         feed_dict={
                             features['price']: price_data,
                             features['body-style']: body_style_data,
                             features['country']: country_data
                         }))
Example #4
    def test_shared_sequence_non_sequence_into_input_layer(self):
        non_seq = fc.categorical_column_with_identity('non_seq',
                                                      num_buckets=10)
        seq = sfc.sequence_categorical_column_with_identity('seq',
                                                            num_buckets=10)
        shared_non_seq, shared_seq = fc.shared_embedding_columns_v2(
            [non_seq, seq],
            dimension=4,
            combiner='sum',
            initializer=init_ops_v2.Ones(),
            shared_embedding_collection_name='shared')

        seq = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                         values=[0, 1, 2],
                                         dense_shape=[2, 2])
        non_seq = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                             values=[0, 1, 2],
                                             dense_shape=[2, 2])
        features = {'seq': seq, 'non_seq': non_seq}

        # Look up the shared embedding for both the sequence and the context
        # (non-sequence) features.
        seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features)
        non_seq_input = dense_features.DenseFeatures([shared_non_seq
                                                      ])(features)

        with self.cached_session() as sess:
            sess.run(variables.global_variables_initializer())
            output_seq, output_seq_length, output_non_seq = sess.run(
                [seq_input, seq_length, non_seq_input])
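            # With init_ops_v2.Ones() and combiner='sum', every embedding lookup
            # yields a vector of ones, so each context (non-sequence) row simply
            # counts the ids present in that row.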
            self.assertAllEqual(
                output_seq,
                [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]])
            self.assertAllEqual(output_seq_length, [2, 1])
            self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])
Example #5
    def test_column_order(self):
        price_a = fc.numeric_column('price_a')
        price_b = fc.numeric_column('price_b')
        with ops.Graph().as_default():
            features = {
                'price_a': [[1.]],
                'price_b': [[3.]],
            }
            net1 = df.DenseFeatures([price_a, price_b])(features)
            net2 = df.DenseFeatures([price_b, price_a])(features)
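            # DenseFeatures sorts its columns by name, so both layers emit
            # price_a before price_b regardless of the list order.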

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[1., 3.]], self.evaluate(net1))
            self.assertAllClose([[1., 3.]], self.evaluate(net2))
Example #6
    def test_train_with_dense_features(self):
        feature_dict = {
            'sex': np.int64([1, 1, 1, 1, 0]),
            'cp': np.int64([0, 3, 3, 2, 1]),
            'slope': np.int64([3, 2, 0, 3, 1]),
        }
        label = np.int64([0, 1, 0, 0, 0])
        train_input_fn = numpy_io.numpy_input_fn(x=feature_dict,
                                                 y=label,
                                                 num_epochs=1,
                                                 shuffle=False)
        feature_columns = list()
        input_features = dict()
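        # One indicator column and one Keras Input per feature; the bucket count
        # (and the Input width) equals the number of distinct values.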
        for feature_name, data_array in feature_dict.items():
            feature_columns.append(
                tf.feature_column.indicator_column(
                    tf.feature_column.categorical_column_with_identity(
                        key=feature_name,
                        num_buckets=np.size(np.unique(data_array)))))
            input_features[feature_name] = keras.layers.Input(
                name=feature_name,
                shape=(np.size(np.unique(data_array)), ),
                dtype=tf.dtypes.int64)

        x = dense_features.DenseFeatures(feature_columns)(input_features)
        x = keras.layers.Dense(16, activation='relu')(x)
        logits = keras.layers.Dense(1, activation='linear')(x)
        model = keras.Model(inputs=input_features, outputs=logits)

        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        estimator_model = keras_lib.model_to_estimator(keras_model=model)
        estimator_model.train(input_fn=train_input_fn, steps=5)
Example #7
 def test_does_not_support_dict_columns(self):
   with self.assertRaisesRegexp(
       ValueError, 'Expected feature_columns to be iterable, found dict.'):
     df.DenseFeatures(feature_columns={'a': fc.numeric_column('a')})(
         features={
             'a': [[0]]
         })
Example #8
 def test_should_be_dense_column(self):
     with self.assertRaisesRegexp(ValueError, 'must be a .*DenseColumn'):
         df.DenseFeatures(feature_columns=[
             fc.categorical_column_with_hash_bucket('wire_cast', 4)
         ])(features={
             'a': [[0]]
         })
Example #9
    def test_from_config(self, trainable, name):
        cols = [
            fc.numeric_column('a'),
            fc.embedding_column(fc.categorical_column_with_vocabulary_list(
                'b', vocabulary_list=['1', '2', '3']),
                                dimension=2),
            fc.indicator_column(
                fc.categorical_column_with_hash_bucket(key='c',
                                                       hash_bucket_size=3))
        ]
        orig_layer = dense_features.DenseFeatures(cols,
                                                  trainable=trainable,
                                                  name=name)
        config = orig_layer.get_config()

        new_layer = dense_features.DenseFeatures.from_config(config)

        self.assertEqual(new_layer.name, orig_layer.name)
        self.assertEqual(new_layer.trainable, trainable)
        self.assertLen(new_layer._feature_columns, 3)
        self.assertEqual(new_layer._feature_columns[0].name, 'a')
        self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0)
        self.assertEqual(new_layer._feature_columns[1].categorical_column.name,
                         'b')
        self.assertIsInstance(new_layer._feature_columns[2],
                              fc.IndicatorColumn)
Example #10
 def test_raises_if_shape_mismatch(self):
   price = fc.numeric_column('price', shape=2)
   with ops.Graph().as_default():
     features = {'price': [[1.], [5.]]}
     with self.assertRaisesRegexp(
         Exception,
         r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
       df.DenseFeatures([price])(features)
Example #11
    def test_bare_column(self):
        with ops.Graph().as_default():
            features = {'a': [0.]}
            net = df.DenseFeatures(fc.numeric_column('a'))(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[0.]], self.evaluate(net))
Example #12
 def test_raises_if_duplicate_name(self):
     with self.assertRaisesRegexp(
             ValueError, 'Duplicate feature column name found for columns'):
         df.DenseFeatures(feature_columns=[
             fc.numeric_column('a'),
             fc.numeric_column('a')
         ])(features={
             'a': [[0]]
         })
Example #13
 def test_fails_for_categorical_column(self):
   animal = fc.categorical_column_with_identity('animal', num_buckets=4)
   with ops.Graph().as_default():
     features = {
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
     }
     with self.assertRaisesRegexp(Exception, 'must be a .*DenseColumn'):
       df.DenseFeatures([animal])(features)
Example #14
    def test_column_generator(self):
        with ops.Graph().as_default():
            features = {'a': [0.], 'b': [1.]}
            columns = (fc.numeric_column(key) for key in features)
            net = df.DenseFeatures(columns)(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[0., 1.]], self.evaluate(net))
Example #15
    def test_reshaping(self):
        price = fc.numeric_column('price', shape=[1, 2])
        with ops.Graph().as_default():
            features = {'price': [[[1., 2.]], [[5., 6.]]]}
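            # Each example carries a [1, 2]-shaped feature, which DenseFeatures
            # flattens to 2 values per row, giving a [2, 2] output.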
            net = df.DenseFeatures([price])(features)

            self.evaluate(variables_lib.global_variables_initializer())
            self.evaluate(lookup_ops.tables_initializer())

            self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net))
Example #16
    def test_with_1d_sparse_tensor(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=5,
                                               initializer=_initializer)

        # Provide 1-D input tensors (dense and sparse).
        features = {
            'price':
            constant_op.constant([
                11.,
                12.,
            ]),
            'body-style':
            sparse_tensor.SparseTensor(indices=((0, ), (1, )),
                                       values=('sedan', 'hardtop'),
                                       dense_shape=(2, )),
            # This is a dense tensor for the categorical column.
            'country':
            constant_op.constant(['CA', 'US']),
        }
        self.assertEqual(1, features['price'].shape.ndims)
        self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
        self.assertEqual(1, features['country'].shape.ndims)

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with _initialized_session() as sess:

            # Each row is formed by concatenating `one_hot_body_style`,
            # `embedded_country`, and `price`, in that order.
            self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                                 [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
                                sess.run(net))
Example #17
  def test_multi_column(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2')
    with ops.Graph().as_default():
      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
      net = df.DenseFeatures([price1, price2])(features)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))
Example #18
    def test_dense_feature_with_partitioner(self):
        with context.eager_mode():
            sparse_input = sparse_tensor.SparseTensor(indices=((0, 0), (1, 0),
                                                               (2, 0), (3, 0)),
                                                      values=(0, 1, 3, 2),
                                                      dense_shape=(4, 4))

            # Create feature columns (categorical and embedding).
            categorical_column = fc.categorical_column_with_identity(
                key='a', num_buckets=4)
            embedding_dimension = 2

            def _embedding_column_initializer(shape,
                                              dtype,
                                              partition_info=None):
                offset = partition_info._var_offset[0]
                del shape  # unused
                del dtype  # unused
                if offset == 0:
                    embedding_values = (
                        (1, 0),  # id 0
                        (0, 1))  # id 1
                else:
                    embedding_values = (
                        (1, 1),  # id 2
                        (2, 2))  # id 3
                return embedding_values

            embedding_column = fc.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_embedding_column_initializer)

            dense_features = df.DenseFeatures(
                [embedding_column],
                partitioner=partitioned_variables.fixed_size_partitioner(2))
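            # fixed_size_partitioner(2) splits the 4x2 embedding table into two
            # 2x2 shards, which is why the initializer above returns two rows and
            # reads its offset from partition_info.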
            features = {'a': sparse_input}

            inputs = dense_features(features)
            variables = dense_features.variables

            # Sanity check: test that the inputs are correct.
            self.assertAllEqual([[1, 0], [0, 1], [2, 2], [1, 1]], inputs)

            # Check that the embedding variable was partitioned into two shards.
            self.assertEqual(2, len(variables))

            # Check that invoking dense_features on the same features does not create
            # additional variables
            _ = dense_features(features)
            self.assertEqual(2, len(variables))
            self.assertIs(variables[0], dense_features.variables[0])
            self.assertIs(variables[1], dense_features.variables[1])
Example #19
 def test_static_batch_size_mismatch(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': [[1.], [5.], [7.]],  # batchsize = 3
         'price2': [[3.], [4.]]  # batchsize = 2
     }
     with self.assertRaisesRegexp(
         ValueError,
         r'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
       df.DenseFeatures([price1, price2])(features)
Example #20
  def test_with_rank_0_feature(self):
    # price has 1 dimension in dense_features
    price = fc.numeric_column('price')
    features = {
        'price': constant_op.constant(0),
    }
    self.assertEqual(0, features['price'].shape.ndims)

    # Static rank 0 should fail
    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
      df.DenseFeatures([price])(features)

    # Dynamic rank 0 should fail
    features = {
        'price': array_ops.placeholder(dtypes.float32),
    }
    net = df.DenseFeatures([price])(features)
    self.assertEqual(1, net.shape[1])
    with _initialized_session() as sess:
      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
        sess.run(net, feed_dict={features['price']: np.array(1)})
Example #21
 def test_runtime_batch_size_mismatch(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
         'price2': [[3.], [4.]]  # batchsize = 2
     }
     net = df.DenseFeatures([price1, price2])(features)
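      # The mismatch cannot be caught at graph-construction time because price1
      # has an unknown shape; it surfaces only when the placeholder is fed.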
     with _initialized_session() as sess:
       with self.assertRaisesRegexp(errors.OpError,
                                    'Dimensions of inputs should match'):
         sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})
Example #22
  def __init__(
      self,
      rnn_layer,
      units,
      sequence_feature_columns,
      context_feature_columns=None,
      activation=None,
      return_sequences=False,
      **kwargs):
    """Initializes a RNNModel instance.

    Args:
      rnn_layer: A Keras RNN layer.
      units: An int indicating the dimension of the logit layer, and of the
        model output.
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      activation: Activation function to apply to the logit layer (for instance
        `tf.keras.activations.sigmoid`). If you don't specify anything, no
        activation is applied.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      **kwargs: Additional arguments.

    Raises:
      ValueError: If `units` is not an int.
    """
    super(RNNModel, self).__init__(**kwargs)
    if not isinstance(units, int):
      raise ValueError('units must be an int.  Given type: {}'.format(
          type(units)))
    self._return_sequences = return_sequences
    self._sequence_feature_columns = sequence_feature_columns
    self._context_feature_columns = context_feature_columns
    self._sequence_features_layer = fc.SequenceFeatures(
        sequence_feature_columns)
    self._dense_features_layer = None
    if context_feature_columns:
      self._dense_features_layer = dense_features.DenseFeatures(
          context_feature_columns)
    self._rnn_layer = rnn_layer
    self._logits_layer = keras_layers.Dense(
        units=units, activation=activation, name='logits')
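
  # A minimal usage sketch (not from the original file): wiring RNNModel with one
  # sequence column and one context column. The column keys and layer sizes are
  # illustrative, and the aliases (fc, sfc, keras_layers) are assumed to match
  # this module's imports.
  #
  #   tokens = sfc.sequence_categorical_column_with_identity('tokens', num_buckets=100)
  #   token_embedding = fc.embedding_column(tokens, dimension=8)
  #   age = fc.numeric_column('age')
  #   model = RNNModel(
  #       rnn_layer=keras_layers.SimpleRNN(units=16),
  #       units=1,
  #       sequence_feature_columns=[token_embedding],
  #       context_feature_columns=[age],
  #       activation='sigmoid')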
Example #23
  def test_multiple_layers_with_same_embedding_column(self):
    some_sparse_column = fc.categorical_column_with_hash_bucket(
        'sparse_feature', hash_bucket_size=5)
    some_embedding_column = fc.embedding_column(
        some_sparse_column, dimension=10)

    with ops.Graph().as_default():
      features = {
          'sparse_feature': [['a'], ['x']],
      }
      all_cols = [some_embedding_column]
      df.DenseFeatures(all_cols)(features)
      df.DenseFeatures(all_cols)(features)
      # Make sure that 2 variables get created in this case.
      self.assertEqual(2,
                       len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
      expected_var_names = [
          'dense_features/sparse_feature_embedding/embedding_weights:0',
          'dense_features_1/sparse_feature_embedding/embedding_weights:0'
      ]
      self.assertItemsEqual(
          expected_var_names,
          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
Example #24
    def test_crossed_column(self):
        a = fc.categorical_column_with_vocabulary_list(
            'a', vocabulary_list=['1', '2', '3'])
        b = fc.categorical_column_with_vocabulary_list(
            'b', vocabulary_list=['1', '2', '3'])
        ab = fc.crossed_column([a, b], hash_bucket_size=2)
        cols = [fc.indicator_column(ab)]
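        # Wrapping the crossed column in an indicator column yields a DenseColumn
        # that DenseFeatures accepts and can round-trip through get_config().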

        orig_layer = dense_features.DenseFeatures(cols)
        config = orig_layer.get_config()

        new_layer = dense_features.DenseFeatures.from_config(config)

        self.assertLen(new_layer._feature_columns, 1)
        self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
Example #25
 def test_runtime_batch_size_matches(self):
   price1 = fc.numeric_column('price1')
   price2 = fc.numeric_column('price2')
   with ops.Graph().as_default():
     features = {
         'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
         'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
     }
     net = df.DenseFeatures([price1, price2])(features)
     with _initialized_session() as sess:
       sess.run(
           net,
           feed_dict={
               features['price1']: [[1.], [5.]],
               features['price2']: [[1.], [5.]],
           })
Example #26
  def test_cols_to_output_tensors(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2')
    with ops.Graph().as_default():
      cols_dict = {}
      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
      dense_features = df.DenseFeatures([price1, price2])
      net = dense_features(features, cols_dict)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2.], [5., 6.]],
                          self.evaluate(cols_dict[price1]))
      self.assertAllClose([[3.], [4.]], self.evaluate(cols_dict[price2]))
      self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))
Example #27
    def test_feature_column_dense_features_gradient(self):
        with context.eager_mode():
            sparse_input = sparse_tensor.SparseTensor(indices=((0, 0), (1, 0),
                                                               (2, 0)),
                                                      values=(0, 1, 2),
                                                      dense_shape=(3, 3))

            # Create feature columns (categorical and embedding).
            categorical_column = fc.categorical_column_with_identity(
                key='a', num_buckets=3)
            embedding_dimension = 2

            def _embedding_column_initializer(shape,
                                              dtype,
                                              partition_info=None):
                del shape  # unused
                del dtype  # unused
                del partition_info  # unused
                embedding_values = (
                    (1, 0),  # id 0
                    (0, 1),  # id 1
                    (1, 1))  # id 2
                return embedding_values

            embedding_column = fc.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_embedding_column_initializer)

            dense_features = df.DenseFeatures([embedding_column])
            features = {'a': sparse_input}

            def scale_matrix():
                matrix = dense_features(features)
                return 2 * matrix

            # Sanity check: Verify that scale_matrix returns the correct output.
            self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())

            # Check that the returned gradient is correct.
            grad_function = backprop.implicit_grad(scale_matrix)
            grads_and_vars = grad_function()
            indexed_slice = grads_and_vars[0][0]
            gradient = indexed_slice.values
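            # The embedding gradient arrives as IndexedSlices: one row per
            # looked-up id, each equal to d(2 * matrix)/d(embedding) = 2.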

            self.assertAllEqual([0, 1, 2], indexed_slice.indices)
            self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
Example #28
  def test_compute_output_shape(self):
    price1 = fc.numeric_column('price1', shape=2)
    price2 = fc.numeric_column('price2', shape=4)
    with ops.Graph().as_default():
      features = {
          'price1': [[1., 2.], [5., 6.]],
          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
      }
      dense_features = df.DenseFeatures([price1, price2])
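      # The reported output width is the sum of the column shapes: 2 + 4 = 6.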
      self.assertEqual((None, 6), dense_features.compute_output_shape((None,)))
      net = dense_features(features)

      self.evaluate(variables_lib.global_variables_initializer())
      self.evaluate(lookup_ops.tables_initializer())

      self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]],
                          self.evaluate(net))
Example #29
    def test_reuses_variables(self):
        with context.eager_mode():
            sparse_input = sparse_tensor.SparseTensor(indices=((0, 0), (1, 0),
                                                               (2, 0)),
                                                      values=(0, 1, 2),
                                                      dense_shape=(3, 3))

            # Create feature columns (categorical and embedding).
            categorical_column = fc.categorical_column_with_identity(
                key='a', num_buckets=3)
            embedding_dimension = 2

            def _embedding_column_initializer(shape,
                                              dtype,
                                              partition_info=None):
                del shape  # unused
                del dtype  # unused
                del partition_info  # unused
                embedding_values = (
                    (1, 0),  # id 0
                    (0, 1),  # id 1
                    (1, 1))  # id 2
                return embedding_values

            embedding_column = fc.embedding_column(
                categorical_column,
                dimension=embedding_dimension,
                initializer=_embedding_column_initializer)

            dense_features = df.DenseFeatures([embedding_column])
            features = {'a': sparse_input}

            inputs = dense_features(features)
            variables = dense_features.variables

            # Sanity check: test that the inputs are correct.
            self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)

            # Check that only one variable was created.
            self.assertEqual(1, len(variables))

            # Check that invoking dense_features on the same features does not create
            # additional variables
            _ = dense_features(features)
            self.assertEqual(1, len(variables))
            self.assertEqual(variables[0], dense_features.variables[0])
Example #30
    def DISABLED_test_train_with_dense_features_embedding(self):
        feature_dict = {
            'sex': np.int64([1, 1, 1, 1, 0]),
            'cp': np.int64([0, 3, 3, 2, 1]),
            'slope': np.int64([3, 2, 0, 3, 1]),
        }
        label = np.int64([0, 1, 0, 0, 0])
        train_input_fn = numpy_io.numpy_input_fn(x=feature_dict,
                                                 y=label,
                                                 num_epochs=1,
                                                 shuffle=False)
        feature_columns = list()
        input_features = dict()
        for feature_name, data_array in feature_dict.items():
            feature_columns.append(
                tf.feature_column.embedding_column(
                    tf.feature_column.categorical_column_with_identity(
                        key=feature_name,
                        num_buckets=np.size(np.unique(data_array))),
                    dimension=3))
            input_features[feature_name] = keras.layers.Input(
                name=feature_name,
                shape=(np.size(np.unique(data_array)), ),
                dtype=tf.dtypes.int64)

        df = dense_features.DenseFeatures(feature_columns)
        x = df(input_features)
        x = keras.layers.Dense(16, activation='relu')(x)
        logits = keras.layers.Dense(1, activation='linear')(x)
        model = keras.Model(inputs=input_features, outputs=logits)

        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        estimator_model = keras_lib.model_to_estimator(keras_model=model)
        estimator_model.train(input_fn=train_input_fn, steps=5)
        # Assert that the per-feature embedding_weights variables appear among
        # the DenseFeatures layer's checkpoint dependencies under their scoped
        # names (the bare 'embedding_weights' name is absent).
        dependency_names = [x.name for x in df._checkpoint_dependencies]
        self.assertNotIn('embedding_weights', dependency_names)
        self.assertIn('cp_embedding/embedding_weights', dependency_names)
        self.assertIn('sex_embedding/embedding_weights', dependency_names)
        self.assertIn('slope_embedding/embedding_weights', dependency_names)