Example #1
    def test_with_1d_unknown_shape_sparse_tensor(self):
        embedding_values = (
            (1., 2.),  # id 0
            (6., 7.),  # id 1
            (11., 12.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 2 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=2,
                                               initializer=_initializer)

        # Provides 1-dim tensor and dense tensor.
        features = {
            'price': array_ops.placeholder(dtypes.float32),
            'body-style': array_ops.sparse_placeholder(dtypes.string),
            # This is a dense tensor for the categorical_column.
            'country': array_ops.placeholder(dtypes.string),
        }
        self.assertIsNone(features['price'].shape.ndims)
        self.assertIsNone(features['body-style'].get_shape().ndims)
        self.assertIsNone(features['country'].shape.ndims)

        price_data = np.array([11., 12.])
        body_style_data = sparse_tensor.SparseTensorValue(indices=((0, ),
                                                                   (1, )),
                                                          values=('sedan',
                                                                  'hardtop'),
                                                          dense_shape=(2, ))
        country_data = np.array([['US'], ['CA']])

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 2, net.shape[1])
        with _initialized_session() as sess:

            # Each row is formed by concatenating `one_hot_body_style`,
            # `embedded_country`, and `price` in order.
            self.assertAllEqual(
                [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
                sess.run(net,
                         feed_dict={
                             features['price']: price_data,
                             features['body-style']: body_style_data,
                             features['country']: country_data
                         }))
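
Note on the expected rows above: `DenseFeatures` concatenates its columns
sorted by column name, which is why the `body-style` one-hot block comes
before the `country` embedding and `price`. A minimal standalone sketch of
that ordering (TF2 eager mode assumed; not part of the original test):

    import tensorflow as tf

    price = tf.feature_column.numeric_column('price')
    one_hot_body_style = tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            'body-style', ['hardtop', 'wagon', 'sedan']))
    layer = tf.keras.layers.DenseFeatures([price, one_hot_body_style])
    out = layer({'price': tf.constant([[11.0]]),
                 'body-style': tf.constant([['sedan']])})
    # Columns appear in name order: 'body-style' first, then 'price'.
    print(out.numpy())  # [[0. 0. 1. 11.]]
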
Example #2
    def test_with_1d_sparse_tensor(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = fc.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = fc.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = fc.embedding_column(country,
                                               dimension=5,
                                               initializer=_initializer)

        with ops.Graph().as_default():
            # Provides 1-dim tensor and dense tensor.
            features = {
                'price':
                constant_op.constant([
                    11.,
                    12.,
                ]),
                'body-style':
                sparse_tensor.SparseTensor(indices=((0, ), (1, )),
                                           values=('sedan', 'hardtop'),
                                           dense_shape=(2, )),
                # This is a dense tensor for the categorical_column.
                'country':
                constant_op.constant(['CA', 'US']),
            }
            self.assertEqual(1, features['price'].shape.ndims)
            self.assertEqual(1,
                             features['body-style'].dense_shape.get_shape()[0])
            self.assertEqual(1, features['country'].shape.ndims)

            net = df.DenseFeatures(
                [price, one_hot_body_style, embedded_country])(features)
            self.assertEqual(1 + 3 + 5, net.shape[1])
            with _initialized_session() as sess:

                # Each row is formed by concatenating `one_hot_body_style`,
                # `embedded_country`, and `price` in order.
                self.assertAllEqual(
                    [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                     [1., 0., 0., 1., 2., 3., 4., 5., 12.]], sess.run(net))
Example #3
    def test_crossed_column(self):
        a = fc.categorical_column_with_vocabulary_list(
            'a', vocabulary_list=['1', '2', '3'])
        b = fc.categorical_column_with_vocabulary_list(
            'b', vocabulary_list=['1', '2', '3'])
        ab = fc.crossed_column([a, b], hash_bucket_size=2)
        cols = [fc.indicator_column(ab)]

        orig_layer = df.DenseFeatures(cols)
        config = orig_layer.get_config()

        new_layer = df.DenseFeatures.from_config(config)

        self.assertLen(new_layer._feature_columns, 1)
        self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
Example #4
    def test_from_config(self, trainable, name):
        cols = [
            fc.numeric_column('a'),
            fc.embedding_column(fc.categorical_column_with_vocabulary_list(
                'b', vocabulary_list=['1', '2', '3']),
                                dimension=2),
            fc.indicator_column(
                fc.categorical_column_with_hash_bucket(key='c',
                                                       hash_bucket_size=3))
        ]
        orig_layer = df.DenseFeatures(cols, trainable=trainable, name=name)
        config = orig_layer.get_config()

        new_layer = df.DenseFeatures.from_config(config)

        self.assertEqual(new_layer.name, orig_layer.name)
        self.assertEqual(new_layer.trainable, trainable)
        self.assertLen(new_layer._feature_columns, 3)
        self.assertEqual(new_layer._feature_columns[0].name, 'a')
        self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0)
        self.assertEqual(new_layer._feature_columns[1].categorical_column.name,
                         'b')
        self.assertIsInstance(new_layer._feature_columns[0], cols[0].__class__)
        self.assertIsInstance(new_layer._feature_columns[1], cols[1].__class__)
        self.assertIsInstance(new_layer._feature_columns[2], cols[2].__class__)
Example #5
    def test_from_config(self, units, sparse_combiner, trainable, name):
        cols = [
            fc.numeric_column('a'),
            fc.categorical_column_with_vocabulary_list('b',
                                                       vocabulary_list=('1',
                                                                        '2',
                                                                        '3')),
            fc.categorical_column_with_hash_bucket(key='c', hash_bucket_size=3)
        ]
        orig_layer = fc._LinearModelLayer(cols,
                                          units=units,
                                          sparse_combiner=sparse_combiner,
                                          trainable=trainable,
                                          name=name)
        config = orig_layer.get_config()

        new_layer = fc._LinearModelLayer.from_config(config)

        self.assertEqual(new_layer.name, orig_layer.name)
        self.assertEqual(new_layer._units, units)
        self.assertEqual(new_layer._sparse_combiner, sparse_combiner)
        self.assertEqual(new_layer.trainable, trainable)
        self.assertLen(new_layer._feature_columns, 3)
        self.assertEqual(new_layer._feature_columns[0].name, 'a')
        self.assertEqual(new_layer._feature_columns[1].vocabulary_list,
                         ('1', '2', '3'))
        self.assertEqual(new_layer._feature_columns[2].num_buckets, 3)
Example #6
  def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self):
    """LinearClassifier with LinearSDCA with OOV features (-1 IDs)."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              sparse_tensor.SparseTensor(
                  values=[2., 3., 1.],
                  indices=[[0, 0], [1, 0], [2, 0]],
                  dense_shape=[3, 5]),
          'country':
              sparse_tensor.SparseTensor(
                  # 'GB' is out of the vocabulary.
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 0], [2, 0]],
                  dense_shape=[3, 5])
      }, constant_op.constant([[1], [0], [1]])

    country = feature_column_v2.categorical_column_with_vocabulary_list(
        'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN'])
    country_weighted_by_price = feature_column_v2.weighted_categorical_column(
        country, 'price')
    optimizer = linear.LinearSDCA(
        example_id_column='example_id', symmetric_l2_regularization=0.01)
    classifier = linear.LinearClassifier(
        feature_columns=[country_weighted_by_price], optimizer=optimizer)
    classifier.train(input_fn=input_fn, steps=100)
    loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss, 0.2)
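
The docstring above mentions OOV features getting -1 IDs. A minimal
standalone sketch of that behavior (TF2 eager mode assumed): with no OOV
buckets, an out-of-vocabulary value such as 'GB' is assigned id -1, which an
indicator column renders as an all-zero row.

    import tensorflow as tf

    country = tf.feature_column.categorical_column_with_vocabulary_list(
        'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN'])
    layer = tf.keras.layers.DenseFeatures(
        [tf.feature_column.indicator_column(country)])
    out = layer({'country': tf.constant([['IT'], ['US'], ['GB']])})
    # 'GB' is out of vocabulary -> id -1 -> an all-zero one-hot row.
    print(out.numpy())
    # [[0. 0. 0. 1. 0.]
    #  [1. 0. 0. 0. 0.]
    #  [0. 0. 0. 0. 0.]]
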
Example #7
    def test_linear_model_with_feature_column(self):
        with context.eager_mode():
            vocab_list = ['alpha', 'beta', 'gamma']
            vocab_val = [0.4, 0.6, 0.9]
            data = np.random.choice(vocab_list, size=256)
            y = np.zeros_like(data, dtype=np.float32)
            for vocab, val in zip(vocab_list, vocab_val):
                indices = np.where(data == vocab)
                y[indices] = val + np.random.uniform(
                    low=-0.01, high=0.01, size=indices[0].shape)
            cat_column = fc.categorical_column_with_vocabulary_list(
                key='symbol', vocabulary_list=vocab_list)
            ind_column = fc.indicator_column(cat_column)
            dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
            linear_model = linear.LinearModel(use_bias=False,
                                              kernel_initializer='zeros')
            combined = sequential.Sequential(
                [dense_feature_layer, linear_model])
            opt = gradient_descent.SGD(learning_rate=0.1)
            combined.compile(opt, 'mse', [])
            combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
            self.assertAllClose(
                [[0.4], [0.6], [0.9]],
                combined.layers[1].dense_layers[0].kernel.numpy(),
                atol=0.01)
Example #8
    def test_train_premade_widedeep_model_with_feature_layers(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = feature_column.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = feature_column.indicator_column(cat_column)
        # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed.
        # emb_column = feature_column.embedding_column(cat_column, dimension=5)
        keras_input = keras.layers.Input(name='symbol',
                                         shape=3,
                                         dtype=dtypes.string)

        # build linear part with feature layer.
        linear_feature_layer = dense_features.DenseFeatures([ind_column])
        linear_model = linear.LinearModel(units=1,
                                          name='Linear',
                                          kernel_initializer='zeros')
        combined_linear = keras.Sequential(
            [linear_feature_layer, linear_model])

        # build dnn part with feature layer.
        dnn_feature_layer = dense_features.DenseFeatures([ind_column])
        dense_layer = keras.layers.Dense(units=1,
                                         name='DNNDense',
                                         kernel_initializer='zeros')
        combined_dnn = keras.Sequential([dnn_feature_layer, dense_layer])

        # build and compile wide deep.
        wide_deep_model = wide_deep.WideDeepModel(combined_linear,
                                                  combined_dnn)
        wide_deep_model._set_inputs({'symbol': keras_input})
        sgd_opt = gradient_descent.SGD(0.1)
        adam_opt = adam.Adam(0.1)
        wide_deep_model.compile([sgd_opt, adam_opt], 'mse', ['mse'])

        # build estimator.
        train_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                 y=y,
                                                 num_epochs=20,
                                                 shuffle=False)
        eval_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                y=y,
                                                num_epochs=20,
                                                shuffle=False)
        est = keras_lib.model_to_estimator(keras_model=wide_deep_model,
                                           config=self._config,
                                           checkpoint_format='saver')

        before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        est.train(input_fn=train_input_fn, steps=20)
        after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        self.assertLess(after_eval_results['loss'],
                        before_eval_results['loss'])
        self.assertLess(after_eval_results['loss'], 0.1)
Example #9
    def test_wide_deep_model_with_two_feature_columns(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = fc.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = fc.indicator_column(cat_column)
        emb_column = fc.embedding_column(cat_column, dimension=5)
        linear_feature_layer = dense_features_v2.DenseFeatures([ind_column])
        linear_model = linear.LinearModel(use_bias=False,
                                          kernel_initializer='zeros')
        combined_linear = sequential.Sequential(
            [linear_feature_layer, linear_model])
        dnn_model = sequential.Sequential([core.Dense(units=1)])
        dnn_feature_layer = dense_features_v2.DenseFeatures([emb_column])
        combined_dnn = sequential.Sequential([dnn_feature_layer, dnn_model])
        wide_deep_model = wide_deep.WideDeepModel(combined_linear,
                                                  combined_dnn)
        opt = gradient_descent.SGD(learning_rate=0.1)
        wide_deep_model.compile(
            opt,
            'mse', [],
            run_eagerly=testing_utils.should_run_eagerly(),
            experimental_run_tf_function=testing_utils.should_run_tf_function())
        wide_deep_model.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
        self.assertEqual(3, linear_model.inputs[0].shape[1])
        self.assertEqual(5, dnn_model.inputs[0].shape[1])
Example #10
    def test_saving_with_dense_features(self):
        cols = [
            feature_column_v2.numeric_column('a'),
            feature_column_v2.indicator_column(
                feature_column_v2.categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(1, ), name='a'),
            'b': keras.layers.Input(shape=(1, ), name='b', dtype='string')
        }

        fc_layer = feature_column_v2.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer=keras.optimizers.RMSprop(lr=0.0001),
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype('str')

        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        self.assertLen(loaded_model.predict({
            'a': inputs_a,
            'b': inputs_b
        }), 10)
Example #11
def sequence_categorical_column_with_vocabulary_list(
    key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
  """A sequence of categorical terms where ids use an in-memory list.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into dense representation for input to sequence NN, such as
  RNN.

  Example:

  ```python
  colors = sequence_categorical_column_with_vocabulary_list(
      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'),
      num_oov_buckets=2)
  colors_embedding = embedding_column(colors, dimension=3)
  columns = [colors_embedding]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    vocabulary_list: An ordered iterable defining the vocabulary. Each feature
      is mapped to the index of its value (if present) in `vocabulary_list`.
      Must be castable to `dtype`.
    dtype: The type of features. Only string and integer types are supported.
      If `None`, it will be inferred from `vocabulary_list`.
    default_value: The integer ID value to return for out-of-vocabulary feature
      values, defaults to `-1`. This can not be specified with a positive
      `num_oov_buckets`.
    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
      `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a
      hash of the input value. A positive `num_oov_buckets` can not be specified
      with `default_value`.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
    ValueError: `num_oov_buckets` is a negative integer.
    ValueError: `num_oov_buckets` and `default_value` are both specified.
    ValueError: if `dtype` is not integer or string.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_vocabulary_list(
          key=key,
          vocabulary_list=vocabulary_list,
          dtype=dtype,
          default_value=default_value,
          num_oov_buckets=num_oov_buckets))
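
To make the `num_oov_buckets` semantics above concrete: with a 4-item
vocabulary and `num_oov_buckets=2`, in-vocabulary values map to ids 0..3 and
out-of-vocabulary values hash into ids 4..5. A minimal sketch (a plain,
non-sequence column is used for brevity; TF2 eager mode assumed):

    import tensorflow as tf

    colors = tf.feature_column.categorical_column_with_vocabulary_list(
        key='colors', vocabulary_list=('R', 'G', 'B', 'Y'), num_oov_buckets=2)
    layer = tf.keras.layers.DenseFeatures(
        [tf.feature_column.indicator_column(colors)])
    out = layer({'colors': tf.constant([['R'], ['purple']])})
    # The indicator width is 4 + 2 = 6; 'R' -> id 0, while 'purple' hashes
    # into one of the two OOV slots (id 4 or 5).
    print(out.shape)  # (2, 6)
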
Example #13
    def embedding_varlen(self, batch_size, max_length):
        """Benchmark a variable-length embedding."""
        # Data and constants.
        vocab = fc_bm.create_vocabulary(32768)

        path = self._write_to_temp_file("tmp", vocab)

        data = fc_bm.create_string_data(max_length,
                                        batch_size * NUM_REPEATS,
                                        vocab,
                                        pct_oov=0.15)

        # Keras implementation
        model = keras.Sequential()
        model.add(
            keras.Input(shape=(max_length, ), name="data", dtype=dt.string))
        model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))

        # FC implementation
        fc = fcv2.categorical_column_with_vocabulary_list(
            key="data", vocabulary_list=vocab, num_oov_buckets=1)

        # Wrap the FC implementation in a tf.function for a fair comparison
        @tf_function()
        def fc_fn(tensors):
            fc.transform_feature(fcv2.FeatureTransformationCache(tensors),
                                 None)

        # Benchmark runs
        keras_data = {
            "data":
            data.to_tensor(default_value="", shape=(batch_size, max_length))
        }
        k_avg_time = fc_bm.run_keras(keras_data, model, batch_size,
                                     NUM_REPEATS)

        fc_data = {
            "data":
            data.to_tensor(default_value="", shape=(batch_size, max_length))
        }
        fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

        return k_avg_time, fc_avg_time
Example #14
    def testFeatureColumns(self):
        # TODO(b/120099662): Error with table initialization with Keras models in
        # graph mode.
        if context.executing_eagerly():
            numeric = fc.numeric_column('a')
            bucketized = fc.bucketized_column(numeric, boundaries=[5, 10, 15])
            cat_vocab = fc.categorical_column_with_vocabulary_list(
                'b', ['1', '2', '3'])
            one_hot = fc.indicator_column(cat_vocab)
            embedding = fc.embedding_column(cat_vocab, dimension=8)
            feature_layer = DenseFeatures([bucketized, one_hot, embedding])
            model = keras.models.Sequential(feature_layer)

            features = {'a': np.array([13, 15]), 'b': np.array(['1', '2'])}
            predictions = model.predict(features)

            saved_model_dir = self._save_model_dir()
            model.save(saved_model_dir, save_format='tf')
            loaded = keras_load.load(saved_model_dir)
            loaded_predictions = loaded.predict(features)
            self.assertAllClose(predictions, loaded_predictions)
Example #15
    def test_train_premade_linear_model_with_dense_features(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = feature_column.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = feature_column.indicator_column(cat_column)
        keras_input = keras.layers.Input(name='symbol',
                                         shape=3,
                                         dtype=dtypes.string)
        feature_layer = dense_features.DenseFeatures([ind_column])
        h = feature_layer({'symbol': keras_input})
        linear_model = linear.LinearModel(units=1)
        h = linear_model(h)

        model = keras.Model(inputs=keras_input, outputs=h)
        opt = gradient_descent.SGD(0.1)
        model.compile(opt, 'mse', ['mse'])
        train_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                 y=y,
                                                 num_epochs=20,
                                                 shuffle=False)
        eval_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                y=y,
                                                num_epochs=20,
                                                shuffle=False)
        est = keras_lib.model_to_estimator(keras_model=model,
                                           config=self._config,
                                           checkpoint_format='saver')
        before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        est.train(input_fn=train_input_fn, steps=30)
        after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        self.assertLess(after_eval_results['loss'],
                        before_eval_results['loss'])
        self.assertLess(after_eval_results['loss'], 0.05)
Example #16
    def test_wide_deep_model_with_single_feature_column(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = feature_column_v2.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = feature_column_v2.indicator_column(cat_column)
        dense_feature_layer = dense_features_v2.DenseFeatures([ind_column])
        linear_model = linear.LinearModel(use_bias=False,
                                          kernel_initializer='zeros')
        dnn_model = keras.Sequential([keras.layers.Dense(units=1)])
        wide_deep_model = wide_deep.WideDeepModel(linear_model, dnn_model)
        combined = keras.Sequential([dense_feature_layer, wide_deep_model])
        opt = gradient_descent.SGD(learning_rate=0.1)
        combined.compile(opt,
                         'mse', [],
                         run_eagerly=testing_utils.should_run_eagerly())
        combined.fit(x={'symbol': data}, y=y, batch_size=32, epochs=10)
Example #17
def _get_categorical_column(params: dict) -> fc.CategoricalColumn:
    if 'vocabulary' in params:
        feature = fc.categorical_column_with_vocabulary_list(
            params['key'],
            vocabulary_list=_parse_vocabulary(params['vocabulary']),
            default_value=0)
    elif 'bucket_size' in params:
        feature = fc.categorical_column_with_hash_bucket(
            params['key'], hash_bucket_size=params['bucket_size'])
    elif 'file' in params:
        feature = fc.categorical_column_with_vocabulary_file(
            params['key'], vocabulary_file=params['file'], default_value=0)
    elif 'num_buckets' in params:
        feature = fc.categorical_column_with_identity(
            params['key'], num_buckets=params['num_buckets'])
    elif 'boundaries' in params:
        feature = fc.bucketized_column(
            fc.numeric_column(params['key']), boundaries=params['boundaries'])
    else:
        raise ValueError(
            "params must specify one of: 'vocabulary', 'bucket_size', "
            "'file', 'num_buckets', or 'boundaries'")

    return feature
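
A usage sketch for the dispatcher above. The params dicts are hypothetical,
and the vocabulary branch is omitted because its format depends on the
unshown `_parse_vocabulary` helper:

    # Each dict selects a different branch of _get_categorical_column.
    ident_col = _get_categorical_column({'key': 'weekday', 'num_buckets': 7})
    hash_col = _get_categorical_column({'key': 'user_id', 'bucket_size': 100})
    bucket_col = _get_categorical_column(
        {'key': 'age', 'boundaries': [18, 35, 65]})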