Example #1
  def testInputFromFeatureColumn(self):
    # Tests a 1-dimensional real-valued feature.
    x = np.random.uniform(-1.0, 1.0, size=[self._test_data.num_examples])
    feature_column = feature_column_lib.numeric_column('x')
    # Note that 1-dimensional features of shape [batch_size] are packed into a
    # 2-dimensional tensor of shape [batch_size, 1].
    materialized = self._materialize_feature_column(feature_column, x)
    self.assertEqual(materialized.shape, (self._test_data.num_examples, 1))
    materialized = materialized[:, 0]
    self.assertTrue(
        self._np_array_close(x, materialized), 'expected:{} != got:{}'.format(
            x, materialized))

    # Tests a 2-dimensional real-valued feature.
    x = np.random.uniform(-1.0, 1.0, size=[self._test_data.num_examples, 2])
    feature_column = feature_column_lib.numeric_column('x', shape=(2,))
    materialized = self._materialize_feature_column(feature_column, x)
    self.assertTrue(
        self._np_array_close(x, materialized), 'expected:{} != got:{}'.format(
            x, materialized))

    # Tests that a categorical feature is correctly converted.
    x = np.array(['Y', 'N', '?', 'Y', 'Y', 'N'])
    expect = np.array([0., 1., -1., 0., 0., 1.])
    feature_column = feature_column_lib.categorical_column_with_vocabulary_list(
        'x', ['Y', 'N'])
    materialized = self._materialize_feature_column(feature_column, x)[:, 0]
    self.assertTrue(
        self._np_array_close(expect, materialized),
        'expect:{} != got:{}'.format(expect, materialized))
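The expected IDs above come straight from the vocabulary order: 'Y' maps to 0, 'N' to 1, and the out-of-vocabulary '?' to the default ID -1. A minimal standalone sketch of the same lookup, assuming TF 2.x eager execution (the tensor values here are illustrative, not part of the test):

```python
import tensorflow as tf

col = tf.feature_column.categorical_column_with_vocabulary_list('x', ['Y', 'N'])
# Densify through an indicator column so the lookup result is easy to inspect.
layer = tf.keras.layers.DenseFeatures([tf.feature_column.indicator_column(col)])
print(layer({'x': tf.constant([['Y'], ['N'], ['?']])}).numpy())
# [[1. 0.]   'Y' -> id 0
#  [0. 1.]   'N' -> id 1
#  [0. 0.]]  '?' -> default_value=-1, so no slot is active
```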
Example #2
  def test_saving_with_dense_features(self):
    cols = [
        feature_column_lib.numeric_column('a'),
        feature_column_lib.indicator_column(
            feature_column_lib.categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(1,), name='a'),
        'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
    }

    fc_layer = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(fc_layer)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = np.arange(10).reshape(10, 1).astype('str')

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not context.executing_eagerly():
        self.evaluate(lookup_ops.tables_initializer())

      self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)
Example #3
    def test_dnn_classifier(self):
        embedding = feature_column_lib.embedding_column(
            feature_column_lib.categorical_column_with_vocabulary_list(
                'wire_cast', ['kima', 'omar', 'stringer']), 8)
        dnn = estimator_lib.DNNClassifier(feature_columns=[embedding],
                                          hidden_units=[3, 1])

        def train_input_fn():
            return dataset_ops.Dataset.from_tensors(({
                'wire_cast': [['omar'], ['kima']]
            }, [[0], [1]])).repeat(3)

        def eval_input_fn():
            return dataset_ops.Dataset.from_tensors(({
                'wire_cast': [['stringer'], ['kima']]
            }, [[0], [1]])).repeat(2)

        evaluator = hooks_lib.InMemoryEvaluatorHook(dnn,
                                                    eval_input_fn,
                                                    name='in-memory')
        dnn.train(train_input_fn, hooks=[evaluator])
        self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))
        step_keyword_to_value = summary_step_keyword_to_value_mapping(
            dnn.eval_dir('in-memory'))

        final_metrics = dnn.evaluate(eval_input_fn)
        step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
        for summary_tag in final_metrics:
            if summary_tag == ops.GraphKeys.GLOBAL_STEP:
                continue
            self.assertEqual(final_metrics[summary_tag],
                             step_keyword_to_value[step][summary_tag])
Example #4
  def test_linear_model_numpy_input_fn(self):
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])
    # The linear model produces one output per example: net has shape [batch_size, 1].
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      bias = self._get_linear_model_bias()
      price_buckets_var = self._get_linear_model_column_var(price_buckets)
      body_style_var = self._get_linear_model_column_var(body_style)

      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
      sess.run(bias.assign([5.]))

      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))

      coord.request_stop()
      coord.join(threads)
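The expected predictions follow directly from the assigned weights. With boundaries [0., 10., 100.] and batch_size=2, only the first two inputs are consumed: price -1. falls in bucket 0 (weight 10.) and 'sedan' is vocabulary id 2 (weight -1000.), giving 10 - 1000 + 5 = -985; price 2. falls in bucket 1 (weight 100.) and 'hardtop' is id 0 (weight -10.), giving 100 - 10 + 5 = 95.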
Example #5
    def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self):
        """LinearClassifier with LinearSDCA with OOV features (-1 IDs)."""
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                sparse_tensor.SparseTensor(values=[2., 3., 1.],
                                           indices=[[0, 0], [1, 0], [2, 0]],
                                           dense_shape=[3, 5]),
                'country':
                sparse_tensor.SparseTensor(
                    # 'GB' is out of the vocabulary.
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 0], [2, 0]],
                    dense_shape=[3, 5])
            }, constant_op.constant([[1], [0], [1]])

        country = feature_column_lib.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN'])
        country_weighted_by_price = (
            feature_column_lib.weighted_categorical_column(country, 'price'))
        optimizer = linear.LinearSDCA(example_id_column='example_id',
                                      symmetric_l2_regularization=0.01)
        classifier = linear.LinearClassifierV2(
            feature_columns=[country_weighted_by_price], optimizer=optimizer)
        classifier.train(input_fn=input_fn, steps=100)
        loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
        self.assertLess(loss, 0.2)
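Because the vocabulary has no OOV buckets, the out-of-vocabulary 'GB' maps to the default ID -1 and contributes no active weight; the 'price' sparse tensor supplies the per-example weights that weighted_categorical_column attaches to the matched country IDs.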
Example #6
  def test_encode_listwise_features(self):
    with tf.Graph().as_default():
      # Batch size = 2, list_size = 2.
      features = {
          "query_length":
              tf.convert_to_tensor(value=[[1], [2]]),
          "utility":
              tf.convert_to_tensor(value=[[[1.0], [0.0]], [[0.0], [1.0]]]),
          "unigrams":
              tf.SparseTensor(
                  indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]],
                  values=["ranking", "regression", "classification", "ordinal"],
                  dense_shape=[2, 2, 1])
      }
      context_feature_columns = {
          "query_length":
              feature_column.numeric_column(
                  "query_length", shape=(1,), default_value=0, dtype=tf.int64)
      }
      example_feature_columns = {
          "utility":
              feature_column.numeric_column(
                  "utility", shape=(1,), default_value=0.0, dtype=tf.float32),
          "unigrams":
              feature_column.embedding_column(
                  feature_column.categorical_column_with_vocabulary_list(
                      "unigrams",
                      vocabulary_list=[
                          "ranking", "regression", "classification", "ordinal"
                      ]),
                  dimension=10)
      }

      with self.assertRaisesRegexp(
          ValueError,
          r"2nd dimension of tensor must be equal to input size: 3, but found .*"
      ):
        feature_lib.encode_listwise_features(
            features,
            input_size=3,
            context_feature_columns=context_feature_columns,
            example_feature_columns=example_feature_columns)

      context_features, example_features = feature_lib.encode_listwise_features(
          features,
          input_size=2,
          context_feature_columns=context_feature_columns,
          example_feature_columns=example_feature_columns)
      self.assertAllEqual(["query_length"], sorted(context_features))
      self.assertAllEqual(["unigrams", "utility"], sorted(example_features))
      self.assertAllEqual([2, 2, 10],
                          example_features["unigrams"].get_shape().as_list())
      with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.tables_initializer())
        context_features, example_features = sess.run(
            [context_features, example_features])
        self.assertAllEqual([[1], [2]], context_features["query_length"])
        self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]],
                            example_features["utility"])
Example #7
  def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    categorical = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    indicator_col = feature_column.indicator_column(categorical)
    bucketized_col = feature_column.bucketized_column(
        feature_column.numeric_column('f_1', dtype=dtypes.float32),
        BUCKET_BOUNDARIES)
    numeric_col = feature_column.numeric_column('f_2', dtype=dtypes.float32)

    labels = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
            'f_1': np.array([1, 1, 1, 1, 1]),
            'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
        },
        y=labels,
        num_epochs=None,
        batch_size=5,
        shuffle=False)
    feature_columns = [numeric_col, bucketized_col, indicator_col]

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=input_fn,
        feature_columns=feature_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)

    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['accuracy'], 1.0)
Example #8
    def test_encode_pointwise_features(self):
        with tf.Graph().as_default():
            # Batch size = 2, tf.Example input format.
            features = {
                "query_length":
                tf.convert_to_tensor(
                    value=[[1], [1]]),  # Repeated context feature.
                "utility":
                tf.convert_to_tensor(value=[[1.0], [0.0]]),
                "unigrams":
                tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                values=["ranking", "regression"],
                                dense_shape=[2, 1])
            }
            context_feature_columns = {
                "query_length":
                tf.feature_column.numeric_column("query_length",
                                                 shape=(1, ),
                                                 default_value=0,
                                                 dtype=tf.int64)
            }
            example_feature_columns = {
                "utility":
                tf.feature_column.numeric_column("utility",
                                                 shape=(1, ),
                                                 default_value=0.0,
                                                 dtype=tf.float32),
                "unigrams":
                tf.feature_column.embedding_column(
                    feature_column.categorical_column_with_vocabulary_list(
                        "unigrams",
                        vocabulary_list=[
                            "ranking", "regression", "classification",
                            "ordinal"
                        ]),
                    dimension=10)
            }

            (context_features,
             example_features) = feature_lib.encode_pointwise_features(
                 features,
                 context_feature_columns=context_feature_columns,
                 example_feature_columns=example_feature_columns)
            self.assertAllEqual(["query_length"], sorted(context_features))
            self.assertAllEqual(["unigrams", "utility"],
                                sorted(example_features))
            # Unigrams dense tensor has shape: [batch_size=2, list_size=1, dim=10].
            self.assertAllEqual(
                [2, 1, 10], example_features["unigrams"].get_shape().as_list())
            with tf.compat.v1.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                sess.run(tf.compat.v1.tables_initializer())
                context_features, example_features = sess.run(
                    [context_features, example_features])
                self.assertAllEqual([[1], [1]],
                                    context_features["query_length"])
                # Utility tensor has shape: [batch_size=2, list_size=1, 1].
                self.assertAllEqual([[[1.0]], [[0.0]]],
                                    example_features["utility"])
Example #9
def sequence_categorical_column_with_vocabulary_list(
    key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
  """A sequence of categorical terms where ids use an in-memory list.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into a dense representation for input to a sequence NN,
  such as an RNN.

  Example:

  ```python
  colors = sequence_categorical_column_with_vocabulary_list(
      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'),
      num_oov_buckets=2)
  colors_embedding = embedding_column(colors, dimension=3)
  columns = [colors_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    vocabulary_list: An ordered iterable defining the vocabulary. Each feature
      is mapped to the index of its value (if present) in `vocabulary_list`.
      Must be castable to `dtype`.
    dtype: The type of features. Only string and integer types are supported.
      If `None`, it will be inferred from `vocabulary_list`.
    default_value: The integer ID value to return for out-of-vocabulary feature
      values; defaults to `-1`. This cannot be specified with a positive
      `num_oov_buckets`.
    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
      `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a
      hash of the input value. A positive `num_oov_buckets` cannot be specified
      with `default_value`.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
    ValueError: `num_oov_buckets` is a negative integer.
    ValueError: `num_oov_buckets` and `default_value` are both specified.
    ValueError: if `dtype` is not integer or string.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_vocabulary_list(
          key=key,
          vocabulary_list=vocabulary_list,
          dtype=dtype,
          default_value=default_value,
          num_oov_buckets=num_oov_buckets))
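As a complement to the docstring example, here is a small eager-mode sketch of the OOV-bucket layout, assuming a TF 2.x release where `tf.keras.experimental.SequenceFeatures` is available (the feature values are illustrative):

```python
import tensorflow as tf

colors = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
    key='colors', vocabulary_list=('R', 'G', 'B', 'Y'), num_oov_buckets=2)
# In-vocabulary values get ids 0..3; OOV values hash into ids 4 or 5.
layer = tf.keras.experimental.SequenceFeatures(
    [tf.feature_column.indicator_column(colors)])

# One sequence of length 3; 'P' is out of vocabulary.
features = {'colors': tf.ragged.constant([['R', 'Y', 'P']]).to_sparse()}
sequence_input, sequence_length = layer(features)
print(sequence_input.shape)     # (1, 3, 6): 4 vocabulary ids + 2 OOV buckets
print(sequence_length.numpy())  # [3]
```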
Example #10
    def test_save_load_with_dense_features(self, tmpdir, api, loss, optimizer,
                                           metrics):
        if optimizer is None:
            pytest.skip()
        cols = [
            feature_column_lib.numeric_column("a"),
            feature_column_lib.indicator_column(
                feature_column_lib.categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a": keras.layers.Input(shape=(1, ), name="a"),
            "b": keras.layers.Input(shape=(1, ), name="b", dtype="string"),
        }

        fc_layer = dense_features.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=loss,
            optimizer=optimizer,
            metrics=[metrics],
        )

        tiledb_uri = os.path.join(tmpdir, "model_array")
        tiledb_model_obj = TensorflowKerasTileDBModel(uri=tiledb_uri,
                                                      model=model)
        tiledb_model_obj.save(include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(getattr(model.optimizer,
                                                    "weights"))
        loaded_opt_weights = batch_get_value(
            getattr(loaded_model.optimizer, "weights"))

        # Assert optimizer weights are equal
        for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                     loaded_opt_weights):
            np.testing.assert_array_equal(weight_model, weight_loaded_model)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype("str")

        # Assert model predictions are equal
        np.testing.assert_array_equal(
            loaded_model.predict({
                "a": inputs_a,
                "b": inputs_b
            }),
            model.predict({
                "a": inputs_a,
                "b": inputs_b
            }),
        )
Example #11
    def test_forward_in_exported_sparse(self):
        features_columns = [
            fc.indicator_column(
                fc.categorical_column_with_vocabulary_list('x', range(10)))
        ]

        classifier = linear.LinearClassifier(feature_columns=features_columns)

        def train_input_fn():
            dataset = dataset_ops.Dataset.from_tensors({
                'x':
                sparse_tensor.SparseTensor(values=[1, 2, 3],
                                           indices=[[0, 0], [1, 0], [1, 1]],
                                           dense_shape=[2, 2]),
                'labels': [[0], [1]]
            })

            def _split(x):
                labels = x.pop('labels')
                return x, labels

            dataset = dataset.map(_split)
            return dataset

        classifier.train(train_input_fn, max_steps=1)

        classifier = extenders.forward_features(classifier,
                                                keys=['x'],
                                                sparse_default_values={'x': 0})

        def serving_input_fn():
            features_ph = array_ops.placeholder(dtype=dtypes.int32,
                                                name='x',
                                                shape=[None])
            features = {'x': layers.dense_to_sparse(features_ph)}
            return estimator_lib.export.ServingInputReceiver(
                features, {'x': features_ph})

        export_dir, tmpdir = self._export_estimator(classifier,
                                                    serving_input_fn)
        prediction_fn = from_saved_model(export_dir,
                                         signature_def_key='predict')

        features = (0, 2)
        prediction = prediction_fn({'x': features})

        self.assertIn('x', prediction)
        self.assertEqual(features, tuple(prediction['x']))
        gfile.DeleteRecursively(tmpdir)
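Note the sparse_default_values={'x': 0} argument: forward_features must densify the forwarded SparseTensor before it can be returned alongside the predictions, and 0 fills the missing positions.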
Example #12
    def test_functional_input_layer_with_numpy_input_fn(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in input_layer
        price = fc.numeric_column('price')
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        # one_hot_body_style has 3 dims in input_layer.
        one_hot_body_style = fc.indicator_column(body_style)
        # embedded_body_style has 5 dims in input_layer.
        embedded_body_style = fc.embedding_column(body_style,
                                                  dimension=5,
                                                  initializer=_initializer)

        input_fn = numpy_io.numpy_input_fn(x={
            'price':
            np.array([11., 12., 13., 14.]),
            'body-style':
            np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
                                           batch_size=2,
                                           shuffle=False)
        features = input_fn()
        net = fc.input_layer(features,
                             [price, one_hot_body_style, embedded_body_style])
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

            # Each row is formed by concatenating `embedded_body_style`,
            # `one_hot_body_style`, and `price` in order.
            self.assertAllEqual([[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                                 [1., 2., 3., 4., 5., 1., 0., 0., 12]],
                                sess.run(net))

            coord.request_stop()
            coord.join(threads)
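The expected rows decode as follows: with batch_size=2, the first input 'sedan' has vocabulary id 2, selecting embedding row (11., ..., 15.) and one-hot [0, 0, 1], followed by price 11.; the second, 'hardtop', has id 0, giving embedding row (1., ..., 5.), one-hot [1, 0, 0], and price 12.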
Example #14
    def test_linear_model_impl_numpy_input_fn(self):
        price = fc.numeric_column('price')
        price_buckets = fc.bucketized_column(price,
                                             boundaries=[
                                                 0.,
                                                 10.,
                                                 100.,
                                             ])
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

        input_fn = numpy_io.numpy_input_fn(x={
            'price':
            np.array([-1., 2., 13., 104.]),
            'body-style':
            np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
                                           batch_size=2,
                                           shuffle=False)
        features = input_fn()
        net = self._get_keras_linear_model_predictions(
            features, [price_buckets, body_style])
        # The linear model produces one output per example: net has shape [batch_size, 1].
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

            bias = self._get_linear_model_bias()
            price_buckets_var = self._get_linear_model_column_var(
                price_buckets)
            body_style_var = self._get_linear_model_column_var(body_style)

            sess.run(
                price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
            sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
            sess.run(bias.assign([5.]))

            self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]],
                                sess.run(net))

            coord.request_stop()
            coord.join(threads)

    def test_sequential_model_with_crossed_column(self):
        feature_columns = []
        age_buckets = fc.bucketized_column(
            fc.numeric_column('age'),
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
        feature_columns.append(age_buckets)

        # indicator cols
        thal = fc.categorical_column_with_vocabulary_list(
            'thal', ['fixed', 'normal', 'reversible'])

        crossed_feature = fc.crossed_column([age_buckets, thal],
                                            hash_bucket_size=1000)
        crossed_feature = fc.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature)

        feature_layer = df.DenseFeatures(feature_columns)

        model = keras.models.Sequential([
            feature_layer,
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        age_data = np.random.randint(10, 100, size=100)
        thal_data = np.random.choice(['fixed', 'normal', 'reversible'],
                                     size=100)
        inp_x = {'age': age_data, 'thal': thal_data}
        inp_y = np.random.randint(0, 2, size=100)  # random binary labels in {0, 1}
        ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        model.fit(ds, epochs=1)
        model.fit(ds, epochs=1)
        model.evaluate(ds)
        model.predict(ds)