Example #1
0
  def test_encode_listwise_features(self):
    """Listwise encoding: batch of 2 queries, 2 examples per query."""
    with tf.Graph().as_default():
      feature_dict = {
          "query_length": tf.convert_to_tensor(value=[[1], [2]]),
          "utility": tf.convert_to_tensor(
              value=[[[1.0], [0.0]], [[0.0], [1.0]]]),
          "unigrams": tf.SparseTensor(
              indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]],
              values=["ranking", "regression", "classification", "ordinal"],
              dense_shape=[2, 2, 1]),
      }
      context_cols = {
          "query_length": feature_column.numeric_column(
              "query_length", shape=(1,), default_value=0, dtype=tf.int64),
      }
      example_cols = {
          "utility": feature_column.numeric_column(
              "utility", shape=(1,), default_value=0.0, dtype=tf.float32),
          "unigrams": feature_column.embedding_column(
              feature_column.categorical_column_with_vocabulary_list(
                  "unigrams",
                  vocabulary_list=[
                      "ranking", "regression", "classification", "ordinal"
                  ]),
              dimension=10),
      }

      # A declared input_size that disagrees with the tensors' list
      # dimension must be rejected.
      with self.assertRaisesRegexp(
          ValueError,
          r"2nd dimension of tensor must be equal to input size: 3, but found .*"
      ):
        feature_lib.encode_listwise_features(
            feature_dict,
            input_size=3,
            context_feature_columns=context_cols,
            example_feature_columns=example_cols)

      # With the matching input_size the encoding succeeds.
      encoded_context, encoded_examples = feature_lib.encode_listwise_features(
          feature_dict,
          input_size=2,
          context_feature_columns=context_cols,
          example_feature_columns=example_cols)
      self.assertAllEqual(["query_length"], sorted(encoded_context))
      self.assertAllEqual(["unigrams", "utility"], sorted(encoded_examples))
      # Embedded unigrams: [batch_size=2, list_size=2, dimension=10].
      self.assertAllEqual([2, 2, 10],
                          encoded_examples["unigrams"].get_shape().as_list())
      with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.tables_initializer())
        encoded_context, encoded_examples = sess.run(
            [encoded_context, encoded_examples])
        self.assertAllEqual([[1], [2]], encoded_context["query_length"])
        self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]],
                            encoded_examples["utility"])
Example #2
0
    def test_dnn_classifier(self):
        """In-memory evaluator summaries match a standalone evaluation."""
        wire_cast_embedding = feature_column_lib.embedding_column(
            feature_column_lib.categorical_column_with_vocabulary_list(
                'wire_cast', ['kima', 'omar', 'stringer']), 8)
        classifier = estimator_lib.DNNClassifier(
            feature_columns=[wire_cast_embedding], hidden_units=[3, 1])

        def train_input_fn():
            batch = ({'wire_cast': [['omar'], ['kima']]}, [[0], [1]])
            return dataset_ops.Dataset.from_tensors(batch).repeat(3)

        def eval_input_fn():
            batch = ({'wire_cast': [['stringer'], ['kima']]}, [[0], [1]])
            return dataset_ops.Dataset.from_tensors(batch).repeat(2)

        eval_hook = hooks_lib.InMemoryEvaluatorHook(
            classifier, eval_input_fn, name='in-memory')
        classifier.train(train_input_fn, hooks=[eval_hook])
        self.assertTrue(os.path.isdir(classifier.eval_dir('in-memory')))
        step_keyword_to_value = summary_step_keyword_to_value_mapping(
            classifier.eval_dir('in-memory'))

        # Every metric written by the hook at the final global step must
        # equal the value from a fresh evaluate() call.
        final_metrics = classifier.evaluate(eval_input_fn)
        step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
        for tag in final_metrics:
            if tag == ops.GraphKeys.GLOBAL_STEP:
                continue
            self.assertEqual(final_metrics[tag],
                             step_keyword_to_value[step][tag])
Example #3
0
    def test_encode_pointwise_features(self):
        """Pointwise encoding: batch of 2 single-example records."""
        # tf.Example input format; the context feature is repeated per record.
        input_features = {
            "query_length": ops.convert_to_tensor([[1], [1]]),
            "utility": ops.convert_to_tensor([[1.0], [0.0]]),
            "unigrams": sparse_tensor_lib.SparseTensor(
                indices=[[0, 0], [1, 0]],
                values=["ranking", "regression"],
                dense_shape=[2, 1]),
        }
        context_cols = {
            "query_length": feature_column.numeric_column(
                "query_length", shape=(1, ), default_value=0,
                dtype=dtypes.int64),
        }
        example_cols = {
            "utility": feature_column.numeric_column(
                "utility", shape=(1, ), default_value=0.0,
                dtype=dtypes.float32),
            "unigrams": feature_column.embedding_column(
                feature_column.categorical_column_with_vocabulary_list(
                    "unigrams",
                    vocabulary_list=[
                        "ranking", "regression", "classification", "ordinal"
                    ]),
                dimension=10),
        }

        encoded_context, encoded_examples = (
            feature_lib.encode_pointwise_features(
                input_features,
                context_feature_columns=context_cols,
                example_feature_columns=example_cols))
        self.assertAllEqual(["query_length"], sorted(encoded_context))
        self.assertAllEqual(["unigrams", "utility"], sorted(encoded_examples))
        # Unigrams dense tensor has shape: [batch_size=2, list_size=1, dim=10].
        self.assertAllEqual([2, 1, 10],
                            encoded_examples["unigrams"].get_shape().as_list())
        with session.Session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(lookup_ops.tables_initializer())
            encoded_context, encoded_examples = sess.run(
                [encoded_context, encoded_examples])
            self.assertAllEqual([[1], [1]],
                                encoded_context["query_length"])
            # Utility tensor has shape: [batch_size=2, list_size=1, 1].
            self.assertAllEqual([[[1.0]], [[0.0]]],
                                encoded_examples["utility"])
Example #4
0
    def test_functional_input_layer_with_numpy_input_fn(self):
        """input_layer concatenates numeric, one-hot, and embedded columns."""
        # Fixed embedding table: row i is the embedding vector for id i.
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _constant_initializer(shape, dtype, partition_info):
            # The requested shape/dtype are ignored; the table is fixed.
            del shape, dtype, partition_info
            return embedding_values

        # Input-layer widths: price 1, one-hot 3, embedding 5.
        price = fc.numeric_column('price')
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = fc.indicator_column(body_style)
        embedded_body_style = fc.embedding_column(
            body_style, dimension=5, initializer=_constant_initializer)

        input_fn = numpy_io.numpy_input_fn(
            x={
                'price': np.array([11., 12., 13., 14.]),
                'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
            },
            batch_size=2,
            shuffle=False)
        net = fc.input_layer(
            input_fn(), [price, one_hot_body_style, embedded_body_style])
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

            # First batch of two rows; each row concatenates
            # `embedded_body_style`, `one_hot_body_style`, and `price`.
            self.assertAllEqual(
                [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                 [1., 2., 3., 4., 5., 1., 0., 0., 12]],
                sess.run(net))

            coord.request_stop()
            coord.join(threads)
Example #5
0
  def test_functional_input_layer_with_numpy_input_fn(self):
    """input_layer concatenates numeric, one-hot, and embedded columns."""
    # Deterministic embedding table; row i is the vector for id i.
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )

    def _init_embedding(shape, dtype, partition_info):
      # Ignore the requested shape/dtype; always return the fixed table.
      del shape, dtype, partition_info
      return embedding_values

    # Columns contribute 1 (numeric) + 3 (one-hot) + 5 (embedding) dims.
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    one_hot_body_style = fc.indicator_column(body_style)
    embedded_body_style = fc.embedding_column(
        body_style, dimension=5, initializer=_init_embedding)

    numpy_features = {
        'price': np.array([11., 12., 13., 14.]),
        'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
    }
    input_fn = numpy_io.numpy_input_fn(
        x=numpy_features, batch_size=2, shuffle=False)
    feature_tensors = input_fn()
    net = fc.input_layer(
        feature_tensors, [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      # First batch of two rows; each row is the concatenation of
      # `embedded_body_style`, `one_hot_body_style`, and `price`.
      self.assertAllEqual(
          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
          sess.run(net))

      coord.request_stop()
      coord.join(threads)
Example #6
0
 def test_one_shot_prediction_head_export(self, estimator_factory):
   """Trains a one-shot-prediction time-series estimator, exports it twice
   (raw-tensor and tf.Example-parsing serving signatures), and verifies
   both exported SavedModels produce predictions of the expected shape.
   """
   def _new_temp_dir():
     # Fresh unique directory per call so successive exports don't collide.
     return os.path.join(test.get_temp_dir(), str(ops.uid()))
   model_dir = _new_temp_dir()
   # Two exogenous features: a 2-d numeric one and a hashed+embedded
   # categorical one.
   categorical_column = feature_column.categorical_column_with_hash_bucket(
       key="categorical_exogenous_feature", hash_bucket_size=16)
   exogenous_feature_columns = [
       feature_column.numeric_column(
           "2d_exogenous_feature", shape=(2,)),
       feature_column.embedding_column(
           categorical_column=categorical_column, dimension=10)]
   estimator = estimator_factory(
       model_dir=model_dir,
       exogenous_feature_columns=exogenous_feature_columns,
       head_type=ts_head_lib.OneShotPredictionHead)
   # 20 timesteps of training data with 5 value series per step.
   train_features = {
       feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
           20, dtype=numpy.int64),
       feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
           20, dtype=numpy.float32)[:, None], [1, 5]),
       "2d_exogenous_feature": numpy.ones([20, 2]),
       "categorical_exogenous_feature": numpy.array(
           ["strkey"] * 20)[:, None]
   }
   train_input_fn = input_pipeline.RandomWindowInputFn(
       input_pipeline.NumpyReader(train_features), shuffle_seed=2,
       num_threads=1, batch_size=16, window_size=16)
   estimator.train(input_fn=train_input_fn, steps=5)
   result = estimator.evaluate(input_fn=train_input_fn, steps=1)
   self.assertIn("average_loss", result)
   # One-shot heads should not expose model state in eval results.
   self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
   # First export: raw-tensor serving signature.
   input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
   export_location = estimator.export_saved_model(_new_temp_dir(),
                                                  input_receiver_fn)
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       # PREDICT should be the only signature in the export.
       self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                        list(signatures.signature_def.keys()))
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       six.assertCountEqual(
           self,
           [feature_keys.FilteringFeatures.TIMES,
            feature_keys.FilteringFeatures.VALUES,
            "2d_exogenous_feature",
            "categorical_exogenous_feature"],
           predict_signature.inputs.keys())
       # Batch of 2: 35 timestamps, but only 20 observed values — the
       # remaining 15 steps are to be predicted.
       features = {
           feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
               numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
           feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
               20, dtype=numpy.float32)[None, :, None], [2, 1, 5]),
           "2d_exogenous_feature": numpy.ones([2, 35, 2]),
           "categorical_exogenous_feature": numpy.tile(numpy.array(
               ["strkey"] * 35)[None, :, None], [2, 1, 1])
       }
       # Map signature input/output names to graph tensors for feeding.
       feeds = {
           graph.as_graph_element(input_value.name): features[input_key]
           for input_key, input_value in predict_signature.inputs.items()}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       # Predictions cover [batch=2, horizon=15, series=5].
       self.assertEqual((2, 15, 5), output["mean"].shape)
   # Build a parsing input function, then make a tf.Example for it to parse.
   export_location = estimator.export_saved_model(
       _new_temp_dir(),
       estimator.build_one_shot_parsing_serving_input_receiver_fn(
           filtering_length=20, prediction_length=15))
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       example = example_pb2.Example()
       times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
       values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
       times.int64_list.value.extend(range(35))
       # 20 observed steps, 5 values each (flattened row-major).
       for i in range(20):
         values.float_list.value.extend(
             [float(i) * 2. + feature_number
              for feature_number in range(5)])
       real_feature = example.features.feature["2d_exogenous_feature"]
       # NOTE(review): local name "categortical_feature" is a typo for
       # "categorical_feature"; harmless but worth fixing at some point.
       categortical_feature = example.features.feature[
           "categorical_exogenous_feature"]
       # Exogenous features span all 35 steps (observed + predicted).
       for i in range(35):
         real_feature.float_list.value.extend([1, 1])
         categortical_feature.bytes_list.value.append(b"strkey")
       # Serialize the tf.Example for feeding to the Session
       examples = [example.SerializeToString()] * 2
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       # The parsing receiver exposes a single serialized-Example input.
       ((_, input_value),) = predict_signature.inputs.items()
       feeds = {graph.as_graph_element(input_value.name): examples}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       self.assertEqual((2, 15, 5), output["mean"].shape)
Example #7
0
    def test_encode_features_sequence_column(self):
        """Sequence columns are encoded to dense, zero-padded tensors."""
        with tf.Graph().as_default():
            num_id_buckets = 4
            # Id sequences; -1 entries are padding and are ignored.
            input_seq_ids = np.array([
                [3, -1, -1],  # example 0
                [0, 1, -1],  # example 1
            ])
            # Sparse numeric sequences: [1.] for example 0 and
            # [2., 3.] for example 1.
            input_seq_nums = tf.sparse.SparseTensor(
                indices=[[0, 0], [1, 0], [1, 1]],
                values=[1., 2., 3.],
                dense_shape=(2, 3))
            input_features = {
                "seq_ids": input_seq_ids,
                "seq_nums": input_seq_nums,
            }

            # Fixed embedding table: row i is the embedding for id i.
            embedding_dimension = 2
            embedding_values = (
                (1., 2.),  # id 0
                (3., 5.),  # id 1
                (7., 11.),  # id 2
                (9., 13.)  # id 3
            )

            # Expected dense encodings, zero-padded to the max length.
            expected_seq_embed = [
                [[9., 13.], [0., 0.], [0., 0.]],  # example 0
                [[1., 2.], [3., 5.], [0., 0.]],  # example 1
            ]
            expected_seq_nums = [
                [[1.], [0.], [0.]],  # example 0
                [[2.], [3.], [0.]],  # example 1
            ]

            # Build the two sequence columns under test.
            id_column = (
                feature_column.sequence_categorical_column_with_identity(
                    key="seq_ids", num_buckets=num_id_buckets))
            embed_column = feature_column.embedding_column(
                id_column,
                dimension=embedding_dimension,
                initializer=(
                    lambda shape, dtype, partition_info: embedding_values))
            nums_column = feature_column.sequence_numeric_column(
                "seq_nums")

            cols_to_tensors = feature_lib.encode_features(
                input_features, [embed_column, nums_column],
                mode=tf.estimator.ModeKeys.EVAL)
            actual_seq_embed = cols_to_tensors[embed_column]
            actual_seq_nums = cols_to_tensors[nums_column]

            # The embedding table is the only global variable created.
            global_vars = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
            embedding_var = global_vars[0]
            with tf.compat.v1.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                sess.run(tf.compat.v1.tables_initializer())
                self.assertAllEqual(embedding_values, embedding_var.eval())
                self.assertAllEqual(expected_seq_embed, actual_seq_embed)
                self.assertAllEqual(expected_seq_nums, actual_seq_nums)