def _test_parsed_sequence_example(
      self, col_name, col_fn, col_arg, shape, values):
    """Helper function to check that each FeatureColumn parses correctly.

    Args:
      col_name: string, name to give to the feature column. Should match
        the name that the column will parse out of the features dict.
      col_fn: function used to create the feature column. For example,
        sequence_numeric_column.
      col_arg: second arg that the target feature column is expecting.
      shape: the expected dense_shape of the feature after parsing into
        a SparseTensor.
      values: the expected values at indices [0, 2, 6] of the feature
        after parsing into a SparseTensor.
    """
    example = _make_sequence_example()
    columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
        col_fn(col_name, col_arg)
    ]
    context, seq_features = parsing_ops.parse_single_sequence_example(
        example.SerializeToString(),
        context_features=fc.make_parse_example_spec(columns[:2]),
        sequence_features=fc.make_parse_example_spec(columns[2:]))

    with self.cached_session() as sess:
      ctx_result, seq_result = sess.run([context, seq_features])
      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
      self.assertEqual(
          list(seq_result[col_name].values[[0, 2, 6]]), values)
      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
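
For reference, a minimal standalone sketch (not part of the test above) of the parsing spec that make_parse_example_spec derives from the two context columns; the exact feature types shown in the comment are assumptions that can vary across TensorFlow versions:

import tensorflow as tf

fc = tf.feature_column
context_columns = [
    fc.categorical_column_with_identity('int_ctx', num_buckets=100),
    fc.numeric_column('float_ctx'),
]
# Expected roughly:
# {'int_ctx': VarLenFeature(dtype=tf.int64),
#  'float_ctx': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None)}
print(fc.make_parse_example_spec(context_columns))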
Example #2
 def _parse_example(example):
   ctx, seq = parsing_ops.parse_single_sequence_example(
       example,
       context_features=fc.make_parse_example_spec(ctx_cols),
       sequence_features=fc.make_parse_example_spec(seq_cols))
   ctx.update(seq)
   return ctx
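
Hypothetical usage of the fragment above, assuming ctx_cols and seq_cols are feature-column lists defined in the enclosing scope and serialized_dataset yields serialized SequenceExample strings:

# merged = serialized_dataset.map(_parse_example)
# Each element is now a single dict holding both context and sequence
# features, since ctx.update(seq) merges the two parse results.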
Example #3
    def _test_complete_flow(self, train_input_fn, eval_input_fn,
                            predict_input_fn, input_dimension, label_dimension,
                            prediction_length):
        feature_columns = [
            feature_column_lib.numeric_column('x', shape=(input_dimension, ))
        ]
        est = self._linear_regressor_fn(feature_columns=feature_columns,
                                        label_dimension=label_dimension,
                                        model_dir=self._model_dir)

        # TRAIN
        # learn y = x
        est.train(train_input_fn, steps=200)

        # EVALUATE
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

        # PREDICT
        predictions = np.array(
            [x['predictions'] for x in est.predict(predict_input_fn)])
        self.assertAllEqual((prediction_length, label_dimension),
                            predictions.shape)

        # EXPORT
        feature_spec = feature_column_lib.make_parse_example_spec(
            feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
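
For context, a minimal sketch of input_fns a test could pass to the helper above; the names, dimensions, and the TF 1.x numpy_input_fn API path are assumptions:

import numpy as np
import tensorflow.compat.v1 as tf

# 10 rows of 2-d data, i.e. input_dimension=2, label_dimension=2,
# prediction_length=10 in the helper's terms.
data = np.linspace(0., 2., 10 * 2, dtype=np.float32).reshape(10, 2)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, y=data, batch_size=10, num_epochs=None, shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, y=data, batch_size=10, num_epochs=1, shuffle=False)
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, batch_size=10, num_epochs=1, shuffle=False)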
Example #4
    def _test_complete_flow(self, train_input_fn, eval_input_fn,
                            predict_input_fn, input_dimension, label_dimension,
                            batch_size):
        feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        est = linear.LinearEstimator(
            head=head_lib._regression_head(label_dimension=label_dimension),
            feature_columns=feature_columns,
            model_dir=self._model_dir)

        # Train
        num_steps = 10
        est.train(train_input_fn, steps=num_steps)

        # Evaluate
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        # Predict
        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in est.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        # Export
        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #5
  def _test_complete_flow(
      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
      label_dimension, batch_size):
    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))]
    est = linear.LinearEstimator(
        head=head_lib.regression_head(label_dimension=label_dimension),
        feature_columns=feature_columns,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
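
A hypothetical call site for the helper above; the dimensions are illustrative only:

# self._test_complete_flow(
#     train_input_fn, eval_input_fn, predict_input_fn,
#     input_dimension=2, label_dimension=2, batch_size=10)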
Example #6
  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, batch_size):
    feature_columns = [feature_column.numeric_column('x', shape=(input_dimension,))]

    est = dnn.DNNRegressorV2(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
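
Note: unlike the surrounding examples, this V2 estimator calls export_saved_model; export_savedmodel is the older, since-deprecated spelling of the same export entry point.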
Example #7
  def _test_complete_flow(
      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
      n_classes, batch_size):
    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))]
    est = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
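
For reference, a standalone sketch (the feature name and shape are assumptions) of the round trip the exported parsing receiver performs: build a tf.train.Example and parse it with the spec derived from the columns:

import tensorflow as tf

columns = [tf.feature_column.numeric_column('x', shape=(2,))]
spec = tf.feature_column.make_parse_example_spec(columns)
example = tf.train.Example(features=tf.train.Features(feature={
    'x': tf.train.Feature(float_list=tf.train.FloatList(value=[1., 2.]))}))
# Yields {'x': <tensor of shape (2,)>}, mirroring what the serving graph
# does with its serialized-examples placeholder.
parsed = tf.io.parse_single_example(example.SerializeToString(), spec)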
Example #8
  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size):
    feature_columns = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #9
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = (
       array_ops.placeholder(
           name=feature_keys.TrainEvalFeatures.TIMES,
           dtype=dtypes.int64,
           shape=[default_batch_size, default_series_length]))
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size
                   if default_batch_size else 0, default_series_length
                   if default_series_length else 0, self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape, value_dtype) in (
         exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length]
                        + batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   # Models may not know the shape of their state without creating some
   # variables/ops. Avoid polluting the default graph by making a new one. We
   # use only static metadata from the returned Tensors.
   with ops.Graph().as_default():
     self._model.initialize_graph()
     model_start_state = self._model.get_start_state()
   for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary(
       model_start_state).items():
     state_shape_with_batch = tensor_shape.TensorShape(
         (default_batch_size,)).concatenate(state_tensor.get_shape())
     placeholders[prefixed_state_name] = array_ops.placeholder(
         name=prefixed_state_name,
         shape=state_shape_with_batch,
         dtype=state_tensor.dtype)
   return export_lib.ServingInputReceiver(placeholders, placeholders)
Example #10
  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, prediction_length):
    feature_columns = [
        feature_column_lib.numeric_column('x', shape=(input_dimension,))
    ]
    est = _baseline_estimator_fn(
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #11
    def test_complete_flow_with_mode(self, distribution):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices),
            shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                y=data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            # TODO(isaprykin): Work around the colocate_with error.
            dnn_optimizer=adagrad.AdagradOptimizer(0.001),
            linear_optimizer=adagrad.AdagradOptimizer(0.001),
            config=run_config.RunConfig(train_distribute=distribution))

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #12
  def test_complete_flow_with_mode(self, distribution):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
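
Relative to the previous example, this variant also distributes evaluation; the relevant difference is the eval_distribute argument:

# config = run_config.RunConfig(
#     train_distribute=distribution, eval_distribute=distribution)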
Example #13
 def _serving_input_receiver_fn():
     """A receiver function to be passed to export_savedmodel."""
     placeholders = {}
     time_placeholder = array_ops.placeholder(
         name=feature_keys.TrainEvalFeatures.TIMES,
         dtype=dtypes.int64,
         shape=[default_batch_size, default_series_length])
     placeholders[
         feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
     # Values are only necessary when filtering. For prediction the default
     # value will be ignored.
     placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
         array_ops.placeholder_with_default(
             name=feature_keys.TrainEvalFeatures.VALUES,
             input=array_ops.zeros(shape=[
                 default_batch_size if default_batch_size else 0,
                 default_series_length if default_series_length else 0,
                 self._model.num_features
             ],
                                   dtype=self._model.dtype),
             shape=(default_batch_size, default_series_length,
                    self._model.num_features)))
     if self._model.exogenous_feature_columns:
         with ops.Graph().as_default():
             # Default placeholders have only an unknown batch dimension. Make them
             # in a separate graph, then splice in the series length to the shapes
             # and re-create them in the outer graph.
             parsed_features = (feature_column.make_parse_example_spec(
                 self._model.exogenous_feature_columns))
             placeholder_features = parsing_ops.parse_example(
                 serialized=array_ops.placeholder(shape=[None],
                                                  dtype=dtypes.string),
                 features=parsed_features)
             exogenous_feature_shapes = {
                 key: (value.get_shape(), value.dtype)
                 for key, value in placeholder_features.items()
             }
         for feature_key, (batch_only_feature_shape, value_dtype) in (
                 exogenous_feature_shapes.items()):
             batch_only_feature_shape = (
                 batch_only_feature_shape.with_rank_at_least(
                     1).as_list())
             feature_shape = (
                 [default_batch_size, default_series_length] +
                 batch_only_feature_shape[1:])
             placeholders[feature_key] = array_ops.placeholder(
                 dtype=value_dtype,
                 name=feature_key,
                 shape=feature_shape)
     batch_size_tensor = array_ops.shape(time_placeholder)[0]
     placeholders.update(
         self._model_start_state_placeholders(
             batch_size_tensor, static_batch_size=default_batch_size))
     return export_lib.ServingInputReceiver(placeholders, placeholders)
Example #14
  def test_complete_flow(self):
    label_dimension = 2
    batch_size = 10
    feature_columns = [feature_column.numeric_column('x', shape=(2,))]
    est = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    num_steps = 200
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        batch_size=batch_size,
        shuffle=False)
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
    # TODO(ptucker): Deterministic test for predicted values?

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #15
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   time_placeholder = array_ops.placeholder(
       name=feature_keys.TrainEvalFeatures.TIMES,
       dtype=dtypes.int64,
       shape=[default_batch_size, default_series_length])
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size
                   if default_batch_size else 0, default_series_length
                   if default_series_length else 0, self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape, value_dtype) in (
         exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length]
                        + batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   batch_size_tensor = array_ops.shape(time_placeholder)[0]
   placeholders.update(
       self._model_start_state_placeholders(
           batch_size_tensor, static_batch_size=default_batch_size))
   return export_lib.ServingInputReceiver(placeholders, placeholders)
Example #16
    def test_complete_flow(self):
        label_dimension = 2
        batch_size = 10
        feature_columns = [feature_column_lib.numeric_column('x', shape=(2, ))]
        est = linear.LinearRegressor(feature_columns=feature_columns,
                                     label_dimension=label_dimension,
                                     model_dir=self._model_dir)
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)

        # TRAIN
        # learn y = x
        train_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                 y=data,
                                                 batch_size=batch_size,
                                                 num_epochs=None,
                                                 shuffle=True)
        est.train(train_input_fn, steps=200)

        # EVALUATE
        eval_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                y=data,
                                                batch_size=batch_size,
                                                num_epochs=1,
                                                shuffle=False)
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

        # PREDICT
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   y=None,
                                                   batch_size=batch_size,
                                                   num_epochs=1,
                                                   shuffle=False)
        predictions = list(
            [x['predictions'] for x in est.predict(predict_input_fn)])
        self.assertAllClose(data, predictions, atol=0.01)

        # EXPORT
        feature_spec = feature_column_lib.make_parse_example_spec(
            feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #17
 def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                         input_dimension, label_dimension, batch_size,
                         fc_impl):
   linear_feature_columns = [
       fc_impl.numeric_column('x', shape=(input_dimension,))
   ]
   dnn_feature_columns = [
       fc_impl.numeric_column('x', shape=(input_dimension,))
   ]
   feature_columns = linear_feature_columns + dnn_feature_columns
   feature_spec = feature_column.make_parse_example_spec(feature_columns)
   self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                   feature_spec, train_input_fn, eval_input_fn,
                                   predict_input_fn, input_dimension,
                                   label_dimension, batch_size)
Example #18
 def _serving_input_receiver_fn():
     """A receiver function to be passed to export_savedmodel."""
     times_column = feature_column.numeric_column(
         key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
     values_column = feature_column.numeric_column(
         key=feature_keys.TrainEvalFeatures.VALUES,
         dtype=values_input_dtype,
         shape=(self._model.num_features, ))
     parsed_features_no_sequence = (
         feature_column.make_parse_example_spec(
             list(self._model.exogenous_feature_columns) +
             [times_column, values_column]))
     parsed_features = {}
     for key, feature_spec in parsed_features_no_sequence.items():
         if isinstance(feature_spec, parsing_ops.FixedLenFeature):
             if key == feature_keys.TrainEvalFeatures.VALUES:
                 parsed_features[key] = feature_spec._replace(
                     shape=((values_proto_length, ) +
                            feature_spec.shape))
             else:
                 parsed_features[key] = feature_spec._replace(
                     shape=((filtering_length + prediction_length, ) +
                            feature_spec.shape))
         elif feature_spec.dtype == dtypes.string:
             parsed_features[key] = parsing_ops.FixedLenFeature(
                 shape=(filtering_length + prediction_length, ),
                 dtype=dtypes.string)
         else:  # VarLenFeature
             raise ValueError(
                 "VarLenFeatures not supported, got %s for key %s" %
                 (feature_spec, key))
     tfexamples = array_ops.placeholder(shape=[default_batch_size],
                                        dtype=dtypes.string,
                                        name="input")
     features = parsing_ops.parse_example(serialized=tfexamples,
                                          features=parsed_features)
     features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
         features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
     features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
         features[feature_keys.TrainEvalFeatures.VALUES],
         dtype=self._model.dtype)[:, :filtering_length]
     features.update(
         self._model_start_state_placeholders(
             batch_size_tensor=array_ops.shape(
                 features[feature_keys.TrainEvalFeatures.TIMES])[0],
             static_batch_size=default_batch_size))
     return export_lib.ServingInputReceiver(features,
                                            {"examples": tfexamples})
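
Hypothetical client-side use of the exported signature above, where 'examples' is the receiver key named in the ServingInputReceiver (predictor is the TF 1.x contrib helper):

# predictor = tf.contrib.predictor.from_saved_model(export_dir)
# output = predictor({'examples': [some_example.SerializeToString()]})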
Example #19
 def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
                              predict_input_fn, input_dimension,
                              label_dimension, batch_size, fc_impl):
   del fc_impl
   linear_feature_columns = [
       feature_column_v2.numeric_column('x', shape=(input_dimension,))
   ]
   dnn_feature_columns = [
       feature_column.numeric_column('x', shape=(input_dimension,))
   ]
   feature_columns = linear_feature_columns + dnn_feature_columns
   feature_spec = feature_column.make_parse_example_spec(feature_columns)
   self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                   feature_spec, train_input_fn, eval_input_fn,
                                   predict_input_fn, input_dimension,
                                   label_dimension, batch_size)
Example #20
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   times_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
   values_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype,
       shape=(self._model.num_features,))
   parsed_features_no_sequence = (
       feature_column.make_parse_example_spec(
           list(self._model.exogenous_feature_columns)
           + [times_column, values_column]))
   parsed_features = {}
   for key, feature_spec in parsed_features_no_sequence.items():
     if isinstance(feature_spec, parsing_ops.FixedLenFeature):
       if key == feature_keys.TrainEvalFeatures.VALUES:
         parsed_features[key] = feature_spec._replace(
             shape=((values_proto_length,)
                    + feature_spec.shape))
       else:
         parsed_features[key] = feature_spec._replace(
             shape=((filtering_length + prediction_length,)
                    + feature_spec.shape))
     elif feature_spec.dtype == dtypes.string:
       parsed_features[key] = parsing_ops.FixedLenFeature(
           shape=(filtering_length + prediction_length,),
           dtype=dtypes.string)
     else:  # VarLenFeature
       raise ValueError("VarLenFeatures not supported, got %s for key %s"
                        % (feature_spec, key))
   tfexamples = array_ops.placeholder(
       shape=[default_batch_size], dtype=dtypes.string, name="input")
   features = parsing_ops.parse_example(
       serialized=tfexamples,
       features=parsed_features)
   features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
       features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
   features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
       features[feature_keys.TrainEvalFeatures.VALUES],
       dtype=self._model.dtype)[:, :filtering_length]
   features.update(
       self._model_start_state_placeholders(
           batch_size_tensor=array_ops.shape(
               features[feature_keys.TrainEvalFeatures.TIMES])[0],
           static_batch_size=default_batch_size))
   return export_lib.ServingInputReceiver(
       features, {"examples": tfexamples})
Example #21
  def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
      The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
      return (0,)
    with ops.Graph().as_default():
      parsed_features = (
          feature_column.make_parse_example_spec(
              self._exogenous_feature_columns))
      placeholder_features = parsing_ops.parse_example(
          serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string),
          features=parsed_features)
      embedded = feature_column.input_layer(
          features=placeholder_features,
          feature_columns=self._exogenous_feature_columns)
      return embedded.get_shape().as_list()[1:]
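
A minimal standalone sketch of the same scratch-graph trick, assuming a single numeric column and TF 1.x graph mode; only static shape metadata is read, so the graph is never run:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

cols = [tf.feature_column.numeric_column('price', shape=(3,))]
with tf.Graph().as_default():
    spec = tf.feature_column.make_parse_example_spec(cols)
    features = tf.io.parse_example(
        tf.placeholder(dtype=tf.string, shape=[None]), spec)
    embedded = tf.feature_column.input_layer(features, cols)
    print(embedded.get_shape().as_list()[1:])  # -> [3], batch dim dropped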
Example #22
  def test_complete_flow(self):
    label_dimension = 2
    batch_size = 10
    feature_columns = [
        feature_column_lib.numeric_column('x', shape=(2,))
    ]
    est = linear.LinearRegressor(
        feature_columns=feature_columns, label_dimension=label_dimension,
        model_dir=self._model_dir)
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, num_epochs=1,
        shuffle=False)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=None, batch_size=batch_size, num_epochs=1,
        shuffle=False)
    predictions = list(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllClose(data, predictions, atol=0.01)

    # EXPORT
    feature_spec = feature_column_lib.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #23
    def test_complete_flow(self):
        n_classes = 3
        input_dimension = 2
        batch_size = 12

        data = np.linspace(0.,
                           n_classes - 1.,
                           batch_size * input_dimension,
                           dtype=np.float32)
        x_data = data.reshape(batch_size, input_dimension)
        categorical_data = np.random.random_integers(0,
                                                     len(x_data),
                                                     size=len(x_data))
        y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
        train_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                 y=y_data,
                                                 batch_size=batch_size,
                                                 num_epochs=None,
                                                 shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                y=y_data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                   batch_size=batch_size,
                                                   shuffle=False)

        feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, )),
            feature_column.embedding_column(
                feature_column.categorical_column_with_vocabulary_list(
                    'categories',
                    vocabulary_list=np.linspace(0.,
                                                len(x_data),
                                                len(x_data),
                                                dtype=np.int64)), 1)
        ]

        estimator = dnn.DNNClassifier(hidden_units=(2, 2),
                                      feature_columns=feature_columns,
                                      n_classes=n_classes,
                                      model_dir=self._model_dir)

        def optimizer_fn():
            return optimizers.get_optimizer_instance('Adagrad',
                                                     learning_rate=0.05)

        estimator = estimator_lib.Estimator(
            model_fn=replicate_model_fn.replicate_model_fn(
                estimator.model_fn,
                optimizer_fn,
                devices=['/gpu:0', '/gpu:1', '/gpu:2']),
            model_dir=estimator.model_dir,
            config=estimator.config,
            params=estimator.params)

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predicted_proba = np.array([
            x[prediction_keys.PredictionKeys.PROBABILITIES]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #24
  def input_layer_with_layer_annotations(features,
                                         feature_columns,
                                         weight_collections=None,
                                         trainable=True,
                                         cols_to_vars=None,
                                         cols_to_output_tensors=None):
    """Returns a dense `Tensor` as input layer based on given `feature_columns`.

    Generally a single example in training data is described with
    FeatureColumns. At the first layer of the model, this column-oriented
    data should be converted to a single `Tensor`.

    This is like tf.feature_column.input_layer, except with added
    Integrated-Gradient annotations.

    Args:
      features: A mapping from key to tensors. `_FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at 'price'
        key in this dict. Values can be a `SparseTensor` or a `Tensor`,
        depending on the corresponding `_FeatureColumn`.
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model. All items should be instances of classes derived
        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
        `bucketized_column`, `indicator_column`. If you have categorical
        features, you can wrap them with an `embedding_column` or
        `indicator_column`.
      weight_collections: A list of collection names to which the Variable will
        be added. Note that variables will also be added to collections
        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      cols_to_vars: If not `None`, must be a dictionary that will be filled with
        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
        after the call, we might have cols_to_vars = {_EmbeddingColumn(
        categorical_column=_HashedCategoricalColumn( key='sparse_feature',
        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
        'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
          shape=(5, 10)]} If a column creates no variables, its value will be an
          empty list.
      cols_to_output_tensors: If not `None`, must be a dictionary that will be
        filled with a mapping from '_FeatureColumn' to the associated output
        `Tensor`s.

    Returns:
      A `Tensor` which represents input layer of a model. Its shape
      is (batch_size, first_layer_dimension) and its dtype is `float32`.
      first_layer_dimension is determined based on given `feature_columns`.

    Raises:
      ValueError: features and feature_columns have different lengths.
    """

    local_cols_to_output_tensors = {}
    input_layer = original_input_layer(
        features=features,
        feature_columns=feature_columns,
        weight_collections=weight_collections,
        trainable=trainable,
        cols_to_vars=cols_to_vars,
        cols_to_output_tensors=local_cols_to_output_tensors)

    if cols_to_output_tensors is not None:
      cols_to_output_tensors = local_cols_to_output_tensors

    if mode and mode == model_fn.ModeKeys.PREDICT:
      # Only annotate in PREDICT mode.

      # Annotate features.
      # These are the parsed Tensors, before embedding.

      # Only annotate features used by FeatureColumns.
      # We figure which ones are used by FeatureColumns by creating a parsing
      # spec and looking at the keys.
      spec = feature_column_lib.make_parse_example_spec(feature_columns)
      for key in spec.keys():
        tensor = features[key]
        ops.add_to_collection(
            LayerAnnotationsCollectionNames.keys(
                LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
        ops.add_to_collection(
            LayerAnnotationsCollectionNames.values(
                LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
            _to_any_wrapped_tensor_info(tensor))

      # Annotate feature columns.
      for column in feature_columns:
        # TODO(cyfoo): Find a better way to serialize and deserialize
        # _FeatureColumn.
        ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
                              serialize_feature_column(column))

      for column, tensor in local_cols_to_output_tensors.items():
        ops.add_to_collection(
            LayerAnnotationsCollectionNames.keys(
                LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
            column.name)
        ops.add_to_collection(
            LayerAnnotationsCollectionNames.values(
                LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
            _to_any_wrapped_tensor_info(tensor))

    return input_layer
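
A hedged sketch of reading the annotations back later; LayerAnnotationsCollectionNames is the helper module attribute assumed by the code above:

# graph = tf.get_default_graph()
# unprocessed_keys = graph.get_collection(
#     LayerAnnotationsCollectionNames.keys(
#         LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES))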
Example #25
  def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    session_config = config_pb2.ConfigProto(
        log_device_placement=True, allow_soft_placement=True)
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        dnn_optimizer=adam.Adam(0.001),
        linear_optimizer=adam.Adam(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution,
            eval_distribute=distribution,
            session_config=session_config))

    num_steps = 2
    if use_train_and_evaluate:
      scores, _ = training.train_and_evaluate(
          estimator, training.TrainSpec(train_input_fn, max_steps=num_steps),
          training.EvalSpec(eval_input_fn))
    else:
      estimator.train(train_input_fn, steps=num_steps)
      scores = estimator.evaluate(eval_input_fn)

    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #26
  def _complete_flow_with_mode(self, mode):
    n_classes = 3
    input_dimension = 2
    batch_size = 12

    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    categorical_data = np.random.random_integers(
        0, len(x_data), size=len(x_data))
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        batch_size=batch_size,
        shuffle=False)

    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,)),
        feature_column.embedding_column(
            feature_column.categorical_column_with_vocabulary_list(
                'categories',
                vocabulary_list=np.linspace(
                    0., len(x_data), len(x_data), dtype=np.int64)), 1)
    ]

    estimator = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    def optimizer_fn():
      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

    if not mode:  # Use the public `replicate_model_fn`.
      model_fn = replicate_model_fn.replicate_model_fn(
          estimator.model_fn,
          optimizer_fn,
          devices=['/gpu:0', '/gpu:1', '/gpu:2'])
    else:
      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
          estimator.model_fn,
          optimizer_fn,
          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
          mode=mode)

    estimator = estimator_lib.Estimator(
        model_fn=model_fn,
        model_dir=estimator.model_dir,
        config=estimator.config,
        params=estimator.params)

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #27
 def _get_exporter(self, name, fc):
   feature_spec = feature_column.make_parse_example_spec(fc)
   serving_input_receiver_fn = (
       export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
   return exporter_lib.LatestExporter(
       name, serving_input_receiver_fn=serving_input_receiver_fn)
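
Hypothetical usage of the exporter helper above inside an EvalSpec, so each evaluation can export the latest model:

# eval_spec = tf.estimator.EvalSpec(
#     input_fn=eval_input_fn,
#     exporters=[self._get_exporter('latest_exporter', feature_columns)])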
Example #28
    def input_layer_with_layer_annotations(features,
                                           feature_columns,
                                           weight_collections=None,
                                           trainable=True,
                                           cols_to_vars=None,
                                           scope=None,
                                           cols_to_output_tensors=None,
                                           from_template=False):
        """Returns a dense `Tensor` as input layer based on given `feature_columns`.

    Generally a single example in training data is described with
    FeatureColumns. At the first layer of the model, this column-oriented
    data should be converted to a single `Tensor`.

    This is like tf.feature_column.input_layer, except with added
    Integrated-Gradient annotations.

    Args:
      features: A mapping from key to tensors. `_FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at 'price'
        key in this dict. Values can be a `SparseTensor` or a `Tensor`,
        depending on the corresponding `_FeatureColumn`.
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model. All items should be instances of classes derived
        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
        `bucketized_column`, `indicator_column`. If you have categorical
        features, you can wrap them with an `embedding_column` or
        `indicator_column`.
      weight_collections: A list of collection names to which the Variable will
        be added. Note that variables will also be added to collections
        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      cols_to_vars: If not `None`, must be a dictionary that will be filled with
        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
        after the call, we might have cols_to_vars = {_EmbeddingColumn(
        categorical_column=_HashedCategoricalColumn( key='sparse_feature',
        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
        'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
          shape=(5, 10)]} If a column creates no variables, its value will be an
          empty list.
      scope: A name or variable scope to use
      cols_to_output_tensors: If not `None`, must be a dictionary that will be
        filled with a mapping from '_FeatureColumn' to the associated output
        `Tensor`s.
      from_template: True if the method is being instantiated from a
        `make_template`.

    Returns:
      A `Tensor` which represents input layer of a model. Its shape
      is (batch_size, first_layer_dimension) and its dtype is `float32`.
      first_layer_dimension is determined based on given `feature_columns`.

    Raises:
      ValueError: features and feature_columns have different lengths.
    """

    local_cols_to_output_tensors = {}
    input_layer = original_input_layer(
        features=features,
        feature_columns=feature_columns,
        weight_collections=weight_collections,
        trainable=trainable,
        cols_to_vars=cols_to_vars,
        scope=scope,
        cols_to_output_tensors=local_cols_to_output_tensors,
        from_template=from_template)

    if cols_to_output_tensors is not None:
      # Copy into the caller-supplied dict. Rebinding the argument name, as
      # the original code did, would be a no-op from the caller's view.
      cols_to_output_tensors.update(local_cols_to_output_tensors)

    # Annotate features.
    # These are the parsed Tensors, before embedding.

    # Only annotate features used by FeatureColumns.
    # We figure out which ones are used by FeatureColumns by creating a
    # parsing spec and looking at the keys.
    spec = feature_column_lib.make_parse_example_spec(feature_columns)
    for key in spec.keys():
      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.keys(
              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.values(
              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
          _to_any_wrapped_tensor_info(tensor))

    # Annotate feature columns.
    for column in feature_columns:
      # TODO(cyfoo): Find a better way to serialize and deserialize
      # _FeatureColumn.
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
          serialize_feature_column(column))

    for column, tensor in local_cols_to_output_tensors.items():
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.keys(
              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
          column.name)
      ops.add_to_collection(
          LayerAnnotationsCollectionNames.values(
              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
          _to_any_wrapped_tensor_info(tensor))

    return input_layer
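For orientation, here is a minimal usage sketch. `input_layer_with_layer_annotations` stands in for the wrapped function above, and `LayerAnnotationsCollectionNames` for the collection-name helper it references; both names are assumptions drawn from the snippet, not a confirmed public API.

```python
import tensorflow as tf

# Hypothetical names: input_layer_with_layer_annotations wraps the function
# above; LayerAnnotationsCollectionNames is the helper it references.
features = {'price': tf.constant([[1.0], [2.0]])}
columns = [tf.feature_column.numeric_column('price')]
net = input_layer_with_layer_annotations(features, columns)

# The annotations are ordinary graph collections, so they can be read back
# with tf.get_collection once the layer has been built.
processed_feature_names = tf.get_collection(
    LayerAnnotationsCollectionNames.keys(
        LayerAnnotationsCollectionNames.PROCESSED_FEATURES))
```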
Example #29
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   time_placeholder = array_ops.placeholder(
       name=feature_keys.TrainEvalFeatures.TIMES,
       dtype=dtypes.int64,
       shape=[default_batch_size, default_series_length])
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size
                   if default_batch_size else 0, default_series_length
                   if default_series_length else 0, self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape, value_dtype) in (
         exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length]
                        + batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   # Models may not know the shape of their state without creating some
   # variables/ops. Avoid polluting the default graph by making a new one. We
   # use only static metadata from the returned Tensors.
   with ops.Graph().as_default():
     self._model.initialize_graph()
     # Evaluate the initial state as same-dtype "zero" values. These zero
     # constants aren't used, but are necessary for feeding to
     # placeholder_with_default for the "cold start" case where state is not
     # fed to the model.
     def _zeros_like_constant(tensor):
       return tensor_util.constant_value(array_ops.zeros_like(tensor))
     start_state = nest.map_structure(
         _zeros_like_constant, self._model.get_start_state())
   batch_size_tensor = array_ops.shape(time_placeholder)[0]
   for prefixed_state_name, state in ts_head_lib.state_to_dictionary(
       start_state).items():
     state_shape_with_batch = tensor_shape.TensorShape(
         (default_batch_size,)).concatenate(state.shape)
     default_state_broadcast = array_ops.tile(
         state[None, ...],
         multiples=array_ops.concat(
             [batch_size_tensor[None],
              array_ops.ones(len(state.shape), dtype=dtypes.int32)],
             axis=0))
     placeholders[prefixed_state_name] = array_ops.placeholder_with_default(
         input=default_state_broadcast,
         name=prefixed_state_name,
         shape=state_shape_with_batch)
   return export_lib.ServingInputReceiver(placeholders, placeholders)
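A hedged sketch of how a receiver function like this is typically hooked up. `estimator` here is a placeholder for the time-series estimator that defines the method above; it is not part of the snippet itself.

```python
import tempfile

# `estimator` is assumed to be the tf.estimator.Estimator this method belongs
# to; _serving_input_receiver_fn is the receiver function defined above.
export_dir = estimator.export_savedmodel(
    export_dir_base=tempfile.mkdtemp(),
    serving_input_receiver_fn=_serving_input_receiver_fn)
```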
Example #30
def regressor_parse_example_spec(feature_columns,
                                 label_key,
                                 label_dtype=dtypes.float32,
                                 label_default=None,
                                 label_dimension=1,
                                 weight_column=None):
  """Generates parsing spec for tf.parse_example to be used with regressors.

  If users keep data in tf.Example format, they need to call tf.parse_example
  with a proper feature spec. This utility helps in two main ways:

  * Users need to combine the parsing spec of features with labels and weights
    (if any), since they are all parsed from the same tf.Example instance.
    This utility combines these specs.
  * It is difficult to map the label expected by a regressor such as
    `DNNRegressor` to the corresponding tf.parse_example spec. This utility
    encodes it from information supplied by the user (key, dtype).

  Example output of parsing spec:

  ```python
  # Define features and transformations
  feature_b = tf.feature_column.numeric_column(...)
  feature_c_bucketized = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column("feature_c"), ...)
  feature_a_x_feature_c = tf.feature_column.crossed_column(
      columns=["feature_a", feature_c_bucketized], ...)

  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
  parsing_spec = tf.estimator.regressor_parse_example_spec(
      feature_columns, label_key='my-label')

  # For the above example, regressor_parse_example_spec would return the dict:
  assert parsing_spec == {
    "feature_a": parsing_ops.VarLenFeature(tf.string),
    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.float32)
  }
  ```

  Example usage with a regressor:

  ```python
  feature_columns = # define features via tf.feature_column
  estimator = DNNRegressor(
      hidden_units=[256, 64, 16],
      feature_columns=feature_columns,
      weight_column='example-weight',
      label_dimension=3)
  # This label configuration tells the regressor the following:
  # * weights are retrieved with key 'example-weight'
  # * label is a 3 dimension tensor with float32 dtype.


  # Input builders
  def input_fn_train():  # Returns a tuple of features and labels.
    features = tf.contrib.learn.read_keyed_batch_features(
        file_pattern=train_files,
        batch_size=batch_size,
        # creates parsing configuration for tf.parse_example
        features=tf.estimator.regressor_parse_example_spec(
            feature_columns,
            label_key='my-label',
            label_dimension=3,
            weight_column='example-weight'),
        reader=tf.RecordIOReader)
    labels = features.pop('my-label')
    return features, labels

  estimator.train(input_fn=input_fn_train)
  ```

  Args:
    feature_columns: An iterable containing all feature columns. All items
      should be instances of classes derived from `_FeatureColumn`.
    label_key: A string identifying the label. It means tf.Example stores labels
      with this key.
    label_dtype: A `tf.dtype` identifying the type of labels. By default it is
      `tf.float32`.
    label_default: Used as the label if `label_key` does not exist in the
      given tf.Example. By default it is `None`, which means
      `tf.parse_example` will error out if there is any missing label.
    label_dimension: Number of regression targets per example. This is the
      size of the last dimension of the labels and logits `Tensor` objects
      (typically, these have shape `[batch_size, label_dimension]`).
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining a feature column that
      represents weights. It is used to down-weight or boost examples during
      training; it will be multiplied by the loss of the example. If it is a
      string, it is used as a key to fetch the weight tensor from `features`.
      If it is a `_NumericColumn`, the raw tensor is fetched by key
      `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
      to get the weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.

  Raises:
    ValueError: If label is used in `feature_columns`.
    ValueError: If weight_column is used in `feature_columns`.
    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
      instance.
    ValueError: If `weight_column` is not a `_NumericColumn` instance.
    ValueError: If `label_key` is None.
  """
  parsing_spec = fc.make_parse_example_spec(feature_columns)
  if label_key in parsing_spec:
    raise ValueError('label should not be used as feature. '
                     'label_key: {}, features: {}'.format(
                         label_key, parsing_spec.keys()))
  parsing_spec[label_key] = parsing_ops.FixedLenFeature(
      (label_dimension,), label_dtype, label_default)

  if weight_column is None:
    return parsing_spec

  if isinstance(weight_column, six.string_types):
    weight_column = fc.numeric_column(weight_column)

  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
    raise ValueError('weight_column should be an instance of '
                     'tf.feature_column.numeric_column. '
                     'Given type: {} value: {}'.format(
                         type(weight_column), weight_column))

  if weight_column.key in parsing_spec:
    raise ValueError('weight_column should not be used as feature. '
                     'weight_column: {}, features: {}'.format(
                         weight_column.key, parsing_spec.keys()))

  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
  return parsing_spec
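To make the returned spec concrete, a minimal sketch of feeding it to `tf.parse_example`; the placeholder and column names below are illustrative assumptions, not part of the function above.

```python
import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column('feature_b')]
parsing_spec = regressor_parse_example_spec(
    feature_columns, label_key='my-label', label_dimension=3)

# Parse a batch of serialized tf.Example protos with the combined spec.
serialized = tf.placeholder(tf.string, shape=[None])
parsed = tf.parse_example(serialized, parsing_spec)
labels = parsed.pop('my-label')  # Tensor of shape [batch_size, 3].
# `parsed` now maps 'feature_b' to a [batch_size, 1] float32 Tensor.
```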
Example #31
  def testCreateFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    str_sparse_id_col = fc.sparse_column_with_keys(
        "str_id_column", ["marlo", "omar", "stringer"])
    int32_sparse_id_col = fc.sparse_column_with_keys(
        "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
    int64_sparse_id_col = fc.sparse_column_with_keys(
        "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
    weighted_id_col = fc.weighted_sparse_column(str_sparse_id_col,
                                                "str_id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column1")
    real_valued_col2 = fc.real_valued_column("real_valued_column2", 5)
    bucketized_col1 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
    bucketized_col2 = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization2", 4),
        [0, 4])
    a = fc.sparse_column_with_hash_bucket("cross_aaa", hash_bucket_size=100)
    b = fc.sparse_column_with_hash_bucket("cross_bbb", hash_bucket_size=100)
    cross_col = fc.crossed_column(set([a, b]), hash_bucket_size=10000)
    one_hot_col = fc.one_hot_column(fc.sparse_column_with_hash_bucket(
        "sparse_column_for_one_hot", hash_bucket_size=100))
    scattered_embedding_col = fc.scattered_embedding_column(
        "scattered_embedding_column", size=100, dimension=10, hash_key=1)
    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, int32_sparse_id_col,
        int64_sparse_id_col, real_valued_col1, real_valued_col2,
        bucketized_col1, bucketized_col2, cross_col, one_hot_col,
        scattered_embedding_col
    ])
    expected_config = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "str_id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "int32_id_column":
            parsing_ops.VarLenFeature(dtypes.int32),
        "int64_id_column":
            parsing_ops.VarLenFeature(dtypes.int64),
        "str_id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=dtypes.float32),
        "real_valued_column_for_bucketization1":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32),
        "real_valued_column_for_bucketization2":
            parsing_ops.FixedLenFeature(
                [4], dtype=dtypes.float32),
        "cross_aaa":
            parsing_ops.VarLenFeature(dtypes.string),
        "cross_bbb":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_one_hot":
            parsing_ops.VarLenFeature(dtypes.string),
        "scattered_embedding_column":
            parsing_ops.VarLenFeature(dtypes.string),
    }

    config = fc.create_feature_spec_for_parsing(feature_columns)
    self.assertDictEqual(expected_config, config)

    # Tests that contrib feature columns work with core library:
    config_core = fc_core.make_parse_example_spec(feature_columns)
    self.assertDictEqual(expected_config, config_core)

    # Test that the same config is parsed out if we pass a dictionary.
    feature_columns_dict = {
        str(i): val
        for i, val in enumerate(feature_columns)
    }
    config = fc.create_feature_spec_for_parsing(feature_columns_dict)
    self.assertDictEqual(expected_config, config)
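As a quick aside, either of the identical specs checked above can be handed straight to the parsing ops. A small self-contained sketch with a single feature (the spec subset here is an illustrative assumption):

```python
import tensorflow as tf

# Build a serialized tf.Example containing one of the features above.
example = tf.train.Example(features=tf.train.Features(feature={
    'real_valued_column1': tf.train.Feature(
        float_list=tf.train.FloatList(value=[0.5])),
}))
# Parse with the matching subset of the generated spec.
subset_spec = {'real_valued_column1': tf.FixedLenFeature([1], tf.float32)}
parsed = tf.parse_single_example(example.SerializeToString(), subset_spec)
```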
Example #32
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices))
        eval_input_fn = self.dataset_input_fn(x={'x': data},
                                              y=data,
                                              batch_size=batch_size //
                                              len(distribution.worker_devices))
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        session_config = config_pb2.ConfigProto(log_device_placement=True,
                                                allow_soft_placement=True)
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            dnn_optimizer=adam.Adam(0.001),
            linear_optimizer=adam.Adam(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution,
                                        session_config=session_config))

        num_steps = 2
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #33
def classifier_parse_example_spec(feature_columns,
                                  label_key,
                                  label_dtype=dtypes.int64,
                                  label_default=None,
                                  weight_column=None):
  """Generates parsing spec for tf.parse_example to be used with classifiers.

  If users keep data in tf.Example format, they need to call tf.parse_example
  with a proper feature spec. This utility helps in two main ways:

  * Users need to combine the parsing spec of features with labels and weights
    (if any), since they are all parsed from the same tf.Example instance.
    This utility combines these specs.
  * It is difficult to map the label expected by a classifier such as
    `DNNClassifier` to the corresponding tf.parse_example spec. This utility
    encodes it from information supplied by the user (key, dtype).

  Example output of parsing spec:

  ```python
  # Define features and transformations
  feature_b = tf.feature_column.numeric_column(...)
  feature_c_bucketized = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column("feature_c"), ...)
  feature_a_x_feature_c = tf.feature_column.crossed_column(
      columns=["feature_a", feature_c_bucketized], ...)

  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
  parsing_spec = tf.estimator.classifier_parse_example_spec(
      feature_columns, label_key='my-label', label_dtype=tf.string)

  # For the above example, classifier_parse_example_spec would return the dict:
  assert parsing_spec == {
    "feature_a": parsing_ops.VarLenFeature(tf.string),
    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.string)
  }
  ```

  Example usage with a classifier:

  ```python
  feature_columns = # define features via tf.feature_column
  estimator = DNNClassifier(
      n_classes=1000,
      feature_columns=feature_columns,
      weight_column='example-weight',
      label_vocabulary=['photos', 'keep', ...],
      hidden_units=[256, 64, 16])
  # This label configuration tells the classifier the following:
  # * weights are retrieved with key 'example-weight'
  # * label is string and can be one of the following ['photos', 'keep', ...]
  # * integer id for label 'photos' is 0, 'keep' is 1, ...


  # Input builders
  def input_fn_train():  # Returns a tuple of features and labels.
    features = tf.contrib.learn.read_keyed_batch_features(
        file_pattern=train_files,
        batch_size=batch_size,
        # creates parsing configuration for tf.parse_example
        features=tf.estimator.classifier_parse_example_spec(
            feature_columns,
            label_key='my-label',
            label_dtype=tf.string,
            weight_column='example-weight'),
        reader=tf.RecordIOReader)
    labels = features.pop('my-label')
    return features, labels

  estimator.train(input_fn=input_fn_train)
  ```

  Args:
    feature_columns: An iterable containing all feature columns. All items
      should be instances of classes derived from `_FeatureColumn`.
    label_key: A string identifying the label. It means tf.Example stores labels
      with this key.
    label_dtype: A `tf.dtype` identifying the type of labels. By default it is
      `tf.int64`. If the user defines a `label_vocabulary`, this should be set
      to `tf.string`. `tf.float32` labels are only supported for binary
      classification.
    label_default: Used as the label if `label_key` does not exist in the
      given tf.Example. An example usage: let's say `label_key` is 'clicked'
      and tf.Example contains clicked data only for positive examples, in the
      format `key:clicked, value:1`. This means that if there is no data with
      key 'clicked', it should count as a negative example by setting
      `label_default=0`. The type of this value should be compatible with
      `label_dtype`.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining a feature column that
      represents weights. It is used to down-weight or boost examples during
      training; it will be multiplied by the loss of the example. If it is a
      string, it is used as a key to fetch the weight tensor from `features`.
      If it is a `_NumericColumn`, the raw tensor is fetched by key
      `weight_column.key`, then `weight_column.normalizer_fn` is applied to it
      to get the weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.

  Raises:
    ValueError: If label is used in `feature_columns`.
    ValueError: If weight_column is used in `feature_columns`.
    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
      instance.
    ValueError: If `weight_column` is not a `_NumericColumn` instance.
    ValueError: If `label_key` is None.
  """
  parsing_spec = fc.make_parse_example_spec(feature_columns)
  if label_key in parsing_spec:
    raise ValueError('label should not be used as feature. '
                     'label_key: {}, features: {}'.format(
                         label_key, parsing_spec.keys()))
  parsing_spec[label_key] = parsing_ops.FixedLenFeature((1,), label_dtype,
                                                        label_default)

  if weight_column is None:
    return parsing_spec

  if isinstance(weight_column, six.string_types):
    weight_column = fc.numeric_column(weight_column)

  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
    raise ValueError('weight_column should be an instance of '
                     'tf.feature_column.numeric_column. '
                     'Given type: {} value: {}'.format(
                         type(weight_column), weight_column))

  if weight_column.key in parsing_spec:
    raise ValueError('weight_column should not be used as feature. '
                     'weight_column: {}, features: {}'.format(
                         weight_column.key, parsing_spec.keys()))

  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
  return parsing_spec
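A minimal input-pipeline sketch using the returned spec with `tf.data`; the file name, column name, and batch size are assumptions for illustration only.

```python
import tensorflow as tf

feature_columns = [tf.feature_column.numeric_column('feature_b')]
parsing_spec = classifier_parse_example_spec(
    feature_columns, label_key='my-label', label_dtype=tf.string)

def input_fn():
  dataset = tf.data.TFRecordDataset(['train.tfrecord'])  # placeholder file
  def _parse(serialized):
    # Features and label come out of the same tf.Example via one spec.
    parsed = tf.parse_single_example(serialized, parsing_spec)
    label = parsed.pop('my-label')
    return parsed, label
  return dataset.map(_parse).batch(32)
```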
Example #36
 def _get_exporter(self, name, fc):
     feature_spec = feature_column.make_parse_example_spec(fc)
     serving_input_receiver_fn = (
         export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
     return exporter_lib.LatestExporter(
         name, serving_input_receiver_fn=serving_input_receiver_fn)
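For context, a sketch of where such an exporter usually plugs in. `estimator`, `train_input_fn`, `eval_input_fn`, and `feature_columns` are assumed to exist in the surrounding test class; the step count is a placeholder.

```python
import tensorflow as tf

# `self._get_exporter` is the helper above; the exporter re-exports a
# SavedModel after each evaluation.
exporter = self._get_exporter('latest_exporter', feature_columns)
tf.estimator.train_and_evaluate(
    estimator,
    tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=100),
    tf.estimator.EvalSpec(input_fn=eval_input_fn, exporters=[exporter]))
```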
Example #38
    def test_complete_flow(self):
        n_classes = 3
        input_dimension = 2
        batch_size = 12

        data = np.linspace(0.,
                           n_classes - 1.,
                           batch_size * input_dimension,
                           dtype=np.float32)
        x_data = data.reshape(batch_size, input_dimension)
        y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
        train_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                 y=y_data,
                                                 batch_size=batch_size,
                                                 num_epochs=None,
                                                 shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                y=y_data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]

        estimator = dnn.DNNClassifier(hidden_units=(2, 2),
                                      feature_columns=feature_columns,
                                      n_classes=n_classes,
                                      model_dir=self._model_dir)

        def optimizer_fn():
            return optimizers.get_optimizer_instance('Adagrad',
                                                     learning_rate=0.05)

        # TODO(isaprykin):  Switch Estimator to use allow_soft_placement=True
        # during export_savedmodel and then switch this test to replicate over
        # GPUs instead of CPUs.
        estimator = estimator_lib.Estimator(
            model_fn=replicate_model_fn.replicate_model_fn(
                estimator.model_fn,
                optimizer_fn,
                devices=['/cpu:0', '/cpu:0', '/cpu:0']),
            model_dir=estimator.model_dir,
            config=estimator.config,
            params=estimator.params)

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predicted_proba = np.array([
            x[prediction_keys.PredictionKeys.PROBABILITIES]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #39
 def _serving_input_receiver_fn():
     """A receiver function to be passed to export_savedmodel."""
     placeholders = {}
     placeholders[feature_keys.TrainEvalFeatures.TIMES] = (
         array_ops.placeholder(
             name=feature_keys.TrainEvalFeatures.TIMES,
             dtype=dtypes.int64,
             shape=[default_batch_size, default_series_length]))
     # Values are only necessary when filtering. For prediction the default
     # value will be ignored.
     placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
         array_ops.placeholder_with_default(
             name=feature_keys.TrainEvalFeatures.VALUES,
             input=array_ops.zeros(shape=[
                 default_batch_size if default_batch_size else 0,
                 default_series_length if default_series_length else 0,
                 self._model.num_features
             ],
                                   dtype=self._model.dtype),
             shape=(default_batch_size, default_series_length,
                    self._model.num_features)))
     if self._model.exogenous_feature_columns:
         with ops.Graph().as_default():
             # Default placeholders have only an unknown batch dimension. Make them
             # in a separate graph, then splice in the series length to the shapes
             # and re-create them in the outer graph.
             parsed_features = (feature_column.make_parse_example_spec(
                 self._model.exogenous_feature_columns))
             placeholder_features = parsing_ops.parse_example(
                 serialized=array_ops.placeholder(shape=[None],
                                                  dtype=dtypes.string),
                 features=parsed_features)
             exogenous_feature_shapes = {
                 key: (value.get_shape(), value.dtype)
                 for key, value in placeholder_features.items()
             }
         for feature_key, (batch_only_feature_shape, value_dtype) in (
                 exogenous_feature_shapes.items()):
             batch_only_feature_shape = (
                 batch_only_feature_shape.with_rank_at_least(
                     1).as_list())
             feature_shape = (
                 [default_batch_size, default_series_length] +
                 batch_only_feature_shape[1:])
             placeholders[feature_key] = array_ops.placeholder(
                 dtype=value_dtype,
                 name=feature_key,
                 shape=feature_shape)
     # Models may not know the shape of their state without creating some
     # variables/ops. Avoid polluting the default graph by making a new one. We
     # use only static metadata from the returned Tensors.
     with ops.Graph().as_default():
         self._model.initialize_graph()
         model_start_state = self._model.get_start_state()
     for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary(
             model_start_state).items():
         state_shape_with_batch = tensor_shape.TensorShape(
             (default_batch_size, )).concatenate(
                 state_tensor.get_shape())
         placeholders[prefixed_state_name] = array_ops.placeholder(
             name=prefixed_state_name,
             shape=state_shape_with_batch,
             dtype=state_tensor.dtype)
     return export_lib.ServingInputReceiver(placeholders, placeholders)