Code example #1
  def _test(self, kwargs, expected_values=None, expected_err=None):
    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        return
      else:
        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_example(**kwargs)
        result = flatten_values_tensors_or_sparse(out.values())
        # Check values.
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes; if serialized is a Tensor we need its size to check
      # them properly.
      serialized = kwargs["serialized"]
      batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
                    else np.asarray(serialized).size)
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(
              tuple(out[k].get_shape().as_list()), (batch_size,) + f.shape)
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 2))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (2,))
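
The test above relies on a `flatten_values_tensors_or_sparse` helper that is not shown. A minimal sketch of the behavior the test assumes (every `SparseTensor` expanded into its three component tensors so the whole list can go through `sess.run`):

import itertools

from tensorflow.python.framework import sparse_tensor


def flatten_values_tensors_or_sparse(tensors_list):
  # Each SparseTensor contributes (indices, values, dense_shape);
  # dense Tensors pass through unchanged.
  return list(itertools.chain.from_iterable(
      [v.indices, v.values, v.dense_shape]
      if isinstance(v, sparse_tensor.SparseTensor) else [v]
      for v in tensors_list))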
Code example #2
  def _writeDummySavedModel(self, path, feature_name):
    """Writes a classifier with two input features to the given path."""
    with ops.Graph().as_default():
      examples = array_ops.placeholder(dtypes.string, name="input_node")
      feature_configs = {
          feature_name: parsing_ops.FixedLenFeature(shape=[],
                                                    dtype=dtypes.float32),
      }
      features = parsing_ops.parse_example(examples, feature_configs)
      feature = features[feature_name]

      variable_node = variables.VariableV1(1.0, name="variable_node")
      scores = math_ops.multiply(variable_node, feature, name="output_node")
      class_feature = array_ops.fill(array_ops.shape(feature),
                                     "class_%s" % feature_name)
      classes = array_ops.transpose(class_feature)

      with session.Session() as sess:
        sess.run(variables.global_variables_initializer())
        signature = (
            signature_def_utils.classification_signature_def(
                examples=examples,
                classes=classes,
                scores=scores,))
        builder = saved_model_builder.SavedModelBuilder(path)
        builder.add_meta_graph_and_variables(
            sess,
            [tag_constants.SERVING],
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    signature,
            },)
        builder.save(as_text=True)
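
A hedged sketch of reading this model back, assuming the standard `loader` API from the same `saved_model` package; the export path and the feature value fed below are illustrative:

from tensorflow.core.example import example_pb2
from tensorflow.python.saved_model import loader

example = example_pb2.Example()
example.features.feature["x"].float_list.value.append(2.0)  # "x" = feature_name

with session.Session(graph=ops.Graph()) as sess:
  loader.load(sess, [tag_constants.SERVING], "/tmp/dummy_model")  # assumed path
  scores = sess.graph.get_tensor_by_name("output_node:0")
  print(sess.run(scores, {"input_node:0": [example.SerializeToString()]}))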
Code example #3
File: estimators.py Project: DILASSS/tensorflow
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = (
       array_ops.placeholder(
           name=feature_keys.TrainEvalFeatures.TIMES,
           dtype=dtypes.int64,
           shape=[default_batch_size, default_series_length]))
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size
                   if default_batch_size else 0, default_series_length
                   if default_series_length else 0, self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape, value_dtype) in (
         exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length]
                        + batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   # Models may not know the shape of their state without creating some
   # variables/ops. Avoid polluting the default graph by making a new one. We
   # use only static metadata from the returned Tensors.
   with ops.Graph().as_default():
     self._model.initialize_graph()
     model_start_state = self._model.get_start_state()
   for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary(
       model_start_state).items():
     state_shape_with_batch = tensor_shape.TensorShape(
         (default_batch_size,)).concatenate(state_tensor.get_shape())
     placeholders[prefixed_state_name] = array_ops.placeholder(
         name=prefixed_state_name,
         shape=state_shape_with_batch,
         dtype=state_tensor.dtype)
   return export_lib.ServingInputReceiver(placeholders, placeholders)
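
Receiver functions like this one are handed to `Estimator.export_savedmodel`; a brief usage sketch, where the estimator instance and export path are illustrative:

# Hypothetical usage; `estimator` is the enclosing time-series Estimator.
export_dir = estimator.export_savedmodel(
    export_dir_base="/tmp/exports",
    serving_input_receiver_fn=_serving_input_receiver_fn)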
Code example #4
File: dnn_test.py Project: AndrewTwinz/tensorflow
 def _predict_input_fn():
   feature_map = parsing_ops.parse_example(
       input_lib.limit_epochs(serialized_examples, num_epochs=1),
       feature_spec)
   features = _queue_parsed_features(feature_map)
   features.pop('y')
   return features, None
Code example #5
File: io_ops.py Project: 1000sprites/tensorflow
def parse_example(serialized, features, name=None, example_names=None):
  """Parse `Example` protos into a `dict` of labeled tensors.

  See tf.parse_example.

  Args:
    serialized: A 1-D LabeledTensor of strings, a batch of binary serialized
      `Example` protos.
    features: A `dict` mapping feature keys to `labeled_tensor.FixedLenFeature`
      values.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping feature keys to `LabeledTensor` values. The single axis
    from `serialized` will be prepended to the axes provided by each feature.

  Raises:
    ValueError: if any feature is invalid.
  """
  serialized = core.convert_to_labeled_tensor(serialized)
  unlabeled_features = _labeled_to_unlabeled_features(features)

  unlabeled_parsed = parsing_ops.parse_example(
      serialized.tensor, unlabeled_features, name, example_names)

  parsed = {}
  for name, parsed_feature in unlabeled_parsed.items():
    axes = list(serialized.axes.values()) + features[name].axes
    parsed[name] = core.LabeledTensor(parsed_feature, axes)

  return parsed
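
A minimal usage sketch. It assumes `core.LabeledTensor` accepts a string placeholder plus a single 'batch' axis name, and that this module's `FixedLenFeature` takes an axes list and a dtype; both are assumptions about the labeled_tensor API rather than facts from the code above:

serialized = core.LabeledTensor(
    array_ops.placeholder(dtypes.string, [None]), ['batch'])
parsed = parse_example(
    serialized, {'age': FixedLenFeature([], dtypes.int64)})
# parsed['age'] is a LabeledTensor whose first axis is 'batch'.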
Code example #6
 def _eval_input_fn():
   feature_map = parsing_ops.parse_example(
       input_lib.limit_epochs(serialized_examples, num_epochs=1),
       feature_spec)
   features = linear_testing_utils.queue_parsed_features(feature_map)
   labels = features.pop('y')
   return features, labels
Code example #7
def create_example_parser_from_signatures(signatures, examples_batch,
                                          single_feature_name="feature"):
  """Creates example parser from given signatures.

  Args:
    signatures: Dict of `TensorSignature` objects or single `TensorSignature`.
    examples_batch: string `Tensor` of serialized `Example` proto.
    single_feature_name: string, single feature name.

  Returns:
    features: `Tensor` or `dict` of `Tensor` objects.
  """
  feature_spec = {}
  if not isinstance(signatures, dict):
    feature_spec[single_feature_name] = signatures.get_feature_spec()
  else:
    feature_spec = {key: signatures[key].get_feature_spec()
                    for key in signatures}
  features = parsing_ops.parse_example(examples_batch, feature_spec)
  if not isinstance(signatures, dict):
    # Returns single feature, casts if needed.
    features = features[single_feature_name]
    if not signatures.dtype.is_compatible_with(features.dtype):
      features = math_ops.cast(features, signatures.dtype)
    return features
  # Returns dict of features, casts if needed.
  for name in features:
    if not signatures[name].dtype.is_compatible_with(features[name].dtype):
      features[name] = math_ops.cast(features[name], signatures[name].dtype)
  return features
Code example #8
File: export.py Project: AbhinavJain13/tensorflow
 def serving_input_receiver_fn():
   """An input_fn that expects a serialized tf.Example."""
   serialized_tf_example = array_ops.placeholder(dtype=dtypes.string,
                                                 shape=[default_batch_size],
                                                 name='input_example_tensor')
   receiver_tensors = {'examples': serialized_tf_example}
   features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
   return ServingInputReceiver(features, receiver_tensors)
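
The closure reads `feature_spec` and `default_batch_size` from its enclosing scope; a plausible pairing, shown for illustration only:

feature_spec = {
    'age': parsing_ops.FixedLenFeature([1], dtype=dtypes.int64),
    'query': parsing_ops.VarLenFeature(dtype=dtypes.string),
}
default_batch_size = None  # accept any batch size at serving time
receiver = serving_input_receiver_fn()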
Code example #9
File: graph_io.py Project: Baaaaam/tensorflow
def read_batch_features(file_pattern, batch_size, features, reader,
                        randomize_input=True, num_epochs=None,
                        queue_capacity=10000, reader_num_threads=1,
                        parser_num_threads=1,
                        name=None):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will set up a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_all_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    parser_num_threads: The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each key in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  with ops.op_scope([file_pattern], name, 'read_batch_features') as scope:
    examples = read_batch_examples(
        file_pattern, batch_size, reader, randomize_input=randomize_input,
        num_epochs=num_epochs, queue_capacity=queue_capacity,
        num_threads=reader_num_threads, name=scope)

    # Parse features into tensors in many threads and put on the queue.
    features_list = []
    for _ in range(parser_num_threads):
      features_list.append(parsing_ops.parse_example(examples, features))
    return input_ops.batch_join(
        features_list,
        batch_size=batch_size,
        capacity=queue_capacity,
        enqueue_many=True,
        name='parse_example_batch_join')
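
As the docstring notes, the returned tensors are fed by queue runners, and `num_epochs` introduces a variable that needs initialization. A hedged end-to-end sketch using the public TF 1.x API, with an illustrative file pattern and feature spec:

import tensorflow as tf

features = read_batch_features(
    file_pattern='/tmp/examples-*',
    batch_size=32,
    features={'age': tf.FixedLenFeature([1], tf.int64)},
    reader=tf.TFRecordReader,
    num_epochs=5)
with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())  # needed because num_epochs is set
  tf.train.start_queue_runners(sess=sess)
  age_batch = sess.run(features['age'])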
Code example #10
File: input_fn_utils.py Project: Ajaycs99/tensorflow
 def input_fn():
   """An input_fn that expects a serialized tf.Example."""
   serialized_tf_example = array_ops.placeholder(dtype=dtypes.string,
                                                 shape=[default_batch_size],
                                                 name='input_example_tensor')
   inputs = {'examples': serialized_tf_example}
   features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
   labels = None  # these are not known in serving!
   return InputFnOps(features, labels, inputs)
Code example #11
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys(column_name="native_country",
                                    keys=["US", "BRA", ...])
  country_emb = embedding_column(sparse_id_column=country, dimension=3,
                                 combiner="sum")
  occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
  occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                   combiner="sum")
  occupation_x_country = crossed_column(columns=[occupation, country],
                                        hash_bucket_size=10000)
  age = real_valued_column("age")
  age_buckets = bucketized_column(
      source_column=age,
      boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

  my_features = [occupation_emb, age_buckets, country_emb]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Code example #12
File: readers.py Project: DjangoPeng/tensorflow
def _parse_example(serialized, features):
  parsed = parsing_ops.parse_example(serialized, features)
  result = []
  for key in sorted(features.keys()):
    val = parsed[key]
    if isinstance(val, sparse_tensor_lib.SparseTensor):
      result.extend([val.indices, val.values, val.dense_shape])
    else:
      result.append(val)
  return tuple(result)
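
Reversing this flattening needs the same `features` dict to know which slots belong to a `SparseTensor`; a sketch of a hypothetical inverse helper (not part of the source file):

def _unparse_example(flat, features):
  # Rebuilds {key: Tensor | SparseTensor} from the tuple produced above.
  result, i = {}, 0
  for key in sorted(features.keys()):
    if isinstance(features[key], parsing_ops.VarLenFeature):
      result[key] = sparse_tensor_lib.SparseTensor(
          flat[i], flat[i + 1], flat[i + 2])
      i += 3
    else:
      result[key] = flat[i]
      i += 1
  return result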
Code example #13
  def _ReadAndCheckRowsUsingFeatures(self, num_rows):
    self.server.handler.num_rows = num_rows

    with self.test_session() as sess:
      feature_configs = {
          "int64_col":
              parsing_ops.FixedLenFeature(
                  [1], dtype=dtypes.int64),
          "string_col":
              parsing_ops.FixedLenFeature(
                  [1], dtype=dtypes.string, default_value="s_default"),
      }
      reader = cloud.BigQueryReader(
          project_id=_PROJECT,
          dataset_id=_DATASET,
          table_id=_TABLE,
          num_partitions=4,
          features=feature_configs,
          timestamp_millis=1,
          test_end_point=("%s:%s" % (self.server.httpd.server_address[0],
                                     self.server.httpd.server_address[1])))

      key, value = _SetUpQueue(reader)

      seen_rows = []
      features = parsing_ops.parse_example(
          array_ops.reshape(value, [1]), feature_configs)
      for _ in range(num_rows):
        int_value, str_value = sess.run(
            [features["int64_col"], features["string_col"]])

        # Parse values returned from the session.
        self.assertEqual(int_value.shape, (1, 1))
        self.assertEqual(str_value.shape, (1, 1))
        int64_col = int_value[0][0]
        string_col = str_value[0][0]
        seen_rows.append(int64_col)

        # Compare.
        expected_row = _ROWS[int64_col]
        self.assertEqual(int64_col, expected_row[0])
        self.assertEqual(
            compat.as_str(string_col), ("s_%d" % int64_col) if expected_row[1]
            else "s_default")

      self.assertItemsEqual(seen_rows, range(num_rows))

      with self.assertRaisesOpError("is closed and has insufficient elements "
                                    "\\(requested 1, current size 0\\)"):
        sess.run([key, value])
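
The `_SetUpQueue` helper is not shown. A plausible sketch, assuming it pushes the reader's partitions through a small FIFO queue and returns the reader's `(key, value)` pair; the `.run()` calls rely on the enclosing test session being the default one (all of this is an assumption modeled on TF reader tests):

from tensorflow.python.ops import data_flow_ops


def _SetUpQueue(reader):
  queue = data_flow_ops.FIFOQueue(8, [dtypes.string], shapes=())
  key, value = reader.read(queue.dequeue())
  queue.enqueue_many(reader.partitions()).run()  # uses the default session
  queue.close().run()
  return key, value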
Code example #14
File: estimators.py Project: ahmedsaiduk/tensorflow
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   time_placeholder = array_ops.placeholder(
       name=feature_keys.TrainEvalFeatures.TIMES,
       dtype=dtypes.int64,
       shape=[default_batch_size, default_series_length])
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size if default_batch_size else 0,
                   default_series_length if default_series_length else 0,
                   self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape,
                       value_dtype) in (exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length] +
                        batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   batch_size_tensor = array_ops.shape(time_placeholder)[0]
   placeholders.update(
       self._model_start_state_placeholders(
           batch_size_tensor, static_batch_size=default_batch_size))
   return export_lib.ServingInputReceiver(placeholders, placeholders)
Code example #15
 def testBasic(self):
   golden_config = example_parser_configuration_pb2.ExampleParserConfiguration(
   )
   text_format.Parse(BASIC_PROTO, golden_config)
   with session.Session() as sess:
     examples = array_ops.placeholder(dtypes.string, shape=[1])
     feature_to_type = {
         'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
         'y': parsing_ops.VarLenFeature(dtypes.string)
     }
     _ = parsing_ops.parse_example(examples, feature_to_type)
     parse_example_op = sess.graph.get_operation_by_name(
         'ParseExample/ParseExample')
     config = extract_example_parser_configuration(parse_example_op, sess)
     self.assertProtoEquals(golden_config, config)
Code example #16
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:
  ```
  columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys("country", ["US", "BRA", ...])
  country_embedding = embedding_column(country, dimension=3, combiner="sum")
  query_word = sparse_column_with_hash_bucket(
    "query_word", hash_bucket_size=int(1e6))
  query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
  age_bucket = bucketized_column(real_valued_column("age"),
                                 boundaries=[18+i*5 for i in range(10)])

  my_features = [query_embedding, age_bucket, country_embedding]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """

  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Code example #17
File: sdca_ops_test.py Project: Immexxx/tensorflow
 def parse_examples(example_protos):
   features = {
       'target':
           parsing_ops.FixedLenFeature(
               shape=[1], dtype=dtypes.float32, default_value=0),
       'age_indices':
           parsing_ops.VarLenFeature(dtype=dtypes.int64),
       'age_values':
           parsing_ops.VarLenFeature(dtype=dtypes.float32),
       'gender_indices':
           parsing_ops.VarLenFeature(dtype=dtypes.int64),
       'gender_values':
           parsing_ops.VarLenFeature(dtype=dtypes.float32)
   }
   return parsing_ops.parse_example(
       [e.SerializeToString() for e in example_protos], features)
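
For reference, a hedged sketch of how one element of `example_protos` might be constructed; the field values are illustrative:

from tensorflow.core.example import example_pb2

example = example_pb2.Example()
example.features.feature['target'].float_list.value.append(1.0)
example.features.feature['age_indices'].int64_list.value.append(0)
example.features.feature['age_values'].float_list.value.append(1.0)
example.features.feature['gender_indices'].int64_list.value.append(0)
example.features.feature['gender_values'].float_list.value.append(1.0)
parsed = parse_examples([example])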
Code example #18
File: estimators.py Project: Ajaycs99/tensorflow
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   times_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
   values_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype,
       shape=(self._model.num_features,))
   parsed_features_no_sequence = (
       feature_column.make_parse_example_spec(
           list(self._model.exogenous_feature_columns)
           + [times_column, values_column]))
   parsed_features = {}
   for key, feature_spec in parsed_features_no_sequence.items():
     if isinstance(feature_spec, parsing_ops.FixedLenFeature):
       if key == feature_keys.TrainEvalFeatures.VALUES:
         parsed_features[key] = feature_spec._replace(
             shape=((values_proto_length,)
                    + feature_spec.shape))
       else:
         parsed_features[key] = feature_spec._replace(
             shape=((filtering_length + prediction_length,)
                    + feature_spec.shape))
     elif feature_spec.dtype == dtypes.string:
       parsed_features[key] = parsing_ops.FixedLenFeature(
           shape=(filtering_length + prediction_length,),
           dtype=dtypes.string)
     else:  # VarLenFeature
       raise ValueError("VarLenFeatures not supported, got %s for key %s"
                        % (feature_spec, key))
   tfexamples = array_ops.placeholder(
       shape=[default_batch_size], dtype=dtypes.string, name="input")
   features = parsing_ops.parse_example(
       serialized=tfexamples,
       features=parsed_features)
   features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
       features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
   features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
       features[feature_keys.TrainEvalFeatures.VALUES],
       dtype=self._model.dtype)[:, :filtering_length]
   features.update(
       self._model_start_state_placeholders(
           batch_size_tensor=array_ops.shape(
               features[feature_keys.TrainEvalFeatures.TIMES])[0],
           static_batch_size=default_batch_size))
   return export_lib.ServingInputReceiver(
       features, {"examples": tfexamples})
Code example #19
File: model.py Project: AndrewTwinz/tensorflow
  def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
      The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
      return (0,)
    with ops.Graph().as_default():
      parsed_features = (
          feature_column.make_parse_example_spec(
              self._exogenous_feature_columns))
      placeholder_features = parsing_ops.parse_example(
          serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string),
          features=parsed_features)
      embedded = feature_column.input_layer(
          features=placeholder_features,
          feature_columns=self._exogenous_feature_columns)
      return embedded.get_shape().as_list()[1:]
Code example #20
File: tensor_signature.py Project: 0ruben/tensorflow
def create_example_parser_from_signatures(signatures, examples_batch,
                                          single_feature_name="feature"):
  """Creates example parser from given signatures.

  Args:
    signatures: Dict of `TensorSignature` objects or single `TensorSignature`.
    examples_batch: string `Tensor` of serialized `Example` proto.
    single_feature_name: string, single feature name.

  Returns:
    features: `Tensor` or `dict` of `Tensor` objects.
  """
  feature_spec = {}
  if not isinstance(signatures, dict):
    feature_spec[single_feature_name] = signatures.get_feature_spec()
  else:
    feature_spec = {key: signatures[key].get_feature_spec()
                    for key in signatures}
  return parsing_ops.parse_example(examples_batch, feature_spec)
Code example #21
File: graph_io.py Project: 01-/tensorflow
def read_batch_features(file_pattern, batch_size, features, reader,
                        randomize_input=True, queue_capacity=10000,
                        num_threads=1, name='dequeue_examples'):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will set up a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each key in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  examples = read_batch_examples(
      file_pattern, batch_size, reader, randomize_input,
      queue_capacity, num_threads, name=name)

  # Parse features into tensors.
  return parsing_ops.parse_example(examples, features)
Code example #22
  def test_parse_single_example(self):

    def _int64_feature(*values):
      return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=values))

    def _bytes_feature(*values):
      return feature_pb2.Feature(
          bytes_list=feature_pb2.BytesList(
              value=[v.encode("utf-8") for v in values]))

    examples = constant_op.constant([
        example_pb2.Example(
            features=feature_pb2.Features(
                feature={
                    "dense_int": _int64_feature(i),
                    "dense_str": _bytes_feature(str(i)),
                    "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
                    "sparse_str": _bytes_feature(*["abc"] * i)
                })).SerializeToString() for i in range(10)
    ])

    features = {
        "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
        "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
        "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
        "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
    }

    def loop_fn(i):
      example_proto = array_ops.gather(examples, i)
      f = parsing_ops.parse_single_example(example_proto, features)
      return f

    pfor = pfor_control_flow_ops.pfor(loop_fn, iters=10)
    manual = parsing_ops.parse_example(examples, features)
    self.run_and_assert_equal(pfor, manual)
Code example #23
    def _writeDummySavedModel(self, path, feature_name, tags):
        """Writes a classifier with two input features to the given path."""
        with ops.Graph().as_default():
            examples = array_ops.placeholder(dtypes.string, name="input_node")
            feature_configs = {
                feature_name:
                parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.float32),
            }
            features = parsing_ops.parse_example(examples, feature_configs)
            feature = features[feature_name]

            variable_node = variables.VariableV1(1.0, name="variable_node")
            scores = math_ops.multiply(variable_node,
                                       feature,
                                       name="output_node")
            class_feature = array_ops.fill(array_ops.shape(feature),
                                           "class_%s" % feature_name)
            classes = array_ops.transpose(class_feature)

            with session.Session() as sess:
                sess.run(variables.global_variables_initializer())
                signature = (signature_def_utils.classification_signature_def(
                    examples=examples,
                    classes=classes,
                    scores=scores,
                ))
                builder = saved_model_builder.SavedModelBuilder(path)
                builder.add_meta_graph_and_variables(
                    sess,
                    tags,
                    signature_def_map={
                        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                        signature,
                    },
                )
                builder.save(as_text=True)
Code example #24
File: graph_io.py Project: zsgchinese/tensorflow
def read_keyed_batch_features(file_pattern,
                              batch_size,
                              features,
                              reader,
                              randomize_input=True,
                              num_epochs=None,
                              queue_capacity=10000,
                              reader_num_threads=1,
                              parser_num_threads=1,
                              name=None):
    """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will set up a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_local_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    parser_num_threads: The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` of string keys.
    - A dict of `Tensor` or `SparseTensor` objects for each key in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
    with ops.op_scope([file_pattern], name, 'read_batch_features') as scope:
        keys, examples = read_keyed_batch_examples(
            file_pattern,
            batch_size,
            reader,
            randomize_input=randomize_input,
            num_epochs=num_epochs,
            queue_capacity=queue_capacity,
            num_threads=reader_num_threads,
            read_batch_size=batch_size,
            name=scope)

        if parser_num_threads == 1:
            # Avoid queue overhead for single thread
            return keys, parsing_ops.parse_example(examples, features)

        # Parse features into tensors in many threads and put on the queue.
        features_list = []
        for _ in range(parser_num_threads):
            feature_dict = parsing_ops.parse_example(examples, features)
            feature_dict[KEY_FEATURE_NAME] = keys
            features_list.append(feature_dict)
        queued_features = input_ops.batch_join(features_list,
                                               batch_size=batch_size,
                                               capacity=queue_capacity,
                                               enqueue_many=True,
                                               name='parse_example_batch_join')
        queued_keys = queued_features.pop(KEY_FEATURE_NAME)
        return queued_keys, queued_features
Code example #25
File: readers.py Project: jfreedman0/tensorflow
def make_batched_features_dataset(file_pattern,
                                  batch_size,
                                  features,
                                  reader=core_readers.TFRecordDataset,
                                  reader_args=None,
                                  num_epochs=None,
                                  shuffle=True,
                                  shuffle_buffer_size=10000,
                                  shuffle_seed=None,
                                  prefetch_buffer_size=1,
                                  reader_num_threads=1,
                                  parser_num_threads=2,
                                  sloppy_ordering=False,
                                  drop_final_batch=False):
  """Returns a `Dataset` of feature dictionaries from `Example` protos.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
      to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but would increase memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step (default is 1).
    reader_num_threads: Number of threads used to read `Example` records. If >1,
      the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`. Note that if the seed is set, then order
      of elements after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements. Each `dict` maps feature keys to
    `Tensor` or `SparseTensor` objects.
  """
  # Create dataset of all matching filenames
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset.map(lambda _, v: v)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if drop_final_batch:
    dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
  else:
    dataset = dataset.batch(batch_size)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.map(
      lambda x: parsing_ops.parse_example(x, features),
      num_parallel_calls=parser_num_threads)

  # TODO(rachelim): Add an optional label_name argument for extracting the label
  # from the features dictionary, to comply with the type expected by the
  # input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function.
  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
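
A short usage sketch for the dataset above, assuming the TF 1.x one-shot iterator API; the file pattern and feature spec are illustrative:

dataset = make_batched_features_dataset(
    file_pattern='/tmp/examples-*.tfrecord',
    batch_size=2,
    features={
        'age': parsing_ops.FixedLenFeature([], dtypes.int64, default_value=-1),
        'kws': parsing_ops.VarLenFeature(dtype=dtypes.string),
    })
next_batch = dataset.make_one_shot_iterator().get_next()
# next_batch['age'] is a dense Tensor; next_batch['kws'] is a SparseTensor.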
Code example #26
 def _get_feature_ops_from_example(self, examples_batch):
     column_types = layers.create_feature_spec_for_parsing(
         (self._get_linear_feature_columns() or []) +
         (self._get_dnn_feature_columns() or []))
     features = parsing_ops.parse_example(examples_batch, column_types)
     return features
Code example #27
 def _train_input_fn():
     feature_map = parsing_ops.parse_example(serialized_examples,
                                             feature_spec)
     _, features = graph_io.queue_parsed_features(feature_map)
     labels = features.pop('y')
     return features, labels
Code example #28
def read_batch_features(file_pattern,
                        batch_size,
                        features,
                        reader,
                        reader_args=None,
                        randomize_input=True,
                        num_epochs=None,
                        capacity=10000):
    """Reads batches of Examples.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of consecutive elements of this
      dataset to combine in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be called with a `filenames` tensor
      and (optional) `reader_args` and returns a `Dataset` of Examples.
    reader_args: Additional arguments to pass to the reader class.
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever.
    capacity: Capacity of the ShuffleDataset. A large capacity ensures better
      shuffling but would increase memory usage and startup time.

  Returns:
    A dict from keys in features to Tensor or SparseTensor objects.
  """
    filenames = _get_file_names(file_pattern, randomize_input)
    if reader_args:
        dataset = reader(filenames, *reader_args)
    else:
        dataset = reader(filenames)
    if dataset.output_types == (dtypes.string, dtypes.string):
        dataset = dataset.map(lambda _, v: v)
    if num_epochs != 1:
        dataset = dataset.repeat(num_epochs)
    if randomize_input:
        dataset = dataset.shuffle(capacity)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features))
    iterator = dataset.make_one_shot_iterator()
    outputs = iterator.get_next()
    return outputs
Code example #29
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Code example #30
 def filter_fn(keys, examples_json):
     del keys
     serialized = parsing_ops.decode_json_example(examples_json)
     examples = parsing_ops.parse_example(serialized, features)
     return math_ops.less(examples["age"], 2)
Code example #31
def read_keyed_batch_features(file_pattern,
                              batch_size,
                              features,
                              reader,
                              randomize_input=True,
                              num_epochs=None,
                              queue_capacity=10000,
                              reader_num_threads=1,
                              feature_queue_capacity=100,
                              num_queue_runners=2,
                              parser_num_threads=None,
                              name=None):
    """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will set up a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_local_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_queue_runners: Number of queue runners to start for the feature queue.
      Adding multiple queue runners for the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing.
    parser_num_threads: (Deprecated) The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` of string keys.
    - A dict of `Tensor` or `SparseTensor` objects for each key in `features`.

  Raises:
    ValueError: for invalid inputs.
  """

    if parser_num_threads:
        # TODO(sibyl-Aix6ihai): Remove on Sept 3 2016.
        logging.warning(
            'parser_num_threads is deprecated, it will be removed on '
            'Sept 3 2016')
    with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
        keys, examples = read_keyed_batch_examples(
            file_pattern,
            batch_size,
            reader,
            randomize_input=randomize_input,
            num_epochs=num_epochs,
            queue_capacity=queue_capacity,
            num_threads=reader_num_threads,
            read_batch_size=batch_size,
            name=scope)

        # Parse the example.
        feature_map = parsing_ops.parse_example(examples, features)

        # Let's also add preprocessed tensors into the queue types for each
        # item of the queue.
        tensors_to_enqueue = []
        # Each entry contains the key, and a boolean which indicates whether the
        # tensor was a sparse tensor.
        tensors_mapping = []
        # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
        # tensors into a queue. This could be taken care in somewhere else so others
        # can reuse it. Also, QueueBase maybe extended to handle sparse tensors
        # directly.
        for key in sorted(feature_map.keys()):
            tensor = feature_map[key]
            if isinstance(tensor, ops.SparseTensor):
                tensors_mapping.append((key, True))
                tensors_to_enqueue.extend(
                    [tensor.indices, tensor.values, tensor.shape])
            else:
                tensors_mapping.append((key, False))
                tensors_to_enqueue.append(tensor)
        tensors_to_enqueue.append(keys)

        queue_dtypes = [x.dtype for x in tensors_to_enqueue]
        input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity,
                                              queue_dtypes)

        # Add a summary op to debug if our feature queue is full or not.
        logging_ops.scalar_summary(
            'queue/parsed_features/%s/fraction_of_%d_full' %
            (input_queue.name, feature_queue_capacity),
            math_ops.cast(input_queue.size(), dtypes.float32) *
            (1. / feature_queue_capacity))

        # Add multiple queue runners so that the queue is always full. Adding more
        # than two queue-runners may hog the cpu on the worker to fill up the queue.
        for _ in range(num_queue_runners):
            queue_runner.add_queue_runner(
                queue_runner.QueueRunner(
                    input_queue, [input_queue.enqueue(tensors_to_enqueue)]))

        dequeued_tensors = input_queue.dequeue()

        # Reset shapes on dequeued tensors.
        for i in range(len(tensors_to_enqueue)):
            dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

        # Recreate feature mapping according to the original dictionary.
        dequeued_feature_map = {}
        index = 0
        for key, is_sparse_tensor in tensors_mapping:
            if is_sparse_tensor:
                # Three tensors are (indices, values, shape).
                dequeued_feature_map[key] = ops.SparseTensor(
                    dequeued_tensors[index], dequeued_tensors[index + 1],
                    dequeued_tensors[index + 2])
                index += 3
            else:
                dequeued_feature_map[key] = dequeued_tensors[index]
                index += 1
        dequeued_keys = dequeued_tensors[-1]

        return dequeued_keys, dequeued_feature_map
Code example #32
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   placeholders = {}
   time_placeholder = array_ops.placeholder(
       name=feature_keys.TrainEvalFeatures.TIMES,
       dtype=dtypes.int64,
       shape=[default_batch_size, default_series_length])
   placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
   # Values are only necessary when filtering. For prediction the default
   # value will be ignored.
   placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
       array_ops.placeholder_with_default(
           name=feature_keys.TrainEvalFeatures.VALUES,
           input=array_ops.zeros(
               shape=[
                   default_batch_size
                   if default_batch_size else 0, default_series_length
                   if default_series_length else 0, self._model.num_features
               ],
               dtype=self._model.dtype),
           shape=(default_batch_size, default_series_length,
                  self._model.num_features)))
   if self._model.exogenous_feature_columns:
     with ops.Graph().as_default():
       # Default placeholders have only an unknown batch dimension. Make them
       # in a separate graph, then splice in the series length to the shapes
       # and re-create them in the outer graph.
       parsed_features = (
           feature_column.make_parse_example_spec(
               self._model.exogenous_feature_columns))
       placeholder_features = parsing_ops.parse_example(
           serialized=array_ops.placeholder(
               shape=[None], dtype=dtypes.string),
           features=parsed_features)
       exogenous_feature_shapes = {
           key: (value.get_shape(), value.dtype) for key, value
           in placeholder_features.items()}
     for feature_key, (batch_only_feature_shape, value_dtype) in (
         exogenous_feature_shapes.items()):
       batch_only_feature_shape = (
           batch_only_feature_shape.with_rank_at_least(1).as_list())
       feature_shape = ([default_batch_size, default_series_length]
                        + batch_only_feature_shape[1:])
       placeholders[feature_key] = array_ops.placeholder(
           dtype=value_dtype, name=feature_key, shape=feature_shape)
   # Models may not know the shape of their state without creating some
   # variables/ops. Avoid polluting the default graph by making a new one. We
   # use only static metadata from the returned Tensors.
   with ops.Graph().as_default():
     self._model.initialize_graph()
     # Evaluate the initial state as same-dtype "zero" values. These zero
     # constants aren't used, but are necessary for feeding to
     # placeholder_with_default for the "cold start" case where state is not
     # fed to the model.
     def _zeros_like_constant(tensor):
       return tensor_util.constant_value(array_ops.zeros_like(tensor))
     start_state = nest.map_structure(
         _zeros_like_constant, self._model.get_start_state())
   batch_size_tensor = array_ops.shape(time_placeholder)[0]
   for prefixed_state_name, state in ts_head_lib.state_to_dictionary(
       start_state).items():
     state_shape_with_batch = tensor_shape.TensorShape(
         (default_batch_size,)).concatenate(state.shape)
     default_state_broadcast = array_ops.tile(
         state[None, ...],
         multiples=array_ops.concat(
             [batch_size_tensor[None],
              array_ops.ones(len(state.shape), dtype=dtypes.int32)],
             axis=0))
     placeholders[prefixed_state_name] = array_ops.placeholder_with_default(
         input=default_state_broadcast,
         name=prefixed_state_name,
         shape=state_shape_with_batch)
   return export_lib.ServingInputReceiver(placeholders, placeholders)
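
This receiver function is meant to be passed to the estimator's SavedModel export entry point. A hedged usage sketch (the estimator instance and the export path are assumed, not part of the snippet):

# Assumed usage: `estimator` is the enclosing TF1 Estimator instance.
export_dir = estimator.export_savedmodel(
    export_dir_base='/tmp/exported_model',
    serving_input_receiver_fn=_serving_input_receiver_fn)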
Code example #34
 def _process_records(self, examples):
     """Parse `tf.Example`s into `Tensors`."""
     return parsing_ops.parse_example(serialized=examples,
                                      features=self._features)
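
`self._features` here is assumed to be an ordinary `parse_example` feature specification, i.e. a dict mapping feature keys to `FixedLenFeature`/`VarLenFeature` entries. For illustration, such a spec might look like:

import tensorflow as tf

# Hypothetical spec of the kind _process_records expects.
features = {
    'age': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
    'gender': tf.FixedLenFeature([], dtype=tf.string),
    'tags': tf.VarLenFeature(dtype=tf.string),  # variable-length -> SparseTensor
}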
Code example #35
File: graph_io_test.py  Project: Ajaycs99/tensorflow
 def filter_fn(keys, examples_json):
   del keys
   serialized = parsing_ops.decode_json_example(examples_json)
   examples = parsing_ops.parse_example(serialized, features)
   return math_ops.less(examples["age"], 2)
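
`decode_json_example` first converts JSON-encoded `Example` protos into the binary wire format that `parse_example` understands. A standalone sketch of the same two-step pipeline, with a toy JSON record (assumed, not from the test):

import tensorflow as tf

# One Example in protobuf's JSON mapping, holding a single int64 feature.
json_example = (
    '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}')
features = {'age': tf.FixedLenFeature([1], dtype=tf.int64)}

serialized = tf.decode_json_example([json_example])
parsed = tf.parse_example(serialized, features)
keep = tf.less(parsed['age'], 2)  # same predicate as filter_fn

with tf.Session() as sess:
    print(sess.run(keep))  # [[ True]]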
Code example #36
 def _train_input_fn():
     feature_map = parsing_ops.parse_example(serialized_examples,
                                             feature_spec)
     features = linear_testing_utils.queue_parsed_features(feature_map)
     labels = features.pop('y')
     return features, labels
Code example #37
File: example_parser.py  Project: Baaaaam/tensorflow
 def _apply_transform(self, input_tensors):
   parsed_values = parsing_ops.parse_example(input_tensors[0],
                                             features=self._ordered_features)
   # pylint: disable=not-callable
   return self.return_type(**parsed_values)
Code example #38
File: graph_io.py  Project: DavidNemeskey/tensorflow
def _read_keyed_batch_features_shared_queue(file_pattern,
                                            batch_size,
                                            features,
                                            reader,
                                            randomize_input=True,
                                            num_epochs=None,
                                            queue_capacity=10000,
                                            reader_num_threads=1,
                                            feature_queue_capacity=100,
                                            num_queue_runners=2,
                                            parse_fn=None,
                                            name=None):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a shared queue for file
  names, setup a worker queue that gets filenames from the shared queue,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_local_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_queue_runners: Number of queue runners to start for the feature queue.
      Adding multiple queue runners for the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing.
    parse_fn: Parsing function, takes `Example` Tensor returns parsed
      representation. If `None`, no parsing is done.
    name: Name of resulting op.

  Returns:
    A tuple of:
    - `Tensor` of string keys.
    - A dict of `Tensor` or `SparseTensor` objects, one for each key in
      `features`.

  Raises:
    ValueError: for invalid inputs.
  """

  with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
    keys, examples = _read_keyed_batch_examples_shared_queue(
        file_pattern,
        batch_size,
        reader,
        randomize_input=randomize_input,
        num_epochs=num_epochs,
        queue_capacity=queue_capacity,
        num_threads=reader_num_threads,
        read_batch_size=batch_size,
        parse_fn=parse_fn,
        name=scope)
    # Parse the example.
    feature_map = parsing_ops.parse_example(examples, features)
    return queue_parsed_features(
        feature_map,
        keys=keys,
        feature_queue_capacity=feature_queue_capacity,
        num_queue_runners=num_queue_runners,
        name=scope)
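
A hedged sketch of driving this helper: pass a reader class such as `tf.TFRecordReader`, initialize local variables (needed when `num_epochs` is set), and start the queue runners before pulling batches. The file pattern and feature spec below are assumed:

import tensorflow as tf

my_features = {'age': tf.FixedLenFeature([1], dtype=tf.int64)}
keys, feature_map = _read_keyed_batch_features_shared_queue(
    file_pattern='/tmp/data/*.tfrecord',
    batch_size=32,
    features=my_features,
    reader=tf.TFRecordReader,
    num_epochs=1)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Each fetch yields one parsed batch of feature tensors.
        print(sess.run(feature_map['age']).shape)  # (32, 1), given enough records
    finally:
        coord.request_stop()
        coord.join(threads)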
Code example #41
 def _train_input_fn():
     features = parsing_ops.parse_example(serialized_examples,
                                          feature_spec)
     labels = features.pop('label')
     return features, labels
Code example #42
 def _eval_input_fn():
     features = parsing_ops.parse_example(
         input_lib.limit_epochs(serialized_examples, num_epochs=1),
         feature_spec)
     labels = features.pop('label')
     return features, labels
Code example #43
 def _predict_input_fn():
     features = parsing_ops.parse_example(
         input_lib.limit_epochs(serialized_examples, num_epochs=1),
         feature_spec)
     features.pop('label')
     return features, None
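
The three input functions above form the usual train/evaluate/predict trio for a TF1 `Estimator`. A hedged wiring sketch; `serialized_examples`, the feature spec, and the estimator itself are assumed for illustration and are not part of the snippets:

import tensorflow as tf

# Assumed context: `serialized_examples` (a batch of serialized tf.Example
# strings) and `feature_spec` are closed over by the input functions above.
feature_spec = {
    'x': tf.FixedLenFeature([1], dtype=tf.float32),
    'label': tf.FixedLenFeature([1], dtype=tf.int64),
}
estimator = tf.estimator.LinearClassifier(
    feature_columns=[tf.feature_column.numeric_column('x')])

estimator.train(input_fn=_train_input_fn, steps=10)
metrics = estimator.evaluate(input_fn=_eval_input_fn)
predictions = list(estimator.predict(input_fn=_predict_input_fn))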
Code example #44
 def _serving_input_receiver_fn():
     """A receiver function to be passed to export_savedmodel."""
     placeholders = {}
     placeholders[feature_keys.TrainEvalFeatures.TIMES] = (
         array_ops.placeholder(
             name=feature_keys.TrainEvalFeatures.TIMES,
             dtype=dtypes.int64,
             shape=[default_batch_size, default_series_length]))
     # Values are only necessary when filtering. For prediction the default
     # value will be ignored.
     placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
         array_ops.placeholder_with_default(
             name=feature_keys.TrainEvalFeatures.VALUES,
             input=array_ops.zeros(shape=[
                 default_batch_size if default_batch_size else 0,
                 default_series_length if default_series_length else 0,
                 self._model.num_features
             ],
                                   dtype=self._model.dtype),
             shape=(default_batch_size, default_series_length,
                    self._model.num_features)))
     if self._model.exogenous_feature_columns:
         with ops.Graph().as_default():
             # Default placeholders have only an unknown batch dimension. Make them
             # in a separate graph, then splice in the series length to the shapes
             # and re-create them in the outer graph.
             parsed_features = (feature_column.make_parse_example_spec(
                 self._model.exogenous_feature_columns))
             placeholder_features = parsing_ops.parse_example(
                 serialized=array_ops.placeholder(shape=[None],
                                                  dtype=dtypes.string),
                 features=parsed_features)
             exogenous_feature_shapes = {
                 key: (value.get_shape(), value.dtype)
                 for key, value in placeholder_features.items()
             }
         for feature_key, (batch_only_feature_shape, value_dtype) in (
                 exogenous_feature_shapes.items()):
             batch_only_feature_shape = (
                 batch_only_feature_shape.with_rank_at_least(
                     1).as_list())
             feature_shape = (
                 [default_batch_size, default_series_length] +
                 batch_only_feature_shape[1:])
             placeholders[feature_key] = array_ops.placeholder(
                 dtype=value_dtype,
                 name=feature_key,
                 shape=feature_shape)
     # Models may not know the shape of their state without creating some
     # variables/ops. Avoid polluting the default graph by making a new one. We
     # use only static metadata from the returned Tensors.
     with ops.Graph().as_default():
         self._model.initialize_graph()
         model_start_state = self._model.get_start_state()
     for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary(
             model_start_state).items():
         state_shape_with_batch = tensor_shape.TensorShape(
             (default_batch_size, )).concatenate(
                 state_tensor.get_shape())
         placeholders[prefixed_state_name] = array_ops.placeholder(
             name=prefixed_state_name,
             shape=state_shape_with_batch,
             dtype=state_tensor.dtype)
     return export_lib.ServingInputReceiver(placeholders, placeholders)
Code example #45
File: dnn_test.py  Project: AndrewTwinz/tensorflow
 def _train_input_fn():
   feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
   features = _queue_parsed_features(feature_map)
   labels = features.pop('y')
   return features, labels
Code example #46
 def _get_feature_ops_from_example(self, examples_batch):
     column_types = layers.create_feature_spec_for_parsing(
         (self._get_linear_feature_columns() or []) +
         (self._get_dnn_feature_columns() or []))
     features = parsing_ops.parse_example(examples_batch, column_types)
     return features
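
`create_feature_spec_for_parsing` converts contrib feature columns into the `FixedLenFeature`/`VarLenFeature` dict that `parse_example` expects. A small sketch under that assumption (TF1 `tf.contrib.layers` APIs; the column names are made up):

import tensorflow as tf
from tensorflow.contrib import layers

age = layers.real_valued_column('age')
gender = layers.sparse_column_with_hash_bucket('gender', hash_bucket_size=10)

# Roughly: {'age': FixedLenFeature((1,), tf.float32),
#           'gender': VarLenFeature(tf.string)}
spec = layers.create_feature_spec_for_parsing([age, gender])

serialized_examples = tf.placeholder(tf.string, shape=[None])
features = tf.parse_example(serialized_examples, spec)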
Code example #47
 def _apply_transform(self, input_tensors, **kwargs):
     parsed_values = parsing_ops.parse_example(
         input_tensors[0], features=self._ordered_features)
     # pylint: disable=not-callable
     return self.return_type(**parsed_values)
Code example #48
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
    """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.io.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
Code example #49
def make_batched_features_dataset(file_pattern,
                                  batch_size,
                                  features,
                                  reader=core_readers.TFRecordDataset,
                                  reader_args=None,
                                  num_epochs=None,
                                  shuffle=True,
                                  shuffle_buffer_size=10000,
                                  shuffle_seed=None,
                                  prefetch_buffer_size=1,
                                  reader_num_threads=1,
                                  parser_num_threads=2,
                                  sloppy_ordering=False,
                                  drop_final_batch=False):
    """Returns a `Dataset` of feature dictionaries from `Example` protos.

  Example:

  ```
  serialized_examples = [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"]
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of consecutive elements of this
      dataset to combine in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
      to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but would increase memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step (default is 1).
    reader_num_threads: Number of threads used to read `Example` records. If >1,
      the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`. Note that if the seed is set, then order
      of elements after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements. Each `dict` maps feature keys to
    `Tensor` or `SparseTensor` objects.
  """
  # Create a dataset of all matching filenames.
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel.
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset.map(lambda _, v: v)

  # Apply dataset repeat and shuffle transformations.
  repeat_dataset = (num_epochs != 1)
  if repeat_dataset and shuffle:
    # Use the fused shuffle_and_repeat operation for better performance.
    dataset = dataset.apply(
        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
                                       shuffle_seed))
  elif repeat_dataset:
    dataset = dataset.repeat(num_epochs)
  elif shuffle:
    dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed)

  if drop_final_batch:
    dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
  else:
    dataset = dataset.batch(batch_size)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features),
                        num_parallel_calls=parser_num_threads)

  # TODO(rachelim): Add an optional label_name argument for extracting the
  # label from the features dictionary, to comply with the type expected by
  # the input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function.
  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
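
A hedged usage sketch for the function above, consuming the dataset with a TF1 one-shot iterator (the file pattern and feature spec are assumed):

import tensorflow as tf

features = {
    'age': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
    'gender': tf.FixedLenFeature([], dtype=tf.string),
}
dataset = make_batched_features_dataset(
    file_pattern='/tmp/data/*.tfrecord',
    batch_size=32,
    features=features,
    num_epochs=1)

iterator = dataset.make_one_shot_iterator()
next_batch = iterator.get_next()  # dict of Tensors keyed by feature name
with tf.Session() as sess:
    batch = sess.run(next_batch)
    print(batch['age'].shape)  # (32,) for full batches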