Example #1
0
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
    context_dense_defaults = (
        {} if context_dense_defaults is None else context_dense_defaults)
    context_sparse_keys = (
        [] if context_sparse_keys is None else context_sparse_keys)
    context_sparse_types = (
        [] if context_sparse_types is None else context_sparse_types)
    context_dense_keys = (
        [] if context_dense_keys is None else context_dense_keys)
    context_dense_types = (
        [] if context_dense_types is None else context_dense_types)
    context_dense_shapes = (
        [[]] * len(context_dense_keys)
        if context_dense_shapes is None else context_dense_shapes)
    feature_list_sparse_keys = (
        [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
    feature_list_sparse_types = (
        [] if feature_list_sparse_types is None else feature_list_sparse_types)
    feature_list_dense_keys = (
        [] if feature_list_dense_keys is None else feature_list_dense_keys)
    feature_list_dense_types = (
        [] if feature_list_dense_types is None else feature_list_dense_types)
    feature_list_dense_shapes = (
        [[]] * len(feature_list_dense_keys)
        if feature_list_dense_shapes is None else feature_list_dense_shapes)
    feature_list_dense_defaults = (
        dict() if feature_list_dense_defaults is None
        else feature_list_dense_defaults)
    debug_name = "" if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []

    num_context_dense = len(context_dense_keys)
    num_feature_list_dense = len(feature_list_dense_keys)
    num_context_sparse = len(context_sparse_keys)
    num_feature_list_sparse = len(feature_list_sparse_keys)

    if len(context_dense_shapes) != num_context_dense:
      raise ValueError(
          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_shapes), num_context_dense))
    if len(context_dense_types) != num_context_dense:
      raise ValueError(
          "len(context_dense_types) != len(num_context_dense): %d vs. %d"
          % (len(context_dense_types), num_context_dense))
    if len(feature_list_dense_shapes) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_shapes),
                         num_feature_list_dense))
    if len(feature_list_dense_types) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_types) != len(num_feature_list_dense):"
          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
    if len(context_sparse_types) != num_context_sparse:
      raise ValueError(
          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
          % (len(context_sparse_types), num_context_sparse))
    if len(feature_list_sparse_types) != num_feature_list_sparse:
      raise ValueError(
          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
          "%d vs. %d"
          % (len(feature_list_sparse_types), num_feature_list_sparse))
    if (num_context_dense + num_context_sparse
        + num_feature_list_dense + num_feature_list_sparse) == 0:
      raise ValueError(
          "Must provide at least one context_sparse key, context_dense key, "
          ", feature_list_sparse key, or feature_list_dense key")
    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
      raise ValueError(
          "context_dense and context_sparse keys must not intersect; "
          "intersection: %s" %
          set(context_dense_keys).intersection(set(context_sparse_keys)))
    if not set(feature_list_dense_keys).isdisjoint(
        set(feature_list_sparse_keys)):
      raise ValueError(
          "feature_list_dense and feature_list_sparse keys must not intersect; "
          "intersection: %s" %
          set(feature_list_dense_keys).intersection(
              set(feature_list_sparse_keys)))
    if not isinstance(feature_list_dense_defaults, dict):
      raise TypeError("feature_list_dense_defaults must be a dict")
    for k, v in feature_list_dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list_dense_defaults[%s] must be None"
                         % k)
      feature_list_dense_missing_assumed_empty.append(k)

    context_dense_defaults_vec = []
    for i, key in enumerate(context_dense_keys):
      default_value = context_dense_defaults.get(key)
      if default_value is None:
        default_value = constant_op.constant([], dtype=context_dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=context_dense_types[i], name=key_name)
        default_value = array_ops.reshape(
            default_value, context_dense_shapes[i])

      context_dense_defaults_vec.append(default_value)

    context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                            for shape in context_dense_shapes]
    feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                                 for shape in feature_list_dense_shapes]

    # pylint: disable=protected-access
    outputs = gen_parsing_ops._parse_single_sequence_example(
        serialized=serialized,
        debug_name=debug_name,
        context_dense_defaults=context_dense_defaults_vec,
        context_sparse_keys=context_sparse_keys,
        context_sparse_types=context_sparse_types,
        context_dense_keys=context_dense_keys,
        context_dense_shapes=context_dense_shapes,
        feature_list_sparse_keys=feature_list_sparse_keys,
        feature_list_sparse_types=feature_list_sparse_types,
        feature_list_dense_keys=feature_list_dense_keys,
        feature_list_dense_types=feature_list_dense_types,
        feature_list_dense_shapes=feature_list_dense_shapes,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty),
        name=name)
    # pylint: enable=protected-access

    (context_sparse_indices, context_sparse_values,
     context_sparse_shapes, context_dense_values,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values) = outputs

    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(context_sparse_indices,
               context_sparse_values,
               context_sparse_shapes)]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(feature_list_sparse_indices,
               feature_list_sparse_values,
               feature_list_sparse_shapes)]

    context_output = dict(
        zip(context_sparse_keys + context_dense_keys,
            context_sparse_tensors + context_dense_values))
    feature_list_output = dict(
        zip(feature_list_sparse_keys + feature_list_dense_keys,
            feature_list_sparse_tensors + feature_list_dense_values))

    return (context_output, feature_list_output)
Example #2
0
def parse_single_sequence_example(
        serialized,  # pylint: disable=invalid-name
        context_sparse_keys=None,
        context_sparse_types=None,
        context_dense_keys=None,
        context_dense_types=None,
        context_dense_defaults=None,
        context_dense_shapes=None,
        feature_list_sparse_keys=None,
        feature_list_sparse_types=None,
        feature_list_dense_keys=None,
        feature_list_dense_types=None,
        feature_list_dense_shapes=None,
        feature_list_dense_defaults=None,
        debug_name=None,
        name="ParseSingleSequenceExample"):
    # pylint: disable=line-too-long
    """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialize sequence example into a tuple of dictionaries
  mapping keys to `Tensor` and `SparseTensor` objects respectively.
  The first dictionary contains mappings for keys appearing in
  `context_sparse_keys` or `context_dense_keys`, and the second dictionary
  contains mappings for keys appearing in `feature_list_dense_keys`.

  The `context` keys are associated with a `SequenceExample` as a whole,
  independent of time / frame.  In contrast, the `feature_list` keys provide
  a way to access variable-length data within the `FeatureList` section of the
  `SequenceExample` proto.  While the shapes of `context` values are fixed
  with respect to frame, the frame dimension (the first dimension)
  of `feature_list` values may vary from `SequenceExample` to `SequenceExample`
  and even between `feature_list` keys within the same `SequenceExample`.

  The key `context_dense_keys[j]` is mapped to a `Tensor` of type
  `context_dense_types[j]` and of shape `context_dense_shapes[j]`.

  `context_dense_defaults` provides defaults for values referenced using
  `context_dense_keys`.  If a key is not present in this dictionary, the
  corresponding context_dense `Feature` is required in `serialized`.

  `context_dense_shapes[j]` provides the shape of each context `Feature` entry
  referenced by `context_dense_keys[j]`. The number of elements in the
  `Feature` corresponding to `context_dense_key[j]` must always have
  `np.prod(context_dense_shapes[j])` entries. The returned `Tensor` for
  `context_dense_key[j]` has shape `context_dense_shape[j]`.

  The key `context_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `context_sparse_types[j]`. This `SparseTensor` represents a ragged vector.
  Its indices are `[index]`, where `index` is the value's index in the list of
  values associated with that feature and example.

  The key `feature_list_dense_keys[j]` is mapped to a `Tensor` of type
  `feature_list_dense_types[j]` and of shape
  `(T,) + feature_list_dense_shapes[j]`, where `T` is the length of the
  associated `FeatureList` in the `SequenceExample`.

  Note: every key declared in `feature_list_dense_keys` **must** be
  provided in the `SequenceExample`'s `FeatureLists`, even if just empty.
  Exceptions are allowed by adding the given key to the map
  `feature_list_dense_defaults` with value None.  Any key with value None
  map will be  treated as empty (zero length) if not found in the
  `FeatureList` map.

  The key `feature_list_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `feature_list_sparse_types[j]`. This `SparseTensor` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the FeatureList
  entry `index` is the value's index in the list of values associated with that
  time.

  `debug_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `debug_name` must be a scalar.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed  to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
    # pylint: enable=line-too-long
    with ops.op_scope([serialized], name, "parse_single_sequence_example"):
        context_dense_defaults = ({} if context_dense_defaults is None else
                                  context_dense_defaults)
        context_sparse_keys = ([] if context_sparse_keys is None else
                               context_sparse_keys)
        context_sparse_types = ([] if context_sparse_types is None else
                                context_sparse_types)
        context_dense_keys = ([] if context_dense_keys is None else
                              context_dense_keys)
        context_dense_types = ([] if context_dense_types is None else
                               context_dense_types)
        context_dense_shapes = ([[]] * len(context_dense_keys)
                                if context_dense_shapes is None else
                                context_dense_shapes)
        feature_list_sparse_keys = ([] if feature_list_sparse_keys is None else
                                    feature_list_sparse_keys)
        feature_list_sparse_types = ([] if feature_list_sparse_types is None
                                     else feature_list_sparse_types)
        feature_list_dense_keys = ([] if feature_list_dense_keys is None else
                                   feature_list_dense_keys)
        feature_list_dense_types = ([] if feature_list_dense_types is None else
                                    feature_list_dense_types)
        feature_list_dense_shapes = ([[]] * len(feature_list_dense_keys)
                                     if feature_list_dense_shapes is None else
                                     feature_list_dense_shapes)
        feature_list_dense_defaults = (dict()
                                       if feature_list_dense_defaults is None
                                       else feature_list_dense_defaults)
        debug_name = "" if debug_name is None else debug_name

        # Internal
        feature_list_dense_missing_assumed_empty = []

        num_context_dense = len(context_dense_keys)
        num_feature_list_dense = len(feature_list_dense_keys)
        num_context_sparse = len(context_sparse_keys)
        num_feature_list_sparse = len(feature_list_sparse_keys)

        if len(context_dense_shapes) != num_context_dense:
            raise ValueError(
                "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
                % (len(context_dense_shapes), num_context_dense))
        if len(context_dense_types) != num_context_dense:
            raise ValueError(
                "len(context_dense_types) != len(num_context_dense): %d vs. %d"
                % (len(context_dense_types), num_context_dense))
        if len(feature_list_dense_shapes) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
                "%d vs. %d" %
                (len(feature_list_dense_shapes), num_feature_list_dense))
        if len(feature_list_dense_types) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_types) != len(num_feature_list_dense):"
                "%d vs. %d" %
                (len(feature_list_dense_types), num_feature_list_dense))
        if len(context_sparse_types) != num_context_sparse:
            raise ValueError(
                "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
                % (len(context_sparse_types), num_context_sparse))
        if len(feature_list_sparse_types) != num_feature_list_sparse:
            raise ValueError(
                "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
                "%d vs. %d" %
                (len(feature_list_sparse_types), num_feature_list_sparse))
        if (num_context_dense + num_context_sparse + num_feature_list_dense +
                num_feature_list_sparse) == 0:
            raise ValueError(
                "Must provide at least one context_sparse key, context_dense key, "
                ", feature_list_sparse key, or feature_list_dense key")
        if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
            raise ValueError(
                "context_dense and context_sparse keys must not intersect; "
                "intersection: %s" %
                set(context_dense_keys).intersection(set(context_sparse_keys)))
        if not set(feature_list_dense_keys).isdisjoint(
                set(feature_list_sparse_keys)):
            raise ValueError(
                "feature_list_dense and feature_list_sparse keys must not intersect; "
                "intersection: %s" % set(feature_list_dense_keys).intersection(
                    set(feature_list_sparse_keys)))
        if not isinstance(feature_list_dense_defaults, dict):
            raise TypeError("feature_list_dense_defaults must be a dict")
        for k, v in feature_list_dense_defaults.items():
            if v is not None:
                raise ValueError(
                    "Value feature_list_dense_defaults[%s] must be None" % k)
            feature_list_dense_missing_assumed_empty.append(k)

        context_dense_defaults_vec = []
        for i, key in enumerate(context_dense_keys):
            default_value = context_dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant(
                    [], dtype=context_dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(
                    default_value, dtype=context_dense_types[i], name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  context_dense_shapes[i])

            context_dense_defaults_vec.append(default_value)

        context_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape
            for shape in context_dense_shapes
        ]
        feature_list_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(
                shape, (list, tuple)) else shape
            for shape in feature_list_dense_shapes
        ]

        outputs = gen_parsing_ops._parse_single_sequence_example(
            serialized=serialized,
            debug_name=debug_name,
            context_dense_defaults=context_dense_defaults_vec,
            context_sparse_keys=context_sparse_keys,
            context_sparse_types=context_sparse_types,
            context_dense_keys=context_dense_keys,
            context_dense_shapes=context_dense_shapes,
            feature_list_sparse_keys=feature_list_sparse_keys,
            feature_list_sparse_types=feature_list_sparse_types,
            feature_list_dense_keys=feature_list_dense_keys,
            feature_list_dense_types=feature_list_dense_types,
            feature_list_dense_shapes=feature_list_dense_shapes,
            feature_list_dense_missing_assumed_empty=(
                feature_list_dense_missing_assumed_empty),
            name=name)

        (context_sparse_indices, context_sparse_values, context_sparse_shapes,
         context_dense_values, feature_list_sparse_indices,
         feature_list_sparse_values, feature_list_sparse_shapes,
         feature_list_dense_values) = outputs

        context_sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(context_sparse_indices, context_sparse_values,
                               context_sparse_shapes)
        ]

        feature_list_sparse_tensors = [
            ops.SparseTensor(ix, val, shape) for (ix, val, shape) in zip(
                feature_list_sparse_indices, feature_list_sparse_values,
                feature_list_sparse_shapes)
        ]

        context_output = dict(
            zip(context_sparse_keys + context_dense_keys,
                context_sparse_tensors + context_dense_values))
        feature_list_output = dict(
            zip(feature_list_sparse_keys + feature_list_dense_keys,
                feature_list_sparse_tensors + feature_list_dense_values))

        return (context_output, feature_list_output)
Example #3
0
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
    """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
    with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
        context_dense_defaults = ({} if context_dense_defaults is None else
                                  context_dense_defaults)
        context_sparse_keys = ([] if context_sparse_keys is None else
                               context_sparse_keys)
        context_sparse_types = ([] if context_sparse_types is None else
                                context_sparse_types)
        context_dense_keys = ([] if context_dense_keys is None else
                              context_dense_keys)
        context_dense_types = ([] if context_dense_types is None else
                               context_dense_types)
        context_dense_shapes = ([[]] * len(context_dense_keys)
                                if context_dense_shapes is None else
                                context_dense_shapes)
        feature_list_sparse_keys = ([] if feature_list_sparse_keys is None else
                                    feature_list_sparse_keys)
        feature_list_sparse_types = ([] if feature_list_sparse_types is None
                                     else feature_list_sparse_types)
        feature_list_dense_keys = ([] if feature_list_dense_keys is None else
                                   feature_list_dense_keys)
        feature_list_dense_types = ([] if feature_list_dense_types is None else
                                    feature_list_dense_types)
        feature_list_dense_shapes = ([[]] * len(feature_list_dense_keys)
                                     if feature_list_dense_shapes is None else
                                     feature_list_dense_shapes)
        feature_list_dense_defaults = (dict()
                                       if feature_list_dense_defaults is None
                                       else feature_list_dense_defaults)
        debug_name = "" if debug_name is None else debug_name

        # Internal
        feature_list_dense_missing_assumed_empty = []

        num_context_dense = len(context_dense_keys)
        num_feature_list_dense = len(feature_list_dense_keys)
        num_context_sparse = len(context_sparse_keys)
        num_feature_list_sparse = len(feature_list_sparse_keys)

        if len(context_dense_shapes) != num_context_dense:
            raise ValueError(
                "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
                % (len(context_dense_shapes), num_context_dense))
        if len(context_dense_types) != num_context_dense:
            raise ValueError(
                "len(context_dense_types) != len(num_context_dense): %d vs. %d"
                % (len(context_dense_types), num_context_dense))
        if len(feature_list_dense_shapes) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
                "%d vs. %d" %
                (len(feature_list_dense_shapes), num_feature_list_dense))
        if len(feature_list_dense_types) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_types) != len(num_feature_list_dense):"
                "%d vs. %d" %
                (len(feature_list_dense_types), num_feature_list_dense))
        if len(context_sparse_types) != num_context_sparse:
            raise ValueError(
                "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
                % (len(context_sparse_types), num_context_sparse))
        if len(feature_list_sparse_types) != num_feature_list_sparse:
            raise ValueError(
                "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
                "%d vs. %d" %
                (len(feature_list_sparse_types), num_feature_list_sparse))
        if (num_context_dense + num_context_sparse + num_feature_list_dense +
                num_feature_list_sparse) == 0:
            raise ValueError(
                "Must provide at least one context_sparse key, context_dense key, "
                ", feature_list_sparse key, or feature_list_dense key")
        if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
            raise ValueError(
                "context_dense and context_sparse keys must not intersect; "
                "intersection: %s" %
                set(context_dense_keys).intersection(set(context_sparse_keys)))
        if not set(feature_list_dense_keys).isdisjoint(
                set(feature_list_sparse_keys)):
            raise ValueError(
                "feature_list_dense and feature_list_sparse keys must not intersect; "
                "intersection: %s" % set(feature_list_dense_keys).intersection(
                    set(feature_list_sparse_keys)))
        if not isinstance(feature_list_dense_defaults, dict):
            raise TypeError("feature_list_dense_defaults must be a dict")
        for k, v in feature_list_dense_defaults.items():
            if v is not None:
                raise ValueError(
                    "Value feature_list_dense_defaults[%s] must be None" % k)
            feature_list_dense_missing_assumed_empty.append(k)

        context_dense_defaults_vec = []
        for i, key in enumerate(context_dense_keys):
            default_value = context_dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant(
                    [], dtype=context_dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(
                    default_value, dtype=context_dense_types[i], name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  context_dense_shapes[i])

            context_dense_defaults_vec.append(default_value)

        context_dense_shapes = [
            tensor_shape.as_shape(shape).as_proto()
            for shape in context_dense_shapes
        ]
        feature_list_dense_shapes = [
            tensor_shape.as_shape(shape).as_proto()
            for shape in feature_list_dense_shapes
        ]

        # pylint: disable=protected-access
        outputs = gen_parsing_ops._parse_single_sequence_example(
            serialized=serialized,
            debug_name=debug_name,
            context_dense_defaults=context_dense_defaults_vec,
            context_sparse_keys=context_sparse_keys,
            context_sparse_types=context_sparse_types,
            context_dense_keys=context_dense_keys,
            context_dense_shapes=context_dense_shapes,
            feature_list_sparse_keys=feature_list_sparse_keys,
            feature_list_sparse_types=feature_list_sparse_types,
            feature_list_dense_keys=feature_list_dense_keys,
            feature_list_dense_types=feature_list_dense_types,
            feature_list_dense_shapes=feature_list_dense_shapes,
            feature_list_dense_missing_assumed_empty=(
                feature_list_dense_missing_assumed_empty),
            name=name)
        # pylint: enable=protected-access

        (context_sparse_indices, context_sparse_values, context_sparse_shapes,
         context_dense_values, feature_list_sparse_indices,
         feature_list_sparse_values, feature_list_sparse_shapes,
         feature_list_dense_values) = outputs

        context_sparse_tensors = [
            sparse_tensor.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(context_sparse_indices, context_sparse_values,
                               context_sparse_shapes)
        ]

        feature_list_sparse_tensors = [
            sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape) in
            zip(feature_list_sparse_indices, feature_list_sparse_values,
                feature_list_sparse_shapes)
        ]

        context_output = dict(
            zip(context_sparse_keys + context_dense_keys,
                context_sparse_tensors + context_dense_values))
        feature_list_output = dict(
            zip(feature_list_sparse_keys + feature_list_dense_keys,
                feature_list_sparse_tensors + feature_list_dense_values))

        return (context_output, feature_list_output)
Example #4
0
def parse_single_sequence_example(
    serialized,  # pylint: disable=invalid-name
    context_sparse_keys=None,
    context_sparse_types=None,
    context_dense_keys=None,
    context_dense_types=None,
    context_dense_defaults=None,
    context_dense_shapes=None,
    feature_list_sparse_keys=None,
    feature_list_sparse_types=None,
    feature_list_dense_keys=None,
    feature_list_dense_types=None,
    feature_list_dense_shapes=None,
    feature_list_dense_defaults=None,
    debug_name=None,
    name="ParseSingleSequenceExample",
):
    # pylint: disable=line-too-long
    """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`]
  (https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialize sequence example into a tuple of dictionaries
  mapping keys to `Tensor` and `SparseTensor` objects respectively.
  The first dictionary contains mappings for keys appearing in
  `context_sparse_keys` or `context_dense_keys`, and the second dictionary
  contains mappings for keys appearing in `feature_list_dense_keys`.

  The `context` keys are associated with a `SequenceExample` as a whole,
  independent of time / frame.  In contrast, the `feature_list` keys provide
  a way to access variable-length data within the `FeatureList` section of the
  `SequenceExample` proto.  While the shapes of `context` values are fixed
  with respect to frame, the frame dimension (the first dimension)
  of `feature_list` values may vary from `SequenceExample` to `SequenceExample`
  and even between `feature_list` keys within the same `SequenceExample`.

  The key `context_dense_keys[j]` is mapped to a `Tensor` of type
  `context_dense_types[j]` and of shape `context_dense_shapes[j]`.

  `context_dense_defaults` provides defaults for values referenced using
  `context_dense_keys`.  If a key is not present in this dictionary, the
  corresponding context_dense `Feature` is required in `serialized`.

  `context_dense_shapes[j]` provides the shape of each context `Feature` entry
  referenced by `context_dense_keys[j]`. The number of elements in the
  `Feature` corresponding to `context_dense_key[j]` must always have
  `np.prod(context_dense_shapes[j])` entries. The returned `Tensor` for
  `context_dense_key[j]` has shape `context_dense_shape[j]`.

  The key `context_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `context_sparse_types[j]`. This `SparseTensor` represents a ragged vector.
  Its indices are `[index]`, where `index` is the value's index in the list of
  values associated with that feature and example.

  The key `feature_list_dense_keys[j]` is mapped to a `Tensor` of type
  `feature_list_dense_types[j]` and of shape
  `(T,) + feature_list_dense_shapes[j]`, where `T` is the length of the
  associated `FeatureList` in the `SequenceExample`.

  Note: every key declared in `feature_list_dense_keys` **must** be
  provided in the `SequenceExample`'s `FeatureLists`, even if just empty.
  Exceptions are allowed by adding the given key to the map
  `feature_list_dense_defaults` with value None.  Any key with value None
  map will be  treated as empty (zero length) if not found in the
  `FeatureList` map.

  The key `feature_list_sparse_keys[j]` is mapped to a `SparseTensor` of type
  `feature_list_sparse_types[j]`. This `SparseTensor` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the FeatureList
  entry `index` is the value's index in the list of values associated with that
  time.

  `debug_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `debug_name` must be a scalar.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed  to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if input lengths do not match up, or if a value in
      feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
    # pylint: enable=line-too-long
    with ops.op_scope([serialized], name, "parse_single_sequence_example"):
        context_dense_defaults = {} if context_dense_defaults is None else context_dense_defaults
        context_sparse_keys = [] if context_sparse_keys is None else context_sparse_keys
        context_sparse_types = [] if context_sparse_types is None else context_sparse_types
        context_dense_keys = [] if context_dense_keys is None else context_dense_keys
        context_dense_types = [] if context_dense_types is None else context_dense_types
        context_dense_shapes = [[]] * len(context_dense_keys) if context_dense_shapes is None else context_dense_shapes
        feature_list_sparse_keys = [] if feature_list_sparse_keys is None else feature_list_sparse_keys
        feature_list_sparse_types = [] if feature_list_sparse_types is None else feature_list_sparse_types
        feature_list_dense_keys = [] if feature_list_dense_keys is None else feature_list_dense_keys
        feature_list_dense_types = [] if feature_list_dense_types is None else feature_list_dense_types
        feature_list_dense_shapes = (
            [[]] * len(feature_list_dense_keys) if feature_list_dense_shapes is None else feature_list_dense_shapes
        )
        feature_list_dense_defaults = dict() if feature_list_dense_defaults is None else feature_list_dense_defaults
        debug_name = "" if debug_name is None else debug_name

        # Internal
        feature_list_dense_missing_assumed_empty = []

        num_context_dense = len(context_dense_keys)
        num_feature_list_dense = len(feature_list_dense_keys)
        num_context_sparse = len(context_sparse_keys)
        num_feature_list_sparse = len(feature_list_sparse_keys)

        if len(context_dense_shapes) != num_context_dense:
            raise ValueError(
                "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
                % (len(context_dense_shapes), num_context_dense)
            )
        if len(context_dense_types) != num_context_dense:
            raise ValueError(
                "len(context_dense_types) != len(num_context_dense): %d vs. %d"
                % (len(context_dense_types), num_context_dense)
            )
        if len(feature_list_dense_shapes) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
                "%d vs. %d" % (len(feature_list_dense_shapes), num_feature_list_dense)
            )
        if len(feature_list_dense_types) != num_feature_list_dense:
            raise ValueError(
                "len(feature_list_dense_types) != len(num_feature_list_dense):"
                "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense)
            )
        if len(context_sparse_types) != num_context_sparse:
            raise ValueError(
                "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
                % (len(context_sparse_types), num_context_sparse)
            )
        if len(feature_list_sparse_types) != num_feature_list_sparse:
            raise ValueError(
                "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
                "%d vs. %d" % (len(feature_list_sparse_types), num_feature_list_sparse)
            )
        if (num_context_dense + num_context_sparse + num_feature_list_dense + num_feature_list_sparse) == 0:
            raise ValueError(
                "Must provide at least one context_sparse key, context_dense key, "
                ", feature_list_sparse key, or feature_list_dense key"
            )
        if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
            raise ValueError(
                "context_dense and context_sparse keys must not intersect; "
                "intersection: %s" % set(context_dense_keys).intersection(set(context_sparse_keys))
            )
        if not set(feature_list_dense_keys).isdisjoint(set(feature_list_sparse_keys)):
            raise ValueError(
                "feature_list_dense and feature_list_sparse keys must not intersect; "
                "intersection: %s" % set(feature_list_dense_keys).intersection(set(feature_list_sparse_keys))
            )
        if not isinstance(feature_list_dense_defaults, dict):
            raise TypeError("feature_list_dense_defaults must be a dict")
        for k, v in feature_list_dense_defaults.items():
            if v is not None:
                raise ValueError("Value feature_list_dense_defaults[%s] must be None" % k)
            feature_list_dense_missing_assumed_empty.append(k)

        context_dense_defaults_vec = []
        for i, key in enumerate(context_dense_keys):
            default_value = context_dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant([], dtype=context_dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(default_value, dtype=context_dense_types[i], name=key_name)
                default_value = array_ops.reshape(default_value, context_dense_shapes[i])

            context_dense_defaults_vec.append(default_value)

        context_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(shape, (list, tuple)) else shape
            for shape in context_dense_shapes
        ]
        feature_list_dense_shapes = [
            tensor_util.MakeTensorShapeProto(shape) if isinstance(shape, (list, tuple)) else shape
            for shape in feature_list_dense_shapes
        ]

        outputs = gen_parsing_ops._parse_single_sequence_example(
            serialized=serialized,
            debug_name=debug_name,
            context_dense_defaults=context_dense_defaults_vec,
            context_sparse_keys=context_sparse_keys,
            context_sparse_types=context_sparse_types,
            context_dense_keys=context_dense_keys,
            context_dense_shapes=context_dense_shapes,
            feature_list_sparse_keys=feature_list_sparse_keys,
            feature_list_sparse_types=feature_list_sparse_types,
            feature_list_dense_keys=feature_list_dense_keys,
            feature_list_dense_types=feature_list_dense_types,
            feature_list_dense_shapes=feature_list_dense_shapes,
            feature_list_dense_missing_assumed_empty=(feature_list_dense_missing_assumed_empty),
            name=name,
        )

        (
            context_sparse_indices,
            context_sparse_values,
            context_sparse_shapes,
            context_dense_values,
            feature_list_sparse_indices,
            feature_list_sparse_values,
            feature_list_sparse_shapes,
            feature_list_dense_values,
        ) = outputs

        context_sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val, shape) in zip(context_sparse_indices, context_sparse_values, context_sparse_shapes)
        ]

        feature_list_sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val, shape) in zip(
                feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes
            )
        ]

        context_output = dict(
            zip(context_sparse_keys + context_dense_keys, context_sparse_tensors + context_dense_values)
        )
        feature_list_output = dict(
            zip(
                feature_list_sparse_keys + feature_list_dense_keys,
                feature_list_sparse_tensors + feature_list_dense_values,
            )
        )

        return (context_output, feature_list_output)