Exemple #1
0
def serialize_tf_example(datum):
    """Serialize a feature dictionary into a tfrecord.Example proto.

    Args:
        datum: Dictionary mapping feature names to tuples of the form
            (value, dtype). dtype must be "byte", "float" or "int".
            value is normally an iterable of items of that type; a single
            non-iterable item, or a single bytes/str object, is wrapped
            in a one-element list before being stored.

    Returns:
        The serialized Example message as bytes.

    Raises:
        KeyError: If a dtype is not one of "byte", "float" or "int".
    """
    # Built once per call instead of once per feature. The lambdas keep the
    # proto classes from being looked up until a feature is actually built.
    feature_builders = {
        "byte":
        lambda items: example_pb2.Feature(bytes_list=example_pb2.BytesList(
            value=items)),
        "float":
        lambda items: example_pb2.Feature(float_list=example_pb2.FloatList(
            value=items)),
        "int":
        lambda items: example_pb2.Feature(int64_list=example_pb2.Int64List(
            value=items)),
    }

    features = {}
    for key, (value, dtype) in datum.items():
        try:
            build_feature = feature_builders[dtype]
        except KeyError:
            raise KeyError(
                "Unsupported dtype {!r} for feature {!r}; expected one of "
                "{}".format(dtype, key, sorted(feature_builders))) from None

        # The proto list fields are repeated, so they need an iterable of
        # items. A bytes/str object is iterable but represents ONE item, and
        # plain numbers are not iterable at all: wrap both in a list.
        if isinstance(value, (bytes, str)):
            value = [value]
        else:
            try:
                iter(value)
            except TypeError:
                value = [value]

        features[key] = build_feature(value)

    example_proto = example_pb2.Example(features=example_pb2.Features(
        feature=features))
    return example_proto.SerializeToString()
Exemple #2
0
    def serialize_tf_sequence_example(
        context_datum: typing.Dict[str, typing.Tuple[typing.Any, str]],
        features_datum: typing.Dict[str, typing.Tuple[typing.List[typing.Any],
                                                      str]],
    ) -> bytes:
        """Build a tfrecord.SequenceExample proto and return it serialized.

        Params:
        -------
        context_datum: dict
            Maps each context key to a (value, dtype) tuple, where dtype
            is one of "byte", "float" or "int".

        features_datum: dict
            Maps each feature-list key to a (values, dtype) tuple with the
            same dtype choices; every element of `values` becomes one
            Feature inside that key's FeatureList.

        Returns:
        --------
        proto: bytes
            The serialized tfrecord.SequenceExample.
        """
        def bytes_feature(items):
            return example_pb2.Feature(
                bytes_list=example_pb2.BytesList(value=items))

        def float_feature(items):
            return example_pb2.Feature(
                float_list=example_pb2.FloatList(value=items))

        def int_feature(items):
            return example_pb2.Feature(
                int64_list=example_pb2.Int64List(value=items))

        builders = {
            "byte": bytes_feature,
            "float": float_feature,
            "int": int_feature,
        }

        def to_feature(raw, kind):
            # Anything that is not already a list/tuple/ndarray is treated
            # as a single item and wrapped.
            if isinstance(raw, (list, tuple, np.ndarray)):
                items = raw
            else:
                items = [raw]
            return builders[kind](items)

        def to_feature_list(steps, kind):
            # One Feature per element of `steps`, in order.
            converted = [to_feature(step, kind) for step in steps]
            result = example_pb2.FeatureList()
            result.feature.extend(converted)
            return result

        context_features = {}
        for key, (value, dtype) in context_datum.items():
            context_features[key] = to_feature(value, dtype)

        sequence_features = {}
        for key, (value, dtype) in features_datum.items():
            sequence_features[key] = to_feature_list(value, dtype)

        context_msg = example_pb2.Features(feature=context_features)
        feature_lists_msg = example_pb2.FeatureLists(
            feature_list=sequence_features)
        sequence_example = example_pb2.SequenceExample(
            context=context_msg, feature_lists=feature_lists_msg)
        return sequence_example.SerializeToString()
Exemple #3
0
    def serialize_tf_example(
            datum: typing.Dict[str, typing.Tuple[typing.Any, str]]) -> bytes:
        """Build a tfrecord.Example proto and return it serialized.

        Params:
        -------
        datum: dict
            Maps each feature key to a (value, dtype) tuple, where dtype
            is one of "byte", "float" or "int". A value that is not a
            list, tuple or numpy array is wrapped in a one-element list.

        Returns:
        --------
        proto: bytes
            The serialized tfrecord.Example.
        """
        def bytes_feature(items):
            return example_pb2.Feature(
                bytes_list=example_pb2.BytesList(value=items))

        def float_feature(items):
            return example_pb2.Feature(
                float_list=example_pb2.FloatList(value=items))

        def int_feature(items):
            return example_pb2.Feature(
                int64_list=example_pb2.Int64List(value=items))

        builders = {
            "byte": bytes_feature,
            "float": float_feature,
            "int": int_feature,
        }

        def to_feature(raw, kind):
            # Anything that is not already a list/tuple/ndarray is treated
            # as a single item and wrapped.
            if isinstance(raw, (list, tuple, np.ndarray)):
                items = raw
            else:
                items = [raw]
            return builders[kind](items)

        encoded = {}
        for key, (value, dtype) in datum.items():
            encoded[key] = to_feature(value, dtype)

        features_msg = example_pb2.Features(feature=encoded)
        return example_pb2.Example(features=features_msg).SerializeToString()