def serialize_tf_example(datum):
    """Serialize example into tfrecord.Example proto.

    Args:
        datum: Dictionary mapping a feature name to a tuple of form
            (value, dtype). dtype can be "byte", "float" or "int". The value
            is handed unchanged to the matching list proto (BytesList,
            FloatList or Int64List); scalars are not wrapped in a list here.

    Returns:
        Serialized tfrecord.example to bytes.

    Raises:
        KeyError: If a dtype is not one of "byte", "float" or "int".
    """
    # NOTE(review): another `serialize_tf_example` appears later in this file;
    # if both live in the same module scope the later one replaces this one.
    # Confirm which definition is meant to be kept.

    # Build the dtype -> Feature constructor table once, rather than once per
    # entry of `datum`.
    feature_map = {
        "byte": lambda f: example_pb2.Feature(
            bytes_list=example_pb2.BytesList(value=f)),
        "float": lambda f: example_pb2.Feature(
            float_list=example_pb2.FloatList(value=f)),
        "int": lambda f: example_pb2.Feature(
            int64_list=example_pb2.Int64List(value=f)),
    }

    features = {
        key: feature_map[dtype](value)
        for key, (value, dtype) in datum.items()
    }

    example_proto = example_pb2.Example(
        features=example_pb2.Features(feature=features))
    return example_proto.SerializeToString()
def serialize_tf_sequence_example(
    context_datum: typing.Dict[str, typing.Tuple[typing.Any, str]],
    features_datum: typing.Dict[str, typing.Tuple[typing.List[typing.Any], str]],
) -> bytes:
    """Build a tfrecord.SequenceExample proto and return it serialized.

    Params:
    -------
    context_datum: dict
        Maps a context feature name to a tuple (value, dtype), where
        dtype is "byte", "float" or "int". A value that is not a list,
        tuple or numpy array is wrapped in a one-element list.

    features_datum: dict
        Maps a sequence feature name to a tuple (values, dtype). Each
        element of `values` becomes one Feature of the resulting
        FeatureList, using the same wrapping rule as the context values.

    Returns:
    --------
    proto: bytes
        The SequenceExample serialized to bytes.
    """

    def _bytes_feature(values):
        return example_pb2.Feature(
            bytes_list=example_pb2.BytesList(value=values))

    def _float_feature(values):
        return example_pb2.Feature(
            float_list=example_pb2.FloatList(value=values))

    def _int_feature(values):
        return example_pb2.Feature(
            int64_list=example_pb2.Int64List(value=values))

    # Dispatch table: dtype string -> Feature builder.
    builders = {
        "byte": _bytes_feature,
        "float": _float_feature,
        "int": _int_feature,
    }

    def _as_feature(raw, kind):
        # The list protos take a sequence, so lone values are wrapped.
        if isinstance(raw, (list, tuple, np.ndarray)):
            wrapped = raw
        else:
            wrapped = [raw]
        return builders[kind](wrapped)

    def _as_feature_list(steps, kind):
        # One Feature per element of the sequence.
        sequence = example_pb2.FeatureList()
        sequence.feature.extend([_as_feature(step, kind) for step in steps])
        return sequence

    context_features = {}
    for name, (raw, kind) in context_datum.items():
        context_features[name] = _as_feature(raw, kind)

    sequence_features = {}
    for name, (steps, kind) in features_datum.items():
        sequence_features[name] = _as_feature_list(steps, kind)

    sequence_example = example_pb2.SequenceExample(
        context=example_pb2.Features(feature=context_features),
        feature_lists=example_pb2.FeatureLists(feature_list=sequence_features),
    )
    return sequence_example.SerializeToString()
def serialize_tf_example(
        datum: typing.Dict[str, typing.Tuple[typing.Any, str]]) -> bytes:
    """Build a tfrecord.Example proto and return it serialized.

    Params:
    -------
    datum: dict
        Maps a feature name to a tuple (value, dtype), where dtype is
        "byte", "float" or "int". A value that is not a list, tuple or
        numpy array is wrapped in a one-element list.

    Returns:
    --------
    proto: bytes
        The Example serialized to bytes.
    """
    # dtype string -> (Feature field name, list proto class for that field).
    list_kinds = {
        "byte": ("bytes_list", example_pb2.BytesList),
        "float": ("float_list", example_pb2.FloatList),
        "int": ("int64_list", example_pb2.Int64List),
    }

    def _make_feature(raw, kind):
        # The list protos take a sequence, so lone values are wrapped.
        wrapped = raw if isinstance(raw, (list, tuple, np.ndarray)) else [raw]
        field_name, list_cls = list_kinds[kind]
        return example_pb2.Feature(**{field_name: list_cls(value=wrapped)})

    feature_by_name = {}
    for name, (raw, kind) in datum.items():
        feature_by_name[name] = _make_feature(raw, kind)

    wrapped_features = example_pb2.Features(feature=feature_by_name)
    return example_pb2.Example(features=wrapped_features).SerializeToString()