def write(self, example: tf.train.Example):
    """Append *example* to the underlying file using TFRecord framing.

    Layout per record: an 8-byte little-endian length, a 4-byte masked CRC
    of the length bytes, the serialized payload, then a 4-byte masked CRC
    of the payload.
    """
    payload = example.SerializeToString()
    header = struct.pack("<Q", len(payload))
    # NOTE(review): self.java_file.write takes (buffer, byte_count) —
    # presumably a Java-stream-like wrapper; confirm against its definition.
    self.java_file.write(header, 8)
    self.java_file.write(self.masked_crc(header), 4)
    self.java_file.write(payload, len(payload))
    self.java_file.write(self.masked_crc(payload), 4)
def _ExamplePartitionKey(record: tf.train.Example,
                         split_config: example_gen_pb2.SplitConfig) -> bytes:
    """Returns the bytes used to partition *record* into a split.

    When no `partition_feature_name` is configured, the whole example
    (deterministically serialized) is the key; otherwise the named
    feature's deterministic serialization is used.

    Raises:
      RuntimeError: if the named feature is missing, holds no value, or is
        not a `bytes_list`/`int64_list` feature.
    """
    if not split_config.HasField('partition_feature_name'):
        return record.SerializeToString(deterministic=True)

    name = split_config.partition_feature_name
    features = record.features.feature
    # Membership test before indexing: subscripting a proto map field
    # would silently insert a default entry for a missing key.
    if name not in features:
        raise RuntimeError('Feature name `{}` does not exist.'.format(name))
    feature = features[name]
    if not feature.HasField('kind'):
        raise RuntimeError('Partition feature does not contain any value.')
    if not (feature.HasField('bytes_list') or feature.HasField('int64_list')):
        raise RuntimeError('Only `bytes_list` and `int64_list` features are '
                           'supported for partition.')
    return feature.SerializeToString(deterministic=True)
def write_tfrecord(example: tf.train.Example, filename: str):
    """Write a single serialized example to a TFRecord file at *filename*."""
    serialized = example.SerializeToString()
    # NOTE(review): tf.python_io is the TF1-era module name; tf.io.TFRecordWriter
    # is the TF2 spelling — confirm the target TF version before migrating.
    with tf.python_io.TFRecordWriter(filename) as writer:
        writer.write(serialized)
def write(example: tf.train.Example):
    """Serialize *example* and append it via the module-level `writer`."""
    payload = example.SerializeToString()
    writer.write(payload)
def __call__(self, example: tf.train.Example):
    """Round-robin the serialized example across the output shard writers.

    Relies on module-level `output_tfrecords` (writers) and `shards`
    (shard count); `self.index` counts examples written so far.
    """
    shard_writer = output_tfrecords[self.index % shards]
    shard_writer.write(example.SerializeToString())
    self.index += 1
def write(self, example: tf.train.Example):
    """Run the prebuilt write op, feeding it the serialized example."""
    serialized = example.SerializeToString()
    self._sess.run(self._write_op,
                   feed_dict={self._write_feed: serialized})