Example #1
 def write(self, example: tf.train.Example):
     # TFRecord layout: uint64 little-endian length, masked CRC-32C of the
     # length bytes, the serialized record, then the masked CRC-32C of the record.
     record = example.SerializeToString()
     length_bytes = struct.pack("<Q", len(record))
     self.java_file.write(length_bytes)
     self.java_file.write(self.masked_crc(length_bytes))
     self.java_file.write(record)
     self.java_file.write(self.masked_crc(record))
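The writer above assumes a `masked_crc` helper and the TFRecord on-disk layout described in the comment. A minimal sketch of that helper, assuming the third-party crc32c package for the CRC-32C (Castagnoli) checksum; the rotation and mask constant follow the TFRecord format:

import struct

import crc32c  # assumed dependency providing crc32c.crc32c()


def masked_crc(data: bytes) -> bytes:
    """Returns the 4-byte masked CRC-32C that the TFRecord format expects."""
    crc = crc32c.crc32c(data)
    # Rotate the CRC right by 15 bits, then add the TFRecord mask constant (mod 2**32).
    rotated = ((crc >> 15) | (crc << 17)) & 0xFFFFFFFF
    return struct.pack("<I", (rotated + 0xA282EAD8) & 0xFFFFFFFF)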
Example #2
def _ExamplePartitionKey(record: tf.train.Example,
                         split_config: example_gen_pb2.SplitConfig) -> bytes:
  """Generates key for partition for tf.train.Example."""

  if not split_config.HasField('partition_feature_name'):
    return record.SerializeToString(deterministic=True)

  # Use a feature for partitioning the examples.
  feature_name = split_config.partition_feature_name
  if feature_name not in record.features.feature:
    raise RuntimeError('Feature name `{}` does not exist.'.format(feature_name))
  feature = record.features.feature[feature_name]
  if not feature.HasField('kind'):
    raise RuntimeError('Partition feature does not contain any value.')
  if (not feature.HasField('bytes_list') and
      not feature.HasField('int64_list')):
    raise RuntimeError('Only `bytes_list` and `int64_list` features are '
                       'supported for partition.')
  return feature.SerializeToString(deterministic=True)
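A partition key like the one returned above is typically hashed into a bucket to decide which split an example belongs to. The sketch below is hypothetical rather than the TFX implementation: `_ExamplePartitionKey` is the function above, while `_split_index`, the md5 hashing, and the cumulative `buckets` list are illustrative assumptions.

import hashlib
from typing import List

import tensorflow as tf
from tfx.proto import example_gen_pb2


def _split_index(record: tf.train.Example,
                 split_config: example_gen_pb2.SplitConfig,
                 buckets: List[int]) -> int:
  """Maps an example to a split index given cumulative bucket upper bounds."""
  key = _ExamplePartitionKey(record, split_config)
  # md5 keeps the bucket assignment deterministic across processes, unlike
  # Python's builtin hash().
  bucket = int(hashlib.md5(key).hexdigest(), 16) % buckets[-1]
  for index, upper_bound in enumerate(buckets):
    if bucket < upper_bound:
      return index
  return len(buckets) - 1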
Example #3
def write_tfrecord(example: tf.train.Example, filename: str):
    with tf.python_io.TFRecordWriter(filename) as writer:
        writer.write(example.SerializeToString())
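A short usage sketch for `write_tfrecord`: build a `tf.train.Example` from a couple of features, write it, and read it back with the matching TF 1.x reader API. The feature names and the /tmp path are illustrative.

import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
    "text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[b"hello"])),
}))

write_tfrecord(example, "/tmp/sample.tfrecord")

# tf.python_io is the TF 1.x namespace used by the writer above; in TF 2.x the
# equivalents live under tf.io and tf.data.TFRecordDataset.
for serialized in tf.python_io.tf_record_iterator("/tmp/sample.tfrecord"):
    restored = tf.train.Example.FromString(serialized)
    print(restored.features.feature["label"].int64_list.value)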
Example #4
 def write(example: tf.train.Example):
     writer.write(example.SerializeToString())
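Here `writer` comes from the enclosing scope. A minimal sketch of such a scope, assuming `tf.io.TFRecordWriter` and an iterable of examples built elsewhere; the path is illustrative.

import tensorflow as tf

examples = [tf.train.Example()]  # placeholder; real code would populate the features

with tf.io.TFRecordWriter("/tmp/output.tfrecord") as writer:

    def write(example: tf.train.Example):
        writer.write(example.SerializeToString())

    for ex in examples:
        write(ex)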
Example #5
 def __call__(self, example: tf.train.Example):
     output_tfrecords[self.index % shards].write(
         example.SerializeToString())
     self.index += 1
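`output_tfrecords`, `shards`, and the initial `self.index` are set up outside the callable. A plausible setup sketch, assuming round-robin sharding over `tf.io.TFRecordWriter` instances; the shard count and filename pattern are illustrative.

import tensorflow as tf

shards = 10  # illustrative shard count
output_tfrecords = [
    tf.io.TFRecordWriter("/tmp/data.tfrecord-%05d-of-%05d" % (i, shards))
    for i in range(shards)
]
# The __call__ above then spreads serialized examples across the shard writers
# round-robin; each writer should be closed (writer.close()) once writing is done.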
Example #6
 def write(self, example: tf.train.Example):
     # Graph-mode (TF 1.x): run a precomputed write op, feeding the serialized proto.
     self._sess.run(self._write_op,
                    feed_dict={self._write_feed: example.SerializeToString()})
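The `_write_op` and `_write_feed` used in this last snippet have to be built when the graph is constructed. One hypothetical way to wire that up in TF 1.x is to wrap a `tf.io.TFRecordWriter` in a `tf.py_func`; this is a sketch under that assumption, not the original author's construction, and the class name is illustrative.

import tensorflow as tf


class SessionRecordWriter:
    """Writes serialized tf.train.Example protos from inside a TF 1.x session."""

    def __init__(self, sess: tf.Session, path: str):
        self._sess = sess
        self._writer = tf.io.TFRecordWriter(path)
        self._write_feed = tf.placeholder(tf.string, shape=[])

        def _append(serialized):
            # Runs as a regular Python function each time the op executes.
            self._writer.write(serialized)
            return True  # py_func requires at least one output

        self._write_op = tf.py_func(_append, [self._write_feed], tf.bool)

    def write(self, example: tf.train.Example):
        self._sess.run(self._write_op,
                       feed_dict={self._write_feed: example.SerializeToString()})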