Example #1
def write_tfrecords(protos, output_path, options=None):
  """Writes protos to output_path.

  This function writes serialized strings of each proto in protos to output_path
  in their original order. If output_path is a sharded file (e.g., foo@2), this
  function will write the protos spread out as evenly as possible among the
  individual components of the sharded spec (e.g., foo-00000-of-00002 and
  foo-00001-of-00002). Note that the order of records in the sharded files may
  differ from the order in protos due to the striping.

  Args:
    protos: An iterable of protobufs. The objects we want to write out.
    output_path: str. The filepath where we want to write protos.
    options: A python_io.TFRecordOptions object for the writer.
  """
  if not options:
    options = make_tfrecord_options(output_path)

  if sharded_file_utils.is_sharded_file_spec(output_path):
    with contextlib2.ExitStack() as stack:
      _, n_shards, _ = sharded_file_utils.parse_sharded_file_spec(output_path)
      writers = [
          stack.enter_context(
              make_tfrecord_writer(
                  sharded_file_utils.sharded_filename(output_path, i), options))
          for i in range(n_shards)
      ]
      for i, proto in enumerate(protos):
        writers[i % n_shards].write(proto.SerializeToString())
  else:
    with make_tfrecord_writer(output_path, options) as writer:
      for proto in protos:
        writer.write(proto.SerializeToString())
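
A minimal usage sketch for this variant. It assumes the helpers the function calls (make_tfrecord_options, make_tfrecord_writer, sharded_file_utils) are importable from the surrounding project; the tf.train.Example protos and the output paths below are illustrative assumptions, not part of the example above.

# Usage sketch: write_tfrecords as defined above; the proto type and the paths
# are illustrative assumptions.
import tensorflow as tf

protos = [
    tf.train.Example(features=tf.train.Features(feature={
        'index': tf.train.Feature(int64_list=tf.train.Int64List(value=[i])),
    })) for i in range(10)
]

# Sharded spec: records are striped round-robin, so foo-00000-of-00002 gets
# protos 0, 2, 4, ... and foo-00001-of-00002 gets protos 1, 3, 5, ...
write_tfrecords(protos, '/tmp/foo@2')

# A plain path writes all records, in their original order, to a single file.
write_tfrecords(protos, '/tmp/foo.tfrecord')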
Example #2
  def testParseShardedFileSpec(self, spec, expected_basename,
                               expected_num_shards, expected_suffix):
    basename, num_shards, suffix = io.parse_sharded_file_spec(spec)
    self.assertEqual(basename, expected_basename)
    self.assertEqual(num_shards, expected_num_shards)
    self.assertEqual(suffix, expected_suffix)
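
This method is a fragment of a parameterized test. Below is a self-contained sketch of how it could be wired up, assuming absl's parameterized test framework and that io is the project module exposing parse_sharded_file_spec; the parameter tuples are illustrative only, since the exact basename/suffix splitting depends on that module's spec grammar.

# Sketch of the surrounding test case; assumes `io` is the project's sharded-io
# module (not the stdlib io) and that it defines parse_sharded_file_spec.
from absl.testing import absltest
from absl.testing import parameterized


class ParseShardedFileSpecTest(parameterized.TestCase):

  @parameterized.parameters(
      # (spec, expected_basename, expected_num_shards, expected_suffix);
      # illustrative values only.
      ('/dir/foo@3', '/dir/foo', 3, ''),
      ('bar@10', 'bar', 10, ''),
  )
  def testParseShardedFileSpec(self, spec, expected_basename,
                               expected_num_shards, expected_suffix):
    basename, num_shards, suffix = io.parse_sharded_file_spec(spec)
    self.assertEqual(basename, expected_basename)
    self.assertEqual(num_shards, expected_num_shards)
    self.assertEqual(suffix, expected_suffix)


if __name__ == '__main__':
  absltest.main()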
Example #3
def write_tfrecords(protos, output_path, compression_type=None):
  """Writes protos to output_path.

  This function writes serialized strings of each proto in protos to output_path
  in their original order. If output_path is a sharded file (e.g., foo@2), this
  function will write the protos spread out as evenly as possible among the
  individual components of the sharded spec (e.g., foo-00000-of-00002 and
  foo-00001-of-00002). Note that the order of records in the sharded files may
  differ from the order in protos due to the striping.

  Args:
    protos: An iterable of protobufs. The objects we want to write out.
    output_path: str. The filepath where we want to write protos.
    compression_type: 'GZIP', 'ZLIB', '' (uncompressed), or None to autodetect
      based on file extension.
  """
  if sharded_file_utils.is_sharded_file_spec(output_path):
    with contextlib2.ExitStack() as stack:
      _, n_shards, _ = sharded_file_utils.parse_sharded_file_spec(output_path)
      writers = [
          stack.enter_context(
              Writer(sharded_file_utils.sharded_filename(
                  output_path, i), compression_type))
          for i in range(n_shards)
      ]
      for i, proto in enumerate(protos):
        writers[i % n_shards].write(proto)
  else:
    with Writer(output_path, compression_type=compression_type) as writer:
      for proto in protos:
        writer.write(proto)
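
This variant differs from Example #1 in two ways: compression is selected by a compression_type string rather than a TFRecordOptions object, and the protos are passed to Writer.write unserialized, so the Writer class is expected to handle serialization itself. A usage sketch follows, again assuming Writer and sharded_file_utils come from the surrounding project and using illustrative protos and paths.

# Usage sketch for the compression_type variant; the proto type and paths are
# illustrative assumptions.
import tensorflow as tf

protos = [tf.train.Example() for _ in range(4)]

# compression_type=None: compression is autodetected from the file extension,
# so this writes a GZIP-compressed TFRecord file.
write_tfrecords(protos, '/tmp/out.tfrecord.gz')

# Explicit '' forces uncompressed output; records are striped over two shards.
write_tfrecords(protos, '/tmp/out@2', compression_type='')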