Example #1
0
def write_data_example(record_writer, image1, image2):
  """Serialize an image pair as one SequenceExample and write it out.

  The height and width of the pair are stored in the example context; both
  frames are cast to uint8 and stored as raw bytes in the 'images' feature
  list, first image first.

  Args:
    record_writer: Object whose `write` method receives the serialized
      example bytes.
    image1: First image array; needs `shape` (at least three entries),
      `astype` and `tobytes` support.
    image2: Second image array; its first three dimensions must equal those
      of `image1`.

  Raises:
    AssertionError: If any of the first three dimensions differ.
  """
  # Checked one axis at a time, in order, so the first mismatch is the one
  # that trips.
  for axis in range(3):
    assert image1.shape[axis] == image2.shape[axis]

  rows, cols = image1.shape[0], image1.shape[1]
  context = tf.train.Features(
      feature={
          'height': conversion_utils.int64_feature(rows),
          'width': conversion_utils.int64_feature(cols),
      })

  frames = [
      conversion_utils.bytes_feature(frame.astype('uint8').tobytes())
      for frame in (image1, image2)
  ]
  sequences = tf.train.FeatureLists(
      feature_list={'images': tf.train.FeatureList(feature=frames)})

  example = tf.train.SequenceExample(
      context=context, feature_lists=sequences)
  record_writer.write(example.SerializeToString())
def write_records(data_list, output_folder):
    """Write this shard's slice of `data_list` to a TFRecord file.

    The shard to write is selected by `FLAGS.shard` out of `FLAGS.num_shards`.
    Every shard gets `len(data_list) // FLAGS.num_shards` entries, except the
    last one, which also takes the remainder.

    Args:
      data_list: Sequence of 4-tuples
        `((im1_path, im2_path), flow_path, occlusion_path,
        (invalid1_path, invalid2_path))`. When `flow_path` is None (no ground
        truth available) the occlusion and invalid entries are not read;
        all-zero invalid masks are stored instead and no flow or occlusion
        features are written.
      output_folder: Directory in which the record file is created. File names
        come from `conversion_utils.generate_sharded_filenames` applied to
        `<output_folder>/sintel@<num_shards>`.

    Raises:
      AssertionError: If flow is given without an occlusion path, or if the
        spatial sizes of images, flow and masks disagree.
    """

    def read_binary_mask(path):
        # Integer division by 255 maps pixel value 255 to 1 and anything
        # smaller to 0; a trailing channel axis is appended.
        return np.expand_dims(imageio.imread(path) // 255, axis=-1)

    # Reading image and flow files can fail on a network filesystem, so every
    # input is copied to a local scratch directory first.
    tmpdir = '/tmp/flying_chairs'
    # exist_ok avoids the check-then-create race when several shard processes
    # start at the same time.
    os.makedirs(tmpdir, exist_ok=True)

    filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(output_folder, 'sintel@{}'.format(FLAGS.num_shards)))
    with tf.io.TFRecordWriter(filenames[FLAGS.shard]) as record_writer:
        total = len(data_list)
        images_per_shard = total // FLAGS.num_shards
        start = images_per_shard * FLAGS.shard
        # Account for num images not being divisible by num shards: the last
        # shard runs to the end of the list.
        if FLAGS.shard == FLAGS.num_shards - 1:
            end = total
        else:
            end = start + images_per_shard
        data_list = data_list[start:end]

        tf.compat.v1.logging.info('Writing %d images per shard',
                                  images_per_shard)
        tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                                  end, total)

        # NOTE(review): these scratch names are fixed, so two processes using
        # this directory on the same machine would overwrite each other's
        # files.
        img1_path = os.path.join(tmpdir, 'img1.png')
        img2_path = os.path.join(tmpdir, 'img2.png')
        flow_path = os.path.join(tmpdir, 'flow.flo')
        occlusion_path = os.path.join(tmpdir, 'occlusion.png')
        invalid1_path = os.path.join(tmpdir, 'invalid1.png')
        invalid2_path = os.path.join(tmpdir, 'invalid2.png')
        scratch_paths = (img1_path, img2_path, flow_path, occlusion_path,
                         invalid1_path, invalid2_path)

        for i, (images, flow, occlusion, invalids) in enumerate(data_list):
            # Clear leftovers from the previous entry before copying.
            for stale_path in scratch_paths:
                if os.path.exists(stale_path):
                    os.remove(stale_path)

            tf.io.gfile.copy(images[0], img1_path)
            tf.io.gfile.copy(images[1], img2_path)

            image1_data = imageio.imread(img1_path)
            image2_data = imageio.imread(img2_path)
            if flow is not None:
                assert occlusion is not None
                tf.io.gfile.copy(flow, flow_path)
                tf.io.gfile.copy(occlusion, occlusion_path)
                tf.io.gfile.copy(invalids[0], invalid1_path)
                tf.io.gfile.copy(invalids[1], invalid2_path)
                flow_data = conversion_utils.read_flow(flow_path)
                occlusion_data = read_binary_mask(occlusion_path)
                invalid1_data = read_binary_mask(invalid1_path)
                invalid2_data = read_binary_mask(invalid2_path)
            else:  # Test has no flow data, spoof flow data.
                flow_data = np.zeros(
                    (image1_data.shape[0], image1_data.shape[1], 2),
                    np.float32)
                occlusion_data = invalid1_data = invalid2_data = np.zeros(
                    (image1_data.shape[0], image1_data.shape[1], 1), np.uint8)

            height = image1_data.shape[0]
            width = image1_data.shape[1]

            assert height == image2_data.shape[0] == flow_data.shape[0]
            assert width == image2_data.shape[1] == flow_data.shape[1]
            assert height == occlusion_data.shape[0] == invalid1_data.shape[0]
            assert width == occlusion_data.shape[1] == invalid1_data.shape[1]
            assert invalid1_data.shape == invalid2_data.shape

            feature = {
                'height':
                conversion_utils.int64_feature(height),
                'width':
                conversion_utils.int64_feature(width),
                'image1_path':
                conversion_utils.bytes_feature(str.encode(images[0])),
                'image2_path':
                conversion_utils.bytes_feature(str.encode(images[1])),
            }
            # Ground-truth features are only stored when real flow exists; the
            # spoofed arrays above are never written to the context.
            if flow is not None:
                feature.update({
                    'flow_uv':
                    conversion_utils.bytes_feature(flow_data.tobytes()),
                    'occlusion_mask':
                    conversion_utils.bytes_feature(occlusion_data.tobytes()),
                    'flow_path':
                    conversion_utils.bytes_feature(str.encode(flow)),
                    'occlusion_path':
                    conversion_utils.bytes_feature(str.encode(occlusion)),
                })
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=feature),
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        'images':
                        tf.train.FeatureList(feature=[
                            conversion_utils.bytes_feature(
                                image1_data.tobytes()),
                            conversion_utils.bytes_feature(
                                image2_data.tobytes())
                        ]),
                        'invalid_masks':
                        tf.train.FeatureList(feature=[
                            conversion_utils.bytes_feature(
                                invalid1_data.tobytes()),
                            conversion_utils.bytes_feature(
                                invalid2_data.tobytes())
                        ])
                    }))
            if i % 10 == 0:
                # data_list is already the shard slice here, so the total
                # reported is the number of entries in this shard.
                tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                          len(data_list))
            record_writer.write(example.SerializeToString())

    tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
Example #3
0
def convert_dataset():
    """Convert the data to the TFRecord format.

    Pairs up the sorted `*.ppm` images in `FLAGS.data_dir` (consecutive files
    form one pair) with the sorted `*.flo` files, then writes the slice that
    belongs to shard `FLAGS.shard` of `FLAGS.num_shards`. Each example goes to
    the train or test record file under `FLAGS.output_dir`, according to
    `FLAGS.train_split_file`: one integer per example, 1 for train and 2 for
    test. Blank lines in that file are ignored.

    Raises:
      AssertionError: If the number of image pairs, flow files and split
        entries disagree, if image and flow sizes differ, or if a split entry
        is neither 1 nor 2.
    """

    # Make the directories to save the tfrecords to. makedirs also creates
    # FLAGS.output_dir itself and succeeds when the directory already exists.
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'test')
    tf.io.gfile.makedirs(train_dir)
    tf.io.gfile.makedirs(test_dir)

    # Directory with images.
    image_paths = sorted(tf.io.gfile.glob(FLAGS.data_dir + '/*.ppm'))
    flow_list = sorted(tf.io.gfile.glob(FLAGS.data_dir + '/*.flo'))
    assert len(image_paths) // 2 == len(flow_list)
    # Even-indexed files are first frames, odd-indexed files second frames.
    image_list = list(zip(image_paths[0::2], image_paths[1::2]))
    assert len(image_list) == len(flow_list)

    # Read and validate the split assignments before any output file is
    # opened, so a bad split file does not leave empty record files behind.
    with open(FLAGS.train_split_file, mode='r') as f:
        train_val = [int(line) for line in f if line.strip()]
    assert len(train_val) == len(image_list)
    assert len(flow_list) == len(train_val)

    # Reading ppm and flo can fail on network filesystem, so copy to tmpdir first.
    tmpdir = '/tmp/flying_chairs'
    # exist_ok avoids the check-then-create race between concurrent shards.
    os.makedirs(tmpdir, exist_ok=True)

    total = len(image_list)
    images_per_shard = total // FLAGS.num_shards
    start = images_per_shard * FLAGS.shard
    # The last shard also takes the remainder when total is not divisible.
    if FLAGS.shard == FLAGS.num_shards - 1:
        end = total
    else:
        end = start + images_per_shard
    image_list = image_list[start:end]
    train_val = train_val[start:end]
    flow_list = flow_list[start:end]

    tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
    tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                              end, total)

    img1_path = os.path.join(tmpdir, 'img1.ppm')
    img2_path = os.path.join(tmpdir, 'img2.ppm')
    flow_path = os.path.join(tmpdir, 'flow.flo')
    scratch_paths = (img1_path, img2_path, flow_path)

    train_filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(train_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
    test_filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(test_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
    train_path = train_filenames[FLAGS.shard]
    test_path = test_filenames[FLAGS.shard]

    # The with-statement closes both writers even if an example fails midway.
    with tf.io.TFRecordWriter(train_path) as train_record_writer, \
            tf.io.TFRecordWriter(test_path) as test_record_writer:
        for i, (pair, flow, assignment) in enumerate(
                zip(image_list, flow_list, train_val)):
            # Clear leftovers from the previous entry before copying.
            for stale_path in scratch_paths:
                if os.path.exists(stale_path):
                    os.remove(stale_path)

            tf.io.gfile.copy(pair[0], img1_path)
            tf.io.gfile.copy(pair[1], img2_path)
            tf.io.gfile.copy(flow, flow_path)

            image1_data = imageio.imread(img1_path)
            image2_data = imageio.imread(img2_path)
            flow_data = conversion_utils.read_flow(flow_path)

            height = image1_data.shape[0]
            width = image1_data.shape[1]

            assert height == image2_data.shape[0] == flow_data.shape[0]
            assert width == image2_data.shape[1] == flow_data.shape[1]

            example = tf.train.SequenceExample(
                context=tf.train.Features(
                    feature={
                        'height':
                        conversion_utils.int64_feature(height),
                        'width':
                        conversion_utils.int64_feature(width),
                        'flow_uv':
                        conversion_utils.bytes_feature(flow_data.tobytes()),
                        'image1_path':
                        conversion_utils.bytes_feature(str.encode(pair[0])),
                        'image2_path':
                        conversion_utils.bytes_feature(str.encode(pair[1])),
                    }),
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        'images':
                        tf.train.FeatureList(feature=[
                            conversion_utils.bytes_feature(
                                image1_data.tobytes()),
                            conversion_utils.bytes_feature(
                                image2_data.tobytes())
                        ])
                    }))
            if i % 10 == 0:
                tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                          len(image_list))
            if assignment == 1:
                train_record_writer.write(example.SerializeToString())
            elif assignment == 2:
                test_record_writer.write(example.SerializeToString())
            else:
                # Raised explicitly (not `assert False`) so the check survives
                # `python -O` instead of silently dropping the example.
                raise AssertionError(
                    'There is an error in the chairs_train_val.txt')

    tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)