def create_output_sequence_example(
    images,
    flow,
    mask,
    add_visualization = True):
  """Packs an image pair and its flow labels into a SequenceExample.

  Args:
    images: Image triplet. Only the entries at index 1 and 2 are stored.
    flow: Flow field of the middle frame to the last frame. Only `flow[0]` is
      serialized.
    mask: Mask associated with the flow field indicating which locations hold a
      valid flow vector.
    add_visualization: If true, a PNG rendering of the flow field is stored in
      the context under the key 'flow_viz'.

  Returns:
    Tensorflow SequenceExample whose context holds 'height', 'width',
    'flow_uv', 'flow_valid' (and optionally 'flow_viz') and whose 'images'
    feature list holds the two PNG-encoded frames.
  """

  def _to_png_bytes(image):
    # Convert to uint8 first, then PNG-encode and pull the bytes out of the
    # resulting string tensor.
    as_uint8 = tf.image.convert_image_dtype(image, tf.uint8)
    return tf.image.encode_png(as_uint8).numpy()

  # Height and width are read from the third-to-last and second-to-last axes.
  image_shape = tf.shape(images)
  context_features = {
      'height': conversion_utils.int64_feature(image_shape[-3]),
      'width': conversion_utils.int64_feature(image_shape[-2]),
      'flow_uv': conversion_utils.bytes_feature(flow[0].numpy().tobytes()),
      'flow_valid': conversion_utils.bytes_feature(mask.numpy().tobytes()),
  }

  # Optionally attach a color rendering of the flow field.
  if add_visualization:
    flow_rgb = smurf_plotting.flow_to_rgb(flow)[0]
    context_features['flow_viz'] = conversion_utils.bytes_feature(
        _to_png_bytes(flow_rgb))

  # The stored pair consists of the second and third entry of `images`.
  frame_features = [
      conversion_utils.bytes_feature(_to_png_bytes(images[index]))
      for index in (1, 2)
  ]
  sequence_features = {
      'images': tf.train.FeatureList(feature=frame_features),
  }

  return tf.train.SequenceExample(
      context=tf.train.Features(feature=context_features),
      feature_lists=tf.train.FeatureLists(feature_list=sequence_features))
Example #2
0
def convert_dataset(data_dir):
    """Convert the data to the TFRecord format.

    For every split named in the comma-separated `FLAGS.subdirs`, frames 10
    and 11 of each index are read from `<split>/image_2` (left) and
    `<split>/image_3` (right) and written as one SequenceExample per index
    into the directory `<data_dir>_<split>-tfrecords`. Images are stored as
    the raw bytes of the decoded 3-channel arrays, not as PNG. For the
    'training' split the flow and disparity ground truth ('noc' and 'occ'
    versions) is added to the context.

    Args:
      data_dir: Root directory of the dataset containing the split folders.

    Raises:
      ValueError: If `FLAGS.subdirs` contains an entry other than 'training'
        or 'testing'.
    """

    def read_bytes(path):
        """Returns the whole content of `path` and closes the file handle."""
        with tf.io.gfile.GFile(path, 'rb') as f:
            return f.read()

    def bytes_feature_list(byte_strings):
        """Wraps every byte string into one feature of a FeatureList."""
        return tf.train.FeatureList(feature=[
            conversion_utils.bytes_feature(b) for b in byte_strings
        ])

    for subdir in FLAGS.subdirs.split(','):
        # Fail early: an unknown split would otherwise leave `example`
        # unbound or silently re-write the example of the previous split.
        if subdir not in ('training', 'testing'):
            raise ValueError(
                'Unknown subdir {!r}; expected "training" or "testing".'.format(
                    subdir))

        # Directory to save the tfrecords to.
        output_dir = data_dir + '_' + subdir + '-tfrecords'
        # Directories with the left and right images.
        image_dir = os.path.join(data_dir, subdir + '/image_2')
        image_dir_right = os.path.join(data_dir, subdir + '/image_3')
        # Two frames (10 and 11) are expected per index.
        num_images = len(tf.io.gfile.listdir(image_dir)) // 2

        os.makedirs(output_dir, exist_ok=True)

        for i in range(num_images):
            image_files = ['{0:06d}_{1:}.png'.format(i, j) for j in [10, 11]]

            # Collect RGB images.
            image_bytes_list_left = []
            image_bytes_list_right = []
            for image_file in image_files:
                image_tensor_left = tf.image.decode_png(
                    read_bytes(os.path.join(image_dir, image_file)),
                    channels=3)
                image_tensor_right = tf.image.decode_png(
                    read_bytes(os.path.join(image_dir_right, image_file)),
                    channels=3)
                height, width, _ = image_tensor_left.shape
                # Store the decoded pixel data as raw bytes.
                image_bytes_list_left.append(
                    image_tensor_left.numpy().tobytes())
                image_bytes_list_right.append(
                    image_tensor_right.numpy().tobytes())

            context = {
                'height': conversion_utils.int64_feature(height),
                'width': conversion_utils.int64_feature(width),
            }

            if subdir == 'training':
                # Collect flow.
                # Flow in the first image points to the second one; including
                # occluded regions (occ), or not including occluded regions
                # (noc).

                # NOTE: disp0 corresponds to disparity at time 0
                # and disp1 to disparity at time 1. All disparities are given
                # in the frame of the left image.
                for version in ['noc', 'occ']:
                    flow_path = os.path.join(data_dir, subdir,
                                             'flow_' + version, image_files[0])
                    disp_path0 = os.path.join(data_dir, subdir,
                                              'disp_' + version + '_0',
                                              image_files[0])
                    disp_path1 = os.path.join(data_dir, subdir,
                                              'disp_' + version + '_1',
                                              image_files[0])
                    flow_tensor = tf.image.decode_png(read_bytes(flow_path),
                                                      channels=3,
                                                      dtype=tf.uint16)
                    disp0_tensor = tf.image.decode_png(read_bytes(disp_path0),
                                                       channels=1,
                                                       dtype=tf.uint16)
                    disp1_tensor = tf.image.decode_png(read_bytes(disp_path1),
                                                       channels=1,
                                                       dtype=tf.uint16)
                    # Recover flow vectors from flow image according to KITTI
                    # README: the first two channels hold u and v, the third
                    # one the validity flag.
                    flow_uv = (tf.cast(flow_tensor[Ellipsis, :2], tf.float32) -
                               2**15) / 64.0
                    flow_valid = tf.cast(flow_tensor[Ellipsis, 2:3], tf.uint8)
                    # Recover disp according to the KITTI README. A value of
                    # zero marks an invalid pixel.
                    disp0 = tf.cast(disp0_tensor, tf.float32) / 256.
                    disp0_valid = tf.cast(disp0 > 0, tf.uint8)
                    disp1 = tf.cast(disp1_tensor, tf.float32) / 256.
                    disp1_valid = tf.cast(disp1 > 0, tf.uint8)

                    # Store every tensor as raw bytes under a key that is
                    # suffixed with the ground-truth version.
                    labels = {
                        'flow_uv_': flow_uv,
                        'flow_valid_': flow_valid,
                        'disp0_': disp0,
                        'disp0_valid_': disp0_valid,
                        'disp1_': disp1,
                        'disp1_valid_': disp1_valid,
                    }
                    for prefix, tensor in labels.items():
                        context[prefix + version] = (
                            conversion_utils.bytes_feature(
                                tensor.numpy().tobytes()))

            # Build a tf sequence example.
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=context),
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        'images':
                        bytes_feature_list(image_bytes_list_left),
                        'images_right':
                        bytes_feature_list(image_bytes_list_right),
                    }))

            # Create a tfrecord file to save this sequence to.
            output_filename = data_dir.split(
                '/')[-1] + '_' + subdir + '_{0:06d}.tfrecord'.format(i)
            output_file = os.path.join(output_dir, output_filename)
            with tf.io.TFRecordWriter(output_file) as record_writer:
                record_writer.write(example.SerializeToString())
                record_writer.flush()
        print('Saved results to', output_dir)
Example #3
0
def write_records(data_list, output_folder, shard):
    """Writes one shard of `data_list` as SequenceExamples to a TFRecord file.

    Args:
      data_list: List of tuples `(images, flow, occlusion, invalids, segments,
        segments_invalid)`. `images`, `invalids`, `segments` and
        `segments_invalid` are indexed as pairs of file paths (first and
        second frame); `flow` and `occlusion` are single file paths. If `flow`
        is None, all-zero flow and masks are stored instead and `segments`
        may be None as well.
      output_folder: Folder in which the sharded 'sintel' record files are
        created.
      shard: Index of the shard to write. The last shard
        (`FLAGS.num_shards - 1`) also receives the remainder of the list.
    """

    # NOTE(review): scipy.ndimage.imread was removed in SciPy 1.2, so this
    # function needs an older SciPy release.
    def read_plane(path):
        """Reads an image file and appends a trailing axis of size one."""
        return np.expand_dims(scipy.ndimage.imread(path), axis=-1)

    filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(output_folder, 'sintel@{}'.format(FLAGS.num_shards)))
    with tf.io.TFRecordWriter(filenames[shard]) as record_writer:
        total = len(data_list)
        images_per_shard = total // FLAGS.num_shards
        start = images_per_shard * shard
        # Account for num images not being divisible by num shards: the last
        # shard runs to the end of the list.
        if shard == FLAGS.num_shards - 1:
            end = total
        else:
            end = start + images_per_shard
        data_list = data_list[start:end]

        tf.compat.v1.logging.info('Writing %d images per shard',
                                  images_per_shard)
        tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                                  end, total)

        for i, (images, flow, occlusion, invalids, segments,
                segments_invalid) in enumerate(data_list):

            image1_data = scipy.ndimage.imread(images[0])
            image2_data = scipy.ndimage.imread(images[1])
            height = image1_data.shape[0]
            width = image1_data.shape[1]

            if flow is not None:
                assert occlusion is not None
                assert segments is not None
                assert segments_invalid is not None

                flow_data = conversion_utils.read_flow(flow)
                # Make binary: the occlusion and invalid masks are divided by
                # 255.
                occlusion_data = read_plane(occlusion) // 255
                invalid1_data = read_plane(invalids[0]) // 255
                invalid2_data = read_plane(invalids[1]) // 255
                # Segment images are stored with their original values.
                segment1_data = read_plane(segments[0])
                segment2_data = read_plane(segments[1])
                segment_invalid1_data = read_plane(segments_invalid[0])
                segment_invalid2_data = read_plane(segments_invalid[1])
            else:  # Test has no flow data, spoof flow data.
                flow_data = np.zeros((height, width, 2), np.float32)
                occlusion_data = invalid1_data = invalid2_data = np.zeros(
                    (height, width, 1), np.uint8)
                segment1_data = segment2_data = occlusion_data
                segment_invalid1_data = segment_invalid2_data = segment1_data

            assert height == image2_data.shape[0] == flow_data.shape[0]
            assert width == image2_data.shape[1] == flow_data.shape[1]
            assert height == occlusion_data.shape[0] == invalid1_data.shape[0]
            assert width == occlusion_data.shape[1] == invalid1_data.shape[1]
            assert invalid1_data.shape == invalid2_data.shape

            feature = {
                'height':
                conversion_utils.int64_feature(height),
                'width':
                conversion_utils.int64_feature(width),
                'image1_path':
                conversion_utils.bytes_feature(str.encode(images[0])),
                'image2_path':
                conversion_utils.bytes_feature(str.encode(images[1])),
            }
            feature_list = {}
            if flow is not None:
                feature.update({
                    'flow_uv':
                    conversion_utils.bytes_feature(flow_data.tobytes()),
                    'occlusion_mask':
                    conversion_utils.bytes_feature(occlusion_data.tobytes()),
                    'flow_path':
                    conversion_utils.bytes_feature(str.encode(flow)),
                    'occlusion_path':
                    conversion_utils.bytes_feature(str.encode(occlusion))
                })

            # Entries without ground truth may carry `segments=None`; indexing
            # it directly would raise a TypeError.
            has_segments = segments is not None and segments[0] is not None
            if has_segments:
                feature.update({
                    'segment1_path':
                    conversion_utils.bytes_feature(str.encode(segments[0])),
                    'segment2_path':
                    conversion_utils.bytes_feature(str.encode(segments[1])),
                    'segment_invalid1_path':
                    conversion_utils.bytes_feature(
                        str.encode(segments_invalid[0])),
                    'segment_invalid2_path':
                    conversion_utils.bytes_feature(
                        str.encode(segments_invalid[1])),
                })
                feature_list.update({
                    'segments':
                    tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(
                            segment1_data.tobytes()),
                        conversion_utils.bytes_feature(
                            segment2_data.tobytes()),
                    ]),
                    'segments_invalid':
                    tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(
                            segment_invalid1_data.tobytes()),
                        conversion_utils.bytes_feature(
                            segment_invalid2_data.tobytes()),
                    ]),
                })
            feature_list.update({
                'images':
                tf.train.FeatureList(feature=[
                    conversion_utils.bytes_feature(image1_data.tobytes()),
                    conversion_utils.bytes_feature(image2_data.tobytes())
                ]),
                'invalid_masks':
                tf.train.FeatureList(feature=[
                    conversion_utils.bytes_feature(invalid1_data.tobytes()),
                    conversion_utils.bytes_feature(invalid2_data.tobytes())
                ])
            })
            example = tf.train.SequenceExample(
                context=tf.train.Features(feature=feature),
                feature_lists=tf.train.FeatureLists(feature_list=feature_list))
            if i % 10 == 0:
                tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                          len(data_list))
            record_writer.write(example.SerializeToString())

    tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
def convert_dataset(shard):
    """Convert the data to the TFRecord format.

    Pairs the sorted '*.ppm' images in `FLAGS.data_dir` with the sorted
    '*.flo' files, selects the slice belonging to `shard` and writes every
    pair either to the train or to the test record file of that shard,
    depending on the assignment listed in 'files/chairs_train_val.txt'
    (1 = train, 2 = test).

    Args:
      shard: Index of the shard to write. The last shard
        (`FLAGS.num_shards - 1`) also receives the remainder of the list.

    Raises:
      AssertionError: If the numbers of images, flow files and split
        assignments are inconsistent, or if an assignment is neither 1 nor 2.
    """

    # Make the directories to save the tfrecords to. `exist_ok` avoids a
    # failure if another process creates the directory between an existence
    # check and the creation (e.g. when shards are converted concurrently).
    train_dir = os.path.join(FLAGS.output_dir, 'train')
    test_dir = os.path.join(FLAGS.output_dir, 'test')
    for directory in (FLAGS.output_dir, train_dir, test_dir):
        os.makedirs(directory, exist_ok=True)

    # Directory with images. After sorting, two consecutive images form the
    # pair that belongs to one flow file.
    image_paths = sorted(glob.glob(FLAGS.data_dir + '/*.ppm'))
    flow_list = sorted(glob.glob(FLAGS.data_dir + '/*.flo'))
    assert len(image_paths) // 2 == len(flow_list)
    image_list = list(zip(image_paths[0::2], image_paths[1::2]))
    assert len(image_list) == len(flow_list)

    # Read the train/test assignment, one integer per image pair.
    smurf_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(smurf_dir, 'files/chairs_train_val.txt')
    with open(filepath, mode='r') as f:
        train_val = [int(x.strip()) for x in f.readlines()]
    assert len(train_val) == len(image_list)
    assert len(flow_list) == len(train_val)

    # Select the part of the data that belongs to this shard.
    total = len(image_list)
    images_per_shard = total // FLAGS.num_shards
    start = images_per_shard * shard
    if shard == FLAGS.num_shards - 1:
        end = total
    else:
        end = start + images_per_shard
    image_list = image_list[start:end]
    train_val = train_val[start:end]
    flow_list = flow_list[start:end]

    tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
    tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                              end, total)

    train_filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(train_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
    test_filenames = conversion_utils.generate_sharded_filenames(
        os.path.join(test_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))

    # The writers are only opened after the inputs were validated and are
    # closed by the context manager even if an error occurs while writing.
    with tf.io.TFRecordWriter(train_filenames[shard]) as train_record_writer, \
            tf.io.TFRecordWriter(test_filenames[shard]) as test_record_writer:
        for i, (image_pair, flow, assignment) in enumerate(
                zip(image_list, flow_list, train_val)):
            image1_data = scipy.ndimage.imread(image_pair[0])
            image2_data = scipy.ndimage.imread(image_pair[1])
            flow_data = conversion_utils.read_flow(flow)

            height = image1_data.shape[0]
            width = image1_data.shape[1]

            assert height == image2_data.shape[0] == flow_data.shape[0]
            assert width == image2_data.shape[1] == flow_data.shape[1]

            example = tf.train.SequenceExample(
                context=tf.train.Features(
                    feature={
                        'height': conversion_utils.int64_feature(height),
                        'width': conversion_utils.int64_feature(width),
                        'flow_uv': conversion_utils.bytes_feature(
                            flow_data.tobytes()),
                    }),
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        'images':
                        tf.train.FeatureList(feature=[
                            conversion_utils.bytes_feature(
                                image1_data.tobytes()),
                            conversion_utils.bytes_feature(
                                image2_data.tobytes())
                        ])
                    }))
            if i % 10 == 0:
                tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                          len(image_list))
            if assignment == 1:
                train_record_writer.write(example.SerializeToString())
            elif assignment == 2:
                test_record_writer.write(example.SerializeToString())
            else:
                # Raised explicitly (instead of `assert False`) so that the
                # check is not stripped when running with -O.
                raise AssertionError(
                    'There is an error in the chairs_train_val.txt')

    tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
def convert_dataset(data_dir):
    """Convert the data to the TFRecord format.

    For both the 'training' and the 'testing' split, the frames of every scene
    are read from `<split>/image_2` (left) and `<split>/image_3` (right),
    resized bilinearly to `FLAGS.height` x `FLAGS.width`, re-encoded as PNG
    and written as one SequenceExample per frame sequence. If
    `FLAGS.entire_sequence` is set, all 21 frames form a single sequence;
    otherwise frames 9-12 are left out and frames 0-8 and 13-20 are written as
    two separate sequences. A sequence with a missing frame file is skipped.

    Args:
      data_dir: Root directory of the dataset containing the split folders.
    """

    def read_bytes(path):
        """Returns the whole content of `path` and closes the file handle."""
        with tf.io.gfile.GFile(path, 'rb') as f:
            return f.read()

    def resize_and_encode(png_bytes):
        """Decodes a PNG, resizes it to the target size and re-encodes it."""
        decoded = tf.image.decode_png(png_bytes, channels=3)
        resized = tf.image.resize(
            decoded[None], [FLAGS.height, FLAGS.width],
            method=tf.image.ResizeMethod.BILINEAR)[0]
        # Undo the implicit cast of resize_images to tf.float32
        resized = tf.cast(resized, tf.uint8)
        return tf.image.encode_png(resized).numpy()

    for subdir in ['training', 'testing']:

        # `subdir[:-3]` turns 'training'/'testing' into 'train'/'test'.
        if FLAGS.entire_sequence:
            sequences = [list(range(21))]
            output_dir = data_dir + '_{}_{}x{}_fullseq-tfrecords'.format(
                subdir[:-3], FLAGS.height, FLAGS.width)
        else:
            # Of the 21 frames, ignore frames 9-12 because we will test on those.
            sequences = [[0, 1, 2, 3, 4, 5, 6, 7, 8],
                         [13, 14, 15, 16, 17, 18, 19, 20]]
            # Make a directory to save the tfrecords to.
            output_dir = data_dir + '_{}_{}x{}-tfrecords'.format(
                subdir[:-3], FLAGS.height, FLAGS.width)

        # Directory with images.
        image_dir = os.path.join(data_dir, subdir + '/image_2')
        image_dir_right = os.path.join(data_dir, subdir + '/image_3')

        # File names look like '<scene:06d>_<frame:02d>.png', so dropping the
        # last 7 characters leaves the scene index. The number of scenes is
        # the highest index plus one; using the index itself as the count
        # would skip the last scene. Taking the maximum also avoids relying
        # on the order in which the directory is listed.
        scene_ids = [
            int(name[:-7]) for name in tf.io.gfile.listdir(image_dir)
            if name.endswith('.png')
        ]
        num_images = max(scene_ids) + 1 if scene_ids else 0

        os.makedirs(output_dir, exist_ok=True)

        for i in range(num_images):
            # Don't use frames 9-12 because those will be tested.
            for js in sequences:
                image_files = ['{0:06d}_{1:02d}.png'.format(i, j) for j in js]

                # Path of the file that is currently being read, reported if
                # it turns out to be missing.
                path_being_read = None
                try:
                    # Collect RGB images.
                    image_bytes_list = []
                    image_bytes_list_right = []
                    for image_file in image_files:
                        path_being_read = os.path.join(image_dir, image_file)
                        image_data = read_bytes(path_being_read)
                        path_being_read = os.path.join(image_dir_right,
                                                       image_file)
                        image_data_right = read_bytes(path_being_read)

                        # Resize and encode image as PNG byte string again.
                        image_bytes_list.append(
                            resize_and_encode(image_data))
                        image_bytes_list_right.append(
                            resize_and_encode(image_data_right))

                    # Build a tf sequence example.
                    example = tf.train.SequenceExample(
                        context=tf.train.Features(
                            feature={
                                'height':
                                conversion_utils.int64_feature(FLAGS.height),
                                'width':
                                conversion_utils.int64_feature(FLAGS.width),
                            }),
                        feature_lists=tf.train.FeatureLists(
                            feature_list={
                                'images':
                                tf.train.FeatureList(feature=[
                                    conversion_utils.bytes_feature(b)
                                    for b in image_bytes_list
                                ]),
                                'images_right':
                                tf.train.FeatureList(feature=[
                                    conversion_utils.bytes_feature(b)
                                    for b in image_bytes_list_right
                                ]),
                            }))
                    # The file name carries the scene index and the first and
                    # last frame of the sequence.
                    output_filename = data_dir.split(
                        '/'
                    )[-1] + '_' + subdir + '_{0:06d}_{1:02d}-{2:02d}.tfrecord'.format(
                        i, js[0], js[-1])
                    output_file = os.path.join(output_dir, output_filename)
                    with tf.io.TFRecordWriter(output_file) as record_writer:
                        record_writer.write(example.SerializeToString())
                        record_writer.flush()

                except tf.errors.NotFoundError:
                    # The whole sequence is dropped if one frame is missing.
                    print('Skipping {} because the file is not found.'.format(
                        path_being_read))

        # Report every split, since each one has its own output directory.
        print('Saved results to', output_dir)