def create_output_sequence_example(images, flow, mask, add_visualization=True):
  """Creates a SequenceExample for the self-supervised training data.

  Args:
    images: Image triplet.
    flow: Flow field of the middle frame to the last frame.
    mask: Mask associated with the flow field indicating which locations hold
      a valid flow vector.
    add_visualization: If true, adds a visualization of the flow field to the
      sequence example.

  Returns:
    TensorFlow SequenceExample holding the training data created from the
    triplet.
  """
  height = tf.shape(images)[-3]
  width = tf.shape(images)[-2]

  # Compute a flow visualization.
  if add_visualization:
    flow_visualization = tf.image.convert_image_dtype(
        smurf_plotting.flow_to_rgb(flow)[0], tf.uint8)
    flow_visualization_png = tf.image.encode_png(flow_visualization)

  context_features = {
      'height': conversion_utils.int64_feature(height),
      'width': conversion_utils.int64_feature(width),
      'flow_uv': conversion_utils.bytes_feature(flow[0].numpy().tobytes()),
      'flow_valid': conversion_utils.bytes_feature(mask.numpy().tobytes()),
  }
  if add_visualization:
    context_features['flow_viz'] = (
        conversion_utils.bytes_feature(flow_visualization_png.numpy()))

  sequence_features = {
      'images': tf.train.FeatureList(feature=[
          conversion_utils.bytes_feature(
              tf.image.encode_png(
                  tf.image.convert_image_dtype(images[1], tf.uint8)).numpy()),
          conversion_utils.bytes_feature(
              tf.image.encode_png(
                  tf.image.convert_image_dtype(images[2], tf.uint8)).numpy())
      ])
  }
  return tf.train.SequenceExample(
      context=tf.train.Features(feature=context_features),
      feature_lists=tf.train.FeatureLists(feature_list=sequence_features))
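# For reference, a minimal sketch of how the SequenceExamples written above
# could be parsed back. The feature names mirror the keys used in
# create_output_sequence_example; the helper name and the record path in the
# usage note are hypothetical.
def _example_parse_triplet_record(serialized):
  """Illustrative parser for records from create_output_sequence_example."""
  context, sequence = tf.io.parse_single_sequence_example(
      serialized,
      context_features={
          'height': tf.io.FixedLenFeature([], tf.int64),
          'width': tf.io.FixedLenFeature([], tf.int64),
          'flow_uv': tf.io.FixedLenFeature([], tf.string),
          'flow_valid': tf.io.FixedLenFeature([], tf.string),
      },
      sequence_features={
          'images': tf.io.FixedLenSequenceFeature([], tf.string),
      })
  height = tf.cast(context['height'], tf.int32)
  width = tf.cast(context['width'], tf.int32)
  # 'flow_uv' holds raw float32 bytes written with .tobytes(): two channels.
  flow_uv = tf.reshape(
      tf.io.decode_raw(context['flow_uv'], tf.float32), [height, width, 2])
  # The images were PNG-encoded before writing.
  images = tf.map_fn(
      tf.image.decode_png, sequence['images'], fn_output_signature=tf.uint8)
  return images, flow_uv

# Usage (hypothetical path):
# dataset = tf.data.TFRecordDataset('triplet.tfrecord').map(
#     _example_parse_triplet_record)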
def convert_dataset(data_dir):
  """Converts the data to the TFRecord format."""
  for subdir in FLAGS.subdirs.split(','):
    # Make a directory to save the tfrecords to.
    output_dir = data_dir + '_' + subdir + '-tfrecords'
    # Directory with images.
    image_dir = os.path.join(data_dir, subdir + '/image_2')
    image_dir_right = os.path.join(data_dir, subdir + '/image_3')
    num_images = len(tf.io.gfile.listdir(image_dir)) // 2

    if not os.path.exists(output_dir):
      os.mkdir(output_dir)

    for i in range(num_images):
      image_files = ['{0:06d}_{1:}.png'.format(i, j) for j in [10, 11]]

      # Collect RGB images.
      image_bytes_list_left = []
      image_bytes_list_right = []
      for image_file in image_files:
        image_path_left = os.path.join(image_dir, image_file)
        image_path_right = os.path.join(image_dir_right, image_file)
        image_data_left = tf.io.gfile.GFile(image_path_left, 'rb').read()
        image_data_right = tf.io.gfile.GFile(image_path_right, 'rb').read()
        image_tensor_left = tf.image.decode_png(image_data_left, channels=3)
        image_tensor_right = tf.image.decode_png(image_data_right, channels=3)
        height, width, _ = image_tensor_left.shape
        # Encode the images as byte lists again.
        image_bytes_list_left.append(image_tensor_left.numpy().tobytes())
        image_bytes_list_right.append(image_tensor_right.numpy().tobytes())

      if subdir == 'training':
        # Collect flow.
        # Flow in the first image points to the second one, either including
        # occluded regions (occ) or not including occluded regions (noc).
        # NOTE: disp0 corresponds to the disparity at time 0 and disp1 to the
        # disparity at time 1. All disparities are given in the frame of the
        # left image.
        flow_uv_bytes = dict()
        flow_valid_bytes = dict()
        disp0_bytes = dict()
        disp0_valid_bytes = dict()
        disp1_bytes = dict()
        disp1_valid_bytes = dict()
        for version in ['noc', 'occ']:
          flow_path = os.path.join(data_dir, subdir, 'flow_' + version,
                                   image_files[0])
          disp_path0 = os.path.join(data_dir, subdir,
                                    'disp_' + version + '_0', image_files[0])
          disp_path1 = os.path.join(data_dir, subdir,
                                    'disp_' + version + '_1', image_files[0])
          flow_data = tf.io.gfile.GFile(flow_path, 'rb').read()
          disp_data0 = tf.io.gfile.GFile(disp_path0, 'rb').read()
          disp_data1 = tf.io.gfile.GFile(disp_path1, 'rb').read()
          flow_tensor = tf.image.decode_png(
              flow_data, channels=3, dtype=tf.uint16)
          disp0_tensor = tf.image.decode_png(
              disp_data0, channels=1, dtype=tf.uint16)
          disp1_tensor = tf.image.decode_png(
              disp_data1, channels=1, dtype=tf.uint16)
          # Recover the flow vectors from the flow image according to the
          # KITTI README: subtract the 2**15 offset, divide by the 64.0 scale
          # factor; the third channel holds the validity mask.
          flow_uv = (tf.cast(flow_tensor[..., :2], tf.float32) - 2**15) / 64.0
          flow_valid = tf.cast(flow_tensor[..., 2:3], tf.uint8)
          # Recover the disparities according to the KITTI README.
          disp0 = tf.cast(disp0_tensor, tf.float32) / 256.
          disp0_valid = tf.cast(disp0 > 0, tf.uint8)
          disp1 = tf.cast(disp1_tensor, tf.float32) / 256.
          disp1_valid = tf.cast(disp1 > 0, tf.uint8)
          # Encode the tensors as byte lists again.
          flow_uv_bytes[version] = flow_uv.numpy().tobytes()
          flow_valid_bytes[version] = flow_valid.numpy().tobytes()
          disp0_bytes[version] = disp0.numpy().tobytes()
          disp0_valid_bytes[version] = disp0_valid.numpy().tobytes()
          disp1_bytes[version] = disp1.numpy().tobytes()
          disp1_valid_bytes[version] = disp1_valid.numpy().tobytes()

        # Build a tf sequence example.
        example = tf.train.SequenceExample(
            context=tf.train.Features(
                feature={
                    'height': conversion_utils.int64_feature(height),
                    'width': conversion_utils.int64_feature(width),
                    'flow_uv_occ': conversion_utils.bytes_feature(
                        flow_uv_bytes['occ']),
                    'flow_uv_noc': conversion_utils.bytes_feature(
                        flow_uv_bytes['noc']),
                    'flow_valid_occ': conversion_utils.bytes_feature(
                        flow_valid_bytes['occ']),
                    'flow_valid_noc': conversion_utils.bytes_feature(
                        flow_valid_bytes['noc']),
                    'disp0_occ': conversion_utils.bytes_feature(
                        disp0_bytes['occ']),
                    'disp0_noc': conversion_utils.bytes_feature(
                        disp0_bytes['noc']),
                    'disp1_occ': conversion_utils.bytes_feature(
                        disp1_bytes['occ']),
                    'disp1_noc': conversion_utils.bytes_feature(
                        disp1_bytes['noc']),
                    'disp0_valid_occ': conversion_utils.bytes_feature(
                        disp0_valid_bytes['occ']),
                    'disp0_valid_noc': conversion_utils.bytes_feature(
                        disp0_valid_bytes['noc']),
                    'disp1_valid_occ': conversion_utils.bytes_feature(
                        disp1_valid_bytes['occ']),
                    'disp1_valid_noc': conversion_utils.bytes_feature(
                        disp1_valid_bytes['noc']),
                }),
            feature_lists=tf.train.FeatureLists(
                feature_list={
                    'images': tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(b)
                        for b in image_bytes_list_left
                    ]),
                    'images_right': tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(b)
                        for b in image_bytes_list_right
                    ]),
                }))
      elif subdir == 'testing':
        # Build a tf sequence example (no ground truth for the test set).
        example = tf.train.SequenceExample(
            context=tf.train.Features(
                feature={
                    'height': conversion_utils.int64_feature(height),
                    'width': conversion_utils.int64_feature(width),
                }),
            feature_lists=tf.train.FeatureLists(
                feature_list={
                    'images': tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(b)
                        for b in image_bytes_list_left
                    ]),
                    'images_right': tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(b)
                        for b in image_bytes_list_right
                    ]),
                }))

      # Create a tfrecord file to save this sequence to.
      output_filename = data_dir.split(
          '/')[-1] + '_' + subdir + '_{0:06d}.tfrecord'.format(i)
      output_file = os.path.join(output_dir, output_filename)
      with tf.io.TFRecordWriter(output_file) as record_writer:
        record_writer.write(example.SerializeToString())
        record_writer.flush()
  print('Saved results to', output_dir)
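# To illustrate the KITTI flow encoding handled above: the devkit stores flow
# in a 16-bit PNG as flow * 64 + 2**15, with the third channel acting as the
# validity mask. A minimal numpy round-trip on a toy 1x1 flow field (the
# helper name is hypothetical, for illustration only):
def _example_kitti_flow_round_trip():
  import numpy as np  # Local import to keep the sketch self-contained.
  flow = np.array([[[-3.5, 12.25]]], dtype=np.float32)  # (H, W, 2)
  valid = np.ones((1, 1, 1), dtype=np.uint16)           # (H, W, 1)
  # Encode the way the KITTI ground-truth PNGs are produced.
  encoded = np.concatenate(
      [(flow * 64.0 + 2**15).astype(np.uint16), valid], axis=-1)
  # Decode exactly as convert_dataset does.
  decoded = (encoded[..., :2].astype(np.float32) - 2**15) / 64.0
  assert np.allclose(decoded, flow)  # Exact for multiples of 1/64 pixel.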
def write_records(data_list, output_folder, shard):
  """Writes one shard of the Sintel data to sharded TFRecords.

  Args:
    data_list: List of tuples (images, flow, occlusion, invalids, segments,
      segments_invalid), where images is a pair (im1_path, im2_path) and the
      remaining entries are file paths (or None for the test set).
    output_folder: Directory to write the shard to.
    shard: Index of the shard to write.
  """
  filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(output_folder, 'sintel@{}'.format(FLAGS.num_shards)))
  with tf.io.TFRecordWriter(filenames[shard]) as record_writer:
    total = len(data_list)
    images_per_shard = total // FLAGS.num_shards
    start = images_per_shard * shard
    end = start + images_per_shard
    # Account for the number of images not being divisible by the number of
    # shards: the last shard takes all remaining images.
    if shard == FLAGS.num_shards - 1:
      data_list = data_list[start:]
    else:
      data_list = data_list[start:end]

    tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
    tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                              end, total)

    for i, (images, flow, occlusion, invalids, segments,
            segments_invalid) in enumerate(data_list):
      # NOTE: scipy.ndimage.imread was removed in SciPy 1.2; imageio.imread
      # is used here as a drop-in replacement.
      image1_data = imageio.imread(images[0])
      image2_data = imageio.imread(images[1])
      if flow is not None:
        assert occlusion is not None
        assert segments is not None
        assert segments_invalid is not None
        flow_data = conversion_utils.read_flow(flow)
        # Make the masks binary (the PNGs store 0 or 255).
        occlusion_data = np.expand_dims(
            imageio.imread(occlusion) // 255, axis=-1)
        invalid1_data = np.expand_dims(
            imageio.imread(invalids[0]) // 255, axis=-1)
        invalid2_data = np.expand_dims(
            imageio.imread(invalids[1]) // 255, axis=-1)
        segment1_data = np.expand_dims(imageio.imread(segments[0]), axis=-1)
        segment2_data = np.expand_dims(imageio.imread(segments[1]), axis=-1)
        segment_invalid1_data = np.expand_dims(
            imageio.imread(segments_invalid[0]), axis=-1)
        segment_invalid2_data = np.expand_dims(
            imageio.imread(segments_invalid[1]), axis=-1)
      else:
        # The test set has no flow data, so spoof the flow data.
        flow_data = np.zeros(
            (image1_data.shape[0], image1_data.shape[1], 2), np.float32)
        occlusion_data = invalid1_data = invalid2_data = np.zeros(
            (image1_data.shape[0], image1_data.shape[1], 1), np.uint8)
        segment1_data = segment2_data = occlusion_data
        segment_invalid1_data = segment_invalid2_data = segment1_data

      height = image1_data.shape[0]
      width = image1_data.shape[1]

      assert height == image2_data.shape[0] == flow_data.shape[0]
      assert width == image2_data.shape[1] == flow_data.shape[1]
      assert height == occlusion_data.shape[0] == invalid1_data.shape[0]
      assert width == occlusion_data.shape[1] == invalid1_data.shape[1]
      assert invalid1_data.shape == invalid2_data.shape

      feature = {
          'height': conversion_utils.int64_feature(height),
          'width': conversion_utils.int64_feature(width),
          'image1_path': conversion_utils.bytes_feature(
              str.encode(images[0])),
          'image2_path': conversion_utils.bytes_feature(
              str.encode(images[1])),
      }
      feature_list = {}
      if flow is not None:
        feature.update({
            'flow_uv': conversion_utils.bytes_feature(flow_data.tobytes()),
            'occlusion_mask': conversion_utils.bytes_feature(
                occlusion_data.tobytes()),
            'flow_path': conversion_utils.bytes_feature(str.encode(flow)),
            'occlusion_path': conversion_utils.bytes_feature(
                str.encode(occlusion)),
        })
        if segments[0] is not None:
          feature.update({
              'segment1_path': conversion_utils.bytes_feature(
                  str.encode(segments[0])),
              'segment2_path': conversion_utils.bytes_feature(
                  str.encode(segments[1])),
              'segment_invalid1_path': conversion_utils.bytes_feature(
                  str.encode(segments_invalid[0])),
              'segment_invalid2_path': conversion_utils.bytes_feature(
                  str.encode(segments_invalid[1])),
          })
          feature_list.update({
              'segments': tf.train.FeatureList(feature=[
                  conversion_utils.bytes_feature(segment1_data.tobytes()),
                  conversion_utils.bytes_feature(segment2_data.tobytes()),
              ]),
              'segments_invalid': tf.train.FeatureList(feature=[
                  conversion_utils.bytes_feature(
                      segment_invalid1_data.tobytes()),
                  conversion_utils.bytes_feature(
                      segment_invalid2_data.tobytes()),
              ]),
          })
      feature_list.update({
          'images': tf.train.FeatureList(feature=[
              conversion_utils.bytes_feature(image1_data.tobytes()),
              conversion_utils.bytes_feature(image2_data.tobytes())
          ]),
          'invalid_masks': tf.train.FeatureList(feature=[
              conversion_utils.bytes_feature(invalid1_data.tobytes()),
              conversion_utils.bytes_feature(invalid2_data.tobytes())
          ])
      })
      example = tf.train.SequenceExample(
          context=tf.train.Features(feature=feature),
          feature_lists=tf.train.FeatureLists(feature_list=feature_list))
      if i % 10 == 0:
        tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                  len(data_list))
      record_writer.write(example.SerializeToString())
  tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
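# A minimal sketch of reading the Sintel shards back. Note that write_records
# stores the images as raw uint8 bytes (via .tobytes()), not PNG, so they are
# recovered with tf.io.decode_raw. The glob pattern below assumes that
# conversion_utils.generate_sharded_filenames produces the common
# 'name-0000i-of-0000N' layout; adjust it to the actual shard names.
def _example_load_sintel(output_folder):
  filenames = tf.io.gfile.glob(os.path.join(output_folder, 'sintel-*-of-*'))

  def parse(serialized):
    context, sequence = tf.io.parse_single_sequence_example(
        serialized,
        context_features={
            'height': tf.io.FixedLenFeature([], tf.int64),
            'width': tf.io.FixedLenFeature([], tf.int64),
            # Absent in test records, hence the default value.
            'flow_uv': tf.io.FixedLenFeature([], tf.string, default_value=''),
        },
        sequence_features={
            'images': tf.io.FixedLenSequenceFeature([], tf.string),
        })
    h = tf.cast(context['height'], tf.int32)
    w = tf.cast(context['width'], tf.int32)
    images = tf.reshape(
        tf.io.decode_raw(sequence['images'], tf.uint8), [2, h, w, 3])
    return images, context['flow_uv']

  return tf.data.TFRecordDataset(filenames).map(parse)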
def convert_dataset(shard):
  """Converts the data to the TFRecord format."""
  # Make a directory to save the tfrecords to.
  if not os.path.exists(FLAGS.output_dir):
    os.mkdir(FLAGS.output_dir)
  train_dir = os.path.join(FLAGS.output_dir, 'train')
  test_dir = os.path.join(FLAGS.output_dir, 'test')
  if not os.path.exists(train_dir):
    os.mkdir(train_dir)
  if not os.path.exists(test_dir):
    os.mkdir(test_dir)

  # Directory with images. Image pairs and flow fields share a sorted order.
  images = sorted(glob.glob(FLAGS.data_dir + '/*.ppm'))
  flow_list = sorted(glob.glob(FLAGS.data_dir + '/*.flo'))
  assert len(images) // 2 == len(flow_list)
  image_list = []
  for i in range(len(flow_list)):
    im1 = images[2 * i]
    im2 = images[2 * i + 1]
    image_list.append((im1, im2))
  assert len(image_list) == len(flow_list)

  train_filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(train_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
  test_filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(test_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
  train_record_writer = tf.io.TFRecordWriter(train_filenames[shard])
  test_record_writer = tf.io.TFRecordWriter(test_filenames[shard])
  total = len(image_list)
  images_per_shard = total // FLAGS.num_shards
  start = images_per_shard * shard

  smurf_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  filepath = os.path.join(smurf_dir, 'files/chairs_train_val.txt')
  with open(filepath, mode='r') as f:
    train_val = f.readlines()
    train_val = [int(x.strip()) for x in train_val]

  # The last shard takes all remaining images.
  if shard == FLAGS.num_shards - 1:
    end = len(image_list)
  else:
    end = start + images_per_shard

  assert len(train_val) == len(image_list)
  assert len(flow_list) == len(train_val)
  image_list = image_list[start:end]
  train_val = train_val[start:end]
  flow_list = flow_list[start:end]

  tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
  tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start, end,
                            total)

  for i, (images, flow, assignment) in enumerate(
      zip(image_list, flow_list, train_val)):
    # NOTE: scipy.ndimage.imread was removed in SciPy 1.2; imageio.imread is
    # used here as a drop-in replacement.
    image1_data = imageio.imread(images[0])
    image2_data = imageio.imread(images[1])
    flow_data = conversion_utils.read_flow(flow)

    height = image1_data.shape[0]
    width = image1_data.shape[1]

    assert height == image2_data.shape[0] == flow_data.shape[0]
    assert width == image2_data.shape[1] == flow_data.shape[1]

    example = tf.train.SequenceExample(
        context=tf.train.Features(
            feature={
                'height': conversion_utils.int64_feature(height),
                'width': conversion_utils.int64_feature(width),
                'flow_uv': conversion_utils.bytes_feature(
                    flow_data.tobytes()),
            }),
        feature_lists=tf.train.FeatureLists(
            feature_list={
                'images': tf.train.FeatureList(feature=[
                    conversion_utils.bytes_feature(image1_data.tobytes()),
                    conversion_utils.bytes_feature(image2_data.tobytes())
                ])
            }))
    if i % 10 == 0:
      tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                len(image_list))
    # chairs_train_val.txt assigns each pair to train (1) or validation (2).
    if assignment == 1:
      train_record_writer.write(example.SerializeToString())
    elif assignment == 2:
      test_record_writer.write(example.SerializeToString())
    else:
      assert False, 'There is an error in chairs_train_val.txt.'

  train_record_writer.close()
  test_record_writer.close()
  tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
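# For reference, conversion_utils.read_flow is assumed to implement the
# standard Middlebury .flo layout used by FlyingChairs. A minimal
# self-contained reader for that layout (the helper name is hypothetical):
def _example_read_flo(path):
  """Reads a Middlebury .flo file into an (H, W, 2) float32 array."""
  import numpy as np  # Local import to keep the sketch self-contained.
  with open(path, 'rb') as f:
    magic = np.fromfile(f, np.float32, count=1)[0]
    assert magic == 202021.25, 'Invalid .flo magic number.'
    width = int(np.fromfile(f, np.int32, count=1)[0])
    height = int(np.fromfile(f, np.int32, count=1)[0])
    # Interleaved (u, v) float32 pairs in row-major order.
    data = np.fromfile(f, np.float32, count=2 * width * height)
  return data.reshape(height, width, 2)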
def convert_dataset(data_dir):
  """Converts the data to the TFRecord format."""
  for subdir in ['training', 'testing']:
    if FLAGS.entire_sequence:
      sequences = [list(range(21))]
      output_dir = data_dir + '_{}_{}x{}_fullseq-tfrecords'.format(
          subdir[:-3], FLAGS.height, FLAGS.width)
    else:
      # Of the 21 frames, ignore frames 9-12 because we will test on those.
      sequences = [[0, 1, 2, 3, 4, 5, 6, 7, 8],
                   [13, 14, 15, 16, 17, 18, 19, 20]]
      # Make a directory to save the tfrecords to.
      output_dir = data_dir + '_{}_{}x{}-tfrecords'.format(
          subdir[:-3], FLAGS.height, FLAGS.width)

    # Directory with images.
    image_dir = os.path.join(data_dir, subdir + '/image_2')
    image_dir_right = os.path.join(data_dir, subdir + '/image_3')
    num_images = int(tf.io.gfile.listdir(image_dir)[-1][:-7])

    if not os.path.exists(output_dir):
      os.mkdir(output_dir)

    for i in range(num_images):
      for js in sequences:
        image_files = ['{0:06d}_{1:02d}.png'.format(i, j) for j in js]
        try:
          # Collect RGB images.
          image_bytes_list = []
          image_bytes_list_right = []
          for image_file in image_files:
            image_path = os.path.join(image_dir, image_file)
            image_path_right = os.path.join(image_dir_right, image_file)
            image_data = tf.io.gfile.GFile(image_path, 'rb').read()
            image_data_right = tf.io.gfile.GFile(
                image_path_right, 'rb').read()
            image_tensor = tf.image.decode_png(image_data, channels=3)
            image_tensor_right = tf.image.decode_png(
                image_data_right, channels=3)
            image_resized = tf.image.resize(
                image_tensor[None], [FLAGS.height, FLAGS.width],
                method=tf.image.ResizeMethod.BILINEAR)[0]
            image_resized_right = tf.image.resize(
                image_tensor_right[None], [FLAGS.height, FLAGS.width],
                method=tf.image.ResizeMethod.BILINEAR)[0]
            # Undo the implicit cast of tf.image.resize to tf.float32.
            image_resized = tf.cast(image_resized, tf.uint8)
            image_resized_right = tf.cast(image_resized_right, tf.uint8)
            # Encode the images as byte lists again.
            image_bytes_list.append(
                tf.image.encode_png(image_resized).numpy())
            image_bytes_list_right.append(
                tf.image.encode_png(image_resized_right).numpy())

          # Build a tf sequence example.
          example = tf.train.SequenceExample(
              context=tf.train.Features(
                  feature={
                      'height': conversion_utils.int64_feature(FLAGS.height),
                      'width': conversion_utils.int64_feature(FLAGS.width),
                  }),
              feature_lists=tf.train.FeatureLists(
                  feature_list={
                      'images': tf.train.FeatureList(feature=[
                          conversion_utils.bytes_feature(b)
                          for b in image_bytes_list
                      ]),
                      'images_right': tf.train.FeatureList(feature=[
                          conversion_utils.bytes_feature(b)
                          for b in image_bytes_list_right
                      ]),
                  }))

          output_filename = (
              data_dir.split('/')[-1] + '_' + subdir +
              '_{0:06d}_{1:02d}-{2:02d}.tfrecord'.format(i, js[0], js[-1]))
          output_file = os.path.join(output_dir, output_filename)
          with tf.io.TFRecordWriter(output_file) as record_writer:
            record_writer.write(example.SerializeToString())
            record_writer.flush()
        except tf.errors.NotFoundError:
          print('Skipping {} because the file was not found.'.format(
              image_path))
  print('Saved results to', output_dir)
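# The converter above writes one SequenceExample per .tfrecord file, so a
# typical way to stream the result is to interleave the per-file datasets.
# The helper name is hypothetical; pass whatever output_dir the converter
# printed at the end.
def _example_stream_multiframe(output_dir):
  files = tf.data.Dataset.list_files(os.path.join(output_dir, '*.tfrecord'))
  return files.interleave(
      tf.data.TFRecordDataset,
      cycle_length=4,
      num_parallel_calls=tf.data.AUTOTUNE)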