def write_data_example(record_writer, image1, image2):
  """Write data example to disk."""
  assert image1.shape[0] == image2.shape[0]
  assert image1.shape[1] == image2.shape[1]
  assert image1.shape[2] == image2.shape[2]

  feature = {
      'height': conversion_utils.int64_feature(image1.shape[0]),
      'width': conversion_utils.int64_feature(image1.shape[1]),
  }
  example = tf.train.SequenceExample(
      context=tf.train.Features(feature=feature),
      feature_lists=tf.train.FeatureLists(
          feature_list={
              'images':
                  tf.train.FeatureList(feature=[
                      conversion_utils.bytes_feature(
                          image1.astype('uint8').tobytes()),
                      conversion_utils.bytes_feature(
                          image2.astype('uint8').tobytes())
                  ]),
          }))
  record_writer.write(example.SerializeToString())
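
# A minimal sketch of reading these records back. parse_data_example is a
# hypothetical helper, not part of this script; it assumes the module-level
# imports (tensorflow as tf) and 3-channel uint8 images. The feature keys
# mirror what write_data_example serializes above.
def parse_data_example(serialized):
  """Decodes a SequenceExample written by write_data_example."""
  context, sequence = tf.io.parse_single_sequence_example(
      serialized,
      context_features={
          'height': tf.io.FixedLenFeature([], tf.int64),
          'width': tf.io.FixedLenFeature([], tf.int64),
      },
      sequence_features={
          'images': tf.io.FixedLenSequenceFeature([], tf.string),
      })
  height = tf.cast(context['height'], tf.int32)
  width = tf.cast(context['width'], tf.int32)
  # decode_raw yields shape [2, height * width * 3]; recover the image pair.
  images = tf.io.decode_raw(sequence['images'], tf.uint8)
  return tf.reshape(images, [2, height, width, 3])


# Example usage (hypothetical path):
# dataset = tf.data.TFRecordDataset('/tmp/records-00000-of-00001')
# dataset = dataset.map(parse_data_example)
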
def write_records(data_list, output_folder):
  """Writes TFRecords from a list of examples.

  Args:
    data_list: List of ((im1_path, im2_path), flow_path, occlusion_path,
      (invalid1_path, invalid2_path)) tuples.
    output_folder: Directory to write the sharded TFRecord files to.
  """
  # Reading png and flo can fail on network filesystem, so copy to tmpdir
  # first.
  tmpdir = '/tmp/sintel'
  if not os.path.exists(tmpdir):
    os.mkdir(tmpdir)

  filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(output_folder, 'sintel@{}'.format(FLAGS.num_shards)))
  with tf.io.TFRecordWriter(filenames[FLAGS.shard]) as record_writer:
    total = len(data_list)
    images_per_shard = total // FLAGS.num_shards
    start = images_per_shard * FLAGS.shard
    end = start + images_per_shard
    # Account for num images not being divisible by num shards.
    if FLAGS.shard == FLAGS.num_shards - 1:
      data_list = data_list[start:]
    else:
      data_list = data_list[start:end]

    tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
    tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start,
                              end, total)

    img1_path = os.path.join(tmpdir, 'img1.png')
    img2_path = os.path.join(tmpdir, 'img2.png')
    flow_path = os.path.join(tmpdir, 'flow.flo')
    occlusion_path = os.path.join(tmpdir, 'occlusion.png')
    invalid1_path = os.path.join(tmpdir, 'invalid1.png')
    invalid2_path = os.path.join(tmpdir, 'invalid2.png')

    for i, (images, flow, occlusion, invalids) in enumerate(data_list):
      # Remove any stale local copies from the previous iteration.
      if os.path.exists(img1_path):
        os.remove(img1_path)
      if os.path.exists(img2_path):
        os.remove(img2_path)
      if os.path.exists(flow_path):
        os.remove(flow_path)
      if os.path.exists(occlusion_path):
        os.remove(occlusion_path)
      if os.path.exists(invalid1_path):
        os.remove(invalid1_path)
      if os.path.exists(invalid2_path):
        os.remove(invalid2_path)

      tf.io.gfile.copy(images[0], img1_path)
      tf.io.gfile.copy(images[1], img2_path)
      image1_data = imageio.imread(img1_path)
      image2_data = imageio.imread(img2_path)

      if flow is not None:
        assert occlusion is not None
        tf.io.gfile.copy(flow, flow_path)
        tf.io.gfile.copy(occlusion, occlusion_path)
        tf.io.gfile.copy(invalids[0], invalid1_path)
        tf.io.gfile.copy(invalids[1], invalid2_path)
        flow_data = conversion_utils.read_flow(flow_path)
        # Make the masks binary (0 or 1).
        occlusion_data = np.expand_dims(
            imageio.imread(occlusion_path) // 255, axis=-1)
        invalid1_data = np.expand_dims(
            imageio.imread(invalid1_path) // 255, axis=-1)
        invalid2_data = np.expand_dims(
            imageio.imread(invalid2_path) // 255, axis=-1)
      else:  # Test set has no flow data, so spoof it.
        flow_data = np.zeros(
            (image1_data.shape[0], image1_data.shape[1], 2), np.float32)
        occlusion_data = invalid1_data = invalid2_data = np.zeros(
            (image1_data.shape[0], image1_data.shape[1], 1), np.uint8)

      height = image1_data.shape[0]
      width = image1_data.shape[1]

      assert height == image2_data.shape[0] == flow_data.shape[0]
      assert width == image2_data.shape[1] == flow_data.shape[1]
      assert height == occlusion_data.shape[0] == invalid1_data.shape[0]
      assert width == occlusion_data.shape[1] == invalid1_data.shape[1]
      assert invalid1_data.shape == invalid2_data.shape

      feature = {
          'height': conversion_utils.int64_feature(height),
          'width': conversion_utils.int64_feature(width),
          'image1_path': conversion_utils.bytes_feature(str.encode(images[0])),
          'image2_path': conversion_utils.bytes_feature(str.encode(images[1])),
      }
      if flow is not None:
        feature.update({
            'flow_uv':
                conversion_utils.bytes_feature(flow_data.tobytes()),
            'occlusion_mask':
                conversion_utils.bytes_feature(occlusion_data.tobytes()),
            'flow_path':
                conversion_utils.bytes_feature(str.encode(flow)),
            'occlusion_path':
                conversion_utils.bytes_feature(str.encode(occlusion)),
        })
      example = tf.train.SequenceExample(
          context=tf.train.Features(feature=feature),
          feature_lists=tf.train.FeatureLists(
              feature_list={
                  'images':
                      tf.train.FeatureList(feature=[
                          conversion_utils.bytes_feature(image1_data.tobytes()),
                          conversion_utils.bytes_feature(image2_data.tobytes())
                      ]),
                  'invalid_masks':
                      tf.train.FeatureList(feature=[
                          conversion_utils.bytes_feature(
                              invalid1_data.tobytes()),
                          conversion_utils.bytes_feature(
                              invalid2_data.tobytes())
                      ])
              }))
      if i % 10 == 0:
        tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                  len(data_list))
      record_writer.write(example.SerializeToString())

  tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
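
# A minimal sketch of decoding one Sintel record written by write_records.
# parse_sintel_example is a hypothetical helper, not part of this script; it
# assumes a training record (test records omit flow_uv and occlusion_mask)
# and 3-channel uint8 images. The keys mirror the writer above.
def parse_sintel_example(serialized):
  """Decodes images, flow, and masks from one serialized SequenceExample."""
  context, sequence = tf.io.parse_single_sequence_example(
      serialized,
      context_features={
          'height': tf.io.FixedLenFeature([], tf.int64),
          'width': tf.io.FixedLenFeature([], tf.int64),
          'flow_uv': tf.io.FixedLenFeature([], tf.string),
          'occlusion_mask': tf.io.FixedLenFeature([], tf.string),
      },
      sequence_features={
          'images': tf.io.FixedLenSequenceFeature([], tf.string),
          'invalid_masks': tf.io.FixedLenSequenceFeature([], tf.string),
      })
  height = tf.cast(context['height'], tf.int32)
  width = tf.cast(context['width'], tf.int32)
  images = tf.reshape(
      tf.io.decode_raw(sequence['images'], tf.uint8), [2, height, width, 3])
  flow_uv = tf.reshape(
      tf.io.decode_raw(context['flow_uv'], tf.float32), [height, width, 2])
  occlusion = tf.reshape(
      tf.io.decode_raw(context['occlusion_mask'], tf.uint8),
      [height, width, 1])
  invalid_masks = tf.reshape(
      tf.io.decode_raw(sequence['invalid_masks'], tf.uint8),
      [2, height, width, 1])
  return images, flow_uv, occlusion, invalid_masks
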
def convert_dataset():
  """Convert the data to the TFRecord format."""

  # Make a directory to save the tfrecords to.
  if not tf.io.gfile.exists(FLAGS.output_dir):
    tf.io.gfile.mkdir(FLAGS.output_dir)
  train_dir = os.path.join(FLAGS.output_dir, 'train')
  test_dir = os.path.join(FLAGS.output_dir, 'test')
  if not tf.io.gfile.exists(train_dir):
    tf.io.gfile.mkdir(train_dir)
  if not tf.io.gfile.exists(test_dir):
    tf.io.gfile.mkdir(test_dir)

  # Directory with images.
  images = sorted(tf.io.gfile.glob(FLAGS.data_dir + '/*.ppm'))
  flow_list = sorted(tf.io.gfile.glob(FLAGS.data_dir + '/*.flo'))
  assert len(images) // 2 == len(flow_list)
  # Pair consecutive .ppm files into (frame1, frame2) tuples.
  image_list = []
  for i in range(len(flow_list)):
    im1 = images[2 * i]
    im2 = images[2 * i + 1]
    image_list.append((im1, im2))
  assert len(image_list) == len(flow_list)

  # Reading ppm and flo can fail on network filesystem, so copy to tmpdir
  # first.
  tmpdir = '/tmp/flying_chairs'
  if not os.path.exists(tmpdir):
    os.mkdir(tmpdir)

  train_filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(train_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
  test_filenames = conversion_utils.generate_sharded_filenames(
      os.path.join(test_dir, 'flying_chairs@{}'.format(FLAGS.num_shards)))
  train_record_writer = tf.io.TFRecordWriter(train_filenames[FLAGS.shard])
  test_record_writer = tf.io.TFRecordWriter(test_filenames[FLAGS.shard])
  total = len(image_list)
  images_per_shard = total // FLAGS.num_shards
  start = images_per_shard * FLAGS.shard

  # Read the train/test split: one label per example (1 = train, 2 = test).
  filepath = FLAGS.train_split_file
  with open(filepath, mode='r') as f:
    train_val = f.readlines()
    train_val = [int(x.strip()) for x in train_val]

  # Account for num images not being divisible by num shards.
  if FLAGS.shard == FLAGS.num_shards - 1:
    end = len(image_list)
  else:
    end = start + images_per_shard
  assert len(train_val) == len(image_list)
  assert len(flow_list) == len(train_val)
  image_list = image_list[start:end]
  train_val = train_val[start:end]
  flow_list = flow_list[start:end]

  tf.compat.v1.logging.info('Writing %d images per shard', images_per_shard)
  tf.compat.v1.logging.info('Writing range %d to %d of %d total.', start, end,
                            total)

  img1_path = os.path.join(tmpdir, 'img1.ppm')
  img2_path = os.path.join(tmpdir, 'img2.ppm')
  flow_path = os.path.join(tmpdir, 'flow.flo')

  for i, (images, flow, assignment) in enumerate(
      zip(image_list, flow_list, train_val)):
    # Remove any stale local copies from the previous iteration.
    if os.path.exists(img1_path):
      os.remove(img1_path)
    if os.path.exists(img2_path):
      os.remove(img2_path)
    if os.path.exists(flow_path):
      os.remove(flow_path)

    tf.io.gfile.copy(images[0], img1_path)
    tf.io.gfile.copy(images[1], img2_path)
    tf.io.gfile.copy(flow, flow_path)

    image1_data = imageio.imread(img1_path)
    image2_data = imageio.imread(img2_path)
    flow_data = conversion_utils.read_flow(flow_path)

    height = image1_data.shape[0]
    width = image1_data.shape[1]
    assert height == image2_data.shape[0] == flow_data.shape[0]
    assert width == image2_data.shape[1] == flow_data.shape[1]

    example = tf.train.SequenceExample(
        context=tf.train.Features(
            feature={
                'height':
                    conversion_utils.int64_feature(height),
                'width':
                    conversion_utils.int64_feature(width),
                'flow_uv':
                    conversion_utils.bytes_feature(flow_data.tobytes()),
                'image1_path':
                    conversion_utils.bytes_feature(str.encode(images[0])),
                'image2_path':
                    conversion_utils.bytes_feature(str.encode(images[1])),
            }),
        feature_lists=tf.train.FeatureLists(
            feature_list={
                'images':
                    tf.train.FeatureList(feature=[
                        conversion_utils.bytes_feature(image1_data.tobytes()),
                        conversion_utils.bytes_feature(image2_data.tobytes())
                    ])
            }))
    if i % 10 == 0:
      tf.compat.v1.logging.info('Writing %d out of %d total.', i,
                                len(image_list))
    if assignment == 1:
      train_record_writer.write(example.SerializeToString())
    elif assignment == 2:
      test_record_writer.write(example.SerializeToString())
    else:
      assert False, 'There is an error in chairs_train_val.txt.'

  train_record_writer.close()
  test_record_writer.close()
  tf.compat.v1.logging.info('Saved results to %s', FLAGS.output_dir)
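
# Hypothetical entry point for running this script standalone; these
# conversion scripts rely on absl flags (data_dir, output_dir,
# train_split_file, num_shards, shard), which are assumed to be defined at
# the module level alongside the imports.
def main(unused_argv):
  from absl import app  # Assumed available; normally imported at the top.
  del app  # Shown here only to flag the dependency.
  convert_dataset()


if __name__ == '__main__':
  from absl import app
  app.run(main)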