def get_inputs(padx, pady):
  """Build the image, pose and intrinsics tensors consumed by MPI inference.

  Args:
    padx: Horizontal padding handed to load_image; twice this amount is
      subtracted from the pre-crop width to recover the original width.
    pady: Vertical padding, used the same way for the height.

  Returns:
    A tuple (inputs, original_width, original_height). inputs is a dict with
    the keys 'ref_image', 'ref_pose', 'src_images', 'src_poses' and
    'intrinsics'. The two sizes are tensors derived from tf.shape, giving the
    image size before padding and cropping.
  """
  ref = load_image(FLAGS.image1, padx, pady, 0, 0)
  src = load_image(FLAGS.image2, padx, pady, -FLAGS.xshift, -FLAGS.yshift)

  # Record shapes on both sides of the crop so they can be compared below.
  ref_shape_padded = tf.shape(ref)
  src_shape_padded = tf.shape(src)
  ref = crop_to_multiple(ref, 16)
  src = crop_to_multiple(src, 16)
  ref_shape_cropped = tf.shape(ref)
  src_shape_cropped = tf.shape(src)

  shapes_agree = tf.reduce_all(
      tf.logical_and(
          tf.equal(ref_shape_padded, src_shape_padded),
          tf.equal(ref_shape_cropped, src_shape_cropped)))
  shape_check = tf.Assert(shapes_agree, [
      'Shape mismatch:', ref_shape_padded, src_shape_padded,
      ref_shape_cropped, src_shape_cropped
  ])
  with tf.control_dependencies([shape_check]):
    # Prepend a batch dimension of size 1; creating these ops under the
    # control dependency ties the shape check to the batched images.
    ref = ref[tf.newaxis, ...]
    src = src[tf.newaxis, ...]

  ref_pose = pose_from_flag(FLAGS.pose1)
  src_pose = pose_from_flag(FLAGS.pose2)
  if not FLAGS.pose2:
    # No explicit second pose: fill its last column from the negated offsets.
    offsets = (FLAGS.xoffset, FLAGS.yoffset, FLAGS.zoffset)
    for row, offset in enumerate(offsets):
      src_pose[row][3] = -offset
  ref_pose = build_matrix(ref_pose)[tf.newaxis, ...]
  src_pose = build_matrix(src_pose)[tf.newaxis, ...]

  # Focal lengths are scaled by the size before cropping and padding, so that
  # the field of view is adjusted correctly by the crop.
  original_width = ref_shape_padded[1] - 2 * padx
  original_height = ref_shape_padded[0] - 2 * pady
  final_width = ref_shape_cropped[1]
  final_height = ref_shape_cropped[0]
  fx = tf.multiply(tf.to_float(original_width), FLAGS.fx)
  fy = tf.multiply(tf.to_float(original_height), FLAGS.fy)

  # The MPI code may fail if the principal point is off-center, so it is
  # placed at the exact middle. Cropping may really have moved it by half a
  # pixel; that is ignored here.
  cx = tf.multiply(tf.to_float(final_width), 0.5)
  cy = tf.multiply(tf.to_float(final_height), 0.5)
  intrinsics = build_matrix([
      [fx, 0.0, cx],
      [0.0, fy, cy],
      [0.0, 0.0, 1.0],
  ])[tf.newaxis, ...]

  inputs = {
      'ref_image': ref,
      'ref_pose': ref_pose,
      'src_images': tf.concat([ref, src], axis=-1),
      'src_poses': tf.stack([ref_pose, src_pose], axis=1),
      'intrinsics': intrinsics,
  }
  return inputs, original_width, original_height
def _write_readme(output_dir):
  """Write a README into output_dir recording the generating command line."""
  # One entry per output line; arguments after the program name are indented.
  lines = [sys.argv[0]] + [' %s' % arg for arg in sys.argv[1:]]
  with open(output_dir + '/README', 'w') as fh:
    fh.write(
        'This directory was generated by mpi_from_images. Command-line:\n\n')
    # Joining puts the backslash continuation on every line but the last,
    # which stays correct even when there are no arguments at all.
    fh.write(' \\\n'.join(lines) + '\n')


def _write_test_outputs(output_dir, ins, outs):
  """Write the optional outputs selected by FLAGS.test_outputs.

  Args:
    output_dir: Directory the files are written into.
    ins: Evaluated inputs dict, as returned by sess.run on get_inputs' dict.
    outs: Evaluated outputs of model.infer_mpi.
  """
  requested = FLAGS.test_outputs

  if 'intrinsics' in requested:
    with open(output_dir + '/intrinsics.txt', 'w') as fh:
      write_intrinsics(fh, ins['intrinsics'][0])

  if 'src_images' in requested:
    # src_images holds the two RGB images concatenated along the channel axis.
    for i in range(2):
      write_image(output_dir + '/src_image_%d.png' % i,
                  ins['src_images'][0, :, :, i * 3:(i + 1) * 3] * 255.0)

  # Poses are written whenever they are requested, independently of whether
  # 'src_images' was requested too.
  if 'poses' in requested:
    for i in range(2):
      write_pose(output_dir + '/src_pose_%d.txt' % i, ins['src_poses'][0, i])

  if 'fgbg' in requested:
    write_image(output_dir + '/foreground_color.png', outs['fg_image'][0])
    write_image(output_dir + '/background_color.png', outs['bg_image'][0])

  if 'blend_weights' in requested:
    for i in range(FLAGS.num_mpi_planes):
      weight_img = outs['blend_weights'][0, :, :, i] * 255.0
      write_image(output_dir + '/foreground_weight_plane_%.3d.png' % i,
                  weight_img)

  if 'psv' in requested:
    for j in range(FLAGS.num_psv_planes):
      # NOTE(review): the (x + 1) / 2 * 255 mapping presumably assumes values
      # in [-1, 1] -- confirm against the model's output range.
      plane_img = (outs['psv'][0, :, :, j * 3:(j + 1) * 3] + 1.) / 2. * 255
      write_image(output_dir + '/psv_plane_%.3d.png' % j, plane_img)

  if 'rgba_layers' in requested:
    for i in range(FLAGS.num_mpi_planes):
      alpha_img = outs['rgba_layers'][0, :, :, i, 3] * 255.0
      rgb_img = (outs['rgba_layers'][0, :, :, i, :3] + 1.) / 2. * 255
      write_image(output_dir + '/mpi_alpha_%.2d.png' % i, alpha_img)
      write_image(output_dir + '/mpi_rgb_%.2d.png' % i, rgb_img)


def main(_):
  """Infer an MPI from two input images, optionally render views, and save.

  The steps are: load and pad the inputs, evaluate their size, run MPI
  inference from the latest checkpoint under FLAGS.model_root /
  FLAGS.model_name, render one view per entry of FLAGS.render_multiples when
  FLAGS.render is set, and write everything to FLAGS.output_dir.

  Args:
    _: Unused.

  Raises:
    ValueError: If no checkpoint is found in the model directory.
  """
  # Set up the inputs.
  # How much shall we pad the input images? We'll pad enough so that
  # (a) when we render output images we won't lose stuff at the edges
  # due to cropping, and (b) we can find a multiple of 16 size without
  # cropping into the original images.
  max_multiple = 0
  render_list = []
  if FLAGS.render:
    render_list = [float(x) for x in FLAGS.render_multiples.split(',')]
    max_multiple = max(abs(m) for m in render_list)

  pady = int(max_multiple * abs(FLAGS.yshift) + 8)
  padx = int(max_multiple * abs(FLAGS.xshift) + 8)

  # %g rather than %d: max_multiple may be fractional and %d would truncate.
  print('Padding inputs: padx=%d, pady=%d (max_multiple=%g)' %
        (padx, pady, max_multiple))
  inputs, original_width, original_height = get_inputs(padx, pady)

  # MPI code requires images of known size. So we run the input part of the
  # graph now to find the size, which we can then set on the inputs.
  with tf.Session() as sess:
    dimensions, original_width, original_height = sess.run(
        [tf.shape(inputs['ref_image']), original_width, original_height])
  batch = 1
  channels = 3
  assert dimensions[0] == batch
  mpi_height = dimensions[1]
  mpi_width = dimensions[2]
  assert dimensions[3] == channels

  print('Original size: width=%d, height=%d' %
        (original_width, original_height))
  print(' MPI size: width=%d, height=%d' % (mpi_width, mpi_height))

  inputs['ref_image'].set_shape([batch, mpi_height, mpi_width, channels])
  inputs['src_images'].set_shape(
      [batch, mpi_height, mpi_width, channels * 2])

  # Build the MPI.
  model = MPI()
  psv_planes = model.inv_depths(FLAGS.min_depth, FLAGS.max_depth,
                                FLAGS.num_psv_planes)
  mpi_planes = model.inv_depths(FLAGS.min_depth, FLAGS.max_depth,
                                FLAGS.num_mpi_planes)
  outputs = model.infer_mpi(inputs['src_images'], inputs['ref_image'],
                            inputs['ref_pose'], inputs['src_poses'],
                            inputs['intrinsics'], FLAGS.which_color_pred,
                            FLAGS.num_mpi_planes, psv_planes,
                            FLAGS.test_outputs)

  saver = tf.train.Saver(tf.model_variables())
  ckpt_dir = os.path.join(FLAGS.model_root, FLAGS.model_name)
  ckpt_file = tf.train.latest_checkpoint(ckpt_dir)
  if ckpt_file is None:
    # latest_checkpoint returns None when nothing is found; fail here with a
    # message naming the directory instead of later inside saver.restore.
    raise ValueError('No checkpoint found in %s' % ckpt_dir)
  sv = tf.train.Supervisor(logdir=ckpt_dir, saver=None)
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True

  print('Inferring MPI...')
  with sv.managed_session(config=config) as sess:
    saver.restore(sess, ckpt_file)
    ins, outs = sess.run([inputs, outputs])

  # Render output images separately so as not to run out of memory.
  tf.reset_default_graph()
  renders = {}
  if FLAGS.render:
    print('Rendering new views...')
    for index, multiple in enumerate(render_list):
      print(' offset: %s' % multiple)
      # Identity rotation; the last column is the offset flags scaled by
      # -multiple.
      pose = build_matrix([[1.0, 0.0, 0.0, -multiple * FLAGS.xoffset],
                           [0.0, 1.0, 0.0, -multiple * FLAGS.yoffset],
                           [0.0, 0.0, 1.0, -multiple * FLAGS.zoffset],
                           [0.0, 0.0, 0.0, 1.0]])[tf.newaxis, ...]
      image = model.deprocess_image(
          model.mpi_render_view(
              tf.constant(outs['rgba_layers']), pose, mpi_planes,
              tf.constant(ins['intrinsics'])))[0]
      unshifted = shift_image(image, multiple * FLAGS.xshift,
                              multiple * FLAGS.yshift)
      cropped = crop_to_size(unshifted, original_width, original_height)

      with tf.Session() as sess:
        renders[multiple] = (index, sess.run(cropped))

  output_dir = FLAGS.output_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.gfile.MakeDirs(output_dir)

  print('Saving results to %s' % output_dir)

  # Write results to disk.
  for name, (index, image) in renders.items():
    write_image(output_dir + '/render_%02d_%s.png' % (index, name), image)

  _write_test_outputs(output_dir, ins, outs)
  _write_readme(output_dir)
  print('Done.')