def _crop_to_output(patch, buffersize, outsize):
  """Crops the central output window out of a padded inference patch.

  Args:
    patch: Array of shape [batch, height, width, planes, channels].
    buffersize: [rows, cols] width of the context border surrounding the
      valid output region of each patch.
    outsize: [rows, cols] size of the valid output region.

  Returns:
    `patch` restricted along axes 1 and 2 to its central output window.
  """
  return patch[:, buffersize[0]:buffersize[0] + outsize[0],
               buffersize[1]:buffersize[1] + outsize[1], :, :]


def _rescale_rgb(rgba):
  """Maps the first 3 (RGB) channels through x / 2 + 0.5, keeps the rest.

  NOTE(review): presumably the network emits RGB in [-1, 1] and this maps it
  to [0, 1]; alpha channels are passed through untouched.
  """
  return np.concatenate(
      [rgba[Ellipsis, :3] / 2.0 + 0.5, rgba[Ellipsis, 3:]], axis=-1)


def main(argv):
  """Predicts an MPI for one input batch and renders a sway-path video.

  Runs patched inference (the full-resolution MPI likely does not fit on
  GPU in one shot), stitches the per-patch results into full-resolution
  outputs, saves the refined RGBA layers as PNGs, renders a sinusoidal
  camera sway through the MPI, and encodes the frames into sway.mp4 with
  ffmpeg.

  Args:
    argv: Unused command-line remainder.

  Raises:
    ValueError: If `input_file`, `output_dir` or `model_dir` is not set.
  """
  del argv  # Unused.
  if FLAGS.input_file is None:
    raise ValueError("`input_file` must be defined")
  if FLAGS.output_dir is None:
    raise ValueError("`output_dir` must be defined")
  if FLAGS.model_dir is None:
    raise ValueError("`model_dir` must be defined")
  # os.path.join is robust to a missing trailing separator on the flag
  # (plain string "+" silently produced paths like "dirmodel.ckpt").
  checkpoint = os.path.join(FLAGS.model_dir, "model.ckpt")
  # makedirs(exist_ok=True) also creates missing parents and avoids the
  # exists()/mkdir() race of the previous code.
  os.makedirs(FLAGS.output_dir, exist_ok=True)

  # Set up model.
  model = MPI()

  # Load input batch.
  inputs = np.load(FLAGS.input_file)

  # Compute plane depths.
  mpi_planes = model.inv_depths(min_depth, max_depth, num_mpi_planes)

  # Format inputs, convert from numpy arrays to tensors.
  # Change this if you are training with a dataset iterator.
  in_src_images = tf.constant(inputs["src_images"])
  in_ref_image = tf.constant(inputs["ref_image"])
  in_ref_pose = tf.constant(inputs["ref_pose"])
  # in_tgt_pose = tf.constant(inputs["tgt_pose"])  # Unneeded for sway.
  in_src_poses = tf.constant(inputs["src_poses"])
  in_intrinsics = tf.constant(inputs["intrinsics"])
  in_tgt_image = tf.constant(inputs["tgt_image"])

  in_ref_image = tf.image.convert_image_dtype(in_ref_image, dtype=tf.float32)
  in_src_images = tf.image.convert_image_dtype(in_src_images, dtype=tf.float32)
  in_tgt_image = tf.image.convert_image_dtype(in_tgt_image, dtype=tf.float32)

  # Patched inference: [row, col] index of the patch to run.
  patch_ind = tf.placeholder(tf.int32, shape=(2,))
  # Context border cropped away from each side of an inference patch.
  buffersize = (patchsize - outsize) // 2

  # Set up graph.
  outputs = model.infer_mpi(
      in_src_images,
      in_ref_image,
      in_ref_pose,
      in_src_poses,
      in_intrinsics,
      num_mpi_planes,
      mpi_planes,
      run_patched=True,
      patch_ind=patch_ind,
      patchsize=patchsize,
      outsize=outsize)

  # Define shapes to placate tensorflow.
  for key, channels in [("rgba_layers", 4), ("rgba_layers_refine", 4),
                        ("refine_input_mpi", 4), ("stuff_behind", 3),
                        ("flow_vecs", 2)]:
    outputs[key].set_shape(
        (1, patchsize[0], patchsize[1], num_mpi_planes, channels))

  # Patched inference for MPI (128 planes at 0.5MP res likely won't fit on
  # GPU).
  stitch_keys = ("rgba_layers", "rgba_layers_refine", "refine_input_mpi",
                 "stuff_behind", "flow_vecs")
  saver = tf.train.Saver()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # checkpoint is always a freshly built string here, so the old
    # `if checkpoint is not None` guard was dead and has been dropped.
    print("Loading from checkpoint:", checkpoint)
    saver.restore(sess, checkpoint)
    num_patches = [image_height // outsize[0], image_width // outsize[1]]
    print("patched inference with:", num_patches, "patches,", "buffersize:",
          buffersize)
    # Patches in a row are stitched along width (axis 2), then rows are
    # stitched along height (axis 1).
    stitched_rows = {key: [] for key in stitch_keys}
    for r in range(num_patches[0]):
      row_patches = {key: [] for key in stitch_keys}
      for c in range(num_patches[1]):
        patch_num = r * num_patches[1] + c
        print("running patch:", patch_num)
        patch_ind_rc = np.array([r, c])
        patch_start = patch_ind_rc * outsize
        patch_end = patch_start + patchsize
        print("patch ind:", patch_ind_rc, "patch_start", patch_start,
              "patch_end", patch_end)
        feed_dict = {
            patch_ind: patch_ind_rc,
            in_src_images: inputs["src_images"],
            in_ref_image: inputs["ref_image"],
            in_ref_pose: inputs["ref_pose"],
            in_src_poses: inputs["src_poses"],
            in_intrinsics: inputs["intrinsics"]
        }
        outs = sess.run(outputs, feed_dict=feed_dict)
        for key in stitch_keys:
          row_patches[key].append(
              _crop_to_output(outs[key], buffersize, outsize))
      for key in stitch_keys:
        stitched_rows[key].append(np.concatenate(row_patches[key], 2))

  stitched = {
      key: np.concatenate(stitched_rows[key], 1) for key in stitch_keys
  }
  outs["rgba_layers"] = _rescale_rgb(stitched["rgba_layers"])
  outs["rgba_layers_refine"] = _rescale_rgb(stitched["rgba_layers_refine"])
  outs["refine_input_mpi"] = _rescale_rgb(stitched["refine_input_mpi"])
  outs["stuff_behind"] = stitched["stuff_behind"] / 2.0 + 0.5
  outs["flow_vecs"] = stitched["flow_vecs"]

  # Save MPI layers.
  layers = outs["rgba_layers_refine"]
  for i in range(layers.shape[3]):
    i_filename = os.path.join(FLAGS.output_dir,
                              "mpi_rgba_{:04d}.png".format(i))
    data = np.clip(layers[0, :, :, i, :], 0, 1)
    plt.imsave(i_filename, data)
    print("wrote layer:", i)

  # Render example sway camera path.
  mpi_placeholder = tf.placeholder(
      dtype=tf.float32,
      shape=[
          1, layers.shape[1] - 2 * crop, layers.shape[2] - 2 * crop,
          layers.shape[3], 4
      ])
  tgt_pose_placeholder = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4])
  intrinsics_placeholder = tf.placeholder(dtype=tf.float32, shape=[1, 3, 3])
  output_render, _ = model.mpi_render_view(mpi_placeholder,
                                           tgt_pose_placeholder, mpi_planes,
                                           intrinsics_placeholder)

  # Compute sway path poses.
  # Maximum camera translation to satisfy max_disp parameter (scaled by
  # focal length from the intrinsics).
  max_trans = max_disp / inputs["intrinsics"][0, 0, 0]
  output_poses = []
  for i in range(num_frames):
    # Sinusoidal lateral (x-axis) translation over one full period.
    i_trans = max_trans * np.sin(2.0 * np.pi * float(i) / float(num_frames))
    # 4x4 homogeneous pose: identity rotation, x-translation, batch dim.
    i_pose = np.concatenate(
        [
            np.concatenate(
                [np.eye(3),
                 np.array([i_trans, 0.0, 0.0])[:, np.newaxis]],
                axis=1),
            np.array([0.0, 0.0, 0.0, 1.0])[np.newaxis, :]
        ],
        axis=0)[np.newaxis, :, :]
    output_poses.append(i_pose)

  # Render sway path.
  output_render_list = []
  with tf.Session() as sess:
    for i in range(num_frames):
      print("Rendering pose:", i, "of:", num_frames)
      i_output = sess.run(
          output_render,
          feed_dict={
              mpi_placeholder:
                  outs["rgba_layers_refine"][:, crop:-crop, crop:-crop, :, :],
              tgt_pose_placeholder:
                  output_poses[i],
              intrinsics_placeholder:
                  inputs["intrinsics"]
          })
      output_render_list.append(i_output)

  for i in range(len(output_render_list)):
    plt.imsave(
        os.path.join(FLAGS.output_dir, "tmp_{:03d}.png".format(i)),
        output_render_list[i][0, :, :, :])

  # Save sway path to video (requires FFMPEG).
  subprocess.call([
      "ffmpeg", "-i",
      os.path.join(FLAGS.output_dir, "tmp_%03d.png"),
      os.path.join(FLAGS.output_dir, "sway.mp4")
  ])
  for f in glob.glob(os.path.join(FLAGS.output_dir, "tmp*.png")):
    os.remove(f)
def testPredictedMPI(self):
  """Test MPI prediction against a saved ground-truth array.

  Runs patched inference on a canned input batch, stitches the refined
  RGBA layers back to full resolution, and compares them against the
  precomputed MPI stored alongside the input.
  """
  # os.path.join tolerates flag values with or without a trailing "/"
  # (plain "+" silently produced e.g. "dirmodel.ckpt").
  checkpoint = os.path.join(FLAGS.model_dir, "model.ckpt")

  # Set up model.
  model = MPI()

  # Load input batch.
  inputs = np.load(os.path.join(FLAGS.input_dir, "0.npz"))

  # Load ground truth for testing.
  testing_truth = np.load(os.path.join(FLAGS.input_dir, "0_truth.npz"))
  true_mpi = testing_truth["mpi"]

  # Compute plane depths.
  mpi_planes = model.inv_depths(min_depth, max_depth, num_mpi_planes)

  # Format inputs, convert from numpy arrays to tensors.
  in_src_images = tf.constant(inputs["src_images"])
  in_ref_image = tf.constant(inputs["ref_image"])
  in_ref_pose = tf.constant(inputs["ref_pose"])
  # in_tgt_pose = tf.constant(inputs["tgt_pose"])  # Unneeded for sway.
  in_src_poses = tf.constant(inputs["src_poses"])
  in_intrinsics = tf.constant(inputs["intrinsics"])
  in_tgt_image = tf.constant(inputs["tgt_image"])

  in_ref_image = tf.image.convert_image_dtype(in_ref_image, dtype=tf.float32)
  in_src_images = tf.image.convert_image_dtype(in_src_images, dtype=tf.float32)
  in_tgt_image = tf.image.convert_image_dtype(in_tgt_image, dtype=tf.float32)

  # Patched inference: [row, col] index of the patch to run.
  patch_ind = tf.placeholder(tf.int32, shape=(2,))
  # Context border cropped away from each side of an inference patch.
  buffersize = (patchsize - outsize) // 2

  # Set up graph.
  outputs = model.infer_mpi(
      in_src_images,
      in_ref_image,
      in_ref_pose,
      in_src_poses,
      in_intrinsics,
      num_mpi_planes,
      mpi_planes,
      run_patched=True,
      patch_ind=patch_ind,
      patchsize=patchsize,
      outsize=outsize)

  # Define shapes to placate tensorflow.
  for key, channels in [("rgba_layers", 4), ("rgba_layers_refine", 4),
                        ("refine_input_mpi", 4), ("stuff_behind", 3),
                        ("flow_vecs", 2)]:
    outputs[key].set_shape(
        (1, patchsize[0], patchsize[1], num_mpi_planes, channels))

  # Patched inference for MPI.
  saver = tf.train.Saver()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # checkpoint is always a freshly built string here, so no None check
    # is needed before restoring.
    print("Loading from checkpoint:", checkpoint)
    saver.restore(sess, checkpoint)
    num_patches = [image_height // outsize[0], image_width // outsize[1]]
    print("patched inference with:", num_patches, "patches,", "buffersize:",
          buffersize)
    # Only the refined RGBA layers are compared against ground truth, so
    # only they are stitched (the previous code stitched every output and
    # then discarded all but this one).
    row_mpis = []
    for r in range(num_patches[0]):
      row_patches = []
      for c in range(num_patches[1]):
        patch_num = r * num_patches[1] + c
        print("running patch:", patch_num)
        patch_ind_rc = np.array([r, c])
        patch_start = patch_ind_rc * outsize
        patch_end = patch_start + patchsize
        print("patch ind:", patch_ind_rc, "patch_start", patch_start,
              "patch_end", patch_end)
        feed_dict = {
            patch_ind: patch_ind_rc,
            in_src_images: inputs["src_images"],
            in_ref_image: inputs["ref_image"],
            in_ref_pose: inputs["ref_pose"],
            in_src_poses: inputs["src_poses"],
            in_intrinsics: inputs["intrinsics"]
        }
        outs = sess.run(outputs, feed_dict=feed_dict)
        # Keep only the central output window of each padded patch.
        row_patches.append(
            outs["rgba_layers_refine"][:, buffersize[0]:buffersize[0] +
                                       outsize[0],
                                       buffersize[1]:buffersize[1] +
                                       outsize[1], :, :])
      # Patches in a row are stitched along width (axis 2)...
      row_mpis.append(np.concatenate(row_patches, 2))
    # ...and rows are stitched along height (axis 1).
    out_rgba_refine = np.concatenate(row_mpis, 1)

  # Map RGB through x / 2 + 0.5 (presumably [-1, 1] network output to
  # [0, 1] -- TODO confirm); alpha is left untouched.
  layers = np.concatenate([
      out_rgba_refine[Ellipsis, :3] / 2.0 + 0.5,
      out_rgba_refine[Ellipsis, 3:]
  ],
                          axis=-1)
  self.assertAllClose(layers, true_mpi)