def trajectory_inference():
    """Generates trajectories from the KITTI odometry sets and a checkpoint.

    Builds an inference model sized by FLAGS.img_height / FLAGS.img_width,
    restores FLAGS.checkpoint_path into it, and then, for every entry of
    ODOMETRY_SETS, hands `odometry_inference` the set's directory under
    FLAGS.odometry_test_set_dir, the output path
    'odometry_<set>.txt' inside FLAGS.output_dir, and an egomotion callback.
    FLAGS.output_dir is created when it does not exist yet.
    """
    # Note that the struct2depth code only works at batch_size=1, because it
    # uses the training mode of batchnorm at inference.
    inference_model = model.Model(is_training=False,
                                  batch_size=1,
                                  img_height=FLAGS.img_height,
                                  img_width=FLAGS.img_width)
    saver = tf.train.Saver()

    # The session is owned by a context manager so that it is released even
    # when restoring the checkpoint or evaluating one of the sets raises.
    with tf.Session() as sess:

        def infer_egomotion(image1, image2):
            # Binds the model and the live session for odometry_inference.
            return inference_model.inference_egomotion(image1, image2, sess)

        saver.restore(sess, FLAGS.checkpoint_path)
        if not tf.gfile.Exists(FLAGS.output_dir):
            tf.gfile.MakeDirs(FLAGS.output_dir)

        for odo_set in ODOMETRY_SETS:
            logging.info('Evaluating odometry on %s', odo_set)
            test_file_dir = os.path.join(FLAGS.odometry_test_set_dir, odo_set)
            output_file = os.path.join(FLAGS.output_dir,
                                       'odometry_%s.txt' % odo_set)
            odometry_inference(test_file_dir, output_file, infer_egomotion)
def main(_):
    """Seeds the RNGs, builds the training model and runs training.

    The single positional argument is ignored. When FLAGS.debug is set,
    `_print_losses` is called on the 'debug' sub-directory of
    FLAGS.checkpoint_dir after training returns.
    """
    # Fixed seed for repeatability
    fixed_seed = FLAGS.seed
    tf.set_random_seed(fixed_seed)
    np.random.seed(fixed_seed)
    random.seed(fixed_seed)

    if not gfile.Exists(FLAGS.checkpoint_dir):
        gfile.MakeDirs(FLAGS.checkpoint_dir)

    # Every constructor argument is forwarded verbatim from its flag.
    model_kwargs = {
        'boxify': FLAGS.boxify,
        'data_dir': FLAGS.data_dir,
        'file_extension': FLAGS.file_extension,
        'is_training': True,
        'foreground_dilation': FLAGS.foreground_dilation,
        'learn_intrinsics': FLAGS.learn_intrinsics,
        'learning_rate': FLAGS.learning_rate,
        'reconstr_weight': FLAGS.reconstr_weight,
        'smooth_weight': FLAGS.smooth_weight,
        'ssim_weight': FLAGS.ssim_weight,
        'translation_consistency_weight':
            FLAGS.translation_consistency_weight,
        'rotation_consistency_weight': FLAGS.rotation_consistency_weight,
        'batch_size': FLAGS.batch_size,
        'img_height': FLAGS.img_height,
        'img_width': FLAGS.img_width,
        'weight_reg': FLAGS.weight_reg,
        'depth_consistency_loss_weight':
            FLAGS.depth_consistency_loss_weight,
        'queue_size': FLAGS.queue_size,
        'input_file': FLAGS.input_file,
    }
    train_model = model.Model(**model_kwargs)

    _train(train_model,
           FLAGS.checkpoint_dir,
           FLAGS.train_steps,
           FLAGS.summary_freq)

    if FLAGS.debug:
        debug_dir = os.path.join(FLAGS.checkpoint_dir, 'debug')
        _print_losses(debug_dir)
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs depth inference. Refer to flags in inference.py for details.

    Restores `model_ckpt`, collects the input images, runs the depth network
    over them in batches of `batch_size` and writes, for every input image, a
    raw '.npy' prediction and a '.png' visualization (input stacked on top of
    the colorized depth) into the matching entry of the pre-computed output
    directories. All per-image predictions are additionally saved together
    as 'result.npy' inside `output_dir`.

    Args:
      output_dir: Root directory for the predictions; created if missing.
      file_extension: Extension used to collect inputs and passed to
        `util.save_image`.
      depth: Whether to run the depth network at all.
      model_ckpt: Checkpoint to restore.
      input_dir: Passed to `collect_input_images`.
      input_list_file: Passed to `collect_input_images`.
      batch_size: Number of images per forward pass. The last batch is padded
        with zero images; predictions for the padding are discarded.
      img_height: Height the inputs are resized to and the model is built for.
      img_width: Width the inputs are resized to and the model is built for.
      shuffle: Shuffle the collected file list in place before inference.
      flip_for_depth: Mirror inputs before inference and mirror the results
        back; output names get a '_flip' suffix.
      inference_mode: INFERENCE_MODE_SINGLE or INFERENCE_MODE_TRIPLETS. In
        triplet mode the middle third of each (3 * img_width wide) image is
        used.
      inference_crop: INFERENCE_CROP_NONE or INFERENCE_CROP_CITYSCAPES; only
        consulted in single mode.
      egomotion, seq_length, architecture, imagenet_norm, use_skip,
        joint_encoder, use_masks: Not referenced by this implementation;
        kept so existing callers keep working.

    Raises:
      ValueError: If `inference_mode` / `inference_crop` is not one of the
        supported combinations.
    """
    # Build the graph for the geometry that is actually fed below. The values
    # used to be hard-coded to (1, 128, 416), which only matched the defaults.
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(
            input_dir, input_list_file, file_extension)
        if shuffle:
            logging.info('Shuffling data...')
            np.random.shuffle(im_files)
        num_files = len(im_files)
        logging.info('Running inference on %d files.', num_files)

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            npys = []
            for i, im_file in enumerate(im_files):
                if i % 100 == 0:
                    logging.info('%s of %s files processed.', i, num_files)

                # Read image and run inference.
                if (inference_mode == INFERENCE_MODE_SINGLE and
                        inference_crop == INFERENCE_CROP_NONE):
                    im = util.load_image(im_file,
                                         resize=(img_width, img_height))
                elif (inference_mode == INFERENCE_MODE_SINGLE and
                      inference_crop == INFERENCE_CROP_CITYSCAPES):
                    im = util.crop_cityscapes(util.load_image(im_file),
                                              resize=(img_width, img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_file,
                                         resize=(img_width * 3, img_height))
                    # Keep the middle frame of the triplet.
                    im = im[:, img_width:img_width * 2]
                else:
                    # Previously this fell through and failed later with an
                    # unrelated NameError on `im`.
                    raise ValueError(
                        'Unsupported inference_mode/inference_crop: %s / %s'
                        % (inference_mode, inference_crop))
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) != batch_size and i != num_files - 1:
                    continue  # Keep filling the current batch.

                # Call inference on batch. Remember how many entries are real
                # images before padding so padding never reaches the output.
                num_real = len(im_batch)
                for _ in range(batch_size - num_real):  # Fill up batch.
                    im_batch.append(
                        np.zeros(shape=(img_height, img_width, 3),
                                 dtype=np.float32))
                im_batch = np.stack(im_batch, axis=0)
                est_depth = inference_model.inference_depth(im_batch, sess)
                if flip_for_depth:
                    est_depth = np.flip(est_depth, axis=2)
                    im_batch = np.flip(im_batch, axis=2)

                # Index into im_files of the first real image of this batch.
                first_index = i - num_real + 1
                pref = '_flip' if flip_for_depth else ''
                for j in range(num_real):
                    color_map = util.normalize_depth_for_display(
                        np.squeeze(est_depth[j]))
                    visualization = np.concatenate(
                        (im_batch[j], color_map), axis=0)
                    # Save raw prediction and color visualization. Extract
                    # filename without extension from full path: e.g.
                    # path/to/input_dir/folder1/file1.png -> file1
                    k = first_index + j
                    filename_root = os.path.splitext(
                        os.path.basename(im_files[k]))[0]
                    output_raw = os.path.join(
                        output_dirs[k], filename_root + pref + '.npy')
                    output_vis = os.path.join(
                        output_dirs[k], filename_root + pref + '.png')
                    with gfile.Open(output_raw, 'wb') as f:
                        np.save(f, est_depth[j])
                    npys.append(est_depth[j])
                    util.save_image(output_vis, visualization, file_extension)
                im_batch = []

            # One entry per input image, in processing order. Joined with
            # os.path.join so the file lands inside output_dir even when the
            # directory is given without a trailing separator.
            with gfile.Open(os.path.join(output_dir, 'result.npy'), 'wb') as f:
                np.save(f, npys)
from matplotlib import pyplot as plt
import cv2  # used for resize. if you don't have it, use anything else
import numpy as np
import tensorflow as tf
import sys
from PIL import Image
from depth_from_video_in_the_wild import depth_prediction_net
from depth_from_video_in_the_wild import model

if __name__ == "__main__":
    # Minimal egomotion check: restore the example checkpoint and print the
    # prediction for two crops taken from one example image.

    # Load images
    frame_strip = np.asarray(
        Image.open("data_example/erfurt_93/0000000002.png"))
    # Columns [0, 416) and [416, 832) become the two network inputs; a
    # leading axis of size 1 is added to each.
    first_frame = frame_strip[np.newaxis, :, :416]
    second_frame = frame_strip[np.newaxis, :, 416:832]

    egomotion_model = model.Model(
        is_training=False,
        batch_size=1,
        img_height=128,
        img_width=416,
    )
    checkpoint_saver = tf.train.Saver()
    session = tf.Session()
    checkpoint_saver.restore(session, "example_model/model-413174")

    prediction = egomotion_model.inference_egomotion(
        first_frame, second_frame, session)
    print(prediction)
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   imagenet_norm=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False,
                   seq_length=3):
    """Runs inference. Refer to flags in inference.py for details.

    Restores `model_ckpt`, collects the input images and then optionally runs
    the depth network and/or the egomotion network over them.

    Depth results are written per image as '<name>.npy' (raw prediction) and
    '<name>.png' (input stacked on top of the colorized depth). Egomotion
    results are written as text lines to 'egomotion.txt' in the output
    directory that belongs to each image.

    Args:
      output_dir: Root directory for the predictions; created if missing.
      file_extension: Extension used to collect inputs, to derive the
        '-seg' mask filenames and passed to `util.save_image`.
      depth: Whether to run the depth network.
      egomotion: Whether to run the egomotion network.
      model_ckpt: Checkpoint to restore.
      input_dir: Passed to `collect_input_images`.
      input_list_file: Passed to `collect_input_images`.
      batch_size: Images per depth forward pass. The last batch is padded
        with zero images; predictions for the padding are discarded.
      img_height: Height the inputs are resized to.
      img_width: Width of a single frame after resizing.
      imagenet_norm: Forwarded to the model constructor.
      shuffle: Shuffle the collected file list in place before inference.
      flip_for_depth: Mirror inputs before depth inference and mirror the
        results back; output names get a '_flip' suffix.
      inference_mode: INFERENCE_MODE_SINGLE (one frame per file) or
        INFERENCE_MODE_TRIPLETS (three frames side by side per file).
      inference_crop: INFERENCE_CROP_NONE or INFERENCE_CROP_CITYSCAPES; only
        consulted for depth in single mode.
      use_masks: Apply `mask_image_stack` with the '-seg' masks before
        egomotion inference.
      seq_length: Number of consecutive frames stacked for one egomotion
        prediction in single mode. The body always read this name but it was
        missing from the signature; it is now an explicit trailing keyword.

    Raises:
      ValueError: If a required segmentation mask is missing, or if
        `inference_mode` / `inference_crop` is unsupported for depth.
    """

    def seg_mask_path(im_file):
        # Returns the path of the '-seg' mask next to im_file; raises when
        # it does not exist.
        im_seg_path = im_file.replace('.%s' % file_extension,
                                      '-seg.%s' % file_extension)
        if not gfile.Exists(im_seg_path):
            raise ValueError(
                'No segmentation mask %s has been found for '
                'image %s. If none are available, disable '
                'use_masks.' % (im_seg_path, im_file))
        return im_seg_path

    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width,
                                  imagenet_norm=imagenet_norm)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(
            input_dir, input_list_file, file_extension)
        if shuffle:
            logging.info('Shuffling data...')
            np.random.shuffle(im_files)
        num_files = len(im_files)
        logging.info('Running inference on %d files.', num_files)

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            for i, im_file in enumerate(im_files):
                if i % 100 == 0:
                    logging.info('%s of %s files processed.', i, num_files)

                # Read image and run inference.
                if (inference_mode == INFERENCE_MODE_SINGLE and
                        inference_crop == INFERENCE_CROP_NONE):
                    im = util.load_image(im_file,
                                         resize=(img_width, img_height))
                elif (inference_mode == INFERENCE_MODE_SINGLE and
                      inference_crop == INFERENCE_CROP_CITYSCAPES):
                    im = util.crop_cityscapes(util.load_image(im_file),
                                              resize=(img_width, img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_file,
                                         resize=(img_width * 3, img_height))
                    # Keep the middle frame of the triplet.
                    im = im[:, img_width:img_width * 2]
                else:
                    # Previously this fell through and failed later with an
                    # unrelated NameError on `im`.
                    raise ValueError(
                        'Unsupported inference_mode/inference_crop: %s / %s'
                        % (inference_mode, inference_crop))
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) != batch_size and i != num_files - 1:
                    continue  # Keep filling the current batch.

                # Call inference on batch. Remember how many entries are real
                # images before padding: iterating over the padded batch used
                # to shift `k` and write results under the wrong filenames.
                num_real = len(im_batch)
                for _ in range(batch_size - num_real):  # Fill up batch.
                    im_batch.append(
                        np.zeros(shape=(img_height, img_width, 3),
                                 dtype=np.float32))
                im_batch = np.stack(im_batch, axis=0)
                est_depth = inference_model.inference_depth(im_batch, sess)
                if flip_for_depth:
                    est_depth = np.flip(est_depth, axis=2)
                    im_batch = np.flip(im_batch, axis=2)

                # Index into im_files of the first real image of this batch.
                first_index = i - num_real + 1
                pref = '_flip' if flip_for_depth else ''
                for j in range(num_real):
                    color_map = util.normalize_depth_for_display(
                        np.squeeze(est_depth[j]))
                    visualization = np.concatenate(
                        (im_batch[j], color_map), axis=0)
                    # Save raw prediction and color visualization. Extract
                    # filename without extension from full path: e.g.
                    # path/to/input_dir/folder1/file1.png -> file1
                    k = first_index + j
                    filename_root = os.path.splitext(
                        os.path.basename(im_files[k]))[0]
                    output_raw = os.path.join(
                        output_dirs[k], filename_root + pref + '.npy')
                    output_vis = os.path.join(
                        output_dirs[k], filename_root + pref + '.png')
                    with gfile.Open(output_raw, 'wb') as f:
                        np.save(f, est_depth[j])
                    util.save_image(output_vis, visualization, file_extension)
                im_batch = []

        # Run egomotion network.
        if egomotion:
            if inference_mode == INFERENCE_MODE_SINGLE:
                # Run regular egomotion inference loop.
                input_image_seq = []
                input_seg_seq = []
                current_sequence_dir = None
                current_output_handle = None
                try:
                    for i, im_file in enumerate(im_files):
                        sequence_dir = os.path.dirname(im_file)
                        if sequence_dir != current_sequence_dir:
                            # Assume start of a new sequence, since this image
                            # lies in a different directory than the previous
                            # ones.
                            output_filepath = os.path.join(output_dirs[i],
                                                           'egomotion.txt')
                            if current_output_handle is not None:
                                current_output_handle.close()
                                current_output_handle = None
                            current_sequence_dir = sequence_dir
                            logging.info('Writing egomotion sequence to %s.',
                                         output_filepath)
                            current_output_handle = gfile.Open(
                                output_filepath, 'w')
                            # Clear egomotion input buffers. The mask buffer
                            # must be reset together with the image buffer,
                            # otherwise masks of the previous sequence stay
                            # paired with frames of the new one.
                            input_image_seq = []
                            input_seg_seq = []

                        im = util.load_image(im_file,
                                             resize=(img_width, img_height))
                        input_image_seq.append(im)
                        if use_masks:
                            input_seg_seq.append(
                                util.load_image(seg_mask_path(im_file),
                                                resize=(img_width, img_height),
                                                interpolation='nn'))

                        if len(input_image_seq) < seq_length:
                            continue  # Buffer not filled yet.
                        if len(input_image_seq) > seq_length:
                            # Remove oldest entry.
                            del input_image_seq[0]
                            if use_masks:
                                del input_seg_seq[0]

                        input_image_stack = np.concatenate(input_image_seq,
                                                           axis=2)
                        input_image_stack = np.expand_dims(input_image_stack,
                                                           axis=0)
                        if use_masks:
                            input_image_stack = mask_image_stack(
                                input_image_stack, input_seg_seq)
                        # NOTE(review): the stacked frames are passed as one
                        # argument; confirm this matches the signature of
                        # Model.inference_egomotion.
                        est_egomotion = np.squeeze(
                            inference_model.inference_egomotion(
                                input_image_stack, sess))
                        egomotion_str = [
                            ','.join([str(d) for d in est_egomotion[j]])
                            for j in range(seq_length - 1)
                        ]
                        current_output_handle.write(
                            str(i) + ' ' + ' '.join(egomotion_str) + '\n')
                finally:
                    # Close the last open file even when inference fails.
                    if current_output_handle is not None:
                        current_output_handle.close()
            elif inference_mode == INFERENCE_MODE_TRIPLETS:
                # Output files already written during this run; the first
                # write truncates, later writes append.
                written_before = set()
                for i, im_file in enumerate(im_files):
                    im = util.load_image(im_file,
                                         resize=(img_width * 3, img_height))
                    input_image_stack = np.concatenate([
                        im[:, :img_width],
                        im[:, img_width:img_width * 2],
                        im[:, img_width * 2:],
                    ], axis=2)
                    input_image_stack = np.expand_dims(input_image_stack,
                                                       axis=0)
                    if use_masks:
                        seg = util.load_image(
                            seg_mask_path(im_file),
                            resize=(img_width * 3, img_height),
                            interpolation='nn')
                        input_seg_seq = [
                            seg[:, :img_width],
                            seg[:, img_width:img_width * 2],
                            seg[:, img_width * 2:],
                        ]
                        input_image_stack = mask_image_stack(
                            input_image_stack, input_seg_seq)
                    # NOTE(review): same call shape as in single mode above;
                    # confirm against Model.inference_egomotion.
                    est_egomotion = inference_model.inference_egomotion(
                        input_image_stack, sess)
                    est_egomotion = np.squeeze(est_egomotion)
                    egomotion_1_2 = ','.join(
                        [str(d) for d in est_egomotion[0]])
                    egomotion_2_3 = ','.join(
                        [str(d) for d in est_egomotion[1]])

                    output_filepath = os.path.join(output_dirs[i],
                                                   'egomotion.txt')
                    file_mode = ('a' if output_filepath in written_before
                                 else 'w')
                    with gfile.Open(output_filepath,
                                    file_mode) as current_output_handle:
                        current_output_handle.write(
                            str(i) + ' ' + egomotion_1_2 + ' ' +
                            egomotion_2_3 + '\n')
                    written_before.add(output_filepath)
        logging.info('Done.')