def trajectory_inference():
    """Generates trajectories from the KITTI odometry sets and a checkpoint."""

    # Note that the struct2depth code only works at batch_size=1, because it uses
    # the training mode of batchnorm at inference.
    inference_model = model.Model(is_training=False,
                                  batch_size=1,
                                  img_height=FLAGS.img_height,
                                  img_width=FLAGS.img_width)
    saver = tf.train.Saver()
    sess = tf.Session()

    def infer_egomotion(image1, image2):
        return inference_model.inference_egomotion(image1, image2, sess)

    saver.restore(sess, FLAGS.checkpoint_path)
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    for odo_set in ODOMETRY_SETS:
        logging.info('Evaluating odometry on %s', odo_set)
        test_file_dir = os.path.join(FLAGS.odometry_test_set_dir, odo_set)
        output_file = os.path.join(FLAGS.output_dir,
                                   'odometry_%s.txt' % odo_set)
        odometry_inference(test_file_dir, output_file, infer_egomotion)
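For reference, trajectory_inference reads several flags that must be defined elsewhere in the module. A minimal sketch of those definitions, assuming absl-style flags; the names come from the code above, the defaults are illustrative only, and ODOMETRY_SETS and odometry_inference are likewise assumed to exist in the module:

from absl import flags

# Hypothetical flag definitions matching the names used above.
flags.DEFINE_string('checkpoint_path', None, 'Checkpoint to restore.')
flags.DEFINE_string('odometry_test_set_dir', None,
                    'Directory containing the KITTI odometry test sets.')
flags.DEFINE_string('output_dir', None,
                    'Where to write the odometry_*.txt files.')
flags.DEFINE_integer('img_height', 128, 'Input image height.')
flags.DEFINE_integer('img_width', 416, 'Input image width.')

FLAGS = flags.FLAGS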
Example #2
def main(_):
    # Fixed seed for repeatability
    seed = FLAGS.seed
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    if not gfile.Exists(FLAGS.checkpoint_dir):
        gfile.MakeDirs(FLAGS.checkpoint_dir)

    train_model = model.Model(
        boxify=FLAGS.boxify,
        data_dir=FLAGS.data_dir,
        file_extension=FLAGS.file_extension,
        is_training=True,
        foreground_dilation=FLAGS.foreground_dilation,
        learn_intrinsics=FLAGS.learn_intrinsics,
        learning_rate=FLAGS.learning_rate,
        reconstr_weight=FLAGS.reconstr_weight,
        smooth_weight=FLAGS.smooth_weight,
        ssim_weight=FLAGS.ssim_weight,
        translation_consistency_weight=FLAGS.translation_consistency_weight,
        rotation_consistency_weight=FLAGS.rotation_consistency_weight,
        batch_size=FLAGS.batch_size,
        img_height=FLAGS.img_height,
        img_width=FLAGS.img_width,
        weight_reg=FLAGS.weight_reg,
        depth_consistency_loss_weight=FLAGS.depth_consistency_loss_weight,
        queue_size=FLAGS.queue_size,
        input_file=FLAGS.input_file)

    _train(train_model, FLAGS.checkpoint_dir, FLAGS.train_steps,
           FLAGS.summary_freq)

    if FLAGS.debug:
        _print_losses(os.path.join(FLAGS.checkpoint_dir, 'debug'))
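To run the snippet above as a script, the usual absl-style entry point applies (a minimal sketch; older TensorFlow code achieves the same with tf.app.run()):

from absl import app

if __name__ == '__main__':
    # Parses flags, then calls main(argv).
    app.run(main)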
Example #3
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   architecture=nets.RESNET,
                   imagenet_norm=True,
                   use_skip=True,
                   joint_encoder=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs inference. Refer to flags in inference.py for details."""
    # Build the model from the function arguments rather than hardcoded
    # values, so the batching loop below matches the model's batch size.
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width)

    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(input_dir,
                                                     input_list_file,
                                                     file_extension)
        if shuffle:
            logging.info('Shuffling data...')
            np.random.shuffle(im_files)
        logging.info('Running inference on %d files.', len(im_files))

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            npys = []
            for i in range(len(im_files)):
                if i % 100 == 0:
                    logging.info('%s of %s files processed.', i, len(im_files))

                # Read image and run inference.
                if inference_mode == INFERENCE_MODE_SINGLE:
                    if inference_crop == INFERENCE_CROP_NONE:
                        im = util.load_image(im_files[i],
                                             resize=(img_width, img_height))
                    elif inference_crop == INFERENCE_CROP_CITYSCAPES:
                        im = util.crop_cityscapes(util.load_image(im_files[i]),
                                                  resize=(img_width,
                                                          img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    im = im[:, img_width:img_width * 2]
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) == batch_size or i == len(im_files) - 1:
                    # Call inference on batch.
                    # Fill up the batch with zero images if needed.
                    for _ in range(batch_size - len(im_batch)):
                        im_batch.append(
                            np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
                    im_batch = np.stack(im_batch, axis=0)
                    est_depth = inference_model.inference_depth(im_batch, sess)
                    if flip_for_depth:
                        est_depth = np.flip(est_depth, axis=2)
                        im_batch = np.flip(im_batch, axis=2)

                    for j in range(len(im_batch)):
                        color_map = util.normalize_depth_for_display(
                            np.squeeze(est_depth[j]))
                        visualization = np.concatenate(
                            (im_batch[j], color_map), axis=0)
                        # Save raw prediction and color visualization. Extract filename
                        # without extension from full path: e.g. path/to/input_dir/folder1/
                        # file1.png -> file1
                        k = i - len(im_batch) + 1 + j
                        filename_root = os.path.splitext(
                            os.path.basename(im_files[k]))[0]
                        pref = '_flip' if flip_for_depth else ''
                        output_raw = os.path.join(
                            output_dirs[k], filename_root + pref + '.npy')
                        output_vis = os.path.join(
                            output_dirs[k], filename_root + pref + '.png')
                        with gfile.Open(output_raw, 'wb') as f:
                            np.save(f, est_depth[j])
                            npys.append(est_depth[j])
                        util.save_image(output_vis, visualization,
                                        file_extension)
                    im_batch = []
            with gfile.Open(output_dir + "result.npy", 'wb') as f:
                np.save(f, npys)
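A depth-only invocation of this helper might look like the following; all paths here are hypothetical placeholders, not real files:

# Hypothetical call; adjust paths to your checkpoint and image directory.
_run_inference(output_dir='/tmp/depth_out',
               model_ckpt='/tmp/ckpt/model-413174',
               input_dir='/tmp/images',
               depth=True,
               egomotion=False,
               batch_size=1,
               img_height=128,
               img_width=416)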
Example #4
from matplotlib import pyplot as plt
import cv2  # Used for resizing; if you don't have it, any resize method works.
import numpy as np
import tensorflow as tf
import sys
from PIL import Image

from depth_from_video_in_the_wild import depth_prediction_net
from depth_from_video_in_the_wild import model

if __name__ == "__main__":
    # Load images
    img = np.asarray(Image.open("data_example/erfurt_93/0000000002.png"))
    img1 = img[:, :416]
    img1 = np.expand_dims(img1, axis=0)
    img2 = img[:, 416:832]
    img2 = np.expand_dims(img2, axis=0)

    inference_model = model.Model(is_training=False,
                                  batch_size=1,
                                  img_height=128,
                                  img_width=416)
    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, "example_model/model-413174")

    test = inference_model.inference_egomotion(img1, img2, sess)
    print(test)
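
    # A minimal follow-up sketch: the same session can also run the depth
    # network. This assumes inference_model.inference_depth accepts the same
    # batched image input as inference_egomotion; if the model expects
    # normalized float images, scale img1 accordingly.
    est_depth = inference_model.inference_depth(img1, sess)
    plt.imshow(np.squeeze(est_depth), cmap='plasma')
    plt.title('Predicted depth for the first frame')
    plt.show()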
Example #5
def _run_inference(output_dir=None,
                   file_extension='png',
                   depth=True,
                   egomotion=False,
                   model_ckpt=None,
                   input_dir=None,
                   input_list_file=None,
                   batch_size=1,
                   img_height=128,
                   img_width=416,
                   seq_length=3,
                   imagenet_norm=True,
                   shuffle=False,
                   flip_for_depth=False,
                   inference_mode=INFERENCE_MODE_SINGLE,
                   inference_crop=INFERENCE_CROP_NONE,
                   use_masks=False):
    """Runs inference. Refer to flags in inference.py for details."""
    inference_model = model.Model(is_training=False,
                                  batch_size=batch_size,
                                  img_height=img_height,
                                  img_width=img_width,
                                  imagenet_norm=imagenet_norm)
    vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt)
    saver = tf.train.Saver(vars_to_restore)
    sv = tf.train.Supervisor(logdir='/tmp/', saver=None)
    with sv.managed_session() as sess:
        saver.restore(sess, model_ckpt)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        logging.info('Predictions will be saved in %s.', output_dir)

        # Collect all images to run inference on.
        im_files, basepath_in = collect_input_images(input_dir,
                                                     input_list_file,
                                                     file_extension)
        if shuffle:
            logging.info('Shuffling data...')
            np.random.shuffle(im_files)
        logging.info('Running inference on %d files.', len(im_files))

        # Create missing output folders and pre-compute target directories.
        output_dirs = create_output_dirs(im_files, basepath_in, output_dir)

        # Run depth prediction network.
        if depth:
            im_batch = []
            for i in range(len(im_files)):
                if i % 100 == 0:
                    logging.info('%s of %s files processed.', i, len(im_files))

                # Read image and run inference.
                if inference_mode == INFERENCE_MODE_SINGLE:
                    if inference_crop == INFERENCE_CROP_NONE:
                        im = util.load_image(im_files[i],
                                             resize=(img_width, img_height))
                    elif inference_crop == INFERENCE_CROP_CITYSCAPES:
                        im = util.crop_cityscapes(util.load_image(im_files[i]),
                                                  resize=(img_width,
                                                          img_height))
                elif inference_mode == INFERENCE_MODE_TRIPLETS:
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    im = im[:, img_width:img_width * 2]
                if flip_for_depth:
                    im = np.flip(im, axis=1)
                im_batch.append(im)

                if len(im_batch) == batch_size or i == len(im_files) - 1:
                    # Call inference on batch.
                    # Fill up the batch with zero images if needed.
                    for _ in range(batch_size - len(im_batch)):
                        im_batch.append(
                            np.zeros(shape=(img_height, img_width, 3),
                                     dtype=np.float32))
                    im_batch = np.stack(im_batch, axis=0)
                    est_depth = inference_model.inference_depth(im_batch, sess)
                    if flip_for_depth:
                        est_depth = np.flip(est_depth, axis=2)
                        im_batch = np.flip(im_batch, axis=2)

                    for j in range(len(im_batch)):
                        color_map = util.normalize_depth_for_display(
                            np.squeeze(est_depth[j]))
                        visualization = np.concatenate(
                            (im_batch[j], color_map), axis=0)
                        # Save raw prediction and color visualization. Extract filename
                        # without extension from full path: e.g. path/to/input_dir/folder1/
                        # file1.png -> file1
                        k = i - len(im_batch) + 1 + j
                        filename_root = os.path.splitext(
                            os.path.basename(im_files[k]))[0]
                        pref = '_flip' if flip_for_depth else ''
                        output_raw = os.path.join(
                            output_dirs[k], filename_root + pref + '.npy')
                        output_vis = os.path.join(
                            output_dirs[k], filename_root + pref + '.png')
                        with gfile.Open(output_raw, 'wb') as f:
                            np.save(f, est_depth[j])
                        util.save_image(output_vis, visualization,
                                        file_extension)
                    im_batch = []

        # Run egomotion network.
        if egomotion:
            if inference_mode == INFERENCE_MODE_SINGLE:
                # Run regular egomotion inference loop.
                input_image_seq = []
                input_seg_seq = []
                current_sequence_dir = None
                current_output_handle = None
                for i in range(len(im_files)):
                    sequence_dir = os.path.dirname(im_files[i])
                    if sequence_dir != current_sequence_dir:
                        # Assume start of a new sequence, since this image lies in a
                        # different directory than the previous ones.
                        # Clear egomotion input buffer.
                        output_filepath = os.path.join(output_dirs[i],
                                                       'egomotion.txt')
                        if current_output_handle is not None:
                            current_output_handle.close()
                        current_sequence_dir = sequence_dir
                        logging.info('Writing egomotion sequence to %s.',
                                     output_filepath)
                        current_output_handle = gfile.Open(
                            output_filepath, 'w')
                        input_image_seq = []
                    im = util.load_image(im_files[i],
                                         resize=(img_width, img_height))
                    input_image_seq.append(im)
                    if use_masks:
                        im_seg_path = im_files[i].replace(
                            '.%s' % file_extension, '-seg.%s' % file_extension)
                        if not gfile.Exists(im_seg_path):
                            raise ValueError(
                                'No segmentation mask %s has been found for '
                                'image %s. If none are available, disable '
                                'use_masks.' % (im_seg_path, im_files[i]))
                        input_seg_seq.append(
                            util.load_image(im_seg_path,
                                            resize=(img_width, img_height),
                                            interpolation='nn'))

                    if len(input_image_seq) < seq_length:
                        continue  # Buffer not filled yet.
                    if len(input_image_seq) > seq_length:
                        # Remove oldest entry.
                        del input_image_seq[0]
                        if use_masks:
                            del input_seg_seq[0]

                    input_image_stack = np.concatenate(input_image_seq, axis=2)
                    input_image_stack = np.expand_dims(input_image_stack,
                                                       axis=0)
                    if use_masks:
                        input_image_stack = mask_image_stack(
                            input_image_stack, input_seg_seq)
                    est_egomotion = np.squeeze(
                        inference_model.inference_egomotion(
                            input_image_stack, sess))
                    egomotion_str = []
                    for j in range(seq_length - 1):
                        egomotion_str.append(','.join(
                            [str(d) for d in est_egomotion[j]]))
                    current_output_handle.write(
                        str(i) + ' ' + ' '.join(egomotion_str) + '\n')
                if current_output_handle is not None:
                    current_output_handle.close()
            elif inference_mode == INFERENCE_MODE_TRIPLETS:
                written_before = []
                for i in range(len(im_files)):
                    im = util.load_image(im_files[i],
                                         resize=(img_width * 3, img_height))
                    input_image_stack = np.concatenate(
                        [im[:, :img_width],
                         im[:, img_width:img_width * 2],
                         im[:, img_width * 2:]], axis=2)
                    input_image_stack = np.expand_dims(input_image_stack,
                                                       axis=0)
                    if use_masks:
                        im_seg_path = im_files[i].replace(
                            '.%s' % file_extension, '-seg.%s' % file_extension)
                        if not gfile.Exists(im_seg_path):
                            raise ValueError(
                                'No segmentation mask %s has been found for '
                                'image %s. If none are available, disable '
                                'use_masks.' % (im_seg_path, im_files[i]))
                        seg = util.load_image(im_seg_path,
                                              resize=(img_width * 3,
                                                      img_height),
                                              interpolation='nn')
                        input_seg_seq = [seg[:, :img_width],
                                         seg[:, img_width:img_width * 2],
                                         seg[:, img_width * 2:]]
                        input_image_stack = mask_image_stack(
                            input_image_stack, input_seg_seq)
                    est_egomotion = inference_model.inference_egomotion(
                        input_image_stack, sess)
                    est_egomotion = np.squeeze(est_egomotion)
                    egomotion_1_2 = ','.join(
                        [str(d) for d in est_egomotion[0]])
                    egomotion_2_3 = ','.join(
                        [str(d) for d in est_egomotion[1]])

                    output_filepath = os.path.join(output_dirs[i],
                                                   'egomotion.txt')
                    file_mode = 'w' if output_filepath not in written_before else 'a'
                    with gfile.Open(output_filepath,
                                    file_mode) as current_output_handle:
                        current_output_handle.write(
                            str(i) + ' ' + egomotion_1_2 + ' ' +
                            egomotion_2_3 + '\n')
                    written_before.append(output_filepath)
            logging.info('Done.')
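
Each line written to egomotion.txt above is a frame index followed by one comma-separated vector per adjacent frame pair (in struct2depth these are six-component vectors, translation followed by rotation, though the exact convention depends on the trained model). A minimal parser sketch for reading the files back:

def read_egomotion_file(path):
    # Returns a dict mapping frame index -> list of egomotion vectors,
    # matching the format written by _run_inference above.
    results = {}
    with open(path) as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                continue
            index = int(tokens[0])
            results[index] = [np.array([float(v) for v in t.split(',')])
                              for t in tokens[1:]]
    return results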