Example #1
def test(args):
    """ Function to test the architecture by saving disparities to the output directory
    """
    # Post-processing consistently improved results in all runs, so only
    # post-processed results are saved unless explicitly disabled.
    # For Pilzer, the disparities are already post-processed by its own FuseNet.
    do_post_processing = args.postprocessing and 'pilzer' not in args.architecture

    input_height = args.input_height
    input_width = args.input_width

    output_directory = args.output_dir
    n_img, test_loader = prepare_dataloader(args, 'test')

    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    disparities = np.zeros((n_img, input_height, input_width),
                           dtype=np.float32)
    inference_time = 0.0

    # At test time, wrap the forward pass in no_grad() so intermediate
    # activations are not kept for backprop.
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            if i % 100 == 0 and i != 0:
                print('Testing... Now at image: {}'.format(i))

            t_start = time.time()
            # Do a forward pass.
            disps = model.fit(data)
            # Some architectures output a tuple of 4 disparity scales,
            # others a single disparity map.
            if isinstance(disps, tuple):
                disps = disps[0][:, 0, :, :]
            else:
                disps = disps.squeeze()

            if do_post_processing:
                disparities[i] = post_process_disparity(disps.cpu().numpy())
            else:
                # Without post-processing, keep only the first (un-flipped) disparity.
                disp = disps.unsqueeze(1)
                disparities[i] = disp[0].squeeze().cpu().numpy()
            t_end = time.time()
            inference_time += (t_end - t_start)

    if args.test_time:
        test_time_message = 'Inference took {:.4f} seconds. That is {:.2f} imgs/s or {:.6f} s/img.'
        print(test_time_message.format(inference_time, n_img / inference_time,
                                       inference_time / n_img))

    disp_file_name = 'disparities_{}_{}.npy'.format(args.dataset, model.name)
    full_disp_path = os.path.join(output_directory, disp_file_name)

    if os.path.exists(full_disp_path):
        print('Overwriting disparities at {}...'.format(full_disp_path))
    np.save(full_disp_path, disparities)
    print('Finished Testing')
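
test() depends on a post_process_disparity helper that is not shown in these examples. Below is a minimal sketch of what it plausibly looks like, assuming the Godard et al. monodepth scheme in which the model sees both the image and its horizontal flip and the two predictions are blended with border-aware ramp masks; the (2, h, w) input layout is inferred from the call site, and the blending weights are an assumption, not the repository's exact code.

import numpy as np

def post_process_disparity(disp):
    # disp: (2, h, w) array with the disparity of the input image and of its
    # horizontally flipped copy (layout assumed from the call site).
    _, h, w = disp.shape
    l_disp = disp[0]
    r_disp = np.fliplr(disp[1])        # un-flip the second prediction
    m_disp = 0.5 * (l_disp + r_disp)   # plain average of both predictions
    # Ramp masks that favor each prediction away from its occluded border.
    grid, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = 1.0 - np.clip(20 * (grid - 0.05), 0, 1)
    r_mask = np.fliplr(l_mask)
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp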
Example #2
def reconstruct_right(args):
    """ Function to reconstruct the right view of stereo pairs from left view
    """
    # Post-processing flag kept for parity with test(); it is unused in this
    # function, since the estimated disparity is applied directly to warp the
    # left view.
    do_post_processing = args.postprocessing and 'pilzer' not in args.architecture

    input_height = args.input_height
    input_width = args.input_width

    output_directory = args.output_dir

    file_names = sorted(os.listdir(args.data_dir))
    file_names = [os.path.join(args.data_dir, f) for f in file_names]

    # Create model
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate MonodepthLoss only to reuse its generate_image_right()
    # warping helper; no loss is actually computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    # Build the transforms once, outside the loop.
    resize = transforms.ResizeImage(train=False, size=(input_height, input_width))
    totensor = transforms.ToTensor(train=False)

    for idx, left in enumerate(file_names):
        left_image = Image.open(left)
        input_size = (left_image.width, left_image.height)
        left_image = totensor(resize(left_image))
        # Stack the image with its horizontal flip, as the model expects.
        left_image = torch.stack((left_image, torch.flip(left_image, [2])))
        # Make a dictionary to feed model.fit().
        left_data = {'left_image': left_image}

        # At test time, wrap the forward pass in no_grad() so intermediate
        # activations are not kept for backprop.
        with torch.no_grad():
            # Estimate disparity
            disps = model.fit(left_data)
            # Keep only the full-resolution right disparity (channel 1).
            disp_right_est = disps[0][:, 1, :, :].unsqueeze(1)

            # Using estimated disparity, apply it to left view and obtain right view
            fake_right = fake_loss.generate_image_right(left_image.to(args.device), disp_right_est)

            # convert Tensor(fake_right) to PIL image and save it!
            output_name = os.path.splitext(os.path.basename(left))[0]
            save_path = os.path.join(output_directory, '{}.png'.format(output_name))
            save_right = torchvision.transforms.functional.to_pil_image(fake_right[0].cpu())
            save_right = save_right.resize(input_size)
            save_right.save(save_path)

            if idx % 200 == 0:
                print('Processed ' + save_path)
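
generate_image_right() comes from the MonodepthLoss module and is used here purely as a warping helper. Below is a minimal sketch of the usual bilinear-sampler implementation behind it, assuming the monodepth convention of width-normalized disparities added to a base sampling grid; the tensor shapes are inferred from the call sites, and this is not the repository's exact code.

import torch
import torch.nn.functional as F

def generate_image_right(img, disp):
    # img: (b, c, h, w) left view; disp: (b, 1, h, w) right disparity,
    # assumed normalized by image width (monodepth convention).
    batch_size, _, height, width = img.size()
    # Base sampling grid in [0, 1] image coordinates.
    x_base = torch.linspace(0, 1, width).repeat(batch_size, height, 1).type_as(img)
    y_base = torch.linspace(0, 1, height).repeat(batch_size, width, 1).transpose(1, 2).type_as(img)
    # Shift every pixel horizontally by its disparity and sample bilinearly.
    flow_field = torch.stack((x_base + disp[:, 0, :, :], y_base), dim=3)
    return F.grid_sample(img, 2 * flow_field - 1, mode='bilinear', padding_mode='zeros')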
Example #3
def check_reconstruct_right(args):
    """ Debug variant of reconstruct_right: reconstructs the right view for a
    fixed set of sample images and displays each result.
    """
    # Post-processing flag kept for parity with test(); it is unused in this
    # function, since the estimated disparity is applied directly to warp the
    # left view.
    do_post_processing = args.postprocessing and 'pilzer' not in args.architecture

    input_height = args.input_height
    input_width = args.input_width
    input_left = args.left_view

    output_directory = args.output_dir
    # n_img, test_loader = prepare_dataloader(args, 'test')
    # Create model
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate MonodepthLoss only to reuse its generate_image_right()
    # warping helper; no loss is actually computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    ext = '' if args.load_final else '_best'
    file_names = ['000025.png', '000031.png', '000036.png', '000049.png']
    file_names = [
        '/home/kishida/depthgan/sample_kitti_obj/' + file_name
        for file_name in file_names
    ]

    # Build the transforms once, outside the loop.
    resize = transforms.ResizeImage(train=False,
                                    size=(input_height, input_width))
    totensor = transforms.ToTensor(train=False)

    for filename in file_names:
        # Convert the input PIL image to a tensor.
        left_image = Image.open(filename)
        input_size = (left_image.width, left_image.height)
        left_image = totensor(resize(left_image))
        # Stack the image with its horizontal flip, as the model expects.
        left_image = torch.stack((left_image, torch.flip(left_image, [2])))
        # Make a dictionary to feed model.fit().
        left_data = {'left_image': left_image}

        # Wrap the forward pass in no_grad() so intermediate activations are
        # not kept for backprop.
        with torch.no_grad():
            # Estimate disparity
            disps = model.fit(left_data)
            # Keep only the full-resolution right disparity (channel 1).
            disp_right_est = disps[0][:, 1, :, :].unsqueeze(1)

            # Using estimated disparity, apply it to left view and obtain right view
            print('reconstructing right view from left view')
            fake_right = fake_loss.generate_image_right(
                left_image.to(args.device), disp_right_est)

            # convert Tensor(fake_right) to PIL image.
            output_dir = os.path.dirname(filename)
            output_name = os.path.splitext(os.path.basename(filename))[0]
            model_name = os.path.basename(args.model_name)
            save_path = os.path.join(
                output_dir, '{}_rec_{}{}.jpg'.format(output_name, model_name,
                                                     ext))
            save_right = torchvision.transforms.functional.to_pil_image(
                fake_right[0].cpu())
            print(input_size)
            save_right = save_right.resize(input_size)
            save_right.save(save_path)
            print('Saved image : ' + save_path)
            # Show the result with ImageMagick's `display` utility.
            arguments = ['display', save_path]
            subprocess.call(arguments)
Example #4
def train(args):
    """ Function used to train any of the architectures, given parsed input
    arguments.
    """
    tb_dir = os.path.join("saved_models", args.model_name,
                          "tensorboard_" + args.model_name)
    writer = tbx.SummaryWriter(tb_dir)

    def validate(epoch):
        model.to_test()

        disparities = np.zeros((val_n_img, 256, 512), dtype=np.float32)
        model.set_new_loss_item(epoch, train=False)

        # WGAN architectures need gradient access even during validation, so
        # only disable autograd for the other architectures.
        if 'wgan' not in args.architecture:
            torch.set_grad_enabled(False)

        for i, data in enumerate(val_loader):
            # Get the losses for the model for this epoch.
            model.set_input(data)
            model.forward()
            model.add_running_loss_val(epoch)

        if 'wgan' not in args.architecture:
            torch.set_grad_enabled(True)

        # Store the running loss for the validation images.
        model.make_running_loss(epoch, val_n_img, train=False)
        return

    n_img, loader = prepare_dataloader(args, 'train')
    val_n_img, val_loader = prepare_dataloader(args, 'val')

    model = create_architecture(args)
    model.set_data_loader(loader)

    print('data loader is set')

    if not args.resume:
        # We keep track of the aggregated losses per epoch in a dict. For
        # now the pre-training train loss is set to zero. The pre-training
        # validation loss will be computed.
        best_val_loss = float('Inf')

        # Compute the loss per image (the computation accounts for the number
        # of images versus the batch size, to compensate for partial batches).
        validate(-1)
        pre_validation_update(model.losses[-1]['val'])
    else:
        best_val_loss = min(
            [model.losses[epoch]['val']['G'] for epoch in model.losses.keys()])

    running_val_loss = 0.0
    print('Now, training starts')

    for epoch in range(model.start_epoch, args.epochs):
        print('Epoch {} is beginning...'.format(epoch))
        model.update_learning_rate(epoch, args.learning_rate)

        c_time = time.time()
        model.to_train()
        model.set_new_loss_item(epoch)

        # Run a single training epoch. Generalizes to WGAN variants as well.
        model.run_epoch(epoch, n_img)

        # validate() stores the epoch's validation losses on the model.
        validate(epoch)

        # Print an update of training, val losses. Possibly also do full evaluation of depth maps.
        print_epoch_update(epoch,
                           time.time() - c_time, model.losses,
                           model.rec_losses)

        for loss_name in model.loss_names:
            writer.add_scalar('GAN/' + loss_name,
                              model.losses[epoch]['train'][loss_name], epoch)

        for loss_name in model.rec_loss_names:
            writer.add_scalar('Reconstruction/' + loss_name,
                              model.rec_losses[epoch]['train'][loss_name],
                              epoch)

        # Make a checkpoint, so training can be resumed.
        running_val_loss = model.losses[epoch]['val']['G']
        is_best = running_val_loss < best_val_loss
        if is_best:
            best_val_loss = running_val_loss
        model.save_checkpoint(epoch, is_best, best_val_loss)

        print('Epoch {} ended'.format(epoch))

    print('Finished Training. Best validation loss:\t{:.3f}'.format(
        best_val_loss))

    # Save the model of the final epoch. If another model was better, also save it separately as best.
    model.save_networks('final')
    if running_val_loss != best_val_loss:
        model.save_best_networks()

    model.save_losses()
    writer.close()
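
model.save_checkpoint() is defined on the architecture classes and not shown here. Below is a minimal sketch of the pattern it likely follows, writing the latest state every epoch and copying it aside when it is the best so far; the file names and the contents of the state dictionary are assumptions.

import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint_dir):
    # Always overwrite the latest checkpoint so training can be resumed.
    path = os.path.join(checkpoint_dir, 'checkpoint.pth.tar')
    torch.save(state, path)
    if is_best:
        # Keep a separate copy of the best model so far for evaluation.
        shutil.copyfile(path, os.path.join(checkpoint_dir, 'model_best.pth.tar'))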
Example #5
def reconstruct_right(args):
    """ Function to reconstruct the right view of a single stereo pair from
    its left view.
    """
    # Post-processing flag kept for parity with test(); it is unused in this
    # function, since the estimated disparity is applied directly to warp the
    # left view.
    do_post_processing = args.postprocessing and 'pilzer' not in args.architecture

    input_height = args.input_height
    input_width = args.input_width
    input_left = args.left_view

    output_directory = args.output_dir
    # n_img, test_loader = prepare_dataloader(args, 'test')

    # Convert the input PIL image to a tensor, resizing to the fixed network
    # input resolution.
    left_image = Image.open(input_left)
    resize = transforms.ResizeImage(train=False, size=(256, 512))
    totensor = transforms.ToTensor(train=False)
    left_image = totensor(resize(left_image))
    # Stack the image with its horizontal flip, as the model expects.
    left_image = torch.stack((left_image, torch.flip(left_image, [2])))
    # Make a dictionary to feed model.fit().
    left_data = {'left_image': left_image}

    # Create model
    model = create_architecture(args)
    which_model = 'final' if args.load_final else 'best'
    model.load_networks(which_model)
    model.to_test()

    # Instantiate MonodepthLoss only to reuse its generate_image_right()
    # warping helper; no loss is actually computed here.
    fake_loss = MonodepthLoss(args)
    fake_loss = fake_loss.to(args.device)

    # At test time, wrap the forward pass in no_grad() so intermediate
    # activations are not kept for backprop.
    with torch.no_grad():
        # Estimate disparity
        disps = model.fit(left_data)
        # Keep only the full-resolution right disparity (channel 1).
        disp_right_est = disps[0][:, 1, :, :].unsqueeze(1)

        # Using estimated disparity, apply it to left view and obtain right view
        print('reconstructing right view from left view')
        fake_right = fake_loss.generate_image_right(left_image.to(args.device),
                                                    disp_right_est)
        # scipy.misc.imresize was removed in SciPy 1.3; rescale the disparity
        # to uint8 and resize it back to the original resolution with PIL.
        disp_np = disp_right_est[0].squeeze().cpu().numpy()
        disp_np = (255.0 * (disp_np - disp_np.min()) /
                   (disp_np.max() - disp_np.min())).astype(np.uint8)
        disp_to_img = np.array(
            Image.fromarray(disp_np).resize((input_width, input_height)))
        print(type(disp_to_img))
        print(disp_to_img.shape)
        print(disp_to_img[100:110, 100:110])
        Image.fromarray(disp_to_img).save('./output/000031_disp.jpg')

        # convert Tensor(fake_right) to PIL image and save it!
        print('Saving reconstructed right view...')
        output_dir = os.path.dirname(input_left)
        output_name = os.path.splitext(os.path.basename(input_left))[0]
        model_name = os.path.basename(args.model_name)
        save_path = os.path.join(
            output_dir, '{}_rec_{}.jpg'.format(output_name, model_name))
        save_right = torchvision.transforms.functional.to_pil_image(
            fake_right[0].cpu())
        save_right.save(save_path)
        print('Saved image : ' + save_path)
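
The functions above are presumably dispatched from a common entry point. Below is a hypothetical sketch of such a main, assuming an argparse namespace with a mode flag; parse_args() and the flag values are invented for illustration and are not part of the original code.

def main():
    # parse_args() is a hypothetical parser returning the namespace used above.
    args = parse_args()
    if args.mode == 'train':
        train(args)
    elif args.mode == 'test':
        test(args)
    elif args.mode == 'reconstruct':
        reconstruct_right(args)

if __name__ == '__main__':
    main()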