Example #1

# Imports this code relies on (module paths assumed from a Monodepth2-style
# repository layout); get_mono_ratio and batch_post_process_disparity are
# expected to be defined elsewhere in the same module.
import os
import glob

import cv2
import numpy as np
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import progressbar
import PIL.Image as pil

import torch
from torch.utils.data import DataLoader
from torchvision import transforms

import datasets
import legacy
import networks
from layers import disp_to_depth
from utils import readlines

# folder holding the evaluation splits (assumed location)
splits_dir = os.path.join(os.path.dirname(__file__), "splits")

def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    opt.batch_size = 1

    assert sum(
        (opt.eval_mono, opt.eval_stereo, opt.no_eval)
    ) == 1, "Please choose mono or stereo evaluation by setting either --eval_mono, --eval_stereo or --no_eval"
    assert sum(
        (opt.log, opt.repr)
    ) < 2, "Please select at most one between --repr and --log"
    assert opt.bootstraps == 1 or opt.snapshots == 1, "Please set only one of --bootstraps or --snapshots to be greater than 1"

    # get the number of networks
    nets = max(opt.bootstraps, opt.snapshots)
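    # uncertainty maps are produced whenever any uncertainty modeling
    # strategy or post-processing is enabled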
    do_uncert = (opt.log or opt.repr or opt.dropout or opt.post_process
                 or opt.bootstraps > 1 or opt.snapshots > 1)

    print("-> Beginning inference...")

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    assert os.path.isdir(
        opt.load_weights_folder), "Cannot find a folder at {}".format(
            opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(
        os.path.join(splits_dir, opt.eval_split, "test_files.txt"))

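    # Bootstrapped ensembles load networks from independent trainings
    # (boot_i subfolders); snapshot ensembles load the last checkpoints
    # of a single training (weights_i subfolders).
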
    if opt.bootstraps > 1:

        # prepare multiple checkpoint paths from different trainings
        encoder_path = [
            os.path.join(opt.load_weights_folder, "boot_%d" % i, "weights_19",
                         "encoder.pth") for i in range(1, opt.bootstraps + 1)
        ]
        decoder_path = [
            os.path.join(opt.load_weights_folder, "boot_%d" % i, "weights_19",
                         "depth.pth") for i in range(1, opt.bootstraps + 1)
        ]
        encoder_dict = [
            torch.load(encoder_path[i]) for i in range(opt.bootstraps)
        ]
        height = encoder_dict[0]['height']
        width = encoder_dict[0]['width']

    elif opt.snapshots > 1:

        # prepare multiple checkpoint paths from the same training
        encoder_path = [
            os.path.join(opt.load_weights_folder, "weights_%d" % i,
                         "encoder.pth")
            for i in range(opt.num_epochs - opt.snapshots, opt.num_epochs)
        ]
        decoder_path = [
            os.path.join(opt.load_weights_folder, "weights_%d" % i,
                         "depth.pth")
            for i in range(opt.num_epochs - opt.snapshots, opt.num_epochs)
        ]
        encoder_dict = [
            torch.load(encoder_path[i]) for i in range(opt.snapshots)
        ]
        height = encoder_dict[0]['height']
        width = encoder_dict[0]['width']

    else:

        # prepare just a single path
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
        encoder_dict = torch.load(encoder_path)
        height = encoder_dict['height']
        width = encoder_dict['width']

    img_ext = '.png' if opt.png else '.jpg'
    dataset = datasets.KITTIRAWDataset(opt.data_path,
                                       filenames,
                                       height,
                                       width,
                                       [0],  # frame indices: current frame only
                                       4,  # number of scales
                                       is_train=False,
                                       img_ext=img_ext)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    if nets > 1:

        # load multiple encoders and decoders
        encoder = [
            legacy.ResnetEncoder(opt.num_layers, False) for i in range(nets)
        ]
        depth_decoder = [
            networks.DepthUncertaintyDecoder(encoder[i].num_ch_enc,
                                             num_output_channels=1,
                                             uncert=(opt.log or opt.repr),
                                             dropout=opt.dropout)
            for i in range(nets)
        ]

        model_dict = [encoder[i].state_dict() for i in range(nets)]
        for i in range(nets):
            encoder[i].load_state_dict({
                k: v
                for k, v in encoder_dict[i].items() if k in model_dict[i]
            })
            depth_decoder[i].load_state_dict(torch.load(decoder_path[i]))
            encoder[i].cuda()
            encoder[i].eval()
            depth_decoder[i].cuda()
            depth_decoder[i].eval()

    else:

        # load a single encoder and decoder
        encoder = legacy.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthUncertaintyDecoder(encoder.num_ch_enc,
                                                         num_output_channels=1,
                                                         uncert=(opt.log
                                                                 or opt.repr),
                                                         dropout=opt.dropout)
        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v
             for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))
        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

    # accumulators for depth and uncertainties
    pred_disps = []
    pred_uncerts = []

    print("-> Computing predictions with size {}x{}".format(width, height))
    with torch.no_grad():
        bar = progressbar.ProgressBar(max_value=len(dataloader))
        for i, data in enumerate(dataloader):

            input_color = data[("color", 0, 0)].cuda()

            # updating progress bar
            bar.update(i)
            if opt.post_process:

                # post-processed results require each image to have two forward passes
                input_color = torch.cat(
                    (input_color, torch.flip(input_color, [3])), 0)
            if nets > 1:

                # infer multiple predictions from multiple networks
                disps_distribution = []
                uncerts_distribution = []
                for n in range(nets):
                    output = depth_decoder[n](encoder[n](input_color))
                    disps_distribution.append(
                        torch.unsqueeze(output[("disp", 0)], 0))
                    if opt.log:
                        uncerts_distribution.append(
                            torch.unsqueeze(torch.exp(output[("uncert", 0)]),
                                            0))

                disps_distribution = torch.cat(disps_distribution, 0)
                if opt.log:

                    # Bayesian uncertainty: variance of the predicted
                    # disparities plus the sum of the predicted variances
                    pred_uncert = torch.var(
                        disps_distribution, dim=0, keepdim=False) + torch.sum(
                            torch.cat(uncerts_distribution, 0),
                            dim=0,
                            keepdim=False)
                else:

                    # uncertainty as variance of the predictions
                    pred_uncert = torch.var(disps_distribution,
                                            dim=0,
                                            keepdim=False)
                pred_uncert = pred_uncert.cpu()[0].numpy()
                output = torch.mean(disps_distribution, dim=0, keepdim=False)
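                # disp_to_depth returns (scaled disparity, depth);
                # only the disparity is kept here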
                pred_disp, _ = disp_to_depth(output, opt.min_depth,
                                             opt.max_depth)
            elif opt.dropout:

                # infer multiple predictions from a single network with
                # dropout active at test time (Monte Carlo dropout)
                disps_distribution = []

                # we infer 8 predictions, matching the number of bootstraps and snapshots
                for _ in range(8):
                    output = depth_decoder(encoder(input_color))
                    disps_distribution.append(
                        torch.unsqueeze(output[("disp", 0)], 0))
                disps_distribution = torch.cat(disps_distribution, 0)

                # uncertainty as variance of the predictions
                pred_uncert = torch.var(disps_distribution,
                                        dim=0,
                                        keepdim=False).cpu()[0].numpy()

                # depth as mean of the predictions
                output = torch.mean(disps_distribution, dim=0, keepdim=False)
                pred_disp, _ = disp_to_depth(output, opt.min_depth,
                                             opt.max_depth)
            else:
                output = depth_decoder(encoder(input_color))
                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                if opt.log:

                    # log-likelihood maximization
                    pred_uncert = torch.exp(output[("uncert",
                                                    0)]).cpu()[:, 0].numpy()
                elif opt.repr:

                    # learned reprojection
                    pred_uncert = (output[("uncert", 0)]).cpu()[:, 0].numpy()

            pred_disp = pred_disp.cpu()[:, 0].numpy()
            if opt.post_process:

                # applying Monodepth v1 post-processing to improve depth and
                # obtain uncertainty from the disagreement between the two passes
                N = pred_disp.shape[0] // 2
                pred_uncert = np.abs(pred_disp[:N] - pred_disp[N:, :, ::-1])
                pred_disp = batch_post_process_disparity(
                    pred_disp[:N], pred_disp[N:, :, ::-1])
                pred_uncerts.append(pred_uncert)

            pred_disps.append(pred_disp)

            # min-max normalization of the uncertainty map to [0, 1]
            if opt.log or opt.repr or opt.dropout or nets > 1:
                pred_uncert = (pred_uncert - np.min(pred_uncert)) / (
                    np.max(pred_uncert) - np.min(pred_uncert))
                pred_uncerts.append(pred_uncert)
    pred_disps = np.concatenate(pred_disps)
    if do_uncert:
        pred_uncerts = np.concatenate(pred_uncerts)

    # saving 16-bit depth and uncertainty maps
    print("-> Saving 16-bit maps")
    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path,
                        fix_imports=True,
                        encoding='latin1',
                        allow_pickle=True)["data"]

    os.makedirs(os.path.join(opt.output_dir, "raw", "disp"), exist_ok=True)
    os.makedirs(os.path.join(opt.output_dir, "raw", "uncert"), exist_ok=True)

    if opt.qual:
        os.makedirs(os.path.join(opt.output_dir, "qual", "disp"),
                    exist_ok=True)
        if do_uncert:
            os.makedirs(os.path.join(opt.output_dir, "qual", "uncert"),
                        exist_ok=True)

    bar = progressbar.ProgressBar(max_value=len(pred_disps))
    for i in range(len(pred_disps)):
        bar.update(i)
        if opt.eval_stereo:

            # save images scaling with KITTI baseline
            cv2.imwrite(
                os.path.join(opt.output_dir, "raw", "disp", '%06d_10.png' % i),
                (pred_disps[i] * (dataset.K[0][0] * gt_depths[i].shape[1]) *
                 256. / 10).astype(np.uint16))

        elif opt.eval_mono:

            # save images scaling with ground truth median
            ratio = get_mono_ratio(pred_disps[i], gt_depths[i])
            cv2.imwrite(
                os.path.join(opt.output_dir, "raw", "disp", '%06d_10.png' % i),
                (pred_disps[i] * (dataset.K[0][0] * gt_depths[i].shape[1]) *
                 256. / ratio / 10.).astype(np.uint16))
        else:

            # save images scaling with custom factor
            cv2.imwrite(
                os.path.join(opt.output_dir, "raw", "disp", '%06d_10.png' % i),
                (pred_disps[i] * opt.custom_scale * 256. / 10).astype(
                    np.uint16))

        if do_uncert:

            # save uncertainties
            cv2.imwrite(
                os.path.join(opt.output_dir, "raw", "uncert",
                             '%06d_10.png' % i),
                (pred_uncerts[i] * (256 * 256 - 1)).astype(np.uint16))

        if opt.qual:

            # save colored depth maps
            plt.imsave(os.path.join(opt.output_dir, "qual", "disp",
                                    '%06d_10.png' % i),
                       pred_disps[i],
                       cmap='magma')
            if do_uncert:

                # save colored uncertainty maps
                plt.imsave(os.path.join(opt.output_dir, "qual", "uncert",
                                        '%06d_10.png' % i),
                           pred_uncerts[i],
                           cmap='hot')

    # see you next time!
    print("\n-> Done!")
def test_simple(args):
    """Function to predict for a single image or folder of images
    """

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    print("-> Loading weights from ", args.load_weights_folder)
    encoder_path = os.path.join(args.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(args.load_weights_folder, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = legacy.ResnetEncoder(args.num_layers, False)
    loaded_dict_enc = torch.load(encoder_path)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthUncertaintyDecoder(encoder.num_ch_enc, num_output_channels=1, uncert=True, dropout=args.dropout)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Cannot find args.image_path: {}".format(args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            if image_path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear", align_corners=False)

            uncert = outputs[("uncert", 0)]
            uncert_resized = torch.nn.functional.interpolate(
                uncert, (original_height, original_width), mode="bilinear", align_corners=False)

            output_name = os.path.splitext(os.path.basename(image_path))[0]

            # Saving the raw numpy disparity is disabled by default:
            # name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name))
            # scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            # np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            disp_vmax = np.percentile(disp_resized_np, 95)
            disp_normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=disp_vmax)
            disp_mapper = cm.ScalarMappable(norm=disp_normalizer, cmap='magma')
            disp_colormapped_im = (disp_mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
            disp_im = pil.fromarray(disp_colormapped_im)

            name_dest_im = os.path.join(output_directory, "{}_disp.jpeg".format(output_name))
            disp_im.save(name_dest_im)

            # Saving colormapped uncertainty image
            uncert_resized_np = uncert_resized.squeeze().cpu().numpy()
            uncert_vmax = np.percentile(uncert_resized_np, 95)
            uncert_normalizer = mpl.colors.Normalize(vmin=uncert_resized_np.min(), vmax=uncert_vmax)
            uncert_mapper = cm.ScalarMappable(norm=uncert_normalizer, cmap='hot')
            uncert_colormapped_im = (uncert_mapper.to_rgba(uncert_resized_np)[:, :, :3] * 255).astype(np.uint8)
            uncert_im = pil.fromarray(uncert_colormapped_im)

            name_uncert_im = os.path.join(output_directory, "{}_uncert.jpeg".format(output_name))
            uncert_im.save(name_uncert_im)

    print('-> Done!')
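
# A matching sketch for test_simple(); the argument object is again
# hypothetical, mirroring the attributes the function reads (args.ext only
# matters when image_path points at a folder).
from argparse import Namespace

args = Namespace(
    image_path="assets/test_image.jpg",
    load_weights_folder="models/mono_model",
    num_layers=18,
    ext="jpg",
    dropout=False,
    no_cuda=False,
)
test_simple(args)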