Code example #1
    def set_dataset(self):
        """properly handle multiple dataset situation
        """

        fpath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "splits", self.opt.split, "{}_files.txt")
        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))

        train_dataset = datasets.KITTIRAWDataset(
            self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=not self.opt.no_aug, load_seman=True,
            load_hints=self.opt.load_hints, hints_path=self.opt.hints_path,
            PreSIL_root=self.opt.PreSIL_path, kitti_gt_path=self.opt.kitti_gt_path,
            theta_gt_path=self.opt.theta_gt_path, surfnorm_gt_path=self.opt.surfnorm_gt_path
        )

        val_dataset = datasets.KITTIRAWDataset(
            self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=False, load_seman=True,
            load_hints=self.opt.load_hints, hints_path=self.opt.hints_path,
            PreSIL_root=self.opt.PreSIL_path, kitti_gt_path=self.opt.kitti_gt_path,
            theta_gt_path=self.opt.theta_gt_path, surfnorm_gt_path=self.opt.surfnorm_gt_path
        )

        self.train_loader = DataLoader(
            train_dataset, self.opt.batch_size, shuffle=not self.opt.no_shuffle,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_loader = DataLoader(
            val_dataset, self.opt.batch_size, shuffle=True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.train_num = len(train_dataset)
        self.val_num = len(val_dataset)
        self.num_total_steps = self.train_num // self.opt.batch_size * self.opt.num_epochs
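
Every example on this page relies on a readlines helper to load the split files. It is not shown in the snippets; in Monodepth2-style projects it is usually the small utility sketched below (reproduced for context, not taken verbatim from the projects above).

def readlines(filename):
    """Read a text file and return its lines with trailing newlines stripped."""
    with open(filename, 'r') as f:
        lines = f.read().splitlines()
    return lines
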
Code example #2
    def set_dataset(self):
        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")
        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))

        train_dataset = datasets.KITTIRAWDataset(
            self.opt.data_path,
            train_filenames,
            self.opt.height,
            self.opt.width,
            self.opt.frame_ids,
            4,
            is_train=True,
            load_meta=self.opt.load_meta,
            is_load_semantics=True,
            is_predicted_semantics=self.opt.is_predicted_semantics,
            load_morphed_depth=self.opt.load_morphed_depth,
            read_stereo=self.opt.read_stereo,
            stereo_meta=self.opt.SGMStereo_prediction_folder,
            morphFolder=self.opt.read_processed_results_path)
        val_dataset = datasets.KITTIRAWDataset(
            self.opt.data_path,
            val_filenames,
            self.opt.height,
            self.opt.width,
            self.opt.frame_ids,
            4,
            is_train=False,
            load_meta=self.opt.load_meta,
            is_load_semantics=True,
            read_stereo=self.opt.read_stereo,
            stereo_meta=self.opt.SGMStereo_prediction_folder,
            is_predicted_semantics=self.opt.is_predicted_semantics)

        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.train_num = len(train_dataset)
        self.val_num = len(val_dataset)
        self.num_total_steps = self.train_num // self.opt.batch_size * self.opt.num_epochs
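
The self.val_iter created above is typically consumed one batch at a time during training, with a StopIteration guard that rebuilds the iterator once the validation loader is exhausted. A minimal sketch of such a method (the name val is an assumption, not code from this project):

    def val(self):
        """Fetch one validation batch, restarting the iterator when it runs out."""
        try:
            inputs = next(self.val_iter)
        except StopIteration:
            self.val_iter = iter(self.val_loader)
            inputs = next(self.val_iter)
        return inputs
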
Code example #3
 def __init__(self, options):
     self.opt = options
     fpath = os.path.join(os.path.dirname(__file__), "../splits",
                          self.opt.split, "{}_files.txt")
     self.train_filenames = readlines(fpath.format("train"))
     num_train_samples = len(self.train_filenames)
     self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs
     train_dataset = datasets.KITTIRAWDataset(
         self.opt.data_path,
         self.train_filenames,
         self.opt.height,
         self.opt.width,
         self.opt.frame_ids,
         4,
         is_train=not self.opt.noAug,
         load_detect=self.opt.predins,
         detect_path=self.opt.detect_path,
         load_seman=self.opt.loadSeman,
         load_pose=self.opt.loadPose,
         loadPredDepth=self.opt.loadPredDepth,
         predDepthPath=self.opt.predDepthPath)
     self.train_loader = DataLoader(train_dataset,
                                    self.opt.batch_size,
                                    shuffle=not self.opt.noShuffle,
                                    num_workers=self.opt.num_workers,
                                    pin_memory=True,
                                    drop_last=True)
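
Because the loader is built with drop_last=True, each epoch yields exactly num_train_samples // batch_size batches, which is what the num_total_steps formula above counts. A small worked example (the 39,810-file figure is the size of the Monodepth2 eigen_zhou training split, used here only for illustration):

num_train_samples, batch_size, num_epochs = 39810, 12, 20
steps_per_epoch = num_train_samples // batch_size   # 3317 batches per epoch
num_total_steps = steps_per_epoch * num_epochs      # 66340 optimizer steps in total
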
Code example #4
File: compare_eval.py Project: zebrajack/DepthC3D
def network_define(opt, data_path, height, width):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, split_file))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path,
                              map_location=torch.device("cuda:1"))

    if opt.dataset_val[0] == "kitti":
        dataset = datasets.KITTIRAWDataset(data_path,
                                           filenames,
                                           height,
                                           width, [0],
                                           4,
                                           is_train=False)
    elif opt.dataset_val[0] == "vkitti":
        dataset = datasets.VKITTIDataset(data_path,
                                         filenames,
                                         height,
                                         width, [0],
                                         4,
                                         is_train=False)
    # dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
    #                         pin_memory=True, drop_last=False)
    dataloader = DataLoader(
        dataset,
        1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=my_collate_fn
    )  # the default collate_fn would fail because samples have non-deterministic lengths

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(
        torch.load(decoder_path, map_location=torch.device("cuda:1")))

    encoder.cuda(1)
    encoder.eval()
    depth_decoder.cuda(1)
    depth_decoder.eval()

    return encoder, depth_decoder, dataloader, filenames
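
my_collate_fn is referenced but not defined in this example. Given the comment about samples of non-deterministic length, a plausible sketch is a collate function that stacks tensors only when their shapes agree and otherwise keeps the raw values in a list; the version below is an assumption about its behaviour, not the project's actual code.

import torch

def my_collate_fn(batch):
    """Stack per-key tensors when shapes match; keep variable-length entries as lists."""
    collated = {}
    for key in batch[0]:
        values = [sample[key] for sample in batch]
        if torch.is_tensor(values[0]) and all(v.shape == values[0].shape for v in values):
            collated[key] = torch.stack(values, dim=0)
        else:
            collated[key] = values
    return collated
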
Code example #5
def main():
    models = build_model(device=torch.device("cuda"))
    models, parameters = models
    print("models was loaded")
    print(
        f"Total params: {sum([get_params_num(m) for m in models.values()]) / 10 ** 6}"
    )
    print(f"Train params: {sum([p.numel() for p in parameters])/  10** 6}")
    assert False  # debugging stop left in the source: nothing below this line executes

    fpath = os.path.join(os.getcwd(), "splits", "eigen_zhou_small",
                         "{}_files.txt")
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.jpg'
    val_dataset = datasets.KITTIRAWDataset(
        "/home/ankarpov/Datasets/kitti_data",
        val_filenames,
        192,
        640, [0, 1, -1],
        4,
        is_train=False,
        img_ext=img_ext)

    transform = pth_transforms.Compose([
        # pth_transforms.Resize(target_size),
        pth_transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    frame_ids = [0, 1, -1]
    input_tensor = torch.stack(
        [val_dataset[21][("color_aug", i, 0)] for i in frame_ids], dim=0)
    # input_tensor = transform(input_tensor)
    input_tensor = input_tensor[None]
    input_tensor = input_tensor.to(torch.device("cuda"))
    print("input shape: ", input_tensor.shape)
    print("tensor on cuda")

    all_features = models["encoder"](input_tensor)
    all_features["act"] = [torch.split(f, 1) for f in all_features["act"]]
    all_features["attn"] = [torch.split(f, 1) for f in all_features["attn"]]
    acts = {}
    attns = {}
    for i, k in enumerate(frame_ids):
        acts[k] = [act[i] for act in all_features["act"]]
        attns[k] = [attn[i] for attn in all_features["attn"]]
    print("Encoder was processed")
    depth = models["depth"](acts[0], attns[0])
    print("Depth was processed")
    print("Depth shape: ", depth.shape)
    acts = acts[-1]
    attns = attns[-1]
    for f_i in frame_ids[1:]:
        if f_i < 0:
            act_inputs = (acts[f_i], acts[0])
            attn_inputs = (attns[f_i], attns[0])
        else:
            act_inputs = (acts[0], acts[f_i])
            attn_inputs = (attns[0], attns[f_i])
        act_inputs = torch.stack(act_inputs, dim=1)
        attn_inputs = torch.stack(attn_inputs, dim=1)
        # print(act_inputs.shape, attn_inputs.shape)
        axisangle, translation = models["pose"](act_inputs, attn_inputs)
        print(f"Pose frame {f_i} was processed")
        print(axisangle.shape, translation.shape)

    # attn = [t.cpu() for t in vit_out["attn"]]
    # act = [t.cpu() for t in vit_out["act"]]
    # torch.save(attn, "/home/ankarpov/tmp/attn_21.pt")
    # torch.save(act, "/home/ankarpov/tmp/act_21.pt")
    # torch.save(input_tensor.cpu(), "/home/ankarpov/tmp/input_21.pt")

    print("was forward")
Code example #6
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    if opt.isCudaMorphing and opt.borderMorphLoss:
        bnmorph = BNMorph(height=opt.height, width=opt.width, sparsityRad=2).cuda()
    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        # print("-> Loading weights from {}".format(opt.load_weights_folder))
        if not opt.UseCustTest:
            filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        else:
            filenames = readlines(os.path.join(splits_dir, "eigen_test_toy", "val_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset, img_ext='png',
                                           load_meta=opt.load_meta,
                                           is_load_semantics=opt.use_kitti_gt_semantics,
                                           is_predicted_semantics=opt.is_predicted_semantics)

        dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers, drop_last=True)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=(opt.switchMode == 'on'), isMulChannel=opt.isMulChannel)

        if opt.borderMorphLoss:
            tool = grad_computation_tools(batch_size=opt.batch_size, height=opt.height, width=opt.width).cuda()
            auto_morph = AutoMorph(height=opt.height, width=opt.width)
            foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
            MorphitNum = 5


        model_dict = encoder.state_dict()
        encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        if opt.set_eval_train:
            encoder.train()
            depth_decoder.train()

        # encoder.train()
        # depth_decoder.train()

        pred_disps = []
        mergeDisp = Merge_MultDisp(opt.scales, batchSize=opt.batch_size)

        # print("-> Computing predictions with size {}x{}".format(
        #     encoder_dict['width'], encoder_dict['height']))
        count = 0
        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

                features = encoder(input_color)
                outputs = dict()
                # outputs.update(depth_decoder(features, computeSemantic=True, computeDepth=False))
                outputs.update(depth_decoder(features, computeSemantic=False, computeDepth=True))

                mergeDisp(data, outputs, eval=True)
                # outputs['disp', 0] = F.interpolate(outputs['disp', 0], [opt.height, opt.width], mode='bilinear', align_corners=True)
                # pickle.dump(outputs, open("eval_outputs.p", "wb"))
                if opt.borderMorphLoss:
                    for key, ipt in data.items():
                        if not (key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta' or key == 'file_add'):
                            data[key] = ipt.to(torch.device("cuda"))

                    foregroundMapGt = torch.ones([opt.batch_size, 1, opt.height, opt.width],
                                                 dtype=torch.uint8, device=torch.device("cuda"))
                    for m in foregroundType:
                        foregroundMapGt = foregroundMapGt * (data['seman_gt'] != m)
                    foregroundMapGt = (1 - foregroundMapGt).float()

                    disparity_grad = torch.abs(tool.convDispx(outputs['disp', 0])) + torch.abs(
                        tool.convDispy(outputs['disp', 0]))
                    semantics_grad = torch.abs(tool.convDispx(foregroundMapGt)) + torch.abs(
                        tool.convDispy(foregroundMapGt))
                    disparity_grad = disparity_grad * tool.zero_mask
                    semantics_grad = semantics_grad * tool.zero_mask

                    disparity_grad_bin = disparity_grad > tool.disparityTh
                    semantics_grad_bin = semantics_grad > tool.semanticsTh

                    if opt.isCudaMorphing:
                        morphedx, morphedy, coeff = bnmorph.find_corresponding_pts(disparity_grad_bin, semantics_grad_bin)
                        morphedx = (morphedx / (opt.width - 1) - 0.5) * 2
                        morphedy = (morphedy / (opt.height - 1) - 0.5) * 2
                        grid = torch.cat([morphedx, morphedy], dim=1).permute(0, 2, 3, 1)
                        dispMaps_morphed = F.grid_sample(outputs['disp', 0], grid, padding_mode="border")
                    else:
                        disparity_grad_bin = disparity_grad_bin.detach().cpu().numpy()
                        semantics_grad_bin = semantics_grad_bin.detach().cpu().numpy()

                        disparityMap_to_processed = outputs['disp', 0].detach().cpu().numpy()
                        dispMaps_morphed = list()
                        changeingRecs = list()
                        for mm in range(opt.batch_size):
                            dispMap_morphed, changeingRec = auto_morph.automorph(
                                disparity_grad_bin[mm, 0, :, :], semantics_grad_bin[mm, 0, :, :],
                                disparityMap_to_processed[mm, 0, :, :])
                            dispMaps_morphed.append(dispMap_morphed)
                            changeingRecs.append(changeingRec)
                        dispMaps_morphed = torch.from_numpy(np.stack(dispMaps_morphed, axis=0)).unsqueeze(1).cuda()
                    outputs[("disp", 0)] = dispMaps_morphed
                    # tensor2disp(dispMaps_morphed, ind=0, vmax=0.09).show()

                # print(count)
                count = count + 1
                pred_disp, _ = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                # Some check:
                # with open('train_outputs.p', 'rb') as handle:
                #     train_outputs = pickle.load(handle)
                #     pred_disp, pdepth = disp_to_depth(outputs[("disp", 0)], opt.min_depth, opt.max_depth)
                #     torch.mean(torch.abs(train_outputs[('disp', 0)] - outputs[("disp", 0)]))
                #     torch.mean(torch.abs(train_outputs[('depth', 0, 0)] - pdepth))

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()
    if not opt.UseCustTest:
        gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
        gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]
    else:
        gt_depths = np.load("/media/shengjie/other/sceneUnderstanding/SDNET/splits/eigen_test_toy/gt_depths.npz", fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
        if opt.EnableMedianScaleInEval:
            opt.disable_median_scaling = False
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        # Some check:
        # with open('recompare.p', 'rb') as handle:
        #     train_outputs = pickle.load(handle)
        #     calib_dir = '/media/shengjie/other/sceneUnderstanding/monodepth2/kitti_data/kitti_raw/2011_09_26'
        #     velo_filename = '/media/shengjie/other/sceneUnderstanding/monodepth2/kitti_data/kitti_raw/2011_09_26/2011_09_26_drive_0002_sync/velodyne_points/data/0000000069.bin'
        #     gt_depth2 = kitti_utils.generate_depth_map(calib_dir, velo_filename, 2, True)
        #
        #     np.mean(np.abs(train_outputs['depth_gt'][0,0,:,:].cpu().numpy() - gt_depth))
        #     np.mean(np.abs(train_outputs['depth_pred'][0, 0, :, :].cpu().numpy() - pred_depth))
        #     pred_depth = pred_depth * train_outputs['scaleRation'].cpu().numpy()
        #
        #     train_depth = F.interpolate(train_outputs[('depth', 0, 1)], [gt_height, gt_width], mode='bilinear', align_corners=True)
        #     np.mean(np.abs(train_depth[0,0,:,:].cpu().numpy() - pred_depth))

        if opt.eval_split == "eigen" or opt.UseCustTest:
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width,  0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        # Some check:
        # with open('recompare.p', 'rb') as handle:
        #     eval_outputs = pickle.load(handle)
        #     np.mean(np.abs(eval_outputs['depth_gt'][0,0,:,:].cpu().numpy() -gt_depth ))

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth, UseGtMedianScaling=(opt.UseGtMedianScaling == True)))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 8).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3", "abs_shift"))
    print(("&{: 8.3f}  " * 8).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
    if opt.isCudaMorphing and opt.borderMorphLoss:
        bnmorph.print_params()
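
The post-processing branch above calls batch_post_process_disparity on a batch and its horizontally flipped copy. In Monodepth2-derived code this helper blends the two disparity maps with ramped masks; a sketch of that standard implementation is shown here for reference.

import numpy as np

def batch_post_process_disparity(l_disp, r_disp):
    """Blend a disparity batch with its flipped counterpart (Monodepth v1-style post-processing)."""
    _, h, w = l_disp.shape
    m_disp = 0.5 * (l_disp + r_disp)
    l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
    l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...]
    r_mask = l_mask[:, :, ::-1]
    return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
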
Code example #7
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path,
                                               filenames,
                                               encoder_dict['height'],
                                               encoder_dict['width'],
                                               opt.frame_ids,
                                               4,
                                               is_train=False,
                                               tag=opt.dataset,
                                               load_meta=True,
                                               is_sep_train_seman=False)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'],
                                           opt.frame_ids,
                                           4,
                                           is_train=False,
                                           tag=opt.dataset)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc,
                                              isSwitch=True,
                                              isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # x = torch.ones(2, 2, requires_grad=True)
    # print(x)
    # y = x + 2 + x
    # y = y.detach()
    # print(y)
    # z = y * y * 3
    # out = z.mean()
    # print(z, out)
    # out.backward()
    # print(x.grad)

    ##--------------------Visualization parameter here----------------------------##
    sfx = torch.nn.Softmax(dim=1)
    mergeDisp = Merge_MultDisp(opt.scales,
                               batchSize=opt.batch_size,
                               isMulChannel=opt.isMulChannel)
    svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual'
    index = 0
    isvisualize = True
    viewEdgeMerge = False
    isHist = False
    useGtSeman = True
    viewSurfaceNormal = True
    viewSelfOcclu = True
    viewDispUp = True
    viewSmooth = True
    viewMulReg = True
    viewBorderRegress = False
    viewBorderSimilarity = False
    viewRandomSample = True
    viewSemanReg = False
    viewDepthGuess = False
    height = 256
    width = 512
    tensor23dPts = Tensor23dPts()

    if isHist:
        rec = np.zeros((19, 100))

    if opt.isMulChannel:
        app = os.path.join('mulDispOn', opt.model_name)
    else:
        app = os.path.join('mulDispOff', opt.model_name)

    dirpath = os.path.join(svRoot, app)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    if viewEdgeMerge:
        comp1dgrad = Comp1dgrad().cuda()

    if viewSurfaceNormal:
        compsn = ComputeSurfaceNormal(height=height,
                                      width=width,
                                      batch_size=opt.batch_size).cuda()

    if viewSelfOcclu:
        selfclu = SelfOccluMask().cuda()

    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            # if idx != 12:
            #     continue
            for key, ipt in inputs.items():
                if not (key == 'height' or key == 'width' or key == 'tag'
                        or key == 'cts_meta'):
                    inputs[key] = ipt.to(torch.device("cuda"))
            input_color = inputs[("color", 0, 0)].cuda()
            # input_color = torch.flip(input_color, dims=[3])
            features = encoder(input_color)
            outputs = dict()
            outputs.update(
                depth_decoder(features,
                              computeSemantic=True,
                              computeDepth=False))
            outputs.update(
                depth_decoder(features,
                              computeSemantic=False,
                              computeDepth=True))

            # view the processed semantic seperate training data
            # for viewInd in range(opt.batch_size):
            #     label = inputs['semanTrain_label']
            #     visualize_semantic(label[viewInd, 0, :, :].cpu().numpy()).show()
            #     fig_rgb = inputs['semanTrain_rgb'][viewInd, :, :, :].permute(1, 2, 0).cpu().numpy()
            #     fig_rgb = (fig_rgb * 255).astype(np.uint8)
            #     fig_rgb = pil.fromarray(fig_rgb)
            #     fig_rgb.show()

            if isHist:
                mulDisp = outputs[('mul_disp', 0)]
                scaled_disp, mulDepth = disp_to_depth(mulDisp, 0.1, 100)
                mulDepth = mulDepth.cpu()
                for i in range(mulDisp.shape[1]):
                    rec[i, :] += torch.histc(mulDepth[:, i, :, :],
                                             bins=100,
                                             min=0,
                                             max=100).numpy()

            if isvisualize:
                if useGtSeman:
                    # outputs[('mul_disp', 0)][:,2,:,:] = outputs[('mul_disp', 0)][:,2,:,:] * 0
                    # outputs[('mul_disp', 0)][:, 12, :, :] = outputs[('mul_disp', 0)][:, 12, :, :] * 0
                    mergeDisp(inputs, outputs, eval=False)
                else:
                    mergeDisp(inputs, outputs, eval=True)

                dispMap = outputs[('disp', 0)]
                scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100)
                depthMap = depthMap * STEREO_SCALE_FACTOR
                # _, mul_depthMap = disp_to_depth(outputs[('mul_disp', 0)], 0.1, 100)
                # mul_depthMap = mul_depthMap * STEREO_SCALE_FACTOR

                if viewDispUp:
                    fig_dispup = compDispUp.visualize(scaled_disp,
                                                      viewindex=index)

                if viewSmooth:
                    rgb = inputs[('color_aug', 0, 0)]
                    smoothfig = comSmooth.visualize(rgb=rgb,
                                                    disp=scaled_disp,
                                                    viewindex=index)

                if useGtSeman:
                    fig_seman = tensor2semantic(inputs['seman_gt'],
                                                ind=index,
                                                isGt=True)
                else:
                    fig_seman = tensor2semantic(outputs[('seman', 0)],
                                                ind=index)

                if viewSemanReg:
                    foregroundType = [
                        11, 12, 13, 14, 15, 16, 17, 18
                    ]  # person, rider, car, truck, bus, train, motorcycle, bicycle
                    softmaxedSeman = F.softmax(outputs[('seman', 0)], dim=1)
                    forePredMask = torch.sum(
                        softmaxedSeman[:, foregroundType, :, :],
                        dim=1,
                        keepdim=True)
                    foreGtMask = torch.ones(dispMap.shape).cuda().byte()

                    for m in foregroundType:
                        foreGtMask = foreGtMask * (inputs['seman_gt'] != m)
                    foreGtMask = 1 - foreGtMask
                    foreGtMask = foreGtMask.float()

                    forePredMask[forePredMask > 0.5] = 1
                    forePredMask[forePredMask <= 0.5] = 0

                    forePredMask = foreGtMask
                    rdSampleSeman.visualizeBorderSample(dispMap,
                                                        forePredMask,
                                                        gtMask=foreGtMask,
                                                        viewIndex=index)

                    cm = plt.get_cmap('magma')
                    viewForePred = forePredMask[index, :, :, :].squeeze(
                        0).detach().cpu().numpy()
                    viewForePred = (cm(viewForePred) * 255).astype(np.uint8)
                    # pil.fromarray(viewForePred).show()

                    viewForeGt = foreGtMask[index, :, :, :].squeeze(
                        0).detach().cpu().numpy()
                    viewForeGt = (cm(viewForeGt) * 255).astype(np.uint8)
                    # pil.fromarray(viewForeGt).show()
                    forePredictCombined = np.concatenate(
                        [viewForePred, viewForeGt], axis=0)
                    # pil.fromarray(forePredictCombined).show()
                    pil.fromarray(forePredictCombined).save(
                        os.path.join(dirpath,
                                     str(idx) + '_fg.png'))

                if viewDepthGuess:
                    wallType = [2, 3, 4]  # Building, wall, fence
                    roadType = [0, 1, 9]  # road, sidewalk, terrain
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle

                    wallTypeMask = torch.ones(dispMap.shape).cuda().byte()
                    roadTypeMask = torch.ones(dispMap.shape).cuda().byte()
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in wallType:
                            wallTypeMask = wallTypeMask * (inputs['seman_gt']
                                                           != m)
                        wallTypeMask = (1 - wallTypeMask).float()

                        for m in roadType:
                            roadTypeMask = roadTypeMask * (inputs['seman_gt']
                                                           != m)
                        roadTypeMask = (1 - roadTypeMask).float()

                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = (1 - foreGroundMask).float()
                    originalSieze = [2048, 1024]
                    # currentSize = np.array([dispMap.shape[3], dispMap.shape[2]])
                    # scaleFac = np.eye(4)
                    # scaleFac[0,0] = currentSize[0] / originalSieze[0]
                    # scaleFac[1,1] = currentSize[1] / originalSieze[1]
                    # scaleFac = torch.Tensor(scaleFac).view(1,4,4).repeat(opt.batch_size, 1, 1).cuda()
                    # scaledIntrinsic = scaleFac @ inputs['realIn']
                    scaledIntrinsic = inputs['realIn']
                    depthGuess.visualizeDepthGuess(
                        realDepth=depthMap,
                        dispAct=dispMap,
                        foredgroundMask=foreGroundMask,
                        wallTypeMask=wallTypeMask,
                        groundTypeMask=roadTypeMask,
                        intrinsic=scaledIntrinsic,
                        extrinsic=inputs['realEx'],
                        semantic=inputs['seman_gt_eval'],
                        cts_meta=inputs['cts_meta'],
                        viewInd=index)
                    # realDepth, foredgroundMask, wallTypeMask, groundTypeMask, intrinsic, extrinsic

                fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index)
                fig_disp = tensor2disp(outputs[('disp', 0)], ind=index)
                fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d(
                    depthMap,
                    ind=index,
                    intrinsic=inputs['cts_meta']['intrinsic'][index, :, :],
                    extrinsic=inputs['cts_meta']['extrinsic'][index, :, :],
                    gtmask=inputs['cts_meta']['mask'][index, :, :],
                    gtdepth=inputs['cts_meta']['depthMap'][index, :, :],
                    semanticMap=inputs['seman_gt_eval'][index, :, :])
                # check:
                # torch.inverse(inputs['invcamK'][index, :, :] @ inputs['realIn'][index, :, :]) - inputs['cts_meta']['extrinsic'][index, :, :]
                fig_grad = None

                if viewSurfaceNormal:
                    # surnorm = compsn.visualize(depthMap = depthMap, invcamK = inputs['invcamK'].cuda(), orgEstPts = veh_coord, gtEstPts = veh_coord_gt, viewindex = index)
                    surnorm = compsn.visualize(
                        depthMap=depthMap,
                        invcamK=inputs['invcamK'].cuda(),
                        orgEstPts=veh_coord,
                        gtEstPts=veh_coord_gt,
                        viewindex=index)
                    surnormMap = compsn(depthMap=depthMap,
                                        invcamK=inputs['invcamK'].cuda())

                if viewMulReg:
                    depthMapLoc = depthMap / STEREO_SCALE_FACTOR
                    skyId = 10
                    skyMask = inputs['seman_gt'] == skyId
                    skyerr = objReg.visualize_regularizeSky(depthMapLoc,
                                                            skyMask,
                                                            viewInd=index)

                    wallType = [2, 3, 4]  # Building, wall, fence
                    roadType = [0, 1, 9]  # road, sidewalk, terrain
                    permuType = [5, 7]  # Pole, traffic sign
                    chanWinSize = 5

                    wallMask = torch.ones_like(skyMask)
                    roadMask = torch.ones_like(skyMask)
                    permuMask = torch.ones_like(skyMask)

                    with torch.no_grad():
                        for m in wallType:
                            wallMask = wallMask * (inputs['seman_gt'] != m)
                        wallMask = 1 - wallMask
                        wallMask = wallMask[:, :, 1:-1, 1:-1]

                        for m in roadType:
                            roadMask = roadMask * (inputs['seman_gt'] != m)
                        roadMask = 1 - roadMask
                        roadMask = roadMask[:, :, 1:-1, 1:-1]

                        for m in permuType:
                            permuMask = permuMask * (inputs['seman_gt'] != m)
                        permuMask = 1 - permuMask
                        permuMask = permuMask[:, :, 1:-1, 1:-1]

                    BdErrFig, viewRdErrFig = objReg.visualize_regularizeBuildingRoad(
                        surnormMap, wallMask, roadMask, dispMap, viewInd=index)

                    padSize = int((chanWinSize - 1) / 2)
                    permuMask = permuMask[:, :, padSize:-padSize,
                                          padSize:-padSize]
                    surVarFig = objReg.visualize_regularizePoleSign(
                        surnormMap, permuMask, dispMap, viewInd=index)

                if viewBorderRegress:
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                    backgroundType = [
                        0, 1, 2, 3, 4, 8, 9, 10
                    ]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                    suppressType = [255]  # Suppress no label lines
                    # foreGroundMask = torch.sum(inputs['seman_gt'][:, foregroundType, :, :], dim=1, keepdim=True)
                    # backGroundMask = torch.sum(inputs['seman_gt'][:, backgroundType, :, :], dim=1, keepdim=True)
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    suppresMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = 1 - foreGroundMask
                        for m in backgroundType:
                            backGroundMask = backGroundMask * (
                                inputs['seman_gt'] != m)
                        backGroundMask = 1 - backGroundMask
                        for m in suppressType:
                            suppresMask = suppresMask * (inputs['seman_gt'] !=
                                                         m)
                        suppresMask = 1 - suppresMask
                        suppresMask = suppresMask.float()
                        combinedMask = torch.cat(
                            [foreGroundMask, backGroundMask], dim=1).float()

                    # borderRegFig = borderRegress.visualize_computeBorder(dispMap, combinedMask, suppresMask = suppresMask, viewIndex=index)
                    borderRegFig = None

                else:
                    borderRegFig = None

                # if viewBorderSimilarity:
                #     foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
                #                       18]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                #     backgroundType = [0, 1, 2, 3, 4, 8, 9,
                #                       10]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                #     suppressType = [255]  # Suppress no label lines
                #     foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                #     backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                #     suppresMask = torch.ones(dispMap.shape).cuda().byte()
                #
                #     with torch.no_grad():
                #         for m in foregroundType:
                #             foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                #         foreGroundMask = 1 - foreGroundMask
                #         for m in backgroundType:
                #             backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                #         backGroundMask = 1 - backGroundMask
                #         for m in suppressType:
                #             suppresMask = suppresMask * (inputs['seman_gt'] != m)
                #         suppresMask = 1 - suppresMask
                #         suppresMask = suppresMask.float()
                #         combinedMask = torch.cat([foreGroundMask, backGroundMask], dim=1).float()
                #
                #     borderSimFig = borderSim.visualize_borderSimilarity(dispMap, foreGroundMask.float(), suppresMask = suppresMask, viewIndex=index)

                if viewRandomSample:
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                    backgroundType = [
                        0, 1, 2, 3, 4, 8, 9, 10
                    ]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                    suppressType = [255]  # Suppress no label lines
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    suppresMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = 1 - foreGroundMask
                        for m in suppressType:
                            suppresMask = suppresMask * (inputs['seman_gt'] !=
                                                         m)
                        suppresMask = 1 - suppresMask
                        suppresMask = suppresMask.float()
                        foreGroundMask = foreGroundMask.float()

                    rdSampleOnBorder.visualize_randomSample(dispMap,
                                                            foreGroundMask,
                                                            suppresMask,
                                                            viewIndex=index)
                    # rdSampleOnBorder.randomSampleReg(dispMap, foreGroundMask)

                if viewEdgeMerge:
                    grad_disp = comp1dgrad(outputs[('mul_disp', 0)])
                    fig_grad = tensor2disp(grad_disp, ind=index, vmax=1)
                    fig_grad = fig_grad.resize([512, 256])

                if viewSelfOcclu:
                    fl = inputs[("K", 0)][:, 0, 0]
                    bs = torch.abs(inputs["stereo_T"][:, 0, 3])
                    clufig, suppressedDisp = selfclu.visualize(dispMap,
                                                               viewind=index)

                if fig_grad is not None:
                    grad_seman = (
                        np.array(fig_grad)[:, :, 0:3].astype(float) * 0.7 +
                        np.array(fig_seman).astype(float) * 0.3).astype(
                            np.uint8)
                    # combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_grad)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)]
                    combined = [
                        grad_seman,
                        np.array(fig_disp)[:, :, 0:3],
                        np.array(fig_rgb)
                    ]
                    combined = np.concatenate(combined, axis=1)
                else:
                    if viewSurfaceNormal and viewSelfOcclu:
                        surnorm = surnorm.resize([512, 256])
                        surnorm_mixed = pil.fromarray(
                            (np.array(surnorm) * 0.2 +
                             np.array(fig_disp)[:, :, 0:3] * 0.8).astype(
                                 np.uint8))
                        disp_seman = (
                            np.array(fig_disp)[:, :, 0:3].astype(float) * 0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        supprressed_disp_seman = (
                            np.array(suppressedDisp)[:, :, 0:3].astype(float) * 0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        rgb_seman = (
                            np.array(fig_seman).astype(float) * 0.5 +
                            np.array(fig_rgb).astype(float) * 0.5).astype(
                                np.uint8)

                        # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype(
                        #     np.float) * 0.7).astype(np.uint8)
                        comb1 = np.concatenate([
                            np.array(supprressed_disp_seman)[:, :, 0:3],
                            np.array(suppressedDisp)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb2 = np.concatenate([
                            np.array(disp_seman)[:, :, 0:3],
                            np.array(fig_disp)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb3 = np.concatenate([
                            np.array(surnorm_mixed)[:, :, 0:3],
                            np.array(surnorm)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb4 = np.concatenate([
                            np.array(fig_seman)[:, :, 0:3],
                            np.array(rgb_seman)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb6 = np.concatenate([
                            np.array(clufig)[:, :, 0:3],
                            np.array(fig_dispup)[:, :, 0:3]
                        ],
                                               axis=1)

                        fig3dsize = np.ceil(
                            np.array([
                                comb4.shape[1], comb4.shape[1] /
                                fig_3d.size[0] * fig_3d.size[1]
                            ])).astype(int)
                        comb5 = np.array(fig_3d.resize(fig3dsize))
                        # combined = np.concatenate([comb1, comb6, comb2, comb3, comb4, comb5], axis=0)
                        combined = np.concatenate([comb1, comb2, comb4, comb3],
                                                  axis=0)
                    else:
                        disp_seman = (
                            np.array(fig_disp)[:, :, 0:3].astype(float) * 0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        rgb_seman = (
                            np.array(fig_seman).astype(float) * 0.5 +
                            np.array(fig_rgb).astype(float) * 0.5).astype(
                                np.uint8)
                        # combined = [np.array(disp_seman)[:,:,0:3], np.array(fig_disp)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)]
                        combined = [
                            np.array(disp_seman)[:, :, 0:3],
                            np.array(fig_disp)[:, :, 0:3],
                            np.array(fig_seman),
                            np.array(rgb_seman)
                        ]
                        combined = np.concatenate(combined, axis=1)

                fig = pil.fromarray(combined)
                # fig.show()
                fig.save(os.path.join(dirpath, str(idx) + '.png'))
                if borderRegFig is not None:
                    borderRegFig.save(
                        os.path.join(dirpath,
                                     str(idx) + '_borderRegress.png'))
                # fig_3d.save(os.path.join(dirpath, str(idx) + '_fig3d.png'))
                # for k in range(10):
                #     fig_disp = tensor2disp(outputs[('disp', 0)], ind=k)
                #     fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=k)
                #     combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_rgb)]
                #     combined = np.concatenate(combined, axis=1)
                #     fig = pil.fromarray(combined)
                #     fig.save(
                #         os.path.join('/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/MoredispOrg' + str(k) + '.png'))

                # fig_rgb.save(os.path.join(svRoot, app, 'rgb' + str(idx) + '.png'))
                # fig_seman.save(os.path.join(svRoot, app, 'semantic'+ str(idx) + '.png'))
                # fig_disp.save(os.path.join(svRoot, app, 'disp'+ str(idx) + '.png'))
                # a = inputs['seman_gt_eval']
                # scaled_disp, _ = disp_to_depth(outputs[('disp', 0)], 0.1, 100)
                print("%dth saved" % idx)
    # If compute the histogram
    if isHist:
        svPath = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/mul_channel_depth'
        carId = 13
        prob = copy.deepcopy(rec)
        ind = np.arange(prob.shape[1] * 2)
        for i in range(prob.shape[0]):
            prob[i, :] = prob[i, :] / np.sum(prob[i, :])
        for i in range(prob.shape[0]):
            trainStr = trainId2label[i][0]
            fig, ax = plt.subplots()
            rects1 = ax.bar(ind[0::2], prob[carId, :], label='obj:car')
            rects2 = ax.bar(ind[1::2], prob[i, :], label='obj:' + trainStr)
            ax.set_ylabel('Meter in percentile')
            ax.set_xlabel('Meters')
            ax.set_title('Scale Changes between scale car and scale %s' %
                         trainStr)
            ax.legend()
            plt.savefig(os.path.join(svPath, str(i)), dpi=200)
            plt.close(fig)
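
Several of these examples call disp_to_depth to turn the decoder's sigmoid output into depth. In Monodepth2-style code it linearly rescales the disparity between 1/max_depth and 1/min_depth and then inverts it; the sketch below shows that standard form (individual projects may modify it).

def disp_to_depth(disp, min_depth, max_depth):
    """Convert a sigmoid disparity map into (scaled disparity, depth)."""
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth
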
Code example #8
File: run_infer.py Project: xdr940/DeepSfMLearner
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    # metric_mode = opts['metric_mode']

    # The metric values here are obtained by forcibly compressing the GT values into the same range as the scanner, which pushes them as close as possible to true metric values.
    # But for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")
    elif opts['dataset']['type'] == 'custom_mono':
        dataset = datasets.CustomMonoDataset(data_path=data_path,
                                             filenames=file_names,
                                             height=feed_height,
                                             width=feed_width,
                                             frame_sides=frame_sides,
                                             num_scales=1,
                                             mode='test')
    else:
        raise ValueError("Unknown dataset type: {}".format(opts['dataset']['type']))

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    idx = 0
    for data in tqdm(dataloader):

        input_color = reframe(encoder_mode,
                              data,
                              frame_sides=frame_sides,
                              key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)

        # depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        if "depth" in sub_dirs:
            pred_depth = pred_depth.cpu()[:, 0].numpy()[0]
            depth = cv2.resize(pred_depth, (full_width, full_height))
            depth = np_normalize_image(depth)
            cv2.imwrite(out_dir / "depth" / file_names[idx].replace('/', '_'),
                        depth * 255)

        if "disp" in sub_dirs:
            pred_disp = pred_disp.cpu()[:, 0].numpy()[0]
            disp = cv2.resize(pred_disp, (full_width, full_height))
            disp = np_normalize_image(disp)

            cv2.imwrite(out_dir / "disp" / file_names[idx].replace('/', '_'),
                        disp * 255)

        idx += 1
コード例 #9
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    is_use_disparity = True
    is_eval_morph = True
    is_cts_bst = True
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    if is_use_disparity:
        getDisp = get_disparity_predict()
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTISemanticDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
        train_dataset_predict = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=True, load_morphed_depth=False)
        train_dataset_gt = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=False, load_morphed_depth=False)
    else:
        raise ValueError("No predefined dataset")
    dataloader_predict = DataLoader(train_dataset_predict, 1, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)
    dataloader_gt = DataLoader(train_dataset_gt, 1, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)
    dataloader_predict_iter = iter(dataloader_predict)
    dataloader_gt_iter = iter(dataloader_gt)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    sfx = torch.nn.Softmax(dim=1)
    depth_pos = '/media/shengjie/other/sceneUnderstanding/bts/result_bts_eigen/raw'


    print("Evaluation starts")


    width = 1216
    height = 352
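    # The fractions below reproduce the standard Garg/Eigen evaluation crop used for KITTI depth
    # benchmarks, keeping only the central road region of the 352 x 1216 image.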
    height_s = int(0.40810811 * height)
    height_e = int(0.99189189 * height)
    width_s = int(0.03594771 * width)
    width_e = int(0.96405229 * width)

    if not is_use_disparity:
        ms = Morph_semantics(height=206, width=1129)
    else:
        ms = Morph_semantics(height=218, width=1153)
    with torch.no_grad():
        for idx in range(dataloader_gt.__len__()):
            inputs_predict = dataloader_predict_iter.__next__()
            inputs_gt = dataloader_gt_iter.__next__()
            if not is_cts_bst:
                inputs_predict['seman_gt_eval'] = inputs_predict['seman_gt_eval']
            else:
                tcomp = filenames[idx].split(' ')
                path = os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/cts_best_seman', tcomp[0].split('/')[0] +'_' + tcomp[0].split('/')[1] +  '_' + tcomp[1].zfill(10) + '.png')
                cts_pred = Image.open(path)
                cts_pred = np.array(cts_pred)
                for k in np.unique(cts_pred):
                    cts_pred[cts_pred == k] = labels[k].trainId
                inputs_predict['seman_gt_eval'] = torch.from_numpy(cts_pred).unsqueeze(0)
            # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show()
            # tensor2semantic(inputs_gt['seman_gt_eval'].unsqueeze(1), ind=0).show()
            # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show()

            # input_color = inputs[("color", 0, 0)].cuda()
            # outputs = depth_decoder(encoder(input_color),computeSemantic = True, computeDepth = False)
            resized_gt = inputs_gt['seman_gt_eval'].unsqueeze(1)
            # resized_gt = F.interpolate(inputs_gt['seman_gt_eval'].unsqueeze(1).float(), [height, width], mode='nearest')
            # resized_gt = resized_gt.squeeze(1).byte()
            resized_pred = F.interpolate(inputs_predict['seman_gt_eval'].unsqueeze(1).float(), [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='nearest')
            resized_pred = resized_pred.byte()
            resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)],
                                        [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]],
                                        mode='bilinear', align_corners=True)

            resized_pred_list = list()
            resized_morph_list = list()
            groundTruthNp_list = list()
            if not is_use_disparity:
                t_height = resized_gt.shape[2]
                t_width = resized_gt.shape[3]
                top_margin = int(t_height - 352)
                left_margin = int((t_width - 1216) / 2)
                resized_gt = resized_gt[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]
                resized_pred = resized_pred[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]
                # tensor2semantic(resized_gt, ind=0).show()
                # tensor2semantic(resized_pred, ind=0).show()

                resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)], [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='bilinear', align_corners=True)
                resized_rgb = resized_rgb[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]

                pred_depth = get_depth_predict(filenames[idx])

                resized_depth = pred_depth
                # resized_gt = resized_gt.cpu().numpy().astype(np.uint8)
                # resized_pred = resized_pred.cpu().numpy().astype(np.uint8)
                # resized_depth = pred_depth
                # visualize_semantic(gt[0,:,:]).show()
                # visualize_semantic(pred[0,:,:]).show()
                # pred_depth = get_depth_predict(filenames[idx])
                # pred_depth = F.interpolate(pred_depth.float(), [height, width], mode='bilinear', align_corners=True)

                # resized_pred = resized_pred.unsqueeze(1)
                # resized_gt = resized_gt.unsqueeze(1)
                # tensor2semantic(resized_pred, ind=0).show()
                # tensor2semantic(resized_gt, ind=0).show()
                # tensor2disp(1 / pred_depth, vmax=0.15, ind=0).show()
                # disp_map = tensor2disp(1 / pred_depth, vmax=0.15, ind=0)
                # disp_map_combined = combined_2_img(disp_map, tensor2rgb(resized_rgb, ind=0), 0.5)

                pred_depth_cropped = resized_depth[:,:,height_s : height_e, width_s : width_e]
                resized_pred_cropped = resized_pred[:,:,height_s : height_e, width_s : width_e]
                resized_gt_cropped = resized_gt[:,:,height_s : height_e, width_s : width_e]
                resized_rgb_cropped = resized_rgb[:,:,height_s : height_e, width_s : width_e]
                # tensor2semantic(resized_pred_cropped, ind=0).show()
                # tensor2semantic(resized_gt_cropped, ind=0).show()
                # tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0).show()
                figseman_gt = tensor2semantic(resized_gt_cropped, ind=0)
                figseman_pred = tensor2semantic(resized_pred_cropped, ind=0)
                figdisp = tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0)
                combined_2_img(figseman_pred, figdisp, 0.7).show()
                combined_2_img(figseman_gt, figdisp, 0.7).show()
                seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped)
            else:
                pred_depth = getDisp.read_disparity_predict(filenames[idx])
                pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).unsqueeze(0)
                pred_depth = F.interpolate(pred_depth, [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]],
                                        mode='bilinear', align_corners=True)
                # tensor2disp(pred_depth, ind=0, percentile=95).show()
                if pred_depth.shape[2] < 371 or pred_depth.shape[3] < 1197:
                    print("Error: predicted depth is smaller than the expected crop region")
                pred_depth_cropped = pred_depth[:, :, 153:371, 44:1197]
                resized_pred_cropped = resized_pred[:, :, 153:371, 44:1197]
                resized_gt_cropped = resized_gt[:, :, 153:371, 44:1197]
                resized_rgb_cropped = resized_rgb[:, :, 153:371, 44:1197]
                # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0)
                # figseman = tensor2semantic(resized_gt_cropped, ind=0)
                # figcombined = combined_2_img(figdisp, figseman, 0.7)
                # figcombined.show()
                #
                # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0)
                # figseman = tensor2semantic(resized_pred_cropped, ind=0)
                # figcombined = combined_2_img(figdisp, figseman, 0.7)
                # figcombined.show()
                seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped)
                ms.compute_edge_distance(pred_depth_cropped, resized_pred_cropped, resized_gt_cropped)
            resized_pred_list.append(resized_pred_cropped.squeeze(1).detach().cpu().numpy())
            resized_morph_list.append(seman_morphed.squeeze(1).detach().cpu().numpy().astype(np.uint8))
            groundTruthNp_list.append(resized_gt_cropped.squeeze(1).detach().cpu().numpy())



            sv_path = '/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph'
            gt_blended = combined_2_img(tensor2semantic(resized_gt_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            pred_blended = combined_2_img(tensor2semantic(resized_pred_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            morph_blended = combined_2_img(tensor2semantic(seman_morphed, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            improved_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) > (resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte())
            deterized_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) < (
                        resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte())
            improve_blend = combined_2_img(tensor2disp(improved_region, vmax = 1, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.6)
            deterized_blend = combined_2_img(tensor2disp(deterized_region, vmax = 1, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.6)
            cat_img = concat_imgs([gt_blended, pred_blended, morph_blended, improve_blend, deterized_blend])
            cat_img.save(os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph', str(idx) + '.png'))

            # groundTruthNp = resized_gt_cropped.squeeze(1).detach().cpu().numpy()
            # if is_eval_morph:
            #     predictionNp = seman_morphed.byte().squeeze(1).detach().cpu().numpy()
            # else:
            #     predictionNp = resized_pred_cropped.squeeze(1).detach().cpu().numpy()
            print("Finish %dth batch" % idx)
    ms.show_dis_comp()
    for pp in range(2):
        nbPixels = 0
        count255 = 0
        confMatrix = generateMatrix(args)
        for k in range(len(resized_pred_list)):
            groundTruthNp = groundTruthNp_list[k]
            if pp == 0:
                predictionNp = resized_pred_list[k]
            else:
                predictionNp = resized_morph_list[k]
            nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2]


            encoding_value = 256  # precomputed
            encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp
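            # Pack each (gt, pred) label pair into one integer (gt * 256 + pred); train ids stay
            # below 256, so the pairs remain unique and np.unique can accumulate the confusion
            # matrix without looping over individual pixels.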

            values, cnt = np.unique(encoded, return_counts=True)

            for value, c in zip(values, cnt):
                pred_id = value % encoding_value
                gt_id = int((value - pred_id) / encoding_value)
                if pred_id == 255 or gt_id == 255:
                    count255 = count255 + c
                    continue
                if gt_id not in args.evalLabels:
                    printError("Unknown label with id {:}".format(gt_id))
                confMatrix[gt_id][pred_id] += c

        if confMatrix.sum() + count255 != nbPixels:
            printError(
                'Number of analyzed pixels and entries in confusion matrix disagree: confMatrix {}, pixels {}'.format(
                    confMatrix.sum(), nbPixels))
        classScoreList = {}
        for label in args.evalLabels:
            labelName = trainId2label[label].name
            classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args)
        vals = np.array(list(classScoreList.values()))
        print(vals)
        mIOU = np.mean(vals[np.logical_not(np.isnan(vals))])
        if pp == 0:
            print("Original mIOU is %f" % mIOU)
        else:
            print("Morphed mIOU is %f" % mIOU)
コード例 #10
0
ファイル: evaluate_depth_rans.py プロジェクト: starbike/DNet
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array([[0.58, 0, 0.5, 0],
                  [0, 1.92, 0.5, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=np.float32)
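    # K is a resolution-normalized intrinsics matrix (fx = 0.58 * width, fy = 1.92 * height once
    # multiplied by the image size), the convention monodepth2-style KITTI code uses; ScaleRecovery
    # below presumably rescales it to the actual ground-truth resolution.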

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(
            opt.data_path, filenames,
            encoder_dict['height'], encoder_dict['width'],
            [0], 4, is_train=False)
        dataloader = DataLoader(
            dataset, 16, shuffle=False, num_workers=opt.num_workers,
            pin_memory=True, drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])
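                    # Combine each prediction with the mirrored prediction of its horizontally
                    # flipped copy (the disparity post-processing step from Godard et al.).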

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.eval_object:
        object_masks = []
        for line in filenames:
            line = line.split()
            folder, frame_index = line[0], int(line[1])

            object_mask_filename = os.path.join(
                os.path.dirname(__file__),
                "object_masks",
                folder,
                "{:010d}.npy".format(int(frame_index)))
            object_mask = np.load(object_mask_filename)
            object_masks.append(object_mask)

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
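            # KITTI benchmark submissions expect 16-bit PNGs storing depth_in_meters * 256,
            # hence the clipping to 80 m and the uint16 conversion above.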
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
            "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.scaling = "disable" 
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []
    ex_logs = []
    mean_scale = []
    side_map = {"2": 2, "3": 3, "l": 2, "r": 3}
    #resize_ori = transforms.Resize((pred_disps.shape[1],pred_disps.shape[2]),interpolation=Image.ANTIALIAS)

    for i in range(pred_disps.shape[0]):
        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]
        line = filenames[i].split()
        folder = line[0]
        frame_index = line[1]
        side = side_map[line[2]]
        color = pil_loader(get_image_path(folder,int(frame_index),side))
        #color = pil_loader('/mnt/sdb/xuefeng_data/dkit_dataset/20200629_mechanical_fast/images/{:006d}.png'.format(i))
        #color = color.crop((0,191,640,383))
        

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp
        

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array(
                [0.40810811 * gt_height, 0.99189189 * gt_height,
                 0.03594771 * gt_width,  0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
            if opt.eval_object:
                object_mask = object_masks[i].astype(bool)  # np.bool was removed in newer NumPy

        else:
            mask = gt_depth > 0
        
        if opt.scaling == "gt":
            ratio = np.median(gt_depth[mask]) / np.median(pred_depth[mask])
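            # Monocular predictions are scale-ambiguous, so "gt" scaling aligns the median of the
            # predicted depth with the median of the ground truth inside the evaluation mask.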
            if opt.eval_object:
                mask = np.logical_and(mask, object_mask)
        elif opt.scaling == "dgc":
            scale_recovery = ScaleRecovery(1, gt_height, gt_width, K).cuda()
            #scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()
            pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
            ratio1,surface_normal1,ground_mask1,cam_points1 = scale_recovery(pred_depth)
            #ratio = ratio1.cpu().item()
            surface_normal = surface_normal1.cpu()[0,0,:,:].numpy()
            ground_mask = ground_mask1.cpu()[0,0,:,:].numpy()
            pred_depth = pred_depth[0].cpu().numpy()
            cam_points=cam_points1.cpu().numpy()
            cam_points2=cam_points.transpose(1,2,0)
            cam_points_masked = cam_points2[np.where(ground_mask==1)]
            np.random.shuffle(cam_points_masked) 
            cam_points4 = np.array(cam_points_masked)
            print(cam_points4.shape)
            cam_points4 = cam_points4[:2000,:]
            cam_points3 = np.concatenate((cam_points4, np.ones((cam_points4.shape[0], 1))), axis=1)
            print(cam_points3.shape)
            plane,inliers = fit_plane_LSE_RANSAC(cam_points3)
            #print(plane)
            ratio_rans = abs(1.65 / plane[-1])
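            # A ground plane is fit with RANSAC to the 3D points inside the predicted ground mask;
            # the last plane coefficient is treated as the camera-to-ground distance, and with the
            # KITTI camera mounted roughly 1.65 m above the road, 1.65 / |plane[-1]| recovers an
            # absolute scale for the monocular depth.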
        else:
            ratio = 1
        #print(ratio)
        #print(max(pred_depth))
        #print(min(pred_depth))
        
        pred_depth_ori = pred_depth*mask
        gt_depth_ori = gt_depth*mask
        pred_depth_ori = np.where(mask==1,pred_depth_ori,1)
        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]
        #mean_scale.append(np.mean(gt_depth/pred_depth))

        '''
        error_try = 100
        scale_abs = 0 
        for ratio_try in np.arange(0.1,50,step=0.1):
            pred_depth1=pred_depth * ratio_try
            error_tmp = compute_errors(gt_depth, pred_depth1)[0]
            #print(error_tmp)
            if error_tmp < error_try:
                error_try = error_tmp
                scale_abs = ratio_try
        div_scale = gt_depth_ori / pred_depth_ori
        #print(div_scale.shape)
        div_values1 = div_scale[mask]
        div_scale = (div_scale-scale_abs)/scale_abs
        div_values = div_scale[mask]
        div_rmse = sqrt(sum((div_values1-scale_abs)*(div_values1-scale_abs))/len(div_values1))
        print(min(div_values),max(div_values))
        ex_logs.append([i,min(div_values), max(div_values), div_rmse,scale_abs])
        #print(div_scale.shape)
        #div_scale = div_scale/np.max(div_scale)
        mu = np.mean(div_values1)
        sigma = np.std(div_values1)
        print(min(div_values1),max(div_values1))
        fig,ax=plt.subplots()
        n, bins, patches = ax.hist(div_values1,150,range=(3,130),density = True)
        y = norm.pdf(bins, mu, 0.8*sigma)
        ax.plot(bins, y, 'r')
        plt.xlabel('Scale')
        plt.ylabel('Density')
        plt.savefig(os.path.join(os.path.dirname(__file__), "hist_imgs2","{:010d}.jpg".format(i)))
        plt.close()
        
        #blend_img = blending_imgs(div_scale, color,i)
        #blend_img.save(os.path.join(os.path.dirname(__file__), "blend_imgs","{:010d}.jpg".format(i)))
        
        
        blending_imgs(surface_normal,color,i,'surface_normals')
        blending_imgs(ground_mask,color,i,'ground_masks')
        '''
        if opt.scaling == "dgc":
            # ratio_rans and ground_mask only exist when DGC/RANSAC scaling was run above
            blending_imgs(ground_mask, color, i, ground_mask)
            pred_depth *= ratio_rans
            ratios.append(ratio_rans)
        else:
            pred_depth *= ratio
            ratios.append(ratio)

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
        #blending_imgs(div_scale, color,i,mask)

        if len(gt_depth) != 0:
            errors.append(compute_errors(gt_depth, pred_depth))
    '''
    fl = open('ex.txt','w')
    fl.writelines(str(ex_logs))
    fl.close()
    '''
    #np.save('mean_scale.npy', mean_scale)

    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    
    print("\n-> Done!")
コード例 #11
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    print("-> Loading weights from {}".format(opt.load_weights_folder))

    # Load Encoder and Decoder
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
    encoder_dict = torch.load(encoder_path)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc,
                                          num_output_channels=3)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    encoder_path = os.path.join(
        '/home/shengjie/Documents/Project_SemanticDepth/tmp/patchmatch_bs/weights_13',
        "encoder.pth")
    decoder_path = os.path.join(
        '/home/shengjie/Documents/Project_SemanticDepth/tmp/patchmatch_bs/weights_13',
        "depth.pth")
    encoder_dict = torch.load(encoder_path)

    encoder_bs = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder_bs = networks.DepthDecoder(encoder.num_ch_enc,
                                             num_output_channels=3)

    model_dict = encoder.state_dict()
    encoder_bs.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder_bs.load_state_dict(torch.load(decoder_path))

    encoder_bs.cuda()
    encoder_bs.eval()
    depth_decoder_bs.cuda()
    depth_decoder_bs.eval()

    filenames = readlines(
        '/home/shengjie/Documents/Project_SemanticDepth/splits/eigen/test_files.txt'
    )

    opt.frame_ids.append("s")
    dataset = datasets.KITTIRAWDataset(opt.data_path,
                                       filenames,
                                       encoder_dict['height'],
                                       encoder_dict['width'], [0],
                                       4,
                                       is_train=False)
    dataloader = DataLoader(dataset,
                            16,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    count = 0
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            for key, ipt in inputs.items():
                if not (key == 'entry_tag' or key == 'syn_tag'):
                    inputs[key] = ipt.to(torch.device("cuda"))
            input_color = inputs[("color", 0, 0)].cuda()
            outputs = depth_decoder(encoder(input_color))
            outputs_bs = depth_decoder_bs(encoder_bs(input_color))
            for i in range(input_color.shape[0]):
                figbs = tensor2disp(outputs_bs[('disp', 0)][:, 2:3, :, :],
                                    vmax=0.1,
                                    ind=i)
                fig2 = tensor2disp(outputs[('disp', 0)][:, 2:3, :, :],
                                   vmax=0.1,
                                   ind=i)
                figrgb = tensor2rgb(inputs[("color", 0, 0)], ind=i)
                combined = np.concatenate(
                    [np.array(figrgb),
                     np.array(figbs),
                     np.array(fig2)])
                pil.fromarray(combined).save(
                    os.path.join(
                        '/media/shengjie/c9c81c9f-511c-41c6-bfe0-2fc19666fb32/Visualizations/Project_SemanDepth/vls_patchmatch_test_visualization',
                        str(count) + '.png'))
                count = count + 1
コード例 #12
0
ファイル: get_metrics.py プロジェクト: dsGonz/md2_experiments
# Load depth decoder network with weights
loaded_dict = torch.load(depth_decoder_path)
depth_decoder.load_state_dict(loaded_dict)

# Set to eval mode on GPU
encoder.cuda()
depth_decoder.cuda()
encoder.eval()
depth_decoder.eval()


# Load validation data
print('Loading data...')
data_path = join(dpath_root, dset_type)
filenames = readlines(join('splits', split, 'val_files.txt'))
dataset = datasets.KITTIRAWDataset(data_path, filenames, loaded_dict_enc['height'], loaded_dict_enc['width'], [0], num_scales, is_train=False, img_ext='.png')
dataloader = DataLoader(dataset, 1, shuffle=False, num_workers=1, pin_memory=True, drop_last=False)
print('Loaded {} validation images from SPLIT: {}  DATASET: {}'.format(len(dataloader), split, dset_type))


# Create dirs for model outputs
dest_path = join(abspath('./outputs'), model_name)
if not os.path.isdir(dest_path):
    os.makedirs(dest_path)
    if write_depths:
        os.makedirs(join(dest_path, 'dense_depth'))
        os.makedirs(join(dest_path, 'registered_depth'))
    if visualize:
        os.makedirs(join(dest_path, 'viz'))

# Get predictions. Time the duration
コード例 #13
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    if viewStereoMask:
        stereoMaskComputer = StereoMask()
        stereoMaskComputer.cuda()

    if viewSurfaceNormal:
        compsurfnorm = ComputeSurfaceNormal(height=opt.height, width=opt.width, batch_size=opt.batch_size)
        compsurfnorm.cuda()

    if viewTypeWiseRegularization:
        typeWReg = TypeWiseRegularization()
        typeWReg.cuda()

    if viewBorderWiseRegularization:
        borderWiseReg = BorderWiseRegularization(batchNum=opt.batch_size, width=opt.width, height=opt.height).cuda()

    if viewMonoMsak:
        monoMask = MonocularMask()
        monoMask.cuda()
    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)
    tensor23dPts = Tensor23dPts(height=opt.height, width=opt.width)

    if opt.use_stereo:
        opt.frame_ids.append("s")

    dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,opt.height, opt.width, opt.frame_ids, 4, is_train=False, load_gt_semantics=opt.load_gt_semantics, load_gt_velodine=opt.load_gt_velodine)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    dirpath = '/media/shengjie/other/sceneUnderstanding/semantic_regularized_unsupervised_depth_estimation/visualization'
    sv_path = os.path.join(dirpath, opt.model_name)
    index = 0

    if viewMonoMsak:
        num_pose_frames = 2
        posenet_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
        posenet_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
        posenet_encoder_dict = torch.load(posenet_encoder_path)
        posenet_decoder_dict = torch.load(posenet_decoder_path)
        posenet_encoder = networks.ResnetEncoder(
            opt.num_layers,
            opt.weights_init == "pretrained",
            num_input_images=num_pose_frames)

        posenet_decoder = networks.PoseDecoder(
            encoder.num_ch_enc,
            num_input_features=1,
            num_frames_to_predict_for=2)
        posenet_encoder.load_state_dict({k: v for k, v in posenet_encoder_dict.items() if k in posenet_encoder_dict})
        posenet_decoder.load_state_dict({k: v for k, v in posenet_decoder_dict.items() if k in posenet_decoder_dict})
        posenet_encoder = posenet_encoder.cuda()
        posenet_decoder = posenet_decoder.cuda()

    if not os.path.exists(sv_path):
        os.makedirs(sv_path)

    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            for key, ipt in inputs.items():
                if not(key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta'):
                    inputs[key] = ipt.to(torch.device("cuda"))
            input_color = inputs[("color", 0, 0)]
            features = encoder(input_color)
            outputs = dict()
            outputs.update(depth_decoder(features))

            dispMap = outputs[('disp', 0)]
            scaledDisp, depthMap = disp_to_depth(dispMap, opt.min_depth, opt.max_depth)

            foreGroundMask = torch.ones(scaledDisp.shape, device=torch.device("cuda")).byte()
            scaled_smeantic_label = F.interpolate(inputs[('semantic_label', 0)].cpu().float(), size=(scaledDisp.shape[2], scaledDisp.shape[3]), mode='nearest').cuda().byte()
            for m in foregroundType:
                foreGroundMask = foreGroundMask * (scaled_smeantic_label != m)
            foreGroundMask = (1 - foreGroundMask)
            foreGroundMask = foreGroundMask.float()

            if viewStereoMask:
                scale = 0
                T = inputs["stereo_T"]
                real_scale_disp = scaledDisp * (torch.abs(inputs[("K", scale)][:, 0, 0] * T[:, 0, 3]).view(opt.batch_size, 1, 1, 1).expand_as(scaledDisp))
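                # Multiplying the normalized disparity by |fx * baseline| (fx from K, baseline from
                # the stereo extrinsics T) converts it into a disparity expressed in pixels at the
                # real camera scale.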
                stereoMask = stereoMaskComputer.computeMask(real_scale_disp, T[:, 0, 3])
                stereoSemanticalMask = stereoMaskComputer.computeSemanticalMask(stereoMask, foreGroundMask, T[:, 0, 3])
                # stereoMask_fig = tensor2disp(stereoMask, ind=index, vmax=1)
                # stereoSemanticalMask_fig = tensor2disp(stereoSemanticalMask, ind=index, vmax=1)
                # foreGroundMask_fig = tensor2disp(foreGroundMask, ind=index, vmax=1)

            if viewSurfaceNormal:
                surnormMap_fig = compsurfnorm.visualize(depthMap=depthMap, invcamK=inputs['invcamK'], viewindex = index)
                surnormMap = compsurfnorm(depthMap=depthMap, invcamK=inputs['invcamK'])

            if viewTypeWiseRegularization:
                wallType = [2, 3, 4]  # Building, wall, fence
                roadType = [0, 1, 9]  # road, sidewalk, terrain
                permuType = [5, 7]  # Pole, traffic sign
                chanWinSize = 5

                wallMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)
                roadMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)
                permuMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)

                for m in wallType:
                    wallMask = wallMask * (scaled_smeantic_label != m)
                wallMask = 1 - wallMask
                wallMask = wallMask[:, :, 1:-1, 1:-1]

                for m in roadType:
                    roadMask = roadMask * (scaled_smeantic_label != m)
                roadMask = 1 - roadMask
                roadMask = roadMask[:, :, 1:-1, 1:-1]

                for m in permuType:
                    permuMask = permuMask * (scaled_smeantic_label != m)
                permuMask = 1 - permuMask
                permuMask = permuMask[:, :, 1:-1, 1:-1]

                BdErrFig, viewRdErrFig = typeWReg.visualize_regularizeBuildingRoad(surnormMap, wallMask, roadMask,
                                                                                 dispMap, viewInd=index)
                padSize = int((chanWinSize - 1) / 2)
                permuMask = permuMask[:, :, padSize: -padSize, padSize: -padSize]
                surVarFig = typeWReg.visualize_regularizePoleSign(surnormMap, permuMask, dispMap, viewInd=index)

            if viewBorderWiseRegularization:
                wallType = [2, 3, 4]  # Building, wall, fence
                roadType = [0, 1, 9]  # road, sidewalk, terrain
                wallTypeMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)
                roadTypeMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)
                foreGroundMask = torch.ones((opt.batch_size, 1, opt.height, opt.width), device=torch.device("cuda"), dtype=torch.uint8)

                for m in wallType:
                    wallTypeMask = wallTypeMask * (scaled_smeantic_label != m)
                wallTypeMask = (1 - wallTypeMask).float()

                for m in roadType:
                    roadTypeMask = roadTypeMask * (scaled_smeantic_label != m)
                roadTypeMask = (1 - roadTypeMask).float()

                for m in foregroundType:
                    foreGroundMask = foreGroundMask * (scaled_smeantic_label != m)
                foreGroundMask = (1 - foreGroundMask).float()

                borderWiseReg.visualize(
                    realDepth=depthMap, dispAct=depthMap,
                    foredgroundMask=foreGroundMask, wallTypeMask=wallTypeMask, groundTypeMask=roadTypeMask,
                    intrinsic=inputs['realIn'], extrinsic=inputs['realEx'], semantic=scaled_smeantic_label, viewInd=0)

            if viewMonoMsak:
                extrinsics = computePose(inputs, opt, depthMap, posenet_encoder, posenet_decoder)
                depthMap_cur = depthMap
                depthMap_prev = computeDepthMap(inputs['color', -1, 0], encoder, depth_decoder, opt.min_depth, opt.max_depth)
                depthMap_next = computeDepthMap(inputs['color', 1, 0], encoder, depth_decoder, opt.min_depth, opt.max_depth)
                pts_cur = depth23dpts(depthMap_cur, inputs['intrinsic'])
                pts_next = depth23dpts(depthMap_prev, inputs['intrinsic'], extrinsics)
                pts_prev = depth23dpts(depthMap_next, inputs['intrinsic'], extrinsics)

            if opt.eval_stereo:
                real_scale_depth = depthMap * STEREO_SCALE_FACTOR
            elif opt.eval_mono:
                ratio = torch.mean(inputs['depth_gt'][inputs['depth_gt'] > 0.1]) / torch.mean(depthMap)
                real_scale_depth = depthMap * ratio
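                # Monocular evaluation rescales the prediction by the ratio between the mean
                # ground-truth depth (over valid LiDAR points) and the mean predicted depth;
                # stereo models already carry real scale via STEREO_SCALE_FACTOR above.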

            gtmask = (inputs['depth_gt'] > 0).float()
            gtdepth = inputs['depth_gt']
            velo = inputs['velo']
            tensor23dPts.visualize3d(
                real_scale_depth, ind=index, intrinsic_in=inputs['realIn'], extrinsic_in=inputs['realEx'], gtmask_in=gtmask,
                gtdepth_in=gtdepth, semanticMap=scaled_smeantic_label, velo_in=velo, rgb_in=inputs[('color', 's', 0)],
                disp_in=outputs[('disp', 0)]
                                   )

            suppressed_disp_Map = dispMap * (1 - stereoSemanticalMask)
            semantic_fig = tensor2semantic(inputs[('semantic_label', 0)], ind=index, isGt=True).resize([opt.width, opt.height], pil.NEAREST)
            disp_fig = tensor2disp(dispMap, ind = index)
            suppressed_disp_Map_fig = tensor2disp(suppressed_disp_Map, ind = index)
            rgb_fig = tensor2rgb(inputs[("color", 0, 0)], ind = index)
            combined_fig1 = pil.fromarray((np.array(semantic_fig) * 0.15 + np.array(disp_fig)[:,:,0:3] * 0.85).astype(np.uint8))
            combined_fig2 = pil.fromarray(
                (np.array(rgb_fig) * 0.2 + np.array(disp_fig)[:, :, 0:3] * 0.8).astype(np.uint8))
            combined_fig = pil.fromarray(np.concatenate([np.array(combined_fig1), np.array(combined_fig2), np.array(suppressed_disp_Map_fig)[:,:,0:3], np.array(surnormMap_fig)], axis=0))
            combined_fig.save(os.path.join(sv_path, str(idx) + ".png"))
            print("save %s" % (str(idx) + ".png"))
コード例 #14
0
    options = MonodepthOptions()
    opt = options.parse()

    splits_dir = os.path.join(os.path.dirname(__file__), "splits")
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    filenames = readlines(
        os.path.join(splits_dir, opt.split, "train_files.txt"))
    height = 288
    width = 960
    dataset = datasets.KITTIRAWDataset(
        opt.data_path,
        filenames,
        height,
        width, [0],
        4,
        is_train=False,
        tag=opt.dataset,
        img_ext='png',
        load_meta=opt.load_meta,
        is_load_semantics=opt.use_kitti_gt_semantics,
        is_predicted_semantics=opt.is_predicted_semantics,
        load_morphed_depth=True)

    dataloader = DataLoader(dataset,
                            1,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pred_disps = []
コード例 #15
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    ts = time.time()
    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        # filenames = readlines(os.path.join(splits_dir, opt.split, "train_files.txt"))
        filenames = collect_all_entries(opt.data_path)
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'], [0],
                                           4,
                                           is_train=False)
        dataloader = DataLoader(dataset,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True,
                                drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v
             for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        mapping = {'l': 'image_02', 'r': 'image_03'}

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        save_dir = opt.save_dir
        print("-> Saving out predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        imgCount = 0
        with torch.no_grad():
            for idx, data in enumerate(dataloader):
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])

                depth = STEREO_SCALE_FACTOR / pred_disp
                depth = np.clip(depth, 0, 80)
                depth = np.uint16(depth * 256)

                for k in range(depth.shape[0]):
                    comps = filenames[imgCount].split(" ")
                    save_folder = os.path.join(save_dir, comps[0],
                                               mapping[comps[2][0]])
                    os.makedirs(save_folder, exist_ok=True)
                    save_path = os.path.join(save_folder, comps[1] + '.png')
                    cv2.imwrite(save_path, depth[k, :, :])
                    te = time.time()
                    imgCount = imgCount + 1
                print("%d finished, %f hours left" %
                      (idx, (te - ts) / imgCount *
                       (len(filenames) - imgCount) / 60 / 60))
コード例 #16
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    viewPythonVer = False
    viewCudaVer = True

    if viewCudaVer:
        bnmorph = BNMorph(height=opt.height, width=opt.width).cuda()

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(
            opt.data_path,
            filenames,
            opt.height,
            opt.width,
            opt.frame_ids,
            4,
            is_train=False,
            tag=opt.dataset,
            load_meta=True,
            direction_left=opt.direction_left)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            opt.data_path,
            filenames,
            opt.height,
            opt.width,
            opt.frame_ids,
            4,
            is_train=False,
            tag=opt.dataset,
            is_load_semantics=opt.use_kitti_gt_semantics,
            is_predicted_semantics=opt.is_predicted_semantics,
            direction_left=opt.direction_left)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_images=2)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(
            encoder.num_ch_enc,
            isSwitch=True,
            isMulChannel=opt.isMulChannel,
            outputtwoimage=(opt.outputtwoimage == True))
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    viewIndex = 0
    tool = grad_computation_tools(batch_size=opt.batch_size,
                                  height=opt.height,
                                  width=opt.width).cuda()
    auto_morph = AutoMorph(height=opt.height, width=opt.width)
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            for key, ipt in inputs.items():
                if not (key == 'height' or key == 'width' or key == 'tag'
                        or key == 'cts_meta' or key == 'file_add'):
                    inputs[key] = ipt.to(torch.device("cuda"))

            input_color = torch.cat(
                [inputs[("color_aug", 0, 0)], inputs[("color_aug", 's', 0)]],
                dim=1).cuda()
            # input_color = inputs[("color", 0, 0)].cuda()
            # tensor2rgb(inputs[("color_aug", 0, 0)], ind=0).show()
            # tensor2rgb(inputs[("color_aug", 's', 0)], ind=0).show()
            features = encoder(input_color)
            outputs = dict()
            outputs.update(
                depth_decoder(features,
                              computeSemantic=True,
                              computeDepth=False))
            outputs.update(
                depth_decoder(features,
                              computeSemantic=False,
                              computeDepth=True))

            if not opt.view_right:
                disparityMap = outputs[('mul_disp', 0)][:, 0:1, :, :]
            else:
                disparityMap = outputs[('mul_disp', 0)][:, 1:2, :, :]
            depthMap = torch.clamp(disparityMap, max=80)
            fig_seman = tensor2semantic(inputs['seman_gt'],
                                        ind=viewIndex,
                                        isGt=True)
            fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=viewIndex)
            fig_disp = tensor2disp(disparityMap, ind=viewIndex, vmax=0.1)

            segmentationMapGt = inputs['seman_gt']
            foregroundType = [
                5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
            ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
            foregroundMapGt = torch.ones(disparityMap.shape).cuda().byte()
            for m in foregroundType:
                foregroundMapGt = foregroundMapGt * (segmentationMapGt != m)
            foregroundMapGt = (1 - foregroundMapGt).float()

            disparity_grad = torch.abs(
                tool.convDispx(disparityMap)) + torch.abs(
                    tool.convDispy(disparityMap))
            semantics_grad = torch.abs(
                tool.convDispx(foregroundMapGt)) + torch.abs(
                    tool.convDispy(foregroundMapGt))
            disparity_grad = disparity_grad * tool.zero_mask
            semantics_grad = semantics_grad * tool.zero_mask

            disparity_grad_bin = disparity_grad > tool.disparityTh
            semantics_grad_bin = semantics_grad > tool.semanticsTh

            # tensor2disp(disparity_grad_bin, ind=viewIndex, vmax=1).show()
            # tensor2disp(semantics_grad_bin, ind=viewIndex, vmax=1).show()

            if viewPythonVer:
                disparity_grad_bin = disparity_grad_bin.detach().cpu().numpy()
                semantics_grad_bin = semantics_grad_bin.detach().cpu().numpy()

                disparityMap_to_processed = disparityMap.detach().cpu().numpy(
                )[viewIndex, 0, :, :]
                dispMap_morphed, dispMap_morphRec = auto_morph.automorph(
                    disparity_grad_bin[viewIndex, 0, :, :],
                    semantics_grad_bin[viewIndex,
                                       0, :, :], disparityMap_to_processed)

                fig_disp_processed = visualizeNpDisp(dispMap_morphed, vmax=0.1)
                overlay_processed = pil.fromarray(
                    (np.array(fig_disp_processed) * 0.7 +
                     np.array(fig_seman) * 0.3).astype(np.uint8))
                overlay_org = pil.fromarray(
                    (np.array(fig_disp) * 0.7 +
                     np.array(fig_seman) * 0.3).astype(np.uint8))
                combined_fig = pil.fromarray(
                    np.concatenate([
                        np.array(overlay_org),
                        np.array(overlay_processed),
                        np.array(fig_disp),
                        np.array(fig_disp_processed)
                    ],
                                   axis=0))
                combined_fig.save(
                    "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization/border_morph_l2_3/"
                    + str(idx) + ".png")
            if viewCudaVer:
                # morphedx, morphedy = bnmorph.find_corresponding_pts(disparity_grad_bin, semantics_grad_bin, disparityMap, fig_seman, 10)
                # morphedx = (morphedx / (opt.width - 1) - 0.5) * 2
                # morphedy = (morphedy / (opt.height - 1) - 0.5) * 2
                # grid = torch.cat([morphedx, morphedy], dim = 1).permute(0,2,3,1)
                # disparityMap_morphed = F.grid_sample(disparityMap, grid, padding_mode="border")
                # fig_morphed = tensor2disp(disparityMap_morphed, vmax=0.08, ind=0)
                # fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0)
                # fig_combined = pil.fromarray(np.concatenate([np.array(fig_morphed), np.array(fig_disp)], axis=0))
                # fig_combined.show()
                svpath = os.path.join(opt.load_weights_folder).split('/')
                try:
                    svpath = os.path.join(
                        "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization",
                        svpath[-3])
                    os.mkdir(svpath)
                except FileExistsError:
                    pass
                morphedx, morphedy, coeff = bnmorph.find_corresponding_pts(
                    disparity_grad_bin, semantics_grad_bin)
                morphedx = (morphedx / (opt.width - 1) - 0.5) * 2
                morphedy = (morphedy / (opt.height - 1) - 0.5) * 2
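                # F.grid_sample expects sampling coordinates normalized to [-1, 1], so the morphed
                # pixel locations are rescaled from image coordinates before warping the disparity.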
                grid = torch.cat([morphedx, morphedy],
                                 dim=1).permute(0, 2, 3, 1)
                disparityMap_morphed = F.grid_sample(disparityMap,
                                                     grid,
                                                     padding_mode="border")

                fig_morphed = tensor2disp(disparityMap_morphed,
                                          vmax=0.08,
                                          ind=0)
                fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0)
                fig_morphed_overlayed = pil.fromarray(
                    (np.array(fig_seman) * 0.5 +
                     np.array(fig_morphed) * 0.5).astype(np.uint8))
                fig_disp_overlayed = pil.fromarray(
                    (np.array(fig_seman) * 0.5 +
                     np.array(fig_disp) * 0.5).astype(np.uint8))
                # fig_rgb =  tensor2rgb(inputs[("color", 0, 0)], ind=0)
                # fig_combined = pil.fromarray(np.concatenate([np.array(fig_disp_overlayed), np.array(fig_morphed_overlayed), np.array(fig_disp), np.array(fig_morphed), np.array(fig_rgb)], axis=0))
                fig_combined = pil.fromarray(
                    np.concatenate([
                        np.array(fig_disp_overlayed),
                        np.array(fig_morphed_overlayed),
                        np.array(fig_disp),
                        np.array(fig_morphed)
                    ],
                                   axis=0))
                fig_combined.save(os.path.join(svpath, str(idx) + ".png"))
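
Note: the warp above relies on torch.nn.functional.grid_sample, whose sampling grid expects (x, y) coordinates normalized to [-1, 1] rather than pixel indices, which is why morphedx/morphedy are rescaled with (v / (size - 1) - 0.5) * 2 before the call. A minimal, self-contained sketch of that convention follows; the helper name and the explicit align_corners=True are illustrative assumptions, not code from this project.

import torch
import torch.nn.functional as F

def warp_with_pixel_coords(img, px, py):
    # img: [N, C, H, W]; px, py: [N, 1, H, W] pixel coordinates to sample from
    _, _, h, w = img.shape
    gx = (px / (w - 1) - 0.5) * 2                          # [0, W-1] -> [-1, 1]
    gy = (py / (h - 1) - 0.5) * 2                          # [0, H-1] -> [-1, 1]
    grid = torch.cat([gx, gy], dim=1).permute(0, 2, 3, 1)  # [N, H, W, 2], (x, y) order
    return F.grid_sample(img, grid, padding_mode="border", align_corners=True)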
コード例 #17
0
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    # The metric info here forcibly compresses the gt values into the same range as the scanner,
    # which keeps the values as close to metric scale as possible
    # but for

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    for data in tqdm(dataloader):
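        # NOTE: the network input below is a fixed NYU image rather than the batch from the
        # loader (the reframe(...) call that would use it is commented out); only depth_gt
        # is taken from `data`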

        image = cv2.imread('/home/roit/datasets/nyudepthv2/img/0001.jpg')
        image = cv2.resize(image, (384, 288))
        image = np.transpose(image, [2, 0, 1])
        image = torch.tensor(image).cuda() / 255.
        image = image.unsqueeze(0)

        # input_color = reframe(encoder_mode,data,frame_sides=frame_sides,key='color')
        # input_color = input_color.cuda()

        features = encoder(image)
        disp = decoder(*features)

        depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        pred_depth = pred_depth.cpu()[:, 0].numpy()
        depth_gt = depth_gt.cpu()[:, 0].numpy()

        pred_depths.append(pred_depth)
        gt_depths.append(depth_gt)
    gt_depths = np.concatenate(gt_depths, axis=0)

    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []

    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))
        # crop
        # if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably a very old legacy check
        if opts['dataset']['type'] == "kitti":  # ???, probably a very old legacy check
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)

        pred = pred[mask]  # also flattens to 1-D
        gt = gt[mask]

        ratio = np.median(gt) / np.median(pred)  # median scaling: at eval time, linearly rescale pred so it matches gt as closely as possible
        ratios.append(ratio)
        pred *= ratio

        pred[pred < MIN_DEPTH] = MIN_DEPTH  # clamp predictions below the valid depth range
        pred[pred > MAX_DEPTH] = MAX_DEPTH  # clamp predictions above the valid depth range
        metric = compute_errors(gt, pred, mode=metric_mode)
        metrics.append(metric)

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)

    # print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
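
Note: compute_errors is called above with a metric_mode argument whose implementation is not part of this listing. For reference, the seven numbers printed at the end (abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3) are usually the standard Eigen-style depth metrics; a sketch of such a function, without the project-specific mode handling, is given below as an assumption rather than the project's exact code.

import numpy as np

def compute_errors_sketch(gt, pred):
    # standard Eigen-style depth error metrics over valid pixels
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    rmse = np.sqrt(((gt - pred) ** 2).mean())
    rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3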
コード例 #18
0
def export_gt_depths_kitti():

    parser = argparse.ArgumentParser(description='export_pred_theta')

    parser.add_argument('--data_path',
                        type=str,
                        help='path to the root of the KITTI data',
                        required=True)
    parser.add_argument('--save_dir',
                        type=str,
                        help='path to the root of save folder',
                        required=True)
    parser.add_argument('--load_weights_folder',
                        type=str,
                        help='path to the root of save folder',
                        required=True)
    parser.add_argument('--num_layers',
                        type=int,
                        default=18)
    parser.add_argument('--num_workers',
                        type=int,
                        default=16)
    parser.add_argument('--banvls',
                        action='store_true')


    opt = parser.parse_args()
    os.makedirs(opt.save_dir, exist_ok=True)
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()


    lines = collect_all_entries(opt.data_path)
    lines_valid = list()
    for line in lines:
        folder, frame_id, direction = line.split()
        frame_id = int(frame_id)
        velo_filename = os.path.join(opt.data_path, folder, "velodyne_points/data", "{:010d}.bin".format(frame_id))
        if os.path.isfile(velo_filename):
            lines_valid.append(line)

    mapping = {'l': 'image_02', 'r': 'image_03'}
    mapping_cam = {'l': 2, 'r': 3}

    ts = time.time()
    imgCount = 0

    dataset = datasets.KITTIRAWDataset(opt.data_path, lines_valid, encoder_dict['height'], encoder_dict['width'], [0], 4, is_train=False)
    dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=False)
    with torch.no_grad():
        for data in dataloader:

            allexist = True
            for i in range(data[('color', 0, 0)].shape[0]):
                folder, frame_id, direction, _, _ = data['entry_tag'][i].split()
                direction = direction[0]
                frame_id = int(frame_id)

                cklist = list()
                cklist.append(os.path.join(opt.save_dir, folder, 'htheta_flipped', mapping[direction], str(frame_id).zfill(10) + '.png'))
                cklist.append(os.path.join(opt.save_dir, folder, 'vtheta_flipped', mapping[direction], str(frame_id).zfill(10) + '.png'))
                cklist.append(os.path.join(opt.save_dir, folder, 'htheta', mapping[direction], str(frame_id).zfill(10) + '.png'))
                cklist.append(os.path.join(opt.save_dir, folder, 'vtheta', mapping[direction], str(frame_id).zfill(10) + '.png'))

                for cke in cklist:
                    if not os.path.isfile(cke):
                        allexist = False

            if allexist:
                continue


            outputs = dict()
            outputs_flipped = dict()
            input_color = data[("color", 0, 0)].cuda()
            input_color_flipped = torch.flip(input_color, dims=[3])
            # tensor2rgb(input_color, ind=0).show()
            # tensor2rgb(input_color_flipped, ind=0).show()
            outputs.update(depth_decoder(encoder(input_color)))
            outputs_flipped.update(depth_decoder(encoder(input_color_flipped)))

            for i in range(outputs[('disp', 0)].shape[0]):
                folder, frame_id, direction, _, _ = data['entry_tag'][i].split()
                direction = direction[0]
                frame_id = int(frame_id)

                print("Exporting: Folder: %s, direction: %s, frame_id: %d" % (folder, direction, frame_id))

                output_folder_h = os.path.join(opt.save_dir, folder, 'htheta', mapping[direction])
                output_folder_v = os.path.join(opt.save_dir, folder, 'vtheta', mapping[direction])
                os.makedirs(output_folder_h, exist_ok=True)
                os.makedirs(output_folder_v, exist_ok=True)
                save_path_h = os.path.join(output_folder_h, str(frame_id).zfill(10) + '.png')
                save_path_v = os.path.join(output_folder_v, str(frame_id).zfill(10) + '.png')

                thetah = outputs[('disp', 0)][i:i+1,0:1,:,:] * 2 * np.pi
                thetav = outputs[('disp', 0)][i:i + 1, 1:2, :, :] * 2 * np.pi

                thetahnp = thetah.squeeze(0).squeeze(0).cpu().numpy()
                thetavnp = thetav.squeeze(0).squeeze(0).cpu().numpy()

                thetahnp_towrite = (thetahnp * 10 * 256).astype(np.uint16)
                thetavnp_towrite = (thetavnp * 10 * 256).astype(np.uint16)
                cv2.imwrite(save_path_h, thetahnp_towrite)
                cv2.imwrite(save_path_v, thetavnp_towrite)

                # reopen_h = np.array(pil.open(save_path_h)).astype(np.float32) / 10 / 256
                # reopen_v = np.array(pil.open(save_path_v)).astype(np.float32) / 10 / 256
                # print(np.abs(reopen_h - thetahnp).max())
                # print(np.abs(reopen_v - thetavnp).max())

                if not opt.banvls:
                    output_folder_hvls = os.path.join(opt.save_dir, folder, 'htheta_vls', mapping[direction])
                    output_folder_vvls = os.path.join(opt.save_dir, folder, 'vtheta_vls', mapping[direction])
                    os.makedirs(output_folder_hvls, exist_ok=True)
                    os.makedirs(output_folder_vvls, exist_ok=True)
                    figh = tensor2disp(thetah - 1, vmax=4, ind=0)
                    figv = tensor2disp(thetav - 1, vmax=4, ind=0)
                    save_path_hvls = os.path.join(output_folder_hvls, str(frame_id).zfill(10) + '.png')
                    save_path_vvls = os.path.join(output_folder_vvls, str(frame_id).zfill(10) + '.png')
                    figh.save(save_path_hvls)
                    figv.save(save_path_vvls)


                output_folder_h = os.path.join(opt.save_dir, folder, 'htheta_flipped', mapping[direction])
                output_folder_v = os.path.join(opt.save_dir, folder, 'vtheta_flipped', mapping[direction])
                os.makedirs(output_folder_h, exist_ok=True)
                os.makedirs(output_folder_v, exist_ok=True)
                save_path_h = os.path.join(output_folder_h, str(frame_id).zfill(10) + '.png')
                save_path_v = os.path.join(output_folder_v, str(frame_id).zfill(10) + '.png')

                thetah = outputs_flipped[('disp', 0)][i:i+1,0:1,:,:] * 2 * np.pi
                thetav = outputs_flipped[('disp', 0)][i:i + 1, 1:2, :, :] * 2 * np.pi

                thetahnp = thetah.squeeze(0).squeeze(0).cpu().numpy()
                thetavnp = thetav.squeeze(0).squeeze(0).cpu().numpy()

                thetahnp_towrite = (thetahnp * 10 * 256).astype(np.uint16)
                thetavnp_towrite = (thetavnp * 10 * 256).astype(np.uint16)
                cv2.imwrite(save_path_h, thetahnp_towrite)
                cv2.imwrite(save_path_v, thetavnp_towrite)

                if not opt.banvls:
                    output_folder_hvls = os.path.join(opt.save_dir, folder, 'htheta_vls_flipped', mapping[direction])
                    output_folder_vvls = os.path.join(opt.save_dir, folder, 'vtheta_vls_flipped', mapping[direction])
                    os.makedirs(output_folder_hvls, exist_ok=True)
                    os.makedirs(output_folder_vvls, exist_ok=True)
                    figh = tensor2disp(thetah - 1, vmax=4, ind=0)
                    figv = tensor2disp(thetav - 1, vmax=4, ind=0)
                    save_path_hvls = os.path.join(output_folder_hvls, str(frame_id).zfill(10) + '.png')
                    save_path_vvls = os.path.join(output_folder_vvls, str(frame_id).zfill(10) + '.png')
                    figh.save(save_path_hvls)
                    figv.save(save_path_vvls)

                te = time.time()
                imgCount = imgCount + 1
                print("%d finished, %f hours left" % (imgCount, (te - ts) / imgCount * (len(lines) - imgCount) / 60 / 60))
コード例 #19
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'], [0],
                                           4,
                                           is_train=False)
        dataloader = DataLoader(dataset,
                                16,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True,
                                drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v
             for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark",
                             "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(opt.eval_out_dir,
                                   "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder,
                                "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print(
            "-> No ground truth is available for the KITTI benchmark, so not evaluating. Done."
        )
        quit()

    elif opt.eval_split == 'mine_0319':
        save_dir = os.path.join(opt.eval_out_dir, "mine_0319_predictions")
        print("-> Saving out mine_0319 predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        # print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        # quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]  # gt_depths.npz stores an object array, so allow_pickle is needed on recent NumPy

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(
                  STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
            med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
コード例 #20
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)
    # the encoder's recorded height and width are less important now

    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, load_meta=True)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, is_load_semantics=True)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()




    ##--------------------Visualization parameter here----------------------------##
    sfx = torch.nn.Softmax(dim=1)
    mergeDisp = Merge_MultDisp(opt.scales, batchSize = opt.batch_size, isMulChannel = opt.isMulChannel)
    svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual'
    index = 0
    isvisualize = True
    useGtSeman = True
    useSeman = False
    viewSurfaceNormal = False
    viewSelfOcclu = False
    viewMutuallyRegularizedBorder= False
    viewLiuSemanCompare = False
    viewSecondOrder = False
    viewBorderConverge = True
    expBin = True
    height = 288
    width = 960
    tensor23dPts = Tensor23dPts(height=height, width=width)

    dirpath = os.path.join(svRoot, opt.model_name)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    if viewSurfaceNormal:
        compsn = ComputeSurfaceNormal(height = height, width = width, batch_size = opt.batch_size).cuda()

    if viewSelfOcclu:
        selfclu = SelfOccluMask().cuda()

    if viewMutuallyRegularizedBorder:
        mrb = MutuallyRegularizedBorders(height=height, width=width, batchsize=opt.batch_size)
        iouFore_gtdepth2gtseman = list()
        iouBack_gtdepth2gtseman = list()
        iouValid_gtdepth2gtseman = list()

        iouFore_estdepth2gtseman = list()
        iouBack_estdepth2gtseman = list()
        iouValid_estdepth2gtseman = list()

        iouFore_estdepth2estseman = list()
        iouBack_estdepth2estseman = list()
        iouValid_estdepth2estseman = list()

    if viewLiuSemanCompare:
        cmpBCons = computeBorderDistance()
        compGrad = computeGradient()
        semanest2semangt = np.zeros(31)
        depth2disp = np.zeros(31)
        depth2semangt = np.zeros(31)
        disp2semanest = np.zeros(31)
        sfx = torch.nn.Softmax(dim=1)
        cmpBCons.cuda()
        compGrad.cuda()

    if viewSecondOrder:
        compSecGrad = SecondOrderGrad().cuda()

    if viewBorderConverge:
        borderConverge = BorderConverge(height, width, opt.batch_size).cuda()

    if expBin:
        expbinmap = expBinaryMap(height, width, opt.batch_size).cuda()

    computedNum = 0
    # with torch.no_grad():
    for idx, inputs in enumerate(dataloader):
        for key, ipt in inputs.items():
            if not(key == 'height' or key == 'width' or key == 'tag' or key == 'cts_meta'):
                inputs[key] = ipt.to(torch.device("cuda"))
        input_color = inputs[("color", 0, 0)].cuda()
        features = encoder(input_color)
        outputs = dict()
        outputs.update(depth_decoder(features, computeSemantic=True, computeDepth=False))
        outputs.update(depth_decoder(features, computeSemantic=False, computeDepth=True))

        if isvisualize:
            if useGtSeman:
                mergeDisp(inputs, outputs, eval=False)
            else:
                mergeDisp(inputs, outputs, eval=True)

            dispMap = outputs[('disp', 0)]
            scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100)
            depthMap = depthMap * STEREO_SCALE_FACTOR
            depthMap = torch.clamp(depthMap, max=80)

            if useGtSeman:
                fig_seman = tensor2semantic(inputs['seman_gt'], ind=index, isGt=True)
            else:
                if useSeman:
                    fig_seman = tensor2semantic(outputs[('seman', 0)], ind=index)
                else:
                    fig_seman = inputs[('color', 0, 0)][index, :, :, :].permute(1,2,0).cpu().numpy()
                    fig_seman = (fig_seman * 255).astype(np.uint8)
                    fig_seman = pil.fromarray(fig_seman)

            fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index)
            fig_disp = tensor2disp(outputs[('disp', 0)], ind=index, vmax=0.1)

            gtmask = (inputs['depth_gt'] > 0).float()
            gtdepth = inputs['depth_gt']
            velo = inputs['velo']
            fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d(depthMap.detach(), ind=index,
                                                                       intrinsic_in=inputs['realIn'],
                                                                       extrinsic_in=inputs['realEx'],
                                                                       gtmask_in=gtmask,
                                                                       gtdepth_in=gtdepth,
                                                                       semanticMap=None,
                                                                       velo_in=velo,
                                                                       rgb_in = inputs[('color', 's', 0)],
                                                                       disp_in = outputs[('disp', 0)].detach()
                                                                       )
            if viewMutuallyRegularizedBorder:
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky
                foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                backGroundMask = torch.ones(dispMap.shape).cuda().byte()

                with torch.no_grad():
                    for m in foregroundType:
                        foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                    foreGroundMask = 1 - foreGroundMask
                    for m in backgroundType:
                        backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                    backGroundMask = 1 - backGroundMask

                # tensor2disp(foreGroundMask, ind=0, vmax=1).show()
                # tensor2disp(backGroundMask, ind=0, vmax=1).show()
                # tensor2rgb(inputs[('color', 0, 0)], ind=0).show()
                # tensor2semantic(inputs['seman_gt'],ind=0,isGt=True).show()
                iouForeMean, iouBackMean, isvalid = mrb.visualization(gtdepth, foreGroundMask, backGroundMask, viewind= index, rgb=inputs[('color', 0, 0)])
                iouFore_gtdepth2gtseman.append(iouForeMean)
                iouBack_gtdepth2gtseman.append(iouBackMean)
                iouValid_gtdepth2gtseman.append(isvalid)


                iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask, backGroundMask,
                                                                      viewind=index, rgb=inputs[('color', 0, 0)])

                iouFore_estdepth2gtseman.append(iouForeMean)
                iouBack_estdepth2gtseman.append(iouBackMean)
                iouValid_estdepth2gtseman.append(isvalid)

                semanMapEst = outputs[('seman', 0)]
                semanMapEst_sfxed = sfx(semanMapEst)
                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1)
                backGroundMask_est = torch.sum(semanMapEst_sfxed[:, backgroundType, :, :], dim=1).unsqueeze(1)
                other_est = 1 - (foreGroundMask_est + backGroundMask_est)
                tot_est = torch.cat([foreGroundMask_est, backGroundMask_est, other_est], dim=1)
                foreGroundMask_est_bin = (torch.argmax(tot_est, dim=1) == 0).unsqueeze(1)
                backGroundMask_est_bin = (torch.argmax(tot_est, dim=1) == 1).unsqueeze(1)
                iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask_est_bin, backGroundMask_est_bin,
                                                                      viewind=index, rgb=inputs[('color', 0, 0)])
                iouFore_estdepth2estseman.append(iouForeMean)
                iouBack_estdepth2estseman.append(iouBackMean)
                iouValid_estdepth2estseman.append(isvalid)

                # tensor2disp(foreGroundMask_est_bin, vmax=1, ind=0).show()
                # tensor2disp(backGroundMask_est_bin, vmax=1, ind=0).show()
            if viewLiuSemanCompare:
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky
                foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                backGroundMask = torch.ones(dispMap.shape).cuda().byte()

                with torch.no_grad():
                    for m in foregroundType:
                        foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                    foreGroundMask = 1 - foreGroundMask
                    for m in backgroundType:
                        backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                    backGroundMask = 1 - backGroundMask

                dispMapEst = outputs[('disp', 0)]
                semanMapEst = outputs[('seman', 0)]
                semanMapGt = inputs['seman_gt']
                depthMapGt = inputs['depth_gt']

                sparseDepthmapGrad = compGrad.computegrad11_sparse(depthMapGt)
                sparseDepthmapGrad_bin = sparseDepthmapGrad > 0
                sparseDepthmapGrad = F.interpolate(sparseDepthmapGrad, [height, width], mode='bilinear', align_corners=True)
                sparseDepthmapGrad_bin = F.interpolate(sparseDepthmapGrad_bin.float(), [height, width], mode='nearest')
                sparseDepthmapGrad = sparseDepthmapGrad * sparseDepthmapGrad_bin
                # depthMapGt_bin = depthMapGt > 1e-1
                # depthMapGt = F.interpolate(sparseDepthmapGrad, (height, width), mode='bilinear', align_corners=False)
                # depthMapGt_bin = F.interpolate(depthMapGt_bin.float(), (height, width), mode='nearest')
                # depthMapGt = depthMapGt * depthMapGt_bin
                # compGrad.computegrad11_sparse(depthMapGt)
                # tensor2disp(depthMapGt>0, ind=0, vmax=1).show()


                semanMapEst_sfxed = sfx(semanMapEst)
                semanMapEst_inds = torch.argmax(semanMapEst_sfxed, dim=1).unsqueeze(1)
                seman_est_fig = tensor2semantic(semanMapEst_inds, ind=0)
                seman_gt_fig = tensor2semantic(semanMapGt, ind=0)
                depthMapGt_fig = tensor2disp(depthMapGt, ind=0, vmax=20)
                depthMapGt_fig = depthMapGt_fig.resize((width, height), resample=pil.BILINEAR)


                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:,foregroundType,:,:], dim=1).unsqueeze(1)

                dispMapGrad = compGrad.computegrad11(dispMapEst)
                foreGroundMaskGrad = compGrad.computegrad11(foreGroundMask.float())
                foreGroundMask_estGrad = compGrad.computegrad11(foreGroundMask_est)
                sparseDepthmapGrad_fig = tensor2disp(sparseDepthmapGrad, ind=0, vmax=20)
                dispMapGrad_fig = tensor2disp(dispMapGrad, ind=0, vmax=0.08)
                foreGroundMaskGrad_fig = tensor2disp(foreGroundMaskGrad, ind=0, vmax=1)
                foreGroundMask_estGrad_fig = tensor2disp(foreGroundMask_estGrad, ind=0, vmax=1.5)

                dispMapGrad_bin = dispMapGrad > 0.011
                foreGroundMaskGrad_bin = foreGroundMaskGrad > 0.5
                foreGroundMask_estGrad_bin = foreGroundMask_estGrad > 0.6
                sparseDepthmapGrad_bin = sparseDepthmapGrad > 9
                dispMapGrad_bin_fig = tensor2disp(dispMapGrad_bin, ind=0, vmax=1)
                foreGroundMaskGrad_bin_fig = tensor2disp(foreGroundMaskGrad_bin, ind=0, vmax=1)
                foreGroundMask_estGrad_bin_fig = tensor2disp(foreGroundMask_estGrad_bin, ind=0, vmax=1)
                sparseDepthmapGrad_bin_fig = tensor2disp(sparseDepthmapGrad_bin, ind=0, vmax=1)

                visualizeImage = np.concatenate([np.array(fig_rgb), np.array(fig_disp)[:,:,0:3], np.array(seman_est_fig), np.array(seman_gt_fig), np.array(depthMapGt_fig)[:,:,0:3]], axis=0)
                visualizeImage_grad = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_fig)[:,:,0:3], np.array(foreGroundMaskGrad_fig)[:,:,0:3], np.array(sparseDepthmapGrad_fig)[:,:,0:3]], axis=0)
                visualizeimage_grad_bin = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_bin_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_bin_fig)[:,:,0:3], np.array(foreGroundMaskGrad_bin_fig)[:,:,0:3], np.array(sparseDepthmapGrad_bin_fig)[:,:,0:3]], axis=0)
                tot = np.concatenate([np.array(visualizeImage), np.array(visualizeImage_grad), np.array(visualizeimage_grad_bin)], axis=1)
                pil.fromarray(tot).save('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/%d.png' % idx)
                # pil.fromarray(tot).show()
                # pil.fromarray(visualizeImage).show()
                # pil.fromarray(visualizeImage_grad).show()
                # pil.fromarray(visualizeimage_grad_bin).show()


                semanest2semangt = semanest2semangt + cmpBCons.computeDistance(foreGroundMask_estGrad_bin, foreGroundMaskGrad_bin)
                depth2disp = depth2disp + cmpBCons.computeDistance(sparseDepthmapGrad_bin, dispMapGrad_bin)
                depth2semangt = depth2semangt + cmpBCons.computeDistance(sparseDepthmapGrad_bin, foreGroundMaskGrad_bin)
                disp2semanest = disp2semanest + cmpBCons.computeDistance(dispMapGrad_bin, foreGroundMask_estGrad_bin)

                # tensor2disp(dispMapEst, ind=index, percentile=90).show()

            if viewBorderConverge:
                semanMapEst = outputs[('seman', 0)]
                semanMapEst_sfxed = sfx(semanMapEst)
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
                                  18]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1)
                dispMapEst = outputs[('disp', 0)]

                # borderConverge.visualization(dispMapEst, foreGroundMask_est)
                if expBin:
                    expbinmap.visualization3(disparity=dispMapEst, semantics=foreGroundMask_est)
                a = 1

            if viewSecondOrder:
                disp2order = compSecGrad.computegrad11(outputs[('disp', 0)])
                tensor2disp(disp2order, ind=0, percentile=95).show()

            if viewSurfaceNormal:
                surnorm = compsn.visualize(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float(), orgEstPts=veh_coord,
                                           gtEstPts=veh_coord_gt, viewindex=index)
                surnormMap = compsn(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float())

            if viewSelfOcclu:
                fl = inputs[("K", 0)][:, 0, 0]
                bs = torch.abs(inputs["stereo_T"][:, 0, 3])
                clufig, suppressedDisp = selfclu.visualize(dispMap, viewind=index)

            if viewSurfaceNormal and viewSelfOcclu:
                surnorm = surnorm.resize([width, height])
                surnorm_mixed = pil.fromarray(
                    (np.array(surnorm) * 0.2 + np.array(fig_disp)[:, :, 0:3] * 0.8).astype(np.uint8))
                disp_seman = (np.array(fig_disp)[:, :, 0:3].astype(np.float64) * 0.8 +
                              np.array(fig_seman).astype(np.float64) * 0.2).astype(np.uint8)
                supprressed_disp_seman = (np.array(suppressedDisp)[:, :, 0:3].astype(np.float64) * 0.8 +
                                          np.array(fig_seman).astype(np.float64) * 0.2).astype(np.uint8)
                rgb_seman = (np.array(fig_seman).astype(np.float64) * 0.5 +
                             np.array(fig_rgb).astype(np.float64) * 0.5).astype(np.uint8)

                # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype(
                #     np.float) * 0.7).astype(np.uint8)
                comb1 = np.concatenate([np.array(supprressed_disp_seman)[:, :, 0:3], np.array(suppressedDisp)[:, :, 0:3]], axis=1)
                comb2 = np.concatenate([np.array(disp_seman)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1)
                # comb3 = np.concatenate([np.array(errFig)[:, :, 0:3], np.array(surnorm)[:, :, 0:3]], axis=1)
                comb4 = np.concatenate([np.array(fig_seman)[:, :, 0:3], np.array(rgb_seman)[:, :, 0:3]],
                                       axis=1)
                comb6 = np.concatenate([np.array(clufig)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1)

                fig3dsize = np.ceil(np.array([comb4.shape[1], comb4.shape[1] / fig_3d.size[0] * fig_3d.size[1]])).astype(np.int32)
                comb5 = np.array(fig_3d.resize(fig3dsize))

            # fig = pil.fromarray(combined)
            # fig.save(os.path.join(dirpath, str(idx) + '.png'))
            print("%dth img finished" % idx)
            # if idx >=4:
            #     break
    if viewLiuSemanCompare:
        semanest2semangt_p = semanest2semangt / np.sum(semanest2semangt)
        semanest2semangt_p_ = semanest2semangt_p[0:-1]
        mean = np.sum(np.arange(len(semanest2semangt_p_)) * semanest2semangt_p_)
        std = np.sqrt(np.sum((np.arange(len(semanest2semangt_p_)) - mean) ** 2 * semanest2semangt_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(semanest2semangt_p)), semanest2semangt_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of semantic, est to gt")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/seman_est2gt.png")
        plt.close(fig)

        depth2disp_p = depth2disp / np.sum(depth2disp)
        depth2disp_p_ = depth2disp_p[0:-1]
        mean = np.sum(np.arange(len(depth2disp_p_)) * depth2disp_p_)
        std = np.sqrt(np.sum((np.arange(len(depth2disp_p_)) - mean) ** 2 * depth2disp_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(depth2disp_p)), depth2disp_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth, gt to est")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth_gt2est.png")
        plt.close(fig)

        depth2semangt_p = depth2semangt / np.sum(depth2semangt)
        depth2semangt_p_ = depth2semangt_p[0:-1]
        mean = np.sum(np.arange(len(depth2semangt_p_)) * depth2semangt_p_)
        std = np.sqrt(np.sum((np.arange(len(depth2semangt_p_)) - mean) ** 2 * depth2semangt_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(depth2semangt_p)), depth2semangt_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth and semantic, gt")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_gt.png")
        plt.close(fig)

        disp2semanest_p = disp2semanest / np.sum(disp2semanest)
        disp2semanest_p_ = disp2semanest_p[0:-1]
        mean = np.sum(np.arange(len(disp2semanest_p_)) * disp2semanest_p_)
        std = np.sqrt(np.sum((np.arange(len(disp2semanest_p_)) - mean) ** 2 * disp2semanest_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(disp2semanest_p)), disp2semanest_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixel, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth and semantic, est")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_est.png")
        plt.close(fig)

    if viewMutuallyRegularizedBorder:
        iouFore_gtdepth2gtseman = np.array(iouFore_gtdepth2gtseman)
        iouBack_gtdepth2gtseman = np.array(iouBack_gtdepth2gtseman)
        iouValid_gtdepth2gtseman = np.array(iouValid_gtdepth2gtseman)
        iouFore_gtdepth2gtsemanMean = np.sum(iouFore_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman)
        iouBack_gtdepth2gtsemanMean = np.sum(iouBack_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman)

        iouFore_estdepth2gtseman = np.array(iouFore_estdepth2gtseman)
        iouBack_estdepth2gtseman = np.array(iouBack_estdepth2gtseman)
        iouValid_estdepth2gtseman = np.array(iouValid_estdepth2gtseman)
        iouFore_estdepth2gtsemanMean = np.sum(iouFore_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)
        iouBack_estdepth2gtsemanMean = np.sum(iouBack_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)

        iouFore_estdepth2estseman = np.array(iouFore_estdepth2estseman)
        iouBack_estdepth2estseman = np.array(iouBack_estdepth2estseman)
        iouValid_estdepth2estseman = np.array(iouValid_estdepth2estseman)
        iouFore_estdepth2estsemanMean = np.sum(iouFore_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)
        iouBack_estdepth2estsemanMean = np.sum(iouBack_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)

        print("iouFore_gtdepth2gtsemanMean is % f" % iouFore_gtdepth2gtsemanMean)
        print("iouBack_gtdepth2gtsemanMean is % f" % iouBack_gtdepth2gtsemanMean)
        print("iouFore_estdepth2gtsemanMean is % f" % iouFore_estdepth2gtsemanMean)
        print("iouBack_estdepth2gtsemanMean is % f" % iouBack_estdepth2gtsemanMean)
        print("iouFore_estdepth2estsemanMean is % f" % iouFore_estdepth2estsemanMean)
        print("iouBack_estdepth2estsemanMean is % f" % iouBack_estdepth2estsemanMean)
コード例 #21
0
    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(
        os.path.join(splits_dir, opt.split, "train_files.txt"))
    # filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    dataset = datasets.KITTIRAWDataset(
        opt.data_path,
        filenames,
        encoder_dict['height'],
        encoder_dict['width'], [0],
        4,
        is_train=False,
        tag=opt.dataset,
        img_ext='png',
        load_meta=opt.load_meta,
        is_load_semantics=opt.use_kitti_gt_semantics,
        is_predicted_semantics=opt.is_predicted_semantics)

    # dataloader = DataLoader(dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers,
    #                         pin_memory=True, drop_last=False)

    dataloader = DataLoader(dataset,
                            1,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)
コード例 #22
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(
        os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    dataset = datasets.KITTIRAWDataset(opt.data_path,
                                       filenames,
                                       encoder_dict['height'],
                                       encoder_dict['width'], [0],
                                       4,
                                       is_train=False)
    dataloader = DataLoader(dataset,
                            16,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)
    if opt.Lite_HR_Depth:
        encoder = networks.MobileEncoder(pretrained=None)
    elif opt.HR_Depth:
        encoder = networks.ResnetEncoder(18, False)
    else:
        assert False, " Please choose HR-Depth or Lite-HR-Depth "
    depth_decoder = networks.HRDepthDecoder(encoder.num_ch_enc,
                                            mobile_encoder=opt.Lite_HR_Depth)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_disps = []

    print("-> Computing predictions with size {}x{}".format(
        encoder_dict['width'], encoder_dict['height']))

    with torch.no_grad():
        for data in dataloader:
            input_color = data[("color", 0, 0)].cuda()

            output = depth_decoder(encoder(input_color))
            pred_disp, _ = disp_to_depth(output[("disparity", "Scale0")], 0.1,
                                         100.0)
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            pred_disps.append(pred_disp)

    pred_disps = np.concatenate(pred_disps)

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path,
                        fix_imports=True,
                        encoding='latin1',
                        allow_pickle=True)["data"]

    print("-> Evaluating")
    print("   Using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        # Apply the mask proposed by Eigen
        mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

        crop = np.array([
            0.40810811 * gt_height, 0.99189189 * gt_height,
            0.03594771 * gt_width, 0.96405229 * gt_width
        ]).astype(np.int32)
        crop_mask = np.zeros(mask.shape)
        crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
        mask = np.logical_and(mask, crop_mask)

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        ratio = np.median(gt_depth) / np.median(pred_depth)
        ratios.append(ratio)
        pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
        med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
コード例 #23
0
File: prediction.py  Project: xdr940/DeepSfMLearner
def prediction(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    framework_mode = opts['model']['mode']

    # The metric info here forcibly compresses the gt values into the same range as the scanner,
    # which keeps the values as close to metric scale as possible
    # but for

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    components = opts['model']['mode']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])

    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=components)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    file_names.sort()
    #prediction loader
    # test_files = []
    # for base in file_names:
    #     test_files.append(data_path/base)
    # test_files.sort()

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="prediction")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    out_shows = []

    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()
    for data in tqdm(dataloader):

        input_color = input_frames(data,
                                   mode=framework_mode,
                                   frame_sides=frame_sides)

        features = encoder(input_color)
        disp = decoder(*features)

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)

        out_show = pred_disp
        out_show = out_show.cpu()[:, 0].numpy()

        out_shows.append(out_show)

    for idx, item in enumerate(out_shows):

        depth_name = file_names[idx].replace('/', '_').replace('.png', 'depth')
        idx += 1
        plt.imsave(out_dir / depth_name + '{}'.format('.png'),
                   item[0],
                   cmap='magma')
コード例 #24
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=True, load_morphed_depth=False)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    sfx = torch.nn.Softmax(dim=1)

    print("Evaluation starts")

    confMatrix = generateMatrix(args)
    nbPixels = 0
    count255 = 0
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            input_color = inputs[("color", 0, 0)].cuda()
            outputs = depth_decoder(encoder(input_color), computeSemantic=True, computeDepth=False)

            gt = inputs['seman_gt_eval'].cpu().numpy().astype(np.uint8)
            pred = sfx(outputs[('seman', 0)]).detach()
            pred = torch.argmax(pred, dim=1).type(torch.float).unsqueeze(1)
            pred = F.interpolate(pred, [gt.shape[1], gt.shape[2]], mode='nearest')
            pred = pred.squeeze(1).cpu().numpy().astype(np.uint8)
            # visualize_semantic(gt[0,:,:]).show()
            # visualize_semantic(pred[0,:,:]).show()

            groundTruthNp = gt
            predictionNp = pred
            nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2]

            # encoding_value = max(groundTruthNp.max(), predictionNp.max()).astype(np.int32) + 1
            encoding_value = 256  # precomputed
            encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp

            values, cnt = np.unique(encoded, return_counts=True)

            for value, c in zip(values, cnt):
                pred_id = value % encoding_value
                gt_id = int((value - pred_id) / encoding_value)
                if pred_id == 255 or gt_id == 255:
                    count255 = count255 + c
                    continue
                if gt_id not in args.evalLabels:
                    printError("Unknown label with id {:}".format(gt_id))
                confMatrix[gt_id][pred_id] += c
            print("Finish %dth batch" % idx)
    if confMatrix.sum() + count255 != nbPixels:
        printError(
            'Number of analyzed pixels and entries in confusion matrix disagree: confMatrix {}, pixels {}'.format(
                confMatrix.sum(), nbPixels))

    classScoreList = {}
    for label in args.evalLabels:
        labelName = trainId2label[label].name
        classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args)
    vals = np.array(list(classScoreList.values()))
    mIOU = np.mean(vals[np.logical_not(np.isnan(vals))])
    # if opt.save_pred_disps:
    #     output_path = os.path.join(
    #         opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)

    print("mIOU is %f" % mIOU)
Code example #25
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    img_ext = '.png' if opt.png else '.jpg'

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        # Check if superpixel dataset is used and create superpixel image
        if "superpixel" in opt.dataset or opt.superpixel_mask_loss_binary or opt.normal_loss or \
                opt.superpixel_mask_loss_continuous or opt.input_channels is 4 or opt.input_channels is 6:

            # get number of channels to use for superpixel input
            # 4 channels: RGB plus a numpy array with superpixel indices as an extra channel
            # 3 channels: RGB only; superpixel info goes into the dictionary (can be used e.g. in the loss)
            # 6 channels: normal image plus the image averaged over each superpixel area

            num_sup_channels = opt.input_channels
            print("Using {} channel input.".format(num_sup_channels))

            if opt.no_superpixel_check:
                # don't check whether the superpixel information is correct
                print("Warning: Skip checking superpixel information.")

            else:
                print("Start converting test images to superpixel.")
                convert_rgb_to_superpixel(opt.data_path, filenames, opt.superpixel_method,
                                          opt.superpixel_arguments, img_ext=img_ext, num_channel=num_sup_channels)

            dataset = datasets.SuperpixelDataset(opt.data_path, filenames,
                                                 encoder_dict['height'], encoder_dict['width'],
                                                 [0], 4, opt, is_train=False)
        else:
            dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                               encoder_dict['height'], encoder_dict['width'],
                                               [0], 4, opt, is_train=False)

        dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
                                pin_memory=True, drop_last=False)

        # will use encoder according to number of input channels
        encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_channels=opt.input_channels)

        # if surface normals are used, select the NormalDecoder instead of the DepthDecoder
        if opt.decoder == "normal_vector":
            depth_decoder = networks.NormalDecoder(encoder.num_ch_enc)
        else:
            depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:

                if opt.dataset == "kitti_superpixel":
                    if opt.input_channels == 3:
                        input_color = data[("color", 0, 0)].cuda()

                    elif opt.input_channels == 4:
                        color = data[("color", 0, 0)].cuda()
                        superpixel = data[("super_label", 0, 0)].cuda()
                        input_color = torch.cat((color, superpixel), dim=1)

                    elif opt.input_channels == 6:
                        color = data[("color", 0, 0)].cuda()
                        superpixel = data[("super_img", 0, 0)].cuda()
                        input_color = torch.cat((color, superpixel), dim=1)
                    else:
                        raise NotImplementedError("given input channel size is not implemented.")
                else:
                    input_color = data[("color", 0, 0)].cuda()

                K = data[("K", 0)].cuda()
                K_inv = data[("inv_K", 0)]

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                if opt.decoder == "normal_vector":

                    normal_vec = output[("normal_vec", 0)]

                    #depth = nd.normal_to_depth(K_inv, normal_vec, opt.min_depth, opt.max_depth)

                    disp = nd.normals_to_disp3(K_inv, normal_vec)
                    # print("new depth tensor shape", depth.shape)

                    output[("disp", 0)] = disp

                    # scaling of disp to min_depth to max_depth
                    pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
                    pred_disp = pred_disp.cpu()[:, 0].numpy()

                else:
                    pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
                    pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
Code example #26
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_files.txt"))

        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'], [0],
                                           4,
                                           is_train=False,
                                           load_semantics=opt.load_semantics,
                                           seman_path=opt.seman_path)

        dataloader = DataLoader(dataset,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        if opt.bnMorphLoss:
            from bnmorph.bnmorph import BNMorph
            bnmorph = BNMorph(height=encoder_dict['height'],
                              width=encoder_dict['width']).cuda()
            if opt.post_process:
                tool = grad_computation_tools(
                    batch_size=opt.batch_size * 2,
                    height=encoder_dict['height'],
                    width=encoder_dict['width']).cuda()
            else:
                tool = grad_computation_tools(
                    batch_size=opt.batch_size,
                    height=encoder_dict['height'],
                    width=encoder_dict['width']).cuda()

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v
             for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []
        count = 0
        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].cuda()
                if opt.post_process:
                    input_color = torch.cat(
                        (input_color, torch.flip(input_color, [3])), 0)
                    if 'seman_gt' in data:
                        data['seman_gt'] = torch.cat(
                            (data['seman_gt'], torch.flip(
                                data['seman_gt'], [3])), 0)

                features = encoder(input_color)
                outputs = dict()
                outputs.update(depth_decoder(features))

                if opt.bnMorphLoss:
                    for key, ipt in data.items():
                        if not (key == 'height' or key == 'width'
                                or key == 'tag' or key == 'cts_meta'
                                or key == 'file_add'):
                            data[key] = ipt.to(torch.device("cuda"))

                    disparity_grad_bin = tool.get_disparityEdge(outputs['disp',
                                                                        0])
                    semantics_grad_bin = tool.get_semanticsEdge(
                        data['seman_gt'])

                    morphedx, morphedy, coeff = bnmorph.find_corresponding_pts(
                        disparity_grad_bin, semantics_grad_bin)
                    morphedx = (morphedx /
                                (encoder_dict['width'] - 1) - 0.5) * 2
                    morphedy = (morphedy /
                                (encoder_dict['height'] - 1) - 0.5) * 2
                    grid = torch.cat([morphedx, morphedy],
                                     dim=1).permute(0, 2, 3, 1)
                    dispMaps_morphed = F.grid_sample(outputs['disp', 0],
                                                     grid,
                                                     padding_mode="border")
                    outputs[("disp", 0)] = dispMaps_morphed

                count = count + 1
                pred_disp, _ = disp_to_depth(outputs[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(
                        pred_disp[:N], pred_disp[N:, :, ::-1])
                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)
    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark",
                             "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(opt.load_weights_folder,
                                   "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder,
                                "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print(
            "-> No ground truth is available for the KITTI benchmark, so not evaluating. Done."
        )
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path,
                        fix_imports=True,
                        encoding='latin1',
                        allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(
                  STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen" or opt.UseCustTest:
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
        errors.append(
            compute_errors(
                gt_depth,
                pred_depth,
                UseGtMedianScaling=(opt.UseGtMedianScaling == True)))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
            med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
Code example #27
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    selfOccluMask = SelfOccluMask().cuda()
    selfOccluMask.th = 0
    if opt.isCudaMorphing and opt.borderMorphLoss:
        bnmorph = BNMorph(height=opt.height, width=opt.width,
                          sparsityRad=2).cuda()
    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        filenames = readlines(
            os.path.join(splits_dir, opt.split_name,
                         opt.appendix_name + ".txt"))
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(
            opt.data_path,
            filenames,
            encoder_dict['height'],
            encoder_dict['width'], [0, 's'],
            4,
            is_train=False,
            tag=opt.dataset,
            img_ext='png',
            load_meta=opt.load_meta,
            is_load_semantics=opt.use_kitti_gt_semantics,
            is_predicted_semantics=opt.is_predicted_semantics)

        dataloader = DataLoader(dataset,
                                2,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers,
                                         False,
                                         num_input_images=2)
        depth_decoder = networks.DepthDecoder(
            encoder.num_ch_enc,
            isSwitch=(opt.switchMode == 'on'),
            isMulChannel=opt.isMulChannel,
            outputtwoimage=(opt.outputtwoimage == True))

        model_dict = encoder.state_dict()
        encoder.load_state_dict(
            {k: v
             for k, v in encoder_dict.items() if k in model_dict})
        depth_decoder.load_state_dict(torch.load(decoder_path))

        encoder.cuda()
        encoder.eval()
        depth_decoder.cuda()
        depth_decoder.eval()

        pred_disps = []
        mergeDisp = Merge_MultDisp(opt.scales, batchSize=opt.batch_size)

        count = 0
        tottime = 0

        if not os.path.isdir(opt.output_dir):
            os.mkdir(opt.output_dir)

        with torch.no_grad():
            for data in dataloader:
                # input_colorl = torch.cat([data[("color", 0, 0)], data[("color", 's', 0)]], dim=1).cuda()
                # input_colorr = torch.cat([data[("color", 's', 0)], data[("color", 0, 0)]], dim=1).cuda()
                # input_color = torch.cat([input_colorl, input_colorr], dim=0)
                start = time.time()
                input_color = torch.cat(
                    [data[("color", 0, 0)], data[("color", 's', 0)]],
                    dim=1).cuda()
                # tensor2rgb(input_color[:,0:3,:,:], ind=0).show()
                # tensor2rgb(input_color[:, 3:6, :, :], ind=0).show()
                # tensor2rgb(input_color[:, 0:3, :, :], ind=1).show()

                features = encoder(input_color)
                outputs = dict()
                outputs.update(
                    depth_decoder(features,
                                  computeSemantic=False,
                                  computeDepth=True))

                mergeDisp(data, outputs, eval=True)

                count = count + 1
                scaled_disp, _ = disp_to_depth(outputs[("disp", 0)],
                                               opt.min_depth, opt.max_depth)
                pred_disp = scaled_disp
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                real_scale_disp = scaled_disp * (torch.abs(
                    data[("K", 0)][:, 0, 0] * data["stereo_T"][:, 0, 3]).view(
                        opt.batch_size, 1, 1,
                        1).expand_as(scaled_disp)).cuda()
                SSIMMask = selfOccluMask(real_scale_disp,
                                         data["stereo_T"][:, 0, 3].cuda())

                store_path = filenames[data['idx'][0].numpy()].split(' ')
                folder1 = os.path.join(opt.output_dir,
                                       store_path[0].split('/')[0])
                folder2 = os.path.join(opt.output_dir, store_path[0])
                folder3 = os.path.join(folder2, 'image_02')
                folder4 = os.path.join(folder2, 'image_03')
                if not os.path.isdir(folder1):
                    os.mkdir(folder1)
                if not os.path.isdir(folder2):
                    os.mkdir(folder2)
                if not os.path.isdir(folder3):
                    os.mkdir(folder3)
                if not os.path.isdir(folder4):
                    os.mkdir(folder4)
                if opt.outputvisualizaiton:
                    folder5 = os.path.join(folder2, 'image_02_compose')
                    folder6 = os.path.join(folder2, 'image_03_compose')
                    if not os.path.isdir(folder5):
                        os.mkdir(folder5)
                    if not os.path.isdir(folder6):
                        os.mkdir(folder6)
                    a = outputs[("disp", 0)] * (1 - SSIMMask)
                    fig1 = tensor2disp(a, ind=0, vmax=0.15)
                    fig2 = tensor2disp(a, ind=1, vmax=0.15)
                    fig1.save(
                        os.path.join(folder5,
                                     store_path[1].zfill(10) + '.png'))
                    fig2.save(
                        os.path.join(folder6,
                                     store_path[1].zfill(10) + '.png'))
                pathl = os.path.join(folder3, store_path[1].zfill(10) + '.png')
                pathr = os.path.join(folder4, store_path[1].zfill(10) + '.png')

                # fig1 = tensor2disp(outputs[("disp", 0)], ind=1, vmax=0.1)
                # fig2 = tensor2disp(outputs[("disp", 0)] * (1 - SSIMMask), ind=1, vmax=0.1)
                # fig_combined = np.concatenate([np.array(fig1), np.array(fig2)], axis=0)
                # pil.fromarray(fig_combined).show()
                real_scale_disp = real_scale_disp * (1 - SSIMMask)
                stored_disp = real_scale_disp / 960
                save_loss(stored_disp[0, 0, :, :].cpu().numpy(), pathl)
                save_loss(stored_disp[1, 0, :, :].cpu().numpy(), pathr)

                duration = time.time() - start
                tottime = tottime + duration
                print("left time %f hours" %
                      (tottime / count * (len(filenames) - count) / 60 / 60))
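The real_scale_disp step above relies on the stereo relation depth = fx * |Tx| / disparity: multiplying the scaled disparity by |fx * Tx| (taken from K and stereo_T) turns it into a pixel-unit disparity, which is what gets masked and stored (the /960 constant is repo-specific). Below is a minimal sketch of that conversion with hypothetical helper names, assuming batched K and stereo_T matrices and a [B, 1, H, W] disparity tensor.

import torch

def disparity_in_pixels(scaled_disp, K, stereo_T):
    """Sketch: pixel-unit stereo disparity, d_px = scaled_disp * |fx * Tx| (cf. real_scale_disp)."""
    fx = K[:, 0, 0]                # focal length in pixels
    tx = stereo_T[:, 0, 3]         # horizontal baseline component of the stereo transform
    scale = torch.abs(fx * tx).view(-1, 1, 1, 1)
    return scaled_disp * scale

def depth_from_pixel_disparity(disp_px, K, stereo_T):
    """Sketch of the inverse relation: depth = fx * |Tx| / disparity."""
    fx = K[:, 0, 0]
    tx = stereo_T[:, 0, 3]
    scale = torch.abs(fx * tx).view(-1, 1, 1, 1)
    return scale / disp_px.clamp(min=1e-6)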
Code example #28
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                       encoder_dict['height'], encoder_dict['width'],[0], 4, is_train=False)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, num_output_channels=3)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    dirmapping = {'l':'image_02', 'r':'image_03'}
    localgeomDict = dict()

    print("-> Computing predictions with size {}x{}".format(encoder_dict['width'], encoder_dict['height']))

    totloss = 0

    with torch.no_grad():
        for count in range(len(filenames)):
            data = dataset[count]
            input_color = data[("color", 0, 0)].unsqueeze(0).cuda()

            output = depth_decoder(encoder(input_color))
            _, preddepth = disp_to_depth(output[("disp", 0)][:,2:3,:,:], opt.min_depth, opt.max_depth)
            preddepth = preddepth * STEREO_SCALE_FACTOR
            htheta = output[("disp", 0)][:, 0:1, :, :] * 2 * np.pi
            vtheta = output[("disp", 0)][:, 1:2, :, :] * 2 * np.pi

            seq, frame, dir = filenames[count].split(' ')
            depthgt = pil.open(os.path.join(opt.kitti_gt_path, seq, dirmapping[dir], frame + '.png'))
            depthgt = np.array(depthgt).astype(np.float32) / 256.0
            depthgt = torch.from_numpy(depthgt).unsqueeze(0).unsqueeze(0).cuda()

            _, _, ch, cw = depthgt.shape

            acckey = str(ch) + '_' + str(cw)
            if acckey not in localgeomDict:
                kittiw = cw
                kittih = ch
                intrinsicKitti = np.array([
                    [0.58 * kittiw, 0, 0.5 * kittiw],
                    [0, 1.92 * kittih, 0.5 * kittih],
                    [0, 0, 1]], dtype=np.float32)
                localthetadesp = LocalThetaDesp(height=kittih, width=kittiw, batch_size=1, intrinsic=intrinsicKitti).cuda()
                localgeomDict[acckey] = localthetadesp

            rgbi = F.interpolate(input_color, [ch, cw], mode='bilinear', align_corners=True)
            hthetai = F.interpolate(htheta, [ch, cw], mode='bilinear', align_corners=True)
            vthetai = F.interpolate(vtheta, [ch, cw], mode='bilinear', align_corners=True)
            preddepthi = F.interpolate(preddepth, [ch, cw], mode='bilinear', align_corners=True)

            # hthetai, vthetai = localgeomDict[acckey].get_theta(preddepthi)

            ratioh, ratiohl, ratiov, ratiovl = localgeomDict[acckey].get_ratio(htheta=hthetai, vtheta=vthetai)

            # ratiohl = torch.zeros_like(ratiohl)
            # ratiovl = torch.zeros_like(ratiovl)

            logdepthd = torch.log(depthgt)
            valindic = depthgt > 0
            lossrec = torch.zeros_like(logdepthd)
            countsrec = torch.zeros_like(logdepthd)
            rndseeds = torch.rand_like(logdepthd)
            inplaceShapeLoss_cuda.inplaceShapeLoss_forward(logdepthd, ratiohl, ratiovl, valindic.int(), lossrec, countsrec, rndseeds, 30, 30)

            totloss = totloss + torch.sum(lossrec[lossrec > 0]) / torch.sum(lossrec > 0)

            # cm = plt.get_cmap('bwr')
            # xx, yy = np.meshgrid(range(cw), range(ch), indexing='xy')
            # lossrecnp = lossrec[0, 0, :, :].cpu().numpy()
            # valmask = np.abs(lossrecnp) > 0
            # z = lossrecnp[valmask]
            #
            # selector_pos = z > 0
            # selector_neg = z < 0
            #
            # bar = 0.005
            #
            # if np.sum(selector_pos) > 1:
            #     pos_bar = bar
            #     z[selector_pos] = z[selector_pos] / pos_bar / 2
            #
            # if np.sum(selector_neg) > 1:
            #     neg_bar = -bar
            #     z[selector_neg] = -z[selector_neg] / neg_bar / 2
            #
            # znormed = z + 0.5
            # colorMap = cm(znormed)[:, 0:3]
            #
            # plt.figure(figsize=(12, 9), dpi=120, facecolor='w', edgecolor='k')
            # plt.imshow(tensor2rgb(rgbi, ind=0))
            # plt.scatter(xx[valmask], yy[valmask], c=colorMap, s=8)
            # plt.savefig(os.path.join('/media/shengjie/c9c81c9f-511c-41c6-bfe0-2fc19666fb32/Visualizations/Project_SemanDepth/vls_shapeErrType', str(count) + '.png'))
            # plt.close()

            # hthetad, vthetad = localgeomDict[acckey].get_theta(depthmap=preddepthi)
            # ratiohd, ratiohld, ratiovd, ratiovld = localgeomDict[acckey].get_ratio(htheta=hthetad, vtheta=vthetad)
            # logdepthd = torch.log(preddepthi)
            # valindic = preddepthi > 0
            # lossrec = torch.zeros_like(logdepthd)
            # inplaceShapeLoss_cuda.inplaceShapeLoss_integration(logdepthd, ratiohld, ratiovld, valindic.int(), lossrec, 1, 1)

    totloss = totloss / len(filenames)
    print("mean shape loss: %f" % totloss)
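The ground-truth handling above follows the KITTI depth convention: depth maps are 16-bit PNGs in which pixel_value / 256 is the depth in metres and 0 marks pixels without a LiDAR measurement, which is why the code builds its validity mask with depthgt > 0. A minimal standalone sketch of that read (the path in the usage comment is hypothetical):

import numpy as np
from PIL import Image

def load_kitti_depth_png(path):
    """Sketch: KITTI stores depth as uint16 PNG; metres = value / 256, 0 = no measurement."""
    depth_png = np.array(Image.open(path), dtype=np.float32)
    depth = depth_png / 256.0
    valid = depth > 0
    return depth, valid

# e.g. depth, valid = load_kitti_depth_png('path/to/0000000005.png')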