Ejemplo n.º 1
0
def main():
    """Run stereo inference on every test pair and save the predictions.

    Relies on names defined elsewhere in the file: ``args`` (parsed options),
    ``model`` (the network is reached through ``model.module``), the parallel
    path lists ``test_left_img`` / ``test_right_img``, and the helpers
    ``get_transform``, ``disparityregression`` and ``save_pfm``.

    For each pair the following files are written under ``args.outdir``:
    ``<scene>-disp.npy``, ``<scene>-ent.npy``, ``<scene>-disp.png``,
    ``<scene>-ent.png``, ``<scene>/disp0HSM.pfm`` and ``<scene>/timeHSM.txt``,
    where ``<scene>`` is the name of the directory containing the left image.
    """
    processed = get_transform()
    model.eval()
    for inx, left_path in enumerate(test_left_img):
        print(left_path)
        # Keep only the first three channels (drops an alpha channel if any).
        imgL_o = skimage.io.imread(left_path).astype('float32')[:, :, :3]
        imgR_o = skimage.io.imread(test_right_img[inx]).astype('float32')[:, :, :3]
        imgsize = imgL_o.shape[:2]

        if args.max_disp > 0:
            # A user-supplied range is rounded down to a multiple of 16.
            # Note this rewrites args.max_disp for all following images.
            if args.max_disp % 16 != 0:
                args.max_disp = 16 * math.floor(args.max_disp / 16)
            max_disp = int(args.max_disp)
        else:
            # Otherwise take the value after '=' on the 7th line of the
            # calib.txt next to the image (presumably the ``ndisp`` entry of
            # the Middlebury calibration format -- confirm).
            with open(left_path.replace('im0.png', 'calib.txt')) as f:
                lines = f.readlines()
            max_disp = int(lines[6].split('=')[-1])

        ## change max disp
        # Scale the range by the test resolution and round it up to a
        # multiple of 64; a result of exactly 64 is bumped to 128.
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        # The regression heads are rebuilt because they depend on maxdisp.
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        print(model.module.maxdisp)

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()

        # Add the leading batch axis.
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad
        # Height and width are padded up to the next multiple of 64: rows are
        # added at the top, columns at the right (despite the name left_pad).
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64

        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        pad_spec = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
        # np.pad is the public spelling; the np.lib.pad alias is not available
        # in NumPy 2.x.
        imgL = np.pad(imgL, pad_spec, mode='constant', constant_values=0)
        imgR = np.pad(imgR, pad_spec, mode='constant', constant_values=0)

        # test
        imgL = Variable(torch.FloatTensor(imgL).cuda())
        imgR = Variable(torch.FloatTensor(imgR).cuda())
        with torch.no_grad():
            # Synchronize around the forward pass so the wall-clock time
            # includes the GPU work.
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            print('time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop the padding off again, measured against the resized input.
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        scene = left_path.split('/')[-2]
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs('%s/%s' % (args.outdir, scene), exist_ok=True)
        idxname = '%s/disp0HSM' % (scene)

        # resize to highres
        # Disparities are divided by the scale factor because pixel distances
        # shrink/grow together with the image.
        pred_disp = cv2.resize(pred_disp / args.testres, (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        # ``x != x`` is True only for NaN, so NaN and +inf are both marked.
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        np.save('%s/%s-disp.npy' % (args.outdir, scene), (pred_disp))
        np.save('%s/%s-ent.npy' % (args.outdir, scene), (entropy))
        # Preview PNGs are normalised by the largest valid value.
        cv2.imwrite('%s/%s-disp.png' % (args.outdir, scene),
                    pred_disp / pred_disp[~invalid].max() * 255)
        cv2.imwrite('%s/%s-ent.png' % (args.outdir, scene),
                    entropy / entropy.max() * 255)

        with open('%s/%s.pfm' % (args.outdir, idxname), 'w') as f:
            # Rows are flipped vertically before being handed to save_pfm.
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/%s/timeHSM.txt' % (args.outdir, scene), 'w') as f:
            f.write(str(ttime))

        torch.cuda.empty_cache()
Ejemplo n.º 2
0
def _imwrite_or_raise(path, image):
    """Write ``image`` to ``path`` with ``cv2.imwrite``; raise if it fails.

    The write is a required side effect, so it must not live inside an
    ``assert`` statement: running Python with ``-O`` strips asserts and the
    file would silently never be written.
    """
    if not cv2.imwrite(path, image):
        raise IOError('cv2.imwrite failed for %s' % path)


def main():
    """Evaluate an HSM model on a dataset and dump predictions and diagnostics.

    Parses the command line, builds the model (optionally loading weights
    from ``--loadmodel``), iterates over a ``DataLoader`` and, per sample,
    writes into ``output/<name>/<image name>/``: ``disp.png``,
    ``gt_disp.png``, ``ent.png``, colour-mapped versions, difference maps,
    ``disp0<name>.pfm`` and ``time_<name>.txt``.  With ``--score_results``
    the prediction is additionally scored through ``score_rvc.get_metrics``.
    With ``--prepare_kitti`` (plus ``--all_data`` or ``--kitti``) the output
    is converted with ``prepare_kitti`` and an external evaluator is run.

    Raises:
        IOError: if OpenCV fails to write one of the PNG outputs.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name',
                        default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=0.5,  #default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=float, default=3.5)
    parser.add_argument("--score_results", action="store_true", default=False)
    parser.add_argument("--save_weights", action="store_true", default=False)
    parser.add_argument("--kitti", action="store_true", default=False)
    parser.add_argument("--eth", action="store_true", default=False)
    parser.add_argument("--mb", action="store_true", default=False)
    parser.add_argument("--all_data", action="store_true", default=False)
    parser.add_argument("--eval_train_only",
                        action="store_true",
                        default=False)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--batchsize", type=int, default=16)
    parser.add_argument("--prepare_kitti", action="store_true", default=False)

    args = parser.parse_args()

    # wandb.init(name=args.name, project="high-res-stereo", save_code=True, magic=True, config=args)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("output", exist_ok=True)

    # Per-dataset accumulators: metric name -> list of per-image values.
    kitti_metrics = {}
    eth_metrics = {}
    mb_metrics = {}

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = convert_model(model)
    # wandb.watch(model)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Entries whose key contains 'disp' are dropped; the disparity
        # regression modules are rebuilt per image further below.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if args.prepare_kitti:
        _, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
            '/data/private/KITTI2015/data_scene_flow/training/',
            val=True)  # change to trainval when finetuning on KITTI

        dataset = DA.myImageFloder(left_val,
                                   right_val,
                                   disp_val_L,
                                   rand_scale=[1, 1],
                                   order=0)
    else:
        dataset = RVCDataset(args)

    dataloader = DataLoader(dataset,
                            batch_size=args.batchsize,
                            shuffle=False,
                            num_workers=0)

    steps = 0
    # NOTE(review): the values below are placeholders for metadata that the
    # commented-out loop header used to unpack from the dataloader.  The
    # active header only yields three items, so max_disp (unless --max_disp
    # is given), origianl_image_size, top_pad, left_pad and dataset_type stay
    # None and the body below cannot complete; testres and data_path are
    # also rebound inside the loop, which breaks from the second iteration
    # on.  The right source for these values depends on the dataset class,
    # which is not visible here -- left unchanged on purpose.
    max_disp = None
    origianl_image_size = None
    top_pad = None
    left_pad = None
    testres = [args.testres]
    dataset_type = None
    data_path = [args.datapath]
    # for (imgL, imgR, gt_disp_raw, max_disp, origianl_image_size, top_pad, left_pad, testres, dataset_type , data_path) in dataloader:
    for (imgL, imgR, gt_disp_raw) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        data_path = data_path[0]
        img_name = os.path.basename(os.path.normpath(data_path))
        testres = float(testres[0])
        gt_disp_raw = gt_disp_raw[0]

        # Pick the accumulator that matches the dataset the sample is from.
        cum_metrics = None
        if dataset_type == 0:
            cum_metrics = mb_metrics

        elif dataset_type == 1:
            cum_metrics = eth_metrics

        elif dataset_type == 2:
            cum_metrics = kitti_metrics

        print(img_name)

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp
        # Scale the range by the test resolution and round it up to a
        # multiple of 64; a result of exactly 64 is bumped to 128.
        tmpdisp = int(max_disp * testres // 64 * 64)
        if (max_disp * testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()
        print("    max disparity = " + str(model.module.maxdisp))

        # wandb.log({"imgL": wandb.Image(imgL, caption=img_name + ", " + str(tuple(imgL.shape))),
        #            "imgR": wandb.Image(imgR, caption=img_name + ", " + str(tuple(imgR.shape)))}, step=steps)

        with torch.no_grad():
            # Synchronize around the forward pass so the wall-clock time
            # includes the GPU work.
            torch.cuda.synchronize()
            start_time = time.time()

            # * output dimensions same as input dimensions
            # * (ex: imgL[1, 3, 704, 2240] then pred_disp[1, 704, 2240])
            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

            print('    time = %.2f' % (ttime * 1000))

        # * squeeze (remove dimensions with size 1) (ex: pred_disp[1, 704, 2240] ->[704, 2240])
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop away the padding: rows from the top, columns from the right.
        top_pad = int(top_pad[0])
        left_pad = int(left_pad[0])
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        scene_dir = 'output/%s/%s' % (args.name, img_name)
        os.makedirs(scene_dir, exist_ok=True)

        # * resize the prediction back to the original image size
        # ! pred_disp is divided element-wise by testres because disparities
        # ! are pixel distances and scale together with the image
        pred_disp_raw = cv2.resize(
            pred_disp / testres,
            (origianl_image_size[1], origianl_image_size[0]),
            interpolation=cv2.INTER_LINEAR)
        # NOTE: this is an alias, not a copy -- the in-place write to
        # pred_disp below is also visible through pred_disp_raw.
        pred_disp = pred_disp_raw  # raw is to use for scoring

        gt_disp = gt_disp_raw.numpy()

        # * clip while keep inf
        # ``x != x`` is True only where x is NaN, so this marks every pixel
        # that is +inf or NaN; all of them are then normalised to +inf.
        pred_invalid = np.logical_or(pred_disp == np.inf,
                                     pred_disp != pred_disp)
        pred_disp[pred_invalid] = np.inf

        # Disparities are stored as 16-bit PNGs scaled by 256.
        # NOTE(review): pred_disp can still contain +inf here; what the
        # uint16 cast produces for those pixels should be confirmed.
        pred_disp_png = (pred_disp * 256).astype("uint16")

        gt_invalid = np.logical_or(gt_disp == np.inf, gt_disp != gt_disp)
        gt_disp[gt_invalid] = 0
        gt_disp_png = (gt_disp * 256).astype("uint16")
        entorpy_png = (entropy * 256).astype('uint16')

        # ! raw output to png
        pred_disp_path = '%s/disp.png' % scene_dir
        gt_disp_path = '%s/gt_disp.png' % scene_dir
        _imwrite_or_raise(pred_disp_path, pred_disp_png)
        _imwrite_or_raise(gt_disp_path, gt_disp_png)
        _imwrite_or_raise('%s/ent.png' % scene_dir, entorpy_png)

        # ! Experimental color maps
        gt_colormap = convert_to_colormap(gt_disp_png)
        pred_colormap = convert_to_colormap(pred_disp_png)
        entropy_colormap = convert_to_colormap(entorpy_png)
        _imwrite_or_raise('%s/gt_disp_color.png' % scene_dir, gt_colormap)
        _imwrite_or_raise('%s/disp_color.png' % scene_dir, pred_colormap)

        # ! diff colormaps
        # Invalid ground-truth pixels take the predicted value so that they
        # contribute zero to the difference maps; int32 avoids unsigned
        # wrap-around in the subtraction.
        gt_disp_png[gt_invalid] = pred_disp_png[gt_invalid]
        gt_disp_png = gt_disp_png.astype("int32")
        pred_disp_png = pred_disp_png.astype("int32")
        signed_diff = gt_disp_png - pred_disp_png

        diff_colormap = convert_to_colormap(np.abs(signed_diff))
        # "False positive" keeps only pixels where the prediction is larger
        # than the ground truth, "false negative" only where it is smaller.
        false_positive_colormap = convert_to_colormap(
            np.abs(np.clip(signed_diff, None, 0)))
        false_negative_colormap = convert_to_colormap(
            np.abs(np.clip(signed_diff, 0, None)))
        _imwrite_or_raise('%s/diff_color.png' % scene_dir, diff_colormap)
        _imwrite_or_raise('%s/false_positive_color.png' % scene_dir,
                          false_positive_colormap)
        _imwrite_or_raise('%s/false_negative_color.png' % scene_dir,
                          false_negative_colormap)

        out_pfm_path = '%s/disp0%s.pfm' % (scene_dir, args.name)
        with open(out_pfm_path, 'w') as f:
            # Rows are flipped vertically before being handed to save_pfm.
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/time_%s.txt' % (scene_dir, args.name), 'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        # max_disp is a plain int when --max_disp overrides it; otherwise it
        # is indexed like a one-element batch (as the original code did).
        if isinstance(max_disp, int):
            max_disp_scalar = max_disp
        else:
            max_disp_scalar = int(max_disp[0])

        # Only consumed by the commented-out wandb logging below.
        caption = img_name + ", " + str(
            tuple(pred_disp_png.shape)) + ", max disparity = " + str(
                max_disp_scalar) + ", time = " + str(ttime)

        # read GT depthmap and upload as jpg

        # wandb.log({"disparity": wandb.Image(pred_colormap, caption=caption) , "gt": wandb.Image(gt_colormap), "entropy": wandb.Image(entropy_colormap, caption= str(entorpy_png.shape)),
        #            "diff":wandb.Image(diff_colormap), "false_positive":wandb.Image(false_positive_colormap), "false_negative":wandb.Image(false_negative_colormap)}, step=steps)

        torch.cuda.empty_cache()
        steps += 1

        # Todo: find out what mask0nocc does. It's probably not the same as KITTI's object map
        if dataset_type == 2:
            obj_map_path = os.path.join(data_path, "obj_map.png")
        else:
            obj_map_path = None

        if args.score_results:
            if pred_disp_raw.shape != gt_disp_raw.shape:
                # Bring the prediction to the ground-truth size and rescale
                # the disparity values by the same horizontal ratio.
                ratio = float(gt_disp_raw.shape[1]) / pred_disp_raw.shape[1]
                disp_resized = cv2.resize(
                    pred_disp_raw,
                    (gt_disp_raw.shape[1], gt_disp_raw.shape[0])) * ratio
                pred_disp_raw = disp_resized
            # if args.debug:
            #     out_resized_pfm_path = 'output/%s/%s/pred_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_pfm_path, 'w') as f:
            #         save_pfm(f, pred_disp_raw)

            #     out_resized_gt_path = 'output/%s/%s/gt_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_gt_path, 'w') as f:
            #         save_pfm(f, gt_disp_raw.numpy())

            metrics = score_rvc.get_metrics(
                pred_disp_raw,
                gt_disp_raw,
                max_disp_scalar,
                dataset_type,
                scene_dir,
                disp_path=pred_disp_path,
                gt_path=gt_disp_path,
                obj_map_path=obj_map_path,
                debug=args.debug)

            # Running averages over all images seen so far for this dataset.
            avg_metrics = {}
            for (key, val) in metrics.items():
                history = cum_metrics.setdefault(key, [])
                history.append(val)
                avg_metrics["avg_" + key] = sum(history) / len(history)

            # wandb.log(metrics, step=steps)
            # wandb.log(avg_metrics, step=steps)

    # if args.save_weights and os.path.exists(args.loadmodel):
    #     wandb.save(args.loadmodel)

    if args.prepare_kitti and (args.all_data or args.kitti):
        in_path = 'output/%s' % (args.name)
        out_path = "/home/isaac/high-res-stereo/kitti_submission_output"
        out_path = prepare_kitti(in_path, out_path)
        subprocess.run(
            ["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path])
        print("KITTI submission evaluation saved to: " + out_path)
Ejemplo n.º 3
0
def main():
    """Run HSM on every scene folder of a Middlebury-2014-style dataset tree.

    Parses the command line, builds the model (optionally loading weights
    from ``--loadmodel``), performs one dry run on a zero image, then walks
    ``datasets_middlebury2014/training`` (and ``.../test`` unless
    ``--training_only`` is given).  For each scene directory it reads
    ``im0.png`` / ``im1.png`` and writes under ``args.outdir``:
    ``<scene>-disp.npy``, ``<scene>-ent.npy``, ``<scene>-disp.png``,
    ``<scene>-ent.png``, ``<scene>/disp0<method>.pfm`` and
    ``<scene>/time<method>.txt``.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument('--datapath',
                        default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--outdir', default='output', help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres',
                        type=float,
                        default=0.5,
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--dtype', type=int)
    # This flag is read further below but was never declared, which made the
    # script fail with AttributeError.  Default False keeps the behaviour of
    # processing both the training and the test folder.
    parser.add_argument('--training_only',
                        action='store_true',
                        default=False,
                        help='only process the training folder')
    args = parser.parse_args()

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Entries whose key contains 'disp' are dropped; the disparity
        # regression modules are rebuilt per image further below.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run
    # One forward pass on an all-zero image before the timed runs.
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    # Get arguments.
    # NOTE(review): this is the first raw command-line token.  The parser
    # above declares no positional argument, so that token is one of the
    # option flags (and IndexError is raised when no argument is given).
    # Confirm how the method name is meant to be supplied.
    method_name = sys.argv[1]

    # --dtype overrides --testres with a per-dataset value.
    if args.dtype == 0:  # KITTI
        args.testres = 1.8
    elif args.dtype == 1:  # Middlebury
        args.testres = 1
    elif args.dtype == 2:  # ETH
        args.testres = 3.5  # Gengsahn said it's between 3~4. Find with linear grid search

    processed = get_transform()
    model.eval()

    # NOTE(review): the dataset root is hard-coded; --datapath is not used.
    datasets_dir_path = "datasets_middlebury2014"

    folders = [os.path.join(datasets_dir_path, 'training')]
    if not args.training_only:
        folders.append(os.path.join(datasets_dir_path, 'test'))

    for folder in folders:
        datasets = [
            dataset for dataset in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, dataset))
        ]

        for dataset_name in datasets:
            im0_path = os.path.join(folder, dataset_name, 'im0.png')
            im1_path = os.path.join(folder, dataset_name, 'im1.png')
            # The parsed result is not used below; the call is kept so that
            # whatever checks it performs on calib.txt still run.
            calib = ReadMiddlebury2014CalibFile(
                os.path.join(folder, dataset_name, 'calib.txt'))

            # Keep only the first three channels (drops an alpha channel).
            imgL_o = (skimage.io.imread(im0_path).astype('float32'))[:, :, :3]
            imgR_o = (skimage.io.imread(im1_path).astype('float32'))[:, :, :3]
            imgsize = imgL_o.shape[:2]

            if args.max_disp > 0:
                max_disp = int(args.max_disp)
            else:
                # Take the value after '=' on the 7th line of the calib.txt
                # next to the image (presumably the ``ndisp`` entry of the
                # Middlebury calibration format -- confirm).
                path_to_replace = os.path.basename(os.path.normpath(im0_path))
                with open(im0_path.replace(path_to_replace, 'calib.txt')) as f:
                    lines = f.readlines()
                max_disp = int(lines[6].split('=')[-1])

            ## change max disp
            # Scale the range by the test resolution and round it up to a
            # multiple of 64; a result of exactly 64 is bumped to 128.
            tmpdisp = int(max_disp * args.testres // 64 * 64)
            if (max_disp * args.testres / 64 * 64) > tmpdisp:
                model.module.maxdisp = tmpdisp + 64
            else:
                model.module.maxdisp = tmpdisp
            if model.module.maxdisp == 64:
                model.module.maxdisp = 128
            model.module.disp_reg8 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg16 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg32 = disparityregression(
                model.module.maxdisp, 32).cuda()
            model.module.disp_reg64 = disparityregression(
                model.module.maxdisp, 64).cuda()
            print("max disparity = " + str(model.module.maxdisp))

            # resize
            imgL_o = cv2.resize(imgL_o,
                                None,
                                fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgR_o = cv2.resize(imgR_o,
                                None,
                                fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgL = processed(imgL_o).numpy()
            imgR = processed(imgR_o).numpy()

            # Add the leading batch axis.
            imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
            imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

            ##fast pad
            # Height and width are padded up to the next multiple of 64:
            # rows are added at the top, columns at the right (despite the
            # name left_pad).
            max_h = int(imgL.shape[2] // 64 * 64)
            max_w = int(imgL.shape[3] // 64 * 64)
            if max_h < imgL.shape[2]:
                max_h += 64
            if max_w < imgL.shape[3]:
                max_w += 64

            top_pad = max_h - imgL.shape[2]
            left_pad = max_w - imgL.shape[3]
            pad_spec = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
            # np.pad is the public spelling; the np.lib.pad alias is not
            # available in NumPy 2.x.
            imgL = np.pad(imgL, pad_spec, mode='constant', constant_values=0)
            imgR = np.pad(imgR, pad_spec, mode='constant', constant_values=0)

            # test
            imgL = Variable(torch.FloatTensor(imgL).cuda())
            imgR = Variable(torch.FloatTensor(imgR).cuda())
            with torch.no_grad():
                # Synchronize around the forward pass so the wall-clock time
                # includes the GPU work.
                torch.cuda.synchronize()
                start_time = time.time()
                pred_disp, entropy = model(imgL, imgR)
                torch.cuda.synchronize()
                ttime = (time.time() - start_time)
                print('time = %.2f' % (ttime * 1000))
            pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

            # Crop the padding off again, measured against the resized input.
            top_pad = max_h - imgL_o.shape[0]
            left_pad = max_w - imgL_o.shape[1]
            entropy = entropy[top_pad:, :pred_disp.shape[1] -
                              left_pad].cpu().numpy()
            pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

            # save predictions
            scene = im0_path.split('/')[-2]
            # exist_ok avoids the check-then-create race of
            # exists() + makedirs().
            os.makedirs('%s/%s' % (args.outdir, scene), exist_ok=True)
            idxname = '%s/disp0%s' % (scene, method_name)

            # resize to highres
            # Disparities are divided by the scale factor because pixel
            # distances shrink/grow together with the image.
            pred_disp = cv2.resize(pred_disp / args.testres,
                                   (imgsize[1], imgsize[0]),
                                   interpolation=cv2.INTER_LINEAR)

            # clip while keep inf
            # ``x != x`` is True only for NaN, so NaN and +inf are marked.
            invalid = np.logical_or(pred_disp == np.inf,
                                    pred_disp != pred_disp)
            pred_disp[invalid] = np.inf

            np.save('%s/%s-disp.npy' % (args.outdir, scene), (pred_disp))
            np.save('%s/%s-ent.npy' % (args.outdir, scene), (entropy))
            # Preview PNGs are normalised by the largest valid value.
            cv2.imwrite('%s/%s-disp.png' % (args.outdir, scene),
                        pred_disp / pred_disp[~invalid].max() * 255)
            cv2.imwrite('%s/%s-ent.png' % (args.outdir, scene),
                        entropy / entropy.max() * 255)

            with open('%s/%s.pfm' % (args.outdir, idxname), 'w') as f:
                # Rows are flipped vertically before being handed to save_pfm.
                save_pfm(f, pred_disp[::-1, :])
            with open('%s/%s/time%s.txt' % (args.outdir, scene, method_name),
                      'w') as f:
                f.write(str(ttime))

    torch.cuda.empty_cache()
Ejemplo n.º 4
0
def main():
    """Predict, log and score disparities for every validation pair.

    Relies on names defined elsewhere in the file: ``args``, ``model`` (the
    network is reached through ``model.module``), the parallel path lists
    ``left_val`` / ``right_val`` / ``disp_val_L``, the ``score_avg`` dict and
    the helpers ``get_transform``, ``disparityregression``, ``readPFM``,
    ``save_pfm``, ``get_metrics`` and ``mean``.

    Results go to ``./mb_submission_output/<args.name>``: per scene a
    ``disp0HSM.pfm``, a ``timeHSM.txt`` and a 16-bit ``<scene>.png``, plus a
    final metrics text file.  Inputs and outputs are also logged to wandb.

    Raises:
        FileExistsError: if the output directory already exists, so an
            earlier run is never overwritten.
    """
    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join("./mb_submission_output", args.name)
    if os.path.exists(out_path):
        raise FileExistsError
    os.mkdir(out_path)

    for (left_img_path, right_img_path, disp_path) in zip(left_val, right_val, disp_val_L):
        print(left_img_path)
        # Keep only the first three channels (drops an alpha channel if any).
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        gt_o = readPFM(disp_path)[0]

        imgsize = imgL_o.shape[:2]

        # Dropping the last 7 characters removes a file name such as
        # 'im0.png', leaving the scene directory.  The value after '=' on the
        # 7th line of calib.txt is used as disparity range (presumably the
        # ``ndisp`` entry of the Middlebury calibration format -- confirm).
        with open(os.path.join(left_img_path[:-7], 'calib.txt')) as f:
            lines = f.readlines()
        max_disp = int(lines[6].split('=')[-1])

        ## change max disp
        # Scale the range by the test resolution and round it up to a
        # multiple of 64; a result of exactly 64 is bumped to 128.
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()

        # Add the leading batch axis.
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad
        # Height and width are padded up to the next multiple of 64: rows are
        # added at the top, columns at the right (despite the name left_pad).
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64

        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        pad_spec = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
        # np.pad is the public spelling; the np.lib.pad alias is not available
        # in NumPy 2.x.
        imgL = np.pad(imgL, pad_spec, mode='constant', constant_values=0)
        imgR = np.pad(imgR, pad_spec, mode='constant', constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)
        # The padded inputs are logged while they are still CPU tensors.
        wandb.log(
            {"imgL": wandb.Image(imgL, caption=str(imgL.shape)), "imgR": wandb.Image(imgR, caption=str(imgR.shape))})
        imgL = imgL.cuda()
        imgR = imgR.cuda()
        with torch.no_grad():
            # Synchronize around the forward pass so the wall-clock time
            # includes the GPU work.
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop the padding off again, measured against the resized input.
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        scene = left_img_path.split('/')[-2]
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs('%s/%s' % (out_path, scene), exist_ok=True)
        idxname = '%s/disp0HSM' % (scene)

        # NOTE(review): the PFM is written from the cropped prediction at
        # network resolution, i.e. before the division by args.testres and
        # the resize to the original size below -- confirm this is intended.
        with open('%s/%s.pfm' % (out_path, idxname), 'w') as f:
            # Rows are flipped vertically before being handed to save_pfm.
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/%s/timeHSM.txt' % (out_path, scene), 'w') as f:
            f.write(str(ttime))

        # resize to highres
        # Disparities are divided by the scale factor because pixel distances
        # shrink/grow together with the image.
        pred_disp = cv2.resize(pred_disp / args.testres, (imgsize[1], imgsize[0]), interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        # ``x != x`` is True only for NaN, so NaN and +inf are both marked.
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # Disparities are stored as 16-bit PNGs scaled by 256.
        # NOTE(review): pred_disp can still contain +inf here; what the
        # uint16 cast produces for those pixels should be confirmed.
        pred_disp_png = (pred_disp * 256).astype('uint16')
        cv2.imwrite(os.path.join(out_path, scene + ".png"), pred_disp_png)
        entropy_png = (entropy * 256).astype('uint16')
        # cv2.imwrite(os.path.join(out_dir, img_name), entropy_png)

        wandb.log({"disp": wandb.Image(pred_disp_png, caption=str(pred_disp_png.shape)),
                   "entropy": wandb.Image(entropy_png, caption=str(entropy_png.shape))})

        metrics = get_metrics(gt_o, pred_disp, max_disp)

        # Collect the per-image values; they are averaged after the loop.
        for (key, val) in metrics.items():
            if key not in score_avg:
                score_avg[key] = []
            score_avg[key].append(val)

        torch.cuda.empty_cache()

    # Replace every list of per-image values by its mean (in place, because
    # score_avg is shared state defined outside this function).
    for (key, val) in score_avg.items():
        score_avg[key] = mean(val)

    print(score_avg)
    # The file must be opened for writing: the default read mode fails
    # because the file does not exist yet and does not allow write().
    # The file name is kept exactly as it was spelled before.
    with open(os.path.join(out_path, "metrrics.txt"), 'w') as file:
        file.write(str(score_avg))
Ejemplo n.º 5
0
def _configure_max_disp(model, max_disp, testres):
    """Set the disparity search range of ``model`` for the current image.

    The scaled disparity (``max_disp * testres``) is rounded up to a multiple
    of 64, a result of exactly 64 is replaced by 128, and the four
    disparity-regression modules are rebuilt for the new range.
    """
    scaled = max_disp * testres
    tmpdisp = int(scaled // 64 * 64)
    if (scaled / 64 * 64) > tmpdisp:
        model.module.maxdisp = tmpdisp + 64
    else:
        model.module.maxdisp = tmpdisp
    if model.module.maxdisp == 64:
        model.module.maxdisp = 128

    # NOTE: disp_reg8 is built with divisor 16 (same as disp_reg16), exactly
    # as in the original code.
    model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                 16).cuda()
    model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                  16).cuda()
    model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                  32).cuda()
    model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                  64).cuda()


def main():
    """Run HSM inference over an RVC dataset and write the predictions.

    Parses the command line, builds the model (optionally loading
    ``--loadmodel``), then for every sample yielded by ``RVCDataset``:
    picks the test resolution, reconfigures the model's maximum disparity,
    runs the forward pass, crops the padding, rescales the disparity to the
    original image size and writes ``disp.png``, ``ent.png``, a ``.pfm`` file
    and a timing text file under ``<name>/<img_name>/``. Inputs and outputs
    are also logged to Weights & Biases.

    Raises:
        ValueError: in adaptive-resolution mode, when the dataset type of a
            sample is not one of the three known values (0, 1, 2).
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name',
                        default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=-1,  # default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3), '
        'can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--debug_image', type=str, default=None)
    # Must be float: the default (3.5) is fractional and `type=int` would
    # reject values such as "3.5" given on the command line.
    parser.add_argument("--eth_testres", type=float, default=3.5)
    args = parser.parse_args()

    wandb.init(name=args.name,
               project="rvc_stereo",
               save_code=True,
               magic=True,
               config=args)

    # A test resolution of -1 means "choose per dataset type" inside the loop.
    use_adaptive_testres = args.testres == -1

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        pretrained_dict = torch.load(args.loadmodel)
        # Drop every entry whose key contains 'disp'; the disparity
        # regression modules are rebuilt per image below.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    model.eval()

    if args.testres > 0:
        dataset = RVCDataset(args.datapath, testres=args.testres)
    else:
        dataset = RVCDataset(args.datapath, eth_testres=args.eth_testres)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0)

    # Leftover debug dump location; only used when the directory exists so
    # the script does not crash on other machines.
    debug_dump_path = "/home/isaac/high-res-stereo/debug/rvc/out.pt"

    steps = 0
    for (imgL, imgR, max_disp, original_image_size, dataset_type,
         img_name) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        img_name = img_name[0]

        if args.debug_image is not None and args.debug_image not in img_name:
            continue

        print(img_name)

        if use_adaptive_testres:
            if dataset_type == 0:  # Middlebury
                testres = 1
            elif dataset_type == 2:  # presumably KITTI -- confirm in RVCDataset
                testres = 1.8
            elif dataset_type == 1:
                # Presumably ETH3D. Use the same ratio the dataset was built
                # with so the disparity is rescaled consistently (the
                # default is 3.5, the previously hard-coded value).
                testres = args.eth_testres
            else:
                raise ValueError(
                    "name of the folder does not contain any of: kitti, middlebury, eth3d"
                )
        else:
            testres = args.testres

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp
        _configure_max_disp(model, max_disp, testres)
        print("    max disparity = " + str(model.module.maxdisp))

        ##fast pad: round height/width up to the next multiple of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64

        wandb.log(
            {
                "imgL":
                wandb.Image(imgL,
                            caption=img_name + ", " + str(tuple(imgL.shape))),
                "imgR":
                wandb.Image(imgR,
                            caption=img_name + ", " + str(tuple(imgR.shape)))
            },
            step=steps)

        with torch.no_grad():
            # Synchronize around the forward pass so the wall-clock time
            # covers the GPU work.
            torch.cuda.synchronize()
            start_time = time.time()

            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            if os.path.isdir(os.path.dirname(debug_dump_path)):
                torch.save(pred_disp, debug_dump_path)

            print('    time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Batch size is 1, so take element 0 and convert to plain ints; these
        # are used both for slicing and as the cv2.resize target size.
        orig_h = int(original_image_size[0][0])
        orig_w = int(original_image_size[1][0])

        # Crop the padded rows at the top and the padded columns at the right.
        top_pad = max_h - orig_h
        left_pad = max_w - orig_w
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name
        os.makedirs('%s/%s' % (args.name, idxname), exist_ok=True)
        idxname = '%s/disp0%s' % (idxname, args.name)
        out_dir_name = idxname.split('/')[0]

        # resize to highres
        pred_disp = cv2.resize(pred_disp / testres, (orig_w, orig_h),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf (NaN != NaN, so this also catches NaNs)
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # Normalize both maps to the 0-255 range for visualization.
        pred_disp_png = pred_disp / pred_disp[~invalid].max() * 255
        cv2.imwrite('%s/%s/disp.png' % (args.name, out_dir_name),
                    pred_disp_png)
        entropy_png = entropy / entropy.max() * 255
        cv2.imwrite('%s/%s/ent.png' % (args.name, out_dir_name), entropy_png)

        out_pfm_path = '%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            # Rows are written in reversed order.
            save_pfm(f, pred_disp[::-1, :])
        with open(
                '%s/%s/time%s.txt' % (args.name, out_dir_name, args.name),
                'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        caption = img_name + ", " + str(tuple(
            pred_disp_png.shape)) + ", max disparity = " + str(
                max_disp) + ", time = " + str(ttime)
        wandb.log(
            {
                "disparity": wandb.Image(pred_disp_png, caption=caption),
                "entropy": wandb.Image(entropy_png,
                                       caption=str(entropy_png.shape))
            },
            step=steps)
        torch.cuda.empty_cache()
        steps += 1