Example #1
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

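    # Build the validation dataset; the input resolution is taken from the saved encoder checkpoint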
    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path,
                                               filenames,
                                               encoder_dict['height'],
                                               encoder_dict['width'],
                                               opt.frame_ids,
                                               4,
                                               is_train=False,
                                               tag=opt.dataset,
                                               load_meta=True,
                                               is_sep_train_seman=False)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(opt.data_path,
                                           filenames,
                                           encoder_dict['height'],
                                           encoder_dict['width'],
                                           opt.frame_ids,
                                           4,
                                           is_train=False,
                                           tag=opt.dataset)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=True)

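    # Build the encoder/decoder pair and restore weights; only encoder keys present in the current model are loaded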
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc,
                                              isSwitch=True,
                                              isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # x = torch.ones(2, 2, requires_grad=True)
    # print(x)
    # y = x + 2 + x
    # y = y.detach()
    # print(y)
    # z = y * y * 3
    # out = z.mean()
    # print(z, out)
    # out.backward()
    # print(x.grad)

    ##-------------------- Visualization parameters ----------------------------##
    sfx = torch.nn.Softmax(dim=1)
    mergeDisp = Merge_MultDisp(opt.scales,
                               batchSize=opt.batch_size,
                               isMulChannel=opt.isMulChannel)
    svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual'
    index = 0
    isvisualize = True
    viewEdgeMerge = False
    isHist = False
    useGtSeman = True
    viewSurfaceNormal = True
    viewSelfOcclu = True
    viewDispUp = True
    viewSmooth = True
    viewMulReg = True
    viewBorderRegress = False
    viewBorderSimilarity = False
    viewRandomSample = True
    viewSemanReg = False
    viewDepthGuess = False
    height = 256
    width = 512
    tensor23dPts = Tensor23dPts()

    if isHist:
        rec = np.zeros((19, 100))

    if opt.isMulChannel:
        app = os.path.join('mulDispOn', opt.model_name)
    else:
        app = os.path.join('mulDispOff', opt.model_name)

    dirpath = os.path.join(svRoot, app)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    if viewEdgeMerge:
        comp1dgrad = Comp1dgrad().cuda()

    if viewSurfaceNormal:
        compsn = ComputeSurfaceNormal(height=height,
                                      width=width,
                                      batch_size=opt.batch_size).cuda()

    if viewSelfOcclu:
        selfclu = SelfOccluMask().cuda()

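    # Main loop: move each batch to the GPU, run the network, and save the requested visualizations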
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            # if idx != 12:
            #     continue
            for key, ipt in inputs.items():
                if not (key == 'height' or key == 'width' or key == 'tag'
                        or key == 'cts_meta'):
                    inputs[key] = ipt.to(torch.device("cuda"))
            input_color = inputs[("color", 0, 0)].cuda()
            # input_color = torch.flip(input_color, dims=[3])
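            # One encoder pass feeds two decoder calls: one for semantic logits, one for disparity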
            features = encoder(input_color)
            outputs = dict()
            outputs.update(
                depth_decoder(features,
                              computeSemantic=True,
                              computeDepth=False))
            outputs.update(
                depth_decoder(features,
                              computeSemantic=False,
                              computeDepth=True))

            # view the processed semantic separate training data
            # for viewInd in range(opt.batch_size):
            #     label = inputs['semanTrain_label']
            #     visualize_semantic(label[viewInd, 0, :, :].cpu().numpy()).show()
            #     fig_rgb = inputs['semanTrain_rgb'][viewInd, :, :, :].permute(1, 2, 0).cpu().numpy()
            #     fig_rgb = (fig_rgb * 255).astype(np.uint8)
            #     fig_rgb = pil.fromarray(fig_rgb)
            #     fig_rgb.show()

            if isHist:
                mulDisp = outputs[('mul_disp', 0)]
                scaled_disp, mulDepth = disp_to_depth(mulDisp, 0.1, 100)
                mulDepth = mulDepth.cpu()
                for i in range(mulDisp.shape[1]):
                    rec[i, :] += torch.histc(mulDepth[:, i, :, :],
                                             bins=100,
                                             min=0,
                                             max=100).numpy()

            if isvisualize:
                if useGtSeman:
                    # outputs[('mul_disp', 0)][:,2,:,:] = outputs[('mul_disp', 0)][:,2,:,:] * 0
                    # outputs[('mul_disp', 0)][:, 12, :, :] = outputs[('mul_disp', 0)][:, 12, :, :] * 0
                    mergeDisp(inputs, outputs, eval=False)
                else:
                    mergeDisp(inputs, outputs, eval=True)

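                # Convert the merged disparity to depth and apply the stereo scale factor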
                dispMap = outputs[('disp', 0)]
                scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100)
                depthMap = depthMap * STEREO_SCALE_FACTOR
                # _, mul_depthMap = disp_to_depth(outputs[('mul_disp', 0)], 0.1, 100)
                # mul_depthMap = mul_depthMap * STEREO_SCALE_FACTOR

                if viewDispUp:
                    fig_dispup = compDispUp.visualize(scaled_disp,
                                                      viewindex=index)

                if viewSmooth:
                    rgb = inputs[('color_aug', 0, 0)]
                    smoothfig = comSmooth.visualize(rgb=rgb,
                                                    disp=scaled_disp,
                                                    viewindex=index)

                if useGtSeman:
                    fig_seman = tensor2semantic(inputs['seman_gt'],
                                                ind=index,
                                                isGt=True)
                else:
                    fig_seman = tensor2semantic(outputs[('seman', 0)],
                                                ind=index)

                if viewSemanReg:
                    foregroundType = [
                        11, 12, 13, 14, 15, 16, 17, 18
                    ]  # person, rider, car, truck, bus, train, motorcycle, bicycle
                    softmaxedSeman = F.softmax(outputs[('seman', 0)], dim=1)
                    forePredMask = torch.sum(
                        softmaxedSeman[:, foregroundType, :, :],
                        dim=1,
                        keepdim=True)
                    foreGtMask = torch.ones(dispMap.shape).cuda().byte()

                    for m in foregroundType:
                        foreGtMask = foreGtMask * (inputs['seman_gt'] != m)
                    foreGtMask = 1 - foreGtMask
                    foreGtMask = foreGtMask.float()

                    forePredMask[forePredMask > 0.5] = 1
                    forePredMask[forePredMask <= 0.5] = 0

                    forePredMask = foreGtMask
                    rdSampleSeman.visualizeBorderSample(dispMap,
                                                        forePredMask,
                                                        gtMask=foreGtMask,
                                                        viewIndex=index)

                    cm = plt.get_cmap('magma')
                    viewForePred = forePredMask[index, :, :, :].squeeze(
                        0).detach().cpu().numpy()
                    viewForePred = (cm(viewForePred) * 255).astype(np.uint8)
                    # pil.fromarray(viewForePred).show()

                    viewForeGt = foreGtMask[index, :, :, :].squeeze(
                        0).detach().cpu().numpy()
                    viewForeGt = (cm(viewForeGt) * 255).astype(np.uint8)
                    # pil.fromarray(viewForeGt).show()
                    forePredictCombined = np.concatenate(
                        [viewForePred, viewForeGt], axis=0)
                    # pil.fromarray(forePredictCombined).show()
                    pil.fromarray(forePredictCombined).save(
                        os.path.join(dirpath,
                                     str(idx) + '_fg.png'))

                if viewDepthGuess:
                    wallType = [2, 3, 4]  # Building, wall, fence
                    roadType = [0, 1, 9]  # road, sidewalk, terrain
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle

                    wallTypeMask = torch.ones(dispMap.shape).cuda().byte()
                    roadTypeMask = torch.ones(dispMap.shape).cuda().byte()
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in wallType:
                            wallTypeMask = wallTypeMask * (inputs['seman_gt']
                                                           != m)
                        wallTypeMask = (1 - wallTypeMask).float()

                        for m in roadType:
                            roadTypeMask = roadTypeMask * (inputs['seman_gt']
                                                           != m)
                        roadTypeMask = (1 - roadTypeMask).float()

                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = (1 - foreGroundMask).float()
                    originalSize = [2048, 1024]
                    # currentSize = np.array([dispMap.shape[3], dispMap.shape[2]])
                    # scaleFac = np.eye(4)
                    # scaleFac[0,0] = currentSize[0] / originalSize[0]
                    # scaleFac[1,1] = currentSize[1] / originalSize[1]
                    # scaleFac = torch.Tensor(scaleFac).view(1,4,4).repeat(opt.batch_size, 1, 1).cuda()
                    # scaledIntrinsic = scaleFac @ inputs['realIn']
                    scaledIntrinsic = inputs['realIn']
                    depthGuess.visualizeDepthGuess(
                        realDepth=depthMap,
                        dispAct=dispMap,
                        foredgroundMask=foreGroundMask,
                        wallTypeMask=wallTypeMask,
                        groundTypeMask=roadTypeMask,
                        intrinsic=scaledIntrinsic,
                        extrinsic=inputs['realEx'],
                        semantic=inputs['seman_gt_eval'],
                        cts_meta=inputs['cts_meta'],
                        viewInd=index)
                    # realDepth, foredgroundMask, wallTypeMask, groundTypeMask, intrinsic, extrinsic

                fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index)
                fig_disp = tensor2disp(outputs[('disp', 0)], ind=index)
                fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d(
                    depthMap,
                    ind=index,
                    intrinsic=inputs['cts_meta']['intrinsic'][index, :, :],
                    extrinsic=inputs['cts_meta']['extrinsic'][index, :, :],
                    gtmask=inputs['cts_meta']['mask'][index, :, :],
                    gtdepth=inputs['cts_meta']['depthMap'][index, :, :],
                    semanticMap=inputs['seman_gt_eval'][index, :, :])
                # check:
                # torch.inverse(inputs['invcamK'][index, :, :] @ inputs['realIn'][index, :, :]) - inputs['cts_meta']['extrinsic'][index, :, :]
                fig_grad = None

                if viewSurfaceNormal:
                    # surnorm = compsn.visualize(depthMap = depthMap, invcamK = inputs['invcamK'].cuda(), orgEstPts = veh_coord, gtEstPts = veh_coord_gt, viewindex = index)
                    surnorm = compsn.visualize(
                        depthMap=depthMap,
                        invcamK=inputs['invcamK'].cuda(),
                        orgEstPts=veh_coord,
                        gtEstPts=veh_coord_gt,
                        viewindex=index)
                    surnormMap = compsn(depthMap=depthMap,
                                        invcamK=inputs['invcamK'].cuda())

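                # Regularization visualizations: sky depth, building/road surface normals, and pole/traffic-sign surface variance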
                if viewMulReg:
                    depthMapLoc = depthMap / STEREO_SCALE_FACTOR
                    skyId = 10
                    skyMask = inputs['seman_gt'] == skyId
                    skyerr = objReg.visualize_regularizeSky(depthMapLoc,
                                                            skyMask,
                                                            viewInd=index)

                    wallType = [2, 3, 4]  # Building, wall, fence
                    roadType = [0, 1, 9]  # road, sidewalk, terrain
                    permuType = [5, 7]  # Pole, traffic sign
                    chanWinSize = 5

                    wallMask = torch.ones_like(skyMask)
                    roadMask = torch.ones_like(skyMask)
                    permuMask = torch.ones_like(skyMask)

                    with torch.no_grad():
                        for m in wallType:
                            wallMask = wallMask * (inputs['seman_gt'] != m)
                        wallMask = 1 - wallMask
                        wallMask = wallMask[:, :, 1:-1, 1:-1]

                        for m in roadType:
                            roadMask = roadMask * (inputs['seman_gt'] != m)
                        roadMask = 1 - roadMask
                        roadMask = roadMask[:, :, 1:-1, 1:-1]

                        for m in permuType:
                            permuMask = permuMask * (inputs['seman_gt'] != m)
                        permuMask = 1 - permuMask
                        permuMask = permuMask[:, :, 1:-1, 1:-1]

                    BdErrFig, viewRdErrFig = objReg.visualize_regularizeBuildingRoad(
                        surnormMap, wallMask, roadMask, dispMap, viewInd=index)

                    padSize = int((chanWinSize - 1) / 2)
                    permuMask = permuMask[:, :, padSize:-padSize,
                                          padSize:-padSize]
                    surVarFig = objReg.visualize_regularizePoleSign(
                        surnormMap, permuMask, dispMap, viewInd=index)

                if viewBorderRegress:
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                    backgroundType = [
                        0, 1, 2, 3, 4, 8, 9, 10
                    ]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                    suppressType = [255]  # Suppress no label lines
                    # foreGroundMask = torch.sum(inputs['seman_gt'][:, foregroundType, :, :], dim=1, keepdim=True)
                    # backGroundMask = torch.sum(inputs['seman_gt'][:, backgroundType, :, :], dim=1, keepdim=True)
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    suppresMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = 1 - foreGroundMask
                        for m in backgroundType:
                            backGroundMask = backGroundMask * (
                                inputs['seman_gt'] != m)
                        backGroundMask = 1 - backGroundMask
                        for m in suppressType:
                            suppresMask = suppresMask * (inputs['seman_gt'] !=
                                                         m)
                        suppresMask = 1 - suppresMask
                        suppresMask = suppresMask.float()
                        combinedMask = torch.cat(
                            [foreGroundMask, backGroundMask], dim=1).float()

                    # borderRegFig = borderRegress.visualize_computeBorder(dispMap, combinedMask, suppresMask = suppresMask, viewIndex=index)
                    borderRegFig = None

                else:
                    borderRegFig = None

                # if viewBorderSimilarity:
                #     foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
                #                       18]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                #     backgroundType = [0, 1, 2, 3, 4, 8, 9,
                #                       10]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                #     suppressType = [255]  # Suppress no label lines
                #     foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                #     backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                #     suppresMask = torch.ones(dispMap.shape).cuda().byte()
                #
                #     with torch.no_grad():
                #         for m in foregroundType:
                #             foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                #         foreGroundMask = 1 - foreGroundMask
                #         for m in backgroundType:
                #             backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                #         backGroundMask = 1 - backGroundMask
                #         for m in suppressType:
                #             suppresMask = suppresMask * (inputs['seman_gt'] != m)
                #         suppresMask = 1 - suppresMask
                #         suppresMask = suppresMask.float()
                #         combinedMask = torch.cat([foreGroundMask, backGroundMask], dim=1).float()
                #
                #     borderSimFig = borderSim.visualize_borderSimilarity(dispMap, foreGroundMask.float(), suppresMask = suppresMask, viewIndex=index)

                if viewRandomSample:
                    foregroundType = [
                        5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
                    ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                    backgroundType = [
                        0, 1, 2, 3, 4, 8, 9, 10
                    ]  # road, sidewalk, building, wall, fence, vegetation, terrain, sky
                    suppressType = [255]  # Suppress no label lines
                    foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    backGroundMask = torch.ones(dispMap.shape).cuda().byte()
                    suppresMask = torch.ones(dispMap.shape).cuda().byte()

                    with torch.no_grad():
                        for m in foregroundType:
                            foreGroundMask = foreGroundMask * (
                                inputs['seman_gt'] != m)
                        foreGroundMask = 1 - foreGroundMask
                        for m in suppressType:
                            suppresMask = suppresMask * (inputs['seman_gt'] !=
                                                         m)
                        suppresMask = 1 - suppresMask
                        suppresMask = suppresMask.float()
                        foreGroundMask = foreGroundMask.float()

                    rdSampleOnBorder.visualize_randomSample(dispMap,
                                                            foreGroundMask,
                                                            suppresMask,
                                                            viewIndex=index)
                    # rdSampleOnBorder.randomSampleReg(dispMap, foreGroundMask)

                if viewEdgeMerge:
                    grad_disp = comp1dgrad(outputs[('mul_disp', 0)])
                    fig_grad = tensor2disp(grad_disp, ind=index, vmax=1)
                    fig_grad = fig_grad.resize([512, 256])

                if viewSelfOcclu:
                    fl = inputs[("K", 0)][:, 0, 0]
                    bs = torch.abs(inputs["stereo_T"][:, 0, 3])
                    clufig, suppressedDisp = selfclu.visualize(dispMap,
                                                               viewind=index)

                if fig_grad is not None:
                    grad_seman = (
                        np.array(fig_grad)[:, :, 0:3].astype(float) * 0.7 +
                        np.array(fig_seman).astype(float) * 0.3).astype(
                            np.uint8)
                    # combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_grad)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)]
                    combined = [
                        grad_seman,
                        np.array(fig_disp)[:, :, 0:3],
                        np.array(fig_rgb)
                    ]
                    combined = np.concatenate(combined, axis=1)
                else:
                    if viewSurfaceNormal and viewSelfOcclu:
                        surnorm = surnorm.resize([512, 256])
                        surnorm_mixed = pil.fromarray(
                            (np.array(surnorm) * 0.2 +
                             np.array(fig_disp)[:, :, 0:3] * 0.8).astype(
                                 np.uint8))
                        disp_seman = (
                            np.array(fig_disp)[:, :, 0:3].astype(float) *
                            0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        suppressed_disp_seman = (
                            np.array(suppressedDisp)[:, :, 0:3].astype(
                                float) * 0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        rgb_seman = (
                            np.array(fig_seman).astype(float) * 0.5 +
                            np.array(fig_rgb).astype(float) * 0.5).astype(
                                np.uint8)

                        # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype(
                        #     np.float) * 0.7).astype(np.uint8)
                        comb1 = np.concatenate([
                            np.array(suppressed_disp_seman)[:, :, 0:3],
                            np.array(suppressedDisp)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb2 = np.concatenate([
                            np.array(disp_seman)[:, :, 0:3],
                            np.array(fig_disp)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb3 = np.concatenate([
                            np.array(surnorm_mixed)[:, :, 0:3],
                            np.array(surnorm)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb4 = np.concatenate([
                            np.array(fig_seman)[:, :, 0:3],
                            np.array(rgb_seman)[:, :, 0:3]
                        ],
                                               axis=1)
                        comb6 = np.concatenate([
                            np.array(clufig)[:, :, 0:3],
                            np.array(fig_dispup)[:, :, 0:3]
                        ],
                                               axis=1)

                        fig3dsize = np.ceil(
                            np.array([
                                comb4.shape[1], comb4.shape[1] /
                                fig_3d.size[0] * fig_3d.size[1]
                            ])).astype(int)
                        comb5 = np.array(fig_3d.resize(fig3dsize))
                        # combined = np.concatenate([comb1, comb6, comb2, comb3, comb4, comb5], axis=0)
                        combined = np.concatenate([comb1, comb2, comb4, comb3],
                                                  axis=0)
                    else:
                        disp_seman = (
                            np.array(fig_disp)[:, :, 0:3].astype(float) *
                            0.8 +
                            np.array(fig_seman).astype(float) * 0.2).astype(
                                np.uint8)
                        rgb_seman = (
                            np.array(fig_seman).astype(float) * 0.5 +
                            np.array(fig_rgb).astype(float) * 0.5).astype(
                                np.uint8)
                        # combined = [np.array(disp_seman)[:,:,0:3], np.array(fig_disp)[:, :, 0:3], np.array(fig_seman), np.array(fig_rgb)]
                        combined = [
                            np.array(disp_seman)[:, :, 0:3],
                            np.array(fig_disp)[:, :, 0:3],
                            np.array(fig_seman),
                            np.array(rgb_seman)
                        ]
                        combined = np.concatenate(combined, axis=1)

                fig = pil.fromarray(combined)
                # fig.show()
                fig.save(os.path.join(dirpath, str(idx) + '.png'))
                if borderRegFig is not None:
                    borderRegFig.save(
                        os.path.join(dirpath,
                                     str(idx) + '_borderRegress.png'))
                # fig_3d.save(os.path.join(dirpath, str(idx) + '_fig3d.png'))
                # for k in range(10):
                #     fig_disp = tensor2disp(outputs[('disp', 0)], ind=k)
                #     fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=k)
                #     combined = [np.array(fig_disp)[:, :, 0:3], np.array(fig_rgb)]
                #     combined = np.concatenate(combined, axis=1)
                #     fig = pil.fromarray(combined)
                #     fig.save(
                #         os.path.join('/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/MoredispOrg' + str(k) + '.png'))

                # fig_rgb.save(os.path.join(svRoot, app, 'rgb' + str(idx) + '.png'))
                # fig_seman.save(os.path.join(svRoot, app, 'semantic'+ str(idx) + '.png'))
                # fig_disp.save(os.path.join(svRoot, app, 'disp'+ str(idx) + '.png'))
                # a = inputs['seman_gt_eval']
                # scaled_disp, _ = disp_to_depth(outputs[('disp', 0)], 0.1, 100)
                print("%dth saved" % idx)
    # If the histogram was computed, plot per-class depth distributions
    if isHist:
        svPath = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/mul_channel_depth'
        carId = 13
        prob = copy.deepcopy(rec)
        ind = np.arange(prob.shape[1] * 2)
        for i in range(prob.shape[0]):
            prob[i, :] = prob[i, :] / np.sum(prob[i, :])
        for i in range(prob.shape[0]):
            trainStr = trainId2label[i][0]
            fig, ax = plt.subplots()
            rects1 = ax.bar(ind[0::2], prob[carId, :], label='obj:car')
            rects2 = ax.bar(ind[1::2], prob[i, :], label='obj:' + trainStr)
            ax.set_ylabel('Meter in percentile')
            ax.set_xlabel('Meters')
            ax.set_title('Scale Changes between scale car and scale %s' %
                         trainStr)
            ax.legend()
            plt.savefig(os.path.join(svPath, str(i)), dpi=200)
            plt.close(fig)
Example #2
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    is_use_disparity = True
    is_eval_morph = True
    is_cts_bst = True
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

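    # When is_use_disparity is set, external disparity predictions are read from disk; is_cts_bst swaps in precomputed semantic predictions later in the loop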
    if is_use_disparity:
        getDisp = get_disparity_predict()
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTISemanticDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
        train_dataset_predict = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=True, load_morphed_depth=False)
        train_dataset_gt = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=False, load_morphed_depth=False)
    else:
        raise ValueError("No predefined dataset")
    dataloader_predict = DataLoader(train_dataset_predict, 1, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)
    dataloader_gt = DataLoader(train_dataset_gt, 1, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)
    dataloader_predict_iter = iter(dataloader_predict)
    dataloader_gt_iter = iter(dataloader_gt)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    sfx = torch.nn.Softmax(dim=1)
    depth_pos = '/media/shengjie/other/sceneUnderstanding/bts/result_bts_eigen/raw'


    print("Evaluation starts")


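    # Fixed evaluation resolution; the crop ratios below match the standard Garg/Eigen KITTI evaluation crop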
    width = 1216
    height = 352
    height_s = int(0.40810811 * height)
    height_e = int(0.99189189 * height)
    width_s = int(0.03594771 * width)
    width_e = int(0.96405229 * width)

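    # Morph_semantics refines the semantic prediction using depth/disparity edges; its working size differs between the two branches below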
    if not is_use_disparity:
        ms = Morph_semantics(height=206, width=1129)
    else:
        ms = Morph_semantics(height=218, width=1153)
    with torch.no_grad():
        for idx in range(dataloader_gt.__len__()):
            inputs_predict = dataloader_predict_iter.__next__()
            inputs_gt = dataloader_gt_iter.__next__()
            if not is_cts_bst:
                inputs_predict['seman_gt_eval'] = inputs_predict['seman_gt_eval']
            else:
                tcomp = filenames[idx].split(' ')
                path = os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/cts_best_seman', tcomp[0].split('/')[0] +'_' + tcomp[0].split('/')[1] +  '_' + tcomp[1].zfill(10) + '.png')
                cts_pred = Image.open(path)
                cts_pred = np.array(cts_pred)
                for k in np.unique(cts_pred):
                    cts_pred[cts_pred == k] = labels[k].trainId
                inputs_predict['seman_gt_eval'] = torch.from_numpy(cts_pred).unsqueeze(0)
            # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show()
            # tensor2semantic(inputs_gt['seman_gt_eval'].unsqueeze(1), ind=0).show()
            # tensor2semantic(inputs_predict['seman_gt_eval'].unsqueeze(1), ind=0).show()

            # input_color = inputs[("color", 0, 0)].cuda()
            # outputs = depth_decoder(encoder(input_color),computeSemantic = True, computeDepth = False)
            resized_gt = inputs_gt['seman_gt_eval'].unsqueeze(1)
            # resized_gt = F.interpolate(inputs_gt['seman_gt_eval'].unsqueeze(1).float(), [height, width], mode='nearest')
            # resized_gt = resized_gt.squeeze(1).byte()
            resized_pred = F.interpolate(inputs_predict['seman_gt_eval'].unsqueeze(1).float(), [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='nearest')
            resized_pred = resized_pred.byte()
            resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)],
                                        [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]],
                                        mode='bilinear', align_corners=True)

            resized_pred_list = list()
            resized_morph_list = list()
            groundTruthNp_list = list()
            if not is_use_disparity:
                t_height = resized_gt.shape[2]
                t_width = resized_gt.shape[3]
                top_margin = int(t_height - 352)
                left_margin = int((t_width - 1216) / 2)
                resized_gt = resized_gt[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]
                resized_pred = resized_pred[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]
                # tensor2semantic(resized_gt, ind=0).show()
                # tensor2semantic(resized_pred, ind=0).show()

                resized_rgb = F.interpolate(inputs_gt[('color', 0, 0)], [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]], mode='bilinear', align_corners=True)
                resized_rgb = resized_rgb[:,:,top_margin:top_margin + 352, left_margin:left_margin + 1216]

                pred_depth = get_depth_predict(filenames[idx])

                resized_depth = pred_depth
                # resized_gt = resized_gt.cpu().numpy().astype(np.uint8)
                # resized_pred = resized_pred.cpu().numpy().astype(np.uint8)
                # resized_depth = pred_depth
                # visualize_semantic(gt[0,:,:]).show()
                # visualize_semantic(pred[0,:,:]).show()
                # pred_depth = get_depth_predict(filenames[idx])
                # pred_depth = F.interpolate(pred_depth.float(), [height, width], mode='bilinear', align_corners=True)

                # resized_pred = resized_pred.unsqueeze(1)
                # resized_gt = resized_gt.unsqueeze(1)
                # tensor2semantic(resized_pred, ind=0).show()
                # tensor2semantic(resized_gt, ind=0).show()
                # tensor2disp(1 / pred_depth, vmax=0.15, ind=0).show()
                # disp_map = tensor2disp(1 / pred_depth, vmax=0.15, ind=0)
                # disp_map_combined = combined_2_img(disp_map, tensor2rgb(resized_rgb, ind=0), 0.5)

                pred_depth_cropped = resized_depth[:,:,height_s : height_e, width_s : width_e]
                resized_pred_cropped = resized_pred[:,:,height_s : height_e, width_s : width_e]
                resized_gt_cropped = resized_gt[:,:,height_s : height_e, width_s : width_e]
                resized_rgb_cropped = resized_rgb[:,:,height_s : height_e, width_s : width_e]
                # tensor2semantic(resized_pred_cropped, ind=0).show()
                # tensor2semantic(resized_gt_cropped, ind=0).show()
                # tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0).show()
                figseman_gt = tensor2semantic(resized_gt_cropped, ind=0)
                figseman_pred = tensor2semantic(resized_pred_cropped, ind=0)
                figdisp = tensor2disp(1 / pred_depth_cropped, vmax=0.15, ind=0)
                combined_2_img(figseman_pred, figdisp, 0.7).show()
                combined_2_img(figseman_gt, figdisp, 0.7).show()
                seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped)
            else:
                pred_depth = getDisp.read_disparity_predict(filenames[idx])
                pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).unsqueeze(0)
                pred_depth = F.interpolate(pred_depth, [inputs_gt['seman_gt_eval'].shape[1], inputs_gt['seman_gt_eval'].shape[2]],
                                        mode='bilinear', align_corners=True)
                # tensor2disp(pred_depth, ind=0, percentile=95).show()
                if pred_depth.shape[2] < 371 or pred_depth.shape[3] < 1197:
                    print("Error")
                pred_depth_cropped = pred_depth[:, :, 153:371, 44:1197]
                resized_pred_cropped = resized_pred[:, :, 153:371, 44:1197]
                resized_gt_cropped = resized_gt[:, :, 153:371, 44:1197]
                resized_rgb_cropped = resized_rgb[:, :, 153:371, 44:1197]
                # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0)
                # figseman = tensor2semantic(resized_gt_cropped, ind=0)
                # figcombined = combined_2_img(figdisp, figseman, 0.7)
                # figcombined.show()
                #
                # figdisp = tensor2disp(pred_depth_cropped, percentile=95, ind=0)
                # figseman = tensor2semantic(resized_pred_cropped, ind=0)
                # figcombined = combined_2_img(figdisp, figseman, 0.7)
                # figcombined.show()
                seman_morphed = ms.morh_semantics(pred_depth_cropped, resized_pred_cropped)
                ms.compute_edge_distance(pred_depth_cropped, resized_pred_cropped, resized_gt_cropped)
            resized_pred_list.append(resized_pred_cropped.squeeze(1).detach().cpu().numpy())
            resized_morph_list.append(seman_morphed.squeeze(1).detach().cpu().numpy().astype(np.uint8))
            groundTruthNp_list.append(resized_gt_cropped.squeeze(1).detach().cpu().numpy())



            sv_path = '/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph'
            gt_blended = combined_2_img(tensor2semantic(resized_gt_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            pred_blended = combined_2_img(tensor2semantic(resized_pred_cropped, ind=0), tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            morph_blended = combined_2_img(tensor2semantic(seman_morphed, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.2)
            improved_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) > (resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte())
            deterized_region = (seman_morphed.cuda().byte() == resized_gt_cropped.cuda().byte()) < (
                        resized_pred_cropped.cuda().byte() == resized_gt_cropped.cuda().byte())
            improve_blend = combined_2_img(tensor2disp(improved_region, vmax = 1, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.6)
            deterized_blend = combined_2_img(tensor2disp(deterized_region, vmax = 1, ind=0),
                                          tensor2rgb(resized_rgb_cropped, ind=0), 0.6)
            cat_img = concat_imgs([gt_blended, pred_blended, morph_blended, improve_blend, deterized_blend])
            cat_img.save(os.path.join('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/semantic_morph', str(idx) + '.png'))

            # groundTruthNp = resized_gt_cropped.squeeze(1).detach().cpu().numpy()
            # if is_eval_morph:
            #     predictionNp = seman_morphed.byte().squeeze(1).detach().cpu().numpy()
            # else:
            #     predictionNp = resized_pred_cropped.squeeze(1).detach().cpu().numpy()
            print("Finish %dth batch" % idx)
    ms.show_dis_comp()
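    # Score both variants against the ground truth: pp == 0 uses the original predictions, pp == 1 the morphed ones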
    for pp in range(2):
        nbPixels = 0
        count255 = 0
        confMatrix = generateMatrix(args)
        for k in range(len(resized_pred_list)):
            groundTruthNp = groundTruthNp_list[k]
            if pp == 0:
                predictionNp = resized_pred_list[k]
            else:
                predictionNp = resized_morph_list[k]
            nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2]


            encoding_value = 256  # precomputed
            encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp

            values, cnt = np.unique(encoded, return_counts=True)

            for value, c in zip(values, cnt):
                pred_id = value % encoding_value
                gt_id = int((value - pred_id) / encoding_value)
                if pred_id == 255 or gt_id == 255:
                    count255 = count255 + c
                    continue
                if gt_id not in args.evalLabels:
                    printError("Unknown label with id {:}".format(gt_id))
                confMatrix[gt_id][pred_id] += c

        if confMatrix.sum() + count255 != nbPixels:
            printError(
                'Number of analyzed pixels and entries in confusion matrix disagree: confMatrix {}, pixels {}'.format(
                    confMatrix.sum(), nbPixels))
        classScoreList = {}
        for label in args.evalLabels:
            labelName = trainId2label[label].name
            classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args)
        vals = np.array(list(classScoreList.values()))
        print(vals)
        mIOU = np.mean(vals[np.logical_not(np.isnan(vals))])
        if pp == 0:
            print("Original mIOU is %f" % mIOU)
        else:
            print("Morphed mIOU is %f" % mIOU)
Example #3
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, tag=opt.dataset)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            opt.data_path, filenames, encoder_dict['height'], encoder_dict['width'],
            [0,'s'], 4, tag='kitti', is_train=False, img_ext='png',
            load_meta=False, is_load_semantics=True,
            is_predicted_semantics=True, load_morphed_depth=False)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    sfx = torch.nn.Softmax(dim=1)

    print("Evaluation starts")

    confMatrix = generateMatrix(args)
    nbPixels = 0
    count255 = 0
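    # Accumulate a confusion matrix over the whole split; label id 255 is treated as ignore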
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            input_color = inputs[("color", 0, 0)].cuda()
            outputs = depth_decoder(encoder(input_color),computeSemantic = True, computeDepth = False)

            gt = inputs['seman_gt_eval'].cpu().numpy().astype(np.uint8)
            pred = sfx(outputs[('seman', 0)]).detach()
            pred = torch.argmax(pred, dim=1).type(torch.float).unsqueeze(1)
            pred = F.interpolate(pred, [gt.shape[1], gt.shape[2]], mode='nearest')
            pred = pred.squeeze(1).cpu().numpy().astype(np.uint8)
            # visualize_semantic(gt[0,:,:]).show()
            # visualize_semantic(pred[0,:,:]).show()

            groundTruthNp = gt
            predictionNp = pred
            nbPixels = nbPixels + groundTruthNp.shape[0] * groundTruthNp.shape[1] * groundTruthNp.shape[2]

            # encoding_value = max(groundTruthNp.max(), predictionNp.max()).astype(np.int32) + 1
            encoding_value = 256  # precomputed
            encoded = (groundTruthNp.astype(np.int32) * encoding_value) + predictionNp

            values, cnt = np.unique(encoded, return_counts=True)

            for value, c in zip(values, cnt):
                pred_id = value % encoding_value
                gt_id = int((value - pred_id) / encoding_value)
                if pred_id == 255 or gt_id == 255:
                    count255 = count255 + c
                    continue
                if gt_id not in args.evalLabels:
                    printError("Unknown label with id {:}".format(gt_id))
                confMatrix[gt_id][pred_id] += c
            print("Finish %dth batch" % idx)
    if confMatrix.sum() + count255 != nbPixels:
        printError(
            'Number of analyzed pixels and entries in confusion matrix disagree: confMatrix {}, pixels {}'.format(
                confMatrix.sum(), nbPixels))

    classScoreList = {}
    for label in args.evalLabels:
        labelName = trainId2label[label].name
        classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args)
    vals = np.array(list(classScoreList.values()))
    mIOU = np.mean(vals[np.logical_not(np.isnan(vals))])
    # if opt.save_pred_disps:
    #     output_path = os.path.join(
    #         opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)

    print("mIOU is %f" % mIOU)
Example #4
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    viewPythonVer = False
    viewCudaVer = True

    if viewCudaVer:
        bnmorph = BNMorph(height=opt.height, width=opt.width).cuda()

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(
            opt.data_path,
            filenames,
            opt.height,
            opt.width,
            opt.frame_ids,
            4,
            is_train=False,
            tag=opt.dataset,
            load_meta=True,
            direction_left=opt.direction_left)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(
            opt.data_path,
            filenames,
            opt.height,
            opt.width,
            opt.frame_ids,
            4,
            is_train=False,
            tag=opt.dataset,
            is_load_semantics=opt.use_kitti_gt_semantics,
            is_predicted_semantics=opt.is_predicted_semantics,
            direction_left=opt.direction_left)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False, num_input_images=2)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(
            encoder.num_ch_enc,
            isSwitch=True,
            isMulChannel=opt.isMulChannel,
            outputtwoimage=(opt.outputtwoimage == True))
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    viewIndex = 0
    tool = grad_computation_tools(batch_size=opt.batch_size,
                                  height=opt.height,
                                  width=opt.width).cuda()
    auto_morph = AutoMorph(height=opt.height, width=opt.width)
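    # For each batch: predict disparity from the stereo pair, extract disparity/semantic edge maps, then morph the disparity toward the semantic boundaries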
    with torch.no_grad():
        for idx, inputs in enumerate(dataloader):
            for key, ipt in inputs.items():
                if not (key == 'height' or key == 'width' or key == 'tag'
                        or key == 'cts_meta' or key == 'file_add'):
                    inputs[key] = ipt.to(torch.device("cuda"))

            input_color = torch.cat(
                [inputs[("color_aug", 0, 0)], inputs[("color_aug", 's', 0)]],
                dim=1).cuda()
            # input_color = inputs[("color", 0, 0)].cuda()
            # tensor2rgb(inputs[("color_aug", 0, 0)], ind=0).show()
            # tensor2rgb(inputs[("color_aug", 's', 0)], ind=0).show()
            features = encoder(input_color)
            outputs = dict()
            outputs.update(
                depth_decoder(features,
                              computeSemantic=True,
                              computeDepth=False))
            outputs.update(
                depth_decoder(features,
                              computeSemantic=False,
                              computeDepth=True))

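            # Select the disparity channel for the left or right view from the two-channel output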
            if not opt.view_right:
                disparityMap = outputs[('mul_disp', 0)][:, 0:1, :, :]
            else:
                disparityMap = outputs[('mul_disp', 0)][:, 1:2, :, :]
            depthMap = torch.clamp(disparityMap, max=80)
            fig_seman = tensor2semantic(inputs['seman_gt'],
                                        ind=viewIndex,
                                        isGt=True)
            fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=viewIndex)
            fig_disp = tensor2disp(disparityMap, ind=viewIndex, vmax=0.1)

            segmentationMapGt = inputs['seman_gt']
            foregroundType = [
                5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18
            ]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
            foregroundMapGt = torch.ones(disparityMap.shape).cuda().byte()
            for m in foregroundType:
                foregroundMapGt = foregroundMapGt * (segmentationMapGt != m)
            foregroundMapGt = (1 - foregroundMapGt).float()

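            # Edge maps: gradients of the predicted disparity and of the ground-truth foreground mask, thresholded to binary below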
            disparity_grad = torch.abs(
                tool.convDispx(disparityMap)) + torch.abs(
                    tool.convDispy(disparityMap))
            semantics_grad = torch.abs(
                tool.convDispx(foregroundMapGt)) + torch.abs(
                    tool.convDispy(foregroundMapGt))
            disparity_grad = disparity_grad * tool.zero_mask
            semantics_grad = semantics_grad * tool.zero_mask

            disparity_grad_bin = disparity_grad > tool.disparityTh
            semantics_grad_bin = semantics_grad > tool.semanticsTh

            # tensor2disp(disparity_grad_bin, ind=viewIndex, vmax=1).show()
            # tensor2disp(semantics_grad_bin, ind=viewIndex, vmax=1).show()

            if viewPythonVer:
                disparity_grad_bin = disparity_grad_bin.detach().cpu().numpy()
                semantics_grad_bin = semantics_grad_bin.detach().cpu().numpy()

                disparityMap_to_processed = disparityMap.detach().cpu().numpy(
                )[viewIndex, 0, :, :]
                dispMap_morphed, dispMap_morphRec = auto_morph.automorph(
                    disparity_grad_bin[viewIndex, 0, :, :],
                    semantics_grad_bin[viewIndex,
                                       0, :, :], disparityMap_to_processed)

                fig_disp_processed = visualizeNpDisp(dispMap_morphed, vmax=0.1)
                overlay_processed = pil.fromarray(
                    (np.array(fig_disp_processed) * 0.7 +
                     np.array(fig_seman) * 0.3).astype(np.uint8))
                overlay_org = pil.fromarray(
                    (np.array(fig_disp) * 0.7 +
                     np.array(fig_seman) * 0.3).astype(np.uint8))
                combined_fig = pil.fromarray(
                    np.concatenate([
                        np.array(overlay_org),
                        np.array(overlay_processed),
                        np.array(fig_disp),
                        np.array(fig_disp_processed)
                    ],
                                   axis=0))
                combined_fig.save(
                    "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization/border_morph_l2_3/"
                    + str(idx) + ".png")
            if viewCudaVer:
                # morphedx, morphedy = bnmorph.find_corresponding_pts(disparity_grad_bin, semantics_grad_bin, disparityMap, fig_seman, 10)
                # morphedx = (morphedx / (opt.width - 1) - 0.5) * 2
                # morphedy = (morphedy / (opt.height - 1) - 0.5) * 2
                # grid = torch.cat([morphedx, morphedy], dim = 1).permute(0,2,3,1)
                # disparityMap_morphed = F.grid_sample(disparityMap, grid, padding_mode="border")
                # fig_morphed = tensor2disp(disparityMap_morphed, vmax=0.08, ind=0)
                # fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0)
                # fig_combined = pil.fromarray(np.concatenate([np.array(fig_morphed), np.array(fig_disp)], axis=0))
                # fig_combined.show()
                svpath = os.path.join(opt.load_weights_folder).split('/')
                try:
                    svpath = os.path.join(
                        "/media/shengjie/other/sceneUnderstanding/Stereo_SDNET/visualization",
                        svpath[-3])
                    os.mkdir(svpath)
                except FileExistsError:
                    pass  # directory already exists
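                # bnmorph.find_corresponding_pts matches disparity-edge pixels to nearby semantic-edge
                # pixels (assumption: it returns per-pixel target x/y coordinates plus a blending
                # coefficient). The coordinates are rescaled from pixel indices to the [-1, 1] range
                # expected by F.grid_sample, which then warps the disparity map toward the semantic borders.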
                morphedx, morphedy, coeff = bnmorph.find_corresponding_pts(
                    disparity_grad_bin, semantics_grad_bin)
                morphedx = (morphedx / (opt.width - 1) - 0.5) * 2
                morphedy = (morphedy / (opt.height - 1) - 0.5) * 2
                grid = torch.cat([morphedx, morphedy],
                                 dim=1).permute(0, 2, 3, 1)
                disparityMap_morphed = F.grid_sample(disparityMap,
                                                     grid,
                                                     padding_mode="border")

                fig_morphed = tensor2disp(disparityMap_morphed,
                                          vmax=0.08,
                                          ind=0)
                fig_disp = tensor2disp(disparityMap, vmax=0.08, ind=0)
                fig_morphed_overlayed = pil.fromarray(
                    (np.array(fig_seman) * 0.5 +
                     np.array(fig_morphed) * 0.5).astype(np.uint8))
                fig_disp_overlayed = pil.fromarray(
                    (np.array(fig_seman) * 0.5 +
                     np.array(fig_disp) * 0.5).astype(np.uint8))
                # fig_rgb =  tensor2rgb(inputs[("color", 0, 0)], ind=0)
                # fig_combined = pil.fromarray(np.concatenate([np.array(fig_disp_overlayed), np.array(fig_morphed_overlayed), np.array(fig_disp), np.array(fig_morphed), np.array(fig_rgb)], axis=0))
                fig_combined = pil.fromarray(
                    np.concatenate([
                        np.array(fig_disp_overlayed),
                        np.array(fig_morphed_overlayed),
                        np.array(fig_disp),
                        np.array(fig_morphed)
                    ],
                                   axis=0))
                fig_combined.save(os.path.join(svpath, str(idx) + ".png"))
Example #5
0
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.split, "val_files.txt"))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)
    # the encoder checkpoint's recorded height and width are less important here (opt.height/opt.width are used instead)

    if opt.use_stereo:
        opt.frame_ids.append("s")
    if opt.dataset == 'cityscape':
        dataset = datasets.CITYSCAPERawDataset(opt.data_path, filenames,
                                           opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, load_meta=True)
    elif opt.dataset == 'kitti':
        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           opt.height, opt.width, opt.frame_ids, 4, is_train=False, tag=opt.dataset, is_load_semantics=True)
    else:
        raise ValueError("No predefined dataset")
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=True)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    if opt.switchMode == 'on':
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, isSwitch=True, isMulChannel=opt.isMulChannel)
    else:
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

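    # The checkpoint dict also stores metadata such as 'height' and 'width', so only keys
    # that exist in the encoder's own state_dict are copied over.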
    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    ##--------------------Visualization parameters here----------------------------##
    sfx = torch.nn.Softmax(dim=1)
    mergeDisp = Merge_MultDisp(opt.scales, batchSize = opt.batch_size, isMulChannel = opt.isMulChannel)
    svRoot = '/media/shengjie/other/sceneUnderstanding/monodepth2/internalRe/figure_visual'
    index = 0
    isvisualize = True
    useGtSeman = True
    useSeman = False
    viewSurfaceNormal = False
    viewSelfOcclu = False
    viewMutuallyRegularizedBorder= False
    viewLiuSemanCompare = False
    viewSecondOrder = False
    viewBorderConverge = True
    expBin = True
    height = 288
    width = 960
    tensor23dPts = Tensor23dPts(height=height, width=width)

    dirpath = os.path.join(svRoot, opt.model_name)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    if viewSurfaceNormal:
        compsn = ComputeSurfaceNormal(height = height, width = width, batch_size = opt.batch_size).cuda()

    if viewSelfOcclu:
        selfclu = SelfOccluMask().cuda()

    if viewMutuallyRegularizedBorder:
        mrb = MutuallyRegularizedBorders(height=height, width=width, batchsize=opt.batch_size)
        iouFore_gtdepth2gtseman = list()
        iouBack_gtdepth2gtseman = list()
        iouValid_gtdepth2gtseman = list()

        iouFore_estdepth2gtseman = list()
        iouBack_estdepth2gtseman = list()
        iouValid_estdepth2gtseman = list()

        iouFore_estdepth2estseman = list()
        iouBack_estdepth2estseman = list()
        iouValid_estdepth2estseman = list()

    if viewLiuSemanCompare:
        cmpBCons = computeBorderDistance()
        compGrad = computeGradient()
        semanest2semangt = np.zeros(31)
        depth2disp = np.zeros(31)
        depth2semangt = np.zeros(31)
        disp2semanest = np.zeros(31)
        sfx = torch.nn.Softmax(dim=1)
        cmpBCons.cuda()
        compGrad.cuda()

    if viewSecondOrder:
        compSecGrad = SecondOrderGrad().cuda()

    if viewBorderConverge:
        borderConverge = BorderConverge(height, width, opt.batch_size).cuda()

    if expBin:
        expbinmap = expBinaryMap(height, width, opt.batch_size).cuda()

    computedNum = 0
    # with torch.no_grad():
    for idx, inputs in enumerate(dataloader):
        for key, ipt in inputs.items():
            if key not in ('height', 'width', 'tag', 'cts_meta'):
                inputs[key] = ipt.to(torch.device("cuda"))
        input_color = inputs[("color", 0, 0)].cuda()
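        # Run the shared encoder once, then call the decoder twice: once for the semantic head
        # and once for the depth head, merging both result dicts into `outputs`.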
        features = encoder(input_color)
        outputs = dict()
        outputs.update(depth_decoder(features, computeSemantic=True, computeDepth=False))
        outputs.update(depth_decoder(features, computeSemantic=False, computeDepth=True))

        if isvisualize:
            if useGtSeman:
                mergeDisp(inputs, outputs, eval=False)
            else:
                mergeDisp(inputs, outputs, eval=True)

            dispMap = outputs[('disp', 0)]
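            # disp_to_depth converts the sigmoid disparity into depth within [0.1, 100] m; the
            # result is scaled by STEREO_SCALE_FACTOR and clamped to the 80 m evaluation cap.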
            scaled_disp, depthMap = disp_to_depth(dispMap, 0.1, 100)
            depthMap = depthMap * STEREO_SCALE_FACTOR
            depthMap = torch.clamp(depthMap, max=80)

            if useGtSeman:
                fig_seman = tensor2semantic(inputs['seman_gt'], ind=index, isGt=True)
            else:
                if useSeman:
                    fig_seman = tensor2semantic(outputs[('seman', 0)], ind=index)
                else:
                    fig_seman = inputs[('color', 0, 0)][index, :, :, :].permute(1,2,0).cpu().numpy()
                    fig_seman = (fig_seman * 255).astype(np.uint8)
                    fig_seman = pil.fromarray(fig_seman)

            fig_rgb = tensor2rgb(inputs[('color', 0, 0)], ind=index)
            fig_disp = tensor2disp(outputs[('disp', 0)], ind=index, vmax=0.1)

            gtmask = (inputs['depth_gt'] > 0).float()
            gtdepth = inputs['depth_gt']
            velo = inputs['velo']
            fig_3d, veh_coord, veh_coord_gt = tensor23dPts.visualize3d(depthMap.detach(), ind=index,
                                                                       intrinsic_in=inputs['realIn'],
                                                                       extrinsic_in=inputs['realEx'],
                                                                       gtmask_in=gtmask,
                                                                       gtdepth_in=gtdepth,
                                                                       semanticMap=None,
                                                                       velo_in=velo,
                                                                       rgb_in = inputs[('color', 's', 0)],
                                                                       disp_in = outputs[('disp', 0)].detach()
                                                                       )
            if viewMutuallyRegularizedBorder:
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky
                foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                backGroundMask = torch.ones(dispMap.shape).cuda().byte()

                with torch.no_grad():
                    for m in foregroundType:
                        foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                    foreGroundMask = 1 - foreGroundMask
                    for m in backgroundType:
                        backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                    backGroundMask = 1 - backGroundMask

                # tensor2disp(foreGroundMask, ind=0, vmax=1).show()
                # tensor2disp(backGroundMask, ind=0, vmax=1).show()
                # tensor2rgb(inputs[('color', 0, 0)], ind=0).show()
                # tensor2semantic(inputs['seman_gt'],ind=0,isGt=True).show()
                iouForeMean, iouBackMean, isvalid = mrb.visualization(gtdepth, foreGroundMask, backGroundMask, viewind= index, rgb=inputs[('color', 0, 0)])
                iouFore_gtdepth2gtseman.append(iouForeMean)
                iouBack_gtdepth2gtseman.append(iouBackMean)
                iouValid_gtdepth2gtseman.append(isvalid)


                iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask, backGroundMask,
                                                                      viewind=index, rgb=inputs[('color', 0, 0)])

                iouFore_estdepth2gtseman.append(iouForeMean)
                iouBack_estdepth2gtseman.append(iouBackMean)
                iouValid_estdepth2gtseman.append(isvalid)

                semanMapEst = outputs[('seman', 0)]
                semanMapEst_sfxed = sfx(semanMapEst)
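                # Sum the softmax probabilities over the foreground and background class channels,
                # treat the remainder as a third "other" channel, and take the channel-wise argmax
                # to obtain binary foreground/background masks from the estimated semantics.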
                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1)
                backGroundMask_est = torch.sum(semanMapEst_sfxed[:, backgroundType, :, :], dim=1).unsqueeze(1)
                other_est = 1 - (foreGroundMask_est + backGroundMask_est)
                tot_est = torch.cat([foreGroundMask_est, backGroundMask_est, other_est], dim=1)
                foreGroundMask_est_bin = (torch.argmax(tot_est, dim=1) == 0).unsqueeze(1)
                backGroundMask_est_bin = (torch.argmax(tot_est, dim=1) == 1).unsqueeze(1)
                iouForeMean, iouBackMean, isvalid = mrb.visualization(1 - dispMap, foreGroundMask_est_bin, backGroundMask_est_bin,
                                                                      viewind=index, rgb=inputs[('color', 0, 0)])
                iouFore_estdepth2estseman.append(iouForeMean)
                iouBack_estdepth2estseman.append(iouBackMean)
                iouValid_estdepth2estseman.append(isvalid)

                # tensor2disp(foreGroundMask_est_bin, vmax=1, ind=0).show()
                # tensor2disp(backGroundMask_est_bin, vmax=1, ind=0).show()
            if viewLiuSemanCompare:
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18] # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                backgroundType = [2, 3, 4, 8, 9, 10] #building, wall, fence, vegetation, terrain, sky
                foreGroundMask = torch.ones(dispMap.shape).cuda().byte()
                backGroundMask = torch.ones(dispMap.shape).cuda().byte()

                with torch.no_grad():
                    for m in foregroundType:
                        foreGroundMask = foreGroundMask * (inputs['seman_gt'] != m)
                    foreGroundMask = 1 - foreGroundMask
                    for m in backgroundType:
                        backGroundMask = backGroundMask * (inputs['seman_gt'] != m)
                    backGroundMask = 1 - backGroundMask

                dispMapEst = outputs[('disp', 0)]
                semanMapEst = outputs[('seman', 0)]
                semanMapGt = inputs['seman_gt']
                depthMapGt = inputs['depth_gt']

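                # Gradients are computed on the sparse ground-truth depth, upsampled bilinearly to
                # the working resolution, and masked with a nearest-neighbour validity map so the
                # interpolation does not bleed into pixels without valid ground-truth depth.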
                sparseDepthmapGrad = compGrad.computegrad11_sparse(depthMapGt)
                sparseDepthmapGrad_bin = sparseDepthmapGrad > 0
                sparseDepthmapGrad = F.interpolate(sparseDepthmapGrad, [height, width], mode='bilinear', align_corners=True)
                sparseDepthmapGrad_bin = F.interpolate(sparseDepthmapGrad_bin.float(), [height, width], mode='nearest')
                sparseDepthmapGrad = sparseDepthmapGrad * sparseDepthmapGrad_bin
                # depthMapGt_bin = depthMapGt > 1e-1
                # depthMapGt = F.interpolate(sparseDepthmapGrad, (height, width), mode='bilinear', align_corners=False)
                # depthMapGt_bin = F.interpolate(depthMapGt_bin.float(), (height, width), mode='nearest')
                # depthMapGt = depthMapGt * depthMapGt_bin
                # compGrad.computegrad11_sparse(depthMapGt)
                # tensor2disp(depthMapGt>0, ind=0, vmax=1).show()


                semanMapEst_sfxed = sfx(semanMapEst)
                semanMapEst_inds = torch.argmax(semanMapEst_sfxed, dim=1).unsqueeze(1)
                seman_est_fig = tensor2semantic(semanMapEst_inds, ind=0)
                seman_gt_fig = tensor2semantic(semanMapGt, ind=0)
                depthMapGt_fig = tensor2disp(depthMapGt, ind=0, vmax=20)
                depthMapGt_fig = depthMapGt_fig.resize((width, height), resample=pil.BILINEAR)


                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:,foregroundType,:,:], dim=1).unsqueeze(1)

                dispMapGrad = compGrad.computegrad11(dispMapEst)
                foreGroundMaskGrad = compGrad.computegrad11(foreGroundMask.float())
                foreGroundMask_estGrad = compGrad.computegrad11(foreGroundMask_est)
                sparseDepthmapGrad_fig = tensor2disp(sparseDepthmapGrad, ind=0, vmax=20)
                dispMapGrad_fig = tensor2disp(dispMapGrad, ind=0, vmax=0.08)
                foreGroundMaskGrad_fig = tensor2disp(foreGroundMaskGrad, ind=0, vmax=1)
                foreGroundMask_estGrad_fig = tensor2disp(foreGroundMask_estGrad, ind=0, vmax=1.5)

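                # Threshold each gradient map into a binary edge map; the thresholds differ per
                # source (disparity, estimated/ground-truth semantics, sparse depth), presumably
                # because each gradient lives on a different scale.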
                dispMapGrad_bin = dispMapGrad > 0.011
                foreGroundMaskGrad_bin = foreGroundMaskGrad > 0.5
                foreGroundMask_estGrad_bin = foreGroundMask_estGrad > 0.6
                sparseDepthmapGrad_bin = sparseDepthmapGrad > 9
                dispMapGrad_bin_fig = tensor2disp(dispMapGrad_bin, ind=0, vmax=1)
                foreGroundMaskGrad_bin_fig = tensor2disp(foreGroundMaskGrad_bin, ind=0, vmax=1)
                foreGroundMask_estGrad_bin_fig = tensor2disp(foreGroundMask_estGrad_bin, ind=0, vmax=1)
                sparseDepthmapGrad_bin_fig = tensor2disp(sparseDepthmapGrad_bin, ind=0, vmax=1)

                visualizeImage = np.concatenate([np.array(fig_rgb), np.array(fig_disp)[:,:,0:3], np.array(seman_est_fig), np.array(seman_gt_fig), np.array(depthMapGt_fig)[:,:,0:3]], axis=0)
                visualizeImage_grad = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_fig)[:,:,0:3], np.array(foreGroundMaskGrad_fig)[:,:,0:3], np.array(sparseDepthmapGrad_fig)[:,:,0:3]], axis=0)
                visualizeimage_grad_bin = np.concatenate([np.array(fig_rgb), np.array(dispMapGrad_bin_fig)[:,:,0:3], np.array(foreGroundMask_estGrad_bin_fig)[:,:,0:3], np.array(foreGroundMaskGrad_bin_fig)[:,:,0:3], np.array(sparseDepthmapGrad_bin_fig)[:,:,0:3]], axis=0)
                tot = np.concatenate([np.array(visualizeImage), np.array(visualizeImage_grad), np.array(visualizeimage_grad_bin)], axis=1)
                pil.fromarray(tot).save('/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/%d.png' % idx)
                # pil.fromarray(tot).show()
                # pil.fromarray(visualizeImage).show()
                # pil.fromarray(visualizeImage_grad).show()
                # pil.fromarray(visualizeimage_grad_bin).show()


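                # Accumulate 31-bin histograms of pixel distances between pairs of binary edge maps
                # (assumption: the last bin collects out-of-range distances); they are normalized
                # and plotted once the loop finishes.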
                semanest2semangt = semanest2semangt + cmpBCons.computeDistance(foreGroundMask_estGrad_bin, foreGroundMaskGrad_bin)
                depth2disp = depth2disp + cmpBCons.computeDistance(sparseDepthmapGrad_bin, dispMapGrad_bin)
                depth2semangt = depth2semangt + cmpBCons.computeDistance(sparseDepthmapGrad_bin, foreGroundMaskGrad_bin)
                disp2semanest = disp2semanest + cmpBCons.computeDistance(dispMapGrad_bin, foreGroundMask_estGrad_bin)

                # tensor2disp(dispMapEst, ind=index, percentile=90).show()

            if viewBorderConverge:
                semanMapEst = outputs[('seman', 0)]
                semanMapEst_sfxed = sfx(semanMapEst)
                foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17,
                                  18]  # pole, traffic light, traffic sign, person, rider, car, truck, bus, train, motorcycle, bicycle
                foreGroundMask_est = torch.sum(semanMapEst_sfxed[:, foregroundType, :, :], dim=1).unsqueeze(1)
                dispMapEst = outputs[('disp', 0)]

                # borderConverge.visualization(dispMapEst, foreGroundMask_est)
                if expBin:
                    expbinmap.visualization3(disparity=dispMapEst, semantics=foreGroundMask_est)
                a = 1

            if viewSecondOrder:
                disp2order = compSecGrad.computegrad11(outputs[('disp', 0)])
                tensor2disp(disp2order, ind=0, percentile=95).show()

            if viewSurfaceNormal:
                surnorm = compsn.visualize(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float(), orgEstPts=veh_coord,
                                           gtEstPts=veh_coord_gt, viewindex=index)
                surnormMap = compsn(depthMap=depthMap, invcamK=inputs['invcamK'].cuda().float())

            if viewSelfOcclu:
                fl = inputs[("K", 0)][:, 0, 0]
                bs = torch.abs(inputs["stereo_T"][:, 0, 3])
                clufig, suppressedDisp = selfclu.visualize(dispMap, viewind=index)

            if viewSurfaceNormal and viewSelfOcclu:
                surnorm = surnorm.resize([width, height])
                surnorm_mixed = pil.fromarray(
                    (np.array(surnorm) * 0.2 + np.array(fig_disp)[:, :, 0:3] * 0.8).astype(np.uint8))
                disp_seman = (np.array(fig_disp)[:, :, 0:3].astype(np.float64) * 0.8 +
                              np.array(fig_seman).astype(np.float64) * 0.2).astype(np.uint8)
                suppressed_disp_seman = (np.array(suppressedDisp)[:, :, 0:3].astype(np.float64) * 0.8 +
                                         np.array(fig_seman).astype(np.float64) * 0.2).astype(np.uint8)
                rgb_seman = (np.array(fig_seman).astype(np.float64) * 0.5 +
                             np.array(fig_rgb).astype(np.float64) * 0.5).astype(np.uint8)

                # clud_disp = (np.array(clufig)[:, :, 0:3].astype(np.float) * 0.3 + np.array(fig_disp)[:, :, 0:3].astype(
                #     np.float) * 0.7).astype(np.uint8)
                comb1 = np.concatenate([np.array(suppressed_disp_seman)[:, :, 0:3], np.array(suppressedDisp)[:, :, 0:3]], axis=1)
                comb2 = np.concatenate([np.array(disp_seman)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1)
                # comb3 = np.concatenate([np.array(errFig)[:, :, 0:3], np.array(surnorm)[:, :, 0:3]], axis=1)
                comb4 = np.concatenate([np.array(fig_seman)[:, :, 0:3], np.array(rgb_seman)[:, :, 0:3]],
                                       axis=1)
                comb6 = np.concatenate([np.array(clufig)[:, :, 0:3], np.array(fig_disp)[:, :, 0:3]], axis=1)

                fig3dsize = np.ceil(np.array([comb4.shape[1], comb4.shape[1] / fig_3d.size[0] * fig_3d.size[1]])).astype(np.int64)
                comb5 = np.array(fig_3d.resize(fig3dsize))

            # fig = pil.fromarray(combined)
            # fig.save(os.path.join(dirpath, str(idx) + '.png'))
            print("%dth img finished" % idx)
            # if idx >=4:
            #     break
    if viewLiuSemanCompare:
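        # Normalize each accumulated histogram into a distribution; the mean and standard
        # deviation are computed over the distance bins, excluding the last bin.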
        semanest2semangt_p = semanest2semangt / np.sum(semanest2semangt)
        semanest2semangt_p_ = semanest2semangt_p[0:-1]
        mean = np.sum(np.arange(len(semanest2semangt_p_)) * semanest2semangt_p_)
        std = np.sqrt(np.sum((np.arange(len(semanest2semangt_p_)) - mean) ** 2 * semanest2semangt_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(semanest2semangt_p)), semanest2semangt_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixels, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of semantic, est to gt")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/seman_est2gt.png")
        plt.close(fig)

        depth2disp_p = depth2disp / np.sum(depth2disp)
        depth2disp_p_ = depth2disp_p[0:-1]
        mean = np.sum(np.arange(len(depth2disp_p_)) * depth2disp_p_)
        std = np.sqrt(np.sum((np.arange(len(depth2disp_p_)) - mean) ** 2 * depth2disp_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(depth2disp_p)), depth2disp_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixels, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth, gt to est")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth_gt2est.png")
        plt.close(fig)

        depth2semangt_p = depth2semangt / np.sum(depth2semangt)
        depth2semangt_p_ = depth2semangt_p[0:-1]
        mean = np.sum(np.arange(len(depth2semangt_p_)) * depth2semangt_p_)
        std = np.sqrt(np.sum((np.arange(len(depth2semangt_p_)) - mean) ** 2 * depth2semangt_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(depth2semangt_p)), depth2semangt_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixels, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth and semantic, gt")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_gt.png")
        plt.close(fig)

        disp2semanest_p = disp2semanest / np.sum(disp2semanest)
        disp2semanest_p_ = disp2semanest_p[0:-1]
        mean = np.sum(np.arange(len(disp2semanest_p_)) * disp2semanest_p_)
        std = np.sqrt(np.sum((np.arange(len(disp2semanest_p_)) - mean) ** 2 * disp2semanest_p_))
        fig, ax = plt.subplots()
        ax.bar(np.arange(len(disp2semanest_p)), disp2semanest_p)
        ax.set_ylabel('Percentile')
        ax.set_xlabel('Distance in pixels, mean %f, std %f' % (mean, std))
        ax.set_title("Pixel distance of depth and semantic, est")
        fig.savefig("/media/shengjie/other/sceneUnderstanding/SDNET/visualization/borderConsistAnalysis/depth2seman_est.png")
        plt.close(fig)

    if viewMutuallyRegularizedBorder:
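        # Average the per-image foreground/background IoUs, weighted by the per-image validity
        # flag so that frames without valid regions do not contribute to the mean.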
        iouFore_gtdepth2gtseman = np.array(iouFore_gtdepth2gtseman)
        iouBack_gtdepth2gtseman = np.array(iouBack_gtdepth2gtseman)
        iouValid_gtdepth2gtseman = np.array(iouValid_gtdepth2gtseman)
        iouFore_gtdepth2gtsemanMean = np.sum(iouFore_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman)
        iouBack_gtdepth2gtsemanMean = np.sum(iouBack_gtdepth2gtseman * iouValid_gtdepth2gtseman) / np.sum(iouValid_gtdepth2gtseman)

        iouFore_estdepth2gtseman = np.array(iouFore_estdepth2gtseman)
        iouBack_estdepth2gtseman = np.array(iouBack_estdepth2gtseman)
        iouValid_estdepth2gtseman = np.array(iouValid_estdepth2gtseman)
        iouFore_estdepth2gtsemanMean = np.sum(iouFore_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)
        iouBack_estdepth2gtsemanMean = np.sum(iouBack_estdepth2gtseman * iouValid_estdepth2gtseman) / np.sum(iouValid_estdepth2gtseman)

        iouFore_estdepth2estseman = np.array(iouFore_estdepth2estseman)
        iouBack_estdepth2estseman = np.array(iouBack_estdepth2estseman)
        iouValid_estdepth2estseman = np.array(iouValid_estdepth2estseman)
        iouFore_estdepth2estsemanMean = np.sum(iouFore_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)
        iouBack_estdepth2estsemanMean = np.sum(iouBack_estdepth2estseman * iouValid_estdepth2estseman) / np.sum(iouValid_estdepth2estseman)

        print("iouFore_gtdepth2gtsemanMean is % f" % iouFore_gtdepth2gtsemanMean)
        print("iouBack_gtdepth2gtsemanMean is % f" % iouBack_gtdepth2gtsemanMean)
        print("iouFore_estdepth2gtsemanMean is % f" % iouFore_estdepth2gtsemanMean)
        print("iouBack_estdepth2gtsemanMean is % f" % iouBack_estdepth2gtsemanMean)
        print("iouFore_estdepth2estsemanMean is % f" % iouFore_estdepth2estsemanMean)
        print("iouBack_estdepth2estsemanMean is % f" % iouBack_estdepth2estsemanMean)