Example 1
def export_gt_depths_mc(opt):

    split_folder = Path(opt.split)
    lines = readlines(split_folder / opt.base)

    print("Exporting ground truth depths for {}".format(opt.split))

    gt_depths = []

    data_path = Path(opt.data_path)  # raw kitti path

    for line in tqdm(lines):

        traj_name, shader, frame = relpath_split(line)

        if opt.split in ("mc", "mc_lite"):  # added later; ground-truth lives in 'depth_annotated_path', results come out on the high side

            gt_depth_path = data_path / traj_name / "depth" / "{:04d}.png".format(
                int(frame))
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32)
            gt_depth = gt_depth.sum(axis=2)
            gt_depth = np_normalize_image(gt_depth)
            gt_depths.append(gt_depth.astype(np.float32))
        else:
            print('no data set selected')
            return
    output_path = split_folder / "gt_depths.npz"

    print("Saving to {}".format(opt.split))

    np.savez_compressed(output_path, data=np.array(gt_depths))
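The function above pulls everything from an argparse-style `opt`; the flags themselves are not shown in this example, so the following invocation sketch is an assumption based on how `opt.split`, `opt.base` and `opt.data_path` are read (paths are placeholders):

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='export_gt_depth_mc')
    # assumed option names, mirroring how opt is used inside export_gt_depths_mc
    parser.add_argument('--split', type=str, default='mc',
                        help='split folder containing the list file')
    parser.add_argument('--base', type=str, default='test.txt',
                        help='list file inside the split folder')
    parser.add_argument('--data_path', type=str, default='/path/to/mc_dataset',
                        help='dataset root holding <trajectory>/depth/<frame>.png')
    export_gt_depths_mc(parser.parse_args())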
Example 2
def main():
    data_base = Path('/home/roit/datasets/kitti')
    in_file = Path('/home/roit/datasets/splits/kitti/eigen_std/test.txt')
    out_file = Path('/home/roit/datasets/splits/kitti/eigen_std/wo_test.txt')
    lines = readlines(in_file)
    discard_list = []
    for item in lines:
        date, scene, camera, frame = relpath_split(item)
        reframe_forward = str(int(frame) - 1)
        reframe_backward = str(int(frame) + 1)

        path = os.path.join(date, scene, camera, 'data',
                            "{:010d}".format(int(reframe_forward)))
        image_path1 = data_base / path + '.png'
        path = os.path.join(date, scene, camera, 'data',
                            "{:010d}".format(int(reframe_backward)))
        image_path2 = data_base / path + '.png'
        if not image_path1.exists() or not image_path2.exists():

            discard_list.append(item)
            print(item)

    for item in discard_list:
        lines.remove(item)

    print("in file: {}".format(in_file))
    print("out file: {}".format(out_file))

    print("input lines:{}".format(len(lines)))
    print('total discard lines {}'.format(len(discard_list)))
    writelines(out_file, lines)
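`relpath_split` is a project helper that does not appear in this listing; given that Example 1 unpacks three fields from it and this example unpacks four, it presumably just splits a relative path into its components, roughly as sketched below (the real helper may also strip extensions or handle the space-separated KITTI line format):

def relpath_split(line):
    # assumed behaviour: break a relative path such as
    # 'date/scene/camera/frame' (or 'trajectory/shader/frame') into its parts
    return line.strip().split('/')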
Example 3
def kittiv2_test():
    train_filenames = readlines(
        '/home/roit/datasets/splits/eigen_zhou_std/train_files.txt')

    train_dataset = KITTIRAWDataset(  # KITTIRAWData
        data_path='/home/roit/datasets/kitti',
        filenames=train_filenames,
        height=192,
        width=640,
        frame_sides=[-1, 0, 1],  # kitti[0,-1,1],mc[-1,0,1]
        num_scales=4,
        is_train=True,
        img_ext='.png')

    train_loader = DataLoader(  # train_datasets:KITTIRAWDataset
        dataset=train_dataset,
        batch_size=16,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=True)

    for data in train_loader:
        print('ok')
        print('okk')
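The smoke-test loop above only prints a marker. In this codebase the batches are dictionaries keyed by tuples such as ("color", frame_side, scale) (later examples index data[("color", 0, 0)]), so a slightly more informative check, assuming that key layout, would be:

for data in train_loader:
    # keys are tuples like ("color", 0, 0) = (name, frame side, scale);
    # the exact key set depends on KITTIRAWDataset
    print(data[("color", 0, 0)].shape)  # expected roughly [16, 3, 192, 640]
    break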
Example 4
def export_gt_depths_kitti():

    parser = argparse.ArgumentParser(description='export_gt_depth')
    parser.add_argument('--split_dir',
                        type=str,
                        help='path of the split to export gt from, '
                             'e.g. eigen, eigen_zhou, eigen_benchmark, custom, custom_lite',
                        default='/home/roit/datasets/splits/eigen')

    parser.add_argument('--data_path',  # the original 2012 release of the raw data
                        type=str,
                        help='path to the root of the KITTI data',
                        default='/home/roit/datasets/kitti')
    parser.add_argument('--depth_annotated_path', type=str,
                        default='/home/roit/bluep2/datasets/kitti_data_depth_annotated',
                        help='the annotated depth maps added later, in 2015')
    opt = parser.parse_args()

    split_dir = Path(opt.split_dir)
    lines = readlines(split_dir/ "test_files.txt")

    print("Exporting ground truth depths for {}".format(opt.split_dir))

    gt_depths = []

    data_path = Path(opt.data_path)#raw kitti path
    depth_annotated_path = Path(opt.depth_annotated_path)

    for line in tqdm(lines):

        folder, frame_id, _ = line.split()
        frame_id = int(frame_id)

        if split_dir.stem == "eigen":#depth ground truth 在 场景文件夹中, 云图,full eigen split
            calib_dir = data_path/folder.split("/")[0]
            velo_filename = data_path/folder/"velodyne_points/data"/"{:010d}.bin".format(frame_id)
            gt_depth = generate_depth_map(calib_dir, velo_filename, 2, True)

        elif split_dir.stem == "eigen_benchmark":#后来补充的, ground-truth 在 ‘depth_annotated_path’,结果偏高
            gt_depth_path = depth_annotated_path/folder/"proj_depth"/"groundtruth"/"image_02"/"{:010d}.png".format(frame_id)
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256


        else:  # opt.split == 'custom': read the gt data from the png files
            gt_depth_path = depth_annotated_path/folder/"proj_depth"/"groundtruth"/"image_02"/"{:010d}.png".format(frame_id)
            gt_depth = np.array(pil.open(gt_depth_path)).astype(np.float32) / 256

        gt_depths.append(gt_depth.astype(np.float32))

    output_path = split_dir/"gt_depths2.npz"

    print("Saving to {}".format(split_dir))

    np.savez_compressed(output_path, data=np.array(gt_depths))
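The exported archive can be read back the same way the evaluation script in Example 14 loads its ground truth; with the defaults above that is:

import numpy as np

gt = np.load('/home/roit/datasets/splits/eigen/gt_depths2.npz', allow_pickle=True)
gt_depths = gt["data"]  # object array with one depth map per test frame
print(len(gt_depths), gt_depths[0].shape)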
Example 5
def extract_vsd_img2():
        dataset = Path("/home/roit/datasets/VSD")
        wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
        root = wk_root / 'splits/visdrone/test_files.txt'
        img_dump = wk_root / 'visdrone_test_img'
        img_dump.mkdir_p()

        rel_paths = readlines(root)
        rel_paths.sort()
        for item in tqdm(rel_paths):
            seq, frame = item.split('/')
            img_p = dataset / seq / frame + '.jpg'
            out_name = item.replace('/', '_') + '.jpg'
            cmd = 'cp ' + img_p + '  ' + img_dump / out_name
            os.system(cmd)
Example 6
def kitti():
    dataset = Path("/media/roit/970evo/home/roit/datasets/kitti")

    wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
    root = wk_root / 'splits/eigen/test_files.txt'

    out_path = wk_root / 'eigen_imgs'
    out_path.mkdir_p()
    files = readlines(root)
    for item in tqdm(files):
        dir,pre,num,lr = re.split(' |/',item)
        out_name = pre +'_'+ num+'_'+lr+'.png'
        cmd = 'cp '+ dataset/dir/pre/'image_02/data'/"{:010d}.png".format(int(num))+' '+out_path/out_name
        os.system(cmd)

    print('ok')
Example 7
def extract_MC():
    cp_img=False
    cp_gt =True
    dataset = Path("/home/roit/datasets/mcv5")
    wk_root = Path('/home/roit/aws/aprojects/xdr94_mono2')
    lines = '/home/roit/datasets/splits/mc/mcv5-sildurs-e-10k-12345-s/test.txt'
    dump_base = Path('/home/roit/bluep2/test_out/mc/apr/mcv5-sildurs-e-10k-12345-s')
    shader = 'sildurs-e'

    save_cmap='magma'


    dump_base.mkdir_p()

    img_dump = wk_root/'color'
    img_dump.mkdir_p()

    gt_dump = wk_root/'test_files'
    gt_dump.mkdir_p()
    (dump_base / 'img').mkdir_p()
    (dump_base / 'depth').mkdir_p()


    files = readlines(lines)

    for item in tqdm(files):
        if cp_img:
            img_p = dataset/item
            out_name = item.replace('/','_')
            cmd = 'cp '+img_p+'  '+dump_base/'img'/out_name
            os.system(cmd)
        if cp_gt:
            gt_p = dataset / item
            gt_p =gt_p.replace(shader,'depth')
            out_name = item.replace('/', '_')
            cmd = 'cp ' + gt_p + '  ' + dump_base / 'depth' / out_name
            os.system(cmd)
Example 8
def dataset_init(opts):

    # dataset settings

    # global options

    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    dataset_opt = opts['dataset']
    frame_sides = opts['frame_sides']
    scales = opts['scales']

    device = opts['device']
    # local
    datasets_dict = {"kitti": KITTIRAWDataset,
                     # "kitti_odom": KITTIOdomDataset,
                     "mc": MCDataset,
                     "custom_mono": CustomMonoDataset}

    if dataset_opt['type'] in datasets_dict.keys():
        dataset = datasets_dict[dataset_opt['type']]  # pick which dataset class to build (kitti here); this is the class handle
    else:
        dataset = CustomMonoDataset

    split_path = Path(dataset_opt['split']['path'])
    train_path = split_path / dataset_opt['split']['train_file']
    val_path = split_path / dataset_opt['split']['val_file']
    data_path = Path(dataset_opt['path'])



    batch_size = dataset_opt['batch_size']
    num_workers = dataset_opt['num_workers']

    train_filenames = readlines(train_path)
    val_filenames = readlines(val_path)
    img_ext = '.png'



    # train loader
    train_dataset = dataset(  # KITTIRAWData
        data_path=data_path,
        filenames=train_filenames,
        height=feed_height,
        width=feed_width,
        frame_sides=frame_sides,  # kitti[0,-1,1],mc[-1,0,1]
        num_scales=len(scales),
        mode="train"
        # img_ext='.png'
    )
    train_loader = DataLoader(  # train_datasets:KITTIRAWDataset
        dataset=train_dataset,
        batch_size=batch_size*len(device),
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True
    )
    # val loader
    val_dataset = dataset(
        data_path=data_path,
        filenames=val_filenames,
        height=feed_height,
        width=feed_width,
        frame_sides=frame_sides,
        num_scales=len(scales),
        mode="val",
        img_ext=img_ext)

    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=batch_size*len(device),
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True)

    print("Using split:{}, {}, {}".format(split_path,
                                          dataset_opt['split']['train_file'],
                                          dataset_opt['split']['val_file']
                                          ))
    print("There are {:d} training items and {:d} validation items".format(
        len(train_dataset), len(val_dataset)))

    return train_loader, val_loader
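`dataset_init` reads everything from a nested `opts` dictionary; collecting the keys it accesses, a minimal configuration would look like the following sketch (all values are placeholders):

opts = {
    'feed_height': 192,
    'feed_width': 640,
    'frame_sides': [-1, 0, 1],
    'scales': [0, 1, 2, 3],
    'device': [0],                      # len(device) multiplies the batch size
    'dataset': {
        'type': 'kitti',                # 'kitti', 'mc', anything else -> CustomMonoDataset
        'path': '/path/to/kitti',
        'batch_size': 16,
        'num_workers': 8,
        'split': {
            'path': '/path/to/splits/eigen_zhou',
            'train_file': 'train_files.txt',
            'val_file': 'val_files.txt',
        },
    },
}
train_loader, val_loader = dataset_init(opts)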
Example 9
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    # The metric here forcibly squeezes the gt values into the same range as the scanner,
    # which pushes them as close to true metric values as possible; but for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    for data in tqdm(dataloader):

        image = cv2.imread('/home/roit/datasets/nyudepthv2/img/0001.jpg')
        image = cv2.resize(image, (384, 288))
        image = np.transpose(image, [2, 0, 1])
        image = torch.tensor(image).cuda() / 255.
        image = image.unsqueeze(0)

        # input_color = reframe(encoder_mode,data,frame_sides=frame_sides,key='color')
        # input_color = input_color.cuda()

        features = encoder(image)
        disp = decoder(*features)

        depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        pred_depth = pred_depth.cpu()[:, 0].numpy()
        depth_gt = depth_gt.cpu()[:, 0].numpy()

        pred_depths.append(pred_depth)
        gt_depths.append(depth_gt)
    gt_depths = np.concatenate(gt_depths, axis=0)

    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []

    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))
        # crop
        # if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably from a very old version
        if opts['dataset']['type'] == "kitti":  # ???, probably from a very old version
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)

        pred = pred[mask]  # boolean indexing also flattens to 1-d
        gt = gt[mask]

        ratio = np.median(gt) / np.median(
            pred)  # median scaling: linearly rescale pred at eval time so it matches gt as closely as possible
        ratios.append(ratio)
        pred *= ratio

        pred[pred < MIN_DEPTH] = MIN_DEPTH  # clamp to MIN_DEPTH (smallest depth across all the data)
        pred[pred > MAX_DEPTH] = MAX_DEPTH  # ...and to MAX_DEPTH
        metric = compute_errors(gt, pred, mode=metric_mode)
        metrics.append(metric)

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)

    # print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
Example 10
def main(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    #assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
    #    "eval_split should be either odom_9 or odom_10"

    #sequence_id = int(opt.eval_split.split("_")[1])

    #filenames = readlines(
    #    os.path.join(os.path.dirname(__file__), "splits", "odom",
    #                 "test_files_{:02d}.txt".format(sequence_id)))
    # dataset = KITTIOdomDataset(opt.eval_pose_data_path, filenames, opt.height, opt.width,
    #                            [0, 1], 4, is_train=False)

    filenames = readlines(Path('./splits') / opt.split / 'test_files.txt')

    dataset = CustomMonoDataset(opt.dataset_path,
                                filenames,
                                opt.height,
                                opt.width, [0, 1],
                                1,
                                is_train=False)

    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    #model
    pose_encoder_path = Path(opt.load_weights_folder) / "pose_encoder.pth"
    pose_decoder_path = Path(opt.load_weights_folder) / "pose.pth"

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    print("-> eval " + opt.split)
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()

        all_color_aug = torch.cat(
            [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

        features = [pose_encoder(all_color_aug)]
        axisangle, translation = pose_decoder(features)

        pred_pose = transformation_from_parameters(axisangle[:, 0],
                                                   translation[:, 0])
        pred_pose = pred_pose.cpu().numpy()
        pred_poses.append(pred_pose)

    pred_poses = np.concatenate(pred_poses)
    length = pred_poses.shape[0]
    pred_poses.resize([length, 16])
    pred_poses = pred_poses[:, :12]
    filename = opt.dump_name
    np.savetxt(filename, pred_poses, delimiter=' ', fmt='%1.8e')

    print("-> Predictions saved to", filename)
Example 11
def main(opt):
    """Evaluate odometry on the KITTI dataset
    """

    #assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
    #    "eval_split should be either odom_9 or odom_10"

    #sequence_id = int(opt.eval_split.split("_")[1])

    #filenames = readlines(
    #    os.path.join(os.path.dirname(__file__), "splits", "odom",
    #                 "test_files_{:02d}.txt".format(sequence_id)))
    # dataset = KITTIOdomDataset(opt.eval_pose_data_path, filenames, opt.height, opt.width,
    #                            [0, 1], 4, is_train=False)

    if opt.infer_file is None:
        filenames = readlines(Path('./splits') / opt.split / 'test_files.txt')
    else:
        filenames = readlines(Path('./splits') / opt.split / opt.infer_file)
    if opt.split == "custom_mono":
        dataset = CustomMonoDataset(opt.dataset_path,
                                    filenames,
                                    opt.height,
                                    opt.width, [0, 1],
                                    1,
                                    is_train=False)
    elif opt.split == "mc":

        dataset = MCDataset(opt.dataset_path,
                            filenames,
                            opt.height,
                            opt.width, [0, 1],
                            1,
                            is_train=False)

    dataloader = DataLoader(dataset,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    #model

    weights_pose = torch.load(opt.posenet_path)
    pose_net = networks.PoseNet().to(device)
    pose_net.load_state_dict(weights_pose['state_dict'], strict=False)
    pose_net.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    print("-> eval " + opt.split)
    global_pose = np.identity(4)
    poses = [global_pose[0:3, :].reshape(1, 12)]
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()

        pose = pose_net(inputs[("color_aug", 0, 0)],
                        inputs[("color_aug", 1, 0)])  #1,6
        pose_mat = pose_vec2mat(pose).squeeze(0).cpu().numpy()
        pose_mat = np.vstack([pose_mat, np.array([0, 0, 0, 1])])  # 4X4
        global_pose = global_pose @ np.linalg.inv(pose_mat)

        poses.append(global_pose[0:3, :].reshape(1, 12))

    poses = np.concatenate(poses, axis=0)
    if opt.scale_factor:
        poses[:, 3] *= opt.scale_factor  #x-axis
        poses[:, 11] *= opt.scale_factor  #z-axis
    if opt.infer_file:
        dump_name = Path(opt.infer_file).stem + '.txt'
    else:
        dump_name = opt.dump_name
    np.savetxt(dump_name, poses, delimiter=' ', fmt='%1.8e')
Example 12
def main(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    #download_model_if_doesnt_exist(args.model_path,args.model_name)

    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    #1 LOADING PRETRAINED MODEL
    #1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    #1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    #2. FINDING INPUT IMAGES

    dataset_path = Path(args.dataset_path)

    #files
    root = Path(os.path.dirname(__file__))
    txt = root / 'splits' / args.split / args.txt_files
    print('-> inference file: ', txt)
    rel_paths = readlines(txt)
    #out
    if args.out_path is not None:
        out_path = Path(args.out_path)
    else:
        out_path = Path('./' + dataset_path.stem + '_out')
    out_path.mkdir_p()

    files = []
    #rel_paths 2 paths
    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:  #kitti
        for item in rel_paths:
            item = item.split(' ')
            if item[2] == 'l': camera = 'image_02'
            elif item[2] == 'r': camera = 'image_01'
            files.append(dataset_path / item[0] / camera / 'data' /
                         "{:010d}.png".format(int(item[1])))
    elif args.split == 'mc':
        for item in rel_paths:
            #item = item.split('/')
            files.append(item)
    elif args.split in ('visdrone', 'visdrone_lite'):
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / item[1] + '.jpg')
    else:
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / item[1] + '.jpg')


#2.1

    cnt = 0
    #3. PREDICTING ON EACH IMAGE IN TURN
    print('\n-> inference ' + args.dataset_path)
    files.sort()
    for image_path in tqdm(files):

        # Load image and preprocess

        if args.split == 'mc':
            input_image = pil.open(dataset_path / image_path +
                                   '.png').convert('RGB')
        else:
            input_image = pil.open(image_path).convert('RGB')

        original_width, original_height = input_image.size
        input_image = input_image.resize((feed_width, feed_height),
                                         pil.LANCZOS)
        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

        # PREDICTION
        input_image = input_image.to(device)  #torch.Size([1, 3, 192, 640])
        features = encoder(input_image)  #a list from 0 to 4
        outputs = depth_decoder(features)  # dict , 4 disptensor
        cnt += 1
        disp = outputs[("disp", 0)]  # has a same size with input
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear",
            align_corners=False)

        # Saving numpy file
        #if args.out_name=='num':
        if args.split == 'eigen' or args.split == 'custom':
            output_name = str(image_path).split('/')[-4] + '_{}'.format(
                image_path.stem)
        elif args.split == 'mc':
            block, p, color, frame = image_path.split('/')
            output_name = str(image_path).replace('/', '_') + '.png'
        elif args.split == 'visdrone' or args.split == 'visdrone_lite':
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')
        elif args.split == 'custom_mono':
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')
        else:
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')

        if args.npy_out:
            name_dest_npy = os.path.join(out_path,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

        # Saving colormapped depth image
        disp_resized_np = disp_resized.squeeze().cpu().numpy()
        vmax = np.percentile(disp_resized_np, 95)
        name_dest_im = Path(out_path) / "{}.png".format(output_name)
        plt.imsave(name_dest_im, disp_resized_np, cmap='magma', vmax=vmax)

    print(cnt)

    print('\n-> Done, saved at ' + str(out_path))
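`disp_to_depth`, used throughout these examples, comes from the monodepth2 codebase; its standard form maps the network's sigmoid output onto the [min_depth, max_depth] range. A sketch, in case the helper is not at hand:

def disp_to_depth(disp, min_depth, max_depth):
    # monodepth2 convention: convert sigmoid output into scaled disparity and depth
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth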
Example 13
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    #assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
    #    "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.eval_pose_data_path,
                               filenames,
                               opt.height,
                               opt.width, [0, 1],
                               4,
                               is_train=False)
    dataloader = DataLoader(dataset,
                            opt.eval_odom_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    print("-> eval " + opt.eval_split)
    for inputs in tqdm(dataloader):
        for key, ipt in inputs.items():
            inputs[key] = ipt.cuda()

        all_color_aug = torch.cat(
            [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

        features = [pose_encoder(all_color_aug)]
        axisangle, translation = pose_decoder(features)
        #encoder_out = torch.onnx.export(model=pose_decoder, args=features, f="monopose_poseencoder18.onnx",verbose=True, export_params=True)

        pred_poses.append(
            transformation_from_parameters(axisangle[:, 0],
                                           translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.eval_pose_data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    print('-> step2')

    gt_local_poses = []
    for i in tqdm(range(1, len(gt_global_poses))):
        gt_local_poses.append(
            np.linalg.inv(
                np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                       gt_global_poses[i])))

    print('-> step3')
    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in tqdm(range(0, num_frames - 1)):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(
            dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n-> Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    opt.saved_npy = opt.eval_split + ".npy"
    save_path = os.path.join(opt.eval_pose_save_path, opt.saved_npy)
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
Example 14
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    # The metric here forcibly squeezes the gt values into the same range as the scanner,
    # which pushes them as close to true metric values as possible; but for ...
    if not opt.eval_mono and not opt.eval_stereo:
        print(
            "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"
        )
    test_dir = Path(opt.test_dir)
    #1. load gt
    print('\n-> load gt:{}\n'.format(opt.test_dir))
    gt_path = test_dir / "gt_depths.npz"
    gt_depths = np.load(gt_path, allow_pickle=True)
    gt_depths = gt_depths["data"]
    #2. load img data and predict, output is pred_disps(shape is [nums,1,w,h])
    depth_eval_path = Path(opt.depth_eval_path)
    if not depth_eval_path.exists():
        print("Cannot find a folder at {}".format(depth_eval_path))
    print("-> Loading weights from {}".format(depth_eval_path))
    #model loading
    filenames = readlines(test_dir / opt.test_files)
    encoder_path = depth_eval_path / "encoder.pth"
    decoder_path = depth_eval_path / "depth.pth"
    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)
    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))
    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    # dataloader
    dataset = datasets.KITTIRAWDatasetv1(opt.data_path,
                                         filenames,
                                         encoder_dict['height'],
                                         encoder_dict['width'], [0],
                                         4,
                                         is_train=False)
    dataloader = DataLoader(dataset,
                            batch_size=opt.eval_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_disps = []
    print("\n-> Computing predictions with size {}x{}\n".format(
        encoder_dict['width'], encoder_dict['height']))
    #prediction
    for data in tqdm(dataloader):
        input_color = data[("color", 0, 0)].cuda()
        # if opt.post_process:
        #     # Post-processed results require each image to have two forward passes
        #     input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

        #eval 0
        output = depth_decoder(encoder(input_color))

        #eval 1
        pred_disp, pred_depth_tmp = disp_to_depth(output[("disp", 0)],
                                                  opt.min_depth, opt.max_depth)
        pred_disp = pred_disp.cpu()[:, 0].numpy()
        #pred_depth = pred_depth.cpu()[:,0].numpy()
        # if opt.post_process:
        #     N = pred_disp.shape[0] // 2
        #     pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])
        pred_disps.append(pred_disp)
    #endfor
    pred_disps = np.concatenate(pred_disps)
    # if opt.save_pred_disps:
    #     output_path = depth_eval_path/ "disps_{}_split.npy".format(opt.test_dir)
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)
    # if opt.no_eval:
    #     print("-> Evaluation disabled. Done.")
    #     quit()
    # elif test_dir.stem == 'benchmark':
    #     save_dir = depth_eval_path/ "benchmark_predictions"
    #     print("-> Saving out benchmark predictions to {}".format(save_dir))
    #     if not os.path.exists(save_dir):
    #         os.makedirs(save_dir)
    #     for idx in tqdm(range(len(pred_disps))):
    #         disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
    #         depth = STEREO_SCALE_FACTOR / disp_resized
    #         depth = np.clip(depth, 0, 80)
    #         depth = np.uint16(depth * 256)
    #         save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
    #         cv2.imwrite(save_path, depth)
    #     print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
    #     quit()
    #3. evaluation
    print("-> Evaluating")
    # if opt.eval_stereo:
    #     print("   Stereo evaluation - "
    #           "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
    #     opt.median_scaling = False
    #     opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    # else:
    #     print("   Mono evaluation - using median scaling")
    metrics = []
    ratios = []
    nums_evaluate = pred_disps.shape[0]
    for i in tqdm(range(nums_evaluate)):
        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]

        #eval2
        pred_disp = cv2.resize(pred_disp,
                               (gt_width, gt_height))  # e.g. 1271x341 to 128x640
        pred_depth = 1 / pred_disp  # could also be taken directly from disp_to_depth above

        #crop
        if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably from a very old version
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        #eval3
        pred_depth = pred_depth[mask]  # boolean indexing also flattens to 1-d
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        #median scaling
        if opt.median_scaling:
            ratio = np.median(gt_depth) / np.median(
                pred_depth)  # median scaling: linearly rescale pred at eval time so it matches gt as closely as possible
            ratios.append(ratio)
            pred_depth *= ratio
        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH  # clamp to MIN_DEPTH (smallest depth across all the data)
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH  # ...and to MAX_DEPTH
        metric = compute_errors(gt_depth, pred_depth)
        metrics.append(metric)
    metrics = np.array(metrics)
    #4. precess results, latex style output
    if opt.median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
            med, np.std(ratios / med)))
    mean_metrics = metrics.mean(0)
    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")
    print("\n-> Done!")
Example 15
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    # metric_mode = opts['metric_mode']

    # The metric here forcibly squeezes the gt values into the same range as the scanner,
    # which pushes them as close to true metric values as possible; but for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")
    elif opts['dataset']['type'] == 'custom_mono':
        dataset = datasets.CustomMonoDataset(data_path=data_path,
                                             filenames=file_names,
                                             height=feed_height,
                                             width=feed_width,
                                             frame_sides=frame_sides,
                                             num_scales=1,
                                             mode='test')

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    idx = 0
    for data in tqdm(dataloader):

        input_color = reframe(encoder_mode,
                              data,
                              frame_sides=frame_sides,
                              key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)

        # depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        if "depth" in sub_dirs:
            pred_depth = pred_depth.cpu()[:, 0].numpy()[0]
            depth = cv2.resize(pred_depth, (full_width, full_height))
            depth = np_normalize_image(depth)
            cv2.imwrite(out_dir / "depth" / file_names[idx].replace('/', '_'),
                        depth * 255)

        if "disp" in sub_dirs:
            pred_disp = pred_disp.cpu()[:, 0].numpy()[0]
            disp = cv2.resize(pred_disp, (full_width, full_height))
            disp = np_normalize_image(disp)

            cv2.imwrite(out_dir / "disp" / file_names[idx].replace('/', '_'),
                        disp * 255)

        idx += 1
Example 16
def prediction(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    framework_mode = opts['model']['mode']

    # The metric here forcibly squeezes the gt values into the same range as the scanner,
    # which pushes them as close to true metric values as possible; but for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    components = opts['model']['mode']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])

    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=components)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    file_names.sort()
    #prediction loader
    # test_files = []
    # for base in file_names:
    #     test_files.append(data_path/base)
    # test_files.sort()

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="prediction")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    out_shows = []

    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()
    for data in tqdm(dataloader):

        input_color = input_frames(data,
                                   mode=framework_mode,
                                   frame_sides=frame_sides)

        features = encoder(input_color)
        disp = decoder(*features)

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)

        out_show = pred_disp
        out_show = out_show.cpu()[:, 0].numpy()

        out_shows.append(out_show)

    for idx, item in enumerate(out_shows):

        depth_name = file_names[idx].replace('/', '_').replace('.png', 'depth')
        idx += 1
        plt.imsave(out_dir / depth_name + '{}'.format('.png'),
                   item[0],
                   cmap='magma')
Example 17
def main_with_masks(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    out_path = Path(args.out_path)
    out_path.mkdir_p()
    dirs = {}
    for mask in args.results:
        dirs[mask] = (out_path / mask)
        (out_path / mask).mkdir_p()

    print('-> split:{}'.format(args.split))
    print('-> save to {}'.format(args.out_path))

    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:
        feed_height = 192
        feed_width = 640
        min_depth = 0.1
        max_depth = 80
        full_height = 375
        full_width = 1242
        dataset = KITTIRAWDataset

    elif args.split in ["visdrone", "visdrone_lite"]:
        feed_width = 352
        feed_height = 192
        min_depth = 0.1
        max_depth = 255
        dataset = VSDataset
    elif args.split in ['mc', 'mc_lite']:
        feed_height = 288
        feed_width = 384
        min_depth = 0.1
        max_depth = 255
        dataset = MCDataset

    feed_height = 192
    feed_width = 640

    backproject_depth = BackprojectDepth(1, feed_height, feed_width).to(device)

    project_3d = Project3D(1, feed_height, feed_width)

    photometric_error = PhotometricError()

    txt_files = args.txt_files
    #data
    test_path = Path(args.wk_root) / "splits" / args.split / txt_files
    test_filenames = readlines(test_path)
    if args.as_name_sort:  # sort so the files follow sequence order by name
        test_filenames.sort()
    #check filenames:
    i = 0
    for i, item in enumerate(test_filenames):
        #item = test_filenames[i]
        if args.split in ['eigen', 'custom', 'custom_lite', 'eigen_zhou']:
            dirname, frame, lr = test_filenames[i].split()
            files = (Path(args.dataset_path) / dirname /
                     'image_02/data').files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''
        if args.split in ['mc', 'mc_lite']:  # already filtered when the split was built, but check again
            block, trajactory, color, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / block / trajactory /
                     color).files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''
            pass
        if args.split in ['visdrone', 'visdrone_lite']:  # already filtered when the split was built, but check again
            dirname, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / dirname).files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''

    while '' in test_filenames:
        test_filenames.remove('')

    test_dataset = dataset(  # KITTIRAWData
        args.dataset_path,
        test_filenames,
        feed_height,
        feed_width,
        args.frame_ids,
        1,
        is_train=False,
        img_ext=args.ext)

    test_loader = DataLoader(  # train_datasets:KITTIRAWDataset
        dataset=test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        drop_last=False)

    print('->items num: {}'.format(len(test_loader)))

    #layers

    #download_model_if_doesnt_exist(args.model_path,args.model_name)

    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    #1 LOADING PRETRAINED MODEL
    #1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    #1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    #paths
    pose_encoder_path = Path(model_path) / "pose_encoder.pth"
    pose_decoder_path = Path(model_path) / 'pose.pth'

    # 2.1 pose encoder
    print("   Loading pretrained pose encoder")

    pose_encoder = networks.ResnetEncoder(18, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.to(device)
    pose_encoder.eval()

    # 2.2 pose decoder
    print("   Loading pretrained decoder")
    pose_decoder = networks.PoseDecoder(num_ch_enc=pose_encoder.num_ch_enc,
                                        num_input_features=1,
                                        num_frames_to_predict_for=2)

    pose_loaded_dict = torch.load(pose_decoder_path, map_location=device)
    pose_decoder.load_state_dict(pose_loaded_dict)

    pose_decoder.to(device)
    pose_decoder.eval()
    source_scale = 0
    scale = 0
    for batch_idx, inputs in tqdm(enumerate(test_loader)):
        for key, ipt in inputs.items():
            inputs[key] = ipt.to(device)
        features = encoder(inputs[("color", 0, 0)])  # a list from 0 to 4

        outputs = depth_decoder(features)  # dict , 4 disptensor

        disp = outputs[("disp", 0)]  # has a same size with input

        #disp_resized = torch.nn.functional.interpolate(disp, (full_height, full_width), mode="bilinear", align_corners=False)

        _, depth = disp_to_depth(disp, min_depth, max_depth)

        for f_i in [args.frame_ids[0], args.frame_ids[-1]]:

            if f_i < 0:
                pose_inputs = [
                    inputs[("color", f_i, 0)], inputs[("color", 0, 0)]
                ]
            else:
                pose_inputs = [
                    inputs[("color", 0, 0)], inputs[("color", f_i, 0)]
                ]
            pose_inputs = torch.cat(pose_inputs, 1)
            features = pose_encoder(pose_inputs)
            axisangle, translation = pose_decoder([features])

            outputs[("cam_T_cam", 0, f_i)] = transformation_from_parameters(
                axisangle[:, 0], translation[:, 0], invert=(f_i < 0))  # b44
            T = outputs[("cam_T_cam", 0, f_i)]

            cam_points = backproject_depth(depth,
                                           inputs[("inv_K", 0)])  # D@K_inv
            pix_coords = project_3d(cam_points, inputs[("K", 0)],
                                    T)  # K@D@K_inv

            outputs[("sample", f_i, 0)] = pix_coords  # rigid_flow

            outputs[("color", f_i,
                     0)] = F.grid_sample(inputs[("color", f_i, 0)],
                                         outputs[("sample", f_i, 0)],
                                         padding_mode="border")
            # output"color" 就是i-warped

            # add a depth warp
            outputs[("color_identity", f_i, 0)] = inputs[("color", f_i, 0)]

        target = inputs[("color", 0, 0)]

        reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = outputs[("color", frame_id, 0)]
            reprojection_losses.append(photometric_error.run(pred, target))

        reprojection_losses = torch.cat(reprojection_losses, 1)

        identity_reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = inputs[("color", frame_id, source_scale)]
            identity_reprojection_losses.append(
                photometric_error.run(pred, target))
        identity_reprojection_losses = torch.cat(identity_reprojection_losses,
                                                 1)

        erro_maps = torch.cat(
            (identity_reprojection_losses, reprojection_losses), dim=1)  # b4hw

        identical_mask = IdenticalMask(erro_maps)
        identical_mask = identical_mask[0].detach().cpu().numpy()

        save_name = test_filenames[batch_idx].replace('/', '_')
        save_name = save_name.replace('l', '')
        save_name = save_name.replace('r', '')
        save_name = save_name.replace(' ', '')

        if "identical_mask" in args.results:
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask)

        if "depth" in args.results:
            # Saving colormapped depth image
            disp_np = disp[0, 0].detach().cpu().numpy()
            vmax = np.percentile(disp_np, 95)
            plt.imsave(dirs['depth'] / "{}.png".format(save_name),
                       disp_np,
                       cmap='magma',
                       vmax=vmax)

        if "mean_mask" in args.results:
            mean_mask = MeanMask(erro_maps)
            mean_mask = mean_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['mean_mask'] / "{}.png".format(save_name),
                       mean_mask,
                       cmap='bone')

        if "identical_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            identical_mask = identical_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask,
                       cmap='bone')

        if "var_mask" in args.results:
            var_mask = VarMask(erro_maps)
            var_mask = var_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["var_mask"] / "{}.png".format(save_name),
                       var_mask,
                       cmap='bone')

        if "final_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            mean_mask = MeanMask(erro_maps)
            var_mask = VarMask(erro_maps)
            final_mask = float8or(mean_mask * identical_mask, var_mask)
            final_mask = final_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["final_mask"] / "{}.png".format(save_name),
                       final_mask,
                       cmap='bone')