def __test_simple__(self):
        with torch.no_grad():
            for idx, image_path in enumerate(self.paths):

                # if image_path.endswith("_disp.jpg"):
                #     # don't try to predict disparity for a disparity image!
                #     continue

                # Load image and preprocess
                try:
                    # input_image = pil.open(image_path).convert('RGB')
                    # original_width, original_height = input_image.size
                    # input_image = input_image.resize((self.feed_width, self.feed_height), pil.LANCZOS)
                    # input_image = transforms.ToTensor()(input_image).unsqueeze(0)


                    input_image = cv2.imread(image_path)
                    # cv2 loads BGR; convert to RGB to match the PIL-based pipeline above
                    input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
                    input_image = cv2.resize(input_image, (self.feed_width, self.feed_height))
                    input_image = transforms.ToTensor()(input_image).unsqueeze(0)


                    # PREDICTION
                    input_image = input_image.to(self.device)
                    features = self.encoder(input_image)
                    disp = self.depth_decoder(features[0], features[1], features[2], features[3], features[4])

                    #disp = outputs[("disp", 0)]
                    disp_resized = torch.nn.functional.interpolate(
                        disp, (192, 640), mode="bilinear", align_corners=False)

                    # Saving numpy file
                    output_name = os.path.splitext(os.path.basename(image_path))[0]
                    name_dest_npy = os.path.join(self.output_directory, "{}_disp.npy".format(output_name))
                    scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
                    np.save(name_dest_npy, scaled_disp.cpu().numpy())

                    # Saving colormapped depth image
                    disp_resized_np = disp_resized.squeeze().cpu().numpy()
                    vmax = np.percentile(disp_resized_np, 95)
                    normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
                    mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
                    colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
                    im = pil.fromarray(colormapped_im)

                    name_dest_im = os.path.join(self.output_directory, "{}_disp.jpeg".format(output_name))
                    im.save(name_dest_im)
                except Exception:
                    print("Failed to process {}".format(image_path))
Example #2
def prediction(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    framework_mode = opts['model']['mode']

    # The metric info here forcibly compresses the gt values into the scanner's range,
    # which pushes them as close as possible to metric values.
    # But for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    components = opts['model']['mode']
    frame_sides = opts['frame_sides']
    out_dir_base = Path(opts['out_dir_base'])

    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=components)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    file_names.sort()
    #prediction loader
    # test_files = []
    # for base in file_names:
    #     test_files.append(data_path/base)
    # test_files.sort()

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="prediction")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="prediction")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    out_shows = []

    if opts['out_dir']:
        out_dir = out_dir_base / opts['out_dir']
    else:
        out_dir = out_dir_base / data_path.stem
    out_dir.mkdir_p()
    for data in tqdm(dataloader):

        input_color = input_frames(data,
                                   mode=framework_mode,
                                   frame_sides=frame_sides)

        features = encoder(input_color)
        disp = decoder(*features)

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)

        out_show = pred_disp
        out_show = out_show.cpu()[:, 0].numpy()

        out_shows.append(out_show)

    for idx, item in enumerate(out_shows):

        depth_name = file_names[idx].replace('/', '_').replace('.png', 'depth')
        plt.imsave(out_dir / depth_name + '.png',
                   item[0],
                   cmap='magma')
Example #3
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']

    out_dir = Path(opts['out_dir'])
    out_dir.mkdir_p()
    sub_dirs = opts['sub_dirs']
    for item in sub_dirs:
        (out_dir / item).mkdir_p()

    # metric_mode = opts['metric_mode']

    # The metric info here forcibly compresses the gt values into the scanner's range,
    # which pushes them as close as possible to metric values.
    # But for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")
    elif opts['dataset']['type'] == 'custom_mono':
        dataset = datasets.CustomMonoDataset(data_path=data_path,
                                             filenames=file_names,
                                             height=feed_height,
                                             width=feed_width,
                                             frame_sides=frame_sides,
                                             num_scales=1,
                                             mode='test')

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    idx = 0
    for data in tqdm(dataloader):

        input_color = reframe(encoder_mode,
                              data,
                              frame_sides=frame_sides,
                              key='color')
        input_color = input_color.cuda()

        features = encoder(input_color)
        disp = decoder(*features)

        # depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        if "depth" in sub_dirs:
            pred_depth = pred_depth.cpu()[:, 0].numpy()[0]
            depth = cv2.resize(pred_depth, (full_width, full_height))
            depth = np_normalize_image(depth)
            cv2.imwrite(out_dir / "depth" / file_names[idx].replace('/', '_'),
                        depth * 255)

        if "disp" in sub_dirs:
            pred_disp = pred_disp.cpu()[:, 0].numpy()[0]
            disp = cv2.resize(pred_disp, (full_width, full_height))
            disp = np_normalize_image(disp)

            cv2.imwrite(out_dir / "disp" / file_names[idx].replace('/', '_'),
                        disp * 255)

        idx += 1
Example #4
def evaluate(opts):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = opts['min_depth']
    MAX_DEPTH = opts['max_depth']

    data_path = opts['dataset']['path']
    batch_size = opts['dataset']['batch_size']

    num_workers = opts['dataset']['num_workers']
    feed_height = opts['feed_height']
    feed_width = opts['feed_width']
    full_width = opts['dataset']['full_width']
    full_height = opts['dataset']['full_height']
    metric_mode = opts['metric_mode']

    # The metric info here forcibly compresses the gt values into the scanner's range,
    # which pushes them as close as possible to metric values.
    # But for ...

    data_path = Path(opts['dataset']['path'])
    lines = Path(opts['dataset']['split']
                 ['path']) / opts['dataset']['split']['test_file']
    model_path = opts['model']['load_paths']
    encoder_mode = opts['model']['encoder_mode']
    frame_sides = opts['frame_sides']
    # frame_prior,frame_now,frame_next =  opts['frame_sides']
    encoder, decoder = model_init(model_path, mode=encoder_mode)
    file_names = readlines(lines)

    print('-> dataset_path:{}'.format(data_path))
    print('-> model_path')
    for k, v in opts['model']['load_paths'].items():
        print('\t' + str(v))

    print("-> metrics mode: {}".format(metric_mode))
    print("-> data split:{}".format(lines))
    print('-> total:{}'.format(len(file_names)))

    if opts['dataset']['type'] == 'mc':
        dataset = datasets.MCDataset(data_path=data_path,
                                     filenames=file_names,
                                     height=feed_height,
                                     width=feed_width,
                                     frame_sides=frame_sides,
                                     num_scales=1,
                                     mode="test")
    elif opts['dataset']['type'] == 'kitti':

        dataset = datasets.KITTIRAWDataset(  # KITTIRAWData
            data_path=data_path,
            filenames=file_names,
            height=feed_height,
            width=feed_width,
            frame_sides=frame_sides,
            num_scales=1,
            mode="test")

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_depths = []
    gt_depths = []
    disps = []
    for data in tqdm(dataloader):
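        # NOTE: a hard-coded sample image replaces the dataloader batch as the network
        # input below; depth_gt still comes from the batch.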

        image = cv2.imread('/home/roit/datasets/nyudepthv2/img/0001.jpg')
        image = cv2.resize(image, (384, 288))
        image = np.transpose(image, [2, 0, 1])
        image = torch.tensor(image).cuda() / 255.
        image = image.unsqueeze(0)

        # input_color = reframe(encoder_mode,data,frame_sides=frame_sides,key='color')
        # input_color = input_color.cuda()

        features = encoder(image)
        disp = decoder(*features)

        depth_gt = data['depth_gt']

        pred_disp, pred_depth = disp_to_depth(disp,
                                              min_depth=MIN_DEPTH,
                                              max_depth=MAX_DEPTH)
        #pred_depth = disp2depth(disp)

        pred_depth = pred_depth.cpu()[:, 0].numpy()
        depth_gt = depth_gt.cpu()[:, 0].numpy()

        pred_depths.append(pred_depth)
        gt_depths.append(depth_gt)
    gt_depths = np.concatenate(gt_depths, axis=0)

    pred_depths = np.concatenate(pred_depths, axis=0)

    metrics = []
    ratios = []

    for gt, pred in zip(gt_depths, pred_depths):
        gt_height, gt_width = gt.shape[:2]
        pred = cv2.resize(pred, (gt_width, gt_height))
        # crop
        # if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably left over from a much older version
        if opts['dataset']['type'] == "kitti":  # ???, probably left over from a much older version
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)
        else:
            mask = np.logical_and(gt > MIN_DEPTH, gt < MAX_DEPTH)

        pred = pred[mask]  # boolean indexing also flattens to 1-D
        gt = gt[mask]

        ratio = np.median(gt) / np.median(
            pred)  # median scaling: at eval time, linearly rescale pred so it matches gt as closely as possible
        ratios.append(ratio)
        pred *= ratio

        pred[pred < MIN_DEPTH] = MIN_DEPTH  # clamp predictions to the valid depth range
        pred[pred > MAX_DEPTH] = MAX_DEPTH
        metric = compute_errors(gt, pred, mode=metric_mode)
        metrics.append(metric)

    metrics = np.array(metrics)
    mean_metrics = np.mean(metrics, axis=0)

    # print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")

    ratios = np.array(ratios)
    median = np.median(ratios)
    print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
        median, np.std(ratios / median)))
Example #5
def main(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    #download_model_if_doesnt_exist(args.model_path,args.model_name)

    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    #1 LOADING PRETRAINED MODEL
    #1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    #1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    #2. FINDING INPUT IMAGES

    dataset_path = Path(args.dataset_path)

    #files
    root = Path(os.path.dirname(__file__))
    txt = root / 'splits' / args.split / args.txt_files
    print('-> inference file: ', txt)
    rel_paths = readlines(txt)
    #out
    if args.out_path is not None:
        out_path = Path(args.out_path)
    else:
        out_path = Path('./' + dataset_path.stem + '_out')
    out_path.mkdir_p()

    files = []
    #rel_paths 2 paths
    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:  #kitti
        for item in rel_paths:
            item = item.split(' ')
            if item[2] == 'l': camera = 'image_02'
            elif item[2] == 'r': camera = 'image_01'
            files.append(dataset_path / item[0] / camera / 'data' /
                         "{:010d}.png".format(int(item[1])))
    elif args.split == 'mc':
        for item in rel_paths:
            #item = item.split('/')
            files.append(item)
    elif args.split in ['visdrone', 'visdrone_lite']:
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / item[1] + '.jpg')
    else:
        for item in rel_paths:
            item = item.split('/')
            files.append(dataset_path / item[0] / item[1] + '.jpg')


#2.1

    cnt = 0
    #3. PREDICTING ON EACH IMAGE IN TURN
    print('\n-> inference ' + args.dataset_path)
    files.sort()
    for image_path in tqdm(files):

        # Load image and preprocess

        if args.split == 'mc':
            input_image = pil.open(dataset_path / image_path +
                                   '.png').convert('RGB')
        else:
            input_image = pil.open(image_path).convert('RGB')

        original_width, original_height = input_image.size
        input_image = input_image.resize((feed_width, feed_height),
                                         pil.LANCZOS)
        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

        # PREDICTION
        input_image = input_image.to(device)  #torch.Size([1, 3, 192, 640])
        features = encoder(input_image)  #a list from 0 to 4
        outputs = depth_decoder(features)  # dict , 4 disptensor
        cnt += 1
        disp = outputs[("disp", 0)]  # has a same size with input
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear",
            align_corners=False)

        # Saving numpy file
        #if args.out_name=='num':
        if args.split == 'eigen' or args.split == 'custom':
            output_name = str(image_path).split('/')[-4] + '_{}'.format(
                image_path.stem)
        elif args.split == 'mc':
            block, p, color, frame = image_path.split('/')
            output_name = str(image_path).replace('/', '_') + '.png'
        elif args.split == 'visdrone' or args.split == 'visdrone_lite':
            # str.strip('.jpg') strips characters, not a suffix; remove the extension explicitly
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')
        elif args.split == 'custom_mono':
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')
        else:
            output_name = image_path.relpath(dataset_path).replace(
                '.jpg', '').replace('/', '_')

        if args.npy_out:
            name_dest_npy = os.path.join(out_path,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

        # Saving colormapped depth image
        disp_resized_np = disp_resized.squeeze().cpu().numpy()
        vmax = np.percentile(disp_resized_np, 95)
        name_dest_im = Path(out_path) / "{}.png".format(output_name)
        plt.imsave(name_dest_im, disp_resized_np, cmap='magma', vmax=vmax)

    print(cnt)

    print('\n-> Done, saved to ' + str(out_path))
Example #6
def main_with_masks(args):
    """Function to predict for a single image or folder of images
    """
    print(args.dataset_path)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    out_path = Path(args.out_path)
    out_path.mkdir_p()
    dirs = {}
    for mask in args.results:
        dirs[mask] = (out_path / mask)
        (out_path / mask).mkdir_p()

    print('-> split:{}'.format(args.split))
    print('-> save to {}'.format(args.out_path))

    if args.split in ['custom', 'custom_lite', 'eigen', 'eigen_zhou']:
        feed_height = 192
        feed_width = 640
        min_depth = 0.1
        max_depth = 80
        full_height = 375
        full_width = 1242
        dataset = KITTIRAWDataset

    elif args.split in ["visdrone", "visdrone_lite"]:
        feed_width = 352
        feed_height = 192
        min_depth = 0.1
        max_depth = 255
        dataset = VSDataset
    elif args.split in ['mc', 'mc_lite']:
        feed_height = 288
        feed_width = 384
        min_depth = 0.1
        max_depth = 255
        dataset = MCDataset

    feed_height = 192
    feed_width = 640

    backproject_depth = BackprojectDepth(1, feed_height, feed_width).to(device)

    project_3d = Project3D(1, feed_height, feed_width)

    photometric_error = PhotometricError()

    txt_files = args.txt_files
    #data
    test_path = Path(args.wk_root) / "splits" / args.split / txt_files
    test_filenames = readlines(test_path)
    if args.as_name_sort:  # sort by name so frames follow sequence order
        test_filenames.sort()
    #check filenames:
    i = 0
    for i, item in enumerate(test_filenames):
        #item = test_filenames[i]
        if args.split in ['eigen', 'custom', 'custom_lite', 'eigen_zhou']:
            dirname, frame, lr = test_filenames[i].split()
            files = (Path(args.dataset_path) / dirname /
                     'image_02/data').files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''
        if args.split in ['mc', 'mc_lite']:  # already filtered when the split was created, but checked again here
            block, trajactory, color, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / block / trajactory /
                     color).files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''
            pass
        if args.split in ['visdrone', 'visdrone_lite']:  # already filtered when the split was created, but checked again here
            dirname, frame = test_filenames[i].split('/')
            files = (Path(args.dataset_path) / dirname).files()
            files.sort()
            min = int(files[0].stem)
            max = int(files[-1].stem)
            if int(frame) + args.frame_ids[0] <= min or int(
                    frame) + args.frame_ids[-1] >= max:
                test_filenames[i] = ''

    while '' in test_filenames:
        test_filenames.remove('')

    test_dataset = dataset(  # KITTIRAWData
        args.dataset_path,
        test_filenames,
        feed_height,
        feed_width,
        args.frame_ids,
        1,
        is_train=False,
        img_ext=args.ext)

    test_loader = DataLoader(  # train_datasets:KITTIRAWDataset
        dataset=test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        drop_last=False)

    print('->items num: {}'.format(len(test_loader)))

    #layers

    #download_model_if_doesnt_exist(args.model_path,args.model_name)

    model_path = Path(args.model_path) / args.model_name
    if not model_path.exists():
        print(model_path + " does not exist")

    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    #1 LOADING PRETRAINED MODEL
    #1.1 encoder
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    #1.2 decoder
    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    #paths
    pose_encoder_path = Path(model_path) / "pose_encoder.pth"
    pose_decoder_path = Path(model_path) / 'pose.pth'

    # 2.1 pose encoder
    print("   Loading pretrained pose encoder")

    pose_encoder = networks.ResnetEncoder(18, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path, map_location=device))

    pose_encoder.to(device)
    pose_encoder.eval()

    # 2.2 pose decoder
    print("   Loading pretrained decoder")
    pose_decoder = networks.PoseDecoder(num_ch_enc=pose_encoder.num_ch_enc,
                                        num_input_features=1,
                                        num_frames_to_predict_for=2)

    pose_loaded_dict = torch.load(pose_decoder_path, map_location=device)
    pose_decoder.load_state_dict(pose_loaded_dict)

    pose_decoder.to(device)
    pose_decoder.eval()
    source_scale = 0
    scale = 0
    for batch_idx, inputs in tqdm(enumerate(test_loader)):
        for key, ipt in inputs.items():
            inputs[key] = ipt.to(device)
        features = encoder(inputs[("color", 0, 0)])  # a list from 0 to 4

        outputs = depth_decoder(features)  # dict , 4 disptensor

        disp = outputs[("disp", 0)]  # has a same size with input

        #disp_resized = torch.nn.functional.interpolate(disp, (full_height, full_width), mode="bilinear", align_corners=False)

        _, depth = disp_to_depth(disp, min_depth, max_depth)

        for f_i in [args.frame_ids[0], args.frame_ids[-1]]:

            if f_i < 0:
                pose_inputs = [
                    inputs[("color", f_i, 0)], inputs[("color", 0, 0)]
                ]
            else:
                pose_inputs = [
                    inputs[("color", 0, 0)], inputs[("color", f_i, 0)]
                ]
            pose_inputs = torch.cat(pose_inputs, 1)
            features = pose_encoder(pose_inputs)
            axisangle, translation = pose_decoder([features])

            outputs[("cam_T_cam", 0, f_i)] = transformation_from_parameters(
                axisangle[:, 0], translation[:, 0], invert=(f_i < 0))  # b44
            T = outputs[("cam_T_cam", 0, f_i)]

            cam_points = backproject_depth(depth,
                                           inputs[("inv_K", 0)])  # D@K_inv
            pix_coords = project_3d(cam_points, inputs[("K", 0)],
                                    T)  # K@D@K_inv

            outputs[("sample", f_i, 0)] = pix_coords  # rigid_flow

            outputs[("color", f_i,
                     0)] = F.grid_sample(inputs[("color", f_i, 0)],
                                         outputs[("sample", f_i, 0)],
                                         padding_mode="border")
            # output"color" 就是i-warped

            # add a depth warp
            outputs[("color_identity", f_i, 0)] = inputs[("color", f_i, 0)]

        target = inputs[("color", 0, 0)]

        reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = outputs[("color", frame_id, 0)]
            reprojection_losses.append(photometric_error.run(pred, target))

        reprojection_losses = torch.cat(reprojection_losses, 1)

        identity_reprojection_losses = []
        for frame_id in [args.frame_ids[0], args.frame_ids[-1]]:
            pred = inputs[("color", frame_id, source_scale)]
            identity_reprojection_losses.append(
                photometric_error.run(pred, target))
        identity_reprojection_losses = torch.cat(identity_reprojection_losses,
                                                 1)

        erro_maps = torch.cat(
            (identity_reprojection_losses, reprojection_losses), dim=1)  # b4hw

        identical_mask = IdenticalMask(erro_maps)
        identical_mask = identical_mask[0].detach().cpu().numpy()

        save_name = test_filenames[batch_idx].replace('/', '_')
        save_name = save_name.replace('l', '')
        save_name = save_name.replace('r', '')
        save_name = save_name.replace(' ', '')

        if "identical_mask" in args.results:
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask)

        if "depth" in args.results:
            # Saving colormapped depth image
            disp_np = disp[0, 0].detach().cpu().numpy()
            vmax = np.percentile(disp_np, 95)
            plt.imsave(dirs['depth'] / "{}.png".format(save_name),
                       disp_np,
                       cmap='magma',
                       vmax=vmax)

        if "mean_mask" in args.results:
            mean_mask = MeanMask(erro_maps)
            mean_mask = mean_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['mean_mask'] / "{}.png".format(save_name),
                       mean_mask,
                       cmap='bone')

        if "identical_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            identical_mask = identical_mask[0].detach().cpu().numpy()
            plt.imsave(dirs['identical_mask'] / "{}.png".format(save_name),
                       identical_mask,
                       cmap='bone')

        if "var_mask" in args.results:
            var_mask = VarMask(erro_maps)
            var_mask = var_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["var_mask"] / "{}.png".format(save_name),
                       var_mask,
                       cmap='bone')

        if "final_mask" in args.results:
            identical_mask = IdenticalMask(erro_maps)
            mean_mask = MeanMask(erro_maps)
            var_mask = VarMask(erro_maps)
            final_mask = float8or(mean_mask * identical_mask, var_mask)
            final_mask = final_mask[0].detach().cpu().numpy()
            plt.imsave(dirs["final_mask"] / "{}.png".format(save_name),
                       final_mask,
                       cmap='bone')
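PhotometricError above is project-specific; in Monodepth2-style pipelines the per-pixel reprojection error is typically a weighted SSIM + L1 term. A minimal sketch under that assumption (the actual photometric_error.run may differ):

import torch
import torch.nn as nn


class SSIM(nn.Module):
    """3x3 average-pooled SSIM dissimilarity, as used in Monodepth-style losses."""
    def __init__(self):
        super().__init__()
        self.pool = nn.AvgPool2d(3, 1)
        self.refl = nn.ReflectionPad2d(1)
        self.C1 = 0.01 ** 2
        self.C2 = 0.03 ** 2

    def forward(self, x, y):
        x, y = self.refl(x), self.refl(y)
        mu_x, mu_y = self.pool(x), self.pool(y)
        sigma_x = self.pool(x ** 2) - mu_x ** 2
        sigma_y = self.pool(y ** 2) - mu_y ** 2
        sigma_xy = self.pool(x * y) - mu_x * mu_y
        ssim_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2)
        ssim_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2)
        return torch.clamp((1 - ssim_n / ssim_d) / 2, 0, 1)


_ssim = SSIM()


def reprojection_error(pred, target):
    """Per-pixel photometric error of shape (b, 1, h, w): 0.85 * SSIM + 0.15 * L1."""
    l1 = torch.abs(target - pred).mean(1, True)
    return 0.85 * _ssim(pred, target).mean(1, True) + 0.15 * l1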
Example #7
def test_simple(args):
    """Function to predict for a single image or folder of images
    """
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(
            os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Can not find args.image_path: {}".format(
            args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            if image_path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear",
                align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(os.path.basename(image_path))[0]
            name_dest_npy = os.path.join(output_directory,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print("   Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

    print('-> Done!')
Example #8
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    device = torch.device("cuda" if opt.gpu else "cpu")
    assert sum((opt.eval_mono, opt.eval_stereo)) == 1, \
        "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"

    if opt.ext_disp_to_eval is None:

        opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

        assert os.path.isdir(opt.load_weights_folder), \
            "Cannot find a folder at {}".format(opt.load_weights_folder)

        print("-> Loading weights from {}".format(opt.load_weights_folder))

        filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
        img_ext = '.png' if opt.png else '.jpg'
        encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
        decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

        encoder_dict = torch.load(encoder_path)

        dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                           encoder_dict['height'], encoder_dict['width'],
                                           [0], 4, is_train=False, img_ext=img_ext)
        dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
                                pin_memory=True, drop_last=False)

        encoder = networks.ResnetEncoder(opt.num_layers, False)
        depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

        model_dict = encoder.state_dict()
        encoder.load_state_dict({k.replace("module.",""): v for k, v in encoder_dict.items() if k.replace("module.","") in model_dict})
        decoder_dict = torch.load(decoder_path)
        depth_decoder.load_state_dict({k.replace("module.",""): v for k, v in decoder_dict.items()})

        encoder.to(device)
        encoder.eval()
        depth_decoder.to(device)
        depth_decoder.eval()

        pred_disps = []

        print("-> Computing predictions with size {}x{}".format(
            encoder_dict['width'], encoder_dict['height']))

        with torch.no_grad():
            for data in dataloader:
                input_color = data[("color", 0, 0)].to(device)

                if opt.post_process:
                    # Post-processed results require each image to have two forward passes
                    input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

                output = depth_decoder(encoder(input_color))

                pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()

                if opt.post_process:
                    N = pred_disp.shape[0] // 2
                    pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

                pred_disps.append(pred_disp)

        pred_disps = np.concatenate(pred_disps)

    else:
        # Load predictions from file
        print("-> Loading predictions from {}".format(opt.ext_disp_to_eval))
        pred_disps = np.load(opt.ext_disp_to_eval)

        if opt.eval_eigen_to_benchmark:
            eigen_to_benchmark_ids = np.load(
                os.path.join(splits_dir, "benchmark", "eigen_to_benchmark_ids.npy"))

            pred_disps = pred_disps[eigen_to_benchmark_ids]

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        quit()

    elif opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx in range(len(pred_disps)):
            disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        quit()

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        #gt_depth = cv2.resize(gt_depth, (opt.width, opt.height)) # Resize the gt depth
        # gt_depth = skimage.transform.resize(gt_depth, (opt.height, opt.width), order=0, preserve_range=True, mode='constant')
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]
        pred_disp = cv2.resize(pred_disp, (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.eval_split == "eigen":
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                             0.03594771 * gt_width,  0.96405229 * gt_width]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        if not opt.disable_median_scaling:
            ratio = np.nanmedian(gt_depth) / np.nanmedian(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 9).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3", "abs", "rmse"))
    print(("&{: 8.3f}  " * 9).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")
Example #9
def evaluate(opt):
    """Evaluates a pretrained model using a specified test set
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    # The metric info here forcibly compresses the gt values into the scanner's range,
    # which pushes them as close as possible to metric values.
    # But for ...
    if sum((opt.eval_mono, opt.eval_stereo)) != 1:
        print(
            "Please choose mono or stereo evaluation by setting either --eval_mono or --eval_stereo"
        )
    test_dir = Path(opt.test_dir)
    #1. load gt
    print('\n-> load gt:{}\n'.format(opt.test_dir))
    gt_path = test_dir / "gt_depths.npz"
    gt_depths = np.load(gt_path, allow_pickle=True)
    gt_depths = gt_depths["data"]
    #2. load img data and predict, output is pred_disps(shape is [nums,1,w,h])
    depth_eval_path = Path(opt.depth_eval_path)
    if not depth_eval_path.exists():
        print("Cannot find a folder at {}".format(depth_eval_path))
    print("-> Loading weights from {}".format(depth_eval_path))
    #model loading
    filenames = readlines(test_dir / opt.test_files)
    encoder_path = depth_eval_path / "encoder.pth"
    decoder_path = depth_eval_path / "depth.pth"
    encoder_dict = torch.load(encoder_path)
    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)
    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))
    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()
    # dataloader
    dataset = datasets.KITTIRAWDatasetv1(opt.data_path,
                                         filenames,
                                         encoder_dict['height'],
                                         encoder_dict['width'], [0],
                                         4,
                                         is_train=False)
    dataloader = DataLoader(dataset,
                            batch_size=opt.eval_batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            pin_memory=True,
                            drop_last=False)
    pred_disps = []
    print("\n-> Computing predictions with size {}x{}\n".format(
        encoder_dict['width'], encoder_dict['height']))
    #prediction
    for data in tqdm(dataloader):
        input_color = data[("color", 0, 0)].cuda()
        # if opt.post_process:
        #     # Post-processed results require each image to have two forward passes
        #     input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

        #eval 0
        output = depth_decoder(encoder(input_color))

        #eval 1
        pred_disp, pred_depth_tmp = disp_to_depth(output[("disp", 0)],
                                                  opt.min_depth, opt.max_depth)
        pred_disp = pred_disp.cpu()[:, 0].numpy()
        #pred_depth = pred_depth.cpu()[:,0].numpy()
        # if opt.post_process:
        #     N = pred_disp.shape[0] // 2
        #     pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])
        pred_disps.append(pred_disp)
    #endfor
    pred_disps = np.concatenate(pred_disps)
    # if opt.save_pred_disps:
    #     output_path = depth_eval_path/ "disps_{}_split.npy".format(opt.test_dir)
    #     print("-> Saving predicted disparities to ", output_path)
    #     np.save(output_path, pred_disps)
    # if opt.no_eval:
    #     print("-> Evaluation disabled. Done.")
    #     quit()
    # elif test_dir.stem == 'benchmark':
    #     save_dir = depth_eval_path/ "benchmark_predictions"
    #     print("-> Saving out benchmark predictions to {}".format(save_dir))
    #     if not os.path.exists(save_dir):
    #         os.makedirs(save_dir)
    #     for idx in tqdm(range(len(pred_disps))):
    #         disp_resized = cv2.resize(pred_disps[idx], (1216, 352))
    #         depth = STEREO_SCALE_FACTOR / disp_resized
    #         depth = np.clip(depth, 0, 80)
    #         depth = np.uint16(depth * 256)
    #         save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
    #         cv2.imwrite(save_path, depth)
    #     print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
    #     quit()
    #3. evaluation
    print("-> Evaluating")
    # if opt.eval_stereo:
    #     print("   Stereo evaluation - "
    #           "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
    #     opt.median_scaling = False
    #     opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    # else:
    #     print("   Mono evaluation - using median scaling")
    metrics = []
    ratios = []
    nums_evaluate = pred_disps.shape[0]
    for i in tqdm(range(nums_evaluate)):
        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = pred_disps[i]

        #eval2
        pred_disp = cv2.resize(pred_disp,
                               (gt_width, gt_height))  # resize the prediction back up to the gt resolution
        pred_depth = 1 / pred_disp  # could also be taken directly from disp_to_depth above

        #crop
        if test_dir.stem == "eigen" or test_dir.stem == 'custom':  # ???, probably left over from a much older version
            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

        else:
            mask = gt_depth > 0

        #eval3
        pred_depth = pred_depth[mask]  # boolean indexing also flattens to 1-D
        gt_depth = gt_depth[mask]

        pred_depth *= opt.pred_depth_scale_factor
        #median scaling
        if opt.median_scaling:
            ratio = np.median(gt_depth) / np.median(
                pred_depth)  # median scaling: linearly rescale pred so it matches gt as closely as possible
            ratios.append(ratio)
            pred_depth *= ratio
        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH  # clamp predictions to the valid depth range
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
        metric = compute_errors(gt_depth, pred_depth)
        metrics.append(metric)
    metrics = np.array(metrics)
    #4. process results, LaTeX-style output
    if opt.median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print("\n Scaling ratios | med: {:0.3f} | std: {:0.3f}\n".format(
            med, np.std(ratios / med)))
    mean_metrics = metrics.mean(0)
    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_metrics.tolist()) + "\\\\")
    print("\n-> Done!")