Example #1
    def __init__(self, cfg, split='training'):
        super(KittiMonoDataset, self).__init__()
        preprocessed_path = cfg.path.preprocessed_path
        obj_types = cfg.obj_types
        is_train = (split == 'training')

        imdb_file_path = os.path.join(preprocessed_path, split, 'imdb.pkl')
        self.imdb = pickle.load(open(imdb_file_path, 'rb'))  # list of KittiData frames
        self.output_dict = {
            "calib": False,
            "image": True,
            "label": False,
            "velodyne": False
        }
        if is_train:
            self.transform = build_augmentator(cfg.data.train_augmentation)
        else:
            self.transform = build_augmentator(cfg.data.test_augmentation)
        self.projector = BBox3dProjector()
        self.is_train = is_train
        self.obj_types = obj_types
        self.use_right_image = getattr(cfg.data, 'use_right_image', True)
        self.is_reproject = getattr(cfg.data, 'is_reproject', True)  # whether to reproject the 2D boxes
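
The constructor above only shows which configuration fields it reads. A minimal construction sketch, assuming an EasyDict-style config with dot access (paths, class names and values below are illustrative placeholders, not the repository's defaults):

from easydict import EasyDict  # assumed: dot-access dict used for the config

cfg = EasyDict(dict(
    path=dict(preprocessed_path='/path/to/preprocessed'),  # must contain <split>/imdb.pkl
    obj_types=['Car', 'Pedestrian', 'Cyclist'],
    data=dict(
        train_augmentation=[],  # augmentation pipeline definitions consumed by build_augmentator
        test_augmentation=[],
        use_right_image=True,
        is_reproject=True,
    ),
))
# dataset = KittiMonoDataset(cfg, split='training')

Example #2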
    def __init__(self, cfg, split='training'):
        super(KittiDepthMonoDataset, self).__init__()
        raw_path = cfg.path.raw_path
        depth_paths = cfg.path.depth_path if isinstance(cfg.path.depth_path, list) else [cfg.path.depth_path]
        aug_cfg = cfg.data.augmentation
        manager = Manager()  # multiprocessing Manager; its list proxy (used below) lets the imdb be shared across worker processes
        self.is_eval = (split != 'training')
        self.size = aug_cfg.cropSize  # e.g. [352, 1216]
        raw_sequences = {}
        for date_time in os.listdir(raw_path):
            sequences = os.listdir(os.path.join(raw_path, date_time))
            sequences = [sequence for sequence in sequences if not sequence.endswith(".txt")]
            P2, P3 = read_P23_from_sequence(os.path.join(raw_path, date_time, "calib_cam_to_cam.txt"))
            T = read_T_from_sequence(os.path.join(raw_path, date_time, "calib_velo_to_cam.txt"))
            for sequence in sequences:
                raw_sequences[sequence] = dict(P2=P2, P3=P3, T_velo2cam=T, date_time=date_time)
        self.imdb = []
        print("Start counting images in depth prediction dataset.")
        for depth_path in depth_paths:
            for sequence in tqdm(os.listdir(depth_path)):
                sequence_gt_path = os.path.join(depth_path, sequence, 'proj_depth', 'groundtruth')
                P2 = raw_sequences[sequence]['P2']
                P3 = raw_sequences[sequence]['P3']
                T  = raw_sequences[sequence]['T_velo2cam']
                left_gt_dir = os.path.join(sequence_gt_path, 'image_02')
                right_gt_dir = os.path.join(sequence_gt_path, 'image_03')
                gt_names = set(os.listdir(left_gt_dir))

                left_image_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'image_02', 'data')
                right_image_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'image_03', 'data')
                point_cloud_dir = os.path.join(raw_path, raw_sequences[sequence]['date_time'], sequence, 'velodyne_points', 'data')
                image_names = set(os.listdir(left_image_dir))

                intersection = gt_names.intersection(image_names)  # frame names that have both a depth ground truth and a raw image
                instances = [
                    dict(
                        image_2_file = os.path.join(left_image_dir, name),
                        image_3_file = os.path.join(right_image_dir, name),
                        gt_2_file    = os.path.join(left_gt_dir, name),
                        gt_3_file    = os.path.join(right_gt_dir, name),
                        P2           = P2.copy(),
                        P3           = P3.copy(),
                        # T_velo2cam   = T.copy(),
                        # point_cloud_file = os.path.join(point_cloud_dir, name.replace('.png', '.bin'))
                    ) for name in sorted(intersection)
                ]
                self.imdb += instances

        self.imdb = manager.list(self.imdb)
        if not self.is_eval:
            self.transform = build_augmentator(cfg.data.train_augmentation)
        else:
            self.transform = build_augmentator(cfg.data.test_augmentation)
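
read_P23_from_sequence and read_T_from_sequence are not shown in the example. A hedged sketch of the first one, assuming the standard KITTI raw calib_cam_to_cam.txt format in which the rectified projection matrices appear under the keys P_rect_02 and P_rect_03 as twelve space-separated floats:

import numpy as np

def read_P23_from_sequence_sketch(file_path):
    """Parse the 3x4 rectified projection matrices P2/P3 from calib_cam_to_cam.txt."""
    P2 = P3 = None
    with open(file_path) as f:
        for line in f:
            if line.startswith('P_rect_02'):
                P2 = np.array(line.split(':')[1].split(), dtype=np.float32).reshape(3, 4)
            elif line.startswith('P_rect_03'):
                P3 = np.array(line.split(':')[1].split(), dtype=np.float32).reshape(3, 4)
    return P2, P3

Example #3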
    def __init__(self, cfg, split='validation'):
        super(KittiDepthMonoValTestDataset, self).__init__()
        base_path = cfg.path[split + "_path"]
        self.transform = build_augmentator(cfg.data.test_augmentation)

        self.imdb = []
        image_dir = os.path.join(base_path, "image")
        intrinsic_dir = os.path.join(base_path, "intrinsics")

        image_list = os.listdir(image_dir)
        image_list.sort()

        K_list = os.listdir(intrinsic_dir)
        K_list.sort()
        # sorted file lists: image i is paired with the i-th intrinsics file
        self.imdb = [
            dict(image_file=os.path.join(image_dir, image_list[i]),
                 K=read_K_from_depth_prediction(
                     os.path.join(intrinsic_dir, K_list[i])))
            for i in range(len(image_list))
        ]
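
read_K_from_depth_prediction is likewise not shown. A plausible sketch, assuming each file under intrinsics/ holds a single line of nine floats (the 3x3 camera matrix in row-major order, as in the KITTI depth prediction val/test selection):

import numpy as np

def read_K_from_depth_prediction_sketch(file_path):
    """Read a 3x3 intrinsic matrix from one line of nine space-separated floats."""
    with open(file_path) as f:
        values = f.readline().split()
    return np.array(values, dtype=np.float32).reshape(3, 3)

Example #4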
def compute_dispairity_for_split(cfg,
                                 index_names: List[str],
                                 data_root_dir: str,
                                 output_dict: Dict,
                                 data_split: str = 'training',
                                 time_display_inter: int = 100,
                                 use_point_cloud: bool = True):
    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    disp_dir = os.path.join(save_dir, 'disp')
    if not os.path.isdir(disp_dir):
        os.mkdir(disp_dir)

    if not use_point_cloud:
        stereo_matcher = cv2.StereoBM_create(192, 25)  # numDisparities=192, blockSize=25

    N = len(index_names)
    frames = [None] * N
    print("start reading {} data".format(data_split))
    timer = Timer()
    preprocess = build_augmentator(cfg.data.test_augmentation)

    for i, index_name in tqdm(enumerate(index_names)):

        # read data with dataloader api
        data_frame = KittiData(data_root_dir, index_name, output_dict)
        calib, image, right_image, label, velo = data_frame.read_data()

        original_image = image.copy()
        baseline = (calib.P2[0, 3] - calib.P3[0, 3]) / calib.P2[0, 0]
        image, image_3, P2, P3 = preprocess(original_image,
                                            right_image.copy(),
                                            p2=deepcopy(calib.P2),
                                            p3=deepcopy(calib.P3))
        if use_point_cloud:
            ## compute disparity by projecting the LiDAR point cloud into the image
            disparity_left = generate_dispariy_from_velo(velo[:, 0:3],
                                                         image.shape[0],
                                                         image.shape[1],
                                                         calib.Tr_velo_to_cam,
                                                         calib.R0_rect,
                                                         P2,
                                                         baseline=baseline)
            disparity_right = generate_dispariy_from_velo(velo[:, 0:3],
                                                          image.shape[0],
                                                          image.shape[1],
                                                          calib.Tr_velo_to_cam,
                                                          calib.R0_rect,
                                                          P3,
                                                          baseline=baseline)

        else:
            ## compute disparity with OpenCV's StereoBM block matcher
            left_image = denorm(image, cfg.data.augmentation.rgb_mean,
                                cfg.data.augmentation.rgb_std)
            right_image = denorm(image_3, cfg.data.augmentation.rgb_mean,
                                 cfg.data.augmentation.rgb_std)
            gray_image1 = cv2.cvtColor(left_image, cv2.COLOR_BGR2GRAY)
            gray_image2 = cv2.cvtColor(right_image, cv2.COLOR_BGR2GRAY)

            disparity_left = stereo_matcher.compute(gray_image1, gray_image2)
            disparity_left[disparity_left < 0] = 0
            disparity_left = disparity_left.astype(np.uint16)

            # flip both images horizontally so StereoBM treats the right view as the
            # reference image; the result is in the flipped right-image coordinates
            disparity_right = stereo_matcher.compute(gray_image2[:, ::-1],
                                                     gray_image1[:, ::-1])
            disparity_right[disparity_right < 0] = 0
            disparity_right = disparity_right.astype(np.uint16)

        # downsample the sparse disparity 4x with max pooling before saving
        disparity_left = skimage.measure.block_reduce(disparity_left, (4, 4),
                                                      np.max)
        file_name = os.path.join(disp_dir, "P2%06d.png" % i)
        cv2.imwrite(file_name, disparity_left)

        disparity_right = skimage.measure.block_reduce(disparity_right, (4, 4),
                                                       np.max)
        file_name = os.path.join(disp_dir, "P3%06d.png" % i)
        cv2.imwrite(file_name, disparity_right)

    print("{} split finished precomputing disparity".format(data_split))
Example #5
def read_one_split(cfg,
                   index_names,
                   data_root_dir,
                   output_dict,
                   data_split='training',
                   time_display_inter=100):
    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    if data_split == 'training':
        disp_dir = os.path.join(save_dir, 'disp')
        if not os.path.isdir(disp_dir):
            os.mkdir(disp_dir)

    N = len(index_names)
    frames = [None] * N
    print("start reading {} data".format(data_split))
    timer = Timer()

    anchor_prior = getattr(cfg, 'anchor_prior', True)

    total_objects = [0 for _ in range(len(cfg.obj_types))]
    total_usable_objects = [0 for _ in range(len(cfg.obj_types))]
    if anchor_prior:
        anchor_manager = Anchors(cfg.path.preprocessed_path,
                                 readConfigFile=False,
                                 **cfg.detector.head.anchors_cfg)
        preprocess = build_augmentator(cfg.data.test_augmentation)
        total_objects = [0 for _ in range(len(cfg.obj_types))]
        total_usable_objects = [0 for _ in range(len(cfg.obj_types))]

        len_scale = len(anchor_manager.scales)
        len_ratios = len(anchor_manager.ratios)
        len_level = len(anchor_manager.pyramid_levels)

        examine = np.zeros(
            [len(cfg.obj_types), len_level * len_scale, len_ratios])
        sums = np.zeros(
            [len(cfg.obj_types), len_level * len_scale, len_ratios, 3])
        squared = np.zeros(
            [len(cfg.obj_types), len_level * len_scale, len_ratios, 3],
            dtype=np.float64)

        uniform_sum_each_type = np.zeros(
            (len(cfg.obj_types), 6),
            dtype=np.float64)  #[z, sin2a, cos2a, w, h, l]
        uniform_square_each_type = np.zeros((len(cfg.obj_types), 6),
                                            dtype=np.float64)

    for i, index_name in enumerate(index_names):

        # read data with dataloader api
        data_frame = KittiData(data_root_dir, index_name, output_dict)
        calib, image, label, velo = data_frame.read_data()

        # filter the label objects and attach them, together with the calibration, to the data frame
        max_occlusion = getattr(cfg.data, 'max_occlusion', 2)
        min_z = getattr(cfg.data, 'min_z', 3)
        if data_split == 'training':
            data_frame.label = [
                obj for obj in label.data if obj.type in cfg.obj_types
                and obj.occluded < max_occlusion and obj.z > min_z
            ]

            if anchor_prior:
                for j in range(len(cfg.obj_types)):
                    total_objects[j] += len([
                        obj for obj in data_frame.label
                        if obj.type == cfg.obj_types[j]
                    ])
                    data = np.array([[
                        obj.z,
                        np.sin(2 * obj.alpha),
                        np.cos(2 * obj.alpha), obj.w, obj.h, obj.l
                    ] for obj in data_frame.label
                                     if obj.type == cfg.obj_types[j]])  #[N, 6]
                    if data.any():
                        uniform_sum_each_type[j, :] += np.sum(data, axis=0)
                        uniform_square_each_type[j, :] += np.sum(data**2,
                                                                 axis=0)
        else:
            data_frame.label = [
                obj for obj in label.data if obj.type in cfg.obj_types
            ]
        data_frame.calib = calib

        if data_split == 'training' and anchor_prior:
            original_image = image.copy()
            baseline = (calib.P2[0, 3] - calib.P3[0, 3]) / calib.P2[0, 0]
            image, P2, label = preprocess(original_image,
                                          p2=deepcopy(calib.P2),
                                          labels=deepcopy(data_frame.label))
            _, P3 = preprocess(original_image, p2=deepcopy(calib.P3))

            ## Computing statistic for positive anchors
            if len(data_frame.label) > 0:
                anchors, _ = anchor_manager(
                    image[np.newaxis].transpose([0, 3, 1, 2]),
                    torch.tensor(P2).reshape([-1, 3, 4]))

                for j in range(len(cfg.obj_types)):
                    bbox2d = torch.tensor(
                        [[obj.bbox_l, obj.bbox_t, obj.bbox_r, obj.bbox_b]
                         for obj in label
                         if obj.type == cfg.obj_types[j]]).cuda()
                    if len(bbox2d) < 1:
                        continue
                    bbox3d = torch.tensor([[
                        obj.x, obj.y, obj.z,
                        np.sin(2 * obj.alpha),
                        np.cos(2 * obj.alpha)
                    ] for obj in label if obj.type == cfg.obj_types[j]
                                           ]).cuda()

                    usable_anchors = anchors[0]

                    IoUs = calc_iou(usable_anchors, bbox2d)  #[N, K]
                    IoU_max, IoU_argmax = torch.max(IoUs, dim=0)
                    IoU_max_anchor, IoU_argmax_anchor = torch.max(IoUs, dim=1)

                    num_usable_object = torch.sum(
                        IoU_max > cfg.detector.head.loss_cfg.fg_iou_threshold
                    ).item()
                    total_usable_objects[j] += num_usable_object

                    positive_anchors_mask = IoU_max_anchor > cfg.detector.head.loss_cfg.fg_iou_threshold
                    positive_ground_truth_3d = bbox3d[IoU_argmax_anchor[
                        positive_anchors_mask]].cpu().numpy()

                    used_anchors = usable_anchors[positive_anchors_mask].cpu(
                    ).numpy()  #[x1, y1, x2, y2]

                    sizes_int, ratio_int = anchor_manager.anchors2indexes(
                        used_anchors)
                    for k in range(len(sizes_int)):
                        examine[j, sizes_int[k], ratio_int[k]] += 1
                        sums[j, sizes_int[k],
                             ratio_int[k]] += positive_ground_truth_3d[k, 2:5]
                        squared[j, sizes_int[k],
                                ratio_int[k]] += positive_ground_truth_3d[
                                    k, 2:5]**2

        frames[i] = data_frame

        if (i + 1) % time_display_inter == 0:
            avg_time = timer.compute_avg_time(i + 1)
            eta = timer.compute_eta(i + 1, N)
            print(
                "{} iter:{}/{}, avg-time:{}, eta:{}, total_objs:{}, usable_objs:{}"
                .format(data_split, i + 1, N, avg_time, eta, total_objects,
                        total_usable_objects),
                end='\r')

    save_dir = os.path.join(cfg.path.preprocessed_path, data_split)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    if data_split == 'training' and anchor_prior:

        for j in range(len(cfg.obj_types)):
            global_mean = uniform_sum_each_type[j] / total_objects[j]
            # standard deviation via sqrt(E[X^2] - (E[X])^2), despite the "var" name
            global_var = np.sqrt(uniform_square_each_type[j] /
                                 total_objects[j] - global_mean**2)

            avg = sums[j] / (examine[j][:, :, np.newaxis] + 1e-8)
            EX_2 = squared[j] / (examine[j][:, :, np.newaxis] + 1e-8)
            std = np.sqrt(EX_2 - avg**2)

            # bins with fewer than 10 matched anchors get an impossible mean/std so
            # that the anchor filtering and the losses ignore them
            avg[examine[j] < 10, :] = -100
            std[examine[j] < 10, :] = 1e10
            avg[np.isnan(std)] = -100
            std[np.isnan(std)] = 1e10
            avg[std < 1e-3] = -100
            std[std < 1e-3] = 1e10

            whl_avg = np.ones([avg.shape[0], avg.shape[1], 3
                               ]) * global_mean[3:6]
            whl_std = np.ones([avg.shape[0], avg.shape[1], 3
                               ]) * global_var[3:6]

            avg = np.concatenate([avg, whl_avg], axis=2)
            std = np.concatenate([std, whl_std], axis=2)

            npy_file = os.path.join(
                save_dir, 'anchor_mean_{}.npy'.format(cfg.obj_types[j]))
            np.save(npy_file, avg)
            std_file = os.path.join(
                save_dir, 'anchor_std_{}.npy'.format(cfg.obj_types[j]))
            np.save(std_file, std)
    pkl_file = os.path.join(save_dir, 'imdb.pkl')
    pickle.dump(frames, open(pkl_file, 'wb'))
    print("{} split finished precomputing".format(data_split))