Example #1
0
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI
        format.
        """
        # filename = '$Faster-RCNN_TF/data/KITTI/object/training/label_2/000000.txt'
        filename = os.path.join(self._data_path, 'training/label_2',
                                index + '.npy')

        # calib
        calib = self._load_kitti_calib(index)

        lines = np.load(filename)

        num_objs = lines.shape[0]

        lwh = lines[:, 1:4]
        boxes3D_corners = lines[:, 4:]

        boxes_bv = corners_to_bv(boxes3D_corners)
        boxes = lidar_cnr_to_img(boxes3D_corners, calib[3], calib[2], calib[0])

        boxes3D_lidar = lidar_cnr_to_3d(boxes3D_corners, lwh)

        # TODO
        gt_classes = np.ones((num_objs), dtype=np.int32)
        overlaps = np.ones((num_objs, self.num_classes), dtype=np.float32)

        boxes.resize(num_objs + 1, 4)
        boxes_bv.resize(num_objs, 4)
        boxes3D_lidar.resize(num_objs, 6)
        boxes3D_corners.resize(num_objs, 24)
        gt_classes.resize(num_objs)
        overlaps.resize(num_objs, self.num_classes)
        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {
            'boxes': boxes,
            'boxes_bv': boxes_bv,
            'boxes_3D': boxes3D_lidar,
            'boxes_corners': boxes3D_corners,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False
        }
Example #2
0
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI
        format.
        """
        # filename = '$Faster-RCNN_TF/data/KITTI/object/training/label_2/000000.txt'
        filename = os.path.join(self._data_path, 'training/label_2',
                                index + '.txt')
        #         print("Loading: ", filename)

        # calib
        calib = self._load_kitti_calib(index)
        Tr = calib['Tr_velo2cam']

        # print 'Loading: {}'.format(filename)
        with open(filename, 'r') as f:
            lines = f.readlines()
        num_objs = len(lines)
        translation = np.zeros((num_objs, 3), dtype=np.float32)
        rys = np.zeros((num_objs), dtype=np.float32)
        lwh = np.zeros((num_objs, 3), dtype=np.float32)
        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        boxes_bv = np.zeros((num_objs, 4), dtype=np.float32)
        boxes3D = np.zeros((num_objs, 6), dtype=np.float32)
        boxes3D_lidar = np.zeros((num_objs, 6), dtype=np.float32)
        boxes3D_cam_cnr = np.zeros((num_objs, 24), dtype=np.float32)
        boxes3D_corners = np.zeros((num_objs, 24), dtype=np.float32)
        alphas = np.zeros((num_objs), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # print(boxes3D.shape)

        # Load object bounding boxes into a data frame.
        ix = -1
        for line in lines:
            obj = line.strip().split(' ')
            try:
                cls = self._class_to_ind[obj[0].strip()]
                # print cls
            except:
                continue
            # ignore objects with undetermined difficult level
            # level = self._get_obj_level(obj)
            # if level > 3:
            #     continue
            ix += 1
            # 0-based coordinates
            alpha = float(obj[3])
            x1 = float(obj[4])
            y1 = float(obj[5])
            x2 = float(obj[6])
            y2 = float(obj[7])
            h = float(obj[8])
            w = float(obj[9])
            l = float(obj[10])
            tx = float(obj[11])
            ty = float(obj[12])
            tz = float(obj[13])
            ry = float(obj[14])

            rys[ix] = ry
            lwh[ix, :] = [l, w, h]
            alphas[ix] = alpha
            translation[ix, :] = [tx, ty, tz]
            boxes[ix, :] = [x1, y1, x2, y2]
            boxes3D[ix, :] = [tx, ty, tz, l, w, h]
            # convert boxes3D cam to 8 corners(cam)
            boxes3D_cam_cnr_single = computeCorners3D(boxes3D[ix, :], ry)
            boxes3D_cam_cnr[ix, :] = boxes3D_cam_cnr_single.reshape(24)
            # convert 8 corners(cam) to 8 corners(lidar)
            boxes3D_corners[ix, :] = camera_to_lidar_cnr(
                boxes3D_cam_cnr_single, Tr)
            # convert 8 corners(cam) to  lidar boxes3D
            boxes3D_lidar[ix, :] = lidar_cnr_to_3d(boxes3D_corners[ix, :],
                                                   lwh[ix, :])
            # convert 8 corners(lidar) to lidar bird view
            boxes_bv[ix, :] = lidar_3d_to_bv(boxes3D_lidar[ix, :])
            # boxes3D_corners[ix, :] = lidar_to_corners_single(boxes3D_lidar[ix, :])
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        rys.resize(ix + 1)
        lwh.resize(ix + 1, 3)
        translation.resize(ix + 1, 3)
        alphas.resize(ix + 1)
        boxes.resize(ix + 1, 4)
        boxes_bv.resize(ix + 1, 4)
        boxes3D.resize(ix + 1, 6)
        boxes3D_lidar.resize(ix + 1, 6)
        boxes3D_cam_cnr.resize(ix + 1, 24)
        boxes3D_corners.resize(ix + 1, 24)
        gt_classes.resize(ix + 1)
        # print(self.num_classes)
        overlaps.resize(ix + 1, self.num_classes)
        # if index == '000142':
        #     print(index)
        #     print(overlaps)
        overlaps = scipy.sparse.csr_matrix(overlaps)
        # if index == '000142':
        #     print(overlaps)

        return {
            'ry': rys,
            'lwh': lwh,
            'boxes': boxes,
            'boxes_bv': boxes_bv,
            'boxes_3D_cam': boxes3D,
            'boxes_3D': boxes3D_lidar,
            'boxes3D_cam_corners': boxes3D_cam_cnr,
            'boxes_corners': boxes3D_corners,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'xyz': translation,
            'alphas': alphas,
            'flipped': False
        }
Example #3
0
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI
        format.
        WZN: The non-interested area (Dontcare) is just ignored (treated same as background)
        """
        if self._image_set == 'test':
            return {
                'ry': np.array([]),
                'lwh': np.array([]),
                'boxes': np.array([]),  #xy box in image
                #'boxes_bv' : boxes_bv, #xy box in bird view
                'boxes_3D_cam':
                np.array([]),  #[xyz_center, lwh] in 3D, cam frame 
                #'boxes_3D' : boxes3D_lidar, #[xyz_center, lwh] in 3D, absolute
                'boxes3D_cam_corners':
                np.array([]),  #8 corners of box in 3D, cam frame
                #'boxes_corners' : boxes3D_corners, #8 corners of box in 3D
                #'boxes_bv_corners' : boxes_bv_corners, #4 corners of box in bird view
                'gt_classes': np.array([]),  #classes
                'gt_overlaps': np.array([]),  #default 1, changed later
                'xyz': np.array([]),
                'alphas': np.array([]),
                'diff_level': np.array([]),
                'flipped': False
            }
        else:
            # filename = '$Faster-RCNN_TF/data/KITTI/object/training/label_2/000000.txt'
            filename = os.path.join(self.gt_dir, index + '.txt')
            #         print("Loading: ", filename)

            # calib
            calib = self._load_kitti_calib(index)
            Tr = np.dot(calib['R0'], calib['Tr_velo2cam'])

            # print 'Loading: {}'.format(filename)
            with open(filename, 'r') as f:
                lines = f.readlines()
            num_objs = len(lines)
            translation = np.zeros((num_objs, 3), dtype=np.float32)
            rys = np.zeros((num_objs), dtype=np.float32)
            lwh = np.zeros((num_objs, 3), dtype=np.float32)
            boxes = np.zeros((num_objs, 4), dtype=np.float32)
            boxes_bv = np.zeros((num_objs, 4), dtype=np.float32)
            boxes3D = np.zeros((num_objs, 6), dtype=np.float32)
            boxes3D_lidar = np.zeros((num_objs, 6), dtype=np.float32)
            boxes3D_cam_cnr = np.zeros((num_objs, 24), dtype=np.float32)
            boxes3D_corners = np.zeros((num_objs, 24), dtype=np.float32)
            alphas = np.zeros((num_objs), dtype=np.float32)
            gt_classes = np.zeros((num_objs), dtype=np.int32)
            overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
            # new difficulty level for in training evaluation
            diff_level = np.zeros((num_objs), dtype=np.int32)

            # print(boxes3D.shape)

            # Load object bounding boxes into a data frame.
            ix = -1
            for line in lines:
                obj = line.strip().split(' ')
                try:
                    #WZN.strip() removes white spaces
                    cls = self._class_to_ind[obj[0].strip()]
                    # print cls
                except:
                    continue
                # ignore objects with undetermined difficult level
                level = self._get_obj_level(obj)
                if level > 3:
                    continue
                ix += 1

                # 0-based coordinates
                alpha = float(obj[3])
                x1 = float(obj[4])
                y1 = float(obj[5])
                x2 = float(obj[6])
                y2 = float(obj[7])
                h = float(obj[8])
                w = float(obj[9])
                l = float(obj[10])
                tx = float(obj[11])
                ty = float(obj[12])
                tz = float(obj[13])
                ry = float(obj[14])

                diff_level[ix] = level
                if obj[0].strip() == 'Person_sitting':
                    diff_level[ix] = -1
                rys[ix] = ry
                lwh[ix, :] = [l, w, h]
                alphas[ix] = alpha
                translation[ix, :] = [tx, ty, tz]
                boxes[ix, :] = [x1, y1, x2, y2]
                boxes3D[ix, :] = [tx, ty, tz, l, w, h]
                # convert boxes3D cam to 8 corners(cam)
                boxes3D_cam_cnr_single = computeCorners3D(boxes3D[ix, :], ry)
                boxes3D_cam_cnr[ix, :] = boxes3D_cam_cnr_single.reshape(24)
                # convert 8 corners(cam) to 8 corners(lidar)
                boxes3D_corners[ix, :] = camera_to_lidar_cnr(
                    boxes3D_cam_cnr_single, Tr)

                # convert 8 corners(cam) to  lidar boxes3D, note this is not ivertible because we LOSE ry!
                boxes3D_lidar[ix, :] = lidar_cnr_to_3d(boxes3D_corners[ix, :],
                                                       lwh[ix, :])
                # convert 8 corners(lidar) to lidar bird view
                boxes_bv[ix, :] = lidar_3d_to_bv(boxes3D_lidar[ix, :])
                # boxes3D_corners[ix, :] = lidar_to_corners_single(boxes3D_lidar[ix, :])
                gt_classes[ix] = cls
                overlaps[ix, cls] = 1.0

            rys.resize(ix + 1)
            lwh.resize(ix + 1, 3)
            translation.resize(ix + 1, 3)
            alphas.resize(ix + 1)
            boxes.resize(ix + 1, 4)
            boxes_bv.resize(ix + 1, 4)
            boxes3D.resize(ix + 1, 6)
            boxes3D_lidar.resize(ix + 1, 6)
            boxes3D_cam_cnr.resize(ix + 1, 24)
            boxes3D_corners.resize(ix + 1, 24)

            boxes_bv_corners = lidar_cnr_to_bv_cnr(boxes3D_corners)

            gt_classes.resize(ix + 1)
            # print(self.num_classes)
            overlaps.resize(ix + 1, self.num_classes)
            diff_level.resize(ix + 1)
            # if index == '000142':
            #     print(index)
            #     print(overlaps)
            overlaps = scipy.sparse.csr_matrix(overlaps)
            # if index == '000142':
            #     print(overlaps)

            #if ix>=0:
            #    print index

            return {
                'ry': rys,
                'lwh': lwh,
                'boxes': boxes,  #xy box in image
                #'boxes_bv' : boxes_bv, #xy box in bird view
                'boxes_3D_cam': boxes3D,  #[xyz_center, lwh] in 3D, cam frame 
                #'boxes_3D' : boxes3D_lidar, #[xyz_center, lwh] in 3D, absolute
                'boxes3D_cam_corners':
                boxes3D_cam_cnr,  #8 corners of box in 3D, cam frame
                #'boxes_corners' : boxes3D_corners, #8 corners of box in 3D
                #'boxes_bv_corners' : boxes_bv_corners, #4 corners of box in bird view
                'gt_classes': gt_classes,  #classes
                'gt_overlaps': overlaps,  #default 1, changed later
                'xyz': translation,
                'alphas': alphas,
                'diff_level': diff_level,
                'flipped': False
            }