コード例 #1
0
def pose_similarity(dt, gt, shape_sim_mat):
    """compute pose similarity in terms of shape, translation and rotation
    Input:
        dt: np.ndarray detection [N x 7] first 6 dims are roll, pitch, yaw, x, y, z
        gt: save with dt

    Output:
        sim_shape: similarity based on shape eval
        dis_trans: distance based on translation eval
        dis_rot:   dis.. based on rotation eval
    """
    dt_num = len(dt)
    gt_num = len(gt)
    car_num = shape_sim_mat.shape[0]

    dt_car_id = np.uint32(dt[:, -1])
    gt_car_id = np.uint32(gt[:, -1])

    idx = np.tile(dt_car_id[:, None], (1, gt_num)).flatten() * car_num + \
          np.tile(gt_car_id[None, :], (dt_num, 1)).flatten()
    sims_shape = shape_sim_mat.flatten()[idx]
    sims_shape = np.reshape(sims_shape, [dt_num, gt_num])

    # translation similarity
    dt_car_trans = dt[:, 3:-1]
    gt_car_trans = gt[:, 3:-1]
    dis_trans = np.linalg.norm(
        np.tile(dt_car_trans[:, None, :], [1, gt_num, 1]) -
        np.tile(gt_car_trans[None, :, :], [dt_num, 1, 1]),
        axis=2)

    # rotation similarity
    dt_car_rot = uts.euler_angles_to_quaternions(dt[:, :3])
    gt_car_rot = uts.euler_angles_to_quaternions(gt[:, :3])
    q1 = dt_car_rot / np.linalg.norm(dt_car_rot, axis=1)[:, None]
    q2 = gt_car_rot / np.linalg.norm(gt_car_rot, axis=1)[:, None]

    # diff = abs(np.matmul(q1, np.transpose(q2)))
    diff = abs(1 - np.sum(np.square(
        np.tile(q1[:, None, :], [1, gt_num, 1]) -
        np.tile(q2[None, :, :], [dt_num, 1, 1])),
                          axis=2) / 2.0)
    dis_rot = 2 * np.arccos(diff) * 180 / np.pi

    if False:  #cfg.eval.eval_on:
        if True:  #cfg.eval.sims_shape:  False  True
            #sims_shape = np.ones(sims_shape.shape, dtype=np.float)
            sim = np.linspace(1, 0.95, sims_shape.shape[1], endpoint=True)
            sims_shape = np.tile(sim, [dt_num, 1])

        if True:  #cfg.eval.dis_trans:
            dis_trans = np.zeros(dis_trans.shape, dtype=np.float)

        if True:  #cfg.eval.dis_rot:
            dis_rot = np.zeros(dis_rot.shape, dtype=np.float)

    return sims_shape, dis_trans, dis_rot
コード例 #2
0
    def _add_gt_annotations_BOP(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        entry_id = entry['entry_id']
        model_num = entry_id.split('/')[-3]
        img_num = int(entry_id.split('/')[-1][:4])

        entry['height'] = self.BOP.image_shape[0]
        entry['width'] = self.BOP.image_shape[1]

        cam_K = self.BOP.info_all[model_num][img_num]['cam_K']
        # There is only one model in each image
        cam_R_m2c = self.BOP.gt_all[model_num][img_num][0]['cam_R_m2c']
        cam_t_m2c = self.BOP.gt_all[model_num][img_num][0]['cam_t_m2c']
        obj_bb = self.BOP.gt_all[model_num][img_num][0]['obj_bb']
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []

        # We have the bounding box here
        x1, y1, x2, y2 = obj_bb[0], obj_bb[
            1], obj_bb[0] + obj_bb[2], obj_bb[1] + obj_bb[3]
        # We read the mask here:
        im = cv2.imread(entry_id)
        mask = im != 0
        mask_binary = mask[:, :, 0] + mask[:, :, 1] + mask[:, :, 2]
        mask_f = np.array(mask_binary, order='F', dtype=np.uint8)
        rle = COCOmask.encode(mask_f)
        valid_segms.append(rle)

        rotation_matrix = np.array(cam_R_m2c).reshape((3, 3))
        euler_angles = rotation_matrix_to_euler_angles(rotation_matrix)
        pose = np.concatenate((euler_angles, np.array(cam_t_m2c)))

        obj = {
            'area': mask_f.sum(),
            'clean_bbox': [x1, y1, x2, y2],
            'model_num': model_num,
            'img_num': img_num,
            'pose': pose
        }

        valid_objs.append(obj)

        num_valid_objs = len(valid_objs)
        boxes = np.zeros((num_valid_objs, 4), dtype=np.float32)
        # this is a legacy network from WAD Mask-RCNN
        seg_areas = np.zeros((num_valid_objs), dtype=np.float32)
        box_to_gt_ind_map = np.zeros((num_valid_objs), dtype=np.int32)

        # newly added for 3d car
        poses = np.zeros((num_valid_objs, 6), dtype=np.float32)
        quaternions = np.zeros((num_valid_objs, 4), dtype=np.float32)

        for ix, obj in enumerate(valid_objs):
            boxes[ix, :] = obj['clean_bbox']
            seg_areas[ix] = obj['area']
            box_to_gt_ind_map[ix] = ix
            poses[ix] = obj['pose']
            quaternions[ix] = euler_angles_to_quaternions(
                np.array([obj['pose'][:3]]))

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)

        entry['segms'].extend(valid_segms)
        # newly added for 3d car
        entry['poses'] = np.append(entry['poses'], poses, axis=0)
        entry['quaternions'] = np.append(entry['quaternions'],
                                         quaternions,
                                         axis=0)
コード例 #3
0
    def _add_gt_annotations_Car3d(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        entry_id = entry['entry_id']
        # Make file_name an abs path
        car_pose_file = os.path.join(self.Car3D.data_dir, 'car_poses',
                                     entry_id + '.json')
        assert os.path.exists(car_pose_file), 'Label \'{}\' not found'.format(
            car_pose_file)
        with open(car_pose_file) as f:
            car_poses = json.load(f)
        entry['height'] = self.Car3D.image_shape[0]
        entry['width'] = self.Car3D.image_shape[1]

        intrinsic_mat = self.Car3D.get_intrinsic_mat()
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for i, car_pose in enumerate(car_poses):
            car_name = self.Car3D.car_id2name[car_pose['car_id']].name
            car = self.car_models[car_name]
            pose = np.array(car_pose['pose'])

            # project 3D points to 2d image plane
            rot_mat = euler_angles_to_rotation_matrix(pose[:3])
            rvect, _ = cv2.Rodrigues(rot_mat)
            imgpts, jac = cv2.projectPoints(np.float32(car['vertices']),
                                            rvect,
                                            pose[3:],
                                            intrinsic_mat,
                                            distCoeffs=None)

            imgpts = np.int32(imgpts).reshape(-1, 2)

            x1, y1, x2, y2 = imgpts[:, 0].min(), imgpts[:, 1].min(
            ), imgpts[:, 0].max(), imgpts[:, 1].max()
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, entry['height'], entry['width'])
            # Require non-zero seg area and more than 1x1 box size\
            obj = {
                'area': car_pose['area'],
                'clean_bbox': [x1, y1, x2, y2],
                'category_id': 33,
                'car_id': car_pose['car_id'],
                'visible_rate': car_pose['visible_rate'],
                'pose': car_pose['pose']
            }

            valid_objs.append(obj)

        num_valid_objs = len(valid_objs)
        boxes = np.zeros((num_valid_objs, 4), dtype=np.float32)
        # this is a legacy network from WAD Mask-RCNN
        car_class = 4
        gt_overlaps = np.zeros((num_valid_objs, 8), dtype=np.float32)
        seg_areas = np.zeros((num_valid_objs), dtype=np.float32)
        is_crowd = np.zeros((num_valid_objs), dtype=np.bool)
        box_to_gt_ind_map = np.zeros((num_valid_objs), dtype=np.int32)

        # newly added for 3d car
        visible_rate = np.zeros((num_valid_objs), dtype=np.float32)
        poses = np.zeros((num_valid_objs, 6), dtype=np.float32)
        quaternions = np.zeros((num_valid_objs, 4), dtype=np.float32)

        car_cat_classes = np.zeros((num_valid_objs), dtype=np.int32)

        for ix, obj in enumerate(valid_objs):
            cls = np.where(self.Car3D.unique_car_models == obj['car_id'])[0][0]
            boxes[ix, :] = obj['clean_bbox']
            car_cat_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = False  # TODO: What's this flag for?
            box_to_gt_ind_map[ix] = ix
            gt_overlaps[ix, car_class] = 1.0
            visible_rate[ix] = obj['visible_rate']
            poses[ix] = obj['pose']
            quaternions[ix] = euler_angles_to_quaternions(
                np.array([obj['pose'][:3]]))
            # ensure the quaternion is upper hemispher
            quaternions[ix] = quaternion_upper_hemispher(quaternions[ix])

        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        # newly added for 3d car
        entry['visible_rate'] = np.append(entry['visible_rate'], visible_rate)
        entry['poses'] = np.append(entry['poses'], poses, axis=0)
        entry['car_cat_classes'] = np.append(entry['car_cat_classes'],
                                             car_cat_classes)
        entry['quaternions'] = np.append(entry['quaternions'],
                                         quaternions,
                                         axis=0)