def isvalid(self, new_center, bbox, bbox_list):
        """Check if the new person bbox are valid, which need to satisfies:

        1. the center is visible in at least 2 views, and
        2. have a sufficiently small iou with all other person bboxes.
        """
        new_center_us = new_center.reshape(1, -1)
        vis = 0
        for _, cam_param in self.cameras.items():
            single_view_camera = SimpleCamera(cam_param)
            loc_2d = single_view_camera.world_to_pixel(
                np.hstack((new_center_us, [[1000.0]])))
            if 10 < loc_2d[0, 0] < self.width - 10 and 10 < loc_2d[
                    0, 1] < self.height - 10:
                vis += 1

        if len(bbox_list) == 0:
            return vis >= 2

        bbox_list = np.array(bbox_list)
        x0 = np.maximum(bbox[0], bbox_list[:, 0])
        y0 = np.maximum(bbox[1], bbox_list[:, 1])
        x1 = np.minimum(bbox[2], bbox_list[:, 2])
        y1 = np.minimum(bbox[3], bbox_list[:, 3])

        intersection = np.maximum(0, (x1 - x0) * (y1 - y0))
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        area_list = (bbox_list[:, 2] - bbox_list[:, 0]) * (bbox_list[:, 3] -
                                                           bbox_list[:, 1])
        iou_list = intersection / (area + area_list - intersection)

        return vis >= 2 and np.max(iou_list) < 0.01
def _keypoint_camera_to_world(keypoints,
                              camera_params,
                              image_name=None,
                              dataset='Body3DH36MDataset'):
    """Project 3D keypoints from the camera space to the world space.

    Args:
        keypoints (np.ndarray): 3D keypoints in shape [..., 3]
        camera_params (dict): Parameters for all cameras.
        image_name (str): The image name to specify the camera.
        dataset (str): The dataset type, e.g. Body3DH36MDataset.
    """
    cam_key = None
    if dataset == 'Body3DH36MDataset':
        subj, rest = osp.basename(image_name).split('_', 1)
        _, rest = rest.split('.', 1)
        camera, rest = rest.split('_', 1)
        cam_key = (subj, camera)
    else:
        raise NotImplementedError

    camera = SimpleCamera(camera_params[cam_key])
    keypoints_world = keypoints.copy()
    keypoints_world[..., :3] = camera.camera_to_world(keypoints[..., :3])

    return keypoints_world
    def _prepare_test_sample(self, idx):
        results = {}
        fid = self.frame_range[idx]

        for cam_id, cam_param in self.cameras.items():
            image_file = osp.join(
                self.img_prefix, 'Camera' + cam_id,
                'campus4-c{0}-{1:05d}.png'.format(cam_id, fid))

            all_poses_3d = []
            all_poses_3d_vis = []
            all_poses_2d = []
            all_poses_2d_vis = []
            single_view_camera = SimpleCamera(cam_param)

            for person in range(self.num_persons):
                pose3d = self.gt_pose_db[person][fid] * 1000.0
                if len(pose3d[0]) > 0:
                    all_poses_3d.append(pose3d)
                    all_poses_3d_vis.append(np.ones((self.num_joints, 3)))

                    pose2d = single_view_camera.world_to_pixel(pose3d)
                    x_check = np.bitwise_and(pose2d[:, 0] >= 0,
                                             pose2d[:, 0] <= self.width - 1)
                    y_check = np.bitwise_and(pose2d[:, 1] >= 0,
                                             pose2d[:, 1] <= self.height - 1)
                    check = np.bitwise_and(x_check, y_check)

                    joints_vis = np.ones((len(pose2d), 1))
                    joints_vis[np.logical_not(check)] = 0
                    all_poses_2d.append(pose2d)
                    all_poses_2d_vis.append(
                        np.repeat(np.reshape(joints_vis, (-1, 1)), 2, axis=1))

            pred_index = '{}_{}'.format(cam_id, fid)
            pred_poses = self.test_pose_db[pred_index]
            preds = []
            for pose in pred_poses:
                preds.append(np.array(pose['pred']))
            preds = np.array(preds)

            results[int(cam_id)] = {
                'image_file': image_file,
                'joints_3d': all_poses_3d,
                'joints_3d_visible': all_poses_3d_vis,
                'joints_2d': all_poses_2d,
                'joints_2d_visible': all_poses_2d_vis,
                'camera': cam_param,
                'joints': preds,
                'sample_id': idx * self.num_cameras + int(cam_id),
                'center': self.center,
                'scale': self.scale,
                'rotation': 0.0,
                'ann_info': self.ann_info
            }

        return results
    def _prepare_train_sample(self, idx):
        results = {}
        # To prepare a training sample, there are three steps.
        # 1. Randomly sample some 3D poses from motion capture database
        nposes_ori = np.random.choice(range(self.min_nposes, self.max_nposes))
        select_poses = np.random.choice(self.train_pose_db, nposes_ori)

        joints_3d = np.array([p['pose'] for p in select_poses])
        joints_3d_vis = np.array([p['vis'] for p in select_poses])

        bbox_list = []
        center_list = []
        # 2. Place the selected poses at random locations in the space
        for n in range(nposes_ori):
            points = joints_3d[n][:, :2].copy()
            # get the location of a person's root joint
            center = np.mean(points[self.root_id, :2], axis=0)
            rot_rad = np.random.uniform(-180, 180)

            new_center = self.get_new_center(center_list)
            new_xy = self.rotate_points(points, center,
                                        rot_rad) - center + new_center

            loop_count = 0
            # here n will be at least 1
            while not self.isvalid(new_center,
                                   self.calc_bbox(new_xy, joints_3d_vis[n]),
                                   bbox_list):
                loop_count += 1
                if loop_count >= 100:
                    break
                new_center = self.get_new_center(center_list)
                new_xy = self.rotate_points(points, center,
                                            rot_rad) - center + new_center

            if loop_count >= 100:
                nposes = n
                joints_3d = joints_3d[:n]
                joints_3d_vis = joints_3d_vis[:n]
                break
            else:
                nposes = nposes_ori
                center_list.append(new_center)
                bbox_list.append(self.calc_bbox(new_xy, joints_3d_vis[n]))
                joints_3d[n][:, :2] = new_xy

        joints_3d_u = np.zeros((self.maximum_person, len(joints_3d[0]), 3))
        joints_3d_vis_u = np.zeros((self.maximum_person, len(joints_3d[0]), 3))
        for i in range(nposes):
            joints_3d_u[i] = joints_3d[i][:, 0:3]
            joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3]

        roots_3d = np.mean(joints_3d_u[:, self.root_id], axis=1)

        # 3. Project 3D poses to all views to get the respective 2D locations
        for cam_id, cam_param in self.cameras.items():
            joints = []
            joints_vis = []
            single_view_camera = SimpleCamera(cam_param)
            for n in range(nposes):
                # project the 3D pose to the view to get 2D location
                pose2d = single_view_camera.world_to_pixel(joints_3d[n])
                # check the validity of joint cooridinate
                x_check = np.bitwise_and(pose2d[:, 0] >= 0,
                                         pose2d[:, 0] <= self.width - 1)
                y_check = np.bitwise_and(pose2d[:, 1] >= 0,
                                         pose2d[:, 1] <= self.height - 1)
                check = np.bitwise_and(x_check, y_check)
                vis = joints_3d_vis[n][:, 0] > 0
                vis[np.logical_not(check)] = 0

                joints.append(pose2d)
                joints_vis.append(
                    np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1))

            # make joints and joints_vis having same shape
            joints_u = np.zeros((self.maximum_person, len(joints[0]), 2))
            joints_vis_u = np.zeros((self.maximum_person, len(joints[0]), 2))
            for i in range(nposes):
                joints_u[i] = joints[i]
                joints_vis_u[i] = joints_vis[i]

            results[int(cam_id)] = {
                'joints_3d': joints_3d_u,
                'joints_3d_visible': joints_3d_vis_u,
                'roots_3d': roots_3d,
                'joints': joints_u,
                'joints_visible': joints_vis_u,
                'camera': cam_param,
                'sample_id': idx * self.num_cameras + int(cam_id),
                'center': self.center,
                'scale': self.scale,
                'rotation': 0.0,
                'num_persons': nposes,
                'ann_info': self.ann_info
            }

        return results
Exemple #5
0
    def _get_db(self):
        """Get dataset base.

        Returns:
            dict: the dataset base (2D and 3D information)
        """
        width = 1920
        height = 1080
        db = []
        sample_id = 0
        for seq in self.seq_list:
            cameras = self._get_cam(seq)
            curr_anno = osp.join(self.img_prefix, seq,
                                 'hdPose3d_stage1_coco19')
            anno_files = sorted(glob.iglob('{:s}/*.json'.format(curr_anno)))
            print(f'load sequence: {seq}', flush=True)
            for i, file in enumerate(anno_files):
                if i % self.seq_frame_interval == 0:
                    with open(file) as dfile:
                        bodies = json.load(dfile)['bodies']
                    if len(bodies) == 0:
                        continue

                    for k, cam_param in cameras.items():
                        single_view_camera = SimpleCamera(cam_param)
                        postfix = osp.basename(file).replace('body3DScene', '')
                        prefix = '{:02d}_{:02d}'.format(k[0], k[1])
                        image_file = osp.join(seq, 'hdImgs', prefix,
                                              prefix + postfix)
                        image_file = image_file.replace('json', 'jpg')

                        all_poses_3d = np.zeros(
                            (self.max_persons, self.num_joints, 3),
                            dtype=np.float32)
                        all_poses_vis_3d = np.zeros(
                            (self.max_persons, self.num_joints, 3),
                            dtype=np.float32)
                        all_roots_3d = np.zeros((self.max_persons, 3),
                                                dtype=np.float32)
                        all_poses = np.zeros(
                            (self.max_persons, self.num_joints, 3),
                            dtype=np.float32)

                        cnt = 0
                        person_ids = -np.ones(self.max_persons, dtype=np.int)
                        for body in bodies:
                            if cnt >= self.max_persons:
                                break
                            pose3d = np.array(body['joints19']).reshape(
                                (-1, 4))
                            pose3d = pose3d[:self.num_joints]

                            joints_vis = pose3d[:, -1] > 0.1

                            if not joints_vis[self.root_id]:
                                continue

                            # Coordinate transformation
                            M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0],
                                          [0.0, 1.0, 0.0]])
                            pose3d[:, 0:3] = pose3d[:, 0:3].dot(M) * 10.0

                            all_poses_3d[cnt] = pose3d[:, :3]
                            all_roots_3d[cnt] = pose3d[self.root_id, :3]
                            all_poses_vis_3d[cnt] = np.repeat(np.reshape(
                                joints_vis, (-1, 1)),
                                                              3,
                                                              axis=1)

                            pose2d = np.zeros((pose3d.shape[0], 3))
                            # get pose_2d from pose_3d
                            pose2d[:, :2] = single_view_camera.world_to_pixel(
                                pose3d[:, :3])
                            x_check = np.bitwise_and(pose2d[:, 0] >= 0,
                                                     pose2d[:, 0] <= width - 1)
                            y_check = np.bitwise_and(
                                pose2d[:, 1] >= 0, pose2d[:, 1] <= height - 1)
                            check = np.bitwise_and(x_check, y_check)
                            joints_vis[np.logical_not(check)] = 0
                            pose2d[:, -1] = joints_vis

                            all_poses[cnt] = pose2d
                            person_ids[cnt] = body['id']
                            cnt += 1

                        if cnt > 0:
                            db.append({
                                'image_file':
                                osp.join(self.img_prefix, image_file),
                                'joints_3d':
                                all_poses_3d,
                                'person_ids':
                                person_ids,
                                'joints_3d_visible':
                                all_poses_vis_3d,
                                'joints': [all_poses],
                                'roots_3d':
                                all_roots_3d,
                                'camera':
                                cam_param,
                                'num_persons':
                                cnt,
                                'sample_id':
                                sample_id,
                                'center':
                                np.array((width / 2, height / 2),
                                         dtype=np.float32),
                                'scale':
                                self._get_scale((width, height))
                            })
                            sample_id += 1
        return db
Exemple #6
0
    def show_result(self,
                    img,
                    img_metas,
                    visualize_2d=False,
                    input_heatmaps=None,
                    dataset_info=None,
                    radius=4,
                    thickness=2,
                    out_dir=None,
                    show=False):
        """Visualize the results."""
        result = self.forward_test(img,
                                   img_metas,
                                   input_heatmaps=input_heatmaps)
        pose_3d = result['pose_3d']
        sample_id = result['sample_id']
        batch_size = pose_3d.shape[0]
        # get kpts and skeleton structure

        for i in range(batch_size):
            # visualize 3d results
            img_meta = img_metas[i]
            num_cameras = len(img_meta['camera'])
            pose_3d_i = pose_3d[i]
            pose_3d_i = pose_3d_i[pose_3d_i[:, 0, 3] >= 0]

            num_persons, num_keypoints, _ = pose_3d_i.shape
            pose_3d_list = [p[..., [0, 1, 2, 4]]
                            for p in pose_3d_i] if num_persons > 0 else []
            img_3d = imshow_multiview_keypoints_3d(
                pose_3d_list,
                skeleton=dataset_info.skeleton,
                pose_kpt_color=dataset_info.pose_kpt_color[:num_keypoints],
                pose_link_color=dataset_info.pose_link_color,
                space_size=self.human_detector.space_size,
                space_center=self.human_detector.space_center)
            if out_dir is not None:
                mmcv.image.imwrite(
                    img_3d,
                    os.path.join(out_dir, 'vis_3d', f'{sample_id[i]}_3d.jpg'))

            if visualize_2d:
                for j in range(num_cameras):
                    single_camera = SimpleCamera(img_meta['camera'][j])
                    # img = mmcv.imread(img)
                    if num_persons > 0:
                        pose_2d = np.ones_like(pose_3d_i[..., :3])
                        pose_2d_flat = single_camera.world_to_pixel(
                            pose_3d_i[..., :3].reshape((-1, 3)))
                        pose_2d[..., :2] = pose_2d_flat.reshape(
                            (num_persons, -1, 2))
                        pose_2d_list = [pose for pose in pose_2d]
                    else:
                        pose_2d_list = []
                    with tempfile.TemporaryDirectory() as tmpdir:
                        if 'image_file' in img_meta:
                            img_file = img_meta['image_file'][j]
                        else:
                            img_size = img_meta['center'][j] * 2
                            img = np.zeros(
                                [int(img_size[1]),
                                 int(img_size[0]), 3],
                                dtype=np.uint8)
                            img.fill(255)  # or img[:] = 255
                            img_file = os.path.join(tmpdir, 'tmp.jpg')
                            mmcv.image.imwrite(img, img_file)
                        img = imshow_keypoints(
                            img_file, pose_2d_list, dataset_info.skeleton, 0.0,
                            dataset_info.pose_kpt_color[:num_keypoints],
                            dataset_info.pose_link_color, radius, thickness)
                    if out_dir is not None:
                        mmcv.image.imwrite(
                            img,
                            os.path.join(out_dir,
                                         f'{sample_id[i]}_{j}_2d.jpg'))