def isvalid(self, new_center, bbox, bbox_list): """Check if the new person bbox are valid, which need to satisfies: 1. the center is visible in at least 2 views, and 2. have a sufficiently small iou with all other person bboxes. """ new_center_us = new_center.reshape(1, -1) vis = 0 for _, cam_param in self.cameras.items(): single_view_camera = SimpleCamera(cam_param) loc_2d = single_view_camera.world_to_pixel( np.hstack((new_center_us, [[1000.0]]))) if 10 < loc_2d[0, 0] < self.width - 10 and 10 < loc_2d[ 0, 1] < self.height - 10: vis += 1 if len(bbox_list) == 0: return vis >= 2 bbox_list = np.array(bbox_list) x0 = np.maximum(bbox[0], bbox_list[:, 0]) y0 = np.maximum(bbox[1], bbox_list[:, 1]) x1 = np.minimum(bbox[2], bbox_list[:, 2]) y1 = np.minimum(bbox[3], bbox_list[:, 3]) intersection = np.maximum(0, (x1 - x0) * (y1 - y0)) area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) area_list = (bbox_list[:, 2] - bbox_list[:, 0]) * (bbox_list[:, 3] - bbox_list[:, 1]) iou_list = intersection / (area + area_list - intersection) return vis >= 2 and np.max(iou_list) < 0.01
def _keypoint_camera_to_world(keypoints, camera_params, image_name=None, dataset='Body3DH36MDataset'): """Project 3D keypoints from the camera space to the world space. Args: keypoints (np.ndarray): 3D keypoints in shape [..., 3] camera_params (dict): Parameters for all cameras. image_name (str): The image name to specify the camera. dataset (str): The dataset type, e.g. Body3DH36MDataset. """ cam_key = None if dataset == 'Body3DH36MDataset': subj, rest = osp.basename(image_name).split('_', 1) _, rest = rest.split('.', 1) camera, rest = rest.split('_', 1) cam_key = (subj, camera) else: raise NotImplementedError camera = SimpleCamera(camera_params[cam_key]) keypoints_world = keypoints.copy() keypoints_world[..., :3] = camera.camera_to_world(keypoints[..., :3]) return keypoints_world
def _prepare_test_sample(self, idx): results = {} fid = self.frame_range[idx] for cam_id, cam_param in self.cameras.items(): image_file = osp.join( self.img_prefix, 'Camera' + cam_id, 'campus4-c{0}-{1:05d}.png'.format(cam_id, fid)) all_poses_3d = [] all_poses_3d_vis = [] all_poses_2d = [] all_poses_2d_vis = [] single_view_camera = SimpleCamera(cam_param) for person in range(self.num_persons): pose3d = self.gt_pose_db[person][fid] * 1000.0 if len(pose3d[0]) > 0: all_poses_3d.append(pose3d) all_poses_3d_vis.append(np.ones((self.num_joints, 3))) pose2d = single_view_camera.world_to_pixel(pose3d) x_check = np.bitwise_and(pose2d[:, 0] >= 0, pose2d[:, 0] <= self.width - 1) y_check = np.bitwise_and(pose2d[:, 1] >= 0, pose2d[:, 1] <= self.height - 1) check = np.bitwise_and(x_check, y_check) joints_vis = np.ones((len(pose2d), 1)) joints_vis[np.logical_not(check)] = 0 all_poses_2d.append(pose2d) all_poses_2d_vis.append( np.repeat(np.reshape(joints_vis, (-1, 1)), 2, axis=1)) pred_index = '{}_{}'.format(cam_id, fid) pred_poses = self.test_pose_db[pred_index] preds = [] for pose in pred_poses: preds.append(np.array(pose['pred'])) preds = np.array(preds) results[int(cam_id)] = { 'image_file': image_file, 'joints_3d': all_poses_3d, 'joints_3d_visible': all_poses_3d_vis, 'joints_2d': all_poses_2d, 'joints_2d_visible': all_poses_2d_vis, 'camera': cam_param, 'joints': preds, 'sample_id': idx * self.num_cameras + int(cam_id), 'center': self.center, 'scale': self.scale, 'rotation': 0.0, 'ann_info': self.ann_info } return results
def _prepare_train_sample(self, idx): results = {} # To prepare a training sample, there are three steps. # 1. Randomly sample some 3D poses from motion capture database nposes_ori = np.random.choice(range(self.min_nposes, self.max_nposes)) select_poses = np.random.choice(self.train_pose_db, nposes_ori) joints_3d = np.array([p['pose'] for p in select_poses]) joints_3d_vis = np.array([p['vis'] for p in select_poses]) bbox_list = [] center_list = [] # 2. Place the selected poses at random locations in the space for n in range(nposes_ori): points = joints_3d[n][:, :2].copy() # get the location of a person's root joint center = np.mean(points[self.root_id, :2], axis=0) rot_rad = np.random.uniform(-180, 180) new_center = self.get_new_center(center_list) new_xy = self.rotate_points(points, center, rot_rad) - center + new_center loop_count = 0 # here n will be at least 1 while not self.isvalid(new_center, self.calc_bbox(new_xy, joints_3d_vis[n]), bbox_list): loop_count += 1 if loop_count >= 100: break new_center = self.get_new_center(center_list) new_xy = self.rotate_points(points, center, rot_rad) - center + new_center if loop_count >= 100: nposes = n joints_3d = joints_3d[:n] joints_3d_vis = joints_3d_vis[:n] break else: nposes = nposes_ori center_list.append(new_center) bbox_list.append(self.calc_bbox(new_xy, joints_3d_vis[n])) joints_3d[n][:, :2] = new_xy joints_3d_u = np.zeros((self.maximum_person, len(joints_3d[0]), 3)) joints_3d_vis_u = np.zeros((self.maximum_person, len(joints_3d[0]), 3)) for i in range(nposes): joints_3d_u[i] = joints_3d[i][:, 0:3] joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3] roots_3d = np.mean(joints_3d_u[:, self.root_id], axis=1) # 3. Project 3D poses to all views to get the respective 2D locations for cam_id, cam_param in self.cameras.items(): joints = [] joints_vis = [] single_view_camera = SimpleCamera(cam_param) for n in range(nposes): # project the 3D pose to the view to get 2D location pose2d = single_view_camera.world_to_pixel(joints_3d[n]) # check the validity of joint cooridinate x_check = np.bitwise_and(pose2d[:, 0] >= 0, pose2d[:, 0] <= self.width - 1) y_check = np.bitwise_and(pose2d[:, 1] >= 0, pose2d[:, 1] <= self.height - 1) check = np.bitwise_and(x_check, y_check) vis = joints_3d_vis[n][:, 0] > 0 vis[np.logical_not(check)] = 0 joints.append(pose2d) joints_vis.append( np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1)) # make joints and joints_vis having same shape joints_u = np.zeros((self.maximum_person, len(joints[0]), 2)) joints_vis_u = np.zeros((self.maximum_person, len(joints[0]), 2)) for i in range(nposes): joints_u[i] = joints[i] joints_vis_u[i] = joints_vis[i] results[int(cam_id)] = { 'joints_3d': joints_3d_u, 'joints_3d_visible': joints_3d_vis_u, 'roots_3d': roots_3d, 'joints': joints_u, 'joints_visible': joints_vis_u, 'camera': cam_param, 'sample_id': idx * self.num_cameras + int(cam_id), 'center': self.center, 'scale': self.scale, 'rotation': 0.0, 'num_persons': nposes, 'ann_info': self.ann_info } return results
def _get_db(self): """Get dataset base. Returns: dict: the dataset base (2D and 3D information) """ width = 1920 height = 1080 db = [] sample_id = 0 for seq in self.seq_list: cameras = self._get_cam(seq) curr_anno = osp.join(self.img_prefix, seq, 'hdPose3d_stage1_coco19') anno_files = sorted(glob.iglob('{:s}/*.json'.format(curr_anno))) print(f'load sequence: {seq}', flush=True) for i, file in enumerate(anno_files): if i % self.seq_frame_interval == 0: with open(file) as dfile: bodies = json.load(dfile)['bodies'] if len(bodies) == 0: continue for k, cam_param in cameras.items(): single_view_camera = SimpleCamera(cam_param) postfix = osp.basename(file).replace('body3DScene', '') prefix = '{:02d}_{:02d}'.format(k[0], k[1]) image_file = osp.join(seq, 'hdImgs', prefix, prefix + postfix) image_file = image_file.replace('json', 'jpg') all_poses_3d = np.zeros( (self.max_persons, self.num_joints, 3), dtype=np.float32) all_poses_vis_3d = np.zeros( (self.max_persons, self.num_joints, 3), dtype=np.float32) all_roots_3d = np.zeros((self.max_persons, 3), dtype=np.float32) all_poses = np.zeros( (self.max_persons, self.num_joints, 3), dtype=np.float32) cnt = 0 person_ids = -np.ones(self.max_persons, dtype=np.int) for body in bodies: if cnt >= self.max_persons: break pose3d = np.array(body['joints19']).reshape( (-1, 4)) pose3d = pose3d[:self.num_joints] joints_vis = pose3d[:, -1] > 0.1 if not joints_vis[self.root_id]: continue # Coordinate transformation M = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0]]) pose3d[:, 0:3] = pose3d[:, 0:3].dot(M) * 10.0 all_poses_3d[cnt] = pose3d[:, :3] all_roots_3d[cnt] = pose3d[self.root_id, :3] all_poses_vis_3d[cnt] = np.repeat(np.reshape( joints_vis, (-1, 1)), 3, axis=1) pose2d = np.zeros((pose3d.shape[0], 3)) # get pose_2d from pose_3d pose2d[:, :2] = single_view_camera.world_to_pixel( pose3d[:, :3]) x_check = np.bitwise_and(pose2d[:, 0] >= 0, pose2d[:, 0] <= width - 1) y_check = np.bitwise_and( pose2d[:, 1] >= 0, pose2d[:, 1] <= height - 1) check = np.bitwise_and(x_check, y_check) joints_vis[np.logical_not(check)] = 0 pose2d[:, -1] = joints_vis all_poses[cnt] = pose2d person_ids[cnt] = body['id'] cnt += 1 if cnt > 0: db.append({ 'image_file': osp.join(self.img_prefix, image_file), 'joints_3d': all_poses_3d, 'person_ids': person_ids, 'joints_3d_visible': all_poses_vis_3d, 'joints': [all_poses], 'roots_3d': all_roots_3d, 'camera': cam_param, 'num_persons': cnt, 'sample_id': sample_id, 'center': np.array((width / 2, height / 2), dtype=np.float32), 'scale': self._get_scale((width, height)) }) sample_id += 1 return db
def show_result(self, img, img_metas, visualize_2d=False, input_heatmaps=None, dataset_info=None, radius=4, thickness=2, out_dir=None, show=False): """Visualize the results.""" result = self.forward_test(img, img_metas, input_heatmaps=input_heatmaps) pose_3d = result['pose_3d'] sample_id = result['sample_id'] batch_size = pose_3d.shape[0] # get kpts and skeleton structure for i in range(batch_size): # visualize 3d results img_meta = img_metas[i] num_cameras = len(img_meta['camera']) pose_3d_i = pose_3d[i] pose_3d_i = pose_3d_i[pose_3d_i[:, 0, 3] >= 0] num_persons, num_keypoints, _ = pose_3d_i.shape pose_3d_list = [p[..., [0, 1, 2, 4]] for p in pose_3d_i] if num_persons > 0 else [] img_3d = imshow_multiview_keypoints_3d( pose_3d_list, skeleton=dataset_info.skeleton, pose_kpt_color=dataset_info.pose_kpt_color[:num_keypoints], pose_link_color=dataset_info.pose_link_color, space_size=self.human_detector.space_size, space_center=self.human_detector.space_center) if out_dir is not None: mmcv.image.imwrite( img_3d, os.path.join(out_dir, 'vis_3d', f'{sample_id[i]}_3d.jpg')) if visualize_2d: for j in range(num_cameras): single_camera = SimpleCamera(img_meta['camera'][j]) # img = mmcv.imread(img) if num_persons > 0: pose_2d = np.ones_like(pose_3d_i[..., :3]) pose_2d_flat = single_camera.world_to_pixel( pose_3d_i[..., :3].reshape((-1, 3))) pose_2d[..., :2] = pose_2d_flat.reshape( (num_persons, -1, 2)) pose_2d_list = [pose for pose in pose_2d] else: pose_2d_list = [] with tempfile.TemporaryDirectory() as tmpdir: if 'image_file' in img_meta: img_file = img_meta['image_file'][j] else: img_size = img_meta['center'][j] * 2 img = np.zeros( [int(img_size[1]), int(img_size[0]), 3], dtype=np.uint8) img.fill(255) # or img[:] = 255 img_file = os.path.join(tmpdir, 'tmp.jpg') mmcv.image.imwrite(img, img_file) img = imshow_keypoints( img_file, pose_2d_list, dataset_info.skeleton, 0.0, dataset_info.pose_kpt_color[:num_keypoints], dataset_info.pose_link_color, radius, thickness) if out_dir is not None: mmcv.image.imwrite( img, os.path.join(out_dir, f'{sample_id[i]}_{j}_2d.jpg'))