def get_face_keypoint_ids(model_cfg: Config) -> List:
    """A helper function to get the keypoint indices of the face from the
    model config.

    Args:
        model_cfg (Config): pose model config.

    Returns:
        list[int]: face keypoint indices. The length depends on the dataset.
    """
    face_indices = []

    # try obtaining face keypoint ids from dataset_info
    try:
        dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
        for id in range(68):
            face_indices.append(
                dataset_info.keypoint_name2id.get(f'face-{id}', None))
    except AttributeError:
        face_indices = []

    if not face_indices:
        # Fall back to hard coded keypoint id
        dataset_name = model_cfg.data.test.type
        if dataset_name in {'TopDownCocoWholeBodyDataset'}:
            face_indices = list(range(23, 91))
        else:
            raise ValueError('Can not determine the face id of '
                             f'{dataset_name}')

    return face_indices
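# A minimal usage sketch, assuming a COCO-WholeBody model config; the config
# path below is hypothetical and only illustrates the expected input:
def _example_get_face_keypoint_ids():
    from mmcv import Config
    model_cfg = Config.fromfile(
        'configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'coco-wholebody/res50_coco_wholebody_256x192.py')
    face_ids = get_face_keypoint_ids(model_cfg)
    # COCO-WholeBody places its 68 face landmarks at indices 23-90
    assert len(face_ids) == 68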
def get_mouth_keypoint_ids(model_cfg: Config) -> int:
    """A helper function to get the mouth keypoint index from the model
    config.

    Args:
        model_cfg (Config): pose model config.

    Returns:
        int: The mouth keypoint index.
    """
    # try obtaining mouth keypoint ids from dataset_info
    try:
        dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
        mouth_index = dataset_info.keypoint_name2id.get('face-62', None)
    except AttributeError:
        mouth_index = None

    if mouth_index is None:
        # Fall back to hard coded keypoint id
        dataset_name = model_cfg.data.test.type
        if dataset_name == 'TopDownCocoWholeBodyDataset':
            mouth_index = 85
        else:
            raise ValueError('Can not determine the mouth keypoint id of '
                             f'{dataset_name}')

    return mouth_index
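# Note on the hard-coded fallback above: in COCO-WholeBody the 68 face
# landmarks occupy keypoint indices 23-90 (see get_face_keypoint_ids), so
# 'face-62' sits at index 23 + 62 = 85.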
def test_interhand3d_demo():
    # InterHand2.6M demo
    pose_model = init_pose_model(
        'configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/'
        'res50_interhand3d_all_256x256.py',
        None,
        device='cpu')

    image_name = 'tests/data/interhand2.6m/image2017.jpg'
    det_result = {
        'image_name': image_name,
        'bbox': [50, 50, 50, 50],  # bbox format is 'xywh'
        'camera_param': None,
        'keypoints_3d_gt': None
    }
    det_results = [det_result]
    dataset = pose_model.cfg.data['test']['type']
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    pose_results = inference_interhand_3d_model(
        pose_model, image_name, det_results, dataset=dataset)

    for res in pose_results:
        res['title'] = 'title'

    vis_3d_pose_result(
        pose_model,
        result=pose_results,
        img=det_results[0]['image_name'],
        dataset_info=dataset_info,
    )

    # test special cases
    # Empty det results
    _ = inference_interhand_3d_model(
        pose_model, image_name, [], dataset=dataset)

    if torch.cuda.is_available():
        _ = inference_interhand_3d_model(
            pose_model.cuda(), image_name, det_results, dataset=dataset)

    with pytest.raises(NotImplementedError):
        _ = inference_interhand_3d_model(
            pose_model, image_name, det_results, dataset='test')
def test_bottom_up_pose_tracking_demo():
    # COCO demo
    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
        'coco/res50_coco_512x512.py',
        None,
        device='cpu')

    image_name = 'tests/data/coco/000000000785.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    pose_results, _ = inference_bottom_up_pose_model(
        pose_model, image_name, dataset_info=dataset_info)

    pose_results, next_id = get_track_id(pose_results, [], next_id=0)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)

    pose_results_last = pose_results

    # oks
    pose_results, next_id = get_track_id(
        pose_results, pose_results_last, next_id=next_id, use_oks=True)

    pose_results_last = pose_results

    # one_euro (will be deprecated)
    with pytest.deprecated_call():
        pose_results, next_id = get_track_id(
            pose_results,
            pose_results_last,
            next_id=next_id,
            use_one_euro=True)
def get_wrist_keypoint_ids(model_cfg: Config) -> Tuple[int, int]:
    """A helper function to get the keypoint indices of left and right wrists
    from the model config.

    Args:
        model_cfg (Config): pose model config.

    Returns:
        tuple[int, int]: The keypoint indices of left and right wrists.
    """
    # try obtaining wrist keypoint ids from dataset_info
    try:
        dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)
        left_wrist_idx = dataset_info.keypoint_name2id.get('left_wrist', None)
        right_wrist_idx = dataset_info.keypoint_name2id.get(
            'right_wrist', None)
    except AttributeError:
        left_wrist_idx = None
        right_wrist_idx = None

    if left_wrist_idx is None or right_wrist_idx is None:
        # Fall back to hard coded keypoint id
        dataset_name = model_cfg.data.test.type
        if dataset_name in {
                'TopDownCocoDataset', 'TopDownCocoWholeBodyDataset'
        }:
            left_wrist_idx = 9
            right_wrist_idx = 10
        elif dataset_name == 'AnimalPoseDataset':
            left_wrist_idx = 16
            right_wrist_idx = 17
        elif dataset_name == 'AnimalAP10KDataset':
            left_wrist_idx = 7
            right_wrist_idx = 10
        else:
            raise ValueError('Can not determine the wrist keypoint id of '
                             f'{dataset_name}')

    return left_wrist_idx, right_wrist_idx
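# Note on the hard-coded fallback above: indices 9 and 10 are the left and
# right wrists in the standard 17-keypoint COCO ordering, which
# COCO-WholeBody shares for its body keypoints.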
def test_pose_lifter_demo():
    # H36M demo
    pose_model = init_pose_model(
        'configs/body/3d_kpt_sview_rgb_img/pose_lift/'
        'h36m/simplebaseline3d_h36m.py',
        None,
        device='cpu')

    pose_det_result = {
        'keypoints': np.zeros((17, 3)),
        'bbox': [50, 50, 50, 50],
        'track_id': 0,
        'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
    }

    pose_results_2d = [[pose_det_result]]

    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    pose_results_2d = extract_pose_sequence(
        pose_results_2d, frame_idx=0, causal=False, seq_len=1, step=1)

    _ = inference_pose_lifter_model(
        pose_model,
        pose_results_2d,
        dataset_info=dataset_info,
        with_track_id=False)

    pose_lift_results = inference_pose_lifter_model(
        pose_model,
        pose_results_2d,
        dataset_info=dataset_info,
        with_track_id=True)

    for res in pose_lift_results:
        res['title'] = 'title'

    vis_3d_pose_result(
        pose_model,
        pose_lift_results,
        img=pose_results_2d[0][0]['image_name'],
        dataset_info=dataset_info)

    # test special cases
    # Empty 2D results
    _ = inference_pose_lifter_model(
        pose_model, [[]], dataset_info=dataset_info, with_track_id=False)

    if torch.cuda.is_available():
        _ = inference_pose_lifter_model(
            pose_model.cuda(),
            pose_results_2d,
            dataset_info=dataset_info,
            with_track_id=False)

    # test videopose3d
    pose_model = init_pose_model(
        'configs/body/3d_kpt_sview_rgb_vid/video_pose_lift/h36m/'
        'videopose3d_h36m_243frames_fullconv_supervised_cpn_ft.py',
        None,
        device='cpu')

    pose_det_result_0 = {
        'keypoints': np.ones((17, 3)),
        'bbox': [50, 50, 100, 100],
        'track_id': 0,
        'image_name': 'tests/data/h36m/S1_Directions_1.54138969_000001.jpg',
    }
    pose_det_result_1 = {
        'keypoints': np.ones((17, 3)),
        'bbox': [50, 50, 100, 100],
        'track_id': 1,
        'image_name': 'tests/data/h36m/S5_SittingDown.54138969_002061.jpg',
    }
    pose_det_result_2 = {
        'keypoints': np.ones((17, 3)),
        'bbox': [50, 50, 100, 100],
        'track_id': 2,
        'image_name': 'tests/data/h36m/S7_Greeting.55011271_000396.jpg',
    }

    pose_results_2d = [[pose_det_result_0], [pose_det_result_1],
                       [pose_det_result_2]]

    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    seq_len = pose_model.cfg.test_data_cfg.seq_len
    pose_results_2d_seq = extract_pose_sequence(
        pose_results_2d, 1, causal=False, seq_len=seq_len, step=1)

    pose_lift_results = inference_pose_lifter_model(
        pose_model,
        pose_results_2d_seq,
        dataset_info=dataset_info,
        with_track_id=True,
        image_size=[1000, 1000],
        norm_pose_2d=True)

    for res in pose_lift_results:
        res['title'] = 'title'

    vis_3d_pose_result(
        pose_model,
        pose_lift_results,
        img=pose_results_2d[0][0]['image_name'],
        dataset_info=dataset_info,
    )
def test_top_down_pose_tracking_demo():
    # COCO demo
    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'coco/res50_coco_256x192.py',
        None,
        device='cpu')
    image_name = 'tests/data/coco/000000000785.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
    person_result = [{'bbox': [50, 50, 50, 100]}]

    # test a single image, with a list of bboxes.
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        image_name,
        person_result,
        format='xywh',
        dataset_info=dataset_info)
    pose_results, next_id = get_track_id(pose_results, [], next_id=0)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)

    pose_results_last = pose_results

    # AIC demo
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'aic/res50_aic_256x192.py',
        None,
        device='cpu')
    image_name = 'tests/data/aic/054d9ce9201beffc76e5ff2169d2af2f027002ca.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    # test a single image, with a list of bboxes.
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        image_name,
        person_result,
        format='xywh',
        dataset_info=dataset_info)
    pose_results, next_id = get_track_id(pose_results, pose_results_last,
                                         next_id)

    for pose_result in pose_results:
        del pose_result['bbox']
    pose_results, next_id = get_track_id(pose_results, pose_results_last,
                                         next_id)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)

    # OneHand10K demo
    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(
        'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'onehand10k/res50_onehand10k_256x256.py',
        None,
        device='cpu')
    image_name = 'tests/data/onehand10k/9.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    # test a single image, with a list of bboxes.
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        image_name, [{
            'bbox': [10, 10, 30, 30]
        }],
        format='xywh',
        dataset_info=dataset_info)
    pose_results, next_id = get_track_id(pose_results, pose_results_last,
                                         next_id)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)

    # InterHand2D demo
    pose_model = init_pose_model(
        'configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'interhand2d/res50_interhand2d_all_256x256.py',
        None,
        device='cpu')
    image_name = 'tests/data/interhand2.6m/image2017.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    # test a single image, with a list of bboxes.
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        image_name, [{
            'bbox': [50, 50, 0, 0]
        }],
        format='xywh',
        dataset_info=dataset_info)
    pose_results, next_id = get_track_id(pose_results, [], next_id=0)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)

    pose_results_last = pose_results

    # MPII demo
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'mpii/res50_mpii_256x256.py',
        None,
        device='cpu')
    image_name = 'tests/data/mpii/004645041.jpg'
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])

    # test a single image, with a list of bboxes.
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        image_name, [{
            'bbox': [50, 50, 0, 0]
        }],
        format='xywh',
        dataset_info=dataset_info)
    pose_results, next_id = get_track_id(pose_results, pose_results_last,
                                         next_id)

    # show the results
    vis_pose_tracking_result(
        pose_model, image_name, pose_results, dataset_info=dataset_info)
def vis_pose_result(model,
                    img,
                    result,
                    radius=4,
                    thickness=1,
                    kpt_score_thr=0.3,
                    bbox_color='green',
                    dataset='TopDownCocoDataset',
                    dataset_info=None,
                    show=False,
                    out_file=None):
    """Visualize the detection results on the image.

    Args:
        model (nn.Module): The loaded detector.
        img (str | np.ndarray): Image filename or loaded image.
        result (list[dict]): The results to draw over `img`
            (bbox_result, pose_result).
        radius (int): Radius of circles.
        thickness (int): Thickness of lines.
        kpt_score_thr (float): The threshold to visualize the keypoints.
        bbox_color (str): Color of the bounding boxes. Default: 'green'.
        dataset (str): Dataset name. It is deprecated.
            Please use dataset_info instead.
        dataset_info (DatasetInfo): A class containing all dataset info.
        show (bool): Whether to show the image. Default: False.
        out_file (str | None): The filename of the output visualization image.
    """
    # get dataset info
    if (dataset_info is None and hasattr(model, 'cfg')
            and 'dataset_info' in model.cfg):
        dataset_info = DatasetInfo(model.cfg.dataset_info)

    if dataset_info is not None:
        skeleton = dataset_info.skeleton
        pose_kpt_color = dataset_info.pose_kpt_color
        pose_link_color = dataset_info.pose_link_color
    else:
        warnings.warn(
            'dataset is deprecated.'
            'Please set `dataset_info` in the config.'
            'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
            DeprecationWarning)
        # TODO: These will be removed in the later versions.
        palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
                            [230, 230, 0], [255, 153, 255], [153, 204, 255],
                            [255, 102, 255], [255, 51, 255], [102, 178, 255],
                            [51, 153, 255], [255, 153, 153], [255, 102, 102],
                            [255, 51, 51], [153, 255, 153], [102, 255, 102],
                            [51, 255, 51], [0, 255, 0], [0, 0, 255],
                            [255, 0, 0], [255, 255, 255]])

        if dataset in ('TopDownCocoDataset', 'BottomUpCocoDataset',
                       'TopDownOCHumanDataset', 'AnimalMacaqueDataset'):
            # show the results
            skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
                        [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
                        [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
                        [3, 5], [4, 6]]

            pose_link_color = palette[[
                0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
            ]]
            pose_kpt_color = palette[[
                16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0
            ]]

        elif dataset == 'TopDownCocoWholeBodyDataset':
            # show the results
            skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12],
                        [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
                        [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4],
                        [3, 5], [4, 6], [15, 17], [15, 18], [15, 19],
                        [16, 20], [16, 21], [16, 22], [91, 92], [92, 93],
                        [93, 94], [94, 95], [91, 96], [96, 97], [97, 98],
                        [98, 99], [91, 100], [100, 101], [101, 102],
                        [102, 103], [91, 104], [104, 105], [105, 106],
                        [106, 107], [91, 108], [108, 109], [109, 110],
                        [110, 111], [112, 113], [113, 114], [114, 115],
                        [115, 116], [112, 117], [117, 118], [118, 119],
                        [119, 120], [112, 121], [121, 122], [122, 123],
                        [123, 124], [112, 125], [125, 126], [126, 127],
                        [127, 128], [112, 129], [129, 130], [130, 131],
                        [131, 132]]

            pose_link_color = palette[[
                0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
            ] + [16, 16, 16, 16, 16, 16] + [
                0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
                16, 16
            ] + [
                0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
                16, 16
            ]]
            pose_kpt_color = palette[
                [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] +
                [0, 0, 0, 0, 0, 0] + [19] * (68 + 42)]

        elif dataset == 'TopDownAicDataset':
            skeleton = [[2, 1], [1, 0], [0, 13], [13, 3], [3, 4], [4, 5],
                        [8, 7], [7, 6], [6, 9], [9, 10], [10, 11], [12, 13],
                        [0, 6], [3, 9]]

            pose_link_color = palette[[
                9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 0, 7, 7
            ]]
            pose_kpt_color = palette[[
                9, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 0, 0
            ]]

        elif dataset == 'TopDownMpiiDataset':
            skeleton = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5],
                        [6, 7], [7, 8], [8, 9], [8, 12], [12, 11], [11, 10],
                        [8, 13], [13, 14], [14, 15]]

            pose_link_color = palette[[
                16, 16, 16, 16, 16, 16, 7, 7, 0, 9, 9, 9, 9, 9, 9
            ]]
            pose_kpt_color = palette[[
                16, 16, 16, 16, 16, 16, 7, 7, 0, 0, 9, 9, 9, 9, 9, 9
            ]]

        elif dataset == 'TopDownMpiiTrbDataset':
            skeleton = [[12, 13], [13, 0], [13, 1], [0, 2], [1, 3], [2, 4],
                        [3, 5], [0, 6], [1, 7], [6, 7], [6, 8], [7, 9],
                        [8, 10], [9, 11], [14, 15], [16, 17], [18, 19],
                        [20, 21], [22, 23], [24, 25], [26, 27], [28, 29],
                        [30, 31], [32, 33], [34, 35], [36, 37], [38, 39]]

            pose_link_color = palette[[16] * 14 + [19] * 13]
            pose_kpt_color = palette[[16] * 14 + [0] * 26]

        elif dataset in ('OneHand10KDataset', 'FreiHandDataset',
                         'PanopticDataset'):
            skeleton = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6],
                        [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
                        [0, 13], [13, 14], [14, 15], [15, 16], [0, 17],
                        [17, 18], [18, 19], [19, 20]]

            pose_link_color = palette[[
                0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
                16, 16
            ]]
            pose_kpt_color = palette[[
                0, 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16,
                16, 16, 16
            ]]

        elif dataset == 'InterHand2DDataset':
            skeleton = [[0, 1], [1, 2], [2, 3], [4, 5], [5, 6], [6, 7],
                        [8, 9], [9, 10], [10, 11], [12, 13], [13, 14],
                        [14, 15], [16, 17], [17, 18], [18, 19], [3, 20],
                        [7, 20], [11, 20], [15, 20], [19, 20]]

            pose_link_color = palette[[
                0, 0, 0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 0, 4, 8,
                12, 16
            ]]
            pose_kpt_color = palette[[
                0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
                16, 16, 0
            ]]

        elif dataset == 'Face300WDataset':
            # show the results
            skeleton = []

            pose_link_color = palette[[]]
            pose_kpt_color = palette[[19] * 68]
            kpt_score_thr = 0

        elif dataset == 'FaceAFLWDataset':
            # show the results
            skeleton = []

            pose_link_color = palette[[]]
            pose_kpt_color = palette[[19] * 19]
            kpt_score_thr = 0

        elif dataset == 'FaceCOFWDataset':
            # show the results
            skeleton = []

            pose_link_color = palette[[]]
            pose_kpt_color = palette[[19] * 29]
            kpt_score_thr = 0

        elif dataset == 'FaceWFLWDataset':
            # show the results
            skeleton = []

            pose_link_color = palette[[]]
            pose_kpt_color = palette[[19] * 98]
            kpt_score_thr = 0

        elif dataset == 'AnimalHorse10Dataset':
            skeleton = [[0, 1], [1, 12], [12, 16], [16, 21], [21, 17],
                        [17, 11], [11, 10], [10, 8], [8, 9], [9, 12], [2, 3],
                        [3, 4], [5, 6], [6, 7], [13, 14], [14, 15], [18, 19],
                        [19, 20]]

            pose_link_color = palette[[4] * 10 + [6] * 2 + [6] * 2 +
                                      [7] * 2 + [7] * 2]
            pose_kpt_color = palette[[
                4, 4, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 7, 7, 7, 4, 4, 7, 7,
                7, 4
            ]]

        elif dataset == 'AnimalFlyDataset':
            skeleton = [[1, 0], [2, 0], [3, 0], [4, 3], [5, 4], [7, 6],
                        [8, 7], [9, 8], [11, 10], [12, 11], [13, 12],
                        [15, 14], [16, 15], [17, 16], [19, 18], [20, 19],
                        [21, 20], [23, 22], [24, 23], [25, 24], [27, 26],
                        [28, 27], [29, 28], [30, 3], [31, 3]]

            pose_link_color = palette[[0] * 25]
            pose_kpt_color = palette[[0] * 32]

        elif dataset == 'AnimalLocustDataset':
            skeleton = [[1, 0], [2, 1], [3, 2], [4, 3], [6, 5], [7, 6],
                        [9, 8], [10, 9], [11, 10], [13, 12], [14, 13],
                        [15, 14], [17, 16], [18, 17], [19, 18], [21, 20],
                        [22, 21], [24, 23], [25, 24], [26, 25], [28, 27],
                        [29, 28], [30, 29], [32, 31], [33, 32], [34, 33]]

            pose_link_color = palette[[0] * 26]
            pose_kpt_color = palette[[0] * 35]

        elif dataset == 'AnimalZebraDataset':
            skeleton = [[1, 0], [2, 1], [3, 2], [4, 2], [5, 7], [6, 7],
                        [7, 2], [8, 7]]

            pose_link_color = palette[[0] * 8]
            pose_kpt_color = palette[[0] * 9]

        elif dataset == 'AnimalPoseDataset':
            skeleton = [[0, 1], [0, 2], [1, 3], [0, 4], [1, 4], [4, 5],
                        [5, 7], [6, 7], [5, 8], [8, 12], [12, 16], [5, 9],
                        [9, 13], [13, 17], [6, 10], [10, 14], [14, 18],
                        [6, 11], [11, 15], [15, 19]]

            pose_link_color = palette[[0] * 20]
            pose_kpt_color = palette[[0] * 20]

        else:
            raise NotImplementedError()

    if hasattr(model, 'module'):
        model = model.module

    img = model.show_result(
        img,
        result,
        skeleton,
        radius=radius,
        thickness=thickness,
        pose_kpt_color=pose_kpt_color,
        pose_link_color=pose_link_color,
        kpt_score_thr=kpt_score_thr,
        bbox_color=bbox_color,
        show=show,
        out_file=out_file)

    return img
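# A minimal usage sketch, assuming a model built with init_pose_model and
# results from inference_top_down_pose_model; the image and output paths are
# illustrative:
def _example_vis_pose_result(pose_model, pose_results):
    dataset_info = DatasetInfo(pose_model.cfg.data['test']['dataset_info'])
    return vis_pose_result(
        pose_model,
        'tests/data/coco/000000000785.jpg',
        pose_results,
        dataset_info=dataset_info,
        kpt_score_thr=0.3,
        out_file='vis_result.jpg')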
def inference_bottom_up_pose_model(model,
                                   img_or_path,
                                   dataset='BottomUpCocoDataset',
                                   dataset_info=None,
                                   pose_nms_thr=0.9,
                                   return_heatmap=False,
                                   outputs=None):
    """Inference a single image with a bottom-up pose model.

    Note:
        - num_people: P
        - num_keypoints: K
        - bbox height: H
        - bbox width: W

    Args:
        model (nn.Module): The loaded pose model.
        img_or_path (str | np.ndarray): Image filename or loaded image.
        dataset (str): Dataset name, e.g. 'BottomUpCocoDataset'.
            It is deprecated. Please use dataset_info instead.
        dataset_info (DatasetInfo): A class containing all dataset info.
        pose_nms_thr (float): retain oks overlap < pose_nms_thr,
            default: 0.9.
        return_heatmap (bool): Flag to return heatmap, default: False.
        outputs (list(str) | tuple(str)): Names of layers whose outputs
            need to be returned, default: None.

    Returns:
        tuple:
        - pose_results (list[np.ndarray]): The predicted pose info. \
            The length of the list is the number of people (P). \
            Each item in the list is a ndarray, containing each \
            person's pose (np.ndarray[Kx3]): x, y, score.
        - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
            torch.Tensor[N, K, H, W]]]): \
            Output feature maps from layers specified in `outputs`. \
            Includes 'heatmap' if `return_heatmap` is True.
    """
    # get dataset info
    if (dataset_info is None and hasattr(model, 'cfg')
            and 'dataset_info' in model.cfg):
        dataset_info = DatasetInfo(model.cfg.dataset_info)

    if dataset_info is not None:
        dataset_name = dataset_info.dataset_name
        flip_index = dataset_info.flip_index
        sigmas = getattr(dataset_info, 'sigmas', None)
    else:
        warnings.warn(
            'dataset is deprecated.'
            'Please set `dataset_info` in the config.'
            'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
            DeprecationWarning)
        assert (dataset == 'BottomUpCocoDataset')
        dataset_name = dataset
        flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16,
                      15]
        sigmas = None

    pose_results = []
    returned_outputs = []

    cfg = model.cfg
    device = next(model.parameters()).device
    if device.type == 'cpu':
        device = -1

    # build the data pipeline
    test_pipeline = Compose(cfg.test_pipeline)
    _pipeline_gpu_speedup(test_pipeline, next(model.parameters()).device)

    # prepare data
    data = {
        'dataset': dataset_name,
        'ann_info': {
            'image_size': np.array(cfg.data_cfg['image_size']),
            'num_joints': cfg.data_cfg['num_joints'],
            'flip_index': flip_index,
        }
    }
    if isinstance(img_or_path, np.ndarray):
        data['img'] = img_or_path
    else:
        data['image_file'] = img_or_path

    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    data = scatter(data, [device])[0]

    with OutputHook(model, outputs=outputs, as_tensor=False) as h:
        # forward the model
        with torch.no_grad():
            result = model(
                img=data['img'],
                img_metas=data['img_metas'],
                return_loss=False,
                return_heatmap=return_heatmap)

        if return_heatmap:
            h.layer_outputs['heatmap'] = result['output_heatmap']

        returned_outputs.append(h.layer_outputs)

        for idx, pred in enumerate(result['preds']):
            area = (np.max(pred[:, 0]) - np.min(pred[:, 0])) * (
                np.max(pred[:, 1]) - np.min(pred[:, 1]))
            pose_results.append({
                'keypoints': pred[:, :3],
                'score': result['scores'][idx],
                'area': area,
            })

        # pose nms
        score_per_joint = cfg.model.test_cfg.get('score_per_joint', False)
        keep = oks_nms(
            pose_results,
            pose_nms_thr,
            sigmas,
            score_per_joint=score_per_joint)
        pose_results = [pose_results[_keep] for _keep in keep]

    return pose_results, returned_outputs
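# A minimal usage sketch for bottom-up inference, reusing the
# associative-embedding config exercised by the tests above; no checkpoint is
# loaded here, so real use would pass a checkpoint path instead of None:
def _example_bottom_up_inference():
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/associative_embedding/'
        'coco/res50_coco_512x512.py',
        None,
        device='cpu')
    pose_results, _ = inference_bottom_up_pose_model(
        pose_model, 'tests/data/coco/000000000785.jpg')
    # each result dict carries 'keypoints' (K x 3: x, y, score), a
    # per-person 'score' and a bounding 'area'
    return pose_results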
def inference_top_down_pose_model(model,
                                  imgs_or_paths,
                                  person_results=None,
                                  bbox_thr=None,
                                  format='xywh',
                                  dataset='TopDownCocoDataset',
                                  dataset_info=None,
                                  return_heatmap=False,
                                  outputs=None):
    """Inference a single image with a list of person bounding boxes.
    Support single-frame and multi-frame inference setting.

    Note:
        - num_frames: F
        - num_people: P
        - num_keypoints: K
        - bbox height: H
        - bbox width: W

    Args:
        model (nn.Module): The loaded pose model.
        imgs_or_paths (str | np.ndarray | list(str) | list(np.ndarray)):
            Image filename(s) or loaded image(s).
        person_results (list(dict), optional): a list of detected persons
            that contains ``bbox`` and/or ``track_id``:

            - ``bbox`` (4, ) or (5, ): The person bounding box, which
              contains 4 box coordinates (and score).
            - ``track_id`` (int): The unique id for each human instance.

            If not provided, a dummy person result with a bbox covering
            the entire image will be used. Default: None.
        bbox_thr (float | None): Threshold for bounding boxes. Only bboxes
            with higher scores will be fed into the pose detector.
            If bbox_thr is None, all boxes will be used.
        format (str): bbox format ('xyxy' | 'xywh'). Default: 'xywh'.

            - `xyxy` means (left, top, right, bottom),
            - `xywh` means (left, top, width, height).
        dataset (str): Dataset name, e.g. 'TopDownCocoDataset'.
            It is deprecated. Please use dataset_info instead.
        dataset_info (DatasetInfo): A class containing all dataset info.
        return_heatmap (bool): Flag to return heatmap, default: False.
        outputs (list(str) | tuple(str)): Names of layers whose outputs
            need to be returned. Default: None.

    Returns:
        tuple:
        - pose_results (list[dict]): The bbox & pose info. \
            Each item in the list is a dictionary, \
            containing the bbox: (left, top, right, bottom, [score]) \
            and the pose (ndarray[Kx3]): x, y, score.
        - returned_outputs (list[dict[np.ndarray[N, K, H, W] | \
            torch.Tensor[N, K, H, W]]]): \
            Output feature maps from layers specified in `outputs`. \
            Includes 'heatmap' if `return_heatmap` is True.
    """
    # decide whether to use multi frames for inference
    if isinstance(imgs_or_paths, (list, tuple)):
        use_multi_frames = True
    else:
        assert isinstance(imgs_or_paths, (str, np.ndarray))
        use_multi_frames = False

    # get dataset info
    if (dataset_info is None and hasattr(model, 'cfg')
            and 'dataset_info' in model.cfg):
        dataset_info = DatasetInfo(model.cfg.dataset_info)
    if dataset_info is None:
        warnings.warn(
            'dataset is deprecated.'
            'Please set `dataset_info` in the config.'
            'Check https://github.com/open-mmlab/mmpose/pull/663'
            ' for details.', DeprecationWarning)

    # only two kinds of bbox format is supported.
    assert format in ['xyxy', 'xywh']

    pose_results = []
    returned_outputs = []

    if person_results is None:
        # create dummy person results
        sample = imgs_or_paths[0] if use_multi_frames else imgs_or_paths
        if isinstance(sample, str):
            width, height = Image.open(sample).size
        else:
            height, width = sample.shape[:2]
        person_results = [{'bbox': np.array([0, 0, width, height])}]

    if len(person_results) == 0:
        return pose_results, returned_outputs

    # Change for-loop preprocess each bbox to preprocess all bboxes at once.
    bboxes = np.array([box['bbox'] for box in person_results])

    # Select bboxes by score threshold
    if bbox_thr is not None:
        assert bboxes.shape[1] == 5
        valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
        bboxes = bboxes[valid_idx]
        person_results = [person_results[i] for i in valid_idx]

    if format == 'xyxy':
        bboxes_xyxy = bboxes
        bboxes_xywh = bbox_xyxy2xywh(bboxes)
    else:
        # format is already 'xywh'
        bboxes_xywh = bboxes
        bboxes_xyxy = bbox_xywh2xyxy(bboxes)

    # if bbox_thr removed all bounding boxes
    if len(bboxes_xywh) == 0:
        return [], []

    with OutputHook(model, outputs=outputs, as_tensor=False) as h:
        # poses is results['pred'] with shape N x 17 x 3
        poses, heatmap = _inference_single_pose_model(
            model,
            imgs_or_paths,
            bboxes_xywh,
            dataset=dataset,
            dataset_info=dataset_info,
            return_heatmap=return_heatmap,
            use_multi_frames=use_multi_frames)

        if return_heatmap:
            h.layer_outputs['heatmap'] = heatmap

        returned_outputs.append(h.layer_outputs)

    assert len(poses) == len(person_results), (
        f'{len(poses)}, {len(person_results)}, {len(bboxes_xyxy)}')
    for pose, person_result, bbox_xyxy in zip(poses, person_results,
                                              bboxes_xyxy):
        pose_result = person_result.copy()
        pose_result['keypoints'] = pose
        pose_result['bbox'] = bbox_xyxy
        pose_results.append(pose_result)

    return pose_results, returned_outputs
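# A minimal usage sketch for top-down inference, assuming person boxes come
# from an external detector; the config path appears in the tests above and
# the bbox values are illustrative:
def _example_top_down_inference():
    pose_model = init_pose_model(
        'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/'
        'coco/res50_coco_256x192.py',
        None,
        device='cpu')
    person_results = [{'bbox': [50, 50, 50, 100]}]  # xywh from a detector
    pose_results, _ = inference_top_down_pose_model(
        pose_model,
        'tests/data/coco/000000000785.jpg',
        person_results,
        format='xywh')
    # each returned dict keeps the person dict's keys plus 'keypoints'
    # (K x 3: x, y, score) and its bbox converted to xyxy
    return pose_results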
def get_hand_keypoint_ids(model_cfg: Config) -> List[int]:
    """A helper function to get the keypoint indices of left and right hands
    from the model config.

    Args:
        model_cfg (Config): pose model config.

    Returns:
        list[int]: hand keypoint indices. The length depends on the dataset.
    """
    # try obtaining hand keypoint ids from dataset_info
    try:
        hand_indices = []
        dataset_info = DatasetInfo(model_cfg.data.test.dataset_info)

        for side in ('left', 'right'):
            hand_indices.append(
                dataset_info.keypoint_name2id.get(f'{side}_hand_root', None))
            for finger in ('thumb', 'forefinger', 'middle_finger',
                           'ring_finger', 'pinky_finger'):
                for id in range(1, 5):
                    hand_indices.append(
                        dataset_info.keypoint_name2id.get(
                            f'{side}_{finger}{id}', None))
    except AttributeError:
        hand_indices = None

    if hand_indices is None:
        # Fall back to hard coded keypoint id
        dataset_name = model_cfg.data.test.type
        if dataset_name in {'TopDownCocoWholeBodyDataset'}:
            hand_indices = list(range(91, 133))
        else:
            raise ValueError('Can not determine the hand id of '
                             f'{dataset_name}')

    return hand_indices
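# Note on the hard-coded fallback above: indices 91-132 of COCO-WholeBody
# cover 21 keypoints per hand (1 root + 5 fingers x 4 joints each), 42 in
# total, matching the keypoint names queried in the try-branch.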
def test_voxelpose_forward():
    dataset = 'Body3DMviewDirectPanopticDataset'
    dataset_class = DATASETS.get(dataset)
    dataset_info = Config.fromfile(
        'configs/_base_/datasets/panoptic_body3d.py').dataset_info
    space_size = [8000, 8000, 2000]
    space_center = [0, -500, 800]
    cube_size = [20, 20, 8]
    data_cfg = dict(
        image_size=[960, 512],
        heatmap_size=[[240, 128]],
        space_size=space_size,
        space_center=space_center,
        cube_size=cube_size,
        num_joints=15,
        seq_list=['160906_band1'],
        cam_list=[(0, 12), (0, 6)],
        num_cameras=2,
        seq_frame_interval=1,
        subset='train',
        need_2d_label=True,
        need_camera_param=True,
        root_id=2)

    pipeline_heatmap = [
        dict(
            type='MultiItemProcess',
            pipeline=[
                dict(type='BottomUpGenerateTarget', sigma=3,
                     max_num_people=20)
            ]),
        dict(
            type='DiscardDuplicatedItems',
            keys_list=[
                'joints_3d', 'joints_3d_visible', 'ann_info', 'roots_3d',
                'num_persons', 'sample_id'
            ]),
        dict(
            type='GenerateVoxel3DHeatmapTarget',
            sigma=200.0,
            joint_indices=[2]),
        dict(type='RenameKeys', key_pairs=[('targets', 'input_heatmaps')]),
        dict(
            type='Collect',
            keys=['targets_3d', 'input_heatmaps'],
            meta_keys=[
                'camera', 'center', 'scale', 'joints_3d', 'num_persons',
                'joints_3d_visible', 'roots_3d', 'sample_id'
            ]),
    ]

    model_cfg = dict(
        type='DetectAndRegress',
        backbone=None,
        human_detector=dict(
            type='VoxelCenterDetector',
            image_size=[960, 512],
            heatmap_size=[240, 128],
            space_size=space_size,
            cube_size=cube_size,
            space_center=space_center,
            center_net=dict(
                type='V2VNet', input_channels=15, output_channels=1),
            center_head=dict(
                type='CuboidCenterHead',
                space_size=space_size,
                space_center=space_center,
                cube_size=cube_size,
                max_num=3,
                max_pool_kernel=3),
            train_cfg=dict(dist_threshold=500000000.0),
            test_cfg=dict(center_threshold=0.0),
        ),
        pose_regressor=dict(
            type='VoxelSinglePose',
            image_size=[960, 512],
            heatmap_size=[240, 128],
            sub_space_size=[2000, 2000, 2000],
            sub_cube_size=[20, 20, 8],
            num_joints=15,
            pose_net=dict(
                type='V2VNet', input_channels=15, output_channels=15),
            pose_head=dict(type='CuboidPoseHead', beta=100.0),
            train_cfg=None,
            test_cfg=None))

    model = builder.build_posenet(model_cfg)

    with tempfile.TemporaryDirectory() as tmpdir:
        dataset = dataset_class(
            ann_file=tmpdir + '/tmp_train.pkl',
            img_prefix='tests/data/panoptic_body3d/',
            data_cfg=data_cfg,
            pipeline=pipeline_heatmap,
            dataset_info=dataset_info,
            test_mode=False)

        data_loader = build_dataloader(
            dataset,
            seed=None,
            dist=False,
            shuffle=False,
            drop_last=False,
            workers_per_gpu=1,
            samples_per_gpu=1)

        with torch.no_grad():
            for data in data_loader:
                # test forward_train
                _ = model(
                    img=None,
                    img_metas=data['img_metas'].data[0],
                    return_loss=True,
                    targets_3d=data['targets_3d'],
                    input_heatmaps=data['input_heatmaps'])

                # test forward_test
                _ = model(
                    img=None,
                    img_metas=data['img_metas'].data[0],
                    return_loss=False,
                    input_heatmaps=data['input_heatmaps'])

                with tempfile.TemporaryDirectory() as tmpdir:
                    model.show_result(
                        img=None,
                        img_metas=data['img_metas'].data[0],
                        input_heatmaps=data['input_heatmaps'],
                        dataset_info=DatasetInfo(dataset_info),
                        out_dir=tmpdir,
                        visualize_2d=True)