def get_coco_from_mesh(self, mesh_coord_cam, cam_param):
    # regress coco joints
    joint_coord_cam = np.dot(self.joint_regressor_coco, mesh_coord_cam)
    joint_coord_cam = self.add_pelvis_and_neck(joint_coord_cam)
    # projection
    f, c = cam_param['focal'], cam_param['princpt']
    joint_coord_img = cam2pixel(joint_coord_cam, f, c)
    joint_coord_img[:, 2] = 1
    return joint_coord_cam, joint_coord_img
# variant of get_coco_from_mesh for datasets that store the COCO joint
# regressor as a torch.Tensor rather than a numpy array
def get_coco_from_mesh(self, mesh_coord_cam, cam_param):
    # regress coco joints
    mesh = torch.Tensor(mesh_coord_cam)
    joint_coord_cam = torch.matmul(self.joint_regressor_coco, mesh)
    joint_coord_cam = joint_coord_cam.numpy()
    joint_coord_cam = self.add_pelvis_and_neck(joint_coord_cam)
    # projection
    f, c = cam_param['focal'], cam_param['princpt']
    joint_coord_img = cam2pixel(joint_coord_cam, f, c)
    joint_coord_img[:, 2] = 1
    return joint_coord_cam, joint_coord_img
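# For reference, a minimal sketch of the pinhole projection that cam2pixel
# (imported from the repo's coordinate utilities) is assumed to perform:
# x_img = fx * X / Z + cx, y_img = fy * Y / Z + cy, with Z kept as depth.
# The name _cam2pixel_sketch is illustrative only; check the repo's actual helper.
def _cam2pixel_sketch(cam_coord, f, c):
    # cam_coord: (N, 3) camera-space points; f = (fx, fy); c = (cx, cy)
    x = cam_coord[:, 0] / cam_coord[:, 2] * f[0] + c[0]
    y = cam_coord[:, 1] / cam_coord[:, 2] * f[1] + c[1]
    z = cam_coord[:, 2]
    return np.stack((x, y, z), 1)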
def get_joints_from_mesh(self, mesh, joint_set_name, cam_param):
    joint_coord_cam = None
    if joint_set_name == 'human36':
        joint_coord_cam = np.dot(self.joint_regressor_h36m, mesh)
    elif joint_set_name == 'coco':
        joint_coord_cam = np.dot(self.joint_regressor_coco, mesh)
        joint_coord_cam = self.add_pelvis_and_neck(joint_coord_cam)
    # projection
    f = np.array(cam_param['focal'], dtype=np.float32)
    c = np.array(cam_param['princpt'], dtype=np.float32)
    joint_coord_img = cam2pixel(joint_coord_cam / 1000, f, c)
    joint_coord_img[:, 2] = 1
    return joint_coord_cam, joint_coord_img
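# Illustrative usage of get_joints_from_mesh (the camera values below are
# hypothetical): the regressed joints stay in camera space, while the
# projected joints are in pixel coordinates.
# cam_param = {'focal': [1500.0, 1500.0], 'princpt': [512.0, 512.0]}
# h36m_cam, h36m_img = self.get_joints_from_mesh(mesh, 'human36', cam_param)
# coco_cam, coco_img = self.get_joints_from_mesh(mesh, 'coco', cam_param)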
def __getitem__(self, idx):
    data = copy.deepcopy(self.datalist[idx])
    img_id, img_name, cam_param, bbox, smpl_param, img_shape = \
        data['img_id'], data['img_name'], data['cam_param'], data['bbox'], \
        data['smpl_param'], data['img_shape']
    flip, rot = augm_params(is_train=(self.data_split == 'train'))

    # smpl coordinates
    smpl_mesh_coord_cam, smpl_joint_coord_cam = self.get_smpl_coord(smpl_param)
    smpl_coord_cam = np.concatenate((smpl_mesh_coord_cam, smpl_joint_coord_cam))
    smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'], cam_param['princpt'])
    joint_coord_img = smpl_coord_img[self.smpl_vertex_num:][:, :2]
    # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='gt')

    # root-relative cam coord
    smpl_coord_cam = smpl_coord_cam - smpl_coord_cam[self.smpl_vertex_num + self.smpl_root_joint_idx]
    mesh_coord_cam = smpl_coord_cam[:self.smpl_vertex_num]
    joint_coord_cam = smpl_coord_cam[self.smpl_vertex_num:]

    if not cfg.DATASET.use_gt_input:
        # train / test with 2d detection input
        det_data = self.datalist_pose2d_det[idx]
        assert img_id == det_data['img_id']
        joint_coord_img = det_data['img_joint']
        # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='det')
        # vis_3d_pose(joint_coord_cam, self.smpl_skeleton, joint_set_name='smpl')

    # make new bbox
    bbox = get_bbox(joint_coord_img)
    bbox = process_bbox(bbox.copy())

    # aug
    joint_coord_img, trans = j2d_processing(
        joint_coord_img.copy(),
        (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]),
        bbox, rot, flip, self.flip_pairs)
    joint_coord_cam = j3d_processing(joint_coord_cam, rot, flip, self.flip_pairs)

    # -> 0~1
    joint_coord_img = joint_coord_img[:, :2]
    joint_coord_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]])

    # normalize loc&scale
    mean, std = np.mean(joint_coord_img, axis=0), np.std(joint_coord_img, axis=0)
    joint_coord_img = (joint_coord_img.copy() - mean) / std

    if cfg.MODEL.name == 'pose2mesh_net':
        # default valid
        mesh_valid = np.ones((len(mesh_coord_cam), 1), dtype=np.float32)
        reg_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)
        lift_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)

        inputs = {'pose2d': joint_coord_img}
        targets = {'mesh': mesh_coord_cam / 1000,
                   'lift_pose3d': joint_coord_cam,
                   'reg_pose3d': joint_coord_cam}
        meta = {'mesh_valid': mesh_valid,
                'lift_pose3d_valid': lift_joint_valid,
                'reg_pose3d_valid': reg_joint_valid}
    elif cfg.MODEL.name == 'posenet':
        # default valid
        joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32)
        return joint_coord_img, joint_coord_cam, joint_valid

    return inputs, targets, meta
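# Illustrative usage (a sketch with assumed names): the dataset plugs into a
# standard torch DataLoader. In 'pose2mesh_net' mode each item is the
# (inputs, targets, meta) dicts above; in 'posenet' mode it is the
# (joint_coord_img, joint_coord_cam, joint_valid) arrays.
# from torch.utils.data import DataLoader
# dataset = Human36M(data_split='train')   # hypothetical constructor signature
# loader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=8)
# inputs, targets, meta = next(iter(loader))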
def load_data(self):
    print('Load annotations of Human36M Protocol ' + str(self.protocol))
    subject_list = self.get_subject()
    sampling_ratio = self.get_subsampling_ratio()

    # aggregate annotations from each subject
    db = COCO()
    cameras = {}
    joints = {}
    smpl_params = {}
    for subject in subject_list:
        # data load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_data.json'), 'r') as f:
            annot = json.load(f)
        if len(db.dataset) == 0:
            for k, v in annot.items():
                db.dataset[k] = v
        else:
            for k, v in annot.items():
                db.dataset[k] += v
        # camera load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f:
            cameras[str(subject)] = json.load(f)
        # joint coordinate load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f:
            joints[str(subject)] = json.load(f)
        # smpl parameter load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_smpl_param.json'), 'r') as f:
            smpl_params[str(subject)] = json.load(f)
    db.createIndex()

    skip_idx = []
    datalist = []
    skip_img_idx = []
    for aid in db.anns.keys():
        ann = db.anns[aid]
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]
        img_path = osp.join(self.img_dir, img['file_name'])
        img_name = img_path.split('/')[-1]

        # subsample frames
        frame_idx = img['frame_idx']
        if frame_idx % sampling_ratio != 0:
            continue

        # check that an smpl parameter exists for this frame
        subject = img['subject']
        action_idx = img['action_idx']
        subaction_idx = img['subaction_idx']
        try:
            smpl_param = smpl_params[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)]
        except KeyError:
            skip_idx.append(image_id)
            skip_img_idx.append(img_name)
            continue
        smpl_param['gender'] = 'neutral'  # self.subject_genders[subject] # set corresponding gender

        # camera parameter
        cam_idx = img['cam_idx']
        cam_param = cameras[str(subject)][str(cam_idx)]
        R = np.array(cam_param['R'], dtype=np.float32)
        t = np.array(cam_param['t'], dtype=np.float32)
        f = np.array(cam_param['f'], dtype=np.float32)
        c = np.array(cam_param['c'], dtype=np.float32)
        cam_param = {'R': R, 't': t, 'focal': f, 'princpt': c}

        # project world coordinates to camera / image coordinate space
        joint_world = np.array(joints[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)],
                               dtype=np.float32)
        joint_cam = world2cam(joint_world, R, t)
        joint_img = cam2pixel(joint_cam, f, c)
        joint_vis = np.ones((self.human36_joint_num, 1))

        bbox = process_bbox(np.array(ann['bbox']))
        if bbox is None:
            continue

        datalist.append({
            'img_path': img_path,
            'img_name': img_name,
            'img_id': image_id,
            'bbox': bbox,
            'img_hw': (img['height'], img['width']),
            'joint_img': joint_img,  # [x_img, y_img, z_cam]
            'joint_cam': joint_cam,  # [X, Y, Z] in camera coordinate
            'joint_vis': joint_vis,
            'smpl_param': smpl_param,
            'cam_param': cam_param})

    datalist = sorted(datalist, key=lambda x: x['img_name'])
    return datalist, skip_idx, skip_img_idx
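# For reference, a minimal sketch of the extrinsic transform that world2cam
# (imported from the repo's coordinate utilities) is assumed to apply:
# X_cam = R @ X_world + t. The name _world2cam_sketch is illustrative only;
# check the repo's actual helper before relying on it.
def _world2cam_sketch(world_coord, R, t):
    # world_coord: (N, 3) points; R: (3, 3) rotation; t: (3,) translation
    return np.dot(R, world_coord.transpose(1, 0)).transpose(1, 0) + t.reshape(1, 3)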