def optimize_cam_param(project_net, joint_input, crop_size): bbox = get_bbox(joint_input) bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25) bbox2 = process_bbox(bbox.copy()) proj_target_joint_img, trans = j2d_processing(joint_input.copy(), (crop_size, crop_size), bbox1, 0, 0, None) joint_img, _ = j2d_processing(joint_input.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, 0, 0, None) joint_img = joint_img[:, :2] joint_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std joint_img = torch.Tensor(joint_img[None, :, :]).cuda() target_joint = torch.Tensor(proj_target_joint_img[None, :, :2]).cuda() # get optimization settings for projection criterion = nn.L1Loss() optimizer = optim.Adam(project_net.parameters(), lr=0.1) # estimate mesh, pose model.eval() pred_mesh, _ = model(joint_img) pred_mesh = pred_mesh[:, graph_perm_reverse[:mesh_model.face.max() + 1], :] pred_3d_joint = torch.matmul(joint_regressor, pred_mesh) out = {} # assume batch=1 project_net.train() for j in range(0, 1500): # projection pred_2d_joint = project_net(pred_3d_joint.detach()) loss = criterion(pred_2d_joint, target_joint[:, :17, :]) optimizer.zero_grad() loss.backward() optimizer.step() if j == 500: for param_group in optimizer.param_groups: param_group['lr'] = 0.05 if j == 1000: for param_group in optimizer.param_groups: param_group['lr'] = 0.001 out['mesh'] = pred_mesh[0].detach().cpu().numpy() out['cam_param'] = project_net.cam_param[0].detach().cpu().numpy() out['bbox'] = bbox1 out['target'] = proj_target_joint_img return out
def get_fitting_error(self, bbox, coco_from_dataset, coco_from_smpl, coco_joint_valid): bbox = process_bbox(bbox.copy(), aspect_ratio=1.0) coco_from_smpl_xy1 = np.concatenate( (coco_from_smpl[:, :2], np.ones_like(coco_from_smpl[:, 0:1])), 1) coco_from_smpl, _ = j2d_processing(coco_from_smpl_xy1, (64, 64), bbox, 0, 0, None) coco_from_dataset_xy1 = np.concatenate( (coco_from_dataset[:, :2], np.ones_like(coco_from_smpl[:, 0:1])), 1) coco_from_dataset, trans = j2d_processing(coco_from_dataset_xy1, (64, 64), bbox, 0, 0, None) # vis # img = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) # img = cv2.warpAffine(img, trans, (64, 64), flags=cv2.INTER_LINEAR) # vis_2d_pose(coco_from_smpl, img, self.coco_origin_skeleton, prefix='from smpl') # vis_2d_pose(coco_from_dataset, img, self.coco_origin_skeleton, prefix='from dataset') # mask joint coordinates coco_joint = coco_from_dataset[:, :2][np.tile(coco_joint_valid, ( 1, 2)) == 1].reshape(-1, 2) coco_from_smpl = coco_from_smpl[:, :2][np.tile(coco_joint_valid, ( 1, 2)) == 1].reshape(-1, 2) error = np.sqrt(np.sum((coco_joint - coco_from_smpl)**2, 1)).mean() return error
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) annot_id, img_id, img_path, img_shape = data['annot_id'], data['image_id'], data['img_path'], data['img_shape'] cam_param, bbox, smpl_param = data['cam_param'].copy(), data['bbox'].copy(), data['smpl_param'].copy() rot, flip = 0, 0 # Detection # detection_data = copy.deepcopy(self.datadict_pose2d_det[idx]) # det_annot_id, det_joint_img_coco = detection_data['annotation_id'], np.array(detection_data['keypoints'], dtype=np.float32) detection_data = self.datadict_pose2d_det[annot_id] det_joint_img_coco = detection_data['img_joint'] joint_img_coco = self.add_pelvis_and_neck(det_joint_img_coco) # smpl coordinates mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param) # regress h36m, coco cam joints joint_cam_coco, gt_joint_img_coco = self.get_coco_from_mesh(mesh_cam, cam_param) joint_cam_h36m = self.get_h36mJ_from_mesh(mesh_cam) # root relative camera coordinate mesh_cam = mesh_cam - joint_cam_h36m[:1] joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1] joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1] # make new bbox bbox = get_bbox(gt_joint_img_coco) bbox = process_bbox(bbox.copy()) if cfg.DATASET.use_gt_input: joint_img_coco = gt_joint_img_coco # aug joint_img_coco, trans = j2d_processing(joint_img_coco.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, flip, None) # -> 0~1 joint_img_coco = joint_img_coco[:, :2] joint_img_coco /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img_coco, axis=0), np.std(joint_img_coco, axis=0) joint_img_coco = (joint_img_coco.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': inputs = {'pose2d': joint_img_coco} targets = {'mesh': mesh_cam / 1000, 'reg_pose3d': joint_cam_h36m} meta = {'dummy': np.ones(1, dtype=np.float32)} return inputs, targets, meta elif cfg.MODEL.name == 'posenet': joint_valid = np.ones((len(joint_cam_coco), 1), dtype=np.float32) # dummy return joint_img_coco, joint_cam_coco, joint_valid
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_id, img_path, img_shape, joint_cam, cam_param, mano_param = \ data['img_id'], data['img_path'], data['img_shape'], data['joint_cam'], data['cam_param'], data['mano_param'] rot, flip = 0, 0 # mano coordinates mano_mesh_cam, mano_joint_cam = self.get_mano_coord(mano_param, cam_param) mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam)) # cam -> image projection # focal, princpt = cam_param['focal'], cam_param['princpt'] # joint_coord_img = cam2pixel(mano_joint_cam, focal, princpt)[:, :2] # root align cam mesh/joint mano_coord_cam = mano_coord_cam - mano_joint_cam[:1] mesh_coord_cam = mano_coord_cam[:self.vertex_num]; joint_coord_cam = mano_coord_cam[self.vertex_num:]; # use det det_data = self.datalist_pose2d_det[idx] assert img_id == det_data['img_id'] joint_coord_img = det_data['img_joint'] # make bbox bbox = get_bbox(joint_coord_img) bbox = process_bbox(bbox.copy()) # aug joint_coord_img, trans = j2d_processing(joint_coord_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, flip, None) # -> 0~1 joint_coord_img = joint_coord_img[:, :2] joint_coord_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_coord_img, axis=0), np.std(joint_coord_img, axis=0) joint_coord_img = (joint_coord_img.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': # default valid mesh_valid = np.ones((len(mesh_coord_cam), 1), dtype=np.float32) reg_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) lift_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) inputs = {'pose2d': joint_coord_img} targets = {'mesh': mesh_coord_cam / 1000, 'lift_pose3d': joint_coord_cam, 'reg_pose3d': joint_coord_cam} meta = {'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid} return inputs, targets, meta elif cfg.MODEL.name == 'posenet': # default valid joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) return joint_coord_img, joint_coord_cam, joint_valid
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) flip, rot = 0, 0 aid, img_path, img_shape = data['aid'], data['img_path'], data[ 'img_shape'] gt_joint_img_coco = data['joint_img'] det_data = self.datalist_cocoj_det[idx] det_aid = det_data['aid'] assert det_aid == aid, f"detection aid: {det_aid}, dataset aid: {aid} / det_aid type: {type(det_aid)}, aid type: {type(aid)}" det_img_coco = np.array(det_data['keypoints']).reshape(-1, 3) joint_img = self.add_pelvis_and_neck(det_img_coco) # vis_2d_joints(gt_joint_img, img_path, self.coco_skeleton, prefix=f"{img_path.split('/')[-1]}") bbox = get_bbox(joint_img) bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25) bbox2 = process_bbox(bbox.copy()) proj_target_joint_img, trans = j2d_processing( joint_img.copy(), (self.img_res, self.img_res), bbox1, rot, flip, None) joint_img, _ = j2d_processing( joint_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, rot, flip, None) # -> 0~1 joint_img = joint_img[:, :2] joint_img /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std return joint_img[:, :2], proj_target_joint_img[:, :2], img_path, trans
def load_data(self): print('Load annotations of COCO') db = COCO( osp.join(self.annot_path, 'person_keypoints_' + self.data_split + '2017.json')) with open(osp.join(self.annot_path, 'coco_smplify_train.json')) as f: smplify_results = json.load(f) datalist = [] if self.data_split == 'train': for aid in db.anns.keys(): ann = db.anns[aid] img = db.loadImgs(ann['image_id'])[0] imgname = osp.join('train2017', img['file_name']) img_path = osp.join(self.img_path, imgname) width, height = img['width'], img['height'] if ann['iscrowd'] or (ann['num_keypoints'] == 0): continue # bbox bbox = process_bbox(ann['bbox']) if bbox is None: continue # joint coordinates joint_img = np.array(ann['keypoints'], dtype=np.float32).reshape(-1, 3) joint_valid = (joint_img[:, 2].copy().reshape(-1, 1) > 0).astype(np.float32) joint_img[:, 2] = 0 if str(aid) in smplify_results: dp_data = None smplify_result = smplify_results[str(aid)] else: continue datalist.append({ 'img_path': img_path, 'img_shape': (height, width), 'bbox': bbox, 'joint_img': joint_img, # [org_img_x, org_img_y, 0] 'joint_valid': joint_valid, 'dp_data': dp_data, 'smplify_result': smplify_result }) return datalist
def load_data(self): db = COCO( osp.join(self.annot_path, 'person_keypoints_' + self.data_split + '2017.json')) datalist = [] for aid in db.anns.keys(): ann = db.anns[aid] img = db.loadImgs(ann['image_id'])[0] imgname = osp.join(f'{self.data_split}2017', img['file_name']) img_path = osp.join(self.img_path, imgname) width, height = img['width'], img['height'] if ann['iscrowd'] or (ann['num_keypoints'] == 0): continue # bbox bbox = process_bbox(ann['bbox']) if bbox is None: continue # joint coordinates joint_img = np.array(ann['keypoints'], dtype=np.float32).reshape(-1, 3) joint_img = self.add_pelvis_and_neck(joint_img) joint_valid = (joint_img[:, 2].copy().reshape(-1, 1) > 0).astype( np.float32) joint_img[:, 2] = 0 datalist.append({ 'aid': aid, 'img_path': img_path, 'img_shape': (height, width), 'bbox': bbox, 'joint_img': joint_img, # [org_img_x, org_img_y, 0] 'joint_valid': joint_valid, 'root_joint_depth': np.array([10], dtype=np.float32)[0] # dummy }) datalist = sorted(datalist, key=lambda x: x['aid']) return datalist
def load_data(self): print('Load annotations of 3DPW ') db = COCO(osp.join(self.data_path, 'new_3DPW_' + self.data_split + '.json')) datalist = [] for aid in db.anns.keys(): ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] img_width, img_height = img['width'], img['height'] sequence_name = img['sequence'] img_name = img['file_name'] # if 'fencing' not in sequence_name: # continue img_path = osp.join(self.img_path, sequence_name, img_name) cam_param = {k: np.array(v, dtype=np.float32) for k,v in img['cam_param'].items()} smpl_param = ann['smpl_param'] pid = ann['person_id'] vid_name = sequence_name + str(pid) bbox = process_bbox(np.array(ann['bbox'])) if bbox is None: continue datalist.append({ 'annot_id': aid, 'person_id': pid, 'image_id': image_id, 'img_path': img_path, 'vid_name': vid_name, 'img_shape': (img_height, img_width), 'cam_param': cam_param, 'bbox': bbox, 'smpl_param': smpl_param}) datalist = sorted(datalist, key=lambda x: (x['person_id'],x['img_path'])) valid_names = np.array([data['vid_name'] for data in datalist]) unique_names = np.unique(valid_names) video_indices = [] for u_n in unique_names: indexes = valid_names == u_n video_indices.append(indexes) return datalist, video_indices
def load_data(self): print('Load annotations of FreiHAND ') if self.data_split == 'train': db = COCO(osp.join(self.data_path, 'freihand_train_coco.json')) with open(osp.join(self.data_path, 'freihand_train_data.json')) as f: data = json.load(f) else: db = COCO(osp.join(self.data_path, 'freihand_eval_coco.json')) with open(osp.join(self.data_path, 'freihand_eval_data.json')) as f: data = json.load(f) datalist = [] for aid in db.anns.keys(): ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] img_path = osp.join(self.data_path, img['file_name']) img_shape = (img['height'], img['width']) db_idx = str(img['db_idx']) if self.data_split == 'train': cam_param, mano_param, joint_cam = data[db_idx]['cam_param'], data[db_idx]['mano_param'], data[db_idx][ 'joint_3d'] joint_cam = np.array(joint_cam).reshape(-1, 3) bbox = process_bbox(np.array(ann['bbox'])) if bbox is None: continue else: cam_param, scale = data[db_idx]['cam_param'], data[db_idx]['scale'] cam_param['R'] = np.eye(3).astype(np.float32).tolist(); cam_param['t'] = np.zeros((3), dtype=np.float32) # dummy joint_cam = np.ones((self.joint_num, 3), dtype=np.float32) # dummy mano_param = {'pose': np.ones((48), dtype=np.float32), 'shape': np.ones((10), dtype=np.float32)} datalist.append({ 'img_id': image_id, 'img_path': img_path, 'img_shape': img_shape, 'joint_cam': joint_cam, 'cam_param': cam_param, 'mano_param': mano_param}) return sorted(datalist, key=lambda d: d['img_id'])
def load_data(self): print('Load annotations of SURREAL') db = COCO(osp.join(self.data_path, self.data_split + '.json')) datalist = [] for iid in db.imgs.keys(): img = db.imgs[iid] img_id = img['id'] img_width, img_height = img['width'], img['height'] img_name = img['file_name'] cam_param = {k: np.array(v) for k, v in img['cam_param'].items()} ann_id = db.getAnnIds(img_id) ann = db.loadAnns(ann_id)[0] smpl_param = ann['smpl_param'] joint_cam = np.array(ann['joint_cam'], dtype=np.float32).reshape(-1, 3) bbox = process_bbox(ann['bbox']) if bbox is None: continue datalist.append({ 'img_id': img_id, 'img_name': img_name, 'img_shape': (img_height, img_width), 'cam_param': cam_param, 'bbox': bbox, 'smpl_param': smpl_param }) if self.debug and len(datalist) > 1000: break datalist = sorted(datalist, key=lambda d: d['img_id']) if self.data_split == 'train': # fix gpu bug datalist.append(datalist[0]) return datalist
def load_data(self): print('Load annotations of 3DPW ') db = COCO(osp.join(self.data_path, '3DPW_' + self.data_split + '.json')) datalist = [] for aid in db.anns.keys(): ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] img_width, img_height = img['width'], img['height'] sequence_name = img['sequence'] img_name = img['file_name'] img_path = osp.join(self.img_path, sequence_name, img_name) cam_param = { k: np.array(v, dtype=np.float32) for k, v in img['cam_param'].items() } smpl_param = ann['smpl_param'] bbox = process_bbox(np.array(ann['bbox'])) if bbox is None: continue datalist.append({ 'annot_id': aid, 'image_id': image_id, 'img_path': img_path, 'img_shape': (img_height, img_width), 'cam_param': cam_param, 'bbox': bbox, 'smpl_param': smpl_param }) datalist = sorted(datalist, key=lambda x: x['annot_id']) return datalist
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_shape, bbox, dp_data, smplify_result = data['img_shape'], data[ 'bbox'], data['dp_data'], data['smplify_result'] flip, rot = augm_params(is_train=(self.data_split == 'train')) # img_name = img_path.split('/')[-1][:-4] smpl_param, cam_param = smplify_result['smpl_param'], smplify_result[ 'cam_param'] # regress h36m, coco joints mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param) joint_cam_h36m, joint_img_h36m = self.get_joints_from_mesh( mesh_cam, 'human36', cam_param) joint_cam_coco, joint_img_coco = self.get_joints_from_mesh( mesh_cam, 'coco', cam_param) # vis_2d_pose(joint_img_h36m, img_path, self.human36_skeleton, prefix='h36m joint') # vis_2d_pose(joint_img_coco, img_path, self.coco_skeleton, prefix='coco joint') # vis_3d_pose(joint_cam_h36m, self.human36_skeleton, 'human36', gt=True) # root relative camera coordinate mesh_cam = mesh_cam - joint_cam_h36m[:1] joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1] joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1] if self.input_joint_name == 'coco': joint_img, joint_cam = joint_img_coco, joint_cam_coco elif self.input_joint_name == 'human36': joint_img, joint_cam = joint_img_h36m, joint_cam_h36m # make new bbox tight_bbox = get_bbox(joint_img) bbox = process_bbox(tight_bbox.copy()) # aug joint_img, trans = j2d_processing( joint_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, 0, None) if not cfg.DATASET.use_gt_input: joint_img = self.replace_joint_img(joint_img, tight_bbox, trans) if flip: joint_img = flip_2d_joint(joint_img, cfg.MODEL.input_shape[1], self.flip_pairs) joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs) # -> 0~1 joint_img = joint_img[:, :2] joint_img /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': # default valid mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32) reg_joint_valid = np.ones((len(joint_cam_h36m), 1), dtype=np.float32) lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) error = self.get_fitting_error(tight_bbox, data['joint_img'], joint_img_coco[:17], data['joint_valid']) if error > self.fitting_thr: mesh_valid[:], reg_joint_valid[:], lift_joint_valid[:] = 0, 0, 0 inputs = {'pose2d': joint_img} targets = { 'mesh': mesh_cam / 1000, 'lift_pose3d': joint_cam, 'reg_pose3d': joint_cam_h36m } meta = { 'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid } return inputs, targets, meta elif cfg.MODEL.name == 'posenet': # default valid joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) # compute fitting error error = self.get_fitting_error(tight_bbox, data['joint_img'], joint_img_coco[:17], data['joint_valid']) if error > self.fitting_thr: joint_valid[:, :] = 0 return joint_img, joint_cam, joint_valid
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) flip, rot = augm_params(is_train=(self.data_split == 'train')) # get smpl mesh, joints smpl_param, cam_param, img_shape = data['smpl_param'], data[ 'cam_param'], data['img_shape'] mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param, cam_param) # regress coco joints joint_cam_h36m, joint_img_h36m = self.get_joints_from_mesh( mesh_cam, 'human36', cam_param) joint_cam_coco, joint_img_coco = self.get_joints_from_mesh( mesh_cam, 'coco', cam_param) # debug vis # vis_3d_pose(joint_cam_coco, self.coco_skeleton, joint_set_name='coco', prefix=f'coco_joint_cam_{idx}') # img = np.zeros((int(img_shape[0]), int(img_shape[1]), 3)) # vis_2d_pose(joint_img_coco, img, self.coco_skeleton, prefix='coco joint img') # root relative camera coordinate mesh_cam = mesh_cam - joint_cam_h36m[:1] joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1] joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1] # joint_cam is PoseNet target if self.input_joint_name == 'coco': joint_img, joint_cam = joint_img_coco, joint_cam_coco elif self.input_joint_name == 'human36': joint_img, joint_cam = joint_img_h36m, joint_cam_h36m # make new bbox tight_bbox = get_bbox(joint_img) bbox = process_bbox(tight_bbox.copy()) # aug joint_img, trans = j2d_processing( joint_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, 0, None) if not cfg.DATASET.use_gt_input: joint_img = self.replace_joint_img(joint_img, tight_bbox, trans) if flip: joint_img = flip_2d_joint(joint_img, cfg.MODEL.input_shape[1], self.flip_pairs) joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs) # -> 0~1 joint_img = joint_img[:, :2] joint_img /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': # default valid mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32) reg_joint_valid = np.ones((len(joint_cam_h36m), 1), dtype=np.float32) lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) inputs = {'pose2d': joint_img} targets = { 'mesh': mesh_cam / 1000, 'lift_pose3d': joint_cam, 'reg_pose3d': joint_cam_h36m } meta = { 'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid } return inputs, targets, meta elif cfg.MODEL.name == 'posenet': # default valid joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) return joint_img, joint_cam, joint_valid
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) annot_id, img_id, img_path, img_shape = data['annot_id'], data[ 'image_id'], data['img_path'], data['img_shape'] cam_param, bbox, smpl_param = data['cam_param'].copy( ), data['bbox'].copy(), data['smpl_param'].copy() rot, flip = 0, 0 # get coco img joints from detection joint_img_coco = data['pred_pose2d'] # vis # img = cv2.imread(img_path) # vis_2d_pose(joint_img_coco, img, self.coco_skeleton, prefix='vis2dpose', bbox=None) # import pdb; pdb.set_trace() # smpl coordinates mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param) # regress h36m, coco cam joints joint_cam_coco, gt_joint_img_coco = self.get_coco_from_mesh( mesh_cam, cam_param) joint_cam_h36m = self.get_h36mJ_from_mesh(mesh_cam) # root relative camera coordinate mesh_cam = mesh_cam - joint_cam_h36m[:1] joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1] joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1] if cfg.DATASET.use_gt_input: joint_img_coco = gt_joint_img_coco # make new bbox bbox = get_bbox(joint_img_coco) bbox = process_bbox(bbox.copy()) # aug joint_img_coco, trans = j2d_processing( joint_img_coco.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, flip, None) # -> 0~1 joint_img_coco = joint_img_coco[:, :2] joint_img_coco /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img_coco, axis=0), np.std(joint_img_coco, axis=0) joint_img_coco = (joint_img_coco.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': inputs = {'pose2d': joint_img_coco} targets = {'mesh': mesh_cam / 1000, 'reg_pose3d': joint_cam_h36m} meta = {'dummy': np.ones(1, dtype=np.float32)} return inputs, targets, meta elif cfg.MODEL.name == 'posenet': joint_valid = np.ones((len(joint_cam_coco), 1), dtype=np.float32) # dummy return joint_img_coco, joint_cam_coco, joint_valid
def load_data(self): print('Load annotations of 3DPW ') db = COCO( osp.join(self.data_path, '3DPW_latest_' + self.data_split + '.json')) # get detected 2d pose with open( osp.join(self.data_path, f'darkpose_3dpw_{self.data_split}set_output.json') ) as f: # pose2d_outputs = {} data = json.load(f) for item in data: annot_id = str(item['annotation_id']) pose2d_outputs[annot_id] = { 'coco_joints': np.array(item['keypoints'], dtype=np.float32)[:, :3] } datalist = [] custompose_count = 0 for aid in db.anns.keys(): aid = int(aid) ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] img_width, img_height = img['width'], img['height'] sequence_name = img['sequence'] img_name = img['file_name'] img_path = osp.join(self.img_path, sequence_name, img_name) cam_param = { k: np.array(v, dtype=np.float32) for k, v in img['cam_param'].items() } smpl_param = ann['smpl_param'] pid = ann['person_id'] vid_name = sequence_name + str(pid) bbox = process_bbox(np.array(ann['bbox'])) if bbox is None: continue openpose = np.array(ann['openpose_result'], dtype=np.float32).reshape(-1, 3) openpose = self.add_pelvis_and_neck(openpose, self.openpose_joints_name, only_pelvis=True) custompose = np.array(pose2d_outputs[str(aid)]['coco_joints']) custompose = self.add_pelvis_and_neck(custompose, self.coco_joints_name) custompose_count += 1 datalist.append({ 'annot_id': aid, 'person_id': pid, 'image_id': image_id, 'img_path': img_path, 'vid_name': vid_name, 'img_shape': (img_height, img_width), 'cam_param': cam_param, 'bbox': bbox, 'smpl_param': smpl_param, 'pred_pose2d': custompose }) datalist = sorted(datalist, key=lambda x: (x['person_id'], x['img_path'])) valid_names = np.array([data['vid_name'] for data in datalist]) unique_names = np.unique(valid_names) video_indices = [] for u_n in unique_names: indexes = valid_names == u_n video_indices.append(indexes) print("num custom pose: ", custompose_count) return datalist, video_indices
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_id, bbox, smpl_param, cam_param, img_shape = data['img_id'], data['bbox'].copy(), data['smpl_param'].copy(), data['cam_param'].copy(), data['img_hw'] flip, rot = augm_params(is_train=(self.data_split == 'train')) # smpl coordinates mesh_cam, joint_cam_smpl = self.get_smpl_coord(smpl_param, cam_param) # regress coco joints joint_cam_coco, joint_img_coco = self.get_coco_from_mesh(mesh_cam, cam_param) # h36m joints from datasets joint_cam_h36m, joint_img_h36m = data['joint_cam'], data['joint_img'][:, :2] # root relative camera coordinate mesh_cam = mesh_cam - joint_cam_h36m[:1] # joint_cam_smpl = joint_cam_smpl - joint_cam_h36m[:1] joint_cam_coco = joint_cam_coco - joint_cam_coco[-2:-1] joint_cam_h36m = joint_cam_h36m - joint_cam_h36m[:1] # joint_cam is PoseNet target if self.input_joint_name == 'coco': joint_img, joint_cam = joint_img_coco, joint_cam_coco elif self.input_joint_name == 'human36': joint_img, joint_cam = joint_img_h36m, joint_cam_h36m # make new bbox bbox = get_bbox(joint_img) bbox = process_bbox(bbox.copy()) # aug joint_img, trans = j2d_processing(joint_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, flip, self.flip_pairs) joint_cam = j3d_processing(joint_cam, rot, flip, self.flip_pairs) if not cfg.DATASET.use_gt_input: joint_img = self.replace_joint_img(idx, img_id, joint_img, bbox, trans) # vis # img = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) # new_img = cv2.warpAffine(img, trans, (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), flags=cv2.INTER_LINEAR) # vis_2d_pose(joint_img, new_img, self.human36_skeleton, prefix='detection') # vis_3d_pose(joint_cam, self.human36_skeleton, joint_set_name='human36', gt=True) # -> 0~1 joint_img = joint_img[:, :2] joint_img /= np.array([[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': # default valid mesh_valid = np.ones((len(mesh_cam), 1), dtype=np.float32) reg_joint_valid = np.ones((len(joint_cam_h36m), 1), dtype=np.float32) lift_joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) # if fitted mesh is too far from h36m gt, discard it error = self.get_fitting_error(joint_cam_h36m, mesh_cam) if error > self.fitting_thr: mesh_valid[:] = 0 if self.input_joint_name == 'coco': lift_joint_valid[:] = 0 inputs = {'pose2d': joint_img} targets = {'mesh': mesh_cam / 1000, 'lift_pose3d': joint_cam, 'reg_pose3d': joint_cam_h36m} meta = {'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid} return inputs, targets, meta elif cfg.MODEL.name == 'posenet': # default valid joint_valid = np.ones((len(joint_cam), 1), dtype=np.float32) # if fitted mesh is too far from h36m gt, discard it error = self.get_fitting_error(joint_cam_h36m, mesh_cam) if (error > self.fitting_thr) and (self.input_joint_name == 'coco'): joint_valid[:] = 0 return joint_img, joint_cam, joint_valid
def load_data(self): if self.data_split == 'train': db = COCO(self.annot_path) with open(self.smpl_param_path) as f: smpl_params = json.load(f) else: print('Unknown data subset') assert 0 datalist = [] for iid in db.imgs.keys(): img = db.imgs[iid] img_id = img["id"] img_width, img_height = img['width'], img['height'] imgname = img['file_name'] img_path = osp.join(self.img_dir, imgname) focal = img["f"] princpt = img["c"] cam_param = {'focal': focal, 'princpt': princpt} # crop the closest person to the camera ann_ids = db.getAnnIds(img_id) anns = db.loadAnns(ann_ids) # sample closest subject root_depths = [ ann['keypoints_cam'][self.muco_root_joint_idx][2] for ann in anns ] closest_pid = root_depths.index(min(root_depths)) pid_list = [closest_pid] # sample close subjects # for i in range(len(anns)): # if i == closest_pid: # continue # picked = True # for j in range(len(anns)): # if i == j: # continue # dist = (np.array(anns[i]['keypoints_cam'][self.muco_root_joint_idx]) - np.array( # anns[j]['keypoints_cam'][self.muco_root_joint_idx])) ** 2 # dist_2d = math.sqrt(np.sum(dist[:2])) # dist_3d = math.sqrt(np.sum(dist)) # if dist_2d < 500 or dist_3d < 500: # picked = False # if picked: # pid_list.append(i) for pid in pid_list: joint_cam = np.array(anns[pid]['keypoints_cam']) joint_img = np.array(anns[pid]['keypoints_img']) joint_img = np.concatenate([joint_img, joint_cam[:, 2:]], 1) joint_valid = np.ones((self.muco_joint_num, 3)) bbox = process_bbox(anns[pid]['bbox']) if bbox is None: continue # check smpl parameter exist try: smpl_param = smpl_params[str(ann_ids[pid])] pose, shape, trans = np.array( smpl_param['pose']), np.array( smpl_param['shape']), np.array(smpl_param['trans']) sum = pose.sum() + shape.sum() + trans.sum() if np.isnan(sum): continue except KeyError: continue datalist.append({ 'annot_id': ann_ids[pid], 'img_path': img_path, 'img_shape': (img_height, img_width), 'bbox': bbox, 'joint_img': joint_img, 'joint_cam': joint_cam, 'joint_valid': joint_valid, 'cam_param': cam_param, 'smpl_param': smpl_param }) if self.debug and len(datalist) > 10000: break return datalist
# get model and input if joint_set == 'mano': mesh_model = MANO() else: mesh_model = SMPL() model, joint_regressor, joint_num, skeleton, graph_L, graph_perm_reverse = get_joint_setting( mesh_model, joint_category=joint_set) model = model.cuda() project_net = models.project_net.get_model().cuda() joint_regressor = torch.Tensor(joint_regressor).cuda() joint_input = np.load(input_path) # pre-process input bbox = get_bbox(joint_input) bbox1 = process_bbox(bbox.copy(), aspect_ratio=1.0, scale=1.25) bbox2 = process_bbox(bbox.copy()) proj_target_joint_img, trans = j2d_processing(joint_input.copy(), (IMG_RES, IMG_RES), bbox1, 0, 0, None) joint_img, _ = j2d_processing( joint_input.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox2, 0, 0, None) joint_img = joint_img[:, :2] joint_img /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) mean, std = np.mean(joint_img, axis=0), np.std(joint_img, axis=0) joint_img = (joint_img.copy() - mean) / std joint_img = torch.Tensor(joint_img[None, :, :]).cuda()
def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_id, img_name, cam_param, bbox, smpl_param, img_shape = data[ 'img_id'], data['img_name'], data['cam_param'], data['bbox'], data[ 'smpl_param'], data['img_shape'] flip, rot = augm_params(is_train=(self.data_split == 'train')) # smpl coordinates smpl_mesh_coord_cam, smpl_joint_coord_cam = self.get_smpl_coord( smpl_param) smpl_coord_cam = np.concatenate( (smpl_mesh_coord_cam, smpl_joint_coord_cam)) smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'], cam_param['princpt']) joint_coord_img = smpl_coord_img[self.smpl_vertex_num:][:, :2] # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='gt') # root relative cam coord smpl_coord_cam = smpl_coord_cam - smpl_coord_cam[ self.smpl_vertex_num + self.smpl_root_joint_idx] mesh_coord_cam = smpl_coord_cam[:self.smpl_vertex_num] joint_coord_cam = smpl_coord_cam[self.smpl_vertex_num:] if not cfg.DATASET.use_gt_input: # train / test with 2d dection input det_data = self.datalist_pose2d_det[idx] assert img_id == det_data['img_id'] joint_coord_img = det_data['img_joint'] # vis_2d_pose(joint_coord_img, img_path, self.smpl_skeleton, prefix='det') # vis_3d_pose(joint_coord_cam, self.smpl_skeleton, joint_set_name='smpl') # make new bbox bbox = get_bbox(joint_coord_img) bbox = process_bbox(bbox.copy()) # aug joint_coord_img, trans = j2d_processing( joint_coord_img.copy(), (cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]), bbox, rot, flip, self.flip_pairs) joint_coord_cam = j3d_processing(joint_coord_cam, rot, flip, self.flip_pairs) # -> 0~1 joint_coord_img = joint_coord_img[:, :2] joint_coord_img /= np.array( [[cfg.MODEL.input_shape[1], cfg.MODEL.input_shape[0]]]) # normalize loc&scale mean, std = np.mean(joint_coord_img, axis=0), np.std(joint_coord_img, axis=0) joint_coord_img = (joint_coord_img.copy() - mean) / std if cfg.MODEL.name == 'pose2mesh_net': # default valid mesh_valid = np.ones((len(mesh_coord_cam), 1), dtype=np.float32) reg_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) lift_joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) inputs = {'pose2d': joint_coord_img} targets = { 'mesh': mesh_coord_cam / 1000, 'lift_pose3d': joint_coord_cam, 'reg_pose3d': joint_coord_cam } meta = { 'mesh_valid': mesh_valid, 'lift_pose3d_valid': lift_joint_valid, 'reg_pose3d_valid': reg_joint_valid } elif cfg.MODEL.name == 'posenet': # default valid joint_valid = np.ones((len(joint_coord_cam), 1), dtype=np.float32) return joint_coord_img, joint_coord_cam, joint_valid return inputs, targets, meta
def load_data(self): print('Load annotations of Human36M Protocol ' + str(self.protocol)) subject_list = self.get_subject() sampling_ratio = self.get_subsampling_ratio() # aggregate annotations from each subject db = COCO() cameras = {} joints = {} smpl_params = {} for subject in subject_list: # data load with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_data.json'), 'r') as f: annot = json.load(f) if len(db.dataset) == 0: for k, v in annot.items(): db.dataset[k] = v else: for k, v in annot.items(): db.dataset[k] += v # camera load with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f: cameras[str(subject)] = json.load(f) # joint coordinate load with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f: joints[str(subject)] = json.load(f) # smpl parameter load with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_smpl_param.json'), 'r') as f: smpl_params[str(subject)] = json.load(f) db.createIndex() skip_idx = [] datalist = [] skip_img_idx = [] for aid in db.anns.keys(): ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] img_path = osp.join(self.img_dir, img['file_name']) img_name = img_path.split('/')[-1] # check subject and frame_idx frame_idx = img['frame_idx']; if frame_idx % sampling_ratio != 0: continue # check smpl parameter exist subject = img['subject']; action_idx = img['action_idx']; subaction_idx = img['subaction_idx']; frame_idx = img['frame_idx']; try: smpl_param = smpl_params[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)] except KeyError: skip_idx.append(image_id) skip_img_idx.append(img_path.split('/')[-1]) continue smpl_param['gender'] = 'neutral' # self.subject_genders[subject] # set corresponding gender # camera parameter cam_idx = img['cam_idx'] cam_param = cameras[str(subject)][str(cam_idx)] R, t, f, c = np.array(cam_param['R'], dtype=np.float32), np.array(cam_param['t'], dtype=np.float32), np.array( cam_param['f'], dtype=np.float32), np.array(cam_param['c'], dtype=np.float32) cam_param = {'R': R, 't': t, 'focal': f, 'princpt': c} # project world coordinate to cam, image coordinate space joint_world = np.array(joints[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)], dtype=np.float32) joint_cam = world2cam(joint_world, R, t) joint_img = cam2pixel(joint_cam, f, c) joint_vis = np.ones((self.human36_joint_num, 1)) bbox = process_bbox(np.array(ann['bbox'])) if bbox is None: continue datalist.append({ 'img_path': img_path, 'img_name': img_name, 'img_id': image_id, 'bbox': bbox, 'img_hw': (img['height'], img['width']), 'joint_img': joint_img, # [x_img, y_img, z_cam] 'joint_cam': joint_cam, # [X, Y, Z] in camera coordinate 'joint_vis': joint_vis, 'smpl_param': smpl_param, 'cam_param': cam_param}) datalist = sorted(datalist, key=lambda x: x['img_name']) return datalist, skip_idx, skip_img_idx