Beispiel #1
0
    def get_fitting_error(self, coco_joint, smpl_mesh, cam_param, img2bb_trans,
                          coco_joint_valid):
        # get coco joint from smpl mesh
        coco_from_smpl = np.dot(self.coco_joint_regressor, smpl_mesh)
        coco_from_smpl = self.add_pelvis(
            coco_from_smpl)  # z-axis component will be removed
        coco_from_smpl = cam2pixel(coco_from_smpl, cam_param['focal'],
                                   cam_param['princpt'])
        coco_from_smpl_xy1 = np.concatenate(
            (coco_from_smpl[:, :2], np.ones_like(coco_from_smpl[:, 0:1])), 1)
        coco_from_smpl[:, :2] = np.dot(img2bb_trans,
                                       coco_from_smpl_xy1.transpose(
                                           1, 0)).transpose(1, 0)
        coco_from_smpl[:, 0] = coco_from_smpl[:, 0] / cfg.input_img_shape[
            1] * cfg.output_hm_shape[2]
        coco_from_smpl[:, 1] = coco_from_smpl[:, 1] / cfg.input_img_shape[
            0] * cfg.output_hm_shape[1]

        # mask joint coordinates
        coco_joint = coco_joint[:, :2][np.tile(coco_joint_valid, (
            1, 2)) == 1].reshape(-1, 2)
        coco_from_smpl = coco_from_smpl[:, :2][np.tile(coco_joint_valid, (
            1, 2)) == 1].reshape(-1, 2)

        error = np.sqrt(np.sum((coco_joint - coco_from_smpl)**2, 1)).mean()
        return error
def get_bbox(joint_world, joint_valid, camrot, campos, focal, princpt):

    joint_cam = []
    for i in range(len(joint_world)):
        joint_cam.append(world2cam(joint_world[i], camrot, campos))
    joint_cam = np.array(joint_cam).reshape(-1, 3)

    x_img, y_img, z_img = cam2pixel(joint_cam, focal, princpt)
    x_img = x_img[joint_valid[:, 0] == 1]
    y_img = y_img[joint_valid[:, 0] == 1]
    xmin = min(x_img)
    ymin = min(y_img)
    xmax = max(x_img)
    ymax = max(y_img)

    x_center = (xmin + xmax) / 2.
    width = xmax - xmin
    xmin = x_center - 0.5 * width * 1.5
    xmax = x_center + 0.5 * width * 1.5

    y_center = (ymin + ymax) / 2.
    height = ymax - ymin
    ymin = y_center - 0.5 * height * 1.5
    ymax = y_center + 0.5 * height * 1.5

    bbox = np.array([xmin, ymin, xmax, ymax]).astype(np.float32)

    return bbox
    def __init__(self, transform, mode, annot_subset):
        self.mode = mode # train, test, val
        self.annot_subset = annot_subset # all, human_annot, machine_annot
        self.img_path = '../data/InterHand2.6M/images'
        self.annot_path = '../data/InterHand2.6M/annotations'
        if self.annot_subset == 'machine_annot' and self.mode == 'val':
            self.rootnet_output_path = '../data/InterHand2.6M/rootnet_output/rootnet_interhand2.6m_output_machine_annot_val.json'
        else:
            self.rootnet_output_path = '../data/InterHand2.6M/rootnet_output/rootnet_interhand2.6m_output_all_test.json'
        self.transform = transform
        self.joint_num = 21 # single hand
        self.root_joint_idx = {'right': 20, 'left': 41}
        self.joint_type = {'right': np.arange(0,self.joint_num), 'left': np.arange(self.joint_num,self.joint_num*2)}
        self.skeleton = load_skeleton(osp.join(self.annot_path, 'skeleton.txt'), self.joint_num*2)
        
        self.datalist = []
        self.datalist_sh = []
        self.datalist_ih = []
        self.sequence_names = []
        
        # load annotation
        print("Load annotation from  " + osp.join(self.annot_path, self.annot_subset))
        db = COCO(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_data.json'))
        with open(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_camera.json')) as f:
            cameras = json.load(f)
        with open(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_joint_3d.json')) as f:
            joints = json.load(f)

        if (self.mode == 'val' or self.mode == 'test') and cfg.trans_test == 'rootnet':
            print("Get bbox and root depth from " + self.rootnet_output_path)
            rootnet_result = {}
            with open(self.rootnet_output_path) as f:
                annot = json.load(f)
            for i in range(len(annot)):
                rootnet_result[str(annot[i]['annot_id'])] = annot[i]
        else:
            print("Get bbox and root depth from groundtruth annotation")
        
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
 
            capture_id = img['capture']
            seq_name = img['seq_name']
            cam = img['camera']
            frame_idx = img['frame_idx']
            img_path = osp.join(self.img_path, self.mode, img['file_name'])
            
            campos, camrot = np.array(cameras[str(capture_id)]['campos'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['camrot'][str(cam)], dtype=np.float32)
            focal, princpt = np.array(cameras[str(capture_id)]['focal'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['princpt'][str(cam)], dtype=np.float32)
            joint_world = np.array(joints[str(capture_id)][str(frame_idx)], dtype=np.float32)
            joint_cam = world2cam(joint_world.transpose(1,0), camrot, campos.reshape(3,1)).transpose(1,0)
            joint_img = cam2pixel(joint_cam, focal, princpt)[:,:2]

            joint_valid = np.array(ann['joint_valid'],dtype=np.float32).reshape(self.joint_num*2)
            # if root is not valid -> root-relative 3D pose is also not valid. Therefore, mark all joints as invalid
            joint_valid[self.joint_type['right']] *= joint_valid[self.root_joint_idx['right']]
            joint_valid[self.joint_type['left']] *= joint_valid[self.root_joint_idx['left']]
            hand_type = ann['hand_type']
            hand_type_valid = np.array((ann['hand_type_valid']), dtype=np.float32)
            
            if (self.mode == 'val' or self.mode == 'test') and cfg.trans_test == 'rootnet':
                bbox = np.array(rootnet_result[str(aid)]['bbox'],dtype=np.float32)
                abs_depth = {'right': rootnet_result[str(aid)]['abs_depth'][0], 'left': rootnet_result[str(aid)]['abs_depth'][1]}
            else:
                img_width, img_height = img['width'], img['height']
                bbox = np.array(ann['bbox'],dtype=np.float32) # x,y,w,h
                bbox = process_bbox(bbox, (img_height, img_width))
                abs_depth = {'right': joint_cam[self.root_joint_idx['right'],2], 'left': joint_cam[self.root_joint_idx['left'],2]}

            cam_param = {'focal': focal, 'princpt': princpt}
            joint = {'cam_coord': joint_cam, 'img_coord': joint_img, 'valid': joint_valid}
            data = {'img_path': img_path, 'seq_name': seq_name, 'cam_param': cam_param, 'bbox': bbox, 'joint': joint, 'hand_type': hand_type, 'hand_type_valid': hand_type_valid, 'abs_depth': abs_depth, 'file_name': img['file_name'], 'capture': capture_id, 'cam': cam, 'frame': frame_idx}
            if hand_type == 'right' or hand_type == 'left':
                self.datalist_sh.append(data)
            else:
                self.datalist_ih.append(data)
            if seq_name not in self.sequence_names:
                self.sequence_names.append(seq_name)

        self.datalist = self.datalist_sh + self.datalist_ih
        print('Number of annotations in single hand sequences: ' + str(len(self.datalist_sh)))
        print('Number of annotations in interacting hand sequences: ' + str(len(self.datalist_ih)))
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox, joint_cam, cam_param, mano_param = data[
            'img_path'], data['img_shape'], data['bbox'], data[
                'joint_cam'], data['cam_param'], data['mano_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, _ = augmentation(
            img, bbox, self.data_split, exclude_flip=True
        )  # FreiHAND dataset only contains right hands. do not perform flip aug.
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # mano coordinates
            mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = self.get_mano_coord(
                mano_param, cam_param)
            mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mano_coord_img = cam2pixel(mano_coord_cam, focal, princpt)

            # affine transform x,y coordinates. root-relative depth
            mano_coord_img_xy1 = np.concatenate(
                (mano_coord_img[:, :2], np.ones_like(mano_coord_img[:, :1])),
                1)
            mano_coord_img[:, :2] = np.dot(img2bb_trans,
                                           mano_coord_img_xy1.transpose(
                                               1, 0)).transpose(1, 0)[:, :2]
            root_joint_depth = mano_coord_cam[self.vertex_num +
                                              self.root_joint_idx][2]
            mano_coord_img[:, 2] = mano_coord_img[:, 2] - root_joint_depth
            mano_coord_img[:, 0] = mano_coord_img[:, 0] / cfg.input_img_shape[
                1] * cfg.output_hm_shape[2]
            mano_coord_img[:, 1] = mano_coord_img[:, 1] / cfg.input_img_shape[
                0] * cfg.output_hm_shape[1]
            mano_coord_img[:, 2] = (mano_coord_img[:, 2] /
                                    (cfg.bbox_3d_size / 2) +
                                    1) / 2. * cfg.output_hm_shape[0]

            # check truncation
            mano_trunc = ((mano_coord_img[:,0] >= 0) * (mano_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (mano_coord_img[:,1] >= 0) * (mano_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (mano_coord_img[:,2] >= 0) * (mano_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            mano_mesh_img = mano_coord_img[:self.vertex_num]
            mano_joint_img = mano_coord_img[self.vertex_num:]
            mano_mesh_trunc = mano_trunc[:self.vertex_num]
            mano_joint_trunc = mano_trunc[self.vertex_num:]

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)),
                  np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]],
                dtype=np.float32)
            # parameter
            mano_pose = mano_pose.reshape(-1, 3)
            root_pose = mano_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            mano_pose[self.root_joint_idx] = root_pose.reshape(3)
            mano_pose = mano_pose.reshape(-1)
            # mano coordinate
            mano_joint_cam = mano_joint_cam - mano_joint_cam[
                self.root_joint_idx, None]  # root-relative
            mano_joint_cam = np.dot(rot_aug_mat, mano_joint_cam.transpose(
                1, 0)).transpose(1, 0)

            orig_joint_img = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            orig_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            orig_joint_valid = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy
            orig_joint_trunc = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy

            inputs = {'img': img}
            targets = {
                'orig_joint_img': orig_joint_img,
                'fit_joint_img': mano_joint_img,
                'fit_mesh_img': mano_mesh_img,
                'orig_joint_cam': orig_joint_cam,
                'fit_joint_cam': mano_joint_cam,
                'pose_param': mano_pose,
                'shape_param': mano_shape
            }
            meta_info = {
                'orig_joint_valid': orig_joint_valid,
                'orig_joint_trunc': orig_joint_trunc,
                'fit_joint_trunc': mano_joint_trunc,
                'fit_mesh_trunc': mano_mesh_trunc,
                'is_valid_fit': float(True),
                'is_3D': float(True)
            }
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}

        return inputs, targets, meta_info
Beispiel #5
0
root_depth = 11250.5732421875 # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
root_depth /= 1000 # output of RootNet is milimeter. change it to meter
root_img = np.array([root_xy[0], root_xy[1], root_depth])
root_cam = pixel2cam(root_img[None,:], focal, princpt)
mesh_lixel_img[:,2] += root_depth
mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
mesh_param_cam += root_cam.reshape(1,3)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
vis_img = vis_mesh(vis_img, mesh_lixel_img)
cv2.imwrite('output_mesh_lixel.jpg', vis_img)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
vis_img = vis_mesh(vis_img, mesh_param_img)
cv2.imwrite('output_mesh_param.jpg', vis_img)

# save mesh (obj)
save_obj(mesh_lixel_cam, face, 'output_mesh_lixel.obj')
save_obj(mesh_param_cam, face, 'output_mesh_param.obj')

# render mesh from lixel
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_lixel_cam, face, {'focal': focal, 'princpt': princpt})
cv2.imwrite('rendered_mesh_lixel.jpg', rendered_img)

# render mesh from param
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_param_cam, face, {'focal': focal, 'princpt': princpt})
Beispiel #6
0
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox, smpl_param, cam_param = data[
            'img_path'], data['img_shape'], data['bbox'], data[
                'smpl_param'], data['cam_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        # muco gt
        muco_joint_img = data['joint_img']
        muco_joint_cam = data['joint_cam']
        muco_joint_cam = muco_joint_cam - muco_joint_cam[
            self.muco_root_joint_idx, None, :]  # root-relative
        muco_joint_valid = data['joint_valid']
        if do_flip:
            muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0]
            muco_joint_cam[:, 0] = -muco_joint_cam[:, 0]
            for pair in self.muco_flip_pairs:
                muco_joint_img[pair[0], :], muco_joint_img[
                    pair[1], :] = muco_joint_img[
                        pair[1], :].copy(), muco_joint_img[pair[0], :].copy()
                muco_joint_cam[pair[0], :], muco_joint_cam[
                    pair[1], :] = muco_joint_cam[
                        pair[1], :].copy(), muco_joint_cam[pair[0], :].copy()
                muco_joint_valid[pair[0], :], muco_joint_valid[
                    pair[1], :] = muco_joint_valid[pair[1], :].copy(
                    ), muco_joint_valid[pair[0], :].copy()

        muco_joint_img_xy1 = np.concatenate(
            (muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1)
        muco_joint_img[:, :2] = np.dot(img2bb_trans,
                                       muco_joint_img_xy1.transpose(
                                           1, 0)).transpose(1, 0)
        muco_joint_img[:, 0] = muco_joint_img[:, 0] / cfg.input_img_shape[
            1] * cfg.output_hm_shape[2]
        muco_joint_img[:, 1] = muco_joint_img[:, 1] / cfg.input_img_shape[
            0] * cfg.output_hm_shape[1]
        muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[
            self.muco_root_joint_idx][2]  # root-relative
        muco_joint_img[:, 2] = (
            muco_joint_img[:, 2] /
            (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[
                0]  # change cfg.bbox_3d_size from meter to milimeter

        # check truncation
        muco_joint_trunc = muco_joint_valid * ((muco_joint_img[:,0] >= 0) * (muco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                    (muco_joint_img[:,1] >= 0) * (muco_joint_img[:,1] < cfg.output_hm_shape[1]) * \
                    (muco_joint_img[:,2] >= 0) * (muco_joint_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

        # transform muco joints to target db joints
        muco_joint_img = transform_joint_to_other_db(muco_joint_img,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_cam = transform_joint_to_other_db(muco_joint_cam,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_valid = transform_joint_to_other_db(muco_joint_valid,
                                                       self.muco_joints_name,
                                                       self.joints_name)
        muco_joint_trunc = transform_joint_to_other_db(muco_joint_trunc,
                                                       self.muco_joints_name,
                                                       self.joints_name)

        if smpl_param is not None:
            # smpl coordinates
            smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                smpl_param, cam_param, do_flip, img_shape)
            smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            smpl_coord_img = cam2pixel(smpl_coord_cam, focal, princpt)

            # affine transform x,y coordinates. root-relative depth
            smpl_coord_img_xy1 = np.concatenate(
                (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])),
                1)
            smpl_coord_img[:, :2] = np.dot(img2bb_trans,
                                           smpl_coord_img_xy1.transpose(
                                               1, 0)).transpose(1, 0)[:, :2]
            smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[
                self.vertex_num + self.root_joint_idx][2]
            smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[
                1] * cfg.output_hm_shape[2]
            smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[
                0] * cfg.output_hm_shape[1]
            smpl_coord_img[:, 2] = (
                smpl_coord_img[:, 2] /
                (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[
                    0]  # change cfg.bbox_3d_size from meter to milimeter

            # check truncation
            smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            smpl_mesh_img = smpl_coord_img[:self.vertex_num]
            smpl_joint_img = smpl_coord_img[self.vertex_num:]
            smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
            smpl_joint_trunc = smpl_trunc[self.vertex_num:]

            # if fitted mesh is too far from muco gt, discard it
            is_valid_fit = True
            error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam,
                                           do_flip)
            if error > self.fitting_thr:
                is_valid_fit = False

        else:
            smpl_joint_img = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            smpl_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            smpl_mesh_img = np.zeros((self.vertex_num, 3),
                                     dtype=np.float32)  # dummy
            smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
            smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
            smpl_joint_trunc = np.zeros((self.joint_num, 1),
                                        dtype=np.float32)  # dummy
            smpl_mesh_trunc = np.zeros((self.vertex_num, 1),
                                       dtype=np.float32)  # dummy
            is_valid_fit = False

        # 3D data rotation augmentation
        rot_aug_mat = np.array(
            [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
             [np.sin(np.deg2rad(-rot)),
              np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]],
            dtype=np.float32)
        # muco coordinate
        muco_joint_cam = np.dot(rot_aug_mat, muco_joint_cam.transpose(
            1, 0)).transpose(1, 0) / 1000  # milimeter to meter
        # parameter
        smpl_pose = smpl_pose.reshape(-1, 3)
        root_pose = smpl_pose[self.root_joint_idx, :]
        root_pose, _ = cv2.Rodrigues(root_pose)
        root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
        smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
        smpl_pose = smpl_pose.reshape(-1)
        # smpl coordinate
        smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx,
                                                         None]  # root-relative
        smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(
            1, 0)).transpose(1, 0) / 1000  # milimeter to meter

        inputs = {'img': img}
        targets = {
            'orig_joint_img': muco_joint_img,
            'fit_joint_img': smpl_joint_img,
            'fit_mesh_img': smpl_mesh_img,
            'orig_joint_cam': muco_joint_cam,
            'fit_joint_cam': smpl_joint_cam,
            'pose_param': smpl_pose,
            'shape_param': smpl_shape
        }
        meta_info = {
            'orig_joint_valid': muco_joint_valid,
            'orig_joint_trunc': muco_joint_trunc,
            'fit_joint_trunc': smpl_joint_trunc,
            'fit_mesh_trunc': smpl_mesh_trunc,
            'is_valid_fit': float(is_valid_fit),
            'is_3D': float(True)
        }
        return inputs, targets, meta_info
Beispiel #7
0
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox = data['img_path'], data['img_shape'], data[
            'bbox']

        # image load and affine transform
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # coco gt
            coco_joint_img = data['joint_img']
            coco_joint_valid = data['joint_valid']
            if do_flip:
                coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0]
                for pair in self.coco_flip_pairs:
                    coco_joint_img[pair[0], :], coco_joint_img[
                        pair[1], :] = coco_joint_img[pair[1], :].copy(
                        ), coco_joint_img[pair[0], :].copy()
                    coco_joint_valid[pair[0], :], coco_joint_valid[
                        pair[1], :] = coco_joint_valid[pair[1], :].copy(
                        ), coco_joint_valid[pair[0], :].copy()

            coco_joint_img_xy1 = np.concatenate(
                (coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])),
                1)
            coco_joint_img[:, :2] = np.dot(img2bb_trans,
                                           coco_joint_img_xy1.transpose(
                                               1, 0)).transpose(1, 0)
            coco_joint_img[:, 0] = coco_joint_img[:, 0] / cfg.input_img_shape[
                1] * cfg.output_hm_shape[2]
            coco_joint_img[:, 1] = coco_joint_img[:, 1] / cfg.input_img_shape[
                0] * cfg.output_hm_shape[1]

            # backup for calculating fitting error
            _coco_joint_img = coco_joint_img.copy()
            _coco_joint_valid = coco_joint_valid.copy()

            # check truncation
            coco_joint_trunc = coco_joint_valid * ((coco_joint_img[:,0] >= 0) * (coco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                        (coco_joint_img[:,1] >= 0) * (coco_joint_img[:,1] < cfg.output_hm_shape[1])).reshape(-1,1).astype(np.float32)

            # transform coco joints to target db joints
            coco_joint_img = transform_joint_to_other_db(
                coco_joint_img, self.coco_joints_name, self.joints_name)
            coco_joint_cam = np.zeros((self.joint_num, 3),
                                      dtype=np.float32)  # dummy
            coco_joint_valid = transform_joint_to_other_db(
                coco_joint_valid, self.coco_joints_name, self.joints_name)
            coco_joint_trunc = transform_joint_to_other_db(
                coco_joint_trunc, self.coco_joints_name, self.joints_name)

            smplify_result = data['smplify_result']
            if smplify_result is not None:
                # use fitted mesh
                smpl_param, cam_param = smplify_result[
                    'smpl_param'], smplify_result['cam_param']
                smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                    smpl_param, cam_param, do_flip, img_shape)
                smpl_coord_cam = np.concatenate(
                    (smpl_mesh_cam, smpl_joint_cam))
                smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'],
                                           cam_param['princpt'])

                # x,y affine transform, root-relative depth
                smpl_coord_img_xy1 = np.concatenate(
                    (smpl_coord_img[:, :2], np.ones_like(
                        smpl_coord_img[:, 0:1])), 1)
                smpl_coord_img[:, :2] = np.dot(
                    img2bb_trans,
                    smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
                smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[
                    self.vertex_num + self.root_joint_idx][2]
                smpl_coord_img[:,
                               0] = smpl_coord_img[:, 0] / cfg.input_img_shape[
                                   1] * cfg.output_hm_shape[2]
                smpl_coord_img[:,
                               1] = smpl_coord_img[:, 1] / cfg.input_img_shape[
                                   0] * cfg.output_hm_shape[1]
                smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] /
                                        (cfg.bbox_3d_size / 2) +
                                        1) / 2. * cfg.output_hm_shape[0]

                # check truncation
                smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                            (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                            (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

                # split mesh and joint coordinates
                smpl_mesh_img = smpl_coord_img[:self.vertex_num]
                smpl_joint_img = smpl_coord_img[self.vertex_num:]
                smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
                smpl_joint_trunc = smpl_trunc[self.vertex_num:]

                # if fitted mesh is too far from h36m gt, discard it
                is_valid_fit = True
                error = self.get_fitting_error(_coco_joint_img, smpl_mesh_cam,
                                               cam_param, img2bb_trans,
                                               _coco_joint_valid)
                if error > self.fitting_thr:
                    is_valid_fit = False

            else:
                smpl_joint_img = np.zeros((self.joint_num, 3),
                                          dtype=np.float32)  # dummy
                smpl_joint_cam = np.zeros((self.joint_num, 3),
                                          dtype=np.float32)  # dummy
                smpl_mesh_img = np.zeros((self.vertex_num, 3),
                                         dtype=np.float32)  # dummy
                smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
                smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
                smpl_joint_trunc = np.zeros((self.joint_num, 1),
                                            dtype=np.float32)
                smpl_mesh_trunc = np.zeros((self.vertex_num, 1),
                                           dtype=np.float32)
                is_valid_fit = False

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)),
                  np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]],
                dtype=np.float32)
            # parameter
            smpl_pose = smpl_pose.reshape(-1, 3)
            root_pose = smpl_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
            smpl_pose = smpl_pose.reshape(-1)
            # smpl coordinate
            smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[
                self.root_joint_idx, None]  # root-relative
            smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(
                1, 0)).transpose(1, 0)

            inputs = {'img': img}
            targets = {
                'orig_joint_img': coco_joint_img,
                'fit_joint_img': smpl_joint_img,
                'fit_mesh_img': smpl_mesh_img,
                'orig_joint_cam': coco_joint_cam,
                'fit_joint_cam': smpl_joint_cam,
                'pose_param': smpl_pose,
                'shape_param': smpl_shape
            }
            meta_info = {
                'orig_joint_valid': coco_joint_valid,
                'orig_joint_trunc': coco_joint_trunc,
                'fit_joint_trunc': smpl_joint_trunc,
                'fit_mesh_trunc': smpl_mesh_trunc,
                'is_valid_fit': float(is_valid_fit),
                'is_3D': float(False)
            }
            return inputs, targets, meta_info
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}
            return inputs, targets, meta_info
Beispiel #8
0
    def load_data(self):
        subject_list = self.get_subject()
        sampling_ratio = self.get_subsampling_ratio()

        # aggregate annotations from each subject
        db = COCO()
        cameras = {}
        joints = {}
        smpl_params = {}
        for subject in subject_list:
            # data load
            with open(
                    osp.join(self.annot_path,
                             'Human36M_subject' + str(subject) + '_data.json'),
                    'r') as f:
                annot = json.load(f)
            if len(db.dataset) == 0:
                for k, v in annot.items():
                    db.dataset[k] = v
            else:
                for k, v in annot.items():
                    db.dataset[k] += v
            # camera load
            with open(
                    osp.join(
                        self.annot_path,
                        'Human36M_subject' + str(subject) + '_camera.json'),
                    'r') as f:
                cameras[str(subject)] = json.load(f)
            # joint coordinate load
            with open(
                    osp.join(
                        self.annot_path,
                        'Human36M_subject' + str(subject) + '_joint_3d.json'),
                    'r') as f:
                joints[str(subject)] = json.load(f)
            # smpl parameter load
            with open(
                    osp.join(
                        self.annot_path, 'Human36M_subject' + str(subject) +
                        '_smpl_param.json'), 'r') as f:
                smpl_params[str(subject)] = json.load(f)
        db.createIndex()

        if self.data_split == 'test' and not cfg.use_gt_info:
            print("Get bounding box and root from " + self.human_bbox_root_dir)
            bbox_root_result = {}
            with open(self.human_bbox_root_dir) as f:
                annot = json.load(f)
            for i in range(len(annot)):
                bbox_root_result[str(annot[i]['image_id'])] = {
                    'bbox': np.array(annot[i]['bbox']),
                    'root': np.array(annot[i]['root_cam'])
                }
        else:
            print("Get bounding box and root from groundtruth")

        datalist = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.img_dir, img['file_name'])
            img_shape = (img['height'], img['width'])

            # check subject and frame_idx
            frame_idx = img['frame_idx']
            if frame_idx % sampling_ratio != 0:
                continue

            # check smpl parameter exist
            subject = img['subject']
            action_idx = img['action_idx']
            subaction_idx = img['subaction_idx']
            frame_idx = img['frame_idx']
            try:
                smpl_param = smpl_params[str(subject)][str(action_idx)][str(
                    subaction_idx)][str(frame_idx)]
            except KeyError:
                smpl_param = None

            # camera parameter
            cam_idx = img['cam_idx']
            cam_param = cameras[str(subject)][str(cam_idx)]
            R, t, f, c = np.array(cam_param['R'], dtype=np.float32), np.array(
                cam_param['t'], dtype=np.float32), np.array(
                    cam_param['f'],
                    dtype=np.float32), np.array(cam_param['c'],
                                                dtype=np.float32)
            cam_param = {'R': R, 't': t, 'focal': f, 'princpt': c}

            # only use frontal camera following previous works (HMR and SPIN)
            if self.data_split == 'test' and str(cam_idx) != '4':
                continue

            # project world coordinate to cam, image coordinate space
            joint_world = np.array(joints[str(subject)][str(action_idx)][str(
                subaction_idx)][str(frame_idx)],
                                   dtype=np.float32)
            joint_cam = world2cam(joint_world, R, t)
            joint_img = cam2pixel(joint_cam, f, c)
            joint_valid = np.ones((self.h36m_joint_num, 1))

            if self.data_split == 'test' and not cfg.use_gt_info:
                bbox = bbox_root_result[str(
                    image_id
                )]['bbox']  # bbox should be aspect ratio preserved-extended. It is done in RootNet.
                root_joint_depth = bbox_root_result[str(image_id)]['root'][2]
            else:
                bbox = process_bbox(np.array(ann['bbox']), img['width'],
                                    img['height'])
                if bbox is None: continue
                root_joint_depth = joint_cam[self.h36m_root_joint_idx][2]

            datalist.append({
                'img_path': img_path,
                'img_id': image_id,
                'img_shape': img_shape,
                'bbox': bbox,
                'joint_img': joint_img,
                'joint_cam': joint_cam,
                'joint_valid': joint_valid,
                'smpl_param': smpl_param,
                'root_joint_depth': root_joint_depth,
                'cam_param': cam_param
            })

        return datalist