Example 1
    def __getitem__(self, idx):
        data = self.datalist[idx]
        img_path, bbox, joint = data['img_path'], data['bbox'], data['joint']
        hand_type, hand_type_valid = data['hand_type'], data['hand_type_valid']
        joint_cam = joint['cam_coord'].copy()
        joint_img = joint['img_coord'].copy()
        joint_valid = joint['valid'].copy()
        hand_type = self.handtype_str2array(hand_type)
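        # 2.5D representation: (x, y) from image coordinates, z from camera-space depth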
        joint_coord = np.concatenate((joint_img, joint_cam[:, 2, None]), 1)

        # image load
        img = load_img(img_path)
        # augmentation
        img, joint_coord, joint_valid, hand_type, inv_trans = augmentation(
            img, bbox, joint_coord, joint_valid, hand_type, self.mode,
            self.joint_type)
        # relative depth between the left and right root joints (wrists)
        rel_root_depth = np.array(
            [joint_coord[self.root_joint_idx['left'], 2] -
             joint_coord[self.root_joint_idx['right'], 2]],
            dtype=np.float32).reshape(1)
        # the relative root depth is supervised only when both hands are present
        if hand_type[0] * hand_type[1] == 1:
            root_valid = np.array(
                [joint_valid[self.root_joint_idx['right']] *
                 joint_valid[self.root_joint_idx['left']]],
                dtype=np.float32).reshape(1)
        else:
            root_valid = np.zeros((1), dtype=np.float32)
        # transform to output heatmap space
        joint_coord, joint_valid, rel_root_depth, root_valid = transform_input_to_output_space(
            joint_coord, joint_valid, rel_root_depth, root_valid,
            self.root_joint_idx, self.joint_type)
        img = self.transform(img.astype(np.float32)) / 255.

        inputs = {'img': img}
        targets = {
            'joint_coord': joint_coord,
            'rel_root_depth': rel_root_depth,
            'hand_type': hand_type
        }
        meta_info = {
            'joint_valid': joint_valid,
            'root_valid': root_valid,
            'hand_type_valid': hand_type_valid,
            'inv_trans': inv_trans,
            'capture': int(data['capture']),
            'cam': int(data['cam']),
            'frame': int(data['frame'])
        }
        return inputs, targets, meta_info
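
A minimal usage sketch for a __getitem__ like the one above, assuming it is a method of a torch.utils.data.Dataset subclass (the class name InterHandDataset and its constructor signature are assumptions, not from the source):

    from torch.utils.data import DataLoader
    import torchvision.transforms as transforms

    dataset = InterHandDataset(transforms.ToTensor(), mode='train')  # hypothetical constructor
    loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
    for inputs, targets, meta_info in loader:
        img = inputs['img']  # (B, 3, H, W) float tensor scaled to [0, 1]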
Example 2
    def evaluate(self, outs, cur_sample_idx):

        annots = self.datalist
        sample_num = len(outs)
        eval_result = {'joint_out': [], 'mesh_out': []}
        for n in range(sample_num):
            annot = annots[cur_sample_idx + n]
            out = outs[n]

            # x,y: resize to input image space and perform bbox to image affine transform
            mesh_out_img = out['mesh_coord_img']
            mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_out_img_xy1 = np.concatenate(
                (mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
            mesh_out_img[:, :2] = np.dot(
                out['bb2img_trans'],
                mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

            # z: devoxelize and translate to absolute depth
            root_joint_depth = annot['root_joint_depth']
            mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
            mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

            # camera back-projection
            cam_param = annot['cam_param']
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

            if cfg.stage == 'param':
                mesh_out_cam = out['mesh_coord_cam']
            joint_out_cam = np.dot(self.joint_regressor, mesh_out_cam)

            eval_result['mesh_out'].append(mesh_out_cam.tolist())
            eval_result['joint_out'].append(joint_out_cam.tolist())

            vis = False
            if vis:
                filename = annot['img_path'].split('/')[-1][:-4]

                img = load_img(annot['img_path'])[:, :, ::-1]
                img = vis_mesh(img, mesh_out_img, 0.5)
                cv2.imwrite(filename + '.jpg', img)

                save_obj(mesh_out_cam, self.mano.face, filename + '.obj')

        return eval_result
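
A hypothetical driver loop for evaluate (the names tester and all_batch_outputs are assumptions, not from the source): cur_sample_idx indexes into self.datalist, so the caller advances it by each batch size.

    cur_sample_idx = 0
    eval_result = {'joint_out': [], 'mesh_out': []}
    for outs in all_batch_outputs:  # outs: list of per-sample output dicts
        batch_result = tester.evaluate(outs, cur_sample_idx)
        for k in eval_result:
            eval_result[k] += batch_result[k]
        cur_sample_idx += len(outs)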
Example 3
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, bbox, smpl_param = data['img_path'], data['bbox'], data['smpl_param']
        
        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, _, _ = augmentation(img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32))/255.

        # smpl coordinates
        smpl_mesh_cam, smpl_joint_cam = self.get_smpl_coord(smpl_param)
        
        inputs = {'img': img}
        targets = {'fit_mesh_coord_cam': smpl_mesh_cam}
        meta_info = {'bb2img_trans': bb2img_trans}
        return inputs, targets, meta_info
Example 4
    def save_test_files(self):
        data = self.datalist
        print(len(data))
        for i in range(32):
            img_path, bbox = data[i]['img_path'], data[i]['bbox']
            print(img_path)
            bbox = bbox.tolist()

            img = load_img(img_path)
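            # reverse the channel order (RGB -> BGR) so cv2.imwrite stores correct colors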
            img = img[:, :, ::-1].copy()
            cv2.imwrite("../custom_data/" + "img_" + str(i) + ".jpg", img)

            self.filenames.write("../custom_data/img_" + str(i) + ".jpg" + "\n")
            self.bbs.write(str(bbox) + "\n")

            self.filenames.flush()
            self.bbs.flush()
Example 5
    image.assign(clip_0_1(image))

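    # convergence check: declare done once none of the ~50 most recent
    # successive loss drops exceeds loss_tolerance times the earlier loss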
    if len(losses) > 50:
        done = True
        for i in range(-51, -2):
            if (losses[i] - losses[i + 1]) > loss_tolerance * losses[i]:
                done = False

    losses.append(loss)

    return image, done


if __name__ == "__main__":
    style_path = "../images/style.jpg"
    style_image, _ = load_img(style_path)

    images, yuvs = load_video("../videos/cat2.mp4", frame_interval)

    styled_images = []
    losses = []
    start = time.time()

    content_layers = ["block5_conv2"]

    style_layers = [
        "block1_conv1",
        "block2_conv1",
        "block3_conv1",
        "block4_conv1",
        "block5_conv1",
    ]
Example 6
    def evaluate(self, outs, cur_sample_idx):
        
        annots = self.datalist
        sample_num = len(outs)
        eval_result = {'mpjpe_lixel': [], 'pa_mpjpe_lixel': [], 'mpjpe_param': [], 'pa_mpjpe_param': []}
        for n in range(sample_num):
            annot = annots[cur_sample_idx + n]
            out = outs[n]
            
            # h36m joint from gt mesh
            mesh_gt_cam = out['mesh_coord_cam_target']
            pose_coord_gt_h36m = np.dot(self.h36m_joint_regressor, mesh_gt_cam)
            depth_gt_h36m = pose_coord_gt_h36m[self.h36m_root_joint_idx,2]
            pose_coord_gt_h36m = pose_coord_gt_h36m - pose_coord_gt_h36m[self.h36m_root_joint_idx,None] # root-relative
            pose_coord_gt_h36m = pose_coord_gt_h36m[self.h36m_eval_joint,:]
            
            # mesh from lixel
            # x,y: resize to input image space and perform bbox to image affine transform
            mesh_out_img = out['mesh_coord_img']
            mesh_out_img[:,0] = mesh_out_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_out_img[:,1] = mesh_out_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_out_img_xy1 = np.concatenate((mesh_out_img[:,:2], np.ones_like(mesh_out_img[:,:1])),1)
            mesh_out_img[:,:2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1,0)).transpose(1,0)[:,:2]
            # z: devoxelize and translate to absolute depth
            if cfg.use_gt_info:
                root_joint_depth = depth_gt_h36m
            else:
                root_joint_depth = annot['root_joint_depth']
            mesh_out_img[:,2] = (mesh_out_img[:,2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
            mesh_out_img[:,2] = mesh_out_img[:,2] + root_joint_depth
            # camera back-projection
            cam_param = annot['cam_param']
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

            # h36m joint from lixel mesh
            pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
            pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx,None] # root-relative
            pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint,:]
            pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
            eval_result['mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meters -> millimeters
            eval_result['pa_mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meters -> millimeters

            # h36m joint from parameter mesh
            if cfg.stage == 'param':
                mesh_out_cam = out['mesh_coord_cam']
                pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
                pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx,None] # root-relative
                pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint,:]
                pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
                eval_result['mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meters -> millimeters
                eval_result['pa_mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meters -> millimeters

            vis = False
            if vis:
                seq_name = annot['img_path'].split('/')[-2]
                img_name = annot['img_path'].split('/')[-1][:-4]
                filename = seq_name + '_' + img_name + '_' + str(n)

                img = load_img(annot['img_path'])[:,:,::-1]
                img = vis_mesh(img, mesh_out_img, 0.5)
                cv2.imwrite(filename + '.jpg', img)

                save_obj(mesh_out_cam, self.smpl.face, filename + '.obj')
                
        return eval_result
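
The two error metrics above in isolation (a minimal sketch; rigid_align is the Procrustes alignment already used by the code):

    def mpjpe(pred, gt):
        # mean per-joint position error over root-relative joints, meters -> millimeters
        return np.sqrt(np.sum((pred - gt) ** 2, 1)).mean() * 1000

    def pa_mpjpe(pred, gt):
        # MPJPE after rigid (Procrustes) alignment of the prediction to the ground truth
        return mpjpe(rigid_align(pred, gt), gt)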
Example 7
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox = data['img_path'], data['img_shape'], data['bbox']
        joint_cam, cam_param, mano_param = data['joint_cam'], data['cam_param'], data['mano_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, _ = augmentation(
            img, bbox, self.data_split, exclude_flip=True)
        # FreiHAND contains only right hands, so flip augmentation is disabled
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # mano coordinates
            mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = self.get_mano_coord(
                mano_param, cam_param)
            mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mano_coord_img = cam2pixel(mano_coord_cam, focal, princpt)

            # affine-transform x,y into the cropped input image; make depth root-relative
            mano_coord_img_xy1 = np.concatenate(
                (mano_coord_img[:, :2], np.ones_like(mano_coord_img[:, :1])), 1)
            mano_coord_img[:, :2] = np.dot(
                img2bb_trans,
                mano_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            root_joint_depth = mano_coord_cam[self.vertex_num + self.root_joint_idx][2]
            mano_coord_img[:, 2] = mano_coord_img[:, 2] - root_joint_depth
            # rescale to the output heatmap resolution
            mano_coord_img[:, 0] = mano_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            mano_coord_img[:, 1] = mano_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
            mano_coord_img[:, 2] = (mano_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

            # check truncation
            mano_trunc = ((mano_coord_img[:,0] >= 0) * (mano_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (mano_coord_img[:,1] >= 0) * (mano_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (mano_coord_img[:,2] >= 0) * (mano_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            mano_mesh_img = mano_coord_img[:self.vertex_num]
            mano_joint_img = mano_coord_img[self.vertex_num:]
            mano_mesh_trunc = mano_trunc[:self.vertex_num]
            mano_joint_trunc = mano_trunc[self.vertex_num:]

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                 [0, 0, 1]], dtype=np.float32)
            # parameter
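            # rotate the global (root) axis-angle pose by the augmentation rotation:
            # axis-angle -> matrix (cv2.Rodrigues), pre-multiply, convert back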
            mano_pose = mano_pose.reshape(-1, 3)
            root_pose = mano_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            mano_pose[self.root_joint_idx] = root_pose.reshape(3)
            mano_pose = mano_pose.reshape(-1)
            # mano coordinate
            mano_joint_cam = mano_joint_cam - mano_joint_cam[self.root_joint_idx, None]  # root-relative
            mano_joint_cam = np.dot(rot_aug_mat, mano_joint_cam.transpose(1, 0)).transpose(1, 0)

            orig_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            orig_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            orig_joint_valid = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy
            orig_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy

            inputs = {'img': img}
            targets = {
                'orig_joint_img': orig_joint_img,
                'fit_joint_img': mano_joint_img,
                'fit_mesh_img': mano_mesh_img,
                'orig_joint_cam': orig_joint_cam,
                'fit_joint_cam': mano_joint_cam,
                'pose_param': mano_pose,
                'shape_param': mano_shape
            }
            meta_info = {
                'orig_joint_valid': orig_joint_valid,
                'orig_joint_trunc': orig_joint_trunc,
                'fit_joint_trunc': mano_joint_trunc,
                'fit_mesh_trunc': mano_mesh_trunc,
                'is_valid_fit': float(True),
                'is_3D': float(True)
            }
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}

        return inputs, targets, meta_info
Example 8
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox = data['img_path'], data['img_shape'], data['bbox']
        smpl_param, cam_param = data['smpl_param'], data['cam_param']

        # img
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        # muco gt
        muco_joint_img = data['joint_img']
        muco_joint_cam = data['joint_cam']
        muco_joint_cam = muco_joint_cam - muco_joint_cam[self.muco_root_joint_idx, None, :]  # root-relative
        muco_joint_valid = data['joint_valid']
        if do_flip:
            muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0]
            muco_joint_cam[:, 0] = -muco_joint_cam[:, 0]
            for pair in self.muco_flip_pairs:
                muco_joint_img[pair[0], :], muco_joint_img[pair[1], :] = \
                    muco_joint_img[pair[1], :].copy(), muco_joint_img[pair[0], :].copy()
                muco_joint_cam[pair[0], :], muco_joint_cam[pair[1], :] = \
                    muco_joint_cam[pair[1], :].copy(), muco_joint_cam[pair[0], :].copy()
                muco_joint_valid[pair[0], :], muco_joint_valid[pair[1], :] = \
                    muco_joint_valid[pair[1], :].copy(), muco_joint_valid[pair[0], :].copy()

        muco_joint_img_xy1 = np.concatenate(
            (muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1)
        muco_joint_img[:, :2] = np.dot(
            img2bb_trans, muco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
        muco_joint_img[:, 0] = muco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
        muco_joint_img[:, 1] = muco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
        muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[self.muco_root_joint_idx][2]  # root-relative
        muco_joint_img[:, 2] = (muco_joint_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # convert cfg.bbox_3d_size from meters to millimeters

        # check truncation
        muco_joint_trunc = muco_joint_valid * ((muco_joint_img[:,0] >= 0) * (muco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                    (muco_joint_img[:,1] >= 0) * (muco_joint_img[:,1] < cfg.output_hm_shape[1]) * \
                    (muco_joint_img[:,2] >= 0) * (muco_joint_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

        # transform muco joints to target db joints
        muco_joint_img = transform_joint_to_other_db(muco_joint_img,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_cam = transform_joint_to_other_db(muco_joint_cam,
                                                     self.muco_joints_name,
                                                     self.joints_name)
        muco_joint_valid = transform_joint_to_other_db(muco_joint_valid,
                                                       self.muco_joints_name,
                                                       self.joints_name)
        muco_joint_trunc = transform_joint_to_other_db(muco_joint_trunc,
                                                       self.muco_joints_name,
                                                       self.joints_name)

        if smpl_param is not None:
            # smpl coordinates
            smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                smpl_param, cam_param, do_flip, img_shape)
            smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
            focal, princpt = cam_param['focal'], cam_param['princpt']
            smpl_coord_img = cam2pixel(smpl_coord_cam, focal, princpt)

            # affine-transform x,y into the cropped input image; make depth root-relative
            smpl_coord_img_xy1 = np.concatenate(
                (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])), 1)
            smpl_coord_img[:, :2] = np.dot(
                img2bb_trans,
                smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
            smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
            smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # convert cfg.bbox_3d_size from meters to millimeters

            # check truncation
            smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                        (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                        (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

            # split mesh and joint coordinates
            smpl_mesh_img = smpl_coord_img[:self.vertex_num]
            smpl_joint_img = smpl_coord_img[self.vertex_num:]
            smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
            smpl_joint_trunc = smpl_trunc[self.vertex_num:]

            # if fitted mesh is too far from muco gt, discard it
            is_valid_fit = True
            error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam,
                                           do_flip)
            if error > self.fitting_thr:
                is_valid_fit = False

        else:
            smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)  # dummy
            smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
            smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
            smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy
            smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)  # dummy
            is_valid_fit = False

        # 3D data rotation augmentation
        rot_aug_mat = np.array(
            [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
             [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
             [0, 0, 1]], dtype=np.float32)
        # muco coordinate
        muco_joint_cam = np.dot(rot_aug_mat, muco_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeters to meters
        # parameter
        smpl_pose = smpl_pose.reshape(-1, 3)
        root_pose = smpl_pose[self.root_joint_idx, :]
        root_pose, _ = cv2.Rodrigues(root_pose)
        root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
        smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
        smpl_pose = smpl_pose.reshape(-1)
        # smpl coordinate
        smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
        smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeters to meters

        inputs = {'img': img}
        targets = {
            'orig_joint_img': muco_joint_img,
            'fit_joint_img': smpl_joint_img,
            'fit_mesh_img': smpl_mesh_img,
            'orig_joint_cam': muco_joint_cam,
            'fit_joint_cam': smpl_joint_cam,
            'pose_param': smpl_pose,
            'shape_param': smpl_shape
        }
        meta_info = {
            'orig_joint_valid': muco_joint_valid,
            'orig_joint_trunc': muco_joint_trunc,
            'fit_joint_trunc': smpl_joint_trunc,
            'fit_mesh_trunc': smpl_mesh_trunc,
            'is_valid_fit': float(is_valid_fit),
            'is_3D': float(True)
        }
        return inputs, targets, meta_info
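
The z mapping above packs a root-relative depth in millimeters into the [0, cfg.output_hm_shape[0]) heatmap range. The same mapping as a standalone sketch (the function name is assumed):

    def depth_to_hm(z_mm, bbox_3d_size_m, hm_depth):
        half_mm = bbox_3d_size_m * 1000 / 2.  # cfg.bbox_3d_size is given in meters
        return (z_mm / half_mm + 1) / 2. * hm_depth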
Example 9
    def __getitem__(self, idx):
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, bbox = data['img_path'], data['img_shape'], data['bbox']

        # image load and affine transform
        img = load_img(img_path)
        img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
            img, bbox, self.data_split)
        img = self.transform(img.astype(np.float32)) / 255.

        if self.data_split == 'train':
            # coco gt
            coco_joint_img = data['joint_img']
            coco_joint_valid = data['joint_valid']
            if do_flip:
                coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0]
                for pair in self.coco_flip_pairs:
                    coco_joint_img[pair[0], :], coco_joint_img[pair[1], :] = \
                        coco_joint_img[pair[1], :].copy(), coco_joint_img[pair[0], :].copy()
                    coco_joint_valid[pair[0], :], coco_joint_valid[pair[1], :] = \
                        coco_joint_valid[pair[1], :].copy(), coco_joint_valid[pair[0], :].copy()

            coco_joint_img_xy1 = np.concatenate(
                (coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])), 1)
            coco_joint_img[:, :2] = np.dot(
                img2bb_trans, coco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
            coco_joint_img[:, 0] = coco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            coco_joint_img[:, 1] = coco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]

            # backup for calculating fitting error
            _coco_joint_img = coco_joint_img.copy()
            _coco_joint_valid = coco_joint_valid.copy()

            # check truncation
            coco_joint_trunc = coco_joint_valid * ((coco_joint_img[:,0] >= 0) * (coco_joint_img[:,0] < cfg.output_hm_shape[2]) * \
                        (coco_joint_img[:,1] >= 0) * (coco_joint_img[:,1] < cfg.output_hm_shape[1])).reshape(-1,1).astype(np.float32)

            # transform coco joints to target db joints
            coco_joint_img = transform_joint_to_other_db(
                coco_joint_img, self.coco_joints_name, self.joints_name)
            coco_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            coco_joint_valid = transform_joint_to_other_db(
                coco_joint_valid, self.coco_joints_name, self.joints_name)
            coco_joint_trunc = transform_joint_to_other_db(
                coco_joint_trunc, self.coco_joints_name, self.joints_name)

            smplify_result = data['smplify_result']
            if smplify_result is not None:
                # use fitted mesh
                smpl_param, cam_param = smplify_result['smpl_param'], smplify_result['cam_param']
                smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(
                    smpl_param, cam_param, do_flip, img_shape)
                smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
                smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'], cam_param['princpt'])

                # x,y affine transform, root-relative depth
                smpl_coord_img_xy1 = np.concatenate(
                    (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, 0:1])), 1)
                smpl_coord_img[:, :2] = np.dot(
                    img2bb_trans,
                    smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
                smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
                smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
                smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
                smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

                # check truncation
                smpl_trunc = ((smpl_coord_img[:,0] >= 0) * (smpl_coord_img[:,0] < cfg.output_hm_shape[2]) * \
                            (smpl_coord_img[:,1] >= 0) * (smpl_coord_img[:,1] < cfg.output_hm_shape[1]) * \
                            (smpl_coord_img[:,2] >= 0) * (smpl_coord_img[:,2] < cfg.output_hm_shape[0])).reshape(-1,1).astype(np.float32)

                # split mesh and joint coordinates
                smpl_mesh_img = smpl_coord_img[:self.vertex_num]
                smpl_joint_img = smpl_coord_img[self.vertex_num:]
                smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
                smpl_joint_trunc = smpl_trunc[self.vertex_num:]

                # if the fitted mesh is too far from the coco gt, discard it
                is_valid_fit = True
                error = self.get_fitting_error(_coco_joint_img, smpl_mesh_cam,
                                               cam_param, img2bb_trans,
                                               _coco_joint_valid)
                if error > self.fitting_thr:
                    is_valid_fit = False

            else:
                smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
                smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
                smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)  # dummy
                smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
                smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
                smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy
                smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)  # dummy
                is_valid_fit = False

            # 3D data rotation augmentation
            rot_aug_mat = np.array(
                [[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                 [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                 [0, 0, 1]], dtype=np.float32)
            # parameter
            smpl_pose = smpl_pose.reshape(-1, 3)
            root_pose = smpl_pose[self.root_joint_idx, :]
            root_pose, _ = cv2.Rodrigues(root_pose)
            root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
            smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
            smpl_pose = smpl_pose.reshape(-1)
            # smpl coordinate
            smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
            smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0)

            inputs = {'img': img}
            targets = {
                'orig_joint_img': coco_joint_img,
                'fit_joint_img': smpl_joint_img,
                'fit_mesh_img': smpl_mesh_img,
                'orig_joint_cam': coco_joint_cam,
                'fit_joint_cam': smpl_joint_cam,
                'pose_param': smpl_pose,
                'shape_param': smpl_shape
            }
            meta_info = {
                'orig_joint_valid': coco_joint_valid,
                'orig_joint_trunc': coco_joint_trunc,
                'fit_joint_trunc': smpl_joint_trunc,
                'fit_mesh_trunc': smpl_mesh_trunc,
                'is_valid_fit': float(is_valid_fit),
                'is_3D': float(False)
            }
            return inputs, targets, meta_info
        else:
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}
            return inputs, targets, meta_info
Example 10
    def __getitem__(self, idx):
        frame = self.framelist[idx]
        seq_name, cam, frame_idx, joint = frame['seq_name'], frame['cam'], frame['frame_idx'], frame['joint']
        joint_coord, joint_valid = joint['world_coord'], joint['valid']

        # input data
        # bbox calculate
        bbox = get_bbox(joint_coord, joint_valid, self.camrot[cam],
                        self.campos[cam], self.focal[cam], self.princpt[cam])
        xmin, ymin, xmax, ymax = bbox
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, self.original_img_shape[1] - 1)
        ymax = min(ymax, self.original_img_shape[0] - 1)
        bbox = np.array([xmin, ymin, xmax, ymax])

        # image read
        img_path = osp.join(self.root_path, seq_name, 'images', 'cam' + cam,
                            'image' + "{:04d}".format(frame_idx) + '.png')
        img = load_img(img_path)
        # rescale the bbox from the annotation resolution to the loaded image resolution
        xmin, ymin, xmax, ymax = bbox
        xmin, xmax = np.array([xmin, xmax]) / self.original_img_shape[1] * img.shape[1]
        ymin, ymax = np.array([ymin, ymax]) / self.original_img_shape[0] * img.shape[0]
        bbox_img = np.array([xmin, ymin, xmax - xmin + 1, ymax - ymin + 1])
        img = generate_patch_image(img, bbox_img, False, 1.0, 0.0,
                                   cfg.input_img_shape)
        input_img = self.transform(img) / 255.

        target_depthmaps = []
        cam_params = []
        affine_transes = []
        for cam in random.sample(self.selected_cameras, cfg.render_view_num):
            # bbox calculate
            bbox = get_bbox(joint_coord, joint_valid, self.camrot[cam],
                            self.campos[cam], self.focal[cam],
                            self.princpt[cam])
            xmin, ymin, xmax, ymax = bbox
            xmin = max(xmin, 0)
            ymin = max(ymin, 0)
            xmax = min(xmax, self.original_img_shape[1] - 1)
            ymax = min(ymax, self.original_img_shape[0] - 1)
            bbox = np.array([xmin, ymin, xmax, ymax])

            # depthmap read
            depthmap_path = osp.join(self.depthmap_root_path,
                                     "{:06d}".format(frame_idx),
                                     'depthmap' + cam + '.pkl')
            with open(depthmap_path, 'rb') as f:
                depthmap = pickle.load(f).astype(np.float32)
            xmin, ymin, xmax, ymax = bbox
            xmin, xmax = np.array([xmin, xmax]) / self.original_img_shape[1] * depthmap.shape[1]
            ymin, ymax = np.array([ymin, ymax]) / self.original_img_shape[0] * depthmap.shape[0]
            bbox_depthmap = np.array([xmin, ymin, xmax - xmin + 1, ymax - ymin + 1])
            depthmap = generate_patch_image(depthmap[:, :, None], bbox_depthmap,
                                            False, 1.0, 0.0, cfg.rendered_img_shape)
            target_depthmaps.append(self.transform(depthmap))

            xmin, ymin, xmax, ymax = bbox
            affine_transes.append(
                gen_trans_from_patch_cv(
                    (xmin + xmax + 1) / 2., (ymin + ymax + 1) / 2.,
                    xmax - xmin + 1, ymax - ymin + 1,
                    cfg.rendered_img_shape[1], cfg.rendered_img_shape[0], 1.0,
                    0.0).astype(np.float32))
            cam_params.append({
                'camrot': self.camrot[cam],
                'campos': self.campos[cam],
                'focal': self.focal[cam],
                'princpt': self.princpt[cam]
            })

        inputs = {'img': input_img}
        targets = {'depthmap': target_depthmaps, 'joint': joint}
        meta_info = {'cam_param': cam_params, 'affine_trans': affine_transes}

        return inputs, targets, meta_info