Code Example #1
    def evaluate(self, outs, cur_sample_idx):

        annots = self.datalist
        sample_num = len(outs)
        eval_result = {'joint_out': [], 'mesh_out': []}
        for n in range(sample_num):
            annot = annots[cur_sample_idx + n]
            out = outs[n]

            # x,y: resize to input image space and perform bbox to image affine transform
            mesh_out_img = out['mesh_coord_img']
            mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_out_img_xy1 = np.concatenate((mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
            mesh_out_img[:, :2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

            # z: devoxelize and translate to absolute depth
            root_joint_depth = annot['root_joint_depth']
            # map heatmap z in [0, output_hm_shape[0]] to a metric offset in [-bbox_3d_size/2, +bbox_3d_size/2]
            mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
            mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

            # camera back-projection
            cam_param = annot['cam_param']
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

            if cfg.stage == 'param':
                mesh_out_cam = out['mesh_coord_cam']
            joint_out_cam = np.dot(self.joint_regressor, mesh_out_cam)

            eval_result['mesh_out'].append(mesh_out_cam.tolist())
            eval_result['joint_out'].append(joint_out_cam.tolist())

            vis = False
            if vis:
                filename = annot['img_path'].split('/')[-1][:-4]

                img = load_img(annot['img_path'])[:, :, ::-1]
                img = vis_mesh(img, mesh_out_img, 0.5)
                cv2.imwrite(filename + '.jpg', img)

                save_obj(mesh_out_cam, self.mano.face, filename + '.obj')

        return eval_result
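
The pixel2cam call above back-projects (u, v, z) pixel-space coordinates into camera space. The helper itself is not part of this listing; below is a minimal sketch, assuming the pinhole-camera convention used across mks0601's pose-estimation repositories (an (N, 3) coordinate array, with focal = (fx, fy) and princpt = (cx, cy)):

    import numpy as np

    def pixel2cam(pixel_coord, focal, princpt):
        # invert the pinhole projection per axis: X = (u - cx) / fx * z
        x = (pixel_coord[:, 0] - princpt[0]) / focal[0] * pixel_coord[:, 2]
        y = (pixel_coord[:, 1] - princpt[1]) / focal[1] * pixel_coord[:, 2]
        z = pixel_coord[:, 2]
        return np.stack((x, y, z), 1)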
Code Example #2
    def evaluate(self, preds):

        print() 
        print('Evaluation start...')

        gts = self.datalist
        preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
        assert len(gts) == len(preds_joint_coord)
        sample_num = len(gts)
        
        mpjpe_sh = [[] for _ in range(self.joint_num*2)] # per-joint errors on single-hand sequences
        mpjpe_ih = [[] for _ in range(self.joint_num*2)] # per-joint errors on interacting-hand sequences
        mrrpe = [] # relative root position errors (right root -> left root)
        acc_hand_cls = 0; hand_cls_cnt = 0
        for n in range(sample_num):
            data = gts[n]
            bbox, cam_param, joint, gt_hand_type, hand_type_valid = data['bbox'], data['cam_param'], data['joint'], data['hand_type'], data['hand_type_valid']
            focal = cam_param['focal']
            princpt = cam_param['princpt']
            gt_joint_coord = joint['cam_coord']
            joint_valid = joint['valid']
            
            # restore xy coordinates to original image space
            pred_joint_coord_img = preds_joint_coord[n].copy()
            pred_joint_coord_img[:,0] = pred_joint_coord_img[:,0]/cfg.output_hm_shape[2]*cfg.input_img_shape[1]
            pred_joint_coord_img[:,1] = pred_joint_coord_img[:,1]/cfg.output_hm_shape[1]*cfg.input_img_shape[0]
            for j in range(self.joint_num*2):
                pred_joint_coord_img[j,:2] = trans_point2d(pred_joint_coord_img[j,:2],inv_trans[n])
            # restore depth to original camera space
            pred_joint_coord_img[:,2] = (pred_joint_coord_img[:,2]/cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size/2)
 
            # mrrpe
            if gt_hand_type == 'interacting' and joint_valid[self.root_joint_idx['left']] and joint_valid[self.root_joint_idx['right']]:
                pred_rel_root_depth = (preds_rel_root_depth[n]/cfg.output_root_hm_shape * 2 - 1) * (cfg.bbox_3d_size_root/2)

                pred_left_root_img = pred_joint_coord_img[self.root_joint_idx['left']].copy()
                pred_left_root_img[2] += data['abs_depth']['right'] + pred_rel_root_depth
                pred_left_root_cam = pixel2cam(pred_left_root_img[None,:], focal, princpt)[0]

                pred_right_root_img = pred_joint_coord_img[self.root_joint_idx['right']].copy()
                pred_right_root_img[2] += data['abs_depth']['right']
                pred_right_root_cam = pixel2cam(pred_right_root_img[None,:], focal, princpt)[0]
                
                pred_rel_root = pred_left_root_cam - pred_right_root_cam
                gt_rel_root = gt_joint_coord[self.root_joint_idx['left']] - gt_joint_coord[self.root_joint_idx['right']]
                mrrpe.append(float(np.sqrt(np.sum((pred_rel_root - gt_rel_root)**2))))

           
            # add root joint depth
            pred_joint_coord_img[self.joint_type['right'],2] += data['abs_depth']['right']
            pred_joint_coord_img[self.joint_type['left'],2] += data['abs_depth']['left']

            # back project to camera coordinate system
            pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

            # root joint alignment
            for h in ('right', 'left'):
                pred_joint_coord_cam[self.joint_type[h]] = pred_joint_coord_cam[self.joint_type[h]] - pred_joint_coord_cam[self.root_joint_idx[h],None,:]
                gt_joint_coord[self.joint_type[h]] = gt_joint_coord[self.joint_type[h]] - gt_joint_coord[self.root_joint_idx[h],None,:]
            
            # mpjpe
            for j in range(self.joint_num*2):
                if joint_valid[j]:
                    if gt_hand_type == 'right' or gt_hand_type == 'left':
                        mpjpe_sh[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))
                    else:
                        mpjpe_ih[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))

            # handedness accuracy
            if hand_type_valid:
                if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
                    acc_hand_cls += 1
                elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
                    acc_hand_cls += 1
                elif gt_hand_type == 'interacting' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] > 0.5:
                    acc_hand_cls += 1
                hand_cls_cnt += 1

            vis = False
            if vis:
                img_path = data['img_path']
                cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                _img = cvimg[:,:,::-1].transpose(2,0,1)
                vis_kps = pred_joint_coord_img.copy()
                vis_valid = joint_valid.copy()
                capture = str(data['capture'])
                cam = str(data['cam'])
                frame = str(data['frame'])
                filename = 'out_' + str(n) + '_' + gt_hand_type + '.jpg'
                vis_keypoints(_img, vis_kps, vis_valid, self.skeleton, filename)

            vis = False
            if vis:
                filename = 'out_' + str(n) + '_3d.jpg'
                vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton, filename)
        

        if hand_cls_cnt > 0: print('Handedness accuracy: ' + str(acc_hand_cls / hand_cls_cnt))
        if len(mrrpe) > 0: print('MRRPE: ' + str(sum(mrrpe)/len(mrrpe)))
        print()
 
        tot_err = []
        eval_summary = 'MPJPE for each joint (all sequences): \n'
        for j in range(self.joint_num*2):
            tot_err_j = np.mean(np.concatenate((np.stack(mpjpe_sh[j]), np.stack(mpjpe_ih[j]))))
            joint_name = self.skeleton[j]['name']
            eval_summary += (joint_name + ': %.2f, ' % tot_err_j)
            tot_err.append(tot_err_j)
        print(eval_summary)
        print('MPJPE for all hand sequences: %.2f' % (np.mean(tot_err)))
        print()

        eval_summary = 'MPJPE for each joint (single hand sequences): \n'
        for j in range(self.joint_num*2):
            mpjpe_sh[j] = np.mean(np.stack(mpjpe_sh[j]))
            joint_name = self.skeleton[j]['name']
            eval_summary += (joint_name + ': %.2f, ' % mpjpe_sh[j])
        print(eval_summary)
        print('MPJPE for single hand sequences: %.2f' % (np.mean(mpjpe_sh)))
        print()

        eval_summary = 'MPJPE for each joint (interacting hand sequences): \n'
        for j in range(self.joint_num*2):
            mpjpe_ih[j] = np.mean(np.stack(mpjpe_ih[j]))
            joint_name = self.skeleton[j]['name']
            eval_summary += (joint_name + ': %.2f, ' % mpjpe_ih[j])
        print(eval_summary)
        print('MPJPE for interacting hand sequences: %.2f' % (np.mean(mpjpe_ih)))
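
trans_point2d, used above to undo the crop-and-resize transform, applies the 2x3 affine matrix inv_trans[n] to a single 2D point. A minimal sketch of such a helper, inferred from how it is called here:

    import numpy as np

    def trans_point2d(pt_2d, trans):
        # trans: 2x3 affine matrix; lift the point to homogeneous coordinates
        src_pt = np.array([pt_2d[0], pt_2d[1], 1.])
        dst_pt = np.dot(trans, src_pt)
        return dst_pt[0:2]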
Code Example #3
    def evaluate(self, outs, cur_sample_idx):
        
        annots = self.datalist
        sample_num = len(outs)
        eval_result = {'mpjpe_lixel': [], 'pa_mpjpe_lixel': [], 'mpjpe_param': [], 'pa_mpjpe_param': []}
        for n in range(sample_num):
            annot = annots[cur_sample_idx + n]
            out = outs[n]
            
            # h36m joint from gt mesh
            mesh_gt_cam = out['mesh_coord_cam_target']
            pose_coord_gt_h36m = np.dot(self.h36m_joint_regressor, mesh_gt_cam)
            depth_gt_h36m = pose_coord_gt_h36m[self.h36m_root_joint_idx,2]
            pose_coord_gt_h36m = pose_coord_gt_h36m - pose_coord_gt_h36m[self.h36m_root_joint_idx,None] # root-relative
            pose_coord_gt_h36m = pose_coord_gt_h36m[self.h36m_eval_joint,:]
            
            # mesh from lixel
            # x,y: resize to input image space and perform bbox to image affine transform
            mesh_out_img = out['mesh_coord_img']
            mesh_out_img[:,0] = mesh_out_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_out_img[:,1] = mesh_out_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_out_img_xy1 = np.concatenate((mesh_out_img[:,:2], np.ones_like(mesh_out_img[:,:1])),1)
            mesh_out_img[:,:2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1,0)).transpose(1,0)[:,:2]
            # z: devoxelize and translate to absolute depth
            if cfg.use_gt_info:
                root_joint_depth = depth_gt_h36m
            else:
                root_joint_depth = annot['root_joint_depth']
            mesh_out_img[:,2] = (mesh_out_img[:,2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
            mesh_out_img[:,2] = mesh_out_img[:,2] + root_joint_depth
            # camera back-projection
            cam_param = annot['cam_param']
            focal, princpt = cam_param['focal'], cam_param['princpt']
            mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

            # h36m joint from lixel mesh
            pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
            pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx,None] # root-relative
            pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint,:]
            pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
            eval_result['mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meter -> millimeter
            eval_result['pa_mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meter -> millimeter

            # h36m joint from parameter mesh
            if cfg.stage == 'param':
                mesh_out_cam = out['mesh_coord_cam']
                pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
                pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx,None] # root-relative
                pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint,:]
                pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
                eval_result['mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meter -> millimeter
                eval_result['pa_mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2,1)).mean() * 1000) # meter -> millimeter

            vis = False
            if vis:
                seq_name = annot['img_path'].split('/')[-2]
                img_name = annot['img_path'].split('/')[-1][:-4]
                filename = seq_name + '_' + img_name + '_' + str(n)

                img = load_img(annot['img_path'])[:,:,::-1]
                img = vis_mesh(img, mesh_out_img, 0.5)
                cv2.imwrite(filename + '.jpg', img)

                save_obj(mesh_out_cam, self.smpl.face, filename + '.obj')
                
        return eval_result
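
PA-MPJPE above depends on rigid_align, which fits the prediction onto the ground truth before measuring error. The repository's implementation is not shown in this listing; a standard Umeyama-style sketch that matches the call pattern (align the first argument onto the second with scale, rotation, and translation):

    import numpy as np

    def rigid_align(A, B):
        # least-squares similarity alignment of A (N, 3) onto B (N, 3)
        mu_A, mu_B = A.mean(0), B.mean(0)
        A0, B0 = A - mu_A, B - mu_B
        var_A = (A0 ** 2).sum() / len(A)
        cov = B0.T @ A0 / len(A)
        U, D, Vt = np.linalg.svd(cov)
        S = np.eye(3)
        if np.linalg.det(U) * np.linalg.det(Vt) < 0:
            S[-1, -1] = -1                    # guard against a reflection
        R = U @ S @ Vt
        c = np.trace(np.diag(D) @ S) / var_A  # isotropic scale
        t = mu_B - c * R @ mu_A
        return c * A @ R.T + t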
Code Example #4
File: demo.py Project: iliskhan/darba
mesh_lixel_img = out['mesh_coord_img'][0].cpu().numpy()
mesh_param_cam = out['mesh_coord_cam'][0].cpu().numpy()

# restore mesh_lixel_img to original image space and continuous depth space
mesh_lixel_img[:,0] = mesh_lixel_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
mesh_lixel_img[:,1] = mesh_lixel_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
mesh_lixel_img[:,:2] = np.dot(bb2img_trans, np.concatenate((mesh_lixel_img[:,:2], np.ones_like(mesh_lixel_img[:,:1])),1).transpose(1,0)).transpose(1,0)
mesh_lixel_img[:,2] = (mesh_lixel_img[:,2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)

# root-relative 3D coordinates -> absolute 3D coordinates
focal = (1500, 1500)  # virtual focal length in pixels
princpt = (original_img_width/2, original_img_height/2)  # assume the principal point is at the image center
root_xy = np.dot(joint_regressor, mesh_lixel_img)[root_joint_idx,:2]
root_depth = 11250.5732421875 # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
root_depth /= 1000 # RootNet outputs millimeters; convert to meters
root_img = np.array([root_xy[0], root_xy[1], root_depth])
root_cam = pixel2cam(root_img[None,:], focal, princpt)
mesh_lixel_img[:,2] += root_depth
mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
mesh_param_cam += root_cam.reshape(1,3)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
vis_img = vis_mesh(vis_img, mesh_lixel_img)
cv2.imwrite('output_mesh_lixel.jpg', vis_img)

# visualize param mesh in 2D space
vis_img = original_img.copy()
mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
vis_img = vis_mesh(vis_img, mesh_param_img)
cv2.imwrite('output_mesh_param.jpg', vis_img)
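
cam2pixel is the forward projection, the inverse of pixel2cam: the param-stage mesh lives in camera space, so it must be projected back onto the image before vis_mesh can draw it. A minimal sketch under the same pinhole assumptions as before:

    import numpy as np

    def cam2pixel(cam_coord, focal, princpt):
        # perspective-project camera-space (X, Y, Z) onto the image plane
        x = cam_coord[:, 0] / cam_coord[:, 2] * focal[0] + princpt[0]
        y = cam_coord[:, 1] / cam_coord[:, 2] * focal[1] + princpt[1]
        z = cam_coord[:, 2]
        return np.stack((x, y, z), 1)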
Code Example #5
    def evaluate(self, preds):

        print() 
        print('Evaluation start...')

        gts = self.datalist
        preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
        assert len(gts) == len(preds_joint_coord)
        sample_num = len(gts)
        
        mpjpe = [[] for _ in range(self.joint_num)] # treat right and left hands identically
        acc_hand_cls = 0
        for n in range(sample_num):
            data = gts[n]
            bbox, cam_param, joint, gt_hand_type = data['bbox'], data['cam_param'], data['joint'], data['hand_type']
            focal = cam_param['focal']
            princpt = cam_param['princpt']
            gt_joint_coord = joint['cam_coord']
            joint_valid = joint['valid']

            # restore coordinates to original space
            pred_joint_coord_img = preds_joint_coord[n].copy()
            pred_joint_coord_img[:,0] = pred_joint_coord_img[:,0]/cfg.output_hm_shape[2]*cfg.input_img_shape[1]
            pred_joint_coord_img[:,1] = pred_joint_coord_img[:,1]/cfg.output_hm_shape[1]*cfg.input_img_shape[0]
            for j in range(self.joint_num*2):
                pred_joint_coord_img[j,:2] = trans_point2d(pred_joint_coord_img[j,:2],inv_trans[n])
            pred_joint_coord_img[:,2] = (pred_joint_coord_img[:,2]/cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size/2)
            pred_joint_coord_img[:,2] = pred_joint_coord_img[:,2] + data['abs_depth']

            # back project to camera coordinate system
            pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

            # root joint alignment
            pred_joint_coord_cam[self.joint_type['right']] = pred_joint_coord_cam[self.joint_type['right']] - pred_joint_coord_cam[self.root_joint_idx['right'],None,:]
            pred_joint_coord_cam[self.joint_type['left']] = pred_joint_coord_cam[self.joint_type['left']] - pred_joint_coord_cam[self.root_joint_idx['left'],None,:]
            gt_joint_coord[self.joint_type['right']] = gt_joint_coord[self.joint_type['right']] - gt_joint_coord[self.root_joint_idx['right'],None,:]
            gt_joint_coord[self.joint_type['left']] = gt_joint_coord[self.joint_type['left']] - gt_joint_coord[self.root_joint_idx['left'],None,:]
            
            # select right or left hand using groundtruth hand type
            pred_joint_coord_cam = pred_joint_coord_cam[self.joint_type[gt_hand_type]]
            gt_joint_coord = gt_joint_coord[self.joint_type[gt_hand_type]]
            joint_valid = joint_valid[self.joint_type[gt_hand_type]]

            # mpjpe save
            for j in range(self.joint_num):
                if joint_valid[j]:
                    mpjpe[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))
            
            if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
                acc_hand_cls += 1
            elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
                acc_hand_cls += 1

            vis = False
            if vis:
                img_path = data['img_path']
                cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                _img = cvimg[:,:,::-1].transpose(2,0,1)
                vis_kps = pred_joint_coord_img.copy()
                vis_valid = joint_valid.copy()
                filename = 'out_' + str(n) + '.jpg'
                vis_keypoints(_img, vis_kps, vis_valid, self.skeleton[:self.joint_num], filename)

            vis = False
            if vis:
                filename = 'out_' + str(n) + '_3d.png'
                vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton[:self.joint_num], filename)

        print('Handedness accuracy: ' + str(acc_hand_cls / sample_num))

        eval_summary = 'MPJPE for each joint: \n'
        for j in range(self.joint_num):
            mpjpe[j] = np.mean(np.stack(mpjpe[j]))
            joint_name = self.skeleton[j]['name']
            eval_summary += (joint_name + ': %.2f, ' % mpjpe[j])
        print(eval_summary)
        print('MPJPE: %.2f' % (np.mean(mpjpe)))
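
The handedness checks in examples #2 and #5 threshold the two sigmoid outputs (right, left) independently, so 'interacting' means both pass the threshold. A hypothetical helper, purely illustrative, that packages the same decision rule:

    def hand_type_label(probs, threshold=0.5):
        # probs: length-2 array of sigmoid outputs, (P(right), P(left))
        right, left = probs[0] > threshold, probs[1] > threshold
        if right and left:
            return 'interacting'
        if right:
            return 'right'
        if left:
            return 'left'
        return 'none'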
Code Example #6
def main():

    # input_size=416
    # iou_threshold=0.45
    # score_threshold=0.3

    # Yolo = Load_Yolo_model()
    times = []
    output_path = "output"
    vid = cv2.VideoCapture(0)
    vid.set(3, 1280)
    vid.set(4, 1024)
    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    focal = (1500, 1500)
    princpt = (width / 2, height / 2)

    print(f"Width {width} Height {height}")

    bbox = [0, 0, width, height]
    bbox = process_bbox(bbox, width, height)

    root_depth = 11250.5732421875  # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
    root_depth /= 1000  # RootNet outputs millimeters; convert to meters
    with torch.no_grad():

        while True:
            _, frame = vid.read()

            t1 = time.time()
            try:
                # OpenCV captures frames in BGR; convert once to RGB for the model
                original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            except cv2.error:
                # vid.read() returned no frame; stop the loop
                break
            # image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
            # image_data = image_data[np.newaxis, ...].astype(np.float32)

            # if YOLO_FRAMEWORK == "tf":
            #     pred_bbox = Yolo.predict(image_data)
            # elif YOLO_FRAMEWORK == "trt":
            #     batched_input = tf.constant(image_data)
            #     result = Yolo(batched_input)
            #     pred_bbox = []
            #     for key, value in result.items():
            #         value = value.numpy()
            #         pred_bbox.append(value)

            # pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            # pred_bbox = tf.concat(pred_bbox, axis=0)

            # bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
            # bboxes = nms(bboxes, iou_threshold, method='nms')
            # frame = draw_bbox(original_frame, bboxes)
            #----------------------------------------i2l meshnet---------------------------------------

            # original_img_height, original_img_width = original_frame.shape[:2]

            # bbox = bboxes[0][:4]

            img, img2bb_trans, bb2img_trans = generate_patch_image(
                original_frame, bbox, 1.0, 0.0, False, cfg.input_img_shape)
            img = transform(img.astype(np.float32)) / 255
            img = img.cuda()[None, :, :, :]

            # forward
            inputs = {'img': img}
            targets = {}
            meta_info = {'bb2img_trans': bb2img_trans}
            out = model(inputs, targets, meta_info, 'test')
            img = img[0].cpu().numpy().transpose(1, 2, 0)  # cfg.input_img_shape[1], cfg.input_img_shape[0], 3
            mesh_lixel_img = out['mesh_coord_img'][0].cpu().numpy()
            mesh_param_cam = out['mesh_coord_cam'][0].cpu().numpy()

            # restore mesh_lixel_img to original image space and continuous depth space
            mesh_lixel_img[:, 0] = mesh_lixel_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
            mesh_lixel_img[:, 1] = mesh_lixel_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
            mesh_lixel_img[:, :2] = np.dot(bb2img_trans, np.concatenate((mesh_lixel_img[:, :2], np.ones_like(mesh_lixel_img[:, :1])), 1).transpose(1, 0)).transpose(1, 0)
            mesh_lixel_img[:, 2] = (mesh_lixel_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)

            # root-relative 3D coordinates -> absolute 3D coordinates

            root_xy = np.dot(joint_regressor, mesh_lixel_img)[root_joint_idx, :2]

            root_img = np.array([root_xy[0], root_xy[1], root_depth])
            root_cam = pixel2cam(root_img[None, :], focal, princpt)
            mesh_lixel_img[:, 2] += root_depth
            mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
            mesh_param_cam += root_cam.reshape(1, 3)

            # visualize lixel mesh in 2D space
            # vis_img = frame.copy()
            # vis_img = vis_mesh(vis_img, mesh_lixel_img)
            # cv2.imwrite('output_mesh_lixel.jpg', vis_img)

            # visualize param mesh in 2D space
            # vis_img = frame.copy()
            # mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
            # vis_img = vis_mesh(vis_img, mesh_param_img)
            # cv2.imwrite('output_mesh_param.jpg', vis_img)

            # save mesh (obj)
            # save_obj(mesh_lixel_cam, face, 'output_mesh_lixel.obj')
            # save_obj(mesh_param_cam, face, 'output_mesh_param.obj')

            # render mesh from lixel
            vis_img = frame.copy()
            rendered_img = render_mesh(vis_img, mesh_lixel_cam, face, {'focal': focal, 'princpt': princpt})
            # cv2.imwrite('rendered_mesh_lixel.jpg', rendered_img)
            cv2.imshow('output', rendered_img / 255)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

            # render mesh from param
            # vis_img = frame.copy()
            # rendered_img = render_mesh(vis_img, mesh_param_cam, face, {'focal': focal, 'princpt': princpt})
            # cv2.imwrite('rendered_mesh_param.jpg', rendered_img)

            #----------------------------------------i2l meshnet---------------------------------------
            t2 = time.time()
            times.append(t2 - t1)
            times = times[-20:]  # keep a rolling window of the last 20 frame times

            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms

            print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))