# Assumed context for these excerpts: numpy as np, cv2, the repo's cfg object,
# and the helpers trans_point2d, pixel2cam, vis_keypoints, vis_3d_keypoints
# (imported from the surrounding codebase).

def evaluate(self, preds):
    print()
    print('Evaluation start...')

    gts = self.datalist
    preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = \
        preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
    assert len(gts) == len(preds_joint_coord)
    sample_num = len(gts)

    mpjpe = [[] for _ in range(self.joint_num)]  # treat right and left hands identically
    acc_hand_cls = 0
    for n in range(sample_num):
        data = gts[n]
        bbox, cam_param, joint, gt_hand_type = data['bbox'], data['cam_param'], data['joint'], data['hand_type']
        focal = cam_param['focal']
        princpt = cam_param['princpt']
        gt_joint_coord = joint['cam_coord']
        joint_valid = joint['valid']

        # restore coordinates to original image space
        pred_joint_coord_img = preds_joint_coord[n].copy()
        pred_joint_coord_img[:,0] = pred_joint_coord_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        pred_joint_coord_img[:,1] = pred_joint_coord_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        for j in range(self.joint_num*2):
            pred_joint_coord_img[j,:2] = trans_point2d(pred_joint_coord_img[j,:2], inv_trans[n])
        pred_joint_coord_img[:,2] = (pred_joint_coord_img[:,2] / cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size/2)
        pred_joint_coord_img[:,2] = pred_joint_coord_img[:,2] + data['abs_depth']

        # back-project to the camera coordinate system
        pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

        # root joint alignment
        pred_joint_coord_cam[self.joint_type['right']] = pred_joint_coord_cam[self.joint_type['right']] - pred_joint_coord_cam[self.root_joint_idx['right'],None,:]
        pred_joint_coord_cam[self.joint_type['left']] = pred_joint_coord_cam[self.joint_type['left']] - pred_joint_coord_cam[self.root_joint_idx['left'],None,:]
        gt_joint_coord[self.joint_type['right']] = gt_joint_coord[self.joint_type['right']] - gt_joint_coord[self.root_joint_idx['right'],None,:]
        gt_joint_coord[self.joint_type['left']] = gt_joint_coord[self.joint_type['left']] - gt_joint_coord[self.root_joint_idx['left'],None,:]

        # select right or left hand using the groundtruth hand type
        pred_joint_coord_cam = pred_joint_coord_cam[self.joint_type[gt_hand_type]]
        gt_joint_coord = gt_joint_coord[self.joint_type[gt_hand_type]]
        joint_valid = joint_valid[self.joint_type[gt_hand_type]]

        # accumulate per-joint MPJPE
        for j in range(self.joint_num):
            if joint_valid[j]:
                mpjpe[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))

        # handedness accuracy
        if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
            acc_hand_cls += 1
        elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
            acc_hand_cls += 1

        vis = False
        if vis:
            img_path = data['img_path']
            cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            _img = cvimg[:,:,::-1].transpose(2,0,1)
            vis_kps = pred_joint_coord_img.copy()
            vis_valid = joint_valid.copy()
            filename = 'out_' + str(n) + '.jpg'
            vis_keypoints(_img, vis_kps, vis_valid, self.skeleton[:self.joint_num], filename)

        vis = False
        if vis:
            filename = 'out_' + str(n) + '_3d.png'
            vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton[:self.joint_num], filename)

    print('Handedness accuracy: ' + str(acc_hand_cls / sample_num))

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num):
        mpjpe[j] = np.mean(np.stack(mpjpe[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe[j])
    print(eval_summary)
    print('MPJPE: %.2f' % (np.mean(mpjpe)))
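# For reference, minimal sketches of the two geometry helpers used above.
# Their behavior is inferred from how they are called here (a 2x3 inverse
# affine warp and standard pinhole back-projection); the repo's own utils
# may differ in detail.
import numpy as np

def trans_point2d(pt_2d, trans):
    # apply a 2x3 affine transform (e.g., the inverse crop/resize warp) to one 2D point
    src_pt = np.array([pt_2d[0], pt_2d[1], 1.])
    dst_pt = np.dot(trans, src_pt)
    return dst_pt[0:2]

def pixel2cam(pixel_coord, focal, princpt):
    # pinhole back-projection: rows of (u, v, z) -> (x, y, z) in camera space
    x = (pixel_coord[:, 0] - princpt[0]) / focal[0] * pixel_coord[:, 2]
    y = (pixel_coord[:, 1] - princpt[1]) / focal[1] * pixel_coord[:, 2]
    return np.stack((x, y, pixel_coord[:, 2]), axis=1)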
def evaluate(self, preds):
    print()
    print('Evaluation start...')

    gts = self.datalist
    preds_joint_coord, preds_rel_root_depth, preds_hand_type, inv_trans = \
        preds['joint_coord'], preds['rel_root_depth'], preds['hand_type'], preds['inv_trans']
    assert len(gts) == len(preds_joint_coord)
    sample_num = len(gts)

    mpjpe_sh = [[] for _ in range(self.joint_num*2)]  # single-hand sequences
    mpjpe_ih = [[] for _ in range(self.joint_num*2)]  # interacting-hand sequences
    mrrpe = []
    acc_hand_cls = 0
    hand_cls_cnt = 0
    for n in range(sample_num):
        data = gts[n]
        bbox, cam_param, joint, gt_hand_type, hand_type_valid = \
            data['bbox'], data['cam_param'], data['joint'], data['hand_type'], data['hand_type_valid']
        focal = cam_param['focal']
        princpt = cam_param['princpt']
        gt_joint_coord = joint['cam_coord']
        joint_valid = joint['valid']

        # restore xy coordinates to original image space
        pred_joint_coord_img = preds_joint_coord[n].copy()
        pred_joint_coord_img[:,0] = pred_joint_coord_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        pred_joint_coord_img[:,1] = pred_joint_coord_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        for j in range(self.joint_num*2):
            pred_joint_coord_img[j,:2] = trans_point2d(pred_joint_coord_img[j,:2], inv_trans[n])
        # restore depth to original camera space
        pred_joint_coord_img[:,2] = (pred_joint_coord_img[:,2] / cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size/2)

        # MRRPE (relative root position error; interacting sequences with both roots valid)
        if gt_hand_type == 'interacting' and joint_valid[self.root_joint_idx['left']] and joint_valid[self.root_joint_idx['right']]:
            pred_rel_root_depth = (preds_rel_root_depth[n] / cfg.output_root_hm_shape * 2 - 1) * (cfg.bbox_3d_size_root/2)

            pred_left_root_img = pred_joint_coord_img[self.root_joint_idx['left']].copy()
            pred_left_root_img[2] += data['abs_depth']['right'] + pred_rel_root_depth
            pred_left_root_cam = pixel2cam(pred_left_root_img[None,:], focal, princpt)[0]

            pred_right_root_img = pred_joint_coord_img[self.root_joint_idx['right']].copy()
            pred_right_root_img[2] += data['abs_depth']['right']
            pred_right_root_cam = pixel2cam(pred_right_root_img[None,:], focal, princpt)[0]

            pred_rel_root = pred_left_root_cam - pred_right_root_cam
            gt_rel_root = gt_joint_coord[self.root_joint_idx['left']] - gt_joint_coord[self.root_joint_idx['right']]
            mrrpe.append(float(np.sqrt(np.sum((pred_rel_root - gt_rel_root)**2))))

        # add root joint depth
        pred_joint_coord_img[self.joint_type['right'],2] += data['abs_depth']['right']
        pred_joint_coord_img[self.joint_type['left'],2] += data['abs_depth']['left']

        # back-project to the camera coordinate system
        pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)

        # root joint alignment
        for h in ('right', 'left'):
            pred_joint_coord_cam[self.joint_type[h]] = pred_joint_coord_cam[self.joint_type[h]] - pred_joint_coord_cam[self.root_joint_idx[h],None,:]
            gt_joint_coord[self.joint_type[h]] = gt_joint_coord[self.joint_type[h]] - gt_joint_coord[self.root_joint_idx[h],None,:]

        # MPJPE, split by sequence type
        for j in range(self.joint_num*2):
            if joint_valid[j]:
                if gt_hand_type == 'right' or gt_hand_type == 'left':
                    mpjpe_sh[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))
                else:
                    mpjpe_ih[j].append(np.sqrt(np.sum((pred_joint_coord_cam[j] - gt_joint_coord[j])**2)))

        # handedness accuracy
        if hand_type_valid:
            if gt_hand_type == 'right' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] < 0.5:
                acc_hand_cls += 1
            elif gt_hand_type == 'left' and preds_hand_type[n][0] < 0.5 and preds_hand_type[n][1] > 0.5:
                acc_hand_cls += 1
            elif gt_hand_type == 'interacting' and preds_hand_type[n][0] > 0.5 and preds_hand_type[n][1] > 0.5:
                acc_hand_cls += 1
            hand_cls_cnt += 1

        vis = False
        if vis:
            img_path = data['img_path']
            cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            _img = cvimg[:,:,::-1].transpose(2,0,1)
            vis_kps = pred_joint_coord_img.copy()
            vis_valid = joint_valid.copy()
            capture = str(data['capture'])
            cam = str(data['cam'])
            frame = str(data['frame'])
            filename = 'out_' + str(n) + '_' + gt_hand_type + '.jpg'
            vis_keypoints(_img, vis_kps, vis_valid, self.skeleton, filename)

        vis = False
        if vis:
            filename = 'out_' + str(n) + '_3d.jpg'
            vis_3d_keypoints(pred_joint_coord_cam, joint_valid, self.skeleton, filename)

    if hand_cls_cnt > 0:
        print('Handedness accuracy: ' + str(acc_hand_cls / hand_cls_cnt))
    if len(mrrpe) > 0:
        print('MRRPE: ' + str(sum(mrrpe)/len(mrrpe)))
    print()

    tot_err = []
    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num*2):
        tot_err_j = np.mean(np.concatenate((np.stack(mpjpe_sh[j]), np.stack(mpjpe_ih[j]))))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % tot_err_j)
        tot_err.append(tot_err_j)
    print(eval_summary)
    print('MPJPE for all hand sequences: %.2f' % (np.mean(tot_err)))
    print()

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num*2):
        mpjpe_sh[j] = np.mean(np.stack(mpjpe_sh[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe_sh[j])
    print(eval_summary)
    print('MPJPE for single hand sequences: %.2f' % (np.mean(mpjpe_sh)))
    print()

    eval_summary = 'MPJPE for each joint: \n'
    for j in range(self.joint_num*2):
        mpjpe_ih[j] = np.mean(np.stack(mpjpe_ih[j]))
        joint_name = self.skeleton[j]['name']
        eval_summary += (joint_name + ': %.2f, ' % mpjpe_ih[j])
    print(eval_summary)
    print('MPJPE for interacting hand sequences: %.2f' % (np.mean(mpjpe_ih)))
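# A hedged sketch of the `preds` dictionary evaluate() expects, with shapes
# inferred from the indexing above (N test samples, 21 joints per hand,
# right-hand joints first). The placeholder arrays stand in for concatenated
# per-batch network outputs; names match the keys used in the source.
import numpy as np

N = 4  # hypothetical sample count; must equal len(self.datalist)
preds = {
    'joint_coord': np.zeros((N, 42, 3), dtype=np.float32),             # xyz in heatmap space
    'rel_root_depth': np.zeros((N,), dtype=np.float32),                # left-to-right root depth, root-heatmap units
    'hand_type': np.full((N, 2), 0.5, dtype=np.float32),               # sigmoid scores: [right, left]
    'inv_trans': np.tile(np.eye(2, 3, dtype=np.float32), (N, 1, 1)),   # 2x3 inverse affine back to the original image
}
# dataset.evaluate(preds)  # 'dataset' would be an instance of this class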
def visualize(self, preds, filename, img_path, inv_trans):
    # NOTE: the 'filename' argument is currently unused; output names are derived from img_path below.
    joint_valid = [1.0]*21 + [1.0]*21  # change 1.0 to 0.0 if that hand is not present; the right hand comes first in the output
    focal = [1500, 1500]  # x-axis, y-axis
    princpt = [256/2, 256/2]  # principal point at the center of the 256x256 input
    root_joint_idx = {'right': 20, 'left': 41}
    preds_joint_coord, preds_rel_root_depth, preds_hand_type = preds['joint_coord'], preds['rel_root_depth'], preds['hand_type']
    print(preds_hand_type)

    # img = load_image(img_path)
    # inv_trans = augmentation(img, np.array(bbox), preds_joint_coord, joint_valid, preds_hand_type, 'test', None)

    # restore xy coordinates to original image space and depth to metric units
    pred_joint_coord_img = preds_joint_coord[0].copy()
    pred_joint_coord_img[:,0] = pred_joint_coord_img[:,0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
    pred_joint_coord_img[:,1] = pred_joint_coord_img[:,1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
    for j in range(21*2):
        pred_joint_coord_img[j,:2] = trans_point2d(pred_joint_coord_img[j,:2], inv_trans)
    pred_joint_coord_img[:,2] = (pred_joint_coord_img[:,2] / cfg.output_hm_shape[0] * 2 - 1) * (cfg.bbox_3d_size/2)

    # if preds_hand_type[0][0] == 0.9 and preds_hand_type[0][1] == 0.9:  # change the threshold so this part executes when both hands are present
    #     pred_rel_root_depth = (preds_rel_root_depth[0]/cfg.output_root_hm_shape * 2 - 1) * (cfg.bbox_3d_size_root/2)
    #     pred_left_root_img = pred_joint_coord_img[root_joint_idx['left']].copy()
    #     pred_left_root_img[2] += pred_rel_root_depth
    #     pred_left_root_cam = pixel2cam(pred_left_root_img[None,:], focal, princpt)[0]
    #     pred_right_root_img = pred_joint_coord_img[root_joint_idx['right']].copy()
    #     pred_right_root_cam = pixel2cam(pred_right_root_img[None,:], focal, princpt)[0]
    #     pred_rel_root = pred_left_root_cam - pred_right_root_cam
    # pred_joint_coord_cam = pixel2cam(pred_joint_coord_img, focal, princpt)
    # joint_type = {'right': np.arange(0,21), 'left': np.arange(21,21*2)}
    # for h in ('right', 'left'):
    #     pred_joint_coord_cam[joint_type[h]] = pred_joint_coord_cam[joint_type[h]] - pred_joint_coord_cam[root_joint_idx[h],None,:]
    # print(pred_joint_coord_cam.shape)

    print(img_path)
    cvimg = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    _img = cvimg[:,:,::-1].transpose(2,0,1)
    vis_kps = pred_joint_coord_img.copy()
    vis_valid = joint_valid.copy()

    filename = img_path.replace(".jpg", "2D.jpg").replace("main/custom_data", "custom_output")
    vis_keypoints(_img, vis_kps, joint_valid, self.skeleton, filename)

    # NOTE: since the back-projection block above is commented out, the 3D plot
    # uses image-space coordinates (pred_joint_coord_img), not camera-space ones.
    filename = img_path.replace(".jpg", "3D.jpg").replace("main/custom_data", "custom_output")
    vis_3d_keypoints(pred_joint_coord_img, joint_valid, self.skeleton, filename)

    print('Finished processing image!' + '\n')
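# A hypothetical single-image driver for visualize(), kept commented in the
# style of the blocks above; 'model', 'inputs', and the output tensor names
# are assumptions, not part of this file. The input image is expected under
# main/custom_data so the 2D/3D outputs land in custom_output.
# with torch.no_grad():
#     out = model(inputs, targets, meta_info, 'test')
# preds = {
#     'joint_coord': out['joint_coord'].cpu().numpy(),        # (1, 42, 3)
#     'rel_root_depth': out['rel_root_depth'].cpu().numpy(),  # (1,)
#     'hand_type': out['hand_type'].cpu().numpy(),            # (1, 2)
# }
# dataset.visualize(preds, 'unused.jpg', 'main/custom_data/sample.jpg', inv_trans)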