def regress(self, img_original, hand_bbox_list, add_margin=False): """ args: img_original: original raw image (BGR order by using cv2.imread) hand_bbox_list: [ dict( left_hand = [x0, y0, w, h] or None right_hand = [x0, y0, w, h] or None ) ... ] add_margin: whether to do add_margin given the hand bbox outputs: To be filled Note: Output element can be None. This is to keep the same output size with input bbox """ pred_output_list = list() hand_bbox_list_processed = list() for hand_bboxes in hand_bbox_list: if hand_bboxes is None: # Should keep the same size with bbox size pred_output_list.append(None) hand_bbox_list_processed.append(None) continue pred_output = dict(left_hand=None, right_hand=None) hand_bboxes_processed = dict(left_hand=None, right_hand=None) for hand_type in hand_bboxes: bbox = hand_bboxes[hand_type] if bbox is None: continue else: img_cropped, norm_img, bbox_scale_ratio, bbox_processed = \ self.__process_hand_bbox(img_original, hand_bboxes[hand_type], hand_type, add_margin) hand_bboxes_processed[hand_type] = bbox_processed with torch.no_grad(): # pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device)) self.model_regressor.set_input_imgonly( {'img': norm_img.unsqueeze(0)}) self.model_regressor.test() pred_res = self.model_regressor.get_pred_result() ##Output cam = pred_res['cams'][0, :] #scale, tranX, tranY pred_verts_origin = pred_res['pred_verts'][0] faces = self.model_regressor.right_hand_faces_local pred_pose = pred_res['pred_pose_params'].copy() pred_joints = pred_res['pred_joints_3d'].copy()[0] if hand_type == 'left_hand': cam[1] *= -1 pred_verts_origin[:, 0] *= -1 faces = faces[:, ::-1] pred_pose[:, 1::3] *= -1 pred_pose[:, 2::3] *= -1 pred_joints[:, 0] *= -1 pred_output[hand_type] = dict() pred_output[hand_type][ 'pred_vertices_smpl'] = pred_verts_origin # SMPL-X hand vertex in bbox space pred_output[hand_type][ 'pred_joints_smpl'] = pred_joints pred_output[hand_type]['faces'] = faces pred_output[hand_type][ 'bbox_scale_ratio'] = bbox_scale_ratio pred_output[hand_type]['bbox_top_left'] = np.array( bbox_processed[:2]) pred_output[hand_type]['pred_camera'] = cam pred_output[hand_type]['img_cropped'] = img_cropped # pred hand pose & shape params & hand joints 3d pred_output[hand_type][ 'pred_hand_pose'] = pred_pose # (1, 48): (1, 3) for hand rotation, (1, 45) for finger pose. pred_output[hand_type]['pred_hand_betas'] = pred_res[ 'pred_shape_params'] # (1, 10) #Convert vertices into bbox & image space cam_scale = cam[0] cam_trans = cam[1:] vert_smplcoord = pred_verts_origin.copy() joints_smplcoord = pred_joints.copy() vert_bboxcoord = convert_smpl_to_bbox( vert_smplcoord, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space joints_bboxcoord = convert_smpl_to_bbox( joints_smplcoord, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space hand_boxScale_o2n = pred_output[hand_type][ 'bbox_scale_ratio'] hand_bboxTopLeft = pred_output[hand_type][ 'bbox_top_left'] vert_imgcoord = convert_bbox_to_oriIm( vert_bboxcoord, hand_boxScale_o2n, hand_bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output[hand_type][ 'pred_vertices_img'] = vert_imgcoord joints_imgcoord = convert_bbox_to_oriIm( joints_bboxcoord, hand_boxScale_o2n, hand_bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output[hand_type][ 'pred_joints_img'] = joints_imgcoord pred_output_list.append(pred_output) hand_bbox_list_processed.append(hand_bboxes_processed) assert len(hand_bbox_list_processed) == len(hand_bbox_list) return pred_output_list
def integration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_shape): integral_output_list = list() for i in range(len(pred_body_list)): body_info = pred_body_list[i] hand_info = pred_hand_list[i] if body_info is None: integral_output_list.append(None) continue # copy and paste pred_betas = torch.from_numpy(body_info['pred_betas']).cuda() pred_rotmat = torch.from_numpy(body_info['pred_rotmat']).cuda() # integrate right hand pose hand_output = dict() if hand_info is not None and hand_info['right_hand'] is not None: right_hand_pose = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:, 3:]).cuda() right_hand_global_orient = torch.from_numpy(hand_info['right_hand']['pred_hand_pose'][:,: 3]).cuda() right_hand_local_orient = transfer_rotation( smplx_model, pred_rotmat, right_hand_global_orient, 21) pred_rotmat[0, 21] = right_hand_local_orient else: right_hand_pose = torch.from_numpy(np.zeros( (1,45) , dtype= np.float32)).cuda() right_hand_global_orient = None right_hand_local_orient = None # integrate left hand pose if hand_info is not None and hand_info['left_hand'] is not None: left_hand_pose = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, 3:]).cuda() left_hand_global_orient = torch.from_numpy(hand_info['left_hand']['pred_hand_pose'][:, :3]).cuda() left_hand_local_orient = transfer_rotation( smplx_model, pred_rotmat, left_hand_global_orient, 20) pred_rotmat[0, 20] = left_hand_local_orient else: left_hand_pose = torch.from_numpy(np.zeros((1,45), dtype= np.float32)).cuda() left_hand_global_orient = None left_hand_local_orient = None pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda() pred_aa = pred_aa.reshape(pred_aa.shape[0], 72) smplx_output = smplx_model( betas = pred_betas, body_pose = pred_aa[:,3:], global_orient = pred_aa[:,:3], right_hand_pose = right_hand_pose, left_hand_pose= left_hand_pose, pose2rot = True) pred_vertices = smplx_output.vertices pred_vertices = pred_vertices[0].detach().cpu().numpy() pred_joints_3d = smplx_output.joints pred_joints_3d = pred_joints_3d[0].detach().cpu().numpy() camScale = body_info['pred_camera'][0] camTrans = body_info['pred_camera'][1:] bbox_top_left = body_info['bbox_top_left'] bbox_scale_ratio = body_info['bbox_scale_ratio'] integral_output = dict() integral_output['pred_vertices_smpl'] = pred_vertices integral_output['faces'] = smplx_model.faces integral_output['bbox_scale_ratio'] = bbox_scale_ratio integral_output['bbox_top_left'] = bbox_top_left integral_output['pred_camera'] = body_info['pred_camera'] pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0]) integral_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72) integral_output['pred_betas'] = pred_betas.detach().cpu().numpy() # convert mesh to original image space (X,Y are aligned to image) pred_vertices_bbox = convert_smpl_to_bbox( pred_vertices, camScale, camTrans) pred_vertices_img = convert_bbox_to_oriIm( pred_vertices_bbox, bbox_scale_ratio, bbox_top_left, image_shape[1], image_shape[0]) integral_output['pred_vertices_img'] = pred_vertices_img # convert mesh to original image space (X,Y are aligned to image) pred_joints_bbox = convert_smpl_to_bbox( pred_joints_3d, camScale, camTrans) pred_joints_img = convert_bbox_to_oriIm( pred_joints_bbox, bbox_scale_ratio, bbox_top_left, image_shape[1], image_shape[0]) integral_output['pred_joints_img'] = pred_joints_img pred_left_hand_joints = smplx_output.left_hand_joints pred_left_hand_joints = pred_left_hand_joints[0].detach().cpu().numpy() # convert mesh to original image space (X,Y are aligned to image) pred_left_hand_joints_bbox = convert_smpl_to_bbox( pred_left_hand_joints, camScale, camTrans) pred_left_hand_joints_img = convert_bbox_to_oriIm( pred_left_hand_joints_bbox, bbox_scale_ratio, bbox_top_left, image_shape[1], image_shape[0]) integral_output['pred_left_hand_joints_img'] = pred_left_hand_joints_img pred_right_hand_joints = smplx_output.right_hand_joints pred_right_hand_joints = pred_right_hand_joints[0].detach().cpu().numpy() # convert mesh to original image space (X,Y are aligned to image) pred_right_hand_joints_bbox = convert_smpl_to_bbox( pred_right_hand_joints, camScale, camTrans) pred_right_hand_joints_img = convert_bbox_to_oriIm( pred_right_hand_joints_bbox, bbox_scale_ratio, bbox_top_left, image_shape[1], image_shape[0]) integral_output['pred_right_hand_joints_img'] = pred_right_hand_joints_img # keep hand info r_hand_local_orient_body = body_info['pred_rotmat'][:, 21] # rot-mat r_hand_global_orient_body = transfer_rotation( smplx_model, pred_rotmat, torch.from_numpy(r_hand_local_orient_body).cuda(), 21, 'l2g', 'aa').numpy().reshape(1, 3) # aa r_hand_local_orient_body = gu.rotation_matrix_to_angle_axis(r_hand_local_orient_body) # rot-mat -> aa l_hand_local_orient_body = body_info['pred_rotmat'][:, 20] l_hand_global_orient_body = transfer_rotation( smplx_model, pred_rotmat, torch.from_numpy(l_hand_local_orient_body).cuda(), 20, 'l2g', 'aa').numpy().reshape(1, 3) l_hand_local_orient_body = gu.rotation_matrix_to_angle_axis(l_hand_local_orient_body) # rot-mat -> aa r_hand_local_orient_hand = None r_hand_global_orient_hand = None if right_hand_local_orient is not None: r_hand_local_orient_hand = gu.rotation_matrix_to_angle_axis( right_hand_local_orient).detach().cpu().numpy().reshape(1, 3) r_hand_global_orient_hand = right_hand_global_orient.detach().cpu().numpy().reshape(1, 3) l_hand_local_orient_hand = None l_hand_global_orient_hand = None if left_hand_local_orient is not None: l_hand_local_orient_hand = gu.rotation_matrix_to_angle_axis( left_hand_local_orient).detach().cpu().numpy().reshape(1, 3) l_hand_global_orient_hand = left_hand_global_orient.detach().cpu().numpy().reshape(1, 3) # poses and rotations related to hands integral_output['left_hand_local_orient_body'] = l_hand_local_orient_body integral_output['left_hand_global_orient_body'] = l_hand_global_orient_body integral_output['right_hand_local_orient_body'] = r_hand_local_orient_body integral_output['right_hand_global_orient_body'] = r_hand_global_orient_body integral_output['left_hand_local_orient_hand'] = l_hand_local_orient_hand integral_output['left_hand_global_orient_hand'] = l_hand_global_orient_hand integral_output['right_hand_local_orient_hand'] = r_hand_local_orient_hand integral_output['right_hand_global_orient_hand'] = r_hand_global_orient_hand integral_output['pred_left_hand_pose'] = left_hand_pose.detach().cpu().numpy() integral_output['pred_right_hand_pose'] = right_hand_pose.detach().cpu().numpy() # predicted hand betas, cameras, top-left corner and center left_hand_betas = None left_hand_camera = None left_hand_bbox_scale = None left_hand_bbox_top_left = None if hand_info is not None and hand_info['left_hand'] is not None: left_hand_betas = hand_info['left_hand']['pred_hand_betas'] left_hand_camera = hand_info['left_hand']['pred_camera'] left_hand_bbox_scale = hand_info['left_hand']['bbox_scale_ratio'] left_hand_bbox_top_left = hand_info['left_hand']['bbox_top_left'] right_hand_betas = None right_hand_camera = None right_hand_bbox_scale = None right_hand_bbox_top_left = None if hand_info is not None and hand_info['right_hand'] is not None: right_hand_betas = hand_info['right_hand']['pred_hand_betas'] right_hand_camera = hand_info['right_hand']['pred_camera'] right_hand_bbox_scale = hand_info['right_hand']['bbox_scale_ratio'] right_hand_bbox_top_left = hand_info['right_hand']['bbox_top_left'] integral_output['pred_left_hand_betas'] = left_hand_betas integral_output['left_hand_camera'] = left_hand_camera integral_output['left_hand_bbox_scale_ratio'] = left_hand_bbox_scale integral_output['left_hand_bbox_top_left'] = left_hand_bbox_top_left integral_output['pred_right_hand_betas'] = right_hand_betas integral_output['right_hand_camera'] = right_hand_camera integral_output['right_hand_bbox_scale_ratio'] = right_hand_bbox_scale integral_output['right_hand_bbox_top_left'] = right_hand_bbox_top_left integral_output_list.append(integral_output) return integral_output_list
def __calc_mesh(demo_type, smpl_type, smpl_model, img_shape, pred_output_list): for pred_output in pred_output_list: if pred_output is not None: # hand if demo_type == 'hand': assert 'left_hand' in pred_output and 'right_hand' in pred_output for hand_type in pred_output: hand_pred = pred_output[hand_type] if hand_pred is not None: pose_params = torch.from_numpy( hand_pred['pred_hand_pose']) betas = torch.from_numpy(hand_pred['pred_hand_betas']) pred_verts, hand_faces = __calc_hand_mesh( hand_type, pose_params, betas, smpl_model) hand_pred['pred_vertices_smpl'] = pred_verts cam_scale = hand_pred['pred_camera'][0] cam_trans = hand_pred['pred_camera'][1:] vert_bboxcoord = convert_smpl_to_bbox( pred_verts, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space bbox_scale_ratio = hand_pred['bbox_scale_ratio'] bbox_top_left = hand_pred['bbox_top_left'] vert_imgcoord = convert_bbox_to_oriIm( vert_bboxcoord, bbox_scale_ratio, bbox_top_left, img_shape[1], img_shape[0]) pred_output[hand_type][ 'pred_vertices_img'] = vert_imgcoord # body else: pose_params = torch.from_numpy(pred_output['pred_body_pose']) betas = torch.from_numpy(pred_output['pred_betas']) if 'pred_right_hand_pose' in pred_output: pred_right_hand_pose = torch.from_numpy( pred_output['pred_right_hand_pose']) else: pred_right_hand_pose = torch.zeros((1, 45), dtype=torch.float32) if 'pred_left_hand_pose' in pred_output: pred_left_hand_pose = torch.from_numpy( pred_output['pred_left_hand_pose']) else: pred_left_hand_pose = torch.zeros((1, 45), dtype=torch.float32) pred_verts, faces = _calc_body_mesh(smpl_type, smpl_model, pose_params, betas, pred_left_hand_pose, pred_right_hand_pose) pred_output['pred_vertices_smpl'] = pred_verts pred_output['faces'] = faces cam_scale = pred_output['pred_camera'][0] cam_trans = pred_output['pred_camera'][1:] vert_bboxcoord = convert_smpl_to_bbox( pred_verts, cam_scale, cam_trans, bAppTransFirst=False) # SMPL space -> bbox space bbox_scale_ratio = pred_output['bbox_scale_ratio'] bbox_top_left = pred_output['bbox_top_left'] vert_imgcoord = convert_bbox_to_oriIm(vert_bboxcoord, bbox_scale_ratio, bbox_top_left, img_shape[1], img_shape[0]) pred_output['pred_vertices_img'] = vert_imgcoord
def regress(self, img_original, body_bbox_list): """ args: img_original: original raw image (BGR order by using cv2.imread) body_bbox: bounding box around the target: (minX, minY, width, height) outputs: pred_vertices_img: pred_joints_vis_img: pred_rotmat pred_betas pred_camera bbox: [bbr[0], bbr[1],bbr[0]+bbr[2], bbr[1]+bbr[3]]) bboxTopLeft: bbox top left (redundant) boxScale_o2n: bbox scaling factor (redundant) """ pred_output_list = list() for body_bbox in body_bbox_list: img, norm_img, boxScale_o2n, bboxTopLeft, bbox = process_image_bbox( img_original, body_bbox, input_res=constants.IMG_RES) # bboxTopLeft = bbox['bboxXYWH'][:2] ''' print("bboxTopLeft", bboxTopLeft) print('img', img.shape) cv2.imwrite("img.png", img) print("here !!!!", ) sys.exit(0) ''' if img is None: pred_output_list.append(None) continue with torch.no_grad(): # model forward pred_rotmat, pred_betas, pred_camera = self.model_regressor( norm_img.to(self.device)) #Convert rot_mat to aa since hands are always in aa pred_aa = rotmat3x3_to_angleaxis(pred_rotmat) pred_aa = pred_aa.view(pred_aa.shape[0], -1) smpl_output = self.smpl(betas=pred_betas, body_pose=pred_aa[:, 3:], global_orient=pred_aa[:, :3], pose2rot=True) pred_vertices = smpl_output.vertices pred_joints_3d = smpl_output.joints pred_vertices = pred_vertices[0].cpu().numpy() pred_camera = pred_camera.cpu().numpy().ravel() camScale = pred_camera[0] # *1.15 camTrans = pred_camera[1:] pred_output = dict() # Convert mesh to original image space (X,Y are aligned to image) # 1. SMPL -> 2D bbox # 2. 2D bbox -> original 2D image pred_vertices_bbox = convert_smpl_to_bbox( pred_vertices, camScale, camTrans) pred_vertices_img = convert_bbox_to_oriIm( pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) # Convert joint to original image space (X,Y are aligned to image) pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (1,49,3) pred_joints_vis = pred_joints_3d[:, :3] # (49,3) pred_joints_vis_bbox = convert_smpl_to_bbox( pred_joints_vis, camScale, camTrans) pred_joints_vis_img = convert_bbox_to_oriIm( pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) # Output pred_output['img_cropped'] = img[:, :, ::-1] pred_output['pred_vertices_smpl'] = smpl_output.vertices[ 0].cpu().numpy() # SMPL vertex in original smpl space pred_output[ 'pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space pred_output[ 'pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space pred_rotmat_tensor = torch.zeros((1, 24, 3, 4), dtype=torch.float32) pred_rotmat_tensor[:, :, :, :3] = pred_rotmat.detach().cpu() pred_aa_tensor = gu.rotation_matrix_to_angle_axis( pred_rotmat_tensor.squeeze()) pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy( ).reshape(1, 72) pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy( ) # (1, 24, 3, 3) pred_output['pred_betas'] = pred_betas.detach().cpu().numpy( ) # (1, 10) pred_output['pred_camera'] = pred_camera pred_output['bbox_top_left'] = bboxTopLeft pred_output['bbox_scale_ratio'] = boxScale_o2n pred_output['faces'] = self.smpl.faces if self.use_smplx: img_center = np.array( (img_original.shape[1], img_original.shape[0])) * 0.5 # right hand pred_joints = smpl_output.right_hand_joints[0].cpu().numpy( ) pred_joints_bbox = convert_smpl_to_bbox( pred_joints, camScale, camTrans) pred_joints_img = convert_bbox_to_oriIm( pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output[ 'right_hand_joints_img_coord'] = pred_joints_img # left hand pred_joints = smpl_output.left_hand_joints[0].cpu().numpy() pred_joints_bbox = convert_smpl_to_bbox( pred_joints, camScale, camTrans) pred_joints_img = convert_bbox_to_oriIm( pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output['left_hand_joints_img_coord'] = pred_joints_img pred_output_list.append(pred_output) return pred_output_list
def intergration_copy_paste(pred_body_list, pred_hand_list, smplx_model, image_shape): integral_output_list = list() for i in range(len(pred_body_list)): body_info = pred_body_list[i] hand_info = pred_hand_list[i] if body_info is None or hand_info is None: integral_output_list.append(None) continue # copy and paste pred_betas = torch.from_numpy(body_info['pred_betas']).cuda() pred_rotmat = torch.from_numpy(body_info['pred_rotmat']).cuda() if hand_info['right_hand'] is not None: right_hand_pose = torch.from_numpy( hand_info['right_hand']['pred_hand_pose'][:, 3:]).cuda() right_hand_global_orient = torch.from_numpy( hand_info['right_hand']['pred_hand_pose'][:, :3]).cuda() right_hand_local_orient = transfer_hand_wrist( smplx_model, pred_rotmat[0], right_hand_global_orient, 'right_hand', 'l2g') pred_rotmat[0, 21] = right_hand_local_orient else: right_hand_pose = torch.from_numpy( np.zeros((1, 45), dtype=np.float32)).cuda() if hand_info['left_hand'] is not None: left_hand_pose = torch.from_numpy( hand_info['left_hand']['pred_hand_pose'][:, 3:]).cuda() left_hand_global_orient = torch.from_numpy( hand_info['left_hand']['pred_hand_pose'][:, :3]).cuda() left_hand_local_orient = transfer_hand_wrist( smplx_model, pred_rotmat[0], left_hand_global_orient, 'left_hand', 'l2g') pred_rotmat[0, 20] = left_hand_local_orient else: left_hand_pose = torch.from_numpy( np.zeros((1, 45), dtype=np.float32)).cuda() # smplx_output = smplx_model( # betas = pred_betas, # body_pose = pred_rotmat[:,1:], # global_orient = pred_rotmat[:,0].unsqueeze(1), # right_hand_pose = right_hand_pose, # left_hand_pose= left_hand_pose, # pose2rot = False) #Convert rot_mat to aa since hands are always in aa pred_aa = rotmat3x3_to_angleaxis(pred_rotmat) pred_aa = pred_aa.view(pred_aa.shape[0], -1) smplx_output = smplx_model(betas=pred_betas, body_pose=pred_aa[:, 3:], global_orient=pred_aa[:, :3], right_hand_pose=right_hand_pose, left_hand_pose=left_hand_pose, pose2rot=True) pred_vertices = smplx_output.vertices pred_vertices = pred_vertices[0].detach().cpu().numpy() pred_joints_3d = smplx_output.joints pred_joints_3d = pred_joints_3d[0].detach().cpu().numpy() camScale = body_info['pred_camera'][0] camTrans = body_info['pred_camera'][1:] bbox_top_left = body_info['bbox_top_left'] bbox_scale_ratio = body_info['bbox_scale_ratio'] integral_output = dict() integral_output['pred_vertices_smpl'] = pred_vertices integral_output['faces'] = smplx_model.faces integral_output['bbox_scale_ratio'] = bbox_scale_ratio integral_output['bbox_top_left'] = bbox_top_left integral_output['pred_camera'] = body_info['pred_camera'] pred_rotmat_tensor = torch.zeros((1, 24, 3, 4), dtype=torch.float32) pred_rotmat_tensor[:, :, :, :3] = pred_rotmat.detach().cpu() pred_aa_tensor = gu.rotation_matrix_to_angle_axis( pred_rotmat_tensor.squeeze()) integral_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy( ).reshape(1, 72) integral_output['pred_betas'] = pred_betas.detach().cpu().numpy() integral_output['pred_left_hand_pose'] = left_hand_pose.detach().cpu( ).numpy() integral_output['pred_right_hand_pose'] = right_hand_pose.detach().cpu( ).numpy() # convert mesh to original image space (X,Y are aligned to image) pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans) pred_vertices_img = convert_bbox_to_oriIm(pred_vertices_bbox, bbox_scale_ratio, bbox_top_left, image_shape[1], image_shape[0]) integral_output['pred_vertices_img'] = pred_vertices_img integral_output_list.append(integral_output) return integral_output_list
def regress(self, img_original, hand_bbox_list, add_margin=False, debug=True, viz_path="tmp.png", K=None): """ args: img_original: original raw image (BGR order by using cv2.imread) hand_bbox_list: [ dict( left_hand = [x0, y0, w, h] or None right_hand = [x0, y0, w, h] or None ) ... ] add_margin: whether to do add_margin given the hand bbox outputs: To be filled Note: Output element can be None. This is to keep the same output size with input bbox """ pred_output_list = list() hand_bbox_list_processed = list() for hand_bboxes in hand_bbox_list: if hand_bboxes is None: # Should keep the same size with bbox size pred_output_list.append(None) hand_bbox_list_processed.append(None) continue pred_output = dict(left_hand=None, right_hand=None) hand_bboxes_processed = dict(left_hand=None, right_hand=None) for hand_type in hand_bboxes: bbox = hand_bboxes[hand_type] if bbox is None: continue else: img_cropped, norm_img, bbox_scale_ratio, bbox_processed = \ self.__process_hand_bbox(img_original, hand_bboxes[hand_type], hand_type, add_margin) hand_bboxes_processed[hand_type] = bbox_processed with torch.no_grad(): # pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device)) self.model_regressor.set_input_imgonly( {'img': norm_img.unsqueeze(0)}) self.model_regressor.test() pred_res = self.model_regressor.get_pred_result() ##Output cam = pred_res['cams'][0, :] #scale, tranX, tranY pred_verts_origin = pred_res['pred_verts'][0] faces = self.model_regressor.right_hand_faces_local pred_pose = pred_res['pred_pose_params'].copy() pred_joints = pred_res['pred_joints_3d'].copy()[0] if hand_type == 'left_hand': cam[1] *= -1 pred_verts_origin[:, 0] *= -1 faces = faces[:, ::-1] pred_pose[:, 1::3] *= -1 pred_pose[:, 2::3] *= -1 pred_joints[:, 0] *= -1 pred_output[hand_type] = dict() pred_output[hand_type][ 'pred_vertices_smpl'] = pred_verts_origin # SMPL-X hand vertex in bbox space pred_output[hand_type][ 'pred_joints_smpl'] = pred_joints pred_output[hand_type]['faces'] = faces pred_output[hand_type][ 'bbox_scale_ratio'] = bbox_scale_ratio pred_output[hand_type]['bbox_top_left'] = np.array( bbox_processed[:2]) pred_output[hand_type]['pred_camera'] = cam pred_output[hand_type]['img_cropped'] = img_cropped # Get global camera global_cams = local_to_global_cam( bbox_wh_to_xy( np.array([list(bbox_processed) ]).astype(np.float)), cam[None, :], max(img_original.shape)) pred_output[hand_type]["global_cams"] = global_cams # pred hand pose & shape params & hand joints 3d pred_output[hand_type][ 'pred_hand_pose'] = pred_pose # (1, 48): (1, 3) for hand rotation, (1, 45) for finger pose. # Recover PCA components if hand_type == "right_hand": comps = torch.Tensor(self.mano_model.rh_mano_pca. full_hand_components) elif hand_type == "left_hand": comps = torch.Tensor(self.mano_model.lh_mano_pca. full_hand_components) pca_comps = torch.einsum('bi,ij->bj', [ torch.Tensor(pred_pose[:, 3:]), torch.inverse(comps) ]) pred_output[hand_type]['pred_hand_betas'] = pred_res[ 'pred_shape_params'] # (1, 10) pred_output[hand_type][ 'pred_pca_pose'] = pca_comps.numpy() hand_side = hand_type.split("_")[0] pred_output[hand_type]["hand_side"] = hand_side pred_output[hand_type][ 'mano_trans'] = self.mano_model.get_mano_trans( mano_pose=pred_pose[:, 3:][0], rot=pred_pose[:, :3][0], betas=pred_res["pred_shape_params"][0], ref_verts=pred_verts_origin, side=hand_side) #Convert vertices into bbox & image space cam_scale = cam[0] cam_trans = cam[1:] vert_smplcoord = pred_verts_origin.copy() joints_smplcoord = pred_joints.copy() vert_bboxcoord = convert_smpl_to_bbox( vert_smplcoord, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space joints_bboxcoord = convert_smpl_to_bbox( joints_smplcoord, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space hand_boxScale_o2n = pred_output[hand_type][ 'bbox_scale_ratio'] hand_bboxTopLeft = pred_output[hand_type][ 'bbox_top_left'] vert_imgcoord = convert_bbox_to_oriIm( vert_bboxcoord, hand_boxScale_o2n, hand_bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output[hand_type][ 'pred_vertices_img'] = vert_imgcoord joints_imgcoord = convert_bbox_to_oriIm( joints_bboxcoord, hand_boxScale_o2n, hand_bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output[hand_type][ 'pred_joints_img'] = joints_imgcoord if K is not None: K = npt.numpify(K) K_viz = K.copy() # r_hand, _ = cv2.Rodrigues(r_vec) K_viz[:2] = K_viz[:2] / max(img_original.shape) ortho_scale_pixels = global_cams[0, 0] / 2 * max( img_original.shape ) # scale of verts_pixel / pred_verts_origin ortho_trans_pixels = ( global_cams[0, 1:] + 1 / global_cams[0, 0]) * ortho_scale_pixels t_vec = camconvs.weakcam2persptrans( np.concatenate([ np.array([ortho_scale_pixels]), ortho_trans_pixels ], 0) / max(img_original.shape), K_viz)[:, None] r_hand = np.eye(3) # Sanity check nptype = np.float32 proj2d, camverts = project.proj2d( pred_verts_origin.astype(nptype), K.astype(nptype), rot=r_hand.astype(nptype), trans=t_vec[:, 0].astype(nptype)) pred_output[hand_type]['camverts'] = camverts pred_output[hand_type][ 'perspective_trans'] = t_vec.transpose() pred_output[hand_type]['perspective_rot'] = r_hand pred_output_list.append(pred_output) hand_bbox_list_processed.append(hand_bboxes_processed) assert len(hand_bbox_list_processed) == len(hand_bbox_list) return pred_output_list
def body_regress(self, img_original, body_bbox_list): """ args: img_original: original raw image (BGR order by using cv2.imread) body_bbox: bounding box around the target: (minX, minY, width, height) outputs: pred_vertices_img: pred_joints_vis_img: pred_rotmat pred_betas pred_camera bbox: [bbr[0], bbr[1],bbr[0]+bbr[2], bbr[1]+bbr[3]]) bboxTopLeft: bbox top left (redundant) boxScale_o2n: bbox scaling factor (redundant) """ pred_output_list = list() for body_bbox in body_bbox_list: img, norm_img, boxScale_o2n, bboxTopLeft, bbox = process_image_bbox( img_original, body_bbox, input_res=constants.IMG_RES) bboxTopLeft = np.array(bboxTopLeft) # bboxTopLeft = bbox['bboxXYWH'][:2] if img is None: pred_output_list.append(None) continue with torch.no_grad(): # model forward pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device)) #Convert rot_mat to aa since hands are always in aa # pred_aa = rotmat3x3_to_angle_axis(pred_rotmat) pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda() pred_aa = pred_aa.reshape(pred_aa.shape[0], 72) smpl_output = self.smpl( betas=pred_betas, body_pose=pred_aa[:,3:], global_orient=pred_aa[:,:3], pose2rot=True) pred_vertices = smpl_output.vertices pred_joints_3d = smpl_output.joints pred_vertices = pred_vertices[0].cpu().numpy() pred_camera = pred_camera.cpu().numpy().ravel() camScale = pred_camera[0] # *1.15 camTrans = pred_camera[1:] pred_output = dict() # Convert mesh to original image space (X,Y are aligned to image) # 1. SMPL -> 2D bbox # 2. 2D bbox -> original 2D image pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans) pred_vertices_img = convert_bbox_to_oriIm( pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) # Convert joint to original image space (X,Y are aligned to image) pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (1,49,3) pred_joints_vis = pred_joints_3d[:,:3] # (49,3) pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans) pred_joints_vis_img = convert_bbox_to_oriIm( pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) # Output pred_output['img_cropped'] = img[:, :, ::-1] pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0]) pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72) pred_body_pose = pred_output['pred_body_pose'] pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3) pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10) pred_output['pred_camera'] = pred_camera pred_output['bbox_top_left'] = bboxTopLeft pred_output['bbox_scale_ratio'] = boxScale_o2n pred_output['faces'] = self.smpl.faces if self.use_smplx: img_center = np.array((img_original.shape[1], img_original.shape[0]) ) * 0.5 # right hand pred_joints = smpl_output.right_hand_joints[0].cpu().numpy() pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) pred_joints_img = convert_bbox_to_oriIm( pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output['right_hand_joints_img_coord'] = pred_joints_img # left hand pred_joints = smpl_output.left_hand_joints[0].cpu().numpy() pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) pred_joints_img = convert_bbox_to_oriIm( pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) pred_output['left_hand_joints_img_coord'] = pred_joints_img pred_output_list.append(pred_output) if self.body_only is True: with open('/home/student/bodyonly_side/' + 'results.txt', 'a') as file_handle: file_handle.write('\nframe_id:') file_handle.write(str(self.frame_id)) file_handle.write('\npred_body_pose:') file_handle.write(str(pred_body_pose)) file_handle.write('\npred_joints_vis_img:') file_handle.write(str(pred_joints_vis_img)) #save images(only body_module has visualizer) # extract mesh for rendering (vertices in image space and faces) from pred_output_list pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) # visualization res_img = self.visualizer.visualize(img_original,pred_mesh_list = pred_mesh_list, body_bbox_list = body_bbox_list) # show result in the screen res_img = res_img.astype(np.uint8) # ImShow(res_img) cv2.imwrite('/home/student/bodyonly_side/' +str(self.frame_id)+'.jpg',res_img) return pred_output_list