def __init__(self,
             smpl_model="assets/pretrains/smpl_model.pkl",
             part_path="assets/pretrains/smpl_part_info.json",
             device=torch.device("cuda:0")):
    self.smpl_model_path = smpl_model
    self.part_path = part_path

    # mean SMPL shape (betas), used as the default body shape
    self.mean_shape = torch.from_numpy(np.array(
        [-0.00124704, 0.00200815, 0.01044902, 0.01385473, 0.01137672,
         -0.01685408, 0.0201432, -0.00677187, 0.0050879, -0.0051118])).float()

    self.smpl = SMPL(model_path=self.smpl_model_path).to(device)

    with open(self.part_path, 'r') as f:
        # dict_keys(['00_head', '01_body', '02_left_arm', '03_right_arm', '04_left_leg',
        #            '05_right_leg', '06_left_foot', '07_right_foot', '08_left_hand', '09_right_hand'])
        self.smpl_part_info = json.load(f)

    self.right_leg_verts_idx = np.array(self.smpl_part_info['05_right_leg']['vertex'])
    self.left_leg_verts_idx = np.array(self.smpl_part_info['04_left_leg']['vertex'])

    self.right_leg_inner_verts_idx = self.get_inner_verts_idx_of_leg(
        self.right_leg_verts_idx, inner_part_rate=0.3, right=True)
    self.left_leg_inner_verts_idx = self.get_inner_verts_idx_of_leg(
        self.left_leg_verts_idx, inner_part_rate=0.3, right=False)
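# NOTE: `get_inner_verts_idx_of_leg` is defined elsewhere in this class. For
# intuition only, a plausible sketch of such a selection, assuming the SMPL
# wrapper exposes a rest-pose `v_template` vertex buffer (an assumption) and
# that "inner" means closest to the body mid-line (x = 0):
def get_inner_verts_idx_of_leg(self, leg_verts_idx, inner_part_rate=0.3, right=True):
    # Illustrative sketch, not the project's implementation. On the SMPL
    # template, the person's left side has positive x, so the inner face of
    # the right leg points toward +x and that of the left leg toward -x.
    x = self.smpl.v_template[leg_verts_idx, 0]    # rest-pose x-coordinates
    order = torch.argsort(x, descending=right)    # inner-most vertices first
    num_inner = int(len(leg_verts_idx) * inner_part_rate)
    return leg_verts_idx[order[:num_inner].cpu().numpy()]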
def _create_networks(self):
    # 1. body mesh recovery model
    self.body_rec = SMPL(self._opt.smpl_model).to(self.device)

    # 2. flow composition module
    self.flow_comp = FlowCompositionForSwapper(opt=self._opt).to(self.device)

    # 3. create generator
    self.generator, self.temporal_fifo = self._create_generator(
        self._opt.neural_render_cfg.Generator)
    self.generator = self.generator.to(self.device)
def process_func(gpu_id, process_info_list):
    # Pin this worker to a single physical GPU; inside the process that GPU
    # is then visible as "cuda:0".
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    device = torch.device("cuda:0")
    render = SMPLRenderer(image_size=IMAGE_SIZE).to(device)
    smpl = SMPL().to(device)

    print(f"----------------------------gpu_id = {gpu_id}----------------------")

    for process_info in process_info_list:
        if not process_info.check_has_been_processed(process_info.vid_infos):
            print(gpu_id, process_info)
            per_instance_func(smpl, render, process_info)
        else:
            print(process_info)
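# A minimal dispatch sketch for `process_func`: one worker process per GPU with
# the work list sharded round-robin. The GPU count, the sharding scheme, and the
# "spawn" start method are assumptions, not part of the original script.
def dispatch_across_gpus(process_info_list, num_gpus=2):
    import multiprocessing as mp

    # CUDA state must not leak from the parent, so use spawn-started workers.
    ctx = mp.get_context("spawn")
    chunks = [process_info_list[i::num_gpus] for i in range(num_gpus)]

    workers = [ctx.Process(target=process_func, args=(gpu_id, chunk))
               for gpu_id, chunk in enumerate(chunks)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()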
def __init__(self, image_size=512, device=torch.device("cpu")):
    render = SMPLRenderer(image_size=image_size,
                          face_path='./assets/pretrains/smpl_faces.npy',
                          uv_map_path='./assets/pretrains/mapper.txt',
                          fill_back=False).to(device)
    smpl = SMPL(model_path="./assets/pretrains/smpl_model.pkl").to(device)

    self.render = render
    self.smpl = smpl
    self.device = device

    self.visual_render = SMPLRenderer(image_size=image_size,
                                      face_path='./assets/pretrains/smpl_faces.npy',
                                      uv_map_path='./assets/pretrains/mapper.txt',
                                      fill_back=False).to(device)
    self.visual_render.set_ambient_light()
def visual_pose3d_results(save_video_path, img_dir, smpls_info, parse_dir=None,
                          smpl_model="./assets/checkpoints/pose3d/smpl_model.pkl",
                          image_size=512, fps=25):
    """
    Args:
        save_video_path (str): the path to save the rendered video;
        img_dir (str): the directory of the source frames;
        smpls_info (dict): the estimated SMPL information; it contains
            `valid_img_names`, `all_init_smpls`, `all_opt_smpls` and `all_keypoints`;
        parse_dir (str or None): the directory of the parsing (alpha matte) results;
        smpl_model (str): the path of the SMPL model;
        image_size (int): the image size for rendering;
        fps (int): the frame rate of the output video.

    Returns:
        None
    """
    device = torch.device("cuda:0")
    render = SMPLRenderer(image_size=image_size).to(device)
    smpl = SMPL(smpl_model).to(device)
    render.set_ambient_light()
    texs = render.color_textures().to(device)[None]

    valid_img_names = smpls_info["valid_img_names"]
    all_init_cams = smpls_info["all_init_smpls"][:, 0:3]
    all_init_poses = smpls_info["all_init_smpls"][:, 3:-10]
    all_init_shapes = smpls_info["all_init_smpls"][:, -10:]
    all_opt_cams = smpls_info["all_opt_smpls"][:, 0:3]
    all_opt_poses = smpls_info["all_opt_smpls"][:, 3:-10]
    all_opt_shapes = smpls_info["all_opt_smpls"][:, -10:]
    all_keypoints = smpls_info["all_keypoints"]

    has_opt = len(all_opt_poses) > 0
    has_kps = all_keypoints is not None and len(all_keypoints) > 0

    def render_result(imgs, cams, poses, shapes):
        nonlocal texs

        verts, _, _ = smpl(beta=shapes, theta=poses, get_skin=True)
        rd_imgs, _ = render.render(cams, verts, texs)
        sil = render.render_silhouettes(cams, verts)[:, None].contiguous()
        masked_img = imgs * (1 - sil) + rd_imgs * sil
        return masked_img

    def visual_single_frame(i, image_name):
        nonlocal img_dir, parse_dir, all_opt_cams, all_opt_poses, all_opt_shapes, \
            all_init_cams, all_init_poses, all_init_shapes, has_opt

        im_path = os.path.join(img_dir, image_name)
        image = cv2.imread(im_path)

        if has_kps:
            joints = all_keypoints[i]
            image = draw_skeleton(image, joints, radius=6, transpose=False, threshold=0.25)

        image = np.transpose(image, (2, 0, 1))
        image = image.astype(np.float32) / 255
        image = torch.tensor(image).float()[None].to(device)

        init_cams = torch.tensor(all_init_cams[i]).float()[None].to(device)
        init_pose = torch.tensor(all_init_poses[i]).float()[None].to(device)
        init_shape = torch.tensor(all_init_shapes[i]).float()[None].to(device)
        init_result = render_result(image, cams=init_cams, poses=init_pose, shapes=init_shape)

        fused_images = [image, init_result]

        if parse_dir is not None:
            alpha_path = os.path.join(parse_dir, image_name.split(".")[0] + "_alpha.png")
            if os.path.exists(alpha_path):
                alpha = cv2.imread(alpha_path)
                alpha = alpha.astype(np.float32) / 255
                alpha = np.transpose(alpha, (2, 0, 1))
                alpha = torch.from_numpy(alpha).to(device)
                alpha.unsqueeze_(0)
                fused_images.append(alpha)

        if has_opt:
            opt_cams = torch.tensor(all_opt_cams[i]).float()[None].to(device)
            opt_pose = torch.tensor(all_opt_poses[i]).float()[None].to(device)
            opt_shape = torch.tensor(all_opt_shapes[i]).float()[None].to(device)
            opt_result = render_result(image, cams=opt_cams, poses=opt_pose, shapes=opt_shape)
            fused_images.append(opt_result)

        num = len(fused_images)
        nrow = 2 if num % 2 == 0 else 3

        fused_images = torch.cat(fused_images, dim=0)
        fused_images = make_grid(fused_images, nrow=nrow, normalize=False)

        return fused_images

    if len(all_init_shapes) == 0:
        return

    first_image = visual_single_frame(0, valid_img_names[0]).cpu().numpy()
    height, width = first_image.shape[1:]

    tmp_avi_video_path = f"{save_video_path}.avi"
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    videoWriter = cv2.VideoWriter(tmp_avi_video_path, fourcc, fps, (width, height))

    for i, image_name in enumerate(tqdm(valid_img_names)):
        fused_image = visual_single_frame(i, image_name)
        fused_image = fused_image.cpu().numpy()
        fused_image = np.transpose(fused_image, (1, 2, 0))
        fused_image = fused_image * 255
        fused_image = fused_image.astype(np.uint8)
        videoWriter.write(fused_image)

    videoWriter.release()

    convert_avi_to_mp4(tmp_avi_video_path, save_video_path)
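# A hypothetical call sketch for `visual_pose3d_results`; the paths are
# placeholders, while the required `smpls_info` keys and the 85-dim SMPL layout
# (3 cam + 72 pose + 10 shape) follow from the slicing inside the function.
if __name__ == "__main__":
    smpls_info = {
        "valid_img_names": ["00000.png", "00001.png"],
        "all_init_smpls": np.zeros((2, 85), dtype=np.float32),
        "all_opt_smpls": np.zeros((2, 85), dtype=np.float32),
        "all_keypoints": None,  # the skeleton overlay is skipped when None
    }
    visual_pose3d_results("./outputs/pose3d_visual.mp4", "./outputs/frames",
                          smpls_info, parse_dir=None)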
import torch

from tqdm import tqdm

from iPERCore.tools.human_digitalizer import deformers
from iPERCore.tools.human_digitalizer import renders
from iPERCore.tools.human_digitalizer.bodynets import SMPL, SMPLH
from iPERCore.tools.utils.filesio.persistence import load_pickle_file
from iPERCore.tools.utils.visualizers.visdom_visualizer import VisdomVisualizer

visualizer = VisdomVisualizer(env='test_deformers', ip='http://10.10.10.100', port=31102)

IMAGE_SIZE = 512

device = torch.device("cuda:0")
smpl = SMPL(model_path="assets/checkpoints/pose3d/smpl_model.pkl").to(device)
smplh = SMPLH(model_path="./assets/checkpoints/pose3d/smpl_model_with_hand_v2.pkl").to(device)

render = renders.SMPLRenderer(image_size=IMAGE_SIZE).to(device)
render.set_ambient_light()
texs = render.color_textures()[None].to(device)


def cloth_link_animate_visual(links_ids, cams, pose, shape, ref_smpl_path):
    """
    Args:
        links_ids:
        cams:
def __init__(self, opt):
    super(FlowCompositionForTrainer, self).__init__(opt)

    smpl = SMPL(model_path=self._opt.smpl_model)
    smpl.eval()

    self.smpl = smpl
class SPINRunner(BasePose3dRunner):

    def __init__(self, cfg_or_path: Union[EasyDict, str], device=torch.device("cpu")):
        """
        Args:
            cfg_or_path (EasyDict or str): the configuration EasyDict, or the path of a
                `toml` config file. If it is an EasyDict instance, it must contain the following,
                    --ckpt_path (str): the path of the pre-trained checkpoint;
                    --smpl_path (str): the path of the SMPL model;
                    --smpl_mean_params (str): the path of the mean parameters of SMPL.

                Otherwise, if it is a `toml` file, an example could be the following,
                    ckpt_path = "./assets/pretrains/spin_ckpt.pth"
                    smpl_path = "./assets/pretrains/smpl_model.pkl"
                    smpl_mean_params = "./assets/pretrains/smpl_mean_params.npz"

            device (torch.device):
        """
        self.device = device

        # RGB normalization statistics
        self.MEAN = torch.as_tensor([0.485, 0.456, 0.406])[None, :, None, None].to(self.device)
        self.STD = torch.as_tensor([0.229, 0.224, 0.225])[None, :, None, None].to(self.device)

        if isinstance(cfg_or_path, str):
            cfg = EasyDict(load_toml_file(cfg_or_path))
        else:
            cfg = cfg_or_path

        self.model = build_spin(pretrained=False)
        checkpoint = torch.load(cfg["ckpt_path"])
        self.model.load_state_dict(checkpoint, strict=True)
        self.model.eval()

        self._smpl = SMPL(cfg["smpl_path"]).to(self.device)
        self.model = self.model.to(self.device)

    def __call__(self, image: np.ndarray,
                 boxes: Union[np.ndarray, List, Tuple, Any],
                 action: ACTIONS = ACTIONS.SPLIT) -> Dict[str, Any]:
        """
        Args:
            image (np.ndarray): (H, W, C), color intensity [0, 255] with BGR color channel;
            boxes (np.ndarray or List, or Tuple or None): (N, 4);
            action (ACTIONS):
                -- ACTIONS.SPLIT: only return the `cams`, `pose` and `shape` of SMPL;
                -- ACTIONS.SKIN: additionally return the `verts`;
                -- ACTIONS.SMPL: only return the raw 85-dim `theta`;
                -- otherwise: return `cams`, `pose`, `shape`, `verts`, `j2d` and `j3d`.

        Returns:
            result (dict):
        """
        image = np.copy(image)

        proc_img, proc_info = preprocess(image, boxes)
        proc_img = torch.tensor(proc_img).to(device=self.device)[None]

        with torch.no_grad():
            proc_img = (proc_img - self.MEAN) / self.STD
            smpls = self.model(proc_img)

            cams_orig = cam_init2orig(
                smpls[:, 0:3], proc_info["scale"],
                torch.tensor(proc_info["start_pt"], device=self.device).float())
            cams = cam_norm(cams_orig, proc_info["im_shape"][0])
            smpls[:, 0:3] = cams

        if action == ACTIONS.SPLIT:
            result = self.body_model.split(smpls)
        elif action == ACTIONS.SKIN:
            result = self.body_model.skinning(smpls)
        elif action == ACTIONS.SMPL:
            result = {"theta": smpls}
        else:
            result = self.body_model.get_details(smpls)

        result["proc_info"] = proc_info

        return result

    def run_with_smplify(self, image_paths: List[str],
                         boxes: List[Union[List, Tuple, np.ndarray]],
                         keypoints_info: Dict,
                         smplify_runner: BasePose3dRefiner,
                         batch_size: int = 16,
                         num_workers: int = 4,
                         filter_invalid: bool = True,
                         temporal: bool = True):
        """
        Args:
            image_paths (list of str): the image paths;
            boxes (list of Union[np.ndarray, list, tuple]): the bounding boxes of each image;
            keypoints_info (Dict): the keypoints information of each image;
            smplify_runner (BasePose3dRefiner): the SMPLify instance; it must contain the
                keypoint_formater;
            batch_size (int): the mini-batch size;
            num_workers (int): the number of processes;
            filter_invalid (bool): the flag to control whether to filter invalid frames or not;
            temporal (bool): use temporal smoothing optimization or not.

        Returns:
            smpl_infos (dict): the estimated SMPL information, it contains,
                --all_init_smpls (torch.Tensor): (num, 85), the initialized smpls;
                --all_opt_smpls (torch.Tensor): (num, 85), the optimized smpls;
                --all_valid_ids (torch.Tensor): (num of valid frames,), the valid indexes.
""" def head_is_valid(head_boxes): return (head_boxes[:, 1] - head_boxes[:, 0]) * ( head_boxes[:, 3] - head_boxes[:, 2]) > 10 * 10 dataset = InferenceDatasetWithKeypoints( image_paths, boxes, keypoints_info, smplify_runner.keypoint_formater, image_size=224, temporal=temporal) data_loader = build_inference_loader(dataset, batch_size=batch_size, num_workers=num_workers) """ sample (dict): the sample information, it contains, --image (torch.Tensor): (3, 224, 224) is the cropped image range of [0, 1] and normalized by MEAN and STD, RGB channel; --orig_image (torch.Tensor): (3, height, width) is the in rage of [0, 1], RGB channel; --im_shape (torch.Tensor): (height, width) --keypoints (dict): (num_joints, 3), and num_joints could be [75,]. --center (torch.Tensor): (2,); --start_pt (torch.Tensor): (2,); --scale (torch.Tensor): (1,); --img_path (str): the image path. """ all_init_smpls = [] all_opt_smpls = [] all_pose3d_img_ids = [] for sample in tqdm(data_loader): images = sample["image"].to(self.device) start_pt = sample["start_pt"].to(self.device) scale = sample["scale"][:, None].to(self.device).float() im_shape = sample["im_shape"][:, 0:1].to(self.device) keypoints_info = sample["keypoints"].to(self.device) img_ids = sample["img_id"] with torch.no_grad(): init_smpls = self.model(images) cams_orig = cam_init2orig(init_smpls[:, 0:3], scale, start_pt) cams = cam_norm(cams_orig, im_shape) init_smpls[:, 0:3] = cams smplify_results = smplify_runner(keypoints_info, cams, init_smpls[:, -10:], init_smpls[:, 3:-10], proc_kps=False, temporal=temporal) opt_smpls = torch.cat([ cams, smplify_results["new_opt_pose"], smplify_results["new_opt_betas"] ], dim=1) if filter_invalid: opt_smpls_info = self.get_details(opt_smpls) head_boxes = cal_head_bbox(opt_smpls_info["j2d"], image_size=512) valid = head_is_valid(head_boxes).nonzero(as_tuple=False) valid.squeeze_(-1) img_ids = img_ids[valid] all_init_smpls.append(init_smpls.cpu()) all_opt_smpls.append(opt_smpls.cpu()) all_pose3d_img_ids.append(img_ids.cpu()) all_init_smpls = torch.cat(all_init_smpls, dim=0) all_opt_smpls = torch.cat(all_opt_smpls, dim=0) all_valid_ids = torch.cat(all_pose3d_img_ids, dim=0) smpl_infos = { "all_init_smpls": all_init_smpls, "all_opt_smpls": all_opt_smpls, "all_valid_ids": all_valid_ids } return smpl_infos def run(self, image_paths: List[str], boxes: List[List], batch_size: int = 16, num_workers: int = 4, filter_invalid: bool = True, temporal: bool = True): """ Args: image_paths (list of str): the image paths; boxes (list of list): the bounding boxes of each image; batch_size (int): the mini-batch size; num_workers (int): the number of processes; filter_invalid (bool): the flag to control whether filter invalid frames or not; temporal (bool): use temporal smooth optimization or not. Returns: smpl_infos (dict): the estimated smpl infomations, it contains, --all_init_smpls (torch.Tensor): (num, 85), the initialized smpls; --all_opt_smpls (torch.Tensor): None --all_valid_ids (torch.Tensor): (num of valid frames,), the valid indexes. 
""" def head_is_valid(head_boxes): return (head_boxes[:, 1] - head_boxes[:, 0]) * ( head_boxes[:, 3] - head_boxes[:, 2]) > 10 * 10 dataset = InferenceDataset(image_paths, boxes, image_size=224) data_loader = build_inference_loader(dataset, batch_size=batch_size, num_workers=num_workers) """ sample (dict): the sample information, it contains, --image (torch.Tensor): (3, 224, 224) is the cropped image range of [0, 1] and normalized by MEAN and STD, RGB channel; --orig_image (torch.Tensor): (3, height, width) is the in rage of [0, 1], RGB channel; --im_shape (torch.Tensor): (height, width) --keypoints (dict): (num_joints, 3), and num_joints could be [75,]. --center (torch.Tensor): (2,); --start_pt (torch.Tensor): (2,); --scale (torch.Tensor): (1,); --img_path (str): the image path. """ all_init_smpls = [] all_pose3d_img_ids = [] for sample in tqdm(data_loader): images = sample["image"].to(self.device) start_pt = sample["start_pt"].to(self.device) scale = sample["scale"][:, None].to(self.device).float() im_shape = sample["im_shape"][:, 0:1].to(self.device) img_ids = sample["img_id"] with torch.no_grad(): init_smpls = self.model(images) cams_orig = cam_init2orig(init_smpls[:, 0:3], scale, start_pt) cams = cam_norm(cams_orig, im_shape) init_smpls[:, 0:3] = cams if filter_invalid: init_smpls_info = self.get_details(init_smpls) head_boxes = cal_head_bbox(init_smpls_info["j2d"], image_size=512) valid = head_is_valid(head_boxes).nonzero(as_tuple=False) valid.squeeze_(-1) img_ids = img_ids[valid] all_init_smpls.append(init_smpls.cpu()) all_pose3d_img_ids.append(img_ids.cpu()) all_init_smpls = torch.cat(all_init_smpls, dim=0) all_valid_ids = torch.cat(all_pose3d_img_ids, dim=0) smpl_infos = { "all_init_smpls": all_init_smpls, "all_opt_smpls": None, "all_valid_ids": all_valid_ids } return smpl_infos def get_details(self, smpls): return self._smpl.get_details(smpls) @property def mean_theta(self): mean_cam = self.model.init_cam mean_pose = self.model.init_pose mean_shape = self.model.init_shape mean_theta = torch.cat([mean_cam, mean_pose, mean_shape], dim=-1)[0] return mean_theta @property def body_model(self): return self._smpl