def main(exp_cfg):
    device = torch.device('cuda')
    if not torch.cuda.is_available():
        logger.error('CUDA is not available!')
        sys.exit(3)

    logger.remove()
    logger.add(lambda x: tqdm.write(x, end=''),
               level=exp_cfg.logger_level.upper(),
               colorize=True)

    model = SMPLXNet(exp_cfg)
    try:
        model = model.to(device=device)
    except RuntimeError:
        # Re-submit in case of a device error
        sys.exit(3)
    model.train()

    optim_cfg = exp_cfg.get('optim', {})
    optimizer = build_optimizer(model, optim_cfg)
    lr_scheduler = build_scheduler(optimizer, optim_cfg['scheduler'])

    checkpoint_folder = osp.join(exp_cfg.output_folder,
                                 exp_cfg.checkpoint_folder)
    checkpointer = Checkpointer(model, optimizer=optimizer,
                                scheduler=lr_scheduler,
                                save_dir=checkpoint_folder,
                                pretrained=exp_cfg.pretrained)

    arguments = {'iteration': 0, 'epoch_number': 0}
    extra_checkpoint_data = checkpointer.load_checkpoint()
    for key in arguments:
        if key in extra_checkpoint_data:
            arguments[key] = extra_checkpoint_data[key]

    dataloaders = make_all_data_loaders(exp_cfg, split='train')
    dataloader = dataloaders['body']

    print("Start training")
    start_time = time.time()
    for epoch in range(arguments['epoch_number'], optim_cfg['num_epochs']):
        train_stats = train_one_epoch(model, dataloader, optimizer, device,
                                      epoch)
        lr_scheduler.step()
        # Save on the instance, not the class (the original called
        # Checkpointer.save_checkpoint, which would raise a TypeError).
        checkpointer.save_checkpoint('checkpoint.pth')

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
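# `train_one_epoch` is called above but not defined in this file. Below is a
# minimal sketch of what it might look like, assuming a standard supervised
# PyTorch loop in which each batch yields (body_imgs, body_targets) and the
# model returns a dict with a scalar 'loss' tensor. The real ExPose training
# step is more involved, so treat every name here as illustrative; the sketch
# is named with a leading underscore so it does not shadow the real import.
def _train_one_epoch_sketch(model, dataloader, optimizer, device, epoch):
    model.train()
    running_loss = 0.0
    for body_imgs, body_targets in dataloader:
        body_imgs = body_imgs.to(device=device)
        body_targets = [target.to(device) for target in body_targets]
        output = model(body_imgs, body_targets, device=device)
        loss = output['loss']  # assumed key, not confirmed by this file
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean loss so the caller can log per-epoch statistics.
    return {'epoch': epoch,
            'mean_loss': running_loss / max(len(dataloader), 1)}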
def main(
    image_folder: str,
    exp_cfg,
    show: bool = False,
    demo_output_folder: str = 'demo_output',
    pause: float = -1,
    focal_length: float = 5000,
    rcnn_batch: int = 1,
    sensor_width: float = 36,
    save_vis: bool = True,
    save_params: bool = False,
    save_mesh: bool = False,
    degrees: Optional[List[float]] = [],
) -> None:
    device = torch.device('cuda')
    if not torch.cuda.is_available():
        logger.error('CUDA is not available!')
        sys.exit(3)

    logger.remove()
    logger.add(lambda x: tqdm.write(x, end=''),
               level=exp_cfg.logger_level.upper(),
               colorize=True)

    expose_dloader = preprocess_images(image_folder, exp_cfg,
                                       batch_size=rcnn_batch, device=device)

    demo_output_folder = osp.expanduser(osp.expandvars(demo_output_folder))
    logger.info(f'Saving results to: {demo_output_folder}')
    os.makedirs(demo_output_folder, exist_ok=True)

    model = SMPLXNet(exp_cfg)
    try:
        model = model.to(device=device)
    except RuntimeError:
        # Re-submit in case of a device error
        sys.exit(3)

    output_folder = exp_cfg.output_folder
    checkpoint_folder = osp.join(output_folder, exp_cfg.checkpoint_folder)
    checkpointer = Checkpointer(model, save_dir=checkpoint_folder,
                                pretrained=exp_cfg.pretrained)

    arguments = {'iteration': 0, 'epoch_number': 0}
    extra_checkpoint_data = checkpointer.load_checkpoint()
    for key in arguments:
        if key in extra_checkpoint_data:
            arguments[key] = extra_checkpoint_data[key]

    model = model.eval()

    means = np.array(exp_cfg.datasets.body.transforms.mean)
    std = np.array(exp_cfg.datasets.body.transforms.std)

    render = save_vis or show
    body_crop_size = exp_cfg.get('datasets', {}).get('body', {}).get(
        'transforms').get('crop_size', 256)
    if render:
        hd_renderer = HDRenderer(img_size=body_crop_size)

    total_time = 0
    cnt = 0
    for bidx, batch in enumerate(tqdm(expose_dloader, dynamic_ncols=True)):
        full_imgs_list, body_imgs, body_targets = batch
        if full_imgs_list is None:
            continue

        full_imgs = to_image_list(full_imgs_list)
        body_imgs = body_imgs.to(device=device)
        body_targets = [target.to(device) for target in body_targets]
        full_imgs = full_imgs.to(device=device)

        torch.cuda.synchronize()
        start = time.perf_counter()
        model_output = model(body_imgs, body_targets, full_imgs=full_imgs,
                             device=device)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
        cnt += 1
        total_time += elapsed

        hd_imgs = full_imgs.images.detach().cpu().numpy().squeeze()
        body_imgs = body_imgs.detach().cpu().numpy()
        body_output = model_output.get('body')

        _, _, H, W = full_imgs.shape
        # logger.info(f'{H}, {W}')
        # H, W, _ = hd_imgs.shape

        if render:
            hd_imgs = np.transpose(undo_img_normalization(hd_imgs, means, std),
                                   [0, 2, 3, 1])
            hd_imgs = np.clip(hd_imgs, 0, 1.0)
            right_hand_crops = body_output.get('right_hand_crops')
            left_hand_crops = torch.flip(body_output.get('left_hand_crops'),
                                         dims=[-1])
            head_crops = body_output.get('head_crops')
            bg_imgs = undo_img_normalization(body_imgs, means, std)

            right_hand_crops = undo_img_normalization(right_hand_crops,
                                                      means, std)
            left_hand_crops = undo_img_normalization(left_hand_crops,
                                                     means, std)
            head_crops = undo_img_normalization(head_crops, means, std)

        body_output = model_output.get('body', {})
        num_stages = body_output.get('num_stages', 3)
        stage_n_out = body_output.get(f'stage_{num_stages - 1:02d}', {})
        model_vertices = stage_n_out.get('vertices', None)

        if stage_n_out is not None:
            model_vertices = stage_n_out.get('vertices', None)
            faces = stage_n_out['faces']
            if model_vertices is not None:
                model_vertices = model_vertices.detach().cpu().numpy()
                camera_parameters = body_output.get('camera_parameters', {})
                camera_scale = camera_parameters['scale'].detach()
                camera_transl = camera_parameters['translation'].detach()

        out_img = OrderedDict()

        final_model_vertices = None
        stage_n_out = model_output.get('body', {}).get('final', {})
        if stage_n_out is not None:
            final_model_vertices = stage_n_out.get('vertices', None)

        if final_model_vertices is not None:
            final_model_vertices = final_model_vertices.detach().cpu().numpy()
            camera_parameters = model_output.get('body', {}).get(
                'camera_parameters', {})
            camera_scale = camera_parameters['scale'].detach()
            camera_transl = camera_parameters['translation'].detach()

        hd_params = weak_persp_to_blender(
            body_targets,
            camera_scale=camera_scale,
            camera_transl=camera_transl,
            H=H, W=W,
            sensor_width=sensor_width,
            focal_length=focal_length,
        )

        # bg_hd_imgs is needed by both render blocks below, so compute it
        # whenever rendering is on (the original only built it under
        # save_vis, which raised a NameError when only `show` was set).
        if render:
            bg_hd_imgs = np.transpose(hd_imgs, [0, 3, 1, 2])
            if save_vis:
                out_img['hd_imgs'] = bg_hd_imgs
        if render:
            # Render the initial predictions on the original image resolution
            hd_orig_overlays = hd_renderer(
                model_vertices, faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
            )
            out_img['hd_orig_overlay'] = hd_orig_overlays

        # Render the overlays of the final prediction
        if render:
            hd_overlays = hd_renderer(
                final_model_vertices,
                faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
                body_color=[0.4, 0.4, 0.7])
            out_img['hd_overlay'] = hd_overlays

        for deg in degrees:
            hd_overlays = hd_renderer(
                final_model_vertices, faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
                render_bg=False,
                body_color=[0.4, 0.4, 0.7],
                deg=deg,
            )
            out_img[f'hd_rendering_{deg:03.0f}'] = hd_overlays

        if save_vis:
            for key in out_img.keys():
                out_img[key] = np.clip(
                    np.transpose(out_img[key], [0, 2, 3, 1]) * 255,
                    0, 255).astype(np.uint8)

        for idx in tqdm(range(len(body_targets)), 'Saving ...'):
            fname = body_targets[idx].get_field('fname')
            curr_out_path = osp.join(demo_output_folder, fname)
            os.makedirs(curr_out_path, exist_ok=True)

            if save_vis:
                for name, curr_img in out_img.items():
                    pil_img.fromarray(curr_img[idx]).save(
                        osp.join(curr_out_path, f'{name}.png'))

            if save_mesh:
                # Store the mesh predicted by the body-crop network
                naive_mesh = o3d.geometry.TriangleMesh()
                naive_mesh.vertices = Vec3d(
                    model_vertices[idx] + hd_params['transl'][idx])
                naive_mesh.triangles = Vec3i(faces)
                mesh_fname = osp.join(curr_out_path, f'body_{fname}.ply')
                o3d.io.write_triangle_mesh(mesh_fname, naive_mesh)

                # Store the final mesh
                expose_mesh = o3d.geometry.TriangleMesh()
                expose_mesh.vertices = Vec3d(
                    final_model_vertices[idx] + hd_params['transl'][idx])
                expose_mesh.triangles = Vec3i(faces)
                mesh_fname = osp.join(curr_out_path, f'{fname}.ply')
                o3d.io.write_triangle_mesh(mesh_fname, expose_mesh)

            if save_params:
                params_fname = osp.join(curr_out_path, f'{fname}_params.npz')
                out_params = dict(fname=fname)
                for key, val in stage_n_out.items():
                    if torch.is_tensor(val):
                        val = val.detach().cpu().numpy()[idx]
                    out_params[key] = val

                for key, val in hd_params.items():
                    if torch.is_tensor(val):
                        val = val.detach().cpu().numpy()
                    if np.isscalar(val[idx]):
                        out_params[key] = val[idx].item()
                    else:
                        out_params[key] = val[idx]

                np.savez_compressed(params_fname, **out_params)

            if show:
                nrows = 1
                ncols = 4 + len(degrees)
                fig, axes = plt.subplots(ncols=ncols, nrows=nrows, num=0,
                                         gridspec_kw={'wspace': 0,
                                                      'hspace': 0})
                axes = axes.reshape(nrows, ncols)
                for ax in axes.flatten():
                    ax.clear()
                    ax.set_axis_off()

                axes[0, 0].imshow(hd_imgs[idx])
                # out_img never receives an 'rgb' entry in this script, so
                # guard the lookup to avoid a KeyError.
                if 'rgb' in out_img:
                    axes[0, 1].imshow(out_img['rgb'][idx])
                axes[0, 2].imshow(out_img['hd_orig_overlay'][idx])
                axes[0, 3].imshow(out_img['hd_overlay'][idx])
                start = 4
                for deg in degrees:
                    axes[0, start].imshow(
                        out_img[f'hd_rendering_{deg:03.0f}'][idx])
                    start += 1

                plt.draw()
                if pause > 0:
                    plt.pause(pause)
                else:
                    plt.show()

    logger.info(f'Average inference time: {total_time / cnt}')
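# `undo_img_normalization` is used above to map normalized network inputs
# back to displayable RGB. A minimal sketch, assuming it simply inverts the
# standard (img - mean) / std channel-wise transform; the real helper in the
# codebase may differ, and the underscore name avoids shadowing it.
import numpy as np
import torch

def _undo_img_normalization_sketch(imgs, means, std):
    # imgs: [B, 3, H, W]; means/std: per-channel arrays of length 3.
    if torch.is_tensor(imgs):
        imgs = imgs.detach().cpu().numpy()
    if imgs.ndim == 3:
        imgs = imgs[np.newaxis]  # promote a single image to a batch
    # Broadcast the per-channel statistics over the spatial dimensions.
    return imgs * std.reshape(1, -1, 1, 1) + means.reshape(1, -1, 1, 1)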
def main(
    args,
    exp_cfg,
    show: bool = False,
    output_folder: str = 'pose',
    pause: float = -1,
    focal_length: float = 5000,
    rcnn_batch: int = 1,
    sensor_width: float = 36,
    save_vis: bool = False,
    save_params: bool = False,
    save_mesh: bool = False,
    degrees: Optional[List[float]] = [],
) -> bool:
    device = torch.device('cuda')
    if not torch.cuda.is_available():
        logger.error('CUDA is not available!')
        return False

    process_img_paths = os.path.join(args.img_dir, "frames", "**",
                                     "frame_*.png")

    # Setup
    expose_dloader = preprocess_images(process_img_paths, exp_cfg,
                                       batch_size=rcnn_batch, device=device)

    model = None
    try:
        model = SMPLXNet(exp_cfg)
        model = model.to(device=device)
    except RuntimeError:
        logger.error('Failed to load the trained model')
        return False

    output_folder = exp_cfg.output_folder
    checkpoint_folder = osp.join(output_folder, exp_cfg.checkpoint_folder)
    checkpointer = Checkpointer(model, save_dir=checkpoint_folder,
                                pretrained=exp_cfg.pretrained)

    arguments = {'iteration': 0, 'epoch_number': 0}
    extra_checkpoint_data = checkpointer.load_checkpoint()
    for key in arguments:
        if key in extra_checkpoint_data:
            arguments[key] = extra_checkpoint_data[key]

    model = model.eval()

    means = np.array(exp_cfg.datasets.body.transforms.mean)
    std = np.array(exp_cfg.datasets.body.transforms.std)

    render = save_vis or show
    body_crop_size = exp_cfg.get('datasets', {}).get('body', {}).get(
        'transforms').get('crop_size', 256)
    if render:
        hd_renderer = HDRenderer(img_size=body_crop_size)

    logger.info("Starting pose estimation (one pass per person x frame)",
                decoration=MLogger.DECORATION_LINE)

    cnt = 0
    for bidx, batch in enumerate(tqdm(expose_dloader, dynamic_ncols=True)):
        full_imgs_list, body_imgs, body_targets = batch
        if full_imgs_list is None:
            continue

        full_imgs = to_image_list(full_imgs_list)
        body_imgs = body_imgs.to(device=device)
        body_targets = [target.to(device) for target in body_targets]
        full_imgs = full_imgs.to(device=device)

        camera_parameters = None
        camera_scale = None
        camera_transl = None

        model_output = model(body_imgs, body_targets, full_imgs=full_imgs,
                             device=device)
        cnt += 1

        body_imgs = body_imgs.detach().cpu().numpy()
        body_output = model_output.get('body')

        _, _, H, W = full_imgs.shape

        body_output = model_output.get('body', {})
        num_stages = body_output.get('num_stages', 3)
        stage_n_out = body_output.get(f'stage_{num_stages - 1:02d}', {})
        model_vertices = stage_n_out.get('vertices', None)

        if stage_n_out is not None:
            model_vertices = stage_n_out.get('vertices', None)
            # faces = stage_n_out['faces']
            if model_vertices is not None:
                # model_vertices = model_vertices.detach().cpu().numpy()
                camera_parameters = body_output.get('camera_parameters', {})
                camera_scale = camera_parameters['scale'].detach()
                camera_transl = camera_parameters['translation'].detach()

        out_img = OrderedDict()

        final_model_vertices = None
        stage_n_out = model_output.get('body', {}).get('final', {})
        if stage_n_out is not None:
            final_model_vertices = stage_n_out.get('vertices', None)

        if final_model_vertices is not None:
            # final_model_vertices = final_model_vertices.detach().cpu().numpy()
            camera_parameters = model_output.get('body', {}).get(
                'camera_parameters', {})
            camera_scale = camera_parameters['scale'].detach()
            camera_transl = camera_parameters['translation'].detach()

        hd_params = weak_persp_to_blender(
            body_targets,
            camera_scale=camera_scale,
            camera_transl=camera_transl,
            H=H, W=W,
            sensor_width=sensor_width,
            focal_length=focal_length,
        )

        # (The commented-out rendering/visualization block that the demo
        # scripts share was disabled in this entry point; see the video demo
        # below for the active version.)

        camera_scale_np = camera_scale.cpu().numpy()
        camera_transl_np = camera_transl.cpu().numpy()

        # Keep the bbox
        cbbox = body_targets[0].bbox.detach().cpu().numpy()
        bbox_size = np.array(body_targets[0].size)
        dset_center = np.array(body_targets[0].extra_fields['center'])
        dset_size = np.array(body_targets[0].extra_fields['bbox_size'])
        # Map back to screen coordinates (int, for drawing).
        bbox = np.tile(dset_center, 2) + (
            (cbbox / np.tile(bbox_size, 2) - np.tile(0.5, 4)) *
            np.tile(dset_size, 4))
        # np.int was removed in NumPy 1.24; use the builtin int instead.
        img_bbox = bbox.astype(int)
        hd_params['img_bbox'] = bbox

        proj_joints = stage_n_out['proj_joints'][0].detach().cpu().numpy()
        hd_params['proj_joints'] = proj_joints

        for idx in range(len(body_targets)):
            fname = body_targets[idx].get_field('fname')
            idx_dir = body_targets[idx].get_field('idx_dir')
            params_json_path = osp.join(args.img_dir, "frames", idx_dir,
                                        f'{fname}_joints.json')

            out_params = dict(fname=fname)
            for key, val in stage_n_out.items():
                if torch.is_tensor(val):
                    val = val.detach().cpu().numpy()[idx]
                out_params[key] = val

            if save_vis:
                for name, curr_img in out_img.items():
                    pil_img.fromarray(curr_img[idx]).save(
                        osp.join(args.img_dir, "frames", idx_dir,
                                 f'{name}.png'))

            # JSON output
            joint_dict = {}
            joint_dict["image"] = {"width": W, "height": H}
            joint_dict["depth"] = {"depth": float(hd_params["depth"][0][0])}
            joint_dict["camera"] = {
                "scale": float(camera_scale_np[0][0]),
                "transl": {
                    "x": float(camera_transl_np[0, 0]),
                    "y": float(camera_transl_np[0, 1]),
                },
            }
            joint_dict["bbox"] = {
                "x": float(hd_params["img_bbox"][0]),
                "y": float(hd_params["img_bbox"][1]),
                "width": (float(hd_params["img_bbox"][2]) -
                          float(hd_params["img_bbox"][0])),
                "height": (float(hd_params["img_bbox"][3]) -
                           float(hd_params["img_bbox"][1])),
            }
            joint_dict["others"] = {
                'shift_x': float(hd_params["shift_x"][0]),
                'shift_y': float(hd_params["shift_y"][0]),
                'focal_length_in_mm': float(hd_params["focal_length_in_mm"][0]),
                'focal_length_in_px': float(hd_params["focal_length_in_px"][0]),
                'sensor_width': float(hd_params["sensor_width"][0]),
                'center': {
                    "x": float(hd_params['center'][0, 0]),
                    "y": float(hd_params['center'][0, 1]),
                },
            }
            joint_dict["joints"] = {}
            joint_dict["proj_joints"] = {}

            proj_joints = hd_params["proj_joints"]
            joints = out_params["joints"]

            min_joints = np.min(proj_joints, axis=0)
            max_joints = np.max(proj_joints, axis=0)
            diff_joints = max_joints - min_joints
            diff_bbox = np.array([
                hd_params['img_bbox'][2] - hd_params['img_bbox'][0],
                hd_params['img_bbox'][3] - hd_params['img_bbox'][1],
            ])
            jscale = diff_joints / diff_bbox
            jscale = np.mean([jscale[0], jscale[1]])

            for jidx, jname in enumerate(KEYPOINT_NAMES):
                j2d = proj_joints[jidx] / jscale
                joint_dict["proj_joints"][jname] = {
                    'x': float(hd_params['center'][0, 0] + j2d[0]),
                    'y': float(hd_params['center'][0, 1] + j2d[1]),
                }
                joint_dict["joints"][jname] = {
                    'x': float(joints[jidx][0]),
                    'y': float(-joints[jidx][1]),
                    'z': float(joints[jidx][2]),
                }

            # for pose_name in ["global_orient", "body_pose",
            #                   "left_hand_pose", "right_hand_pose",
            #                   "jaw_pose"]:
            #     joint_dict[pose_name] = {}
            #     for pidx, pvalues in enumerate(out_params[pose_name]):
            #         joint_dict[pose_name][pidx] = {
            #             'xAxis': {'x': float(pvalues[0, 0]),
            #                       'y': float(pvalues[0, 1]),
            #                       'z': float(pvalues[0, 2])},
            #             'yAxis': {'x': float(pvalues[1, 0]),
            #                       'y': float(pvalues[1, 1]),
            #                       'z': float(pvalues[1, 2])},
            #             'zAxis': {'x': float(pvalues[2, 0]),
            #                       'y': float(pvalues[2, 1]),
            #                       'z': float(pvalues[2, 2])},
            #         }

            with open(params_json_path, 'w') as f:
                json.dump(joint_dict, f, indent=4)

    return True
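# The bbox remapping above converts a box from crop space back to source-image
# pixels: each corner is offset from the detection center by its fractional
# position inside the crop, scaled by the detection size. A small
# self-contained check of that arithmetic with made-up numbers (all values
# below are illustrative, not taken from the pipeline):
import numpy as np

dset_center = np.array([320.0, 240.0])       # detection center in the image
dset_size = np.array(200.0)                  # square detection size (scalar)
bbox_size = np.array([256.0, 256.0])         # crop resolution fed to the net
cbbox = np.array([0.0, 0.0, 256.0, 256.0])   # box covering the whole crop

bbox = np.tile(dset_center, 2) + (
    (cbbox / np.tile(bbox_size, 2) - np.tile(0.5, 4)) * np.tile(dset_size, 4))
print(bbox)  # -> [220. 140. 420. 340.], i.e. center +/- half the size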
def main(
    exp_cfg,
    show=False,
    demo_output_folder='demo_output',
    pause=-1,
    focal_length=5000,
    sensor_width=36,
    save_vis=True,
    save_params=False,
    save_mesh=False,
    degrees=[],
):
    device = torch.device('cuda')
    if not torch.cuda.is_available():
        logger.error('CUDA is not available!')
        sys.exit(3)

    logger.remove()
    logger.add(lambda x: tqdm.write(x, end=''),
               level=exp_cfg.logger_level.upper(),
               colorize=True)

    demo_output_folder = osp.expanduser(osp.expandvars(demo_output_folder))
    logger.info(f'Saving results to: {demo_output_folder}')
    os.makedirs(demo_output_folder, exist_ok=True)

    model = SMPLXNet(exp_cfg)
    try:
        model = model.to(device=device)
    except RuntimeError:
        # Re-submit in case of a device error
        sys.exit(3)

    checkpoint_folder = osp.join(exp_cfg.output_folder,
                                 exp_cfg.checkpoint_folder)
    checkpointer = Checkpointer(model, save_dir=checkpoint_folder,
                                pretrained=exp_cfg.pretrained)

    arguments = {'iteration': 0, 'epoch_number': 0}
    extra_checkpoint_data = checkpointer.load_checkpoint()
    for key in arguments:
        if key in extra_checkpoint_data:
            arguments[key] = extra_checkpoint_data[key]

    model = model.eval()

    means = np.array(exp_cfg.datasets.body.transforms.mean)
    std = np.array(exp_cfg.datasets.body.transforms.std)

    render = save_vis or show
    body_crop_size = exp_cfg.get('datasets', {}).get('body', {}).get(
        'transforms').get('crop_size', 256)
    if render:
        hd_renderer = HDRenderer(img_size=body_crop_size)

    dataloaders = make_all_data_loaders(exp_cfg, split='test')

    with Evaluator(exp_cfg) as evaluator:
        evaluator.run(model, dataloaders, exp_cfg, device)
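# `Evaluator` above is consumed as a context manager. A hypothetical skeleton
# (named `_EvaluatorSketch` so it does not shadow the real imported class)
# showing why the `with` statement works: __enter__ hands back the evaluator
# after acquiring its resources, and __exit__ releases them. The real class
# and its run() signature live in the ExPose codebase.
class _EvaluatorSketch:
    def __init__(self, exp_cfg):
        self.exp_cfg = exp_cfg

    def __enter__(self):
        # Acquire resources here (log files, summary writers, ...).
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Release resources here; returning False propagates exceptions.
        return False

    def run(self, model, dataloaders, exp_cfg, device):
        # Iterate over the test dataloaders and accumulate metrics.
        raise NotImplementedError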
def main(
    video_path: str,
    exp_cfg,
    show: bool = False,
    demo_output_folder: str = 'demo_output',
    pause: float = -1,
    focal_length: float = 5000,
    rcnn_batch: int = 1,
    sensor_width: float = 36,
    save_vis: bool = True,
    save_params: bool = False,
    save_mesh: bool = False,
    degrees: Optional[List[float]] = [],
) -> None:
    device = torch.device('cuda')
    if not torch.cuda.is_available():
        logger.error('CUDA is not available!')
        sys.exit(3)

    logger.remove()
    logger.add(lambda x: tqdm.write(x, end=''),
               level=exp_cfg.logger_level.upper(),
               colorize=True)

    # Create the image folder
    image_folder = osp.join(osp.dirname(osp.abspath(video_path)),
                            osp.basename(video_path).replace(".", "_"))
    os.makedirs(image_folder, exist_ok=True)

    # Dump the video to still frames
    idx = 0
    cap = cv2.VideoCapture(video_path)
    # Get the frame width and height
    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    while cap.isOpened():
        # Capture one frame from the video
        flag, frame = cap.read()
        # Stop once there are no frames left (this also handles an
        # explicitly specified end of the clip).
        if not flag:
            break
        cv2.imwrite(osp.join(image_folder,
                             "capture_{0:012d}.png".format(idx)), frame)
        idx += 1
    cap.release()

    expose_dloader = preprocess_images(image_folder + "/*.png", exp_cfg,
                                       batch_size=rcnn_batch, device=device)

    demo_output_folder = osp.join(
        osp.expanduser(osp.expandvars(demo_output_folder)),
        osp.basename(video_path).replace(".", "_"),
        datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    logger.info(f'Saving results to: {demo_output_folder}')
    os.makedirs(demo_output_folder, exist_ok=True)

    # Joint position output file
    posf = open(osp.join(demo_output_folder, 'pos.txt'), 'w')

    model = SMPLXNet(exp_cfg)
    try:
        model = model.to(device=device)
    except RuntimeError:
        # Re-submit in case of a device error
        sys.exit(3)

    output_folder = exp_cfg.output_folder
    checkpoint_folder = osp.join(output_folder, exp_cfg.checkpoint_folder)
    checkpointer = Checkpointer(model, save_dir=checkpoint_folder,
                                pretrained=exp_cfg.pretrained)

    arguments = {'iteration': 0, 'epoch_number': 0}
    extra_checkpoint_data = checkpointer.load_checkpoint()
    for key in arguments:
        if key in extra_checkpoint_data:
            arguments[key] = extra_checkpoint_data[key]

    model = model.eval()

    means = np.array(exp_cfg.datasets.body.transforms.mean)
    std = np.array(exp_cfg.datasets.body.transforms.std)

    render = save_vis or show
    body_crop_size = exp_cfg.get('datasets', {}).get('body', {}).get(
        'transforms').get('crop_size', 256)
    if render:
        hd_renderer = HDRenderer(img_size=body_crop_size)

    total_time = 0
    cnt = 0
    for bidx, batch in enumerate(tqdm(expose_dloader, dynamic_ncols=True)):
        full_imgs_list, body_imgs, body_targets = batch
        if full_imgs_list is None:
            continue

        full_imgs = to_image_list(full_imgs_list)
        body_imgs = body_imgs.to(device=device)
        body_targets = [target.to(device) for target in body_targets]
        full_imgs = full_imgs.to(device=device)

        torch.cuda.synchronize()
        start = time.perf_counter()
        model_output = model(body_imgs, body_targets, full_imgs=full_imgs,
                             device=device)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start
        cnt += 1
        total_time += elapsed

        hd_imgs = full_imgs.images.detach().cpu().numpy().squeeze()
        body_imgs = body_imgs.detach().cpu().numpy()
        body_output = model_output.get('body')

        _, _, H, W = full_imgs.shape
        # logger.info(f'{H}, {W}')
        # H, W, _ = hd_imgs.shape

        if render:
            hd_imgs = np.transpose(undo_img_normalization(hd_imgs, means, std),
                                   [0, 2, 3, 1])
            hd_imgs = np.clip(hd_imgs, 0, 1.0)
            right_hand_crops = body_output.get('right_hand_crops')
            left_hand_crops = torch.flip(body_output.get('left_hand_crops'),
                                         dims=[-1])
            head_crops = body_output.get('head_crops')
            bg_imgs = undo_img_normalization(body_imgs, means, std)

            right_hand_crops = undo_img_normalization(right_hand_crops,
                                                      means, std)
            left_hand_crops = undo_img_normalization(left_hand_crops,
                                                     means, std)
            head_crops = undo_img_normalization(head_crops, means, std)

        body_output = model_output.get('body', {})
        num_stages = body_output.get('num_stages', 3)
        stage_n_out = body_output.get(f'stage_{num_stages - 1:02d}', {})
        model_vertices = stage_n_out.get('vertices', None)

        if stage_n_out is not None:
            model_vertices = stage_n_out.get('vertices', None)
            faces = stage_n_out['faces']
            if model_vertices is not None:
                model_vertices = model_vertices.detach().cpu().numpy()
                camera_parameters = body_output.get('camera_parameters', {})
                camera_scale = camera_parameters['scale'].detach()
                camera_transl = camera_parameters['translation'].detach()

        out_img = OrderedDict()

        final_model_vertices = None
        stage_n_out = model_output.get('body', {}).get('final', {})
        if stage_n_out is not None:
            final_model_vertices = stage_n_out.get('vertices', None)

        if final_model_vertices is not None:
            final_model_vertices = final_model_vertices.detach().cpu().numpy()
            camera_parameters = model_output.get('body', {}).get(
                'camera_parameters', {})
            camera_scale = camera_parameters['scale'].detach()
            camera_transl = camera_parameters['translation'].detach()

        hd_params = weak_persp_to_blender(
            body_targets,
            camera_scale=camera_scale,
            camera_transl=camera_transl,
            H=H, W=W,
            sensor_width=sensor_width,
            focal_length=focal_length,
        )

        # bg_hd_imgs is needed by both render blocks below, so compute it
        # whenever rendering is on (the original only built it under
        # save_vis, which raised a NameError when only `show` was set).
        if render:
            bg_hd_imgs = np.transpose(hd_imgs, [0, 3, 1, 2])
            if save_vis:
                out_img['hd_imgs'] = bg_hd_imgs
        if render:
            # Render the initial predictions on the original image resolution
            hd_orig_overlays = hd_renderer(
                model_vertices, faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
            )
            out_img['hd_orig_overlay'] = hd_orig_overlays

        # Render the overlays of the final prediction
        if render:
            # Keep the bbox
            cbbox = body_targets[0].bbox.detach().cpu().numpy()
            bbox_size = np.array(body_targets[0].size)
            dset_center = np.array(body_targets[0].extra_fields['center'])
            dset_size = np.array(body_targets[0].extra_fields['bbox_size'])
            # Map back to screen coordinates (int, for drawing).
            bbox = np.tile(dset_center, 2) + (
                (cbbox / np.tile(bbox_size, 2) - np.tile(0.5, 4)) *
                np.tile(dset_size, 4))
            # np.int was removed in NumPy 1.24; use the builtin int instead.
            img_bbox = bbox.astype(int)
            hd_params['img_bbox'] = bbox

            hd_overlays = hd_renderer(
                final_model_vertices,
                faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
                body_color=[0.4, 0.4, 0.7])

            proj_joints = stage_n_out['proj_joints'][0].detach().cpu().numpy()
            hd_params['proj_joints'] = proj_joints

            try:
                # hd_overlays has shape [B, 4, H, W], so y indexes axis 2 and
                # x indexes axis 3. The original bounds check compared x
                # against the wrong axis and compared y against an array
                # slice (hd_overlays[3]); both are fixed below.
                # Horizontal edges
                for x in range(img_bbox[0], img_bbox[2] + 1):
                    for y in [img_bbox[1], img_bbox[3] + 1]:
                        if (x < hd_overlays.shape[3] and
                                y < hd_overlays.shape[2]):
                            hd_overlays[:, :, y, x] = np.array([1, 0, 0, 1])
                # Vertical edges
                for x in [img_bbox[0], img_bbox[2] + 1]:
                    for y in range(img_bbox[1], img_bbox[3] + 1):
                        if (x < hd_overlays.shape[3] and
                                y < hd_overlays.shape[2]):
                            hd_overlays[:, :, y, x] = np.array([1, 0, 0, 1])
                # Camera center
                for x in range(int(hd_params['center'][0, 0] - 1),
                               int(hd_params['center'][0, 0] + 2)):
                    for y in range(int(hd_params['center'][0, 1] - 1),
                                   int(hd_params['center'][0, 1] + 2)):
                        if (x < hd_overlays.shape[3] and
                                y < hd_overlays.shape[2]):
                            hd_overlays[:, :, y, x] = np.array([0, 1, 0, 1])

                min_joints = np.min(proj_joints, axis=0)
                max_joints = np.max(proj_joints, axis=0)
                diff_joints = max_joints - min_joints
                diff_bbox = np.array([
                    hd_params['img_bbox'][2] - hd_params['img_bbox'][0],
                    hd_params['img_bbox'][3] - hd_params['img_bbox'][1],
                ])
                jscale = diff_joints / diff_bbox
                jscale = np.mean([jscale[0], jscale[1]])
                for jidx, jname in enumerate(KEYPOINT_NAMES):
                    j2d = proj_joints[jidx] / jscale
                    # Joint markers
                    for x in range(int(hd_params['center'][0, 0] + j2d[0] - 1),
                                   int(hd_params['center'][0, 0] + j2d[0] + 2)):
                        for y in range(
                                int(hd_params['center'][0, 1] + j2d[1] - 1),
                                int(hd_params['center'][0, 1] + j2d[1] + 2)):
                            if (x < hd_overlays.shape[3] and
                                    y < hd_overlays.shape[2]):
                                hd_overlays[:, :, y, x] = np.array([0, 0, 1, 1])
            except Exception as e:
                print('hd_overlays error: %s' % e)

            out_img['hd_overlay'] = hd_overlays

        for deg in degrees:
            hd_overlays = hd_renderer(
                final_model_vertices, faces,
                focal_length=hd_params['focal_length_in_px'],
                camera_translation=hd_params['transl'],
                camera_center=hd_params['center'],
                bg_imgs=bg_hd_imgs,
                return_with_alpha=True,
                render_bg=False,
                body_color=[0.4, 0.4, 0.7],
                deg=deg,
            )
            out_img[f'hd_rendering_{deg:03.0f}'] = hd_overlays

        if save_vis:
            for key in out_img.keys():
                out_img[key] = np.clip(
                    np.transpose(out_img[key], [0, 2, 3, 1]) * 255,
                    0, 255).astype(np.uint8)

        for idx in tqdm(range(len(body_targets)), 'Saving ...'):
            # TODO: support multiple people
            if idx > 0:
                break

            fname = body_targets[idx].get_field('fname')
            curr_out_path = osp.join(demo_output_folder, fname)
            os.makedirs(curr_out_path, exist_ok=True)

            if save_vis:
                for name, curr_img in out_img.items():
                    pil_img.fromarray(curr_img[idx]).save(
                        osp.join(curr_out_path, f'{name}.png'))

            if save_mesh:
                # Store the mesh predicted by the body-crop network
                naive_mesh = o3d.geometry.TriangleMesh()
                naive_mesh.vertices = Vec3d(
                    model_vertices[idx] + hd_params['transl'][idx])
                naive_mesh.triangles = Vec3i(faces)
                mesh_fname = osp.join(curr_out_path, f'body_{fname}.ply')
                o3d.io.write_triangle_mesh(mesh_fname, naive_mesh)

                # Store the final mesh
                expose_mesh = o3d.geometry.TriangleMesh()
                expose_mesh.vertices = Vec3d(
                    final_model_vertices[idx] + hd_params['transl'][idx])
                expose_mesh.triangles = Vec3i(faces)
                mesh_fname = osp.join(curr_out_path, f'{fname}.ply')
                o3d.io.write_triangle_mesh(mesh_fname, expose_mesh)

            if save_params:
                params_fname = osp.join(curr_out_path, f'{fname}_params.npz')
                out_params = dict(fname=fname)
                for key, val in stage_n_out.items():
                    if torch.is_tensor(val):
                        val = val.detach().cpu().numpy()[idx]
                    out_params[key] = val

                for key, val in hd_params.items():
                    if torch.is_tensor(val):
                        val = val.detach().cpu().numpy()
                    if np.isscalar(val[idx]):
                        out_params[key] = val[idx].item()
                    else:
                        out_params[key] = val[idx]

                try:
                    for param_name in ['center']:
                        params_txt_fname = osp.join(
                            curr_out_path, f'{fname}_params_{param_name}.txt')
                        np.savetxt(params_txt_fname, out_params[param_name])

                    for param_name in ['img_bbox']:
                        params_txt_fname = osp.join(
                            curr_out_path, f'{fname}_params_{param_name}.txt')
                        np.savetxt(params_txt_fname, hd_params[param_name])

                    for param_name in ['joints']:
                        params_txt_fname = osp.join(
                            curr_out_path, f'{fname}_params_{param_name}.json')

                        # JSON output
                        joint_dict = {}
                        joint_dict["image"] = {"width": W, "height": H}
                        joint_dict["depth"] = {
                            "depth": float(hd_params["depth"][0][0])
                        }
                        joint_dict["center"] = {
                            "x": float(hd_params['center'][0, 0]),
                            "y": float(hd_params['center'][0, 1]),
                        }
                        # Store width/height as extents, matching the other
                        # exporter (the original wrote the max corner under
                        # these keys).
                        joint_dict["bbox"] = {
                            "x": float(hd_params["img_bbox"][0]),
                            "y": float(hd_params["img_bbox"][1]),
                            "width": (float(hd_params["img_bbox"][2]) -
                                      float(hd_params["img_bbox"][0])),
                            "height": (float(hd_params["img_bbox"][3]) -
                                       float(hd_params["img_bbox"][1])),
                        }
                        joint_dict["joints"] = {}
                        joint_dict["proj_joints"] = {}

                        proj_joints = hd_params["proj_joints"]
                        joints = out_params["joints"]

                        min_joints = np.min(proj_joints, axis=0)
                        max_joints = np.max(proj_joints, axis=0)
                        diff_joints = max_joints - min_joints
                        diff_bbox = np.array([
                            hd_params['img_bbox'][2] - hd_params['img_bbox'][0],
                            hd_params['img_bbox'][3] - hd_params['img_bbox'][1],
                        ])
                        jscale = diff_joints / diff_bbox
                        jscale = np.mean([jscale[0], jscale[1]])

                        for jidx, jname in enumerate(KEYPOINT_NAMES):
                            j2d = proj_joints[jidx] / jscale
                            joint_dict["proj_joints"][jname] = {
                                'x': float(hd_params['center'][0, 0] + j2d[0]),
                                'y': float(hd_params['center'][0, 1] + j2d[1]),
                            }
                            joint_dict["joints"][jname] = {
                                'x': float(joints[jidx][0]),
                                'y': float(-joints[jidx][1]),
                                'z': float(joints[jidx][2]),
                            }

                        with open(params_txt_fname, 'w') as f:
                            json.dump(joint_dict, f, indent=4)

                        # Plot settings
                        fig = plt.figure(figsize=(15, 15), dpi=100)
                        # Add a 3D axes
                        ax = fig.add_subplot(111, projection='3d')

                        # Draw the joints
                        ax.set_xlim3d(int(-(original_width / 2)),
                                      int(original_width / 2))
                        ax.set_ylim3d(0, int(original_height / 2))
                        ax.set_zlim3d(0, int(original_height))
                        ax.set(xlabel='x', ylabel='y', zlabel='z')

                        for j3d_from_idx, j3d_to_idx in ALL_CONNECTIONS:
                            jfname = KEYPOINT_NAMES[j3d_from_idx]
                            jtname = KEYPOINT_NAMES[j3d_to_idx]
                            # The 3D joints live under joint_dict["joints"];
                            # indexing joint_dict[jfname] directly, as the
                            # original did, raises a KeyError.
                            xs = [joint_dict["joints"][jfname]['x'],
                                  joint_dict["joints"][jtname]['x']]
                            ys = [joint_dict["joints"][jfname]['y'],
                                  joint_dict["joints"][jtname]['y']]
                            zs = [joint_dict["joints"][jfname]['z'],
                                  joint_dict["joints"][jtname]['z']]
                            ax.plot3D(xs, ys, zs, marker="o", ms=2,
                                      c="#0000FF")

                        plt.savefig(
                            os.path.join(curr_out_path,
                                         f'{fname}_{param_name}.png'))
                        plt.close()

                        # Write pos.txt
                        joint_names = [
                            (0, 'pelvis'), (1, 'right_hip'),
                            (2, 'right_knee'), (3, 'right_ankle'),
                            (6, 'left_hip'), (7, 'left_knee'),
                            (8, 'left_ankle'),
                            (12, 'spine1'), (13, 'spine2'),
                            (14, 'neck'), (15, 'head'),
                            (17, 'left_shoulder'), (18, 'left_elbow'),
                            (19, 'left_wrist'),
                            (25, 'right_shoulder'), (26, 'right_elbow'),
                            (27, 'right_wrist'),
                        ]
                        N = []
                        I = []
                        for (jnidx, iname) in joint_names:
                            for jidx, smplx_jn in enumerate(KEYPOINT_NAMES):
                                if smplx_jn == iname:
                                    N.append(jnidx)
                                    I.append([
                                        joint_dict["joints"][iname]['x'],
                                        joint_dict["joints"][iname]['y'],
                                        joint_dict["joints"][iname]['z'],
                                    ])

                        for i in range(len(I)):
                            # 0: index, 1: X axis, 2: Y axis, 3: Z axis
                            posf.write(str(N[i]) + " " + str(I[i][0]) + " " +
                                       str(I[i][2]) + " " + str(I[i][1]) +
                                       ", ")
                        # Newline after each frame
                        posf.write("\n")
                except Exception as e:
                    print('savetxt error: %s' % e)

                np.savez_compressed(params_fname, **out_params)

            if show:
                nrows = 1
                ncols = 4 + len(degrees)
                fig, axes = plt.subplots(ncols=ncols, nrows=nrows, num=0,
                                         gridspec_kw={'wspace': 0,
                                                      'hspace': 0})
                axes = axes.reshape(nrows, ncols)
                for ax in axes.flatten():
                    ax.clear()
                    ax.set_axis_off()

                axes[0, 0].imshow(hd_imgs[idx])
                # out_img never receives an 'rgb' entry in this script, so
                # guard the lookup to avoid a KeyError.
                if 'rgb' in out_img:
                    axes[0, 1].imshow(out_img['rgb'][idx])
                axes[0, 2].imshow(out_img['hd_orig_overlay'][idx])
                axes[0, 3].imshow(out_img['hd_overlay'][idx])
                start = 4
                for deg in degrees:
                    axes[0, start].imshow(
                        out_img[f'hd_rendering_{deg:03.0f}'][idx])
                    start += 1

                plt.draw()
                if pause > 0:
                    plt.pause(pause)
                else:
                    plt.show()

    fmt = cv2.VideoWriter_fourcc(*'mp4v')

    # Stitch the mesh overlays into a video. Sort the glob results so frames
    # are written in order; glob does not guarantee any ordering.
    writer = cv2.VideoWriter(osp.join(demo_output_folder, 'hd_overlay.mp4'),
                             fmt, 30, (original_width, original_height))
    for img_path in sorted(glob.glob(
            osp.join(demo_output_folder, "**/hd_overlay.png"))):
        writer.write(cv2.imread(img_path))
    writer.release()

    # Stitch the joint plots into a video
    writer = cv2.VideoWriter(osp.join(demo_output_folder, 'joints.mp4'),
                             fmt, 30, (1500, 1500))
    for img_path in sorted(glob.glob(
            osp.join(demo_output_folder, "**/*_joints.png"))):
        writer.write(cv2.imread(img_path))
    writer.release()

    cv2.destroyAllWindows()
    posf.close()

    logger.info(f'Average inference time: {total_time / cnt}')
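# pos.txt, written above, stores one line per frame made of "index x z y, "
# groups (note the Y/Z swap at write time). A hypothetical reader for that
# format, for downstream tools; the function name and the returned layout are
# illustrative only, not part of the pipeline.
def _read_pos_file_sketch(path):
    frames = []
    with open(path) as f:
        for line in f:
            joints = {}
            for token in line.strip().split(","):
                token = token.strip()
                if not token:
                    continue  # skip the empty tail after the final comma
                jidx, x, z, y = token.split()  # written as index, x, z, y
                # Swap back to (x, y, z) ordering for the caller.
                joints[int(jidx)] = (float(x), float(y), float(z))
            frames.append(joints)
    return frames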