def renderRes(self, image_path, output_path, vibe_results):
    """Render the VIBE meshes over the frames in ``image_path`` as one video.

    Each ``.png``/``.jpg`` frame is read, every person present in that frame
    is drawn with ``Renderer.render``, and the rendered frames are encoded to
    ``<output_path>/vibe_result.mp4``. The temporary ``<image_path>_output``
    frame folder is removed afterwards.

    Args:
        image_path: Folder containing the input frames.
        output_path: Folder that receives ``vibe_result.mp4``.
        vibe_results: Per-person results keyed by person id, passed to
            ``prepare_rendering_results``.

    Raises:
        FileNotFoundError: If ``image_path`` holds no ``.png``/``.jpg`` frame.
        ValueError: If the first frame cannot be decoded by OpenCV.
    """
    # List the folder once; the original listed it three times and read the
    # frame size from an arbitrary (possibly non-image) directory entry.
    dir_entries = os.listdir(image_path)
    image_file_names = sorted(
        os.path.join(image_path, x)
        for x in dir_entries
        if x.endswith(('.png', '.jpg'))
    )
    if not image_file_names:
        raise FileNotFoundError(
            f'No .png/.jpg frames found in "{image_path}"')

    img0 = cv2.imread(image_file_names[0])
    if img0 is None:
        raise ValueError(f'Could not read frame "{image_file_names[0]}"')
    orig_height, orig_width = img0.shape[0:2]

    # ========= Render results as a single video ========= #
    renderer = Renderer(resolution=(orig_width, orig_height),
                        orig_img=True,
                        wireframe=False)

    output_img_folder = f'{image_path}_output'
    os.makedirs(output_img_folder, exist_ok=True)

    print(f'Rendering output video, writing frames to {output_img_folder}')

    # prepare results for rendering
    # Kept as the raw entry count (as before) so the per-frame list is never
    # shorter than what callers got previously.
    num_frames = len(dir_entries)
    frame_results = prepare_rendering_results(vibe_results, num_frames)
    mesh_color = {
        k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)
        for k in vibe_results.keys()
    }

    for frame_idx, img_fname in enumerate(image_file_names):
        img = cv2.imread(img_fname)

        for person_id, person_data in frame_results[frame_idx].items():
            img = renderer.render(
                img,
                person_data['verts'],
                cam=person_data['cam'],
                color=mesh_color[person_id],
                mesh_filename=None,
            )

        cv2.imwrite(
            os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img)

    # ========= Save rendered video ========= #
    save_name = os.path.join(output_path, 'vibe_result.mp4')
    print(f'Saving result video to {save_name}')
    images_to_video(img_folder=output_img_folder, output_vid_file=save_name)
    shutil.rmtree(output_img_folder)
def main(args):
    """Run the VIBE demo on a single video and optionally render the result.

    Steps: (optionally) download a YouTube clip, split the video into frames,
    track people, run the VIBE model on every tracklet, optionally refine with
    Temporal SMPLify and smooth the output, dump the results to
    ``<output>/vibe_output.pkl`` and, unless ``args.no_render`` is set, render
    the meshes back onto the frames and encode them as a video.

    Args:
        args: Namespace with the attributes read below: ``vid_file``,
            ``output_folder``, ``tracking_method``, ``staf_dir``, ``display``,
            ``tracker_batch_size``, ``detector``, ``yolo_img_size``,
            ``vibe_batch_size``, ``run_smplify``, ``smooth``,
            ``smooth_min_cutoff``, ``smooth_beta``, ``no_render``,
            ``wireframe``, ``joints3dview``, ``sideview`` and ``save_obj``.
    """
    device = (torch.device('cuda') if torch.cuda.is_available()
              else torch.device('cpu'))

    video_file = args.vid_file

    # ========= [Optional] download the youtube video ========= #
    if video_file.startswith('https://www.youtube.com'):
        print(f'Downloading YouTube video "{video_file}"')
        video_file = download_youtube_clip(video_file, '/tmp')

        if video_file is None:
            exit('Youtube url is not valid!')

        print(f'YouTube Video has been downloaded to {video_file}...')

    if not os.path.isfile(video_file):
        exit(f'Input video "{video_file}" does not exist!')

    output_path = os.path.join(
        args.output_folder,
        os.path.basename(video_file).replace('.mp4', ''))
    os.makedirs(output_path, exist_ok=True)

    image_folder, num_frames, img_shape = video_to_images(video_file,
                                                          return_info=True)

    print(f'Input video number of frames {num_frames}')
    orig_height, orig_width = img_shape[:2]

    total_time = time.time()

    # ========= Run tracking ========= #
    bbox_scale = 1.1
    if args.tracking_method == 'pose':
        if not os.path.isabs(video_file):
            video_file = os.path.join(os.getcwd(), video_file)
        tracking_results = run_posetracker(video_file,
                                           staf_folder=args.staf_dir,
                                           display=args.display)
    else:
        # run multi object tracker
        mot = MPT(
            device=device,
            batch_size=args.tracker_batch_size,
            display=args.display,
            detector_type=args.detector,
            output_format='dict',
            yolo_img_size=args.yolo_img_size,
        )
        tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Define VIBE model ========= #
    model = VIBE_Demo(
        seqlen=16,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load pretrained weights ========= #
    pretrained_file = download_ckpt(use_3dpw=False)
    # map_location keeps the CPU fallback working when the checkpoint was
    # saved from CUDA tensors.
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f'Performance of pretrained model on 3DPW: {ckpt["performance"]}')
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from "{pretrained_file}"')

    # ========= Run VIBE on each person ========= #
    print('Running VIBE on each tracklet...')
    vibe_time = time.time()
    vibe_results = {}
    for person_id in tqdm(list(tracking_results.keys())):
        bboxes = joints2d = None

        if args.tracking_method == 'bbox':
            bboxes = tracking_results[person_id]['bbox']
        elif args.tracking_method == 'pose':
            joints2d = tracking_results[person_id]['joints2d']

        frames = tracking_results[person_id]['frames']

        dataset = Inference(
            image_folder=image_folder,
            frames=frames,
            bboxes=bboxes,
            joints2d=joints2d,
            scale=bbox_scale,
        )

        bboxes = dataset.bboxes
        frames = dataset.frames
        has_keypoints = joints2d is not None

        dataloader = DataLoader(dataset,
                                batch_size=args.vibe_batch_size,
                                num_workers=16)

        with torch.no_grad():
            pred_cam, pred_verts, pred_pose = [], [], []
            pred_betas, pred_joints3d, norm_joints2d = [], [], []

            for batch in dataloader:
                if has_keypoints:
                    batch, nj2d = batch
                    norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                batch = batch.unsqueeze(0)
                batch = batch.to(device)

                batch_size, seqlen = batch.shape[:2]
                n_rows = batch_size * seqlen
                output = model(batch)[-1]
                theta = output['theta']

                # theta is sliced into [:3] camera, [3:75] pose, [75:] betas
                pred_cam.append(theta[:, :, :3].reshape(n_rows, -1))
                pred_verts.append(output['verts'].reshape(n_rows, -1, 3))
                pred_pose.append(theta[:, :, 3:75].reshape(n_rows, -1))
                pred_betas.append(theta[:, :, 75:].reshape(n_rows, -1))
                pred_joints3d.append(output['kp_3d'].reshape(n_rows, -1, 3))

            pred_cam = torch.cat(pred_cam, dim=0)
            pred_verts = torch.cat(pred_verts, dim=0)
            pred_pose = torch.cat(pred_pose, dim=0)
            pred_betas = torch.cat(pred_betas, dim=0)
            pred_joints3d = torch.cat(pred_joints3d, dim=0)

            del batch

        # ========= [Optional] run Temporal SMPLify to refine the results ========= #
        if args.run_smplify and args.tracking_method == 'pose':
            norm_joints2d = np.concatenate(norm_joints2d, axis=0)
            norm_joints2d = convert_kps(norm_joints2d, src='staf', dst='spin')
            norm_joints2d = torch.from_numpy(norm_joints2d).float().to(device)

            # Run Temporal SMPLify
            update, new_opt_vertices, new_opt_cam, new_opt_pose, new_opt_betas, \
                new_opt_joints3d, new_opt_joint_loss, opt_joint_loss = smplify_runner(
                    pred_rotmat=pred_pose,
                    pred_betas=pred_betas,
                    pred_cam=pred_cam,
                    j2d=norm_joints2d,
                    device=device,
                    batch_size=norm_joints2d.shape[0],
                    pose2aa=False,
                )

            # update the parameters after refinement
            print(
                f'Update ratio after Temporal SMPLify: {update.sum()} / {norm_joints2d.shape[0]}'
            )
            pred_verts = pred_verts.cpu()
            pred_cam = pred_cam.cpu()
            pred_pose = pred_pose.cpu()
            pred_betas = pred_betas.cpu()
            pred_joints3d = pred_joints3d.cpu()
            pred_verts[update] = new_opt_vertices[update]
            pred_cam[update] = new_opt_cam[update]
            pred_pose[update] = new_opt_pose[update]
            pred_betas[update] = new_opt_betas[update]
            pred_joints3d[update] = new_opt_joints3d[update]

        elif args.run_smplify and args.tracking_method == 'bbox':
            print(
                '[WARNING] You need to enable pose tracking to run Temporal SMPLify algorithm!'
            )
            print('[WARNING] Continuing without running Temporal SMPLify!..')

        # ========= Save results to a pickle file ========= #
        pred_cam = pred_cam.cpu().numpy()
        pred_verts = pred_verts.cpu().numpy()
        pred_pose = pred_pose.cpu().numpy()
        pred_betas = pred_betas.cpu().numpy()
        pred_joints3d = pred_joints3d.cpu().numpy()

        # Runs 1 Euro Filter to smooth out the results
        if args.smooth:
            min_cutoff = args.smooth_min_cutoff  # 0.004
            beta = args.smooth_beta  # 1.5
            print(
                f'Running smoothing on person {person_id}, min_cutoff: {min_cutoff}, beta: {beta}'
            )
            pred_verts, pred_pose, pred_joints3d = smooth_pose(
                pred_pose, pred_betas, min_cutoff=min_cutoff, beta=beta)

        orig_cam = convert_crop_cam_to_orig_img(cam=pred_cam,
                                                bbox=bboxes,
                                                img_width=orig_width,
                                                img_height=orig_height)

        vibe_results[person_id] = {
            'pred_cam': pred_cam,
            'orig_cam': orig_cam,
            'verts': pred_verts,
            'pose': pred_pose,
            'betas': pred_betas,
            'joints3d': pred_joints3d,
            'joints2d': joints2d,
            'bboxes': bboxes,
            'frame_ids': frames,
        }

    del model

    end = time.time()
    fps = num_frames / (end - vibe_time)

    print(f'VIBE FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(
        f'Total time spent: {total_time:.2f} seconds (including model loading time).'
    )
    print(
        f'Total FPS (including model loading time): {num_frames / total_time:.2f}.'
    )

    output_pkl_path = os.path.join(output_path, "vibe_output.pkl")
    print(f'Saving output results to "{output_pkl_path}".')
    joblib.dump(vibe_results, output_pkl_path)

    if not args.no_render:
        # ========= Render results as a single video ========= #
        renderer = Renderer(resolution=(orig_width, orig_height),
                            orig_img=True,
                            wireframe=args.wireframe)

        output_img_folder = f'{image_folder}_output'
        os.makedirs(output_img_folder, exist_ok=True)

        if args.joints3dview:
            output_img_raw_folder = f'{image_folder}_raw_output'
            os.makedirs(output_img_raw_folder, exist_ok=True)
            output_img_joints3d_folder = f'{image_folder}_joints3d_output'
            os.makedirs(output_img_joints3d_folder, exist_ok=True)
            output_img_mesh_folder = f'{image_folder}_mesh_output'
            os.makedirs(output_img_mesh_folder, exist_ok=True)
            output_img_meshside_folder = f'{image_folder}_meshside_output'
            os.makedirs(output_img_meshside_folder, exist_ok=True)
            output_img_all_folder = f'{image_folder}_all_output'
            os.makedirs(output_img_all_folder, exist_ok=True)

        print(f'Rendering output video, writing frames to {output_img_folder}')

        # prepare results for rendering
        frame_results = prepare_rendering_results(vibe_results, num_frames)
        mesh_color = {
            k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)
            for k in vibe_results.keys()
        }

        image_file_names = sorted(
            os.path.join(image_folder, x)
            for x in os.listdir(image_folder)
            if x.endswith(('.png', '.jpg'))
        )

        # The 2x2 joints3d grid also shows the side view and the plain mesh
        # frame, so both must be produced even when --sideview is off
        # (previously this combination raised NameError).
        render_side = args.sideview or args.joints3dview

        for frame_idx, img_fname in enumerate(tqdm(image_file_names)):
            frame_name = f'{frame_idx:06d}.png'
            img = cv2.imread(img_fname)

            if render_side:
                side_img = np.zeros_like(img)

            if args.joints3dview:
                img_raw = img.copy()
                joints3d_list = []

            for person_id, person_data in frame_results[frame_idx].items():
                frame_verts = person_data['verts']
                frame_cam = person_data['cam']
                mc = mesh_color[person_id]

                if args.joints3dview:
                    joints3d_list.append(person_data['joints3d'])

                mesh_filename = None
                if args.save_obj:
                    mesh_folder = os.path.join(output_path, 'meshes',
                                               f'{person_id:04d}')
                    os.makedirs(mesh_folder, exist_ok=True)
                    mesh_filename = os.path.join(mesh_folder,
                                                 f'{frame_idx:06d}.obj')

                img = renderer.render(
                    img,
                    frame_verts,
                    cam=frame_cam,
                    color=mc,
                    mesh_filename=mesh_filename,
                )

                if render_side:
                    side_img = renderer.render(
                        side_img,
                        frame_verts,
                        cam=frame_cam,
                        color=mc,
                        angle=270,
                        axis=[0, 1, 0],
                    )

            if args.joints3dview:
                # Mesh-only frame, captured before the side view is appended.
                img_mesh = img.copy()

            if args.sideview:
                img = np.concatenate([img, side_img], axis=1)

            cv2.imwrite(os.path.join(output_img_folder, frame_name), img)

            if args.joints3dview:
                if joints3d_list:
                    joints3d = np.concatenate(joints3d_list)
                    img_joints3d = render_joints3d(joints3d, img_raw.shape)
                else:
                    # Nobody in this frame: keep an empty joints panel.
                    img_joints3d = np.zeros_like(img_raw)

                img_up = np.concatenate([img_raw, img_joints3d], axis=1)
                img_down = np.concatenate([img_mesh, side_img], axis=1)
                img_all = np.concatenate([img_up, img_down], axis=0)
                cv2.imwrite(
                    os.path.join(output_img_raw_folder, frame_name), img_raw)
                cv2.imwrite(
                    os.path.join(output_img_joints3d_folder, frame_name),
                    img_joints3d)
                cv2.imwrite(
                    os.path.join(output_img_mesh_folder, frame_name),
                    img_mesh)
                cv2.imwrite(
                    os.path.join(output_img_meshside_folder, frame_name),
                    side_img)
                cv2.imwrite(
                    os.path.join(output_img_all_folder, frame_name), img_all)

            if args.display:
                cv2.imshow('Video', img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if args.display:
            cv2.destroyAllWindows()

        # ========= Save rendered video ========= #
        vid_name = os.path.basename(video_file)
        vid_stem = vid_name.replace(".mp4", "")
        save_name = os.path.join(output_path, f'{vid_stem}_vibe_result.mp4')
        print(f'Saving result video to {save_name}')
        images_to_video(img_folder=output_img_folder,
                        output_vid_file=save_name)
        shutil.rmtree(output_img_folder)

        if args.joints3dview:
            # Only the combined 2x2 view is encoded. The raw / joints3d /
            # mesh / meshside frame folders are left on disk, as before.
            save_name_all = os.path.join(output_path, f'{vid_stem}_all.mp4')
            images_to_video(img_folder=output_img_all_folder,
                            output_vid_file=save_name_all)
            shutil.rmtree(output_img_all_folder)

    shutil.rmtree(image_folder)
    print('================= END =================')
def main(args):
    """Run the VIBE demo over every video named in ``args.video_list``.

    The model is built and its weights loaded once; each listed video is then
    split into frames, tracked, passed through VIBE, saved to
    ``<output_folder>/<name>.pkl`` and (unless ``args.no_render``) rendered to
    ``<output_folder>/<name>/<name>_vibe_result.mp4``. Missing video files are
    reported and skipped.

    Args:
        args: Namespace with the attributes read below: ``gpu_id``,
            ``video_list``, ``output_folder``, ``tracking_method``,
            ``staf_dir``, ``display``, ``tracker_batch_size``, ``detector``,
            ``yolo_img_size``, ``vibe_batch_size``, ``run_smplify``,
            ``no_render``, ``wireframe``, ``sideview`` and ``save_obj``.
    """
    # Select the GPU only when CUDA exists; calling set_device first made the
    # CPU fallback below unreachable on CPU-only hosts.
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu_id)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    print(f'Loading video list {args.video_list}')
    with open(args.video_list, 'r') as list_file:
        # Blank lines are not video entries.
        video_list = [line.strip() for line in list_file if line.strip()]
    if not video_list:
        print('No files were found in video list')
        return

    print('Loading VIBE model')
    # ========= Define VIBE model ========= #
    model = VIBE_Demo(
        seqlen=16,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load VIBE pretrained weights ========= #
    pretrained_file = download_ckpt(use_3dpw=False)
    # map_location lets a CUDA-saved checkpoint load on the chosen device.
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f'Performance of pretrained model on 3DPW: {ckpt["performance"]}')
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from "{pretrained_file}"')

    num_videos = len(video_list)
    print(f'Processing {num_videos} videos.')

    for video_idx, video_file in enumerate(video_list, start=1):
        if not osp.isfile(video_file):
            print(f'Input video "{video_file}" does not exist! Moving on to next file.')
            continue

        filename = osp.splitext(osp.basename(video_file))[0]
        output_path = osp.join(args.output_folder, filename)
        os.makedirs(output_path, exist_ok=True)

        image_folder, num_frames, img_shape = video_to_images(
            video_file, return_info=True)

        print(f'[{video_idx}/{num_videos}] Processing {num_frames} frames')
        orig_height, orig_width = img_shape[:2]

        # ========= Run tracking ========= #
        bbox_scale = 1.1
        if args.tracking_method == 'pose':
            if not osp.isabs(video_file):
                video_file = osp.join(os.getcwd(), video_file)
            tracking_results = run_posetracker(video_file,
                                               staf_folder=args.staf_dir,
                                               display=args.display)
        else:
            # run multi object tracker
            mot = MPT(
                device=device,
                batch_size=args.tracker_batch_size,
                display=args.display,
                detector_type=args.detector,
                output_format='dict',
                yolo_img_size=args.yolo_img_size,
            )
            tracking_results = mot(image_folder)

        # remove tracklets if num_frames is less than MIN_NUM_FRAMES
        for person_id in list(tracking_results.keys()):
            if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
                del tracking_results[person_id]

        # ========= Run VIBE on each person ========= #
        print('Running VIBE on each tracklet...')
        vibe_results = {}
        for person_id in tqdm(list(tracking_results.keys())):
            bboxes = joints2d = None

            if args.tracking_method == 'bbox':
                bboxes = tracking_results[person_id]['bbox']
            elif args.tracking_method == 'pose':
                joints2d = tracking_results[person_id]['joints2d']

            frames = tracking_results[person_id]['frames']

            dataset = Inference(
                image_folder=image_folder,
                frames=frames,
                bboxes=bboxes,
                joints2d=joints2d,
                scale=bbox_scale,
            )

            bboxes = dataset.bboxes
            frames = dataset.frames
            has_keypoints = joints2d is not None

            dataloader = DataLoader(dataset,
                                    batch_size=args.vibe_batch_size,
                                    num_workers=16)

            with torch.no_grad():
                pred_cam, pred_verts, pred_pose = [], [], []
                pred_betas, pred_joints3d, norm_joints2d = [], [], []

                for batch in dataloader:
                    if has_keypoints:
                        batch, nj2d = batch
                        norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                    batch = batch.unsqueeze(0)
                    batch = batch.to(device)

                    batch_size, seqlen = batch.shape[:2]
                    n_rows = batch_size * seqlen
                    output = model(batch)[-1]
                    theta = output['theta']

                    # theta is sliced into [:3] camera, [3:75] pose, [75:] betas
                    pred_cam.append(theta[:, :, :3].reshape(n_rows, -1))
                    pred_verts.append(output['verts'].reshape(n_rows, -1, 3))
                    pred_pose.append(theta[:, :, 3:75].reshape(n_rows, -1))
                    pred_betas.append(theta[:, :, 75:].reshape(n_rows, -1))
                    pred_joints3d.append(
                        output['kp_3d'].reshape(n_rows, -1, 3))

                pred_cam = torch.cat(pred_cam, dim=0)
                pred_verts = torch.cat(pred_verts, dim=0)
                pred_pose = torch.cat(pred_pose, dim=0)
                pred_betas = torch.cat(pred_betas, dim=0)
                pred_joints3d = torch.cat(pred_joints3d, dim=0)

                del batch

            # ========= [Optional] run Temporal SMPLify to refine the results ========= #
            if args.run_smplify and args.tracking_method == 'pose':
                norm_joints2d = np.concatenate(norm_joints2d, axis=0)
                norm_joints2d = convert_kps(norm_joints2d, src='staf', dst='spin')
                norm_joints2d = torch.from_numpy(norm_joints2d).float().to(device)

                # Run Temporal SMPLify
                update, new_opt_vertices, new_opt_cam, new_opt_pose, new_opt_betas, \
                    new_opt_joints3d, new_opt_joint_loss, opt_joint_loss = smplify_runner(
                        pred_rotmat=pred_pose,
                        pred_betas=pred_betas,
                        pred_cam=pred_cam,
                        j2d=norm_joints2d,
                        device=device,
                        batch_size=norm_joints2d.shape[0],
                        pose2aa=False,
                    )

                # update the parameters after refinement
                print(f'Update ratio after Temporal SMPLify: {update.sum()} / {norm_joints2d.shape[0]}')
                pred_verts = pred_verts.cpu()
                pred_cam = pred_cam.cpu()
                pred_pose = pred_pose.cpu()
                pred_betas = pred_betas.cpu()
                pred_joints3d = pred_joints3d.cpu()
                pred_verts[update] = new_opt_vertices[update]
                pred_cam[update] = new_opt_cam[update]
                pred_pose[update] = new_opt_pose[update]
                pred_betas[update] = new_opt_betas[update]
                pred_joints3d[update] = new_opt_joints3d[update]

            elif args.run_smplify and args.tracking_method == 'bbox':
                print('[WARNING] You need to enable pose tracking to run Temporal SMPLify algorithm!')
                print('[WARNING] Continuing without running Temporal SMPLify!..')

            # ========= Save results to a pickle file ========= #
            pred_cam = pred_cam.cpu().numpy()
            pred_verts = pred_verts.cpu().numpy()
            pred_pose = pred_pose.cpu().numpy()
            pred_betas = pred_betas.cpu().numpy()
            pred_joints3d = pred_joints3d.cpu().numpy()

            orig_cam = convert_crop_cam_to_orig_img(
                cam=pred_cam,
                bbox=bboxes,
                img_width=orig_width,
                img_height=orig_height
            )

            vibe_results[person_id] = {
                'pred_cam': pred_cam,
                'orig_cam': orig_cam,
                'verts': pred_verts,
                'pose': pred_pose,
                'betas': pred_betas,
                'joints3d': pred_joints3d,
                'joints2d': joints2d,
                'bboxes': bboxes,
                'frame_ids': frames,
            }

        # Save the outputs to joblib pkl file. File is loaded through joblib.load(pkl_path)
        output_pkl_path = osp.join(args.output_folder, f'{filename}.pkl')
        print(f'Saving output results to "{output_pkl_path}".')

        joblib.dump(vibe_results, output_pkl_path)

        if not args.no_render:
            # ========= Render results as a single video ========= #
            renderer = Renderer(resolution=(orig_width, orig_height),
                                orig_img=True,
                                wireframe=args.wireframe)

            output_img_folder = f'{image_folder}_output'
            os.makedirs(output_img_folder, exist_ok=True)

            print(f'Rendering output video, writing frames to {output_img_folder}')

            # prepare results for rendering
            frame_results = prepare_rendering_results(vibe_results, num_frames)
            mesh_color = {
                k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)
                for k in vibe_results.keys()
            }

            image_file_names = sorted(
                os.path.join(image_folder, x)
                for x in os.listdir(image_folder)
                if x.endswith(('.png', '.jpg'))
            )

            for frame_idx, img_fname in enumerate(tqdm(image_file_names)):
                img = cv2.imread(img_fname)

                if args.sideview:
                    side_img = np.zeros_like(img)

                for person_id, person_data in frame_results[frame_idx].items():
                    frame_verts = person_data['verts']
                    frame_cam = person_data['cam']
                    mc = mesh_color[person_id]

                    mesh_filename = None
                    if args.save_obj:
                        mesh_folder = os.path.join(output_path, 'meshes',
                                                   f'{person_id:04d}')
                        os.makedirs(mesh_folder, exist_ok=True)
                        mesh_filename = os.path.join(mesh_folder,
                                                     f'{frame_idx:06d}.obj')

                    img = renderer.render(
                        img,
                        frame_verts,
                        cam=frame_cam,
                        color=mc,
                        mesh_filename=mesh_filename,
                    )

                    if args.sideview:
                        side_img = renderer.render(
                            side_img,
                            frame_verts,
                            cam=frame_cam,
                            color=mc,
                            angle=270,
                            axis=[0, 1, 0],
                        )

                if args.sideview:
                    img = np.concatenate([img, side_img], axis=1)

                cv2.imwrite(
                    os.path.join(output_img_folder, f'{frame_idx:06d}.png'),
                    img)

                if args.display:
                    cv2.imshow('Video', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

            if args.display:
                cv2.destroyAllWindows()

            # ========= Save rendered video ========= #
            vid_name = os.path.basename(video_file)
            save_name = f'{vid_name.replace(".mp4", "")}_vibe_result.mp4'
            save_name = os.path.join(output_path, save_name)
            print(f'Saving result video to {save_name}')
            images_to_video(img_folder=output_img_folder,
                            output_vid_file=save_name)
            shutil.rmtree(output_img_folder)

        # Clean-up the temporary frame folder of this video so extracted
        # frames do not pile up across the list.
        shutil.rmtree(image_folder)

    # Clean-up after processing: the model is shared by every video, so it is
    # released only once the whole list is done.
    del model
    print('================= END =================')
def main(args):
    """Run the VIBE demo on one video and export poses plus a rendered video.

    The video is split into frames, people are tracked, VIBE is run on each
    tracklet (optionally refined with Temporal SMPLify), the results are
    dumped to ``<output>/vibe_output.pkl`` and, unless ``args.no_render`` is
    set, the first 25 3D joints of one person are saved to
    ``<video basename>_poses.npy`` in the working directory and the meshes
    are rendered into ``<output>/<name>_vibe_result.mp4``.

    Args:
        args: Namespace with the attributes read below: ``device``,
            ``vid_file``, ``img_file``, ``output_folder``,
            ``tracking_method``, ``staf_dir``, ``display``,
            ``tracker_batch_size``, ``detector``, ``yolo_img_size``,
            ``vibe_batch_size``, ``run_smplify``, ``no_render``,
            ``wireframe``, ``sideview`` and ``save_obj``.
    """
    if args.device == 'cpu':
        device = torch.device('cpu')
        print('Running on CPU')
    else:
        device = torch.device('cuda')
        print('Running on GPU')

    if args.vid_file:
        video_file = args.vid_file
        if not os.path.isfile(video_file):
            exit(f'Input video "{video_file}" does not exist!')
    else:
        image_file = args.img_file
        if not image_file or not os.path.isfile(image_file):
            exit(f'Input image "{image_file}" does not exist!')
        # Everything below works on a video file; the image branch used to
        # fall through and crash on the undefined `video_file`.
        exit('Single image input is not supported by this demo, '
             'please provide a video file.')

    output_path = os.path.join(
        args.output_folder,
        os.path.basename(video_file).replace('.mp4', ''))
    os.makedirs(output_path, exist_ok=True)

    image_folder, num_frames, img_shape = video_to_images(video_file,
                                                          return_info=True)

    print(f'Input video number of frames {num_frames}')
    orig_height, orig_width = img_shape[:2]

    total_time = time.time()

    # resize video if too big
    # ffmpeg -i input.avi -filter:v scale=720:-1 -c:a copy output.mkv

    # ========= Run tracking ========= #
    bbox_scale = 1.1
    if args.tracking_method == 'pose':
        if not os.path.isabs(video_file):
            video_file = os.path.join(os.getcwd(), video_file)
        tracking_results = run_posetracker(video_file,
                                           staf_folder=args.staf_dir,
                                           display=args.display)
    else:
        # run multi object tracker
        mot = MPT(
            device=device,
            batch_size=args.tracker_batch_size,
            display=args.display,
            detector_type=args.detector,
            output_format='dict',
            yolo_img_size=args.yolo_img_size,
        )
        tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Define VIBE model ========= #
    model = VIBE_Demo(
        seqlen=16,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load pretrained weights ========= #
    pretrained_file = download_ckpt(use_3dpw=True)
    # map_location makes `--device cpu` work with a CUDA-saved checkpoint.
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f'Performance of pretrained model on 3DPW: {ckpt["performance"]}')
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from "{pretrained_file}"')

    # ========= Run VIBE on each person ========= #
    print('Running VIBE on each tracklet...')
    vibe_time = time.time()
    vibe_results = {}
    for person_id in tqdm(list(tracking_results.keys())):
        bboxes = joints2d = None

        if args.tracking_method == 'bbox':
            bboxes = tracking_results[person_id]['bbox']
        elif args.tracking_method == 'pose':
            joints2d = tracking_results[person_id]['joints2d']

        frames = tracking_results[person_id]['frames']

        dataset = Inference(
            image_folder=image_folder,
            frames=frames,
            bboxes=bboxes,
            joints2d=joints2d,
            scale=bbox_scale,
        )

        bboxes = dataset.bboxes
        frames = dataset.frames
        has_keypoints = joints2d is not None

        dataloader = DataLoader(dataset,
                                batch_size=args.vibe_batch_size,
                                num_workers=16)

        with torch.no_grad():
            pred_cam, pred_verts, pred_pose = [], [], []
            pred_betas, pred_joints3d, norm_joints2d = [], [], []

            for batch in dataloader:
                if has_keypoints:
                    batch, nj2d = batch
                    norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                batch = batch.unsqueeze(0)
                batch = batch.to(device)

                batch_size, seqlen = batch.shape[:2]
                n_rows = batch_size * seqlen
                output = model(batch)[-1]
                theta = output['theta']

                # theta is sliced into [:3] camera, [3:75] pose, [75:] betas
                pred_cam.append(theta[:, :, :3].reshape(n_rows, -1))
                pred_verts.append(output['verts'].reshape(n_rows, -1, 3))
                pred_pose.append(theta[:, :, 3:75].reshape(n_rows, -1))
                pred_betas.append(theta[:, :, 75:].reshape(n_rows, -1))
                pred_joints3d.append(output['kp_3d'].reshape(n_rows, -1, 3))

            pred_cam = torch.cat(pred_cam, dim=0)
            pred_verts = torch.cat(pred_verts, dim=0)
            pred_pose = torch.cat(pred_pose, dim=0)
            pred_betas = torch.cat(pred_betas, dim=0)
            pred_joints3d = torch.cat(pred_joints3d, dim=0)

            del batch

        # ========= [Optional] run Temporal SMPLify to refine the results ========= #
        if args.run_smplify and args.tracking_method == 'pose':
            norm_joints2d = np.concatenate(norm_joints2d, axis=0)
            norm_joints2d = convert_kps(norm_joints2d, src='staf', dst='spin')
            norm_joints2d = torch.from_numpy(norm_joints2d).float().to(device)

            # Run Temporal SMPLify
            update, new_opt_vertices, new_opt_cam, new_opt_pose, new_opt_betas, \
                new_opt_joints3d, new_opt_joint_loss, opt_joint_loss = smplify_runner(
                    pred_rotmat=pred_pose,
                    pred_betas=pred_betas,
                    pred_cam=pred_cam,
                    j2d=norm_joints2d,
                    device=device,
                    batch_size=norm_joints2d.shape[0],
                    pose2aa=False,
                )

            # update the parameters after refinement
            print(
                f'Update ratio after Temporal SMPLify: {update.sum()} / {norm_joints2d.shape[0]}'
            )
            pred_verts = pred_verts.cpu()
            pred_cam = pred_cam.cpu()
            pred_pose = pred_pose.cpu()
            pred_betas = pred_betas.cpu()
            pred_joints3d = pred_joints3d.cpu()
            pred_verts[update] = new_opt_vertices[update]
            pred_cam[update] = new_opt_cam[update]
            pred_pose[update] = new_opt_pose[update]
            pred_betas[update] = new_opt_betas[update]
            pred_joints3d[update] = new_opt_joints3d[update]

        elif args.run_smplify and args.tracking_method == 'bbox':
            print(
                '[WARNING] You need to enable pose tracking to run Temporal SMPLify algorithm!'
            )
            print('[WARNING] Continuing without running Temporal SMPLify!..')

        # ========= Save results to a pickle file ========= #
        pred_cam = pred_cam.cpu().numpy()
        pred_verts = pred_verts.cpu().numpy()
        pred_pose = pred_pose.cpu().numpy()
        pred_betas = pred_betas.cpu().numpy()
        pred_joints3d = pred_joints3d.cpu().numpy()

        orig_cam = convert_crop_cam_to_orig_img(cam=pred_cam,
                                                bbox=bboxes,
                                                img_width=orig_width,
                                                img_height=orig_height)

        vibe_results[person_id] = {
            'pred_cam': pred_cam,
            'orig_cam': orig_cam,
            'verts': pred_verts,
            'pose': pred_pose,
            'betas': pred_betas,
            'joints3d': pred_joints3d,
            'joints2d': joints2d,
            'bboxes': bboxes,
            'frame_ids': frames,
        }

    del model

    end = time.time()
    fps = num_frames / (end - vibe_time)

    print(f'VIBE FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(
        f'Total time spent: {total_time:.2f} seconds (including model loading time).'
    )
    print(
        f'Total FPS (including model loading time): {num_frames / total_time:.2f}.'
    )

    output_pkl_path = os.path.join(output_path, "vibe_output.pkl")
    print(f'Saving output results to "{output_pkl_path}".')
    joblib.dump(vibe_results, output_pkl_path)

    if not args.no_render:
        # ========= Render results as a single video ========= #
        renderer = Renderer(resolution=(orig_width, orig_height),
                            orig_img=True,
                            wireframe=args.wireframe)

        output_img_folder = f'{image_folder}_images'
        os.makedirs(output_img_folder, exist_ok=True)
        print(f'Rendering output video, writing frames to {output_img_folder}')

        output_pose_folder = f'{image_folder}_poses'
        os.makedirs(output_pose_folder, exist_ok=True)
        print(f'Saving poses to {output_pose_folder}')

        # Export the first 25 joints of one person. Person id 1 is preferred
        # (the previously hard-coded choice); fall back to the first tracked
        # person instead of raising KeyError when id 1 is absent.
        if vibe_results:
            pose_person_id = 1 if 1 in vibe_results else next(iter(vibe_results))
            poses_file = f'{os.path.basename(video_file)}_poses.npy'
            np.save(poses_file,
                    vibe_results[pose_person_id]['joints3d'][:, :25, :])
            print(f'Saving numpy poses file to {poses_file}')
        else:
            print('[WARNING] No person was tracked, skipping numpy poses file.')

        # prepare results for rendering
        frame_results = prepare_rendering_results(vibe_results, num_frames)
        mesh_color = {
            k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)
            for k in vibe_results.keys()
        }

        image_file_names = sorted(
            os.path.join(image_folder, x)
            for x in os.listdir(image_folder)
            if x.endswith(('.png', '.jpg'))
        )

        for frame_idx, img_fname in enumerate(tqdm(image_file_names)):
            img = cv2.imread(img_fname)

            if args.sideview:
                side_img = np.zeros_like(img)

            for person_id, person_data in frame_results[frame_idx].items():
                frame_verts = person_data['verts']
                frame_cam = person_data['cam']
                mc = mesh_color[person_id]

                mesh_filename = None
                if args.save_obj:
                    mesh_folder = os.path.join(output_path, 'meshes',
                                               f'{person_id:04d}')
                    os.makedirs(mesh_folder, exist_ok=True)
                    mesh_filename = os.path.join(mesh_folder,
                                                 f'{frame_idx:06d}.obj')

                # bgr image (opencv format)
                img = renderer.render(
                    img,
                    frame_verts,
                    cam=frame_cam,
                    color=mc,
                    mesh_filename=mesh_filename,
                )

                if args.sideview:
                    side_img = renderer.render(
                        side_img,
                        frame_verts,
                        cam=frame_cam,
                        color=mc,
                        angle=270,
                        axis=[0, 1, 0],
                    )

            if args.sideview:
                img = np.concatenate([img, side_img], axis=1)

            cv2.imwrite(
                os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img)

            if args.display:
                cv2.imshow('Video', img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if args.display:
            cv2.destroyAllWindows()

        # ========= Save rendered video ========= #
        vid_name = os.path.basename(video_file)
        save_name = f'{vid_name.replace(".mp4", "")}_vibe_result.mp4'
        save_name = os.path.join(output_path, save_name)
        print(f'Saving result video to {save_name}')
        images_to_video(img_folder=output_img_folder,
                        output_vid_file=save_name)
        # The rendered frame folder is kept on disk (its removal was already
        # disabled in the original code).

    shutil.rmtree(image_folder)
    print('================= END =================')
def runDemo(image_folder, output_folder, pretrained, tracker_batch_size=12, vibe_batch_size=450, wireframe=False):
    """Track people in a folder of frames, run VIBE per tracklet and render a video.

    Args:
        image_folder: Directory holding the input frames (``.png`` / ``.jpg``).
        output_folder: Directory that receives ``vibe_output.pkl`` and
            ``vibe_result.mp4``.
        pretrained: Path of the checkpoint file; its ``'gen_state_dict'``
            entry is loaded into the model (non-strict).
        tracker_batch_size: Batch size handed to the multi-person tracker.
        vibe_batch_size: Batch size of the per-person ``DataLoader``.
        wireframe: Forwarded to the ``Renderer``.

    Side effects:
        Creates ``<output_folder>/<basename(image_folder)>``, writes rendered
        frames to ``<image_folder>_output`` and removes that folder once the
        video has been assembled.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Creating this sub-folder also guarantees that ``output_folder`` exists.
    output_path = os.path.join(output_folder, os.path.basename(image_folder).replace('.mp4', ''))
    os.makedirs(output_path, exist_ok=True)

    num_frames, img_shape = img_folder_Info(image_folder)
    print(f'Input video number of frames {num_frames}')
    orig_height, orig_width = img_shape[:2]

    total_time = time.time()

    # ========= Run tracking ========= #
    bbox_scale = 1.0

    # run multi object tracker
    mot = MPT(
        device=device,
        batch_size=tracker_batch_size,
        display=False,
        detector_type='yolo',
        output_format='dict',
        yolo_img_size=416,
    )
    tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Define VIBE model ========= #
    model = e2e_VIBE(
        seqlen=16,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load pretrained weights ========= #
    pretrained_file = pretrained
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
    ckpt = torch.load(pretrained_file, map_location=device)
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from \"{pretrained_file}\"')

    # ========= Run VIBE on each person ========= #
    print('Running VIBE on each tracklet...')
    vibe_time = time.time()
    vibe_results = {}
    for person_id in tqdm(list(tracking_results.keys())):
        person_start_time = time.time()
        joints2d = None

        bboxes = tracking_results[person_id]['bbox']
        frames = tracking_results[person_id]['frames']

        dataset = Inference(
            image_folder=image_folder,
            frames=frames,
            bboxes=bboxes,
            joints2d=joints2d,
            scale=bbox_scale,
        )

        # The dataset exposes its own bboxes / frames; those are the ones stored.
        bboxes = dataset.bboxes
        frames = dataset.frames

        dataloader = DataLoader(dataset, batch_size=vibe_batch_size, num_workers=16)

        with torch.no_grad():
            pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d = [], [], [], [], []

            for batch in dataloader:
                # Add a leading axis so the loader batch is fed as one sequence.
                batch = batch.unsqueeze(0)
                batch = batch.to(device)

                batch_size, seqlen = batch.shape[:2]
                flat = batch_size * seqlen
                output = model(batch)[-1]

                theta = output['theta']
                pred_cam.append(theta[:, :, :3].reshape(flat, -1))
                pred_verts.append(output['verts'].reshape(flat, -1, 3))
                pred_pose.append(theta[:, :, 3:75].reshape(flat, -1))
                pred_betas.append(theta[:, :, 75:].reshape(flat, -1))
                pred_joints3d.append(output['kp_3d'].reshape(flat, -1, 3))

            pred_cam = torch.cat(pred_cam, dim=0)
            pred_verts = torch.cat(pred_verts, dim=0)
            pred_pose = torch.cat(pred_pose, dim=0)
            pred_betas = torch.cat(pred_betas, dim=0)
            pred_joints3d = torch.cat(pred_joints3d, dim=0)

            del batch

        person_end_time = time.time()
        person_time = person_end_time - person_start_time
        person_frame = len(frames)
        print(f'Person Time: {person_time:.2f}, Person FPS:{person_frame/person_time: .2f} ')

        # ========= Save results to a pickle file ========= #
        pred_cam = pred_cam.cpu().numpy()
        pred_verts = pred_verts.cpu().numpy()
        pred_pose = pred_pose.cpu().numpy()
        pred_betas = pred_betas.cpu().numpy()
        pred_joints3d = pred_joints3d.cpu().numpy()

        orig_cam = convert_crop_cam_to_orig_img(
            cam=pred_cam,
            bbox=bboxes,
            img_width=orig_width,
            img_height=orig_height
        )

        vibe_results[person_id] = {
            'pred_cam': pred_cam,
            'orig_cam': orig_cam,
            'verts': pred_verts,
            'pose': pred_pose,
            'betas': pred_betas,
            'joints3d': pred_joints3d,
            'joints2d': joints2d,
            'bboxes': bboxes,
            'frame_ids': frames,
        }

    del model

    end = time.time()
    fps = num_frames / (end - vibe_time)

    print(f'VIBE FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(f'Total time spent: {total_time:.2f} seconds (including model loading time).')
    print(f'Total FPS (including model loading time): {num_frames / total_time:.2f}.')

    # Report the path that is really written: the pickle goes to
    # ``output_folder``, not to the per-input sub-folder.
    results_file = os.path.join(output_folder, "vibe_output.pkl")
    print(f'Saving output results to \"{results_file}\".')
    joblib.dump(vibe_results, results_file)

    # ========= Render results as a single video ========= #
    renderer = Renderer(resolution=(orig_width, orig_height), orig_img=True, wireframe=wireframe)

    output_img_folder = f'{image_folder}_output'
    os.makedirs(output_img_folder, exist_ok=True)

    print(f'Rendering output video, writing frames to {output_img_folder}')

    # prepare results for rendering
    frame_results = prepare_rendering_results(vibe_results, num_frames)
    # One random hue per tracked person.
    mesh_color = {k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0) for k in vibe_results.keys()}

    image_file_names = sorted([
        os.path.join(image_folder, x)
        for x in os.listdir(image_folder)
        if x.endswith('.png') or x.endswith('.jpg')
    ])

    for frame_idx in tqdm(range(len(image_file_names))):
        img_fname = image_file_names[frame_idx]
        img = cv2.imread(img_fname)

        # Draw every person visible in this frame onto the same image.
        for person_id, person_data in frame_results[frame_idx].items():
            frame_verts = person_data['verts']
            frame_cam = person_data['cam']

            mc = mesh_color[person_id]

            img = renderer.render(
                img,
                frame_verts,
                cam=frame_cam,
                color=mc,
                mesh_filename=None,
            )

        cv2.imwrite(os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img)

    # ========= Save rendered video ========= #
    save_name = 'vibe_result.mp4'
    save_name = os.path.join(output_folder, save_name)
    print(f'Saving result video to {save_name}')
    images_to_video(img_folder=output_img_folder, output_vid_file=save_name)
    shutil.rmtree(output_img_folder)
    print('================= END =================')
cv2.imwrite(os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img) if args.display: cv2.imshow('Video', img) if cv2.waitKey(1) & 0xFF == ord('q'): break if args.display: cv2.destroyAllWindows() # ========= Save rendered video ========= # vid_name = os.path.basename(video_file) save_name = f'{vid_name.replace(".mp4", "")}_vibe_result.mp4' save_name = os.path.join(output_path, save_name) print(f'Saving result video to {save_name}') images_to_video(img_folder=output_img_folder, output_vid_file=save_name) shutil.rmtree(output_img_folder) shutil.rmtree(image_folder) print('================= END =================') if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--vid_file', type=str, help='input video path or youtube link') parser.add_argument('--output_folder', type=str, help='output folder to write results')
def main(args):
    """Run the TCMR demo on one video: track, regress meshes, render, save.

    Reads from ``args``: ``vid_file``, ``tracker_batch_size``, ``display``,
    ``detector``, ``yolo_img_size``, ``model``, ``gender``, ``render_plain``,
    ``save_pkl``, ``wireframe``, ``sideview`` and ``save_obj``.

    Outputs go to ``./output/demo_output/<video name>``: the rendered video,
    a re-encoded copy of the input frames, optionally ``tcmr_output.pkl`` and
    per-frame ``.obj`` meshes. All temporary frame folders are removed at the
    end.
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # ========= Prepare input video (images) ========= #
    video_file = args.vid_file
    if video_file.startswith('https://www.youtube.com'):
        print(f"Donwloading YouTube video \'{video_file}\'")
        video_file = download_youtube_clip(video_file, '/tmp')
        if video_file is None:
            exit('Youtube url is not valid!')
        print(f"YouTube Video has been downloaded to {video_file}...")

    if not os.path.isfile(video_file):
        exit(f"Input video \'{video_file}\' does not exist!")

    output_path = osp.join('./output/demo_output',
                           os.path.basename(video_file).replace('.mp4', ''))
    Path(output_path).mkdir(parents=True, exist_ok=True)
    image_folder, num_frames, img_shape = video_to_images(video_file,
                                                          return_info=True)

    print(f"Input video number of frames {num_frames}\n")
    orig_height, orig_width = img_shape[:2]

    # ========= Run tracking ========= #
    total_time = time.time()
    bbox_scale = 1.2
    # run multi object tracker
    mot = MPT(
        device=device,
        batch_size=args.tracker_batch_size,
        display=args.display,
        detector_type=args.detector,
        output_format='dict',
        yolo_img_size=args.yolo_img_size,
    )
    tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Get TCMR model ========= #
    seq_len = 16
    model = TCMR(seqlen=seq_len, n_layers=2, hidden_size=1024).to(device)

    # Load pretrained weights; map_location keeps CPU-only hosts working when
    # the checkpoint was saved from a GPU.
    pretrained_file = args.model
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f"Load pretrained weights from \'{pretrained_file}\'")
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)

    # Change mesh gender
    gender = args.gender  # 'neutral', 'male', 'female'

    # Follow the selected device instead of forcing CUDA, which would crash
    # on the CPU fallback chosen above.
    model.regressor.smpl = SMPL(SMPL_MODEL_DIR,
                                batch_size=64,
                                create_transl=False,
                                gender=gender).to(device)

    model.eval()

    # Get feature_extractor
    from lib.models.spin import hmr
    hmr = hmr().to(device)
    checkpoint = torch.load(
        osp.join(BASE_DATA_DIR, 'spin_model_checkpoint.pth.tar'),
        map_location=device)
    hmr.load_state_dict(checkpoint['model'], strict=False)
    hmr.eval()

    # ========= Run TCMR on each person ========= #
    print("\nRunning TCMR on each person tracklet...")
    tcmr_time = time.time()
    tcmr_results = {}
    for person_id in tqdm(list(tracking_results.keys())):
        bboxes = joints2d = None

        bboxes = tracking_results[person_id]['bbox']
        frames = tracking_results[person_id]['frames']

        # Prepare static image features
        dataset = CropDataset(
            image_folder=image_folder,
            frames=frames,
            bboxes=bboxes,
            joints2d=joints2d,
            scale=bbox_scale,
        )

        bboxes = dataset.bboxes
        frames = dataset.frames
        # joints2d is never assigned in this function, so this stays False and
        # the keypoint branches below are not taken.
        has_keypoints = True if joints2d is not None else False

        crop_dataloader = DataLoader(dataset, batch_size=256, num_workers=16)

        with torch.no_grad():
            feature_list = []
            for i, batch in enumerate(crop_dataloader):
                if has_keypoints:
                    batch, nj2d = batch
                    norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                batch = batch.to(device)
                feature = hmr.feature_extractor(batch.reshape(-1, 3, 224, 224))
                # Keep features on the CPU so GPU memory is not held per crop batch.
                feature_list.append(feature.cpu())

            del batch

            feature_list = torch.cat(feature_list, dim=0)

        # Encode temporal features and estimate 3D human mesh
        dataset = FeatureDataset(
            image_folder=image_folder,
            frames=frames,
            seq_len=seq_len,
        )
        dataset.feature_list = feature_list

        dataloader = DataLoader(dataset, batch_size=64, num_workers=32)
        with torch.no_grad():
            pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d, norm_joints2d = [], [], [], [], [], []

            for i, batch in enumerate(dataloader):
                if has_keypoints:
                    batch, nj2d = batch
                    norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                batch = batch.to(device)
                output = model(batch)[0][-1]

                pred_cam.append(output['theta'][:, :3])
                pred_verts.append(output['verts'])
                pred_pose.append(output['theta'][:, 3:75])
                pred_betas.append(output['theta'][:, 75:])
                pred_joints3d.append(output['kp_3d'])

            pred_cam = torch.cat(pred_cam, dim=0)
            pred_verts = torch.cat(pred_verts, dim=0)
            pred_pose = torch.cat(pred_pose, dim=0)
            pred_betas = torch.cat(pred_betas, dim=0)
            pred_joints3d = torch.cat(pred_joints3d, dim=0)

            del batch

        # ========= Save results to a pickle file ========= #
        pred_cam = pred_cam.cpu().numpy()
        pred_verts = pred_verts.cpu().numpy()
        pred_pose = pred_pose.cpu().numpy()
        pred_betas = pred_betas.cpu().numpy()
        pred_joints3d = pred_joints3d.cpu().numpy()

        # Enlarge columns 2+ of each bbox in place before the camera conversion.
        bboxes[:, 2:] = bboxes[:, 2:] * 1.2
        if args.render_plain:
            # Replace every predicted camera by [1, 0, 0].
            pred_cam[:, 0], pred_cam[:, 1:] = 1, 0  # np.array([[1, 0, 0]])
        orig_cam = convert_crop_cam_to_orig_img(cam=pred_cam,
                                                bbox=bboxes,
                                                img_width=orig_width,
                                                img_height=orig_height)

        output_dict = {
            'pred_cam': pred_cam,
            'orig_cam': orig_cam,
            'verts': pred_verts,
            'pose': pred_pose,
            'betas': pred_betas,
            'joints3d': pred_joints3d,
            'joints2d': joints2d,
            'bboxes': bboxes,
            'frame_ids': frames,
        }

        tcmr_results[person_id] = output_dict

    del model

    end = time.time()
    fps = num_frames / (end - tcmr_time)
    print(f'TCMR FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(
        f'Total time spent: {total_time:.2f} seconds (including model loading time).'
    )
    print(
        f'Total FPS (including model loading time): {num_frames / total_time:.2f}.'
    )

    if args.save_pkl:
        print(
            f"Saving output results to \'{os.path.join(output_path, 'tcmr_output.pkl')}\'."
        )
        joblib.dump(tcmr_results, os.path.join(output_path, "tcmr_output.pkl"))

    # ========= Render results as a single video ========= #
    renderer = Renderer(resolution=(orig_width, orig_height),
                        orig_img=True,
                        wireframe=args.wireframe)

    output_img_folder = f'{image_folder}_output'
    input_img_folder = f'{image_folder}_input'
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(input_img_folder, exist_ok=True)

    print(f"\nRendering output video, writing frames to {output_img_folder}")
    # prepare results for rendering
    frame_results = prepare_rendering_results(tcmr_results, num_frames)
    mesh_color = {
        k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)
        for k in tcmr_results.keys()
    }

    image_file_names = sorted([
        os.path.join(image_folder, x) for x in os.listdir(image_folder)
        if x.endswith('.png') or x.endswith('.jpg')
    ])

    for frame_idx in tqdm(range(len(image_file_names))):
        img_fname = image_file_names[frame_idx]
        img = cv2.imread(img_fname)
        # Keep the untouched frame: it is written to the "input" video below.
        input_img = img.copy()
        if args.render_plain:
            img[:] = 0

        if args.sideview:
            side_img = np.zeros_like(img)

        for person_id, person_data in frame_results[frame_idx].items():
            frame_verts = person_data['verts']
            frame_cam = person_data['cam']

            mesh_filename = None
            if args.save_obj:
                mesh_folder = os.path.join(output_path, 'meshes',
                                           f'{person_id:04d}')
                Path(mesh_folder).mkdir(parents=True, exist_ok=True)
                mesh_filename = os.path.join(mesh_folder,
                                             f'{frame_idx:06d}.obj')

            mc = mesh_color[person_id]

            img = renderer.render(
                img,
                frame_verts,
                cam=frame_cam,
                color=mc,
                mesh_filename=mesh_filename,
            )
            if args.sideview:
                side_img = renderer.render(
                    side_img,
                    frame_verts,
                    cam=frame_cam,
                    color=mc,
                    angle=270,
                    axis=[0, 1, 0],
                )

        if args.sideview:
            img = np.concatenate([img, side_img], axis=1)

        # save output frames
        cv2.imwrite(os.path.join(output_img_folder, f'{frame_idx:06d}.jpg'),
                    img)
        cv2.imwrite(os.path.join(input_img_folder, f'{frame_idx:06d}.jpg'),
                    input_img)

        if args.display:
            cv2.imshow('Video', img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    if args.display:
        cv2.destroyAllWindows()

    # ========= Save rendered video ========= #
    vid_name = os.path.basename(video_file)
    save_name = f'tcmr_{vid_name.replace(".mp4", "")}_output.mp4'
    save_path = os.path.join(output_path, save_name)

    images_to_video(img_folder=output_img_folder, output_vid_file=save_path)
    images_to_video(img_folder=input_img_folder,
                    output_vid_file=os.path.join(output_path, vid_name))
    print(f"Saving result video to {os.path.abspath(save_path)}")
    shutil.rmtree(output_img_folder)
    shutil.rmtree(input_img_folder)
    shutil.rmtree(image_folder)
def main(args):
    """Run VIBE live on webcam frames until ``q`` is pressed or a frame cap is hit.

    Reads from ``args``: ``sequence_length``, ``yolo_img_size``,
    ``output_folder``, ``detector``, ``live_display``, ``tracking_method``,
    ``staf_dir``, ``display``, ``max_frames`` and ``no_render``; ``args`` is
    also forwarded to ``temporal_simplify`` and ``render``.

    Only the first tracked person of each capture is processed. Captured
    frames are kept in memory and, unless ``args.no_render`` is set, written
    to ``live_rendered_images`` and rendered into a video by ``render``.
    The scratch folder ``live_imgs`` is removed at the end.
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    SEQ_LENGTH = args.sequence_length
    MIN_NUM_FRAMES = 1
    # Don't change this
    TRACKER_BATCH_SIZE = MIN_NUM_FRAMES
    yolo_img_size = args.yolo_img_size

    image_folder = 'live_rendered_images'
    output_path = args.output_folder
    os.makedirs(image_folder, exist_ok=True)
    os.makedirs(output_path, exist_ok=True)
    os.makedirs('live_imgs', exist_ok=True)

    model = VIBE_Demo(seqlen=SEQ_LENGTH,
                      n_layers=2,
                      hidden_size=1024,
                      add_linear=True,
                      use_residual=True,
                      live_inference=True).to(device)

    pretrained_file = download_ckpt(use_3dpw=False)
    # map_location lets a GPU-saved checkpoint load on a CPU-only host.
    ckpt = torch.load(pretrained_file, map_location=device)
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from \"{pretrained_file}\"')

    mot = MPT(
        device=device,
        batch_size=TRACKER_BATCH_SIZE,
        display=False,
        detector_type=args.detector,
        output_format='dict',
        yolo_img_size=yolo_img_size,
    )

    # An asynchronous camera implementation to run cv2 camera in background while model is running
    cap = AsyncCamera(0, display=args.live_display)

    bbox_scale = 1.1
    i = 0
    # Stays None until at least one capture has been processed.
    orig_dim = None
    # These start as empty lists and become ndarrays after the first detection.
    bbox_lis, frame_lis, images_lis, joints2d_lis = [], [], [], []
    pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d, norm_joints2d = [], [], [], [], [], []

    while True:
        # If q is pressed cap.stop will turn True
        if cap.stop:
            break

        ret, captured_frames = cap.read()
        if not ret:
            continue
        if len(captured_frames) < MIN_NUM_FRAMES:
            continue

        images = get_images_from_captures(captured_frames, MIN_NUM_FRAMES)
        cap.del_frame_lis()

        orig_height, orig_width = images[0].shape[:2]
        orig_dim = (orig_height, orig_width)

        saveToDir(images)

        if args.tracking_method == 'pose':
            images_to_video('./live_imgs', './live_imgs/pose_video.mp4')
            tracking_results = run_posetracker('live_imgs/pose_video.mp4',
                                               staf_folder=args.staf_dir,
                                               display=args.display)
        else:
            tracking_results = mot('./live_imgs')

        if args.live_display:
            cap.set_display_image(images[-1])

        if len(tracking_results.keys()) == 0:
            print('Unable to detect any person')

        # Every captured image is kept, whether or not a person was found.
        for image in images:
            images_lis.append(image)

        if len(tracking_results.keys()) != 0:
            # Only the first tracked person is used.
            person_id = list(tracking_results.keys())[0]
            print(person_id)
            frames = tracking_results[person_id]['frames']

            bboxes, joints2d = None, None

            # ``len(...) == 0`` is used for the emptiness tests below because
            # comparing an ndarray with ``[]`` is not a reliable check once
            # the accumulator has been replaced by an array.
            if args.tracking_method == 'pose':
                joints2d = tracking_results[person_id]['joints2d']
                if len(joints2d_lis) == 0:
                    joints2d_lis = joints2d
                else:
                    joints2d_lis = np.vstack([joints2d_lis, joints2d])
            else:
                bboxes = tracking_results[person_id]['bbox']
                if len(bbox_lis) == 0:
                    bbox_lis = bboxes
                else:
                    bbox_lis = np.vstack([bbox_lis, bboxes])

            # Shift the tracker's per-capture frame ids by the loop counter.
            for x in (1 + i + frames - MIN_NUM_FRAMES):
                frame_lis.append(x)

            dataset = LiveInference(
                images=images_lis[-SEQ_LENGTH:],
                frames=frame_lis[-SEQ_LENGTH:],
                bboxes=bbox_lis[-SEQ_LENGTH:],
                joints2d=joints2d_lis[-SEQ_LENGTH:]
                if joints2d is not None else None,
                scale=bbox_scale,
            )

            bboxes = dataset.bboxes
            if args.tracking_method == 'pose':
                # In pose mode the boxes come from the dataset, not the tracker.
                if len(bbox_lis) == 0:
                    bbox_lis = bboxes
                else:
                    bbox_lis = np.vstack([bbox_lis, bboxes[-1:]])

            cap.set_bounding_box(bbox_lis[-1])
            has_keypoints = True if joints2d is not None else False
            norm_joints2d = []

            with torch.no_grad():
                # A manual implementation for getting data since dataloader is slow for few inputs
                tup = [
                    dataset.__getitem__(x) for x in range(dataset.__len__())
                ]

                if has_keypoints:
                    for j, batch in enumerate(tup):
                        tup[j], nj2d = batch
                        norm_joints2d.append(nj2d[:21, :].reshape(-1, 21, 3))

                for j, x in enumerate(tup):
                    tup[j] = x.unsqueeze(0)
                tup = tuple(tup)
                batch = torch.cat((tup), 0)

                batch = batch.unsqueeze(0)
                batch = batch.to(device)

                batch_size = batch.shape[0]

                # Send only latest image to hmr for faster inferencing
                output = model(batch[:, -1:, :, :, :])[-1]

                pred_cam.append(
                    output['theta'][:, -MIN_NUM_FRAMES:, :3].reshape(
                        batch_size * MIN_NUM_FRAMES, -1))
                pred_verts.append(
                    output['verts'][:, -MIN_NUM_FRAMES:, ].reshape(
                        batch_size * MIN_NUM_FRAMES, -1, 3))
                pred_pose.append(
                    output['theta'][:, -MIN_NUM_FRAMES:, ][:, :, 3:75].reshape(
                        batch_size * MIN_NUM_FRAMES, -1))
                pred_betas.append(
                    output['theta'][:, -MIN_NUM_FRAMES:, ][:, :, 75:].reshape(
                        batch_size * MIN_NUM_FRAMES, -1))
                pred_joints3d.append(
                    output['kp_3d'][:, -MIN_NUM_FRAMES:, ].reshape(
                        batch_size * MIN_NUM_FRAMES, -1, 3))

                del batch

            # NOTE(review): the collapsed source does not show whether this
            # call sat inside the no_grad block; it is placed outside here —
            # confirm against the original layout.
            (pred_verts[-MIN_NUM_FRAMES:],
             pred_cam[-MIN_NUM_FRAMES:],
             pred_pose[-MIN_NUM_FRAMES:],
             pred_betas[-MIN_NUM_FRAMES:],
             pred_joints3d[-MIN_NUM_FRAMES:],
             norm_joints2d[-MIN_NUM_FRAMES:]) = temporal_simplify(
                 pred_verts[-MIN_NUM_FRAMES:],
                 pred_cam[-MIN_NUM_FRAMES:],
                 pred_pose[-MIN_NUM_FRAMES:],
                 pred_betas[-MIN_NUM_FRAMES:],
                 pred_joints3d[-MIN_NUM_FRAMES:],
                 norm_joints2d[-MIN_NUM_FRAMES:],
                 device, args)

            # Return value intentionally unused here, as in the original call.
            get_vibe_results(
                pred_cam[-MIN_NUM_FRAMES:],
                pred_verts[-MIN_NUM_FRAMES:],
                pred_pose[-MIN_NUM_FRAMES:],
                pred_betas[-MIN_NUM_FRAMES:],
                pred_joints3d[-MIN_NUM_FRAMES:],
                joints2d_lis[-MIN_NUM_FRAMES:],
                bbox_lis[-MIN_NUM_FRAMES:],
                frame_lis[-MIN_NUM_FRAMES],
                orig_dim, 0)

        # NOTE(review): counter assumed to advance once per processed capture
        # so frame ids stay aligned with ``images_lis`` — confirm.
        i = i + 1
        if i == args.max_frames:
            break

    del model

    if orig_dim is None:
        # The loop was stopped before a single capture was processed, so there
        # is no image size and nothing to assemble or render.
        print('No frames were captured; nothing to process.')
        shutil.rmtree('live_imgs')
        print('================= END =================')
        return

    vibe_results = get_vibe_results(pred_cam, pred_verts, pred_pose,
                                    pred_betas, pred_joints3d, joints2d_lis,
                                    bbox_lis, frame_lis, orig_dim, 0)

    if not args.no_render:
        for idx, image in enumerate(images_lis):
            cv2.imwrite(f'{image_folder}/{(idx):06d}.jpg', image)
        print(frame_lis)
        render(orig_dim, frame_lis, vibe_results, image_folder, output_path,
               len(images_lis), args)

    shutil.rmtree('live_imgs')
    print('================= END =================')
def render(orig_dim, frame_lis, vibe_results, image_folder, output_path,
           num_frames, args):
    """Overlay the predicted meshes on the saved frames and encode a video.

    Args:
        orig_dim: ``(height, width)`` used for the renderer resolution.
        frame_lis: Accepted for interface compatibility; not read here.
        vibe_results: Per-person results passed to
            ``prepare_rendering_results``; its keys select the mesh colours.
        image_folder: Folder with the input ``.png`` / ``.jpg`` frames. It is
            deleted once the video has been written.
        output_path: Folder receiving ``live_vibe_result.mp4`` and, with
            ``args.save_obj``, the ``meshes/<person>/<frame>.obj`` files.
        num_frames: Frame count passed to ``prepare_rendering_results``.
        args: Options object; ``wireframe``, ``sideview``, ``save_obj`` and
            ``display`` are read.
    """
    height, width = orig_dim
    renderer = Renderer(resolution=(width, height),
                        orig_img=True,
                        wireframe=args.wireframe)

    output_img_folder = 'live_result_output'
    os.makedirs(output_img_folder, exist_ok=True)
    print(f'Rendering output video, writing frames to {output_img_folder}')

    # Per-frame lookup: person id -> data needed to draw that person.
    per_frame = prepare_rendering_results(vibe_results, num_frames)

    # One random hue per person.
    palette = {}
    for key in vibe_results.keys():
        palette[key] = colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0)

    frame_paths = sorted(
        os.path.join(image_folder, name)
        for name in os.listdir(image_folder)
        if name.endswith(('.png', '.jpg')))

    for frame_idx, frame_path in enumerate(tqdm(frame_paths)):
        img = cv2.imread(frame_path)
        side_img = np.zeros_like(img) if args.sideview else None

        for person_id, person_data in per_frame[frame_idx].items():
            verts = person_data['verts']
            cam = person_data['cam']
            colour = palette[person_id]

            # Only export a mesh file when requested.
            obj_path = None
            if args.save_obj:
                mesh_dir = os.path.join(output_path, 'meshes',
                                        f'{person_id:04d}')
                os.makedirs(mesh_dir, exist_ok=True)
                obj_path = os.path.join(mesh_dir, f'{frame_idx:06d}.obj')

            img = renderer.render(
                img,
                verts,
                cam=cam,
                color=colour,
                mesh_filename=obj_path,
            )

            if args.sideview:
                side_img = renderer.render(
                    side_img,
                    verts,
                    cam=cam,
                    color=colour,
                    angle=270,
                    axis=[0, 1, 0],
                )

        if args.sideview:
            # Front view on the left, side view on the right.
            img = np.concatenate([img, side_img], axis=1)

        frame_out = os.path.join(output_img_folder, f'{frame_idx:06d}.png')
        cv2.imwrite(frame_out, img)

        if args.display:
            cv2.imshow('Video', img)
            pressed = cv2.waitKey(1) & 0xFF
            if pressed == ord('q'):
                break

    if args.display:
        cv2.destroyAllWindows()

    # ========= Save rendered video ========= #
    save_name = os.path.join(output_path, 'live_vibe_result.mp4')
    print(f'Saving result video to {save_name}')
    images_to_video(img_folder=output_img_folder, output_vid_file=save_name)

    # Both the rendered frames and the input frames are temporary.
    for folder in (output_img_folder, image_folder):
        shutil.rmtree(folder)
def main(args):
    """Run the VIBE demo on one video and export per-person pose CSV files.

    Reads from ``args``: ``vid_file``, ``output_folder``, ``tracking_method``,
    ``staf_dir``, ``display``, ``tracker_batch_size``, ``detector``,
    ``yolo_img_size``, ``vibe_batch_size``, ``run_smplify``, ``no_render``,
    ``wireframe``, ``sideview`` and ``save_obj``.

    Outputs in ``<output_folder>/<video name>``: ``vibe_output.pkl``, the
    rendered video (unless ``no_render``), optional ``.obj`` meshes, and one
    ``<video>_<personId>_<numFrames>.csv`` per tracked person whose rows are
    the frame id followed by that frame's pose values. The extracted frame
    folder is deleted at the end.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    video_file = args.vid_file

    # ========= [Optional] download the youtube video ========= #
    if video_file.startswith('https://www.youtube.com'):
        print(f'Donwloading YouTube video \"{video_file}\"')
        video_file = download_youtube_clip(video_file, '/tmp')

        if video_file is None:
            exit('Youtube url is not valid!')

        print(f'YouTube Video has been downloaded to {video_file}...')

    if not os.path.isfile(video_file):
        exit(f'Input video \"{video_file}\" does not exist!')

    output_path = os.path.join(args.output_folder, os.path.basename(video_file).replace('.mp4', ''))
    os.makedirs(output_path, exist_ok=True)

    image_folder, num_frames, img_shape = video_to_images(video_file, return_info=True)

    print(f'Input video number of frames {num_frames}')
    orig_height, orig_width = img_shape[:2]

    total_time = time.time()

    # ========= Run tracking ========= #
    bbox_scale = 1.1
    if args.tracking_method == 'pose':
        if not os.path.isabs(video_file):
            video_file = os.path.join(os.getcwd(), video_file)
        tracking_results = run_posetracker(video_file, staf_folder=args.staf_dir, display=args.display)
    else:
        # run multi object tracker
        mot = MPT(
            device=device,
            batch_size=args.tracker_batch_size,
            display=args.display,
            detector_type=args.detector,
            output_format='dict',
            yolo_img_size=args.yolo_img_size,
        )
        tracking_results = mot(image_folder)

    # remove tracklets if num_frames is less than MIN_NUM_FRAMES
    for person_id in list(tracking_results.keys()):
        if tracking_results[person_id]['frames'].shape[0] < MIN_NUM_FRAMES:
            del tracking_results[person_id]

    # ========= Define VIBE model ========= #
    model = VIBE_Demo(
        seqlen=16,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load pretrained weights ========= #
    pretrained_file = download_ckpt(use_3dpw=False)
    # map_location lets a GPU-saved checkpoint load on a CPU-only host.
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f'Performance of pretrained model on 3DPW: {ckpt["performance"]}')
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from \"{pretrained_file}\"')

    # ========= Run VIBE on each person ========= #
    print('Running VIBE on each tracklet...')
    vibe_time = time.time()
    vibe_results = {}
    for person_id in tqdm(list(tracking_results.keys())):
        bboxes = joints2d = None

        if args.tracking_method == 'bbox':
            bboxes = tracking_results[person_id]['bbox']
        elif args.tracking_method == 'pose':
            joints2d = tracking_results[person_id]['joints2d']

        frames = tracking_results[person_id]['frames']

        dataset = Inference(
            image_folder=image_folder,
            frames=frames,
            bboxes=bboxes,
            joints2d=joints2d,
            scale=bbox_scale,
        )

        bboxes = dataset.bboxes
        frames = dataset.frames
        has_keypoints = True if joints2d is not None else False

        # reduce the num of worker if you encountered the error: DLL load failed: The paging file is too small for this operation to complete
        dataloader = DataLoader(dataset, batch_size=args.vibe_batch_size, num_workers=8)

        with torch.no_grad():

            pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d, norm_joints2d = [], [], [], [], [], []

            for batch in dataloader:
                if has_keypoints:
                    batch, nj2d = batch
                    norm_joints2d.append(nj2d.numpy().reshape(-1, 21, 3))

                # Add a leading axis so the loader batch is fed as one sequence.
                batch = batch.unsqueeze(0)
                batch = batch.to(device)

                batch_size, seqlen = batch.shape[:2]
                output = model(batch)[-1]

                pred_cam.append(output['theta'][:, :, :3].reshape(batch_size * seqlen, -1))
                pred_verts.append(output['verts'].reshape(batch_size * seqlen, -1, 3))
                pred_pose.append(output['theta'][:, :, 3:75].reshape(batch_size * seqlen, -1))
                pred_betas.append(output['theta'][:, :, 75:].reshape(batch_size * seqlen, -1))
                pred_joints3d.append(output['kp_3d'].reshape(batch_size * seqlen, -1, 3))

            pred_cam = torch.cat(pred_cam, dim=0)
            pred_verts = torch.cat(pred_verts, dim=0)
            pred_pose = torch.cat(pred_pose, dim=0)
            pred_betas = torch.cat(pred_betas, dim=0)
            pred_joints3d = torch.cat(pred_joints3d, dim=0)

            del batch

        # ========= [Optional] run Temporal SMPLify to refine the results ========= #
        # NOTE(review): the collapsed source does not show whether this section
        # sat inside the no_grad block; it is placed outside here — confirm.
        if args.run_smplify and args.tracking_method == 'pose':
            norm_joints2d = np.concatenate(norm_joints2d, axis=0)
            norm_joints2d = convert_kps(norm_joints2d, src='staf', dst='spin')
            norm_joints2d = torch.from_numpy(norm_joints2d).float().to(device)

            # Run Temporal SMPLify
            update, new_opt_vertices, new_opt_cam, new_opt_pose, new_opt_betas, \
            new_opt_joints3d, new_opt_joint_loss, opt_joint_loss = smplify_runner(
                pred_rotmat=pred_pose,
                pred_betas=pred_betas,
                pred_cam=pred_cam,
                j2d=norm_joints2d,
                device=device,
                batch_size=norm_joints2d.shape[0],
                pose2aa=False,
            )

            # update the parameters after refinement
            print(f'Update ratio after Temporal SMPLify: {update.sum()} / {norm_joints2d.shape[0]}')
            pred_verts = pred_verts.cpu()
            pred_cam = pred_cam.cpu()
            pred_pose = pred_pose.cpu()
            pred_betas = pred_betas.cpu()
            pred_joints3d = pred_joints3d.cpu()
            # Overwrite only the entries selected by ``update``.
            pred_verts[update] = new_opt_vertices[update]
            pred_cam[update] = new_opt_cam[update]
            pred_pose[update] = new_opt_pose[update]
            pred_betas[update] = new_opt_betas[update]
            pred_joints3d[update] = new_opt_joints3d[update]

        elif args.run_smplify and args.tracking_method == 'bbox':
            print('[WARNING] You need to enable pose tracking to run Temporal SMPLify algorithm!')
            print('[WARNING] Continuing without running Temporal SMPLify!..')

        # ========= Save results to a pickle file ========= #
        pred_cam = pred_cam.cpu().numpy()
        pred_verts = pred_verts.cpu().numpy()
        pred_pose = pred_pose.cpu().numpy()
        pred_betas = pred_betas.cpu().numpy()
        pred_joints3d = pred_joints3d.cpu().numpy()

        orig_cam = convert_crop_cam_to_orig_img(
            cam=pred_cam,
            bbox=bboxes,
            img_width=orig_width,
            img_height=orig_height
        )

        output_dict = {
            'pred_cam': pred_cam,
            'orig_cam': orig_cam,
            'verts': pred_verts,
            'pose': pred_pose,
            'betas': pred_betas,
            'joints3d': pred_joints3d,
            'joints2d': joints2d,
            'bboxes': bboxes,
            'frame_ids': frames,
        }

        vibe_results[person_id] = output_dict

    del model

    end = time.time()
    fps = num_frames / (end - vibe_time)

    print(f'VIBE FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(f'Total time spent: {total_time:.2f} seconds (including model loading time).')
    print(f'Total FPS (including model loading time): {num_frames / total_time:.2f}.')

    print(f'Saving output results to \"{os.path.join(output_path, "vibe_output.pkl")}\".')

    joblib.dump(vibe_results, os.path.join(output_path, "vibe_output.pkl"))

    if not args.no_render:
        # ========= Render results as a single video ========= #
        renderer = Renderer(resolution=(orig_width, orig_height), orig_img=True, wireframe=args.wireframe)

        output_img_folder = f'{image_folder}_output'
        os.makedirs(output_img_folder, exist_ok=True)

        print(f'Rendering output video, writing frames to {output_img_folder}')

        # prepare results for rendering
        frame_results = prepare_rendering_results(vibe_results, num_frames)
        mesh_color = {k: colorsys.hsv_to_rgb(np.random.rand(), 0.5, 1.0) for k in vibe_results.keys()}

        image_file_names = sorted([
            os.path.join(image_folder, x)
            for x in os.listdir(image_folder)
            if x.endswith('.png') or x.endswith('.jpg')
        ])

        # Loop-invariant settings for the frame-number overlay.
        font = cv2.FONT_HERSHEY_SIMPLEX
        x = 10  # position of text
        y = 20  # position of text

        for frame_idx in tqdm(range(len(image_file_names))):
            img_fname = image_file_names[frame_idx]
            img = cv2.imread(img_fname)

            if args.sideview:
                side_img = np.zeros_like(img)

            for person_id, person_data in frame_results[frame_idx].items():
                frame_verts = person_data['verts']
                frame_cam = person_data['cam']

                mc = mesh_color[person_id]

                mesh_filename = None

                if args.save_obj:
                    mesh_folder = os.path.join(output_path, 'meshes', f'{person_id:04d}')
                    os.makedirs(mesh_folder, exist_ok=True)
                    mesh_filename = os.path.join(mesh_folder, f'{frame_idx:06d}.obj')

                img = renderer.render(
                    img,
                    frame_verts,
                    cam=frame_cam,
                    color=mc,
                    mesh_filename=mesh_filename,
                )

                if args.sideview:
                    side_img = renderer.render(
                        side_img,
                        frame_verts,
                        cam=frame_cam,
                        color=mc,
                        angle=270,
                        axis=[0, 1, 0],
                    )

            if args.sideview:
                img = np.concatenate([img, side_img], axis=1)

            # Stamp the frame index into the rendered frame.
            cv2.putText(img, str(frame_idx), (x, y), font, 0.55, (0, 255, 0), 1)

            cv2.imwrite(os.path.join(output_img_folder, f'{frame_idx:06d}.png'), img)

            if args.display:
                cv2.imshow('Video', img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if args.display:
            cv2.destroyAllWindows()

        # ========= Save rendered video ========= #
        vid_name = os.path.basename(video_file)
        save_name = f'{vid_name.replace(".mp4", "")}_vibe_result.mp4'
        save_name = os.path.join(output_path, save_name)
        print(f'Saving result video to {save_name}')
        images_to_video(img_folder=output_img_folder, output_vid_file=save_name)
        shutil.rmtree(output_img_folder)

    shutil.rmtree(image_folder)

    # generate and save the joints csv file for animating avatars later
    # The in-memory results are used directly; they are exactly what was
    # dumped to vibe_output.pkl above, so re-reading the pickle is unnecessary.
    output = vibe_results

    for i in output.keys():
        print('Track ids:', i, end='\n\n')

    print('VIBE output file content:', end='\n\n')
    vid_name = os.path.basename(video_file)

    # Header row: 73 columns named "0".."72" (frame id followed by the pose values).
    field_names = [str(idx) for idx in range(73)]

    # output the pose result as csv
    # format: v_personId_numFrames
    for i in output.keys():
        person_pose = output[i]['pose']
        person_frame_ids = output[i]['frame_ids']
        pose_filename = os.path.join(
            output_path, f'{vid_name}_{i}_{person_pose.shape[0]}.csv')

        with open(pose_filename, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(field_names)
            for frame_id, pose_row in enumerate(person_pose):
                output_data = [person_frame_ids[frame_id]]
                output_data.extend(pose_row)
                writer.writerow(output_data)

    print('================= END =================')
def main(args):
    """Run VIBE on a folder of cropped frames, save the predictions, optionally render them.

    The function loads the pretrained VIBE model, runs it over every batch
    produced by ``InferenceFromCrops(args.input_folder)``, pickles the
    predictions to ``vibe_results.pkl`` and, unless ``args.no_render`` is set,
    writes one rendered image per input frame plus ``vibe_video.mp4``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``input_folder``, ``vibe_batch_size``, ``run_smplify``,
            ``tracking_method`` (only consulted when ``run_smplify`` is set),
            ``no_render``, ``wireframe``, ``save_obj``, ``sideview`` and
            ``reposed_render``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # ========= Define VIBE model ========= #
    model = VIBE_Demo(
        seqlen=16,
        device=device,
        n_layers=2,
        hidden_size=1024,
        add_linear=True,
        use_residual=True,
    ).to(device)

    # ========= Load pretrained weights ========= #
    pretrained_file = download_ckpt(use_3dpw=False)
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    ckpt = torch.load(pretrained_file, map_location=device)
    print(f'Performance of pretrained model on 3DPW: {ckpt["performance"]}')
    ckpt = ckpt['gen_state_dict']
    model.load_state_dict(ckpt, strict=False)
    model.eval()
    print(f'Loaded pretrained weights from \"{pretrained_file}\"')

    total_time = time.time()

    # ========= Run VIBE on crops ========= #
    print('Running VIBE on crops...')
    vibe_time = time.time()
    image_folder = args.input_folder

    dataset = InferenceFromCrops(image_folder=image_folder)
    # The renderer resolution is fixed here rather than read from the images.
    orig_height = orig_width = 512

    dataloader = DataLoader(dataset, batch_size=args.vibe_batch_size, num_workers=0)

    with torch.no_grad():
        pred_cam, pred_verts, pred_pose, pred_betas, pred_joints3d, norm_joints2d = [], [], [], [], [], []

        for batch_num, batch in enumerate(dataloader):
            print("BATCH:", batch_num)
            # Add a leading dimension so each loader batch is fed as one sequence.
            batch = batch.unsqueeze(0)
            batch = batch.to(device)

            batch_size, seqlen = batch.shape[:2]
            output = model(batch)[-1]
            num_frames = batch_size * seqlen

            # 'theta' is sliced as [0:3] camera, [3:75] pose, [75:] betas.
            theta = output['theta']
            pred_cam.append(theta[:, :, :3].reshape(num_frames, -1))
            pred_verts.append(output['verts'].reshape(num_frames, -1, 3))
            pred_pose.append(theta[:, :, 3:75].reshape(num_frames, -1))
            pred_betas.append(theta[:, :, 75:].reshape(num_frames, -1))
            pred_joints3d.append(output['kp_3d'].reshape(num_frames, -1, 3))

        pred_cam = torch.cat(pred_cam, dim=0)
        pred_verts = torch.cat(pred_verts, dim=0)
        pred_pose = torch.cat(pred_pose, dim=0)
        pred_betas = torch.cat(pred_betas, dim=0)
        pred_joints3d = torch.cat(pred_joints3d, dim=0)

        del batch

    # ========= [Optional] run Temporal SMPLify to refine the results ========= #
    if args.run_smplify and args.tracking_method == 'pose' and norm_joints2d:
        norm_joints2d = np.concatenate(norm_joints2d, axis=0)
        norm_joints2d = convert_kps(norm_joints2d, src='staf', dst='spin')
        norm_joints2d = torch.from_numpy(norm_joints2d).float().to(device)

        # Run Temporal SMPLify
        update, new_opt_vertices, new_opt_cam, new_opt_pose, new_opt_betas, \
            new_opt_joints3d, new_opt_joint_loss, opt_joint_loss = smplify_runner(
                pred_rotmat=pred_pose,
                pred_betas=pred_betas,
                pred_cam=pred_cam,
                j2d=norm_joints2d,
                device=device,
                batch_size=norm_joints2d.shape[0],
                pose2aa=False,
            )

        # update the parameters after refinement
        print(
            f'Update ratio after Temporal SMPLify: {update.sum()} / {norm_joints2d.shape[0]}'
        )
        pred_verts = pred_verts.cpu()
        pred_cam = pred_cam.cpu()
        pred_pose = pred_pose.cpu()
        pred_betas = pred_betas.cpu()
        pred_joints3d = pred_joints3d.cpu()
        pred_verts[update] = new_opt_vertices[update]
        pred_cam[update] = new_opt_cam[update]
        pred_pose[update] = new_opt_pose[update]
        pred_betas[update] = new_opt_betas[update]
        pred_joints3d[update] = new_opt_joints3d[update]
    elif args.run_smplify and args.tracking_method == 'pose':
        # No 2D keypoints are collected in the inference loop above, so
        # np.concatenate would fail on an empty list. Warn and carry on instead.
        print(
            '[WARNING] No 2D keypoints are available for the cropped frames, '
            'cannot run Temporal SMPLify algorithm!'
        )
        print('[WARNING] Continuing without running Temporal SMPLify!..')
    elif args.run_smplify and args.tracking_method == 'bbox':
        print(
            '[WARNING] You need to enable pose tracking to run Temporal SMPLify algorithm!'
        )
        print('[WARNING] Continuing without running Temporal SMPLify!..')

    # ========= Save results to a pickle file ========= #
    output_path = image_folder.replace('cropped_frames', 'vibe_results')
    os.makedirs(output_path, exist_ok=True)

    pred_cam = pred_cam.cpu().numpy()
    pred_verts = pred_verts.cpu().numpy()
    pred_pose = pred_pose.cpu().numpy()
    pred_betas = pred_betas.cpu().numpy()
    pred_joints3d = pred_joints3d.cpu().numpy()

    vibe_results = {
        'pred_cam': pred_cam,
        'verts': pred_verts,
        'pose': pred_pose,
        'betas': pred_betas,
        'joints3d': pred_joints3d,
    }

    del model
    end = time.time()
    fps = len(dataset) / (end - vibe_time)

    print(f'VIBE FPS: {fps:.2f}')
    total_time = time.time() - total_time
    print(
        f'Total time spent: {total_time:.2f} seconds (including model loading time).'
    )
    print(
        f'Total FPS (including model loading time): {len(dataset) / total_time:.2f}.'
    )

    results_file = os.path.join(output_path, "vibe_results.pkl")
    print(f'Saving vibe results to \"{results_file}\".')

    with open(results_file, 'wb') as f_save:
        pickle.dump(vibe_results, f_save)

    if not args.no_render:
        # ========= Render results as a single video ========= #
        renderer = Renderer(resolution=(orig_width, orig_height),
                            orig_img=True,
                            wireframe=args.wireframe)

        output_img_folder = os.path.join(output_path, 'vibe_images')
        os.makedirs(output_img_folder, exist_ok=True)

        print(f'Rendering output video, writing frames to {output_img_folder}')

        image_file_names = sorted(
            os.path.join(image_folder, x)
            for x in os.listdir(image_folder)
            if x.endswith(('.png', '.jpg'))
        )

        # The SMPL model and the rest pose do not depend on the frame, so they
        # are built once on first use instead of once per rendered frame.
        smpl = None
        zero_pose = None

        for frame_idx, img_fname in enumerate(tqdm(image_file_names)):
            img = cv2.imread(img_fname)

            frame_verts = vibe_results['verts'][frame_idx]
            frame_cam = vibe_results['pred_cam'][frame_idx]

            mesh_filename = None
            if args.save_obj:
                mesh_folder = os.path.join(output_path, 'vibe_meshes')
                os.makedirs(mesh_folder, exist_ok=True)
                mesh_filename = os.path.join(mesh_folder, f'{frame_idx:06d}.obj')

            rend_img = renderer.render(
                img,
                frame_verts,
                cam=frame_cam,
                mesh_filename=mesh_filename,
            )

            whole_img = rend_img

            if args.sideview:
                side_img_bg = np.zeros_like(img)
                side_rend_img90 = renderer.render(
                    side_img_bg,
                    frame_verts,
                    cam=frame_cam,
                    angle=90,
                    axis=[0, 1, 0],
                )
                side_rend_img270 = renderer.render(
                    side_img_bg,
                    frame_verts,
                    cam=frame_cam,
                    angle=270,
                    axis=[0, 1, 0],
                )

                if args.reposed_render:
                    if smpl is None:
                        smpl = SMPL('data/vibe_data', batch_size=1)
                        zero_pose = torch.from_numpy(
                            np.zeros((1, pred_pose.shape[-1]))).float()
                        zero_pose[:, 0] = np.pi

                    pred_frame_betas = torch.from_numpy(
                        pred_betas[frame_idx][None, :]).float()
                    with torch.no_grad():
                        reposed_smpl_output = smpl(
                            betas=pred_frame_betas,
                            body_pose=zero_pose[:, 3:],
                            global_orient=zero_pose[:, :3])
                        reposed_verts = reposed_smpl_output.vertices
                        reposed_verts = reposed_verts.cpu().detach().numpy()

                    reposed_cam = np.array([0.9, 0, 0])
                    reposed_rend_img = renderer.render(side_img_bg,
                                                       reposed_verts[0],
                                                       cam=reposed_cam)
                    reposed_rend_img90 = renderer.render(side_img_bg,
                                                         reposed_verts[0],
                                                         cam=reposed_cam,
                                                         angle=90,
                                                         axis=[0, 1, 0])

                    top_row = np.concatenate(
                        [img, reposed_rend_img, reposed_rend_img90], axis=1)
                else:
                    top_row = np.concatenate([img, side_img_bg, side_img_bg],
                                             axis=1)

                # Bottom row is shared: the overlay plus the two side views.
                bot_row = np.concatenate(
                    [rend_img, side_rend_img90, side_rend_img270], axis=1)
                whole_img = np.concatenate([top_row, bot_row], axis=0)

            # Rendered frames keep the file name of the input frame.
            cv2.imwrite(
                os.path.join(output_img_folder, os.path.basename(img_fname)),
                whole_img)

        # ========= Save rendered video ========= #
        save_vid_path = os.path.join(output_path, 'vibe_video.mp4')
        print(f'Saving result video to {save_vid_path}')
        images_to_video(img_folder=output_img_folder,
                        output_vid_file=save_vid_path)

    print('================= END =================')