def live_application(capture, output_dirpath):
  """
  Launch an application that reads from a webcam and estimates hand pose in
  real time.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  output_dirpath : str
    Directory where the rendered frames are saved as `%06d.jpg`.
  """
  ############ output visualization ############
  view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
  window_size = 1080

  hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
  mesh = o3d.geometry.TriangleMesh()
  mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
  mesh.vertices = \
    o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
  mesh.compute_vertex_normals()

  viewer = o3d.visualization.Visualizer()
  viewer.create_window(
    width=window_size + 1, height=window_size + 1,
    window_name='Minimal Hand - output'
  )
  viewer.add_geometry(mesh)

  view_control = viewer.get_view_control()
  cam_params = view_control.convert_to_pinhole_camera_parameters()
  extrinsic = cam_params.extrinsic.copy()
  extrinsic[0:3, 3] = 0
  cam_params.extrinsic = extrinsic
  cam_params.intrinsic.set_intrinsics(
    window_size + 1, window_size + 1,
    config.CAM_FX, config.CAM_FY,
    window_size // 2, window_size // 2
  )
  view_control.convert_from_pinhole_camera_parameters(cam_params)
  view_control.set_constant_z_far(1000)

  render_option = viewer.get_render_option()
  render_option.load_from_json('./render_option.json')
  viewer.update_renderer()

  ############ input visualization ############
  pygame.init()
  display = pygame.display.set_mode((window_size, window_size))
  pygame.display.set_caption('Minimal Hand - input')

  ############ misc ############
  mesh_smoother = OneEuroFilter(4.0, 0.0)
  clock = pygame.time.Clock()
  model = ModelPipeline()

  i = 0
  while True:
    frame_large = capture.read()
    if frame_large is None:
      print(f'none frame {i}')
      return

    # note: unlike the other variants, this one does not center-crop the
    # frame to a square before resizing
    frame_large = np.flip(frame_large, axis=1).copy()
    frame = imresize(frame_large, (128, 128))

    _, theta_mpii = model.process(frame)
    theta_mano = mpii_to_mano(theta_mpii)

    v = hand_mesh.set_abs_quat(theta_mano)
    v *= 2  # for better visualization
    v = v * 1000 + np.array([0, 0, 400])
    v = mesh_smoother.process(v)
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
    mesh.paint_uniform_color(config.HAND_COLOR)
    mesh.compute_triangle_normals()
    mesh.compute_vertex_normals()
    viewer.update_geometry()
    viewer.poll_events()
    viewer.capture_screen_image(osp.join(output_dirpath, "%06d.jpg" % i))

    display.blit(
      pygame.surfarray.make_surface(
        np.transpose(
          imresize(frame_large, (window_size, window_size)), (1, 0, 2)
        )
      ),
      (0, 0)
    )
    pygame.display.update()

    clock.tick(30)
    i += 1
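
# Usage sketch for the variant above (hedged): `OpenCVCapture` is assumed to
# be the webcam reader from this repo's `capture.py`, as in the upstream
# minimal-hand `app.py`; the output directory name is an arbitrary example,
# not from the original source.
if __name__ == '__main__':
  import os
  output_dirpath = './rendered_frames'
  os.makedirs(output_dirpath, exist_ok=True)  # frames are written here
  live_application(OpenCVCapture(), output_dirpath)
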
def live_application(capture):
  """
  Launch an application that reads from a webcam and estimates hand pose in
  real time.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  """
  ############ render setting ############
  render = o3d_render(config.HAND_MESH_MODEL_PATH)
  extrinsic = render.extrinsic
  extrinsic[0:3, 3] = 0
  render.extrinsic = extrinsic
  render.intrinsic = [config.CAM_FX, config.CAM_FY]
  render.updata_params()
  render.environments('./render_option.json', 1000)
  extrinsic = render.extrinsic.copy()
  intrinsic = render.intrinsic

  ############ misc ############
  mesh_smoother = OneEuroFilter(4.0, 0.0)
  clock = pygame.time.Clock()
  hand_machine = minimal_hand(
    '../weights/minimal_hand/model/hand_mesh/hand_mesh_model.pkl',
    './weights/detnet.pth',
    './weights/iknet.pth'
  )
  hand_machine.cuda()
  hand_machine.eval()

  cnt = 0
  while True:
    flag, frame_large = capture.read()
    cnt += 1
    if not flag:
      break
    if frame_large is None:
      continue

    # center-crop the frame to a square
    if frame_large.shape[0] > frame_large.shape[1]:
      margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
      frame_large = frame_large[margin:-margin]
    else:
      margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
      frame_large = frame_large[:, margin:-margin]

    frame_large = np.flip(frame_large, axis=1).copy()
    frame = imresize(frame_large, (128, 128))
    original_img = frame.copy()[..., ::-1]  # RGB -> BGR for OpenCV
    original_img = cv2.resize(original_img, (256, 256))

    # HWC uint8 -> 1CHW float in [0, 1]
    frame = torch.from_numpy(frame)
    frame = rearrange(frame, 'h w c -> 1 c h w')
    frame = frame.float() / 255
    with torch.no_grad():
      frame = frame.cuda()
      _, theta_mpii = hand_machine(frame)
    theta_mpii = theta_mpii.detach().cpu().numpy()
    theta_mano = mpii_to_mano(theta_mpii)

    v = render.hand_mesh.set_abs_quat(theta_mano)
    v *= 2  # for better visualization
    v = v * 1000 + np.array([0, 0, 400])
    v = mesh_smoother.process(v)

    render.rendering(v, config.HAND_COLOR)
    render_img = render.capture_img()
    xy = render.projector()
    for joint in xy.astype(int).tolist():  # `np.int` was removed in NumPy >= 1.24
      render_img = cv2.circle(render_img, tuple(joint[:2]), 3, (0, 0, 255), 3)
    render_img = cv2.resize(render_img, (256, 256))

    save_img = np.concatenate([original_img, render_img], axis=1)
    cv2.imshow('Minimal Hand - result', save_img)
    cv2.waitKey(1)  # required for the HighGUI window to refresh
    cv2.imwrite("render_results/%06d.png" % cnt, save_img)
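
# Usage sketch for the torch variant above (hedged): its `capture.read()`
# returns a (flag, frame) pair, which matches `cv2.VideoCapture`, so a plain
# OpenCV webcam handle can serve as the capture object. The device index 0
# and the pre-created output folder are assumptions, not from the original
# source; a CUDA-capable GPU is required by `hand_machine.cuda()`.
if __name__ == '__main__':
  import os
  os.makedirs('render_results', exist_ok=True)  # `cv2.imwrite` will not create it
  cam = cv2.VideoCapture(0)
  try:
    live_application(cam)
  finally:
    cam.release()
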
def live_application(capture):
  """
  Estimate hand pose for every frame of an input video and save the rendered
  video, the rendered frames, and the per-frame MANO parameters.

  Despite its signature, this variant ignores `capture` and reads from the
  hard-coded `input_video_path` below.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from (unused here).
  """
  ############ output visualization ############
  view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
  window_size = 1080

  hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
  mesh = o3d.geometry.TriangleMesh()
  mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
  mesh.vertices = \
    o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
  mesh.compute_vertex_normals()

  viewer = o3d.visualization.Visualizer()
  viewer.create_window(
    width=window_size + 1, height=window_size + 1,
    window_name='Minimal Hand - output'
  )
  viewer.add_geometry(mesh)

  view_control = viewer.get_view_control()
  cam_params = view_control.convert_to_pinhole_camera_parameters()
  extrinsic = cam_params.extrinsic.copy()
  extrinsic[0:3, 3] = 0
  cam_params.extrinsic = extrinsic
  cam_params.intrinsic.set_intrinsics(
    window_size + 1, window_size + 1,
    config.CAM_FX, config.CAM_FY,
    window_size // 2, window_size // 2
  )
  view_control.convert_from_pinhole_camera_parameters(cam_params)
  view_control.set_constant_z_far(1000)

  render_option = viewer.get_render_option()
  render_option.load_from_json('./render_option.json')
  viewer.update_renderer()

  ############ input visualization ############
  pygame.init()
  display = pygame.display.set_mode((window_size, window_size))
  pygame.display.set_caption('Minimal Hand - input')

  ############ misc ############
  mesh_smoother = OneEuroFilter(4.0, 0.0)
  clock = pygame.time.Clock()
  model = ModelPipeline()

  input_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
  output_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
  output_frames_folder = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_frames/CVmyQR31Dr4_5-3-rgb_front"
  output_pose_file = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose/CVmyQR31Dr4_5-3-rgb_front.pickle"
  os.makedirs(output_frames_folder, exist_ok=True)  # portable replacement for `os.system("mkdir ...")`

  cap = cv2.VideoCapture(input_video_path)
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
  fps = cap.get(cv2.CAP_PROP_FPS)
  writer = cv2.VideoWriter(
    output_video_path, cv2.VideoWriter_fourcc(*'PIM1'), fps, (1081, 731))

  pose_data = []
  for i in range(length):
    ret, frame_large = cap.read()
    if not ret or frame_large is None:  # check before touching the frame
      continue
    frame_large = cv2.cvtColor(frame_large, cv2.COLOR_BGR2RGB)

    # center-crop the frame to a square
    if frame_large.shape[0] > frame_large.shape[1]:
      margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
      frame_large = frame_large[margin:-margin]
    elif frame_large.shape[0] < frame_large.shape[1]:
      margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
      frame_large = frame_large[:, margin:-margin]

    frame_large = np.flip(frame_large, axis=1).copy()
    frame = imresize(frame_large, (128, 128))

    _, theta_mpii = model.process(frame)
    theta_mano = mpii_to_mano(theta_mpii)
    pose_data.append(theta_mano)

    v = hand_mesh.set_abs_quat(theta_mano)
    v *= 2  # for better visualization
    v = v * 1000 + np.array([0, 0, 400])
    v = mesh_smoother.process(v)

    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
    mesh.paint_uniform_color(config.HAND_COLOR)
    mesh.compute_triangle_normals()
    mesh.compute_vertex_normals()
    # for some versions of open3d you may need `viewer.update_geometry(mesh)`
    viewer.update_geometry()
    viewer.poll_events()
    write_rendered_frame(writer, viewer, output_frames_folder, i)

    display.blit(
      pygame.surfarray.make_surface(
        np.transpose(
          imresize(frame_large, (window_size, window_size)), (1, 0, 2)
        )
      ),
      (0, 0)
    )
    pygame.display.update()

  writer.release()  # finalize the output video
  cap.release()
  with open(output_pose_file, "wb") as output:
    pickle.dump(pose_data, output)
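
# Hedged sketch: how the per-frame MANO parameters pickled above can be read
# back. The helper name `load_pose_data` is illustrative, not from the source.
def load_pose_data(pose_file):
  """Return the list of per-frame `theta_mano` arrays written by the loop above."""
  with open(pose_file, 'rb') as f:
    return pickle.load(f)
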
def live_application(capture):
  """
  Launch an application that reads from a webcam and estimates hand pose in
  real time, drawing 2D joint markers on the input frame.

  The captured hand must be the right hand unless `config.left` is set; it
  will be flipped internally and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  """
  view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
  window_size_w = 640
  window_size_h = 480

  hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
  mesh = o3d.geometry.TriangleMesh()
  mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
  mesh.vertices = \
    o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
  mesh.compute_vertex_normals()

  viewer = o3d.visualization.Visualizer()
  viewer.create_window(
    width=window_size_w + 1, height=window_size_h + 1,
    window_name='Minimal Hand - output'
  )
  viewer.add_geometry(mesh)

  view_control = viewer.get_view_control()
  cam_params = view_control.convert_to_pinhole_camera_parameters()
  extrinsic = cam_params.extrinsic.copy()
  extrinsic[0:3, 3] = 0
  cam_params.extrinsic = extrinsic
  cam_params.intrinsic.set_intrinsics(
    window_size_w + 1, window_size_h + 1,
    config.CAM_FX, config.CAM_FY,
    window_size_w // 2, window_size_h // 2
  )
  view_control.convert_from_pinhole_camera_parameters(cam_params)
  view_control.set_constant_z_far(1000)

  render_option = viewer.get_render_option()
  render_option.load_from_json('./render_option.json')
  viewer.update_renderer()

  mesh_smoother = OneEuroFilter(4.0, 0.0)
  cords_smoother = OneEuroFilter(4.0, 0.0)
  model = ModelPipeline()

  while True:
    frame_large = capture.read()
    if frame_large is None:
      continue

    # center-crop the frame to a square
    if frame_large.shape[0] > frame_large.shape[1]:
      margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
      frame_large = frame_large[margin:-margin]
    else:
      margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
      frame_large = frame_large[:, margin:-margin]

    if config.left is False:
      frame_large = np.flip(frame_large, axis=1).copy()

    frame = imresize(frame_large, (128, 128))

    cords, theta_mpii = model.process(frame)
    theta_mano = mpii_to_mano(theta_mpii)

    v = hand_mesh.set_abs_quat(theta_mano)
    # v *= 2  # for better visualization
    v = v * 1000 + np.array([0, 0, 400])
    v = mesh_smoother.process(v)

    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
    mesh.paint_uniform_color(config.HAND_COLOR)
    mesh.compute_triangle_normals()
    mesh.compute_vertex_normals()
    # for some versions of open3d you may need `viewer.update_geometry(mesh)`
    viewer.update_geometry()
    viewer.poll_events()

    # smooth the joint coordinates, scale them into image space, and drop
    # the depth column before drawing 2D markers
    cords = cords_smoother.process(cords)
    cords = cords * 150 + 200
    cords = np.delete(cords, 2, 1)

    frame_large = cv2.cvtColor(frame_large, cv2.COLOR_RGB2BGR)
    for x in cords:
      cv2.drawMarker(frame_large, (int(x[0]), int(x[1])), (0, 0, 0))

    cv2.imshow("Hand AI", frame_large)
    if keyboard.is_pressed("q"):
      cv2.destroyAllWindows()
      break
    cv2.waitKey(1)
    time.sleep(0.0015)
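
# All the variants above smooth per-frame predictions with
# `OneEuroFilter(4.0, 0.0)`. As a rough intuition only (hedged: this is a
# plain exponential low-pass, not the repo's actual one-euro implementation,
# which additionally adapts its cutoff to how fast the signal is moving):
class SimpleLowPass:
  """Exponential smoothing: y_t = alpha * x_t + (1 - alpha) * y_{t-1}."""

  def __init__(self, alpha):
    self.alpha = alpha
    self.prev = None

  def process(self, x):
    x = np.asarray(x, dtype=float)
    if self.prev is None:
      self.prev = x
    else:
      self.prev = self.alpha * x + (1.0 - self.alpha) * self.prev
    return self.prev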