# Example #1
def live_application(capture, output_dirpath):
  """
  Launch an application that reads from a webcam and estimates hand pose at
  real-time.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  output_dirpath : str
    Directory into which one screenshot of the rendered mesh is written per
    frame, named `%06d.jpg`.
  """
  ############ output visualization ############
  view_mat = axangle2mat([1, 0, 0], np.pi) # align different coordinate systems
  window_size = 1080

  # Build the initial triangle mesh from the MANO hand model; vertices are
  # rotated into the viewer frame and scaled from meters to millimeters.
  hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
  mesh = o3d.geometry.TriangleMesh()
  mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
  mesh.vertices = \
    o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
  mesh.compute_vertex_normals()

  viewer = o3d.visualization.Visualizer()
  viewer.create_window(
    width=window_size + 1, height=window_size + 1,
    window_name='Minimal Hand - output'
  )
  viewer.add_geometry(mesh)

  # Pin the virtual camera at the origin and set pinhole intrinsics from the
  # project config so the render matches the capture geometry.
  view_control = viewer.get_view_control()
  cam_params = view_control.convert_to_pinhole_camera_parameters()
  extrinsic = cam_params.extrinsic.copy()
  extrinsic[0:3, 3] = 0
  cam_params.extrinsic = extrinsic
  cam_params.intrinsic.set_intrinsics(
    window_size + 1, window_size + 1, config.CAM_FX, config.CAM_FY,
    window_size // 2, window_size // 2
  )
  view_control.convert_from_pinhole_camera_parameters(cam_params)
  view_control.set_constant_z_far(1000)

  render_option = viewer.get_render_option()
  render_option.load_from_json('./render_option.json')
  viewer.update_renderer()

  ############ input visualization ############
  pygame.init()
  display = pygame.display.set_mode((window_size, window_size))
  pygame.display.set_caption('Minimal Hand - input')

  ############ misc ############
  # OneEuroFilter smooths the mesh vertices across frames to reduce jitter.
  mesh_smoother = OneEuroFilter(4.0, 0.0)
  clock = pygame.time.Clock()
  model = ModelPipeline()

  i = 0
  while True:
    frame_large = capture.read()
    if frame_large is None:
      # Capture stream exhausted: stop the application.
      print(f'none frame {i}')
      return

    # if frame_large.shape[0] > frame_large.shape[1]:
    #   margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
    #   frame_large = frame_large[margin:-margin]
    # else:
    #   margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
    #   frame_large = frame_large[:, margin:-margin]

    # Mirror horizontally (right hand appears as left to the model) and
    # downscale to the 128x128 input the pipeline expects.
    frame_large = np.flip(frame_large, axis=1).copy()
    frame = imresize(frame_large, (128, 128))

    _, theta_mpii = model.process(frame)
    theta_mano = mpii_to_mano(theta_mpii)

    v = hand_mesh.set_abs_quat(theta_mano)
    v *= 2 # for better visualization
    v = v * 1000 + np.array([0, 0, 400])
    v = mesh_smoother.process(v)
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
    mesh.paint_uniform_color(config.HAND_COLOR)
    mesh.compute_triangle_normals()
    mesh.compute_vertex_normals()
    # NOTE(review): newer open3d versions require `viewer.update_geometry(mesh)`
    # with an explicit argument — confirm against the pinned open3d version.
    viewer.update_geometry()
    viewer.poll_events()
    viewer.capture_screen_image(osp.join(output_dirpath, "%06d.jpg" % i))

    # Show the (mirrored) camera frame in the pygame window; surfarray expects
    # (width, height, channels), hence the transpose.
    display.blit(
      pygame.surfarray.make_surface(
        np.transpose(
          imresize(frame_large, (window_size, window_size)
        ), (1, 0, 2))
      ),
      (0, 0)
    )
    pygame.display.update()

    # if keyboard.is_pressed("esc"):
    #   break

    clock.tick(30)  # cap the loop at ~30 FPS
    i += 1
# Example #2
def live_application(capture):
    """
    Read frames from a capture stream, estimate hand pose with a torch model,
    render the hand mesh, and show/save a side-by-side comparison image.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Side effects: opens an OpenCV preview window and writes one
    `render_results/%06d.png` per processed frame.

    Parameters
    ----------
    capture : object
        An object from `capture.py` whose `read()` returns `(flag, frame)`.
    """
    import os  # local import: only used to create the output directory

    ############ render setting ############
    render = o3d_render(config.HAND_MESH_MODEL_PATH)
    extrinsic = render.extrinsic
    extrinsic[0:3, 3] = 0  # place the virtual camera at the origin
    render.extrinsic = extrinsic
    render.intrinsic = [config.CAM_FX, config.CAM_FY]
    render.updata_params()
    render.environments('./render_option.json', 1000)

    ############ misc ############
    # OneEuroFilter smooths the mesh vertices across frames to reduce jitter.
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    hand_machine = minimal_hand(
        '../weights/minimal_hand/model/hand_mesh/hand_mesh_model.pkl',
        './weights/detnet.pth', './weights/iknet.pth')

    hand_machine.cuda()
    hand_machine.eval()

    # Fix: the original wrote into `render_results/` without ever creating it
    # (cv2.imwrite fails silently on a missing directory).
    os.makedirs("render_results", exist_ok=True)

    cnt = 0
    while True:
        flag, frame_large = capture.read()
        cnt += 1
        if not flag:
            break  # stream exhausted
        if frame_large is None:
            continue
        # Center-crop the frame to a square.
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        else:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        # Mirror horizontally (right hand appears as left to the model) and
        # downscale to the 128x128 input the network expects.
        frame_large = np.flip(frame_large, axis=1).copy()
        frame = imresize(frame_large, (128, 128))
        original_img = frame.copy()[..., ::-1]  # reverse color channel order
        original_img = cv2.resize(original_img, (256, 256))
        frame = torch.from_numpy(frame)
        frame = rearrange(frame, 'h w c -> 1 c h w')
        frame = frame.float() / 255
        with torch.no_grad():
            frame = frame.cuda()
            _, theta_mpii = hand_machine(frame)
        theta_mpii = theta_mpii.detach().cpu().numpy()
        theta_mano = mpii_to_mano(theta_mpii)

        v = render.hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)

        render.rendering(v, config.HAND_COLOR)
        render_img = render.capture_img()
        xy = render.projector()

        # Fix: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin `int` is the documented replacement.
        for joint in xy.astype(int).tolist():
            render_img = cv2.circle(render_img, tuple(joint[:2]), 3,
                                    (0, 0, 255), 3)

        render_img = cv2.resize(render_img, (256, 256))
        save_img = np.concatenate([original_img, render_img], axis=1)

        # Fix: renamed the profane window title; `waitKey` is required for
        # OpenCV HighGUI to actually paint/refresh the window.
        cv2.imshow("minimal hand", save_img)
        cv2.waitKey(1)
        cv2.imwrite("render_results/%06d.png" % cnt, save_img)
# Example #3
def live_application(capture):
    """
    Read a video file frame by frame, estimate hand pose, render the hand
    mesh, and dump the rendered video, per-frame images, and MANO pose
    parameters (pickled) to hard-coded How2Sign output paths.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Parameters
    ----------
    capture : object
        An object from `capture.py`. NOTE(review): unused in this variant —
        frames are read from the hard-coded `input_video_path` instead.
    """
    ############ output visualization ############
    view_mat = axangle2mat([1, 0, 0],
                           np.pi)  # align different coordinate systems
    window_size = 1080

    # Build the initial triangle mesh from the MANO hand model; vertices are
    # rotated into the viewer frame and scaled from meters to millimeters.
    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
      o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(width=window_size + 1,
                         height=window_size + 1,
                         window_name='Minimal Hand - output')
    viewer.add_geometry(mesh)

    # Pin the virtual camera at the origin and set pinhole intrinsics from
    # the project config so the render matches the capture geometry.
    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(window_size + 1, window_size + 1,
                                        config.CAM_FX, config.CAM_FY,
                                        window_size // 2, window_size // 2)
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    ############ input visualization ############
    pygame.init()
    display = pygame.display.set_mode((window_size, window_size))
    pygame.display.set_caption('Minimal Hand - input')

    ############ misc ############
    # OneEuroFilter smooths the mesh vertices across frames to reduce jitter.
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    model = ModelPipeline()

    input_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_frames_folder = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_frames/CVmyQR31Dr4_5-3-rgb_front"
    output_pose_file = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose/CVmyQR31Dr4_5-3-rgb_front.pickle"

    # Fix: use os.makedirs instead of `os.system("mkdir " + path)` — portable,
    # no shell string, and idempotent via exist_ok.
    os.makedirs(output_frames_folder, exist_ok=True)

    cap = cv2.VideoCapture(input_video_path)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    writer = cv2.VideoWriter(output_video_path,
                             cv2.VideoWriter_fourcc(*'PIM1'), fps, (1081, 731))

    pose_data = []

    for i in range(length):
        ret, frame_large = cap.read()

        # Fix: validate the frame BEFORE processing it — the original called
        # cv2.cvtColor first, which crashes with a None frame when read() fails.
        if not ret or frame_large is None:
            continue

        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_BGR2RGB)

        # Center-crop the frame to a square.
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        elif frame_large.shape[0] < frame_large.shape[1]:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        # Mirror horizontally (right hand appears as left to the model).
        frame_large = np.flip(frame_large, axis=1).copy()

        frame = imresize(frame_large, (128, 128))

        _, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        pose_data.append(theta_mano)

        v = hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some version of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()

        viewer.poll_events()

        write_rendered_frame(writer, viewer, output_frames_folder, i)

        # Show the (mirrored) input frame in the pygame window; surfarray
        # expects (width, height, channels), hence the transpose.
        display.blit(
            pygame.surfarray.make_surface(
                np.transpose(imresize(frame_large, (window_size, window_size)),
                             (1, 0, 2))), (0, 0))
        pygame.display.update()

    # Fix: release video handles — the original leaked both, which can leave
    # the output video unflushed/truncated.
    writer.release()
    cap.release()

    with open(output_pose_file, "wb") as output:
        pickle.dump(pose_data, output)
# Example #4
def live_application(capture):
    """
    Launch an application that reads from a webcam and estimates hand pose at
    real-time.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Runs until the "q" key is pressed. Shows the rendered mesh in an open3d
    window and the annotated camera frame in an OpenCV window ("Hand AI").

    Parameters
    ----------
    capture : object
        An object from `capture.py` to read capture stream from.
    """

    view_mat = axangle2mat([1, 0, 0], np.pi)  # align different coordinate systems
    window_size_w = 640
    window_size_h = 480

    # Build the initial triangle mesh from the MANO hand model; vertices are
    # rotated into the viewer frame and scaled from meters to millimeters.
    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
        o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(
        width=window_size_w + 1, height=window_size_h + 1,
        window_name='Minimal Hand - output'
    )
    viewer.add_geometry(mesh)

    # Pin the virtual camera at the origin and set pinhole intrinsics from
    # the project config so the render matches the capture geometry.
    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(
        window_size_w + 1, window_size_h + 1, config.CAM_FX, config.CAM_FY,
        window_size_w // 2, window_size_h // 2
    )
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    # OneEuroFilters smooth the mesh vertices and joint coordinates across
    # frames to reduce jitter.
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    cords_smoother = OneEuroFilter(4.0, 0.0)
    model = ModelPipeline()

    while True:
        frame_large = capture.read()
        if frame_large is None:
            continue
        # Center-crop the frame to a square.
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        else:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        # Only mirror when capturing the right hand (config.left toggles this).
        if config.left is False:
            frame_large = np.flip(frame_large, axis=1).copy()
        frame = imresize(frame_large, (128, 128))

        cords, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        v = hand_mesh.set_abs_quat(theta_mano)
        # v *= 2 # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        # v = np.array([x+cords[0] for x in v])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some version of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()

        viewer.poll_events()

        cords = cords_smoother.process(cords)

        # Empirical scale/offset mapping model coordinates to frame pixels —
        # TODO(review): confirm against the capture resolution.
        cords = cords * 150 + 200
        cords = np.delete(cords, 2, 1)  # drop the z column, keep (x, y)

        # v = v * 1.5 + 200
        # v = np.delete(v, 2, 1)

        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_RGB2BGR)
        # cv2.polylines(frame_large, v, False, (0, 0, 0))
        # Draw a black marker on each estimated 2D joint position.
        for x in cords:
            cv2.drawMarker(frame_large, (int(x[0]), int(x[1])), (0, 0, 0))
            # cv2.line(frame_large, (int(v[x][0]), int(v[x][1])), (int(v[x+1][0]), int(v[x+1][1])), (0, 0, 0))

        # meshindices = np.array(mesh.triangles)
        # meshvertices = np.array(mesh.vertices) - 80

        # pts2d = cv2.projectPoints(meshvertices, (0, 0, 0), (0, 0, 0), np.array([[620.744, 0., 0.], [0., 621.151, 0.], [0., 0., 1.]]), None)[0].astype(int)
        # for face in meshindices:
        #     cv2.fillConvexPoly(frame_large, pts2d[face], (64, 64, 192))
        # cv2.polylines(frame_large, pts2d[meshindices], True, (255, 255, 255))

        # cv2.polylines(frame_large, v, False, (0, 0, 0))
        cv2.imshow("Hand AI", frame_large)

        # Quit on "q" (requires the third-party `keyboard` package).
        if keyboard.is_pressed("q"):
            cv2.destroyAllWindows()
            break

        cv2.waitKey(1)  # lets the HighGUI window process events/repaint
        time.sleep(0.0015)