Example #1
# assumed imports for this excerpt: cv2, numpy as np, zmq,
# multiprocessing as mp, plus project-local helpers (ModelPipeline,
# WebcamCapture, crop, imresize, calc_hand_data, FPS, pull)
def main():
    model = ModelPipeline()

    webcam = WebcamCapture()

    base_fps = webcam.get(cv2.CAP_PROP_FPS)
    print('Base FPS:', base_fps)

    info_frame = crop(webcam.read())

    mp.Process(target=pull, args=(info_frame, base_fps)).start()

    context = zmq.Context()
    socket = context.socket(zmq.PUSH)
    socket.bind('tcp://*:5555')

    height, width, _ = info_frame.shape

    hand_data_keys = ['origin', 'joints', 'distX', 'distY', 'vert']

    fps_send = FPS('Send:')
    while True:
        frame_large = webcam.read_rgb()
        frame_large = crop(frame_large)

        frame_large_l = frame_large[:, :width // 2]
        frame_large_r = frame_large[:, width // 2:]

        frame_l = imresize(frame_large_l, (128, 128))
        frame_r = imresize(frame_large_r, (128, 128))

        # iv - intermediate values
        ivl, _ = model.process(np.flip(frame_l, axis=1))
        ivr, _ = model.process(frame_r)

        hand_data_l = calc_hand_data(ivl)
        hand_data_r = calc_hand_data(ivr)

        if hand_data_l is not None and hand_data_r is not None:
            socket.send_json(
                {
                    'dataL': dict(zip(hand_data_keys, hand_data_l)),
                    'dataR': dict(zip(hand_data_keys, hand_data_r)),
                    'frameWidth': frame_large.shape[1],
                    'frameHeight': frame_large.shape[0],
                }, zmq.SNDMORE)
            socket.send(np.flip(frame_large, axis=0).tobytes())

            fps_send()
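
For context, a matching consumer on the PULL side might look like this minimal sketch (the uint8 RGB layout is an assumption based on read_rgb() and tobytes() above):

import numpy as np
import zmq

context = zmq.Context()
socket = context.socket(zmq.PULL)
socket.connect('tcp://localhost:5555')

while True:
    meta = socket.recv_json()  # part 1: hand data plus frame dimensions
    raw = socket.recv()        # part 2: raw frame bytes (flipped vertically by the sender)
    frame = np.frombuffer(raw, dtype=np.uint8).reshape(
        (meta['frameHeight'], meta['frameWidth'], 3))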
Example #2
def live_application(capture, output_dirpath):
    model = ModelPipeline()

    frame_index = 0
    mano_params = []
    measure_time = True
    while True:
        frame_large = capture.read()
        if frame_large is None:
            print(f'none frame {frame_index}')
            # if frame_index == 0:
            #   continue
            break
        # if frame_large.shape[0] > frame_large.shape[1]:
        #   margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
        #   frame_large = frame_large[margin:-margin]
        # else:
        #   margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
        #   frame_large = frame_large[:, margin:-margin]

        frame = imresize(frame_large, (128, 128))

        if measure_time:
            ends1 = []
            ends2 = []
            for i in range(1000):
                start = time.time()
                _, theta_mpii = model.process(frame)
                end1 = time.time()
                theta_mano = mpii_to_mano(theta_mpii)
                end2 = time.time()
                ends1.append(end1 - start)
                ends2.append(end2 - start)
            # discard the first 10 iterations as warm-up; t1 covers
            # model.process alone, t2 additionally includes mpii_to_mano
            t1 = np.mean(ends1[10:])
            t2 = np.mean(ends2[10:])
            print(f't1: {t1 * 1000:.2f}ms, {1 / t1:.2f}hz')
            print(f't2: {t2 * 1000:.2f}ms, {1 / t2:.2f}hz')
            return
        else:
            _, theta_mpii = model.process(frame)
            theta_mano = mpii_to_mano(theta_mpii)

        mano_params.append(deepcopy(theta_mano.tolist()))

        # (no-op left over in this excerpt; Example #4 actually uses this path)
        # osp.join(output_dirpath, "%06d.jpg" % frame_index)
        frame_index += 1
    # note: despite the '.pickle' extension, this writes JSON in text mode
    with open(osp.join(output_dirpath, f'{capture.side}.pickle'), 'w') as f:
        json.dump(mano_params, f)
Example #3
class Processor:
    def __init__(self):
        self.model = ModelPipeline()
        self.flip_side = 'right'

    def process(self, img, side):
        frame = imresize(img, (128, 128))

        if side == self.flip_side:
            frame = np.flip(frame, axis=1)

        _, theta_mpii = self.model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)
        return theta_mano, frame
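
A hypothetical usage of the Processor above (the image source and side labels are assumptions):

proc = Processor()
theta_mano_r, frame_r = proc.process(img, side='right')  # right hand is mirrored before inference
theta_mano_l, frame_l = proc.process(img, side='left')   # left hand passes through unflipped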
Example #4
def live_application(capture, output_dirpath):
    """
  Launch an application that reads from a webcam and estimates hand pose at
  real-time.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  """
    ############ output visualization ############
    view_mat = axangle2mat([1, 0, 0],
                           np.pi)  # align different coordinate systems
    window_size = 1080

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
      o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(width=window_size + 1,
                         height=window_size + 1,
                         window_name='Minimal Hand - output')
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(window_size + 1, window_size + 1,
                                        config.CAM_FX, config.CAM_FY,
                                        window_size // 2, window_size // 2)
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    ############ input visualization ############
    pygame.init()
    display = pygame.display.set_mode((window_size, window_size))
    pygame.display.set_caption('Minimal Hand - input')

    ############ misc ############
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    clock = pygame.time.Clock()
    model = ModelPipeline()

    frame_index = 0
    mano_params = []
    while True:
        frame_large = capture.read()
        if frame_large is None:
            print(f'none frame {frame_index}')
            # if frame_index == 0:
            #   continue
            break

        # if frame_large.shape[0] > frame_large.shape[1]:
        #   margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
        #   frame_large = frame_large[margin:-margin]
        # else:
        #   margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
        #   frame_large = frame_large[:, margin:-margin]

        frame = imresize(frame_large, (128, 128))

        _, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        mano_params.append(deepcopy(theta_mano.tolist()))

        # (no-op; the same path is rebuilt where capture_screen_image is called)
        # osp.join(output_dirpath, "%06d.jpg" % frame_index)

        v = hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some versions of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()
        viewer.poll_events()
        viewer.capture_screen_image(
            osp.join(output_dirpath, "%06d.jpg" % frame_index))

        display.blit(
            pygame.surfarray.make_surface(
                np.transpose(imresize(frame_large, (window_size, window_size)),
                             (1, 0, 2))), (0, 0))
        pygame.display.update()

        # if keyboard.is_pressed("esc"):
        #   break

        clock.tick(30)
        frame_index += 1
    # JSON despite the '.pickle' extension, as in Example #2
    with open(osp.join(output_dirpath, f'{capture.side}.pickle'), 'w') as f:
        json.dump(mano_params, f)
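
The capture argument is only described as "an object from `capture.py`"; a minimal stand-in with the same interface might look like this sketch (the class name and the cv2-based implementation are assumptions):

import cv2
import numpy as np

class OpenCVCapture:
    # hypothetical webcam wrapper exposing the read() interface and the
    # `side` attribute that live_application above relies on
    def __init__(self, device=0, side='right'):
        self.cap = cv2.VideoCapture(device)
        self.side = side

    def read(self):
        ok, frame = self.cap.read()
        if not ok:
            return None
        return np.flip(frame, axis=-1).copy()  # BGR -> RGB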
Example #5
def __init__(self):
    self.model = ModelPipeline()
    self.flip_side = 'right'
# )
# view_control.convert_from_pinhole_camera_parameters(cam_params)
# view_control.set_constant_z_far(1000)

# render_option = viewer1.get_render_option()
# render_option.load_from_json('./render_option.json')
# viewer1.update_renderer()
# render_option = viewer2.get_render_option()
# render_option.load_from_json('./render_option.json')
# viewer2.update_renderer()

mesh_smoother1 = OneEuroFilter(2.0, 0.0)
mesh_smoother2 = OneEuroFilter(2.0, 0.0)
cords_smoother1 = OneEuroFilter(2.0, 0.0)
cords_smoother2 = OneEuroFilter(2.0, 0.0)
model = ModelPipeline()

que = Queue()
quet1 = Queue()
quet2 = Queue()

t11 = threading.Thread(target=Loop1, args=(
    que,
    quet1,
    quet2,
))
t12 = threading.Thread(target=Loop1, args=(
    que,
    quet1,
    quet2,
))
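
Loop1 itself is not part of this excerpt; a hypothetical skeleton consistent with the queue wiring above (every detail here is assumed, not the source's implementation):

def Loop1(que, quet1, quet2):
    # hypothetical worker: pull (side, frame) jobs and fan results out per hand
    while True:
        job = que.get()
        if job is None:  # sentinel to shut the worker down
            break
        side, frame = job
        _, theta_mpii = model.process(frame)
        (quet1 if side == 'left' else quet2).put(mpii_to_mano(theta_mpii))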
Example #6
def live_application(capture):
    """
  Launch an application that reads from a webcam and estimates hand pose at
  real-time.

  The captured hand must be the right hand, but will be flipped internally
  and rendered.

  Parameters
  ----------
  capture : object
    An object from `capture.py` to read capture stream from.
  """
    ############ output visualization ############
    view_mat = axangle2mat([1, 0, 0],
                           np.pi)  # align different coordinate systems
    window_size = 1080

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH)
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
      o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(width=window_size + 1,
                         height=window_size + 1,
                         window_name='Minimal Hand - output')
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(window_size + 1, window_size + 1,
                                        config.CAM_FX, config.CAM_FY,
                                        window_size // 2, window_size // 2)
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    ############ input visualization ############
    pygame.init()
    display = pygame.display.set_mode((window_size, window_size))
    pygame.display.set_caption('Minimal Hand - input')

    ############ misc ############
    mesh_smoother = OneEuroFilter(4.0, 0.0)
    clock = pygame.time.Clock()
    model = ModelPipeline()
    # image_paths = glob.glob("samples_benet/casa/*")

    input_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_video_path = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_video/CVmyQR31Dr4_5-3-rgb_front.mp4"
    output_frames_folder = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose_frames/CVmyQR31Dr4_5-3-rgb_front"
    output_pose_file = "../How2Sign/utterance_level/train/rgb_front/features/hand_pose/CVmyQR31Dr4_5-3-rgb_front.pickle"

    os.system("mkdir " + output_frames_folder)

    cap = cv2.VideoCapture(input_video_path)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # for inputpath in image_paths:
    # for i in range(length):

    # 'PIM1' selects an MPEG-1 codec; the frame size must match the frames
    # written by write_rendered_frame below
    writer = cv2.VideoWriter(output_video_path,
                             cv2.VideoWriter_fourcc(*'PIM1'), fps, (1081, 731))

    pose_data = []

    for i in range(length):
        # frame_large = capture.read()
        #inputpath = "samples_benet/hand_far.jpg"
        # frame_large = cv2.imread(inputpath)
        ret, frame_large = cap.read()

        # check the read succeeded before converting color spaces
        if not ret or frame_large is None:
            continue
        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_BGR2RGB)
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        elif frame_large.shape[0] < frame_large.shape[1]:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        frame_large = np.flip(frame_large, axis=1).copy()

        frame = imresize(frame_large, (128, 128))
        # plt.imshow(frame)
        # plt.show()

        _, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        pose_data.append(theta_mano)

        v = hand_mesh.set_abs_quat(theta_mano)
        v *= 2  # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some versions of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()

        viewer.poll_events()

        write_rendered_frame(writer, viewer, output_frames_folder, i)

        display.blit(
            pygame.surfarray.make_surface(
                np.transpose(imresize(frame_large, (window_size, window_size)),
                             (1, 0, 2))), (0, 0))
        pygame.display.update()
        # pygame.image.save(display, "kk.jpeg")
        # if keyboard.is_pressed("esc"):
        #   break

        # clock.tick(30)

    with open(output_pose_file, "wb") as output:
        pickle.dump(pose_data, output)
Example #8
def live_application(capture):
    """
    Launch an application that reads from a webcam and estimates hand pose in
    real time.

    The captured hand must be the right hand, but will be flipped internally
    and rendered.

    Parameters
    ----------
    capture : object
        An object from `capture.py` to read the capture stream from.
    """

    view_mat = axangle2mat([1, 0, 0], np.pi)
    window_size_w = 640
    window_size_h = 480

    hand_mesh = HandMesh(config.HAND_MESH_MODEL_PATH) 
    mesh = o3d.geometry.TriangleMesh()
    mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
    mesh.vertices = \
        o3d.utility.Vector3dVector(np.matmul(view_mat, hand_mesh.verts.T).T * 1000)
    mesh.compute_vertex_normals()

    viewer = o3d.visualization.Visualizer()
    viewer.create_window(
        width=window_size_w + 1, height=window_size_h + 1,
        window_name='Minimal Hand - output'
    )
    viewer.add_geometry(mesh)

    view_control = viewer.get_view_control()
    cam_params = view_control.convert_to_pinhole_camera_parameters()
    extrinsic = cam_params.extrinsic.copy()
    extrinsic[0:3, 3] = 0
    cam_params.extrinsic = extrinsic
    cam_params.intrinsic.set_intrinsics(
        window_size_w + 1, window_size_h + 1, config.CAM_FX, config.CAM_FY,
        window_size_w // 2, window_size_h // 2
    )
    view_control.convert_from_pinhole_camera_parameters(cam_params)
    view_control.set_constant_z_far(1000)

    render_option = viewer.get_render_option()
    render_option.load_from_json('./render_option.json')
    viewer.update_renderer()

    mesh_smoother = OneEuroFilter(4.0, 0.0)
    cords_smoother = OneEuroFilter(4.0, 0.0)
    model = ModelPipeline()

    while True:
        frame_large = capture.read()
        if frame_large is None:
            continue
        if frame_large.shape[0] > frame_large.shape[1]:
            margin = int((frame_large.shape[0] - frame_large.shape[1]) / 2)
            frame_large = frame_large[margin:-margin]
        else:
            margin = int((frame_large.shape[1] - frame_large.shape[0]) / 2)
            frame_large = frame_large[:, margin:-margin]

        if not config.left:
            frame_large = np.flip(frame_large, axis=1).copy()
        frame = imresize(frame_large, (128, 128))

        cords, theta_mpii = model.process(frame)
        theta_mano = mpii_to_mano(theta_mpii)

        v = hand_mesh.set_abs_quat(theta_mano)
        # v *= 2 # for better visualization
        v = v * 1000 + np.array([0, 0, 400])
        # v = np.array([x+cords[0] for x in v])
        v = mesh_smoother.process(v)
        mesh.triangles = o3d.utility.Vector3iVector(hand_mesh.faces)
        mesh.vertices = o3d.utility.Vector3dVector(np.matmul(view_mat, v.T).T)
        mesh.paint_uniform_color(config.HAND_COLOR)
        mesh.compute_triangle_normals()
        mesh.compute_vertex_normals()
        # for some versions of open3d you may need `viewer.update_geometry(mesh)`
        viewer.update_geometry()

        viewer.poll_events()

        cords = cords_smoother.process(cords)

        # map to pixel space and drop the z column, keeping 2D image coords
        cords = cords * 150 + 200
        cords = np.delete(cords, 2, 1)

        # v = v * 1.5 + 200
        # v = np.delete(v, 2, 1)

        frame_large = cv2.cvtColor(frame_large, cv2.COLOR_RGB2BGR)
        # cv2.polylines(frame_large, v, False, (0, 0, 0))
        for x in cords:
            cv2.drawMarker(frame_large, (int(x[0]), int(x[1])), (0, 0, 0))
            # cv2.line(frame_large, (int(v[x][0]), int(v[x][1])), (int(v[x+1][0]), int(v[x+1][1])), (0, 0, 0))

        # meshindices = np.array(mesh.triangles)
        # meshvertices = np.array(mesh.vertices) - 80

        # pts2d = cv2.projectPoints(meshvertices, (0, 0, 0), (0, 0, 0), np.array([[620.744, 0., 0.], [0., 621.151, 0.], [0., 0., 1.]]), None)[0].astype(int)
        # for face in meshindices:
        #     cv2.fillConvexPoly(frame_large, pts2d[face], (64, 64, 192))
        # cv2.polylines(frame_large, pts2d[meshindices], True, (255, 255, 255))

        # cv2.polylines(frame_large, v, False, (0, 0, 0))
        cv2.imshow("Hand AI", frame_large)

        if keyboard.is_pressed("q"):
            cv2.destroyAllWindows()
            break

        cv2.waitKey(1)
        time.sleep(0.0015)
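
The OneEuroFilter used throughout these examples comes from the repo itself; as background, here is a compact sketch of the one-euro filtering idea (Casiez et al.). The constructor signature and default constants are assumptions, not the repo's API:

import numpy as np

class SimpleOneEuroFilter:
    # background sketch of the one-euro idea; not the repo's OneEuroFilter
    def __init__(self, freq=30.0, min_cutoff=1.0, beta=0.0, d_cutoff=1.0):
        self.freq, self.min_cutoff = freq, min_cutoff
        self.beta, self.d_cutoff = beta, d_cutoff
        self.x_prev = None
        self.dx_prev = 0.0

    def _alpha(self, cutoff):
        tau = 1.0 / (2.0 * np.pi * cutoff)
        return 1.0 / (1.0 + tau * self.freq)  # te = 1 / freq

    def process(self, x):
        x = np.asarray(x, dtype=float)
        if self.x_prev is None:
            self.x_prev = x
            return x
        dx = (x - self.x_prev) * self.freq  # finite-difference derivative
        a_d = self._alpha(self.d_cutoff)
        dx_hat = a_d * dx + (1.0 - a_d) * self.dx_prev
        cutoff = self.min_cutoff + self.beta * np.abs(dx_hat)  # speed-adaptive cutoff
        a = self._alpha(cutoff)
        x_hat = a * x + (1.0 - a) * self.x_prev
        self.x_prev, self.dx_prev = x_hat, dx_hat
        return x_hat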
Example #9
def __init__():
    # module-level initializer: exposes the TF hand-detection graph, its
    # session, and the pose pipeline as globals
    global detection_graph, sess, fingerDetector
    detection_graph, sess = detector_utils.load_inference_graph()
    fingerDetector = ModelPipeline()
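
A hypothetical call site for this module-level initializer:

__init__()  # populates detection_graph, sess and fingerDetector once at startup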