    def test_blank_image(self):
        # A plain white image contains no hands, so both result fields are None.
        with mp_hands.Hands() as hands:
            image = np.zeros([100, 100, 3], dtype=np.uint8)
            image.fill(255)
            results = hands.process(image)
            self.assertIsNone(results.multi_hand_landmarks)
            self.assertIsNone(results.multi_handedness)
    def test_multi_hands(self, static_image_mode, num_frames):
        image_path = os.path.join(os.path.dirname(__file__),
                                  'testdata/hands.jpg')
        hands = mp_hands.Hands(static_image_mode=static_image_mode,
                               max_num_hands=2,
                               min_detection_confidence=0.5)
        image = cv2.flip(cv2.imread(image_path), 1)

        def process_one_frame():
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            handedness = [
                handedness.classification[0].label
                for handedness in results.multi_handedness
            ]
            self.assertLen(handedness, 2)
            multi_hand_coordinates = []
            for landmarks in results.multi_hand_landmarks:
                self.assertLen(landmarks.landmark, 21)
                x = [landmark.x for landmark in landmarks.landmark]
                y = [landmark.y for landmark in landmarks.landmark]
                hand_coordinates = np.transpose(np.stack(
                    (y, x))) * image.shape[0:2]
                multi_hand_coordinates.append(hand_coordinates)
            self.assertLen(multi_hand_coordinates, 2)
            prediction_error = np.abs(
                np.asarray(multi_hand_coordinates) -
                np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
            npt.assert_array_less(prediction_error, DIFF_THRESHOLD)

        for _ in range(num_frames):
            process_one_frame()
        hands.close()
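Both test methods above rely on module-level imports, reference constants, and a parameterized-test decorator that this excerpt omits. Below is a minimal sketch of that scaffolding, assuming absl's parameterized test base; the decorator's parameter values are illustrative and the reference constants are not reproduced here.

import os

import cv2
import numpy as np
import numpy.testing as npt
from absl.testing import parameterized

import mediapipe.python.solutions.hands as mp_hands

# EXPECTED_HAND_COORDINATES_PREDICTION holds the reference pixel coordinates
# for both hands; DIFF_THRESHOLD is the allowed per-coordinate error.
# Their actual values are omitted from this excerpt.


class HandsTest(parameterized.TestCase):

    # Illustrative parameterization: run once in static image mode and over
    # several frames in video (tracking) mode.
    @parameterized.named_parameters(('static_image_mode', True, 1),
                                    ('video_mode', False, 5))
    def test_multi_hands(self, static_image_mode, num_frames):
        ...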
Example #3
 def test_multi_hands(self, static_image_mode, model_complexity, num_frames):
   image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
   image = cv2.imread(image_path)
   with mp_hands.Hands(
       static_image_mode=static_image_mode,
       max_num_hands=2,
       model_complexity=model_complexity,
       min_detection_confidence=0.5) as hands:
     for idx in range(num_frames):
       results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
       self._annotate(image.copy(), results, idx)
       handedness = [
           handedness.classification[0].label
           for handedness in results.multi_handedness
       ]
       multi_hand_coordinates = []
       rows, cols, _ = image.shape
       for landmarks in results.multi_hand_landmarks:
         self.assertLen(landmarks.landmark, 21)
         x = [landmark.x * cols for landmark in landmarks.landmark]
         y = [landmark.y * rows for landmark in landmarks.landmark]
         hand_coordinates = np.column_stack((x, y))
         multi_hand_coordinates.append(hand_coordinates)
       self.assertLen(handedness, 2)
       self.assertLen(multi_hand_coordinates, 2)
       prediction_error = np.abs(
           np.asarray(multi_hand_coordinates) -
           np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
        diff_threshold = (LITE_MODEL_DIFF_THRESHOLD if model_complexity == 0
                          else FULL_MODEL_DIFF_THRESHOLD)
       npt.assert_array_less(prediction_error, diff_threshold)
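The `self._annotate` call above refers to a helper defined elsewhere in the test class. A plausible sketch of such a helper, assuming it draws the detected landmarks and writes the frame out for manual inspection (the output location and file naming are assumptions):

import tempfile

import mediapipe.python.solutions.drawing_utils as mp_drawing

def _annotate(self, frame, results, idx):
  # Draw every detected hand onto the frame copy passed in by the test.
  if results.multi_hand_landmarks:
    for hand_landmarks in results.multi_hand_landmarks:
      mp_drawing.draw_landmarks(frame, hand_landmarks,
                                mp_hands.HAND_CONNECTIONS)
  # Write the annotated frame to a temporary file for manual inspection.
  cv2.imwrite(
      os.path.join(tempfile.gettempdir(), 'hands_frame_%d.png' % idx), frame)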
Example #4
    def _process_video(self,
                       model_complexity,
                       video_path,
                       max_num_hands=1,
                       num_landmarks=21,
                       num_dimensions=3):
        # Predict hand landmarks for each frame.
        video_cap = cv2.VideoCapture(video_path)
        landmarks_per_frame = []
        w_landmarks_per_frame = []
        with mp_hands.Hands(static_image_mode=False,
                            max_num_hands=max_num_hands,
                            model_complexity=model_complexity,
                            min_detection_confidence=0.5) as hands:
            while True:
                # Get next frame of the video.
                success, input_frame = video_cap.read()
                if not success:
                    break

                # Run the hand tracker.
                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
                frame_shape = input_frame.shape
                result = hands.process(image=input_frame)
                # Pre-fill with NaN for hands that are not detected in this frame.
                frame_landmarks = np.full(
                    [max_num_hands, num_landmarks, num_dimensions], np.nan)
                frame_w_landmarks = np.full(
                    [max_num_hands, num_landmarks, num_dimensions], np.nan)

                if result.multi_hand_landmarks:
                    for idx, landmarks in enumerate(
                            result.multi_hand_landmarks):
                        landmarks = self._landmarks_list_to_array(
                            landmarks, frame_shape)
                        frame_landmarks[idx] = landmarks
                if result.multi_hand_world_landmarks:
                    for idx, w_landmarks in enumerate(
                            result.multi_hand_world_landmarks):
                        w_landmarks = self._world_landmarks_list_to_array(
                            w_landmarks)
                        frame_w_landmarks[idx] = w_landmarks

                landmarks_per_frame.append(frame_landmarks)
                w_landmarks_per_frame.append(frame_w_landmarks)
        return (np.array(landmarks_per_frame), np.array(w_landmarks_per_frame))
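`_landmarks_list_to_array` and `_world_landmarks_list_to_array` are class helpers not shown in this excerpt. A minimal sketch of what they likely do, assuming image landmarks are scaled from normalized values to pixel coordinates and world landmarks are kept in metric space:

    def _landmarks_list_to_array(self, landmark_list, image_shape):
        rows, cols, _ = image_shape
        # Scale the normalized x/y landmarks into pixel coordinates; z keeps
        # the relative depth scale reported by MediaPipe.
        return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z)
                           for lmk in landmark_list.landmark])

    def _world_landmarks_list_to_array(self, landmark_list):
        # World landmarks are already metric, so no scaling is applied.
        return np.asarray([(lmk.x, lmk.y, lmk.z)
                           for lmk in landmark_list.landmark])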
    def test_invalid_image_shape(self):
        hands = mp_hands.Hands()
        with self.assertRaisesRegex(
                ValueError,
                'Input image must contain three channel rgb data.'):
            hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
Example #6
import cv2
import numpy as np
import zmq
import mediapipe.python.solutions.hands as mp_hands


# The enclosing function header is missing from this snippet; the signature
# below is a hypothetical reconstruction of a helper that mirrors the frame
# and shrinks it to 128x128.
def preprocess_frame(frame):
    frame = np.flip(frame, axis=1).copy()
    return cv2.resize(frame, (128, 128))


################################################################################
#   Creating src and socket binding
################################################################################
context = zmq.Context()
socket = context.socket(zmq.REP)
socket.bind("tcp://*:5555")

################################################################################
#   Starting Hand-tracking
################################################################################
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

################################################################################
#   Starting WebCam
################################################################################
cap = cv2.VideoCapture(1)

while True:

    ########################################
    # Stage 01 - Waiting

    print("Waiting request ...")

    #  Wait for next request from client
    message = socket.recv_string()
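The snippet ends right after the request is received. A sketch of how the rest of the request/reply loop might look, assuming the client expects the first hand's normalized index-fingertip position as a string reply (the message protocol is an assumption, not part of the original code):

    ########################################
    # Stage 02 - Capture and track (sketch, not part of the original snippet)

    success, frame = cap.read()
    if not success:
        # REP sockets must answer every request, even when the camera fails.
        socket.send_string("no_frame")
        continue

    # MediaPipe expects RGB input; OpenCV delivers BGR frames.
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if results.multi_hand_landmarks:
        tip = results.multi_hand_landmarks[0].landmark[
            mp_hands.HandLandmark.INDEX_FINGER_TIP]
        socket.send_string("%.4f,%.4f" % (tip.x, tip.y))
    else:
        socket.send_string("no_hand")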
Example #7
import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(1)
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils
# The palm detection module runs on the full image and produces a cropped image of the hand.
# The hand landmark module then finds 21 landmarks on that cropped hand image.
while True:
    success, img = cap.read()
    if not success:
        continue
    # Convert BGR to RGB because the hands solution expects RGB input.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(imgRGB)

    if results.multi_hand_landmarks:
        for handLms in results.multi_hand_landmarks:
            #print(handLms)
            print(mpHands.HAND_CONNECTIONS)
            mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
    if results.multi_handedness:
        print(results.multi_handedness)

    cv2.imshow("Image", img)
    cv2.waitKey(1)
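Landmark coordinates are normalized to [0, 1]. A small sketch of converting one landmark, here the index fingertip, to pixel coordinates so it can be drawn or tracked (the helper name is illustrative):

def index_fingertip_px(hand_landmarks, image):
    # Scale the normalized landmark by the image dimensions to get pixels.
    h, w, _ = image.shape
    tip = hand_landmarks.landmark[mpHands.HandLandmark.INDEX_FINGER_TIP]
    return int(tip.x * w), int(tip.y * h)

# Example use inside the loop above:
#   for handLms in results.multi_hand_landmarks:
#       cx, cy = index_fingertip_px(handLms, img)
#       cv2.circle(img, (cx, cy), 8, (255, 0, 255), cv2.FILLED)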