Ejemplo n.º 1
0
 def __init__(self, options):
     """Store the options and create the tracker, detector and counters.

     Args:
         options: configuration object; it is kept on the instance and also
             passed to ``HandDetector``.
     """
     self.options = options
     # Starts at zero; no other use is visible in this snippet.
     self.max_score = 0
     # Helper objects: dlib correlation tracker and the hand detector.
     self.tracker = dlib.correlation_tracker()
     self.hand_detector = HandDetector(options)
     # Both frame counters start at zero.
     self.empty_frames = self.wrong_frames = 0
Ejemplo n.º 2
0
class HandTracker:
    """Track one hand across video frames.

    A ``HandDetector`` is run on every frame and a dlib correlation tracker
    follows the hand on a resized copy of the frame.  The tracker is started
    from the detector's result when tracking begins, and restarted from it
    after ``WRONG_FRAMES_LIMIT`` consecutive frames on which
    ``util.are_different_locations`` reports that tracker and detector
    disagree.  Tracking is dropped after ``options[consts.empty_frames]``
    consecutive frames whose detection score is not positive.
    """

    # Consecutive "tracker and detector disagree" frames tolerated before the
    # tracker is restarted from the detector's rectangle.
    WRONG_FRAMES_LIMIT = 5

    def __init__(self, options):
        """Create the tracker and detector and reset all counters.

        Args:
            options: mapping read with ``consts.*`` keys; also passed to
                ``HandDetector``.
        """
        self.options = options
        # 0 means "not tracking"; otherwise the score tracking started with.
        self.max_score = 0
        self.tracker = dlib.correlation_tracker()
        self.hand_detector = HandDetector(options)
        # Consecutive frames without a positive detection score.
        self.empty_frames = 0
        # Consecutive frames where tracker and detector disagree.
        self.wrong_frames = 0

    def _restart_tracking(self, frame_scaled, det_rel, score):
        """Start the dlib tracker on ``det_rel`` and remember ``score``."""
        position = util.from_relative(det_rel, frame_scaled.shape)
        position = util.fit_rect(position, frame_scaled.shape)
        self.tracker.start_track(frame_scaled, util.to_dlib(position))
        self.max_score = score

    def get_hand_rect(self, frame):
        """Return the tracked hand rectangle for ``frame``.

        Args:
            frame: image passed to ``cv2.resize`` and to the hand detector.

        Returns:
            The result of ``util.to_square`` applied to the tracked rectangle
            expressed in ``frame`` coordinates, or ``None`` while no hand is
            being tracked.
        """
        frame_scaled = cv2.resize(
            frame,
            (self.options[consts.tracking_image_width],
             self.options[consts.tracking_image_height]))
        # Detection runs on the full frame; its rectangle is converted with
        # util.from_relative before use on the scaled frame.
        score, det_rel = self.hand_detector.detect_hand(frame)

        # Not tracking yet and the detector found something: start tracking.
        if self.max_score == 0 and score > 0:
            self._restart_tracking(frame_scaled, det_rel, score)

        if not self.max_score > 0:
            return None

        self.tracker.update(frame_scaled)
        position = util.fit_rect(
            util.from_dlib(self.tracker.get_position()),
            frame_scaled.shape)
        pos_rel = util.to_relative(position, frame_scaled.shape)

        if score <= 0:
            self.empty_frames += 1
            if self.empty_frames >= self.options[consts.empty_frames]:
                # Too many frames without a detection: stop tracking.  The
                # current frame still returns the tracker's rectangle.
                self.max_score = 0
                self.empty_frames = 0
        else:
            self.empty_frames = 0

        if util.are_different_locations(pos_rel, det_rel):
            self.wrong_frames += 1
            if self.wrong_frames >= self.WRONG_FRAMES_LIMIT:
                # Tracker drifted away from the detector: restart it.  The
                # rectangle returned for this frame is still the old one.
                self.wrong_frames = 0
                self._restart_tracking(frame_scaled, det_rel, score)
        else:
            self.wrong_frames = 0

        rect = util.from_relative(pos_rel, frame.shape)
        return util.to_square(rect, True)
# Script entry point: estimate the body pose of every person in one image.
# The snippet is cut off after the pose overlay is built.
if __name__ == '__main__':
    # Command line: --img <path> and --gpu/-g <id> (defaults to GPU 0).
    parser = argparse.ArgumentParser(description='Pose detector')
    # NOTE(review): --img has no default and is not marked as required.
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # Load the pose, hand and face models on the selected device.
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    face_detector = FaceDetector("facenet",
                                 "models/facenet.npz",
                                 device=args.gpu)

    # Read the input image.
    # NOTE(review): the result of cv2.imread is used without being checked.
    img = cv2.imread(args.img)

    # Run pose inference; the second return value is discarded.
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    # Blend the original image (weight 0.6) with the drawn poses (weight 0.4).
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)

    # Per-person processing follows here (not shown in this snippet).
Ejemplo n.º 4
0
def _to_int_xy(points):
    """Drop the third component of every non-missing point and cast to int.

    ``points`` is modified in place and also returned.  Entries that are
    ``None`` are left untouched.
    """
    for idx, point in enumerate(points):
        if point is not None:
            points[idx] = [int(value) for value in np.delete(point, 2)]
    return points


def _read_hand(hand, hand_type, hand_detector, res_img):
    """Detect and draw the keypoints of one cropped hand.

    Returns ``(keypoints, res_img)``.  When ``hand`` is ``None`` the keypoints
    are 21 ``[1000, 1000]`` placeholders and ``res_img`` is returned as is.
    """
    if hand is None:
        return [[1000, 1000] for _ in range(21)], res_img
    bbox = hand["bbox"]
    keypoints = _to_int_xy(hand_detector(hand["img"], hand_type=hand_type))
    res_img = draw_hand_keypoints(res_img, keypoints, (bbox[0], bbox[1]))
    cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                  (255, 255, 255), 1)
    return keypoints, res_img


def _keypoints_to_row(points, prefix, count):
    """Project 2-D points onto one PCA component as a one-row DataFrame.

    Missing points (``None``) are replaced by ``[1000, 1000]``.  Columns
    ``0 .. count-1`` are renamed to ``<prefix>_1 .. <prefix>_<count>``.
    """
    filled = [[1000, 1000] if point is None else point for point in points]
    projected = sklearnPCA(n_components=1).fit_transform(filled)
    row = pd.DataFrame(data=projected).T
    return row.rename(
        columns={i: prefix + "_" + str(i + 1) for i in range(count)})


def convertData(gesture):
    """Convert every video of ``gesture`` into keypoint rows and save a CSV.

    For each video listed under ``buildGestureDict("dataset/")[gesture]`` the
    function runs the pose and hand detectors on the frames it samples.  For
    each sampled frame with a person, the body and both hands are reduced to
    one PCA component per keypoint and one row is appended to the table read
    from ``sample.csv``.  The result is written to
    ``dataset720new/<gesture>.csv``.

    Args:
        gesture: key into the gesture dictionary; also the output file stem.

    Notes:
        * Command-line arguments are parsed here (``--gpu``/``-g``).
        * Only the first detected person of a frame is recorded, and frames
          without any detected person are shown but not recorded.
        * Press ``q`` in the preview window to stop the current video.
    """
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")
    # Fallback so the final timing line works when there are no videos; it is
    # reset at the start of each video, as before.
    startvideo = time.time()
    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()
        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)
        # The first frame is read only to prime the loop condition.
        ret, img = cap.read()
        counter = 1
        # At least 1, so the modulo below cannot divide by zero on videos
        # with fewer than 12 frames.
        frame_tracker = max(1, int(amount_of_frames / 12))
        framecounter = 0
        while ret:
            ret, img = cap.read()
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6,
                                      draw_person_pose(img, person_pose_array),
                                      0.4, 0)
            if counter % frame_tracker == 0:
                # NOTE(review): counter is only advanced in the else branch,
                # so once this branch is reached every later frame is
                # processed too.  Kept as in the original; confirm whether
                # sampling every frame_tracker-th frame was intended.
                if len(person_pose_array) > 0:
                    # Only the first detected person is recorded.
                    person_pose = person_pose_array[0]
                    unit_length = pose_detector.get_unit_length(person_pose)
                    hands = pose_detector.crop_hands(img, person_pose,
                                                     unit_length)
                    left, res_img = _read_hand(hands["left"], "left",
                                               hand_detector, res_img)
                    right, res_img = _read_hand(hands["right"], "right",
                                                hand_detector, res_img)
                    print("Body Pose")
                    # Same rows as four successive deletions at 9, 9, 10, 10.
                    person_pose = np.delete(person_pose, [9, 10, 12, 13], 0)
                    head = _to_int_xy(person_pose.tolist())
                    print(head)
                    print("Left")
                    print(left)
                    print("Right")
                    print(right)
                    cv2.imshow("result", res_img)

                    df2 = pd.concat([
                        _keypoints_to_row(head, "head", 14),
                        _keypoints_to_row(left, "left", 21),
                        _keypoints_to_row(right, "right", 21),
                    ], axis=1)
                    df2["frame"] = framecounter
                    df2["gesture"] = video["gesture"]
                    df2["speaker"] = video["actor"]
                    framecounter = framecounter + 1
                    df2["frame"] = df2["frame"].astype(int)
                    # pd.concat replaces DataFrame.append (removed in
                    # pandas 2.0).
                    gesturedf = pd.concat([gesturedf, df2], sort=False)
                else:
                    # Nobody detected: show the frame, record nothing.
                    cv2.imshow("result", res_img)
            else:
                cv2.imshow("result", img)
                counter = counter + 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
    gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
    print("Done Recording for: " + gesture)
    print("Took " + str(time.time() - startvideo) + "seconds")
Ejemplo n.º 5
0
import os
import cv2
import tensorflow as tf
import controls
from gesture_detector import GestureDetector, config, find_static_gesture, find_click
from hand_detector import HandDetector

# Let TensorFlow grow GPU memory on demand instead of reserving it up front.
# The loop is a no-op on machines without a GPU (indexing [0] would raise
# IndexError there) and applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for _gpu in physical_devices:
    tf.config.experimental.set_memory_growth(_gpu, True)
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

# Script entry point: set up the gesture recognizer, hand detector, camera
# and state variables.  The snippet ends before the main loop.
if __name__ == "__main__":
    recognizer = GestureDetector()
    recognizer.get_classes()

    # Hand detector limited to a single hand.
    detector = HandDetector(max_hands=1, track_con=0.85)

    # Initial recognition state.
    confidence = [0.0]
    gesture = "None"
    gesture_index = 0

    mouse_mode = False
    # Horizontal and vertical ratios 1920/640 and 1080/480.
    # NOTE(review): presumably screen size over camera frame size - confirm.
    k = [1920 / 640, 1080 / 480]

    # capture video from USB web-camera
    cap = cv2.VideoCapture(0)
    cv2.namedWindow("Источник", cv2.WINDOW_AUTOSIZE)

    # for pause after mouse mode
    skipped_frames = 0
Ejemplo n.º 6
0
def _send_message(channel, message):
    """Write ``message`` to ``../<channel>.fifo`` and append it to the log.

    The files are written directly instead of through ``echo`` in a shell, so
    text received from another process can never be interpreted as shell
    syntax.  Like the shell redirection it replaces, opening the FIFO blocks
    until a reader is present.
    """
    line = message + '\n'
    with open('../' + channel + '.fifo', 'w') as fifo:
        fifo.write(line)
    with open('../' + channel + '.log', 'a') as log:
        log.write(line)


def _read_message(fifo_path):
    """Block until one line arrives on ``fifo_path`` and return it.

    Only the trailing newline is removed, so a line without one keeps its
    last character.  The FIFO is closed again before returning.
    """
    with open(fifo_path, 'r') as fifo:
        return fifo.readline().rstrip('\n')


def _perform_motions(motions):
    """Send each motion to the motion module and wait for its reply."""
    for motion_cmd in motions:
        _send_message('handToMotion', motion_cmd)
        # The reply only signals completion; its content is not used.
        _read_message('../motionToHand.fifo')


def main():
    """Run the hand-gesture command loop until QUIT or Ctrl-C.

    Each iteration reads one voice command from ``../speechToHand.fifo``:

    * a known command trains the model on the current hand (basic commands
      only) and performs the motions mapped to it;
    * ``GOOD`` reinforces the previous ``LOOK`` prediction;
    * ``LOOK`` predicts a command from the current hand and performs it;
    * ``QUIT`` forwards the command to the motion module and stops;
    * anything else records up to five gestures and maps them to the new
      command.

    Every branch answers the speech module on ``../handToSpeech.fifo``.  On
    exit the model is saved to ``knn_dataset.dat`` and the camera is closed.
    An optional first command-line argument names a dataset to pre-load.
    """
    # Set of commands that the module recognizes paired with actions
    # COMMAND : (Basic?, List of actions)
    # Dictionary updated as new commands added
    commands = {
        "GO" : (True, ["GO"]),
        "BACK" : (True, ["BACK"]),
        "LEFT" : (True, ["LEFT"]),
        "RIGHT" : (True, ["RIGHT"])
    }

    # Initialize the HandDetector object
    detector = HandDetector()

    # Check command line arguments to pre-load a dataset or create new
    if len(sys.argv) == 1:
        model = md.Model()
    else:
        filename = str(sys.argv[1])
        model = md.Model(filename = filename)

    try:
        # Calibrate with current background
        print("Starting Calibration")
        detector.calibrateBackground()
        print("Calibration Done")

        # State kept so a later "GOOD" can reinforce the last LOOK result
        prev_img_hand = None
        prev_prediction = None
        prev_look = False

        running = True
        while running:
            # Wait for command from speech recognition module
            audio_cmd = _read_message('../speechToHand.fifo')
            print("Command: " + str(audio_cmd))

            # Check if known audio command
            if audio_cmd in commands:
                is_basic, motions = commands[audio_cmd]

                if is_basic:
                    # For basic command, detect hand, add it to the model
                    # and train it
                    img_hand = detector.detectHand()
                    if img_hand is not None:
                        model.add(img_hand, audio_cmd)
                        model.train()

                # Perform the motion sequence associated with command
                _perform_motions(motions)

                # Calibrate background once motion complete
                detector.calibrateBackground()
                print("Calibration Done")

                # Send complete acknowledgement to speech-recognition
                _send_message('handToSpeech', 'DONE')

                prev_look = False

            # Reinforcement for previous detection
            elif audio_cmd == "GOOD" and prev_img_hand is not None and prev_look:
                print("Reinforcement received")
                model.enforce(prev_img_hand, prev_prediction)
                model.train()
                prev_look = False

                # Send complete acknowledgement to speech-recognition
                _send_message('handToSpeech', 'DONE')

            # Command to recognize gesture and follow
            elif audio_cmd == "LOOK":
                # Detect hand and predict command
                img_hand = detector.detectHand()
                if img_hand is not None:
                    # Predict the command for the hand detected
                    prediction = model.predict(img_hand)
                    if prediction != "":
                        print("Prediction: " + str(prediction))
                        # Send command and wait until motion is completed
                        _perform_motions([prediction.upper()])
                        # Calibrate background once motion complete
                        detector.calibrateBackground()
                        print("Calibration Done")
                        # Save images for reinforcement
                        prev_img_hand = img_hand
                        prev_prediction = prediction
                        prev_look = True
                    else:
                        # No prediction
                        print("No prediction")
                        prev_look = False
                else:
                    # No detection
                    print("Hand not found")
                    prev_look = False
                # Send complete acknowledgement to speech-recognition
                _send_message('handToSpeech', 'DONE')

            # Quit the program
            elif audio_cmd == "QUIT":
                running = False
                # Send quit command to motion (no reply is awaited)
                _send_message('handToMotion', audio_cmd.upper())
                # Send complete acknowledgement to speech-recognition
                _send_message('handToSpeech', 'DONE')

            # New voice command detected
            else:
                print("Looking")
                # Loop for detecting sequence of gestures
                motions = []
                while True:
                    # Indicate to animation that hand being detected
                    _send_message('handToAnimation', 'CHANGE')
                    # Wait to ensure that hand gesture changed
                    time.sleep(0.5)
                    # Detect gesture
                    img_hand = detector.detectHand()
                    if img_hand is None:
                        # Stop if no hand found
                        break
                    # Predict the command for gesture detected and add
                    prediction = model.predict(img_hand)
                    if prediction == "":
                        # No prediction
                        print("No Prediction")
                        break
                    print("Prediction added: " + str(prediction))
                    motions.append(prediction)
                    # Break if 5 gestures detected
                    if len(motions) >= 5:
                        print("Max limit")
                        break
                    # Buffer before next gesture detected
                    time.sleep(0.5)
                    print("Change Gesture")
                    # Indicate to animation that hand not being detected
                    _send_message('handToAnimation', 'CHANGE')
                    # Buffer time for change in hand gesture
                    time.sleep(2)
                # Indicate to animation that looking for gestures over
                _send_message('handToAnimation', 'DONE')

                # Perform the detected motion sequence
                if len(motions) > 0:
                    commands[audio_cmd] = (False, motions)
                    print("New command performed")
                    _perform_motions(motions)
                    # Calibrate background once motion complete
                    detector.calibrateBackground()
                    print("Calibration Done")

                    # Tell speech-recognition the new command was added
                    _send_message('handToSpeech', audio_cmd)
                else:
                    # Nack to speech-recognition to not add new command
                    print("New command not mapped")
                    _send_message('handToSpeech', 'NONE')

                prev_look = False

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; fall through to save and exit.
        pass
    # Save model
    model.save('knn_dataset.dat')
    print("Model saved")
    detector.closeCamera()
    print("Exit")
def main(cap, im_scale=2, view_results=False):
    """Label index-finger points on every frame of ``cap`` and save them.

    Each frame is shrunk by ``im_scale``, run through the pose detector, and
    the hands cropped from every detected person are run through the hand
    detector.  When hand keypoints 5 and 8 are both present, the pair is
    stored, scaled back to input-frame coordinates, under the frame's id.
    All labels are saved through ``VideoLabelFile`` when the video ends or
    Esc is pressed.

    Args:
        cap: video source providing ``video_fname``, ``read()``, ``eof`` and
            ``frame_idx()``.
        im_scale: factor by which frames are shrunk before detection.
        view_results: show a preview window instead of printing progress dots.
    """
    loop_count = 0
    frame_durations = [0] * 16
    fps = 0

    # load model
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet",
                                 f"{pose_model_dir}/coco_posenet.npz",
                                 device=pose_device)
    hand_detector = HandDetector("handnet",
                                 f"{pose_model_dir}/handnet.npz",
                                 device=pose_device)

    if view_results:
        cv2.namedWindow('display')

    video_label_file = VideoLabelFile(cap.video_fname,
                                      fname_add='pre_points_pose')
    labels_current = defaultdict(lambda: [])
    labels_all_previous = video_label_file.load_previous()

    # The first frame is read only to learn the input size.
    im_input = cap.read()
    input_height, input_width = im_input.shape[0:2]
    pose_size = (round(input_width / im_scale),
                 round(input_height / im_scale))

    while not cap.eof:
        started_at = time.perf_counter()
        loop_count += 1

        im_input = cap.read()
        current_frame_id = cap.frame_idx()

        im_pose = cv2.resize(im_input, pose_size)
        if loop_count == 1:
            print(
                f"Video size {im_input.shape} -> Model input size {im_pose.shape}"
            )

        # ---- detection -------------------------------------------------
        person_pose_array, _ = pose_detector(im_pose)
        pose_overlay = draw_person_pose(im_pose, person_pose_array)
        im_display = cv2.addWeighted(im_pose, 0.6, pose_overlay, 0.4, 0)

        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)

            # Left hand first, then right.
            for side in ("left", "right"):
                hand = hands[side]
                if hand is None:
                    continue
                bbox = hand["bbox"]
                origin = (bbox[0], bbox[1])
                hand_keypoints = hand_detector(hand["img"], hand_type=side)
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 origin)
                cv2.rectangle(im_display, origin, (bbox[2], bbox[3]),
                              (255, 255, 255), 1)

                if not (hand_keypoints[5] and hand_keypoints[8]):
                    continue
                # Crop coordinates -> frame coordinates -> input scale.
                finger = np.array(
                    [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                finger = (finger + np.array([bbox[0], bbox[1]])) * im_scale
                labels_current[current_frame_id].append(
                    finger.astype(int).tolist())

        # ---- overlay ---------------------------------------------------
        for label in labels_current[current_frame_id]:
            first_point, second_point = label[0], label[1]
            cv2.circle(im_display,
                       (round(first_point[0] / im_scale),
                        round(first_point[1] / im_scale)),
                       10, (255, 0, 0), 2)
            cv2.circle(im_display,
                       (round(second_point[0] / im_scale),
                        round(second_point[1] / im_scale)),
                       10, (0, 255, 0), 2)

        caption = f"frame {int(current_frame_id)}, fps: {int(fps)}."
        cv2.putText(im_display, caption, (10, im_display.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        if view_results:
            # The plain resized frame is shown, not the annotated overlay.
            cv2.imshow('display', im_pose)
        else:
            print(".", end="")
            sys.stdout.flush()

        # ---- keyboard --------------------------------------------------
        key = cv2.waitKey(5)
        if key == 27:  # esc
            break
        if key == ord('c'):
            import ipdb
            ipdb.set_trace()

        # fps is computed over a ring buffer of the last 16 frame times.
        frame_durations[loop_count % 16] = time.perf_counter() - started_at
        fps = int(len(frame_durations) / sum(frame_durations))

    print(". ")
    video_label_file.save_current_labels(labels_current,
                                         append_previous=False,
                                         custom_lists=True)

    if view_results:
        cv2.destroyAllWindows()
Ejemplo n.º 8
0
import cv2
import numpy as np
from hand_detector import HandDetector

# Open camera device 0 and create the hand detector.
camera = cv2.VideoCapture(0)
hd = HandDetector()
'''
Reference: https://google.github.io/mediapipe/solutions/hands.html
'''
# Landmark indices treated as the fingertips, thumb first.
tipsId = [4, 8, 12, 16, 20]

# Per-frame loop: find the hand landmarks and build a 0/1 entry per finger.
while True:
    # NOTE(review): the success flag returned by camera.read() is discarded.
    _, frame = camera.read()
    frame = hd.find_hands(frame)
    hand_mark_list = hd.find_position(frame)

    if len(hand_mark_list) > 0:
        fingers = []

        # Thumb: compare the tip's x with the x of the landmark just before it.
        if hand_mark_list[tipsId[0]]['x'] < hand_mark_list[tipsId[0] - 1]['x']:
            fingers.append(0)
        else:
            fingers.append(1)

        # Other fingers: append 1 when the tip's y is smaller than the y of
        # the landmark two indices below the tip.
        for id in range(1, 5):
            if hand_mark_list[tipsId[id]]['y'] < hand_mark_list[tipsId[id] -
                                                                2]['y']:
                fingers.append(1)
            else:
Ejemplo n.º 9
0
def estimate_pose(img_path, gpu=-1):
    """Estimate body and hand keypoints for an image and save an overlay.

    Loads the pose and hand models from ``models/coco_posenet.npz`` and
    ``models/handnet.npz``, runs pose detection on the image, then runs hand
    keypoint detection on the cropped hands of the first detected person
    only. The annotated image is written to ``result.png`` (relative path).

    Args:
        img_path: Path of the image file to analyse.
        gpu: Device id forwarded as ``device=`` to both detectors. A negative
            value indicates CPU.

    Raises:
        OSError: If the image at ``img_path`` cannot be read.
    """
    # Read the image before loading the models so that an unreadable path
    # fails immediately. cv2.imread reports failure by returning None
    # instead of raising.
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("Could not read image: {!r}".format(img_path))

    # load models
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)
    # NOTE: face keypoint estimation (FaceDetector with "models/facenet.npz",
    # pose_detector.crop_face, draw_face_keypoints) is currently disabled.

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)

    # Blend the drawn body pose over the input image (60% image, 40% drawing).
    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    for person_pose in person_pose_array:
        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        for side, label in (("left", "Left hand: "), ("right", "Right hand: ")):
            hand = hands[side]
            if hand is None:
                # No crop was produced for this hand.
                continue

            bbox = hand["bbox"]
            hand_keypoints = hand_detector(hand["img"], hand_type=side)
            print(label, print_arr(hand_keypoints))

            # Keypoints are drawn offset by the top-left corner of the crop's
            # bounding box, then the box itself is outlined in white.
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        # Only the first detected person is processed.
        break

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)
Ejemplo n.º 10
0
@socketio.on("frame")
def handle_frame(data):
    """Handle a "frame" socket event: detect hands and emit their boxes.

    Decodes the base64 payload in ``data['frame']``, passes it through
    background subtraction, and asks the module-level ``hand_detector`` for
    boxes using ``data["threshold"]``. When any box is found the result is
    passed through ``filter_small_boxes`` with the argument 0.2. The boxes
    and scores are then emitted to the client as a "box" event.

    Args:
        data: Event payload; must provide the keys ``'frame'`` and
            ``"threshold"``.
    """
    print("Got Frame")
    started_at = timeit.default_timer()

    frame = substract_background(img=decode_base64(data['frame']))
    boxes, scores = hand_detector.get_boxes(frame, data["threshold"])
    if len(boxes):
        boxes, scores = filter_small_boxes(boxes, scores, 0.2)

    hand_count = len(boxes)
    # Falls back to [0] so max() still has a value when scores is empty.
    best_score = max(scores or [0])
    print(f"Found {hand_count} hands, with max score of {best_score}")

    # Send the client the box to show
    payload = dict(boxes=boxes, scores=scores)
    emit("box", payload)

    elapsed = timeit.default_timer() - started_at
    print(f"Finished processing frame in {elapsed}sec")


@app.route('/', methods=['GET'])
def hello():
    """Return the static greeting string for GET requests to '/'."""
    greeting = "Welcome to ASLie"
    return greeting


if __name__ == '__main__':
    # Script entry point: create the hand detector, then start the
    # Socket.IO server on all interfaces.
    print("Starting ASLie...")
    print("Loading hand detector...")
    # Bound as a module-level global; the "frame" event handler reads it.
    hand_detector = HandDetector()
    print("Hand detector loaded.")
    print("ASLie ready :)")
    # The port is passed as an integer, as the server API documents it; a
    # string port is not accepted by every underlying server backend.
    socketio.run(app, host="0.0.0.0", port=1607)
Ejemplo n.º 11
0
import cv2
from PIL import Image
import numpy as np

from hand_detector import HandDetector
from utils import crop

# Experiment script: detect a hand in "b.jpg", crop it, and threshold the
# saturation channel of the crop.
detector = HandDetector()

# Open the input image and display it.
img = Image.open("b.jpg", "r")
img.show()
boxes, scores = detector.get_boxes(img)
# NOTE(review): only the first box/score is used and there is no check that
# any box was returned - presumably this fails on an empty result; confirm.
print(boxes[0])
cropped = crop(img, box=boxes[0], score=scores[0])

cropped.show()
# NOTE(review): mid-script import; `plt` is not used in the lines visible here.
from matplotlib import pyplot as plt
# img.show()

# Convert the crop to HSV; only the saturation channel is used below.
hsv = cropped.convert("HSV")

# h = hsv.getchannel("H")
s = hsv.getchannel("S")
# v = hsv.getchannel("V")
# hsv.show()
# h.show()
s.show()
# v.show()

# Boolean mask: True where the saturation value is greater than 40.
sarr = np.asarray(s)
mask = (sarr > 40)