import cv2
import dlib

# Project-local helpers; module names assumed from the usage below.
import consts
import util
from hand_detector import HandDetector


class HandTracker:
    def __init__(self, options):
        self.options = options
        self.max_score = 0
        self.tracker = dlib.correlation_tracker()
        self.hand_detector = HandDetector(options)
        self.empty_frames = 0
        self.wrong_frames = 0

    def get_hand_rect(self, frame):
        frame_scaled = cv2.resize(frame, (self.options[consts.tracking_image_width],
                                          self.options[consts.tracking_image_height]))
        score, det_rel = self.hand_detector.detect_hand(frame)

        # (Re)initialise the correlation tracker on the first confident detection.
        if self.max_score == 0 and score > 0:
            position = util.from_relative(det_rel, frame_scaled.shape)
            position = util.fit_rect(position, frame_scaled.shape)
            self.tracker.start_track(frame_scaled, util.to_dlib(position))
            self.max_score = score

        if self.max_score > 0:
            self.tracker.update(frame_scaled)
            position = util.fit_rect(util.from_dlib(self.tracker.get_position()),
                                     frame_scaled.shape)
            pos_rel = util.to_relative(position, frame_scaled.shape)

            if score <= 0:
                # No detection this frame; drop the track after too many empty frames.
                self.empty_frames += 1
                if self.empty_frames >= self.options[consts.empty_frames]:
                    self.max_score = 0
                    self.empty_frames = 0
            else:
                self.empty_frames = 0
                if util.are_different_locations(pos_rel, det_rel):
                    # Tracker and detector disagree; re-seed the tracker after 5 such frames.
                    self.wrong_frames += 1
                    if self.wrong_frames == 5:
                        self.wrong_frames = 0
                        position = util.from_relative(det_rel, frame_scaled.shape)
                        position = util.fit_rect(position, frame_scaled.shape)
                        self.tracker.start_track(frame_scaled, util.to_dlib(position))
                        self.max_score = score
                else:
                    self.wrong_frames = 0

            rect = util.from_relative(pos_rel, frame.shape)
            hand_rect = util.to_square(rect, True)
            return hand_rect
        else:
            return None
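# Illustrative usage sketch, not from the original source: drives HandTracker with a
# webcam feed. The keys of `options` are assumptions based on the lookups above
# (consts.tracking_image_width, consts.tracking_image_height, consts.empty_frames);
# the real configuration object likely carries more keys (e.g. for HandDetector),
# and the (x, y, w, h) layout of the returned rect is also assumed.
def run_tracker_demo():
    options = {
        consts.tracking_image_width: 320,   # assumed values
        consts.tracking_image_height: 240,
        consts.empty_frames: 10,
    }
    tracker = HandTracker(options)
    cap = cv2.VideoCapture(0)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        hand_rect = tracker.get_hand_rect(frame)
        if hand_rect is not None:
            x, y, w, h = hand_rect          # assumed rect layout
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imshow("hand", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()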
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
    face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    # each person detected
def convertData(gesture):
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)

    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")

    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()

        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)

        ret, img = cap.read()
        counter = 1
        df = pd.DataFrame(columns=["Head", "Left", "Right"])
        # sample roughly 12 frames per video
        frame_tracker = int(amount_of_frames / 12)
        framecounter = 0
        # print(frame_tracker)
        left = 0
        right = 0

        while ret:
            ret, img = cap.read()  # get video frame
            if not ret:
                print("Failed to capture image")
                break

            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

            if counter % frame_tracker == 0:
                # only the first detected person in the frame is processed
                firstPerson = True
                for person_pose in person_pose_array:
                    if not firstPerson:
                        continue

                    unit_length = pose_detector.get_unit_length(person_pose)

                    # hands estimation
                    # print("Estimating hands keypoints...")
                    hands = pose_detector.crop_hands(img, person_pose, unit_length)

                    if hands["left"] is not None:
                        hand_img = hands["left"]["img"]
                        bbox = hands["left"]["bbox"]
                        hand_keypoints = hand_detector(hand_img, hand_type="left")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                # drop the confidence value, keep integer (x, y)
                                hand_keypoints[x] = list(np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [int(y) for y in hand_keypoints[x]]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
                        left = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)
                    else:
                        # placeholder keypoints when no left hand is detected
                        left = [[1000, 1000] for _ in range(21)]

                    if hands["right"] is not None:
                        hand_img = hands["right"]["img"]
                        bbox = hands["right"]["bbox"]
                        hand_keypoints = hand_detector(hand_img, hand_type="right")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                hand_keypoints[x] = list(np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [int(y) for y in hand_keypoints[x]]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
                        right = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)
                    else:
                        # placeholder keypoints when no right hand is detected
                        right = [[1000, 1000] for _ in range(21)]

                    print("Body Pose")
                    # drop the lower-body keypoints from the pose
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = person_pose.tolist()
                    for z in range(len(person_pose)):
                        if person_pose[z] is not None:
                            person_pose[z] = list(np.delete(person_pose[z], 2))
                            person_pose[z] = [int(a) for a in person_pose[z]]

                    print(person_pose)
                    print("Left")
                    print(left)
                    print("Right")
                    print(right)
                    cv2.imshow("result", res_img)

                    # reduce each (x, y) keypoint to a single PCA component
                    head = person_pose
                    for x in range(len(head)):
                        if head[x] is None:
                            head[x] = [1000, 1000]
                    pca = sklearnPCA(n_components=1)
                    head = pca.fit_transform(head)
                    dfhead = pd.DataFrame(data=head).T
                    dfhead = dfhead.rename(columns={
                        0: "head_1", 1: "head_2", 2: "head_3", 3: "head_4", 4: "head_5",
                        5: "head_6", 6: "head_7", 7: "head_8", 8: "head_9", 9: "head_10",
                        10: "head_11", 11: "head_12", 12: "head_13", 13: "head_14"})

                    for x in range(len(left)):
                        if left[x] is None:
                            left[x] = [1000, 1000]
                    pca = sklearnPCA(n_components=1)
                    left = pca.fit_transform(left)
                    dfleft = pd.DataFrame(data=left).T
                    dfleft = dfleft.rename(columns={
                        0: "left_1", 1: "left_2", 2: "left_3", 3: "left_4", 4: "left_5",
                        5: "left_6", 6: "left_7", 7: "left_8", 8: "left_9", 9: "left_10",
                        10: "left_11", 11: "left_12", 12: "left_13", 13: "left_14",
                        14: "left_15", 15: "left_16", 16: "left_17", 17: "left_18",
                        18: "left_19", 19: "left_20", 20: "left_21"})

                    for x in range(len(right)):
                        if right[x] is None:
                            right[x] = [1000, 1000]
                    pca = sklearnPCA(n_components=1)
                    right = pca.fit_transform(right)
                    dfright = pd.DataFrame(data=right).T
                    dfright = dfright.rename(columns={
                        0: "right_1", 1: "right_2", 2: "right_3", 3: "right_4", 4: "right_5",
                        5: "right_6", 6: "right_7", 7: "right_8", 8: "right_9", 9: "right_10",
                        10: "right_11", 11: "right_12", 12: "right_13", 13: "right_14",
                        14: "right_15", 15: "right_16", 16: "right_17", 17: "right_18",
                        18: "right_19", 19: "right_20", 20: "right_21"})

                    df2 = pd.concat([dfhead, dfleft, dfright], axis=1)
                    df2["frame"] = framecounter
                    df2["gesture"] = video["gesture"]
                    df2["speaker"] = video["actor"]
                    framecounter = framecounter + 1
                    df2["frame"] = df2["frame"].astype(int)
                    gesturedf = gesturedf.append(df2, sort=False)
                    firstPerson = False
            else:
                cv2.imshow("result", img)

            counter = counter + 1
            # print("Frame", counter)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # print(df)
        cap.release()
        cv2.destroyAllWindows()

        gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
        print("Done Recording for: " + gesture)
        print("Took " + str(time.time() - startvideo) + " seconds")
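# Illustrative driver, not part of the original file: convertData processes one
# gesture at a time, so a batch run could simply iterate over the gesture names
# returned by buildGestureDict. The "dataset/" layout is whatever buildGestureDict
# already expects above.
if __name__ == '__main__':
    for gesture_name in buildGestureDict("dataset/").keys():
        convertData(gesture_name)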
import os

import cv2
import tensorflow as tf

import controls
from gesture_detector import GestureDetector, config, find_static_gesture, find_click
from hand_detector import HandDetector

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

if __name__ == "__main__":
    recognizer = GestureDetector()
    recognizer.get_classes()
    detector = HandDetector(max_hands=1, track_con=0.85)

    confidence = [0.0]
    gesture = "None"
    gesture_index = 0
    mouse_mode = False
    # scale factors from the 640x480 capture frame to a 1920x1080 screen
    k = [1920 / 640, 1080 / 480]

    # capture video from USB web-camera
    cap = cv2.VideoCapture(0)
    cv2.namedWindow("Source", cv2.WINDOW_AUTOSIZE)

    # for pause after mouse mode
    skipped_frames = 0
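    # The per-frame loop is not included above. A minimal sketch of what it might
    # look like, using only the cv2 calls already present; the GestureDetector /
    # HandDetector calls are omitted because their APIs are not shown here.
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        cv2.imshow("Source", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # Esc to quit
            break
    cap.release()
    cv2.destroyAllWindows()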
def main():
    # Set of commands that the module recognizes paired with actions
    # COMMAND : (Basic?, List of actions)
    # Dictionary updated as new commands added
    commands = {
        "GO": (True, ["GO"]),
        "BACK": (True, ["BACK"]),
        "LEFT": (True, ["LEFT"]),
        "RIGHT": (True, ["RIGHT"])
    }

    # Initialize the HandDetector object
    detector = HandDetector()

    # Check command line arguments to pre-load a dataset or create new
    if len(sys.argv) == 1:
        model = md.Model()
    else:
        filename = str(sys.argv[1])
        model = md.Model(filename=filename)

    try:
        # Calibrate with current background
        print("Starting Calibration")
        detector.calibrateBackground()
        print("Calibration Done")

        # Initialize status variables
        prev_img_hand = None
        prev_prediction = None
        prev_look = False
        running = True

        while running:
            # Wait for command from speech recognition module
            speech_fifo = open('../speechToHand.fifo', 'r')
            audio_cmd = speech_fifo.readline()[:-1]
            print("Command: " + str(audio_cmd))

            # Check if known audio command
            if audio_cmd in commands:
                is_basic, motions = commands[audio_cmd]
                if is_basic:
                    # For basic command, detect hand, add it to the model and train it
                    img_hand = detector.detectHand()
                    if img_hand is not None:
                        model.add(img_hand, audio_cmd)
                        model.train()

                # Perform the motion sequence associated with command
                for motion_cmd in motions:
                    # Send command to perform motion
                    subprocess.check_output('echo ' + motion_cmd + ' > ../handToMotion.fifo', shell=True)
                    subprocess.check_output('echo ' + motion_cmd + ' >> ../handToMotion.log', shell=True)

                    # Wait until motion is completed
                    motion_fifo = open('../motionToHand.fifo', 'r')
                    motion_cmd = motion_fifo.readline()[:-1]

                    # Calibrate background once motion complete
                    detector.calibrateBackground()
                    print("Calibration Done")

                # Send complete acknowledgement to speech-recognition
                subprocess.check_output('echo "DONE" > ../handToSpeech.fifo', shell=True)
                subprocess.check_output('echo "DONE" >> ../handToSpeech.log', shell=True)
                prev_look = False

            # Reinforcement for previous detection
            elif audio_cmd == "GOOD" and prev_img_hand is not None and prev_look:
                print("Reinforcement received")
                model.enforce(prev_img_hand, prev_prediction)
                model.train()
                prev_look = False

                # Send complete acknowledgement to speech-recognition
                subprocess.check_output('echo "DONE" > ../handToSpeech.fifo', shell=True)
                subprocess.check_output('echo "DONE" >> ../handToSpeech.log', shell=True)

            # Command to recognize gesture and follow
            elif audio_cmd == "LOOK":
                # Detect hand and predict command
                img_hand = detector.detectHand()
                if img_hand is not None:
                    # Predict the command for the hand detected
                    prediction = model.predict(img_hand)
                    if not prediction == "":
                        print("Prediction: " + str(prediction))

                        # Send command to perform motion
                        subprocess.check_output('echo ' + prediction.upper() + ' > ../handToMotion.fifo', shell=True)
                        subprocess.check_output('echo ' + prediction.upper() + ' >> ../handToMotion.log', shell=True)

                        # Wait until motion is completed
                        motion_fifo = open('../motionToHand.fifo', 'r')
                        motion_cmd = motion_fifo.readline()[:-1]

                        # Calibrate background once motion complete
                        detector.calibrateBackground()
                        print("Calibration Done")

                        # Save images for reinforcement
                        prev_img_hand = img_hand
                        prev_prediction = prediction
                        prev_look = True
                    else:
                        # No prediction
                        print("No prediction")
                        prev_look = False
                else:
                    # No detection
                    print("Hand not found")
                    prev_look = False

                # Send complete acknowledgement to speech-recognition
                subprocess.check_output('echo "DONE" > ../handToSpeech.fifo', shell=True)
                subprocess.check_output('echo "DONE" >> ../handToSpeech.log', shell=True)

            # Quit the program
            elif audio_cmd == "QUIT":
                running = False

                # Send quit command to motion
                subprocess.check_output('echo ' + audio_cmd.upper() + ' > ../handToMotion.fifo', shell=True)
                subprocess.check_output('echo ' + audio_cmd.upper() + ' >> ../handToMotion.log', shell=True)

                # Send complete acknowledgement to speech-recognition
                subprocess.check_output('echo "DONE" > ../handToSpeech.fifo', shell=True)
                subprocess.check_output('echo "DONE" >> ../handToSpeech.log', shell=True)

            # New voice command detected
            else:
                print("Looking")

                # Loop for detecting sequence of gestures
                flag = True
                motions = []
                while flag:
                    # Indicate to animation that hand being detected
                    subprocess.check_output('echo "CHANGE" > ../handToAnimation.fifo', shell=True)
                    subprocess.check_output('echo "CHANGE" >> ../handToAnimation.log', shell=True)

                    # Wait to ensure that hand gesture changed
                    time.sleep(0.5)

                    # Detect gesture
                    img_hand = detector.detectHand()
                    if img_hand is None:
                        # Stop if no hand found
                        flag = False
                        break

                    # Predict the command for gesture detected and add
                    prediction = model.predict(img_hand)
                    if not prediction == "":
                        print("Prediction added: " + str(prediction))
                        motions.append(prediction)

                        # Break if 5 gestures detected
                        if len(motions) >= 5:
                            print("Max limit")
                            break
                    else:
                        # No prediction
                        print("No Prediction")
                        break

                    # Buffer before next gesture detected
                    time.sleep(0.5)
                    print("Change Gesture")

                    # Indicate to animation that hand not being detected
                    subprocess.check_output('echo "CHANGE" > ../handToAnimation.fifo', shell=True)
                    subprocess.check_output('echo "CHANGE" >> ../handToAnimation.log', shell=True)

                    # Buffer time for change in hand gesture
                    time.sleep(2)

                # Indicate to animation that looking for gestures over
                subprocess.check_output('echo "DONE" > ../handToAnimation.fifo', shell=True)
                subprocess.check_output('echo "DONE" >> ../handToAnimation.log', shell=True)

                # Perform the detected motion sequence
                if len(motions) > 0:
                    commands[audio_cmd] = (False, motions)
                    print("New command performed")
                    for motion_cmd in commands[audio_cmd][1]:
                        # Send command to perform motion
                        subprocess.check_output('echo ' + motion_cmd + ' > ../handToMotion.fifo', shell=True)
                        subprocess.check_output('echo ' + motion_cmd + ' >> ../handToMotion.log', shell=True)

                        # Wait until motion is completed
                        motion_fifo = open('../motionToHand.fifo', 'r')
                        motion_cmd = motion_fifo.readline()[:-1]

                        # Calibrate background once motion complete
                        detector.calibrateBackground()
                        print("Calibration Done")

                    # Send acknowledgement to speech-recognition that new command has been added
                    subprocess.check_output('echo ' + audio_cmd + ' > ../handToSpeech.fifo', shell=True)
                    subprocess.check_output('echo ' + audio_cmd + ' >> ../handToSpeech.log', shell=True)
                else:
                    # Nack to speech-recognition to not add new command
                    print("New command not mapped")

                    # Send nack to speech recognition
                    subprocess.check_output('echo "NONE" > ../handToSpeech.fifo', shell=True)
                    subprocess.check_output('echo "NONE" >> ../handToSpeech.log', shell=True)

                prev_look = False

    except KeyboardInterrupt:
        pass

    # Save model
    model.save('knn_dataset.dat')
    print("Model saved")
    detector.closeCamera()
    print("Exit")
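# Hedged test sketch, not in the original: the speech-recognition side can be
# simulated by writing a command into speechToHand.fifo and reading the reply
# from handToSpeech.fifo. FIFO paths are taken from main() above; the call blocks
# until main() answers on the reply FIFO.
def send_test_command(cmd="GO"):
    with open('../speechToHand.fifo', 'w') as f:
        f.write(cmd + '\n')
    with open('../handToSpeech.fifo', 'r') as f:
        return f.readline().strip()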
def main(cap, im_scale=2, view_results=False):
    debug_i = 0
    fps_timer_arr = [0] * 16
    fps = 0

    # load model
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet", f"{pose_model_dir}/coco_posenet.npz", device=pose_device)
    hand_detector = HandDetector("handnet", f"{pose_model_dir}/handnet.npz", device=pose_device)

    # cv2.namedWindow('display', flags=(cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE))
    if view_results:
        cv2.namedWindow('display')

    video_label_file = VideoLabelFile(cap.video_fname, fname_add='pre_points_pose')
    labels_current = defaultdict(lambda: [])
    labels_all_previous = video_label_file.load_previous()

    im_input = cap.read()
    im_input_shape = im_input.shape[0:2]
    first_run = True

    while not cap.eof:
        fps_time_begin = time.perf_counter()
        debug_i += 1

        im_input = cap.read()
        current_frame_id = cap.frame_idx()
        # print(cap.info())

        im_pose = cv2.resize(im_input, (round(im_input_shape[1] / im_scale),
                                        round(im_input_shape[0] / im_scale)))
        if first_run:
            print(f"Video size {im_input.shape} -> Model input size {im_pose.shape}")
            first_run = False

        ##########################################
        person_pose_array, _ = pose_detector(im_pose)
        im_display = cv2.addWeighted(im_pose, 0.6,
                                     draw_person_pose(im_pose, person_pose_array), 0.4, 0)

        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)

            # arr = np.array([a for a in person_pose if a is not None])
            # if arr.any():
            #     arr[:, 0:2] *= im_scale
            #     labels_current[current_frame_id].append(['pre_person_pose', arr.tolist()])

            # hands estimation
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)
            if hands["left"] is not None:
                hand_img = hands["left"]["img"]
                bbox = hands["left"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="left")
                im_display = draw_hand_keypoints(im_display, hand_keypoints, (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array([hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points + np.array([bbox[0], bbox[1]])) * im_scale
                    # f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

            if hands["right"] is not None:
                hand_img = hands["right"]["img"]
                bbox = hands["right"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="right")
                im_display = draw_hand_keypoints(im_display, hand_keypoints, (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array([hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points + np.array([bbox[0], bbox[1]])) * im_scale
                    # f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

        #############################################
        for l in labels_current[current_frame_id]:
            cv2.circle(im_display, (round(l[0][0] / im_scale), round(l[0][1] / im_scale)),
                       10, (255, 0, 0), 2)
            cv2.circle(im_display, (round(l[1][0] / im_scale), round(l[1][1] / im_scale)),
                       10, (0, 255, 0), 2)

        cv2.putText(im_display, f"frame {int(current_frame_id)}, fps: {int(fps)}.",
                    (10, im_display.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        if view_results:
            # cv2.imshow('display', im_display)
            cv2.imshow('display', im_pose)
        else:
            print(".", end="")
            sys.stdout.flush()

        # labels_current[current_frame_id].append

        #############################################
        ## KEYBOARD
        k = cv2.waitKey(5)
        if k == 27:  # esc
            break
        elif k == ord('c'):
            import ipdb
            ipdb.set_trace()
            # ipdb.set_trace()
            # pdb.set_trace()

        fps_timer_arr[debug_i % 16] = time.perf_counter() - fps_time_begin
        fps = int(len(fps_timer_arr) * 1 / sum(fps_timer_arr))

    print(". ")
    # cap.release()
    video_label_file.save_current_labels(labels_current, append_previous=False, custom_lists=True)

    if view_results:
        cv2.destroyAllWindows()
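# main() expects a capture wrapper rather than a bare cv2.VideoCapture: it reads
# cap.video_fname, cap.eof, cap.read(), cap.frame_idx() and cap.info(). The class
# below is a hypothetical minimal adapter with that interface, written only to
# illustrate what main() relies on; the project's real wrapper may differ.
class SimpleVideoReader:
    def __init__(self, video_fname):
        self.video_fname = video_fname
        self._cap = cv2.VideoCapture(video_fname)
        self.eof = not self._cap.isOpened()
        self._frame = None

    def read(self):
        ok, frame = self._cap.read()
        if not ok:
            self.eof = True
            return self._frame          # repeat last frame once the stream ends
        self._frame = frame
        return frame

    def frame_idx(self):
        return int(self._cap.get(cv2.CAP_PROP_POS_FRAMES))

    def info(self):
        return f"{self.video_fname} @ frame {self.frame_idx()}"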
import cv2
import numpy as np

from hand_detector import HandDetector

camera = cv2.VideoCapture(0)
hd = HandDetector()

'''
Reference: https://google.github.io/mediapipe/solutions/hands.html
'''
# Landmark indices of the five fingertips (thumb, index, middle, ring, pinky)
tipsId = [4, 8, 12, 16, 20]

while True:
    _, frame = camera.read()
    frame = hd.find_hands(frame)
    hand_mark_list = hd.find_position(frame)

    if len(hand_mark_list) > 0:
        fingers = []

        # Thumb: compare the tip's x coordinate with the joint next to it
        if hand_mark_list[tipsId[0]]['x'] < hand_mark_list[tipsId[0] - 1]['x']:
            fingers.append(0)
        else:
            fingers.append(1)

        # Other fingers: a finger counts as raised when its tip is above the pip joint
        for id in range(1, 5):
            if hand_mark_list[tipsId[id]]['y'] < hand_mark_list[tipsId[id] - 2]['y']:
                fingers.append(1)
            else:
                fingers.append(0)
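        # Possible continuation, not part of the original snippet: overlay the
        # count of raised fingers for the detected hand.
        cv2.putText(frame, f"Fingers: {fingers.count(1)}", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)

    # Also assumed, not from the original: show the frame and exit on 'q'.
    cv2.imshow("hand", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()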
def estimate_pose(img_path, gpu=-1):
    # parser = argparse.ArgumentParser(description='Pose detector')
    # parser.add_argument('--img', help='image file path')
    # parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    # args = parser.parse_args()

    # load model
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)
    # face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(img_path)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    # will cause the loop below to perform only at most 1 iteration;
    # which means only 1 person will be recognized
    has_detected = False

    # each person detected
    for person_pose in person_pose_array:
        if has_detected:
            continue
        has_detected = True

        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # face estimation
        # print("Estimating face keypoints...")
        # cropped_face_img, bbox = pose_detector.crop_face(img, person_pose, unit_length)
        # if cropped_face_img is not None:
        #     face_keypoints = face_detector(cropped_face_img)
        #     res_img = draw_face_keypoints(res_img, face_keypoints, (bbox[0], bbox[1]))
        #     cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            print("Left hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        if hands["right"] is not None:
            hand_img = hands["right"]["img"]
            bbox = hands["right"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="right")
            print("Right hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)
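# Small CLI wrapper sketch: the original keeps the equivalent argparse code
# commented out inside estimate_pose, so the argument names below mirror those
# comments. Assumes argparse is imported at the top of this file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()
    estimate_pose(args.img, gpu=args.gpu)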
@socketio.on("frame") def handle_frame(data): print("Got Frame") start = timeit.default_timer() image = decode_base64(data['frame']) image = substract_background(img=image) boxes, scores = hand_detector.get_boxes(image, data["threshold"]) if len(boxes) > 0: boxes, scores = filter_small_boxes(boxes, scores, 0.2) print(f"Found {len(boxes)} hands, with max score of {max(scores or [0])}") emit("box", { 'boxes': boxes, 'scores': scores }) # Send the client the box to show print(f"Finished processing frame in {timeit.default_timer() - start}sec") @app.route('/', methods=['GET']) def hello(): return "Welcome to ASLie" if __name__ == '__main__': print("Starting ASLie...") print("Loading hand detector...") hand_detector = HandDetector() print("Hand detector loaded.") print("ASLie ready :)") socketio.run(app, host="0.0.0.0", port="1607")
import cv2
from PIL import Image
import numpy as np

from hand_detector import HandDetector
from utils import crop

detector = HandDetector()

img = Image.open("b.jpg", "r")
img.show()

boxes, scores = detector.get_boxes(img)
print(boxes[0])
cropped = crop(img, box=boxes[0], score=scores[0])
cropped.show()

from matplotlib import pyplot as plt

# img.show()
hsv = cropped.convert("HSV")
# h = hsv.getchannel("H")
s = hsv.getchannel("S")
# v = hsv.getchannel("V")
# hsv.show()
# h.show()
s.show()
# v.show()

sarr = np.asarray(s)
mask = (sarr > 40)
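# A possible continuation, not part of the original snippet: visualise the
# saturation threshold as a binary image.
masked = Image.fromarray((mask * 255).astype(np.uint8))
masked.show()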