def hand_detection(ini_time): cap = cv2.VideoCapture(args.video_source) cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width) cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height) start_time = datetime.datetime.now() num_frames = 0 im_width, im_height = (cap.get(3), cap.get(4)) # max number of hands we want to detect/track num_hands_detect = 2 cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL) cv2.namedWindow('Hand wash status', cv2.WINDOW_NORMAL) count = 0 while (time.time() - ini_time) <= 30: # Expand dimensions since the model expects images to have shape: [1, None, None, 3] ret, image_np = cap.read() # image_np = cv2.flip(image_np, 1) try: image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) except: print("Error converting to RGB") # Actual detection. Variable boxes contains the bounding box cordinates for hands detected, # while scores contains the confidence for each of these boxes. # Hint: If len(boxes) > 1 , you may assume you have found atleast one hand (within your score threshold) boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes on frame count += detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np) cv2.imshow('Single-Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyWindow('Single-Threaded Detection') break else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) cv2.destroyWindow('Single-Threaded Detection')
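# The loop above assumes module-level detection_graph, sess, and args objects.
# A minimal sketch of that setup; the import path for detector_utils and the
# argument defaults below are assumptions, not taken from the original script.
import argparse
import datetime
import time

import cv2
from utils import detector_utils

detection_graph, sess = detector_utils.load_inference_graph()

parser = argparse.ArgumentParser()
parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float, default=0.2)
parser.add_argument('-src', '--source', dest='video_source', type=int, default=0)
parser.add_argument('-wd', '--width', dest='width', type=int, default=320)
parser.add_argument('-ht', '--height', dest='height', type=int, default=180)
parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1)
parser.add_argument('-ds', '--display', dest='display', type=int, default=1)
args = parser.parse_args()

if __name__ == '__main__':
    # hand_detection() runs until 30 seconds have passed since ini_time
    hand_detection(time.time())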
def viewCam(self): start_time = datetime.datetime.now() num_frames = 0 im_width, im_height = (self.cap.get(3), self.cap.get(4)) # max number of hands we want to detect/track num_hands_detect = 2 # Expand dimensions since the model expects images to have shape: [1, None, None, 3] ret, image_np = self.cap.read() # image_np = cv2.flip(image_np, 1) try: image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) except: print("Error converting to RGB") # Actual detection. Variable boxes contains the bounding box cordinates for hands detected, # while scores contains the confidence for each of these boxes. # Hint: If len(boxes) > 1 , you may assume you have found atleast one hand (within your score threshold) boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes on frame detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np) # cv2.imshow('Single-Threaded Detection', # cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) # convert image to RGB format image = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) # get image infos height, width, channel = image_np.shape step = channel * width # create QImage from image qImg = QImage(image_np.data, width, height, step, QImage.Format_RGB888) # show image in img_label #audio #audio_frame, val = self.player.get_frame() if self.i: self.player = MediaPlayer("vid1.MP4") self.i = False self.player.get_frame() self.ui.image_label.setPixmap(QPixmap.fromImage(qImg))
sess) # draw bounding boxes good_detector_boxes = detector_utils.draw_box_on_image( num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, frame) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image(str(good_bbox), frame) #add the tracker bounding box p1 = (int(tracker_bbox[0]), int(tracker_bbox[1])) p2 = (int(tracker_bbox[0] + tracker_bbox[2]), int(tracker_bbox[1] + tracker_bbox[3])) # write the tracked box cv2.rectangle(frame, p1, p2, (255, 0, 0), 3, 1) #convert to the new type of annotation tracker_bbox = xywh_to_xyxy(tracker_bbox) good_bbox, reset_tracker = combine_boxes(tracker_bbox, good_detector_boxes, good_bbox) #cv2.rectangle(frame, (int(good_bbox[0]), int(good_bbox[1])), (int(good_bbox[2]), int(good_bbox[3])), (0, 0, 255), 2, 1)
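# The tracker fusion above converts the tracker's (x, y, w, h) box into the
# detector's (x_min, y_min, x_max, y_max) convention before combine_boxes() is
# called. A minimal sketch of that conversion, assuming this is all
# xywh_to_xyxy() does; the original helper and combine_boxes() may carry extra
# logic not shown here.
def xywh_to_xyxy(bbox):
    """Convert an (x, y, width, height) box to (x_min, y_min, x_max, y_max)."""
    x, y, w, h = bbox
    return (x, y, x + w, y + h)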
# actual detection boxes, scores = detector_utils.detect_objects( image_np, detection_graph, sess) # draw bounding boxes detector_utils.draw_box_on_image( num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image( str("Confidence threshold: " + str(args.score_thresh)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor( image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() time.sleep(10) else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps)))
boxes, scores = detector_utils.detect_objects(image_from_cv, graph, session) # Draw bounding boxes detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, image_height, image_from_cv) # FPS num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_from_cv) cv2.putText(image_from_cv, chord, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2) cv2.imshow('Real Time Gesture Detection', cv2.cvtColor(image_from_cv, cv2.COLOR_RGB2BGR)) # To quit application press q if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) for i in range(num_hands_detect):
def detect_hands_create_boundingbox(input_path, display_frames=False): detection_graph, sess = detector_utils.load_inference_graph() score_thresh = 0.2 num_workers = 4 queue_size = 5 cap = cv2.VideoCapture(input_path) cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) start_time = datetime.datetime.now() num_frames = 0 im_width, im_height = (cap.get(3), cap.get(4)) # max number of hands we want to detect/track num_hands_detect = 1 #cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL) ret, image_np = cap.read() processed_frames = [] while ret: # Expand dimensions since the model expects images to have shape: [1, None, None, 3] ret, image_np = cap.read() # image_np = cv2.flip(image_np, 1) # if len(image_np) != 0: try: image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) except: continue # Actual detection. Variable boxes contains the bounding box cordinates for hands detected, # while scores contains the confidence for each of these boxes. # Hint: If len(boxes) > 1 , you may assume you have found atleast one hand (within your score threshold) boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes on frame detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, image_np) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (display_frames): # Display FPS on frame if (fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np) cv2.imshow('Single-Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: print("Hand Detector: frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) processed_frames.append(image_np) cv2.destroyAllWindows() return processed_frames
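# One way to consume the frames returned by detect_hands_create_boundingbox()
# is to re-encode them to a video file. This sketch is illustrative: the output
# path, codec, and frame rate are assumptions, and the frames come back in RGB
# order, so they are converted to BGR before writing.
import cv2

def save_annotated_video(input_path, output_path='hands_annotated.avi', fps=30):
    frames = detect_hands_create_boundingbox(input_path, display_frames=False)
    if not frames:
        return
    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'),
                             fps, (width, height))
    for frame in frames:
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()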
break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) # print("frame ", index, num_frames, elapsed_time, fps) if (output_frame is not None): output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR) if (args.display > 0): if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), output_frame) cv2.imshow('Handpose', output_frame) if cv2.waitKey(1) & 0xFF == ord('q'): break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) else: print("video end") break elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time
def hd_main(eleft, eright, eup, video_source=0, num_hands=1, display=1, num_workers=4): print("detector started") input_q = Queue(maxsize=5) output_q = Queue(maxsize=5) output_boxes = Queue(maxsize=5) t.sleep(1) video_capture = WebcamVideoStream(src=video_source, width=300, height=200).start() cap_params = {} frame_processed = 0 cap_params['im_width'], cap_params['im_height'] = video_capture.size() cap_params['score_thresh'] = score_thresh # max number of hands we want to detect/track cap_params['num_hands_detect'] = num_hands print(cap_params) # spin up workers to paralleize detection. pool = Pool(num_workers, worker, (input_q, output_q, output_boxes, cap_params, frame_processed)) start_time = datetime.datetime.now() num_frames = 0 index = 0 cv2.namedWindow('Multi-Threaded Detection', cv2.WINDOW_NORMAL) while True: frame = video_capture.read() frame = cv2.flip(frame, 1) index += 1 input_q.put(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) output_frame = output_q.get() output_box = output_boxes.get() output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR) elapsed_time = (datetime.datetime.now() - start_time).total_seconds() num_frames += 1 fps = num_frames / elapsed_time if (output_frame is not None): (left, right, top, bottom) = (output_box[1], output_box[3], output_box[0], output_box[2]) my_detect_threshold = 0.8 ## How far right/ left hand should be for sending events if top < 0.4: # if the top of bounding box is in the top 40% of the screen eup.set() else: eup.clear() if (right > my_detect_threshold): eright.set() else: eright.clear() if (left < (1 - my_detect_threshold)): eleft.set() else: eleft.clear() if (display > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), output_frame) cv2.imshow('Multi-Threaded Detection', output_frame) if cv2.waitKey(1) & 0xFF == ord('q'): break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) else: break pool.terminate() video_capture.stop() cv2.destroyAllWindows()
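# hd_main() reports hand position through the three multiprocessing events it
# receives. A rough sketch of a driver process that consumes those events,
# assuming hd_main is importable from this module; the polling interval and
# printed messages are illustrative.
import time
from multiprocessing import Event, Process

if __name__ == '__main__':
    eleft, eright, eup = Event(), Event(), Event()
    detector = Process(target=hd_main, args=(eleft, eright, eup))
    detector.start()
    try:
        while detector.is_alive():
            if eup.is_set():
                print("hand near the top of the frame")
            elif eleft.is_set():
                print("hand on the left")
            elif eright.is_set():
                print("hand on the right")
            time.sleep(0.1)
    finally:
        detector.terminate()
        detector.join()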
def HandPose_main(keep_flg): # when HandPose.py is launched from another file, if __name__ == '__main__': does not seem to run, so this function stands in for it #if __name__ == '__main__': flg_video = 0 # "1" means the camera is not connected flg_break = 0 # "1" leaves the outer loop and ends => normal termination flg_restart = 0 # "1" stops eel in hand_gui.py from launching twice on a restart flg_start = 0 # "1" means the camera was already missing at start-up cnt_gui=0 # used for running eel in hand_gui (0: first launch, 1: second and later launches, 2: used to close the html when the camera is disconnected) width,height = autopy.screen.size() # taken so the window position can be set when launching with eel while(True): # loop until the camera is connected again #try: # when the camera-not-connected flag is set if(flg_video == 1): if(cnt_gui == 2): eel.init('GUI/web') eel.start('html/connect.html', mode='chrome', size=(500,600), # size (width, height) position=(width/2-250, height/2-300), # position (left, top) block=False) cnt_gui = 0 print("connect: connected!!") try: eel.sleep(0.01) except: print("an error occurred!!!!") traceback.print_exc() continue # check whether the camera is connected cap2 = cv2.VideoCapture(0) ret2, frame2 = cap2.read() if(ret2 is True): # the camera is connected flg_video = 0 cnt_gui = 0 flg_restart = 1 print("webcam found!!") eel.windowclose() continue # go back to the outer while else: # the camera is not connected #print("no webcam!!!") continue # go back to the outer while # when the normal-termination flag is set elif(flg_break == 1): break # leave the outer while and finish normally # build the parser parser = argparse.ArgumentParser() # choose the webcam parser.add_argument( '-src', '--source', dest='video_source', type=int, default=0, # default=hand_gui.cam_source(), help='Device index of the camera.') # number of hands to recognise parser.add_argument( '-nhands', '--num_hands', dest='num_hands', type=int, default=1, help='Max number of hands to detect.') # whether to show FPS parser.add_argument( '-fps', '--fps', dest='fps', type=int, default=1, help='Show FPS on detection/display visualization') # width of the video stream parser.add_argument( '-wd', '--width', dest='width', type=int, default=300, help='Width of the frames in the video stream.') # height of the video stream parser.add_argument( '-ht', '--height', dest='height', type=int, default=200, help='Height of the frames in the video stream.') # whether or not to show FPS with OpenCV parser.add_argument( '-ds', '--display', dest='display', type=int, default=1, help='Display the detected images using OpenCV. This reduces FPS') # set how many workers run at the same time. parser.add_argument( '-num-w', '--num-workers', dest='num_workers', type=int, default=4, help='Number of workers.') # set the maximum size of the FIFO Queue. parser.add_argument( '-q-size', '--queue-size', dest='queue_size', type=int, default=5, help='Size of the queue.') # store the settings in the variable args. args = parser.parse_args() # cap the number of elements in each Queue variable input_q = Queue(maxsize=args.queue_size) output_q = Queue(maxsize=args.queue_size) cropped_output_q = Queue(maxsize=args.queue_size) inferences_q = Queue(maxsize=args.queue_size) pointX_q = Queue(maxsize=args.queue_size) # for p1 inside worker pointY_q = Queue(maxsize=args.queue_size) # for p2 inside worker # take the webcam selection and frame size from the args parsed above video_capture = WebcamVideoStream( src=args.video_source, width=args.width, height=args.height).start() cap_params = {} frame_processed = 0 cap_params['im_width'], cap_params['im_height'] = video_capture.size() print(cap_params['im_width'], cap_params['im_height']) cap_params['score_thresh'] = score_thresh # max number of hands we want to detect/track cap_params['num_hands_detect'] = args.num_hands print(cap_params, args) # Count number of files to increment new example directory poses = [] _file = open("poses.txt", "r") lines = _file.readlines() for line in lines: line = line.strip() if(line != ""): print(line) poses.append(line) # spin up workers to parallelize detection. 
# spin up the workers and parallelize detection. pool = Pool(args.num_workers, worker, (input_q, output_q, cropped_output_q, inferences_q, pointX_q, pointY_q, cap_params, frame_processed)) #pool2 = Pool(1,hand_gui.start_gui,(output_q, cropped_output_q)) start_time = datetime.datetime.now() num_frames = 0 fps = 0 index = 0 # set the lower and upper bounds of the colour range to cut out lower_blue = np.array([0, 30, 60]) upper_blue = np.array([30, 200, 255]) cv2.namedWindow('Handpose', cv2.WINDOW_NORMAL) poseCount = [0,0,0,0,0,0,0] moveCount = [0,0,0,0] #cnt_gui=0 # used for running eel in hand_gui cnt_pose=0 # name_pose="" flg_end = 0 # system-exit flag while True: frame = video_capture.read() frame = cv2.flip(frame, 1) index += 1 gamma_config = 1.6 if(frame is None): # if frame is None, frame = gamma.gamma_correction(frame,gamma_config) raises # cv2.error, so check here and leave the current while loop traceback.print_exc() # set the flags, shut the system down, and go back to the outer while flg_video = 1 cnt_gui = 2 try: # if the webcam was never connected, the except branch runs cnt_gui, flg_end, flg_restart, flg_start, keep_flg = hand_gui.start_gui(output_frame, cnt_gui, cnt_pose, name_pose, flg_restart, flg_start, keep_flg) except NameError as name_e: traceback.print_exc() flg_start = 1 print("please connect a webcam!!!!") pool.terminate() video_capture.stop() cv2.destroyAllWindows() #eel.init('GUI/web') #eel.start( #'html/connect.html', # mode='chrome', # cmdline_args=['--start-fullscreen'], # block=False) #i=0 #while(i < 500): # eel.sleep(0.01) # i+=1 break frame = gamma.gamma_correction(frame,gamma_config) # cv2.imwrite('Poses/Save/aaa' + str(num_frames) + '.png', frame) # whether to crop the image frame_cropped_flag = False # shrink the frame to adjust the working area # giving each parameter a value makes the image smaller if(frame_cropped_flag == True): # print(int(cap_params['im_width'])) # print(int(cap_params['im_height'])) left_params = int(cap_params['im_width'])//8 top_params = int(cap_params['im_height'])//8 right_params = int(cap_params['im_width'])-(int(cap_params['im_width'])//8) bottom_params = int(cap_params['im_height'])-(int(cap_params['im_height'])//8) # crop the captured image # frame = frame[top_params:bottom_params,left_params:right_params].copy() frame = frame[100:200,100:200].copy() # convert the image from BGR to HSV so the background can be cut out hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV_FULL) # build a mask image that fills the configured colour range mask = cv2.inRange(hsv, lower_blue, upper_blue) # cut the normal image with the generated mask frame_masked = cv2.bitwise_and(hsv,hsv, mask=mask) # print(left_params,top_params,right_params,bottom_params) # convert the masked image from HSV back to RGB input_q.put(cv2.cvtColor(frame_masked, cv2.COLOR_HSV2RGB)) # initialize the folder which contains html,js,css,etc output_frame = output_q.get() cropped_output = cropped_output_q.get() #hand_gui.start_gui(output_frame) output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR) # pass to hand_gui.start_gui to display the contents of output_q cnt_gui, flg_end, flg_restart, flg_start, keep_flg = hand_gui.start_gui(output_frame, cnt_gui, cnt_pose, name_pose, flg_restart, flg_start,keep_flg) inferences = None try: inferences = inferences_q.get_nowait() except Exception as e: pass elapsed_time = (datetime.datetime.now() - start_time).total_seconds() num_frames += 1 fps = num_frames / elapsed_time # Display inferences # show the estimated hand-pose graph if(inferences is not None): # take the p1, p2 values from the worker function x = pointX_q.get_nowait() y = pointY_q.get_nowait() gui.drawInferences(inferences, poses) # run an action once a pose confidence exceeds 0.7 for i in range(len(poses)): if(inferences[2] > 0.9): moveCount = PoseAction.pointerMove(x,y,moveCount) if(inferences[i] > 0.7): poseCount,moveCount = PoseAction.checkPose(x, y, poses,poses[i],poseCount,moveCount) # the name of the pose recognised above 70% is stored cnt_pose = poseCount[i] 
# remove [i] if you want the gauge for every pose name_pose = poses[i] if (cropped_output is not None): # convert the cropped image from BGR to RGB. cropped_output = cv2.cvtColor(cropped_output, cv2.COLOR_RGB2BGR) if (args.display > 0): cv2.namedWindow('Cropped', cv2.WINDOW_NORMAL) cv2.resizeWindow('Cropped', 450, 300) cv2.imshow('Cropped', cropped_output) # cv2.imwrite('Poses/Three/Three_4/Three_4' + str(num_frames) + '.png', cropped_output) if cv2.waitKey(1) & 0xFF == ord('q'): flg_break = 1 break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) # print("frame ", index, num_frames, elapsed_time, fps) if (output_frame is not None): # _, imencode_image = cv2.imencode('.jpg', output_frame) # base64_image2 = base64.b64encode(imencode_image) # eel.set_base64image2("data:image/jpg;base64," + base64_image2.decode("ascii")) # output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR) if (args.display > 0): if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), output_frame) cv2.imshow('Handpose', output_frame) if cv2.waitKey(1) & 0xFF == ord('q'): flg_break = 1 break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) else: print("video end") flg_break = 1 break if(flg_end == 1): flg_break = 1 break # exception handling for when the camera is disconnected (currently it catches every exception) # except:# cv2.error as cv2_e: # traceback.print_exc() # set the flags, shut the system down, and go back to the outer while # flg_video = 1 # cnt_gui = 2 # cnt_gui, flg_end, flg_restart = hand_gui.start_gui(output_frame, cnt_gui, cnt_pose, name_pose, flg_restart) # pool.terminate() # video_capture.stop() # cv2.destroyAllWindows() elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time print("fps", fps) pool.terminate() video_capture.stop() cv2.destroyAllWindows()
im_height, webcam_img) detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, webcam_img) try: x_d = box_coordinates[0][0] - 10 y_d = box_coordinates[0][1] + 30 w_d = box_coordinates[1][0] - x_d h_d = box_coordinates[1][1] - y_d + 30 feed_image = webcam_img[y_d:y_d + h_d, x_d:x_d + w_d] feed_image = cv2.resize(feed_image, (28, 28)) #feed_image = cv2.cvtColor(feed_image, cv2.COLOR_BGR2GRAY) answer = predict_gesture(feed_image) detector_utils.draw_fps_on_image("Prediction: " + str(answer), webcam_img) cv2.imshow('feed_image', feed_image) except: #print('no boxes maybe') try: palms = palm_cascade.detectMultiScale( cv2.cvtColor(webcam_img, cv2.COLOR_BGR2GRAY), 1.3, 5) for (ex, ey, ew, eh) in palms: cv2.rectangle(webcam_img, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2) x_d = palms[0][0] y_d = palms[0][1] w_d = palms[0][2] h_d = palms[0][3]
boxes, scores = detector_utils.detect_objects(webcam_img, detection_graph, sess) #box_coordinates(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, image_np) box_coordinates = detector_utils.box_coordinates(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, webcam_img) detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, webcam_img) if cv2.waitKey(25) & 0xFF == ord('c'): calibarate = True if calibarate == True: detector_utils.draw_fps_on_image("calibarating", webcam_img) try: y_d = box_coordinates[0][1] x_d = box_coordinates[0][0] h_d = box_coordinates[1][1] - y_d w_d = box_coordinates[1][0] - x_d print('hand detected at: ', x_d, ' , ', y_d, ' , ', h_d, ' , ', w_d) model_hsv = image_hsv[y_d:y_d + h_d, x_d:x_d + w_d] except: try: palms = palm_cascade.detectMultiScale( cv2.cvtColor(webcam_img, cv2.COLOR_BGR2GRAY), 1.3, 5) (ex, ey, ew, eh) = palms[0] cv2.rectangle(webcam_img, (ex, ey), (ex + ew, ey + eh),
im_height, im_width = image_np_res.shape[:2] # actual detection boxes, scores = detector_utils.detect_objects(image_np_res, detection_graph, sess) # draw bounding boxes detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np_res) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np_res) image_np = cv2.resize(image_np_res, (0, 0), fx=2, fy=2) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps)))
print(f"IMAGE HERE!!!: {majority}") synthesis_input = texttospeech.types.SynthesisInput( text=majority[0]) response = client.synthesize_speech(synthesis_input, voice, audio_config) appendix = random.randint(0, 10000) # winsound.PlaySound(response.audio_content) # time.sleep(1000) with open(f'output-{appendix}.mp3', 'wb') as out: # Write the response to the output file. out.write(response.audio_content) print('Audio content written to file "output.mp3"') out.close() playsound(f'output-{appendix}.mp3', block=True) if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image( "FPS : " + str(int(num_frames / 10)), image_np) cv2.imshow('Single-Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(27) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps)))
def Main(): parser = argparse.ArgumentParser() parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float, default=0.48, help='Score threshold for displaying bounding boxes') parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1, help='Show FPS on detection/display visualization') parser.add_argument('-src', '--source', dest='video_source', default=0, help='Device index of the camera.') parser.add_argument('-wd', '--width', dest='width', type=int, default=640, help='Width of the frames in the video stream.') parser.add_argument('-ht', '--height', dest='height', type=int, default=480, help='Height of the frames in the video stream.') parser.add_argument( '-ds', '--display', dest='display', type=int, default=1, help='Display the detected images using OpenCV. This reduces FPS') parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int, default=16, help='Number of workers.') parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int, default=5, help='Size of the queue.') args = parser.parse_args() global cap, saveImg, gestname, path, real_punch, esti_punch cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width) cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height) start_time = datetime.datetime.now() num_frames = 0 im_width, im_height = (cap.get(3), cap.get(4)) # max number of hands we want to detect/track num_hands_detect = 1 mod = myNN.loadCNN(0) while True: # Expand dimensions since the model expects images to have shape: [1, None, None, 3] ret, image_np = cap.read() frame = image_np # image_np = cv2.flip(image_np, 1) try: image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) except: print("Error converting to RGB") # actual detection boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) # get boxes result = detector_utils.get_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height) for res in result: #img = frame[res['top']:res['bottom'],res['left']:res['right']] img = frame width = res['right'] - res['left'] height = res['bottom'] - res['top'] if (width <= 0 or height <= 0): continue ww = width if (width > height) else height # expands the ROI by 20% expansion = int(ww * 0.2) masked = skinMask(img, res['left'] - expansion, res['top'] - expansion, ww + 2 * expansion, ww + 2 * expansion) if not masked is None: cv2.imshow('ROI', masked) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) key = cv2.waitKey(25) & 0xFF if key == ord('q'): cv2.destroyAllWindows() break elif key == ord('s'): saveImg = not saveImg if gestname != '': saveImg = True else: print "Enter a gesture group name first, by pressing 'n'" saveImg = False elif key == ord('n'): gestname = raw_input("Enter the gesture folder name: ") try: os.makedirs(gestname) except OSError as e: # if directory already present if e.errno != 17: print 'Some issue while creating the directory named -' + gestname path = "./" + gestname + "/" elif key == ord(' '): real_punch += 1 print "real_punch:", real_punch, "estimated_punch:", esti_punch else: print("frames processed: ", num_frames, 
"elapsed time: ", elapsed_time, "fps: ", str(int(fps)))
def main(): video_source = 0 num_hands = 2 fps = 1 # set to 0 to hide width = 300 height = 200 display = 1 # show detected hands num_workers = 4 queue_size = 5 input_q = Queue(maxsize=queue_size) output_q = Queue(maxsize=queue_size) video_capture = WebcamVideoStream(src=video_source, width=width, height=height).start() cap_params = {} frame_processed = 0 cap_params['im_width'], cap_params['im_height'] = video_capture.size() cap_params['score_thresh'] = score_thresh # max number of hands we want to detect/track cap_params['num_hands_detect'] = num_hands movement = Value('i', 0) movement_threshold = (width + height) / 3 # spin up workers to paralleize detection. pool = Pool(num_workers, worker, (input_q, output_q, cap_params, frame_processed, movement, movement_threshold)) start_time = datetime.datetime.now() num_frames = 0 fps = 0 index = 0 old_movement = 0 history_avg = 4 move_history = [0] * history_avg total_space = width + height cv2.namedWindow('Multi-Threaded Detection', cv2.WINDOW_NORMAL) while True: frame = video_capture.read() frame = cv2.flip(frame, 1) index += 1 input_q.put(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) output_frame = output_q.get() output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR) elapsed_time = (datetime.datetime.now() - start_time).total_seconds() num_frames += 1 fps = num_frames / elapsed_time # Calculate amount moved in the last 5 frames frame_rate = 5 if num_frames % frame_rate == 0: cur_movement = movement.value moved = (cur_movement - old_movement) / frame_rate old_movement = cur_movement # Track historical movement move_history.append(moved) total = 0 for i in range(len(move_history) - history_avg, len(move_history)): total += move_history[i] moved_avg = ((total / history_avg) / total_space) * 1000 print("Movement score: {}".format(moved_avg)) # print("frame ", index, num_frames, elapsed_time, fps) if (output_frame is not None): if (display > 0): if (fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), output_frame) # cv2.imshow('Multi-Threaded Detection', output_frame) ret, jpeg = cv2.imencode('.jpg', output_frame) jbytes = jpeg.tobytes() yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jbytes + b'\r\n\r\n') if cv2.waitKey(1) & 0xFF == ord('q'): break else: if (num_frames == 400): num_frames = 0 start_time = datetime.datetime.now() else: print("frames processed: ", index, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) else: # print("video end") break elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time print("fps", fps) pool.terminate() video_capture.stop() cv2.destroyAllWindows()
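# main() above is written as a generator yielding multipart JPEG chunks, the
# usual shape for an MJPEG HTTP stream. A sketch of serving it, assuming a
# Flask app (Flask does not appear in the original snippet).
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # main() yields b'--frame'-delimited JPEG parts
    return Response(main(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)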
def main(): global new_image rs_img = rs_process() rospy.init_node('hand_tracking', anonymous=True) rospy.loginfo("Hand Detection Start!") #Marker Publisher Initialize pub = rospy.Publisher('/hand_marker', Marker, queue_size=1) hand_mark = MarkerGenerator() hand_mark.type = Marker.SPHERE_LIST hand_mark.scale = [.07] * 3 hand_mark.frame_id = "/camera_color_optical_frame" hand_mark.id = 0 hand_mark.lifetime = 10000 #hand detect args parser = argparse.ArgumentParser() parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float, default=0.5, help='Score threshold for displaying bounding boxes') parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1, help='Show FPS on detection/display visualization') parser.add_argument('-src', '--source', dest='video_source', default=0, help='Device index of the camera.') parser.add_argument('-wd', '--width', dest='width', type=int, default=640, help='Width of the frames in the video stream.') parser.add_argument('-ht', '--height', dest='height', type=int, default=360, help='Height of the frames in the video stream.') parser.add_argument( '-ds', '--display', dest='display', type=int, default=0, help='Display the detected images using OpenCV. This reduces FPS') parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int, default=4, help='Number of workers.') parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int, default=5, help='Size of the queue.') args = parser.parse_args() num_hands_detect = 2 im_width, im_height = (args.width, args.height) #time for fps calculation start_time = datetime.datetime.now() num_frames = 0 #skin filter color lower = np.array([0, 48, 80], dtype="uint8") upper = np.array([20, 255, 255], dtype="uint8") ####################################### #Define the frame to transform ####################################### target_frame = "/camera_color_optical_frame" ######FROM reference_frame = "/base_link" ####TO ##################################### #Define the numpy array to record the consequences of the hand location ###################################### # hand_pos = np.empty((1,3)) is_transform_target = False if (is_transform_target): listener = tf.TransformListener() listener.waitForTransform(reference_frame, target_frame, rospy.Time(0), rospy.Duration(4.0)) hand_mark.frame_id = reference_frame else: hand_mark.frame_id = target_frame hand_pose_list = [] while not rospy.is_shutdown(): #get rgb,depth frames for synchronized frames if not new_image: continue im_rgb = rs_image_rgb # im_rgb = cv2.cvtColor(rs_image_rgb, cv2.COLOR_BGR2RGB) im_depth = rs_image_depth new_image = False #add check # depth_map = np.array(rs_image_depth, dtype=np.uint8) depth_map = cv2.applyColorMap( cv2.convertScaleAbs(rs_image_depth, alpha=0.03), cv2.COLORMAP_JET) # cv2.imshow("Depth Image", depth_map) cv2.imshow("rs_image_rgb", rs_image_rgb) try: image_np = im_rgb except: print("Error converting to RGB") # actual hand detection boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) if (scores[0] > args.score_thresh): (left, right, top, bottom) = (boxes[0][1] * im_width, boxes[0][3] * im_width, boxes[0][0] * im_height, boxes[0][2] * im_height) p1 = (int(left), int(top)) p2 = (int(right), int(bottom)) # print p1,p2,int(left),int(top),int(right),int(bottom) image_hand = image_np[int(top):int(bottom), int(left):int(right)] # 
cv2.namedWindow("hand", cv2.WINDOW_NORMAL) cv2.imshow('hand', cv2.cvtColor(image_hand, cv2.COLOR_RGB2BGR)) align_hand = im_rgb[int(top):int(bottom), int(left):int(right)] align_depth = depth_map[int(top):int(bottom), int(left):int(right)] align_hand_detect = np.hstack((align_hand, align_depth)) # cv2.namedWindow('align hand', cv2.WINDOW_AUTOSIZE) # cv2.imshow('align hand', align_hand_detect) # cv2.imshow('align_hand_bgr', align_hand) align_hand = cv2.cvtColor(align_hand, cv2.COLOR_BGR2RGB) #skin filtering converted = cv2.cvtColor(align_hand, cv2.COLOR_BGR2HSV) skinMask = cv2.inRange(converted, lower, upper) # cv2.imshow("skinMask", skinMask) # apply a series of erosions and dilations to the mask # using an elliptical kernel # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)) # skinMask = cv2.erode(skinMask, kernel, iterations = 2) # skinMask = cv2.dilate(skinMask, kernel, iterations = 2) kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)) skinMask = cv2.erode(skinMask, kernel, iterations=3) skinMask = cv2.dilate(skinMask, kernel, iterations=3) # blur the mask to help remove noise, then apply the # mask to the frame skinMask = cv2.GaussianBlur(skinMask, (3, 3), 0) skin = cv2.bitwise_and(align_hand, align_hand, mask=skinMask) # show the skin in the image along with the mask # cv2.imshow("images", np.hstack([align_hand, skin])) #end skin depth_pixel = [(int(left) + int(right)) / 2, (int(top) + int(bottom)) / 2] # depth_point = [0.0,0.0,0.0] depth_point = rs.rs2_deproject_pixel_to_point( depth_intrin, depth_pixel, im_depth[depth_pixel[1], depth_pixel[0]] * depth_scale) print depth_point hand_mark.counter = 0 t = rospy.get_time() hand_mark.color = [0, 1, 0, 1] # hand_mark.id = hand_mark.id + 1 # if (hand_mark.id > 100000) : # hand_mark.id = 0 # ## hand in /camera_color_optical_frame # print ('id ',hand_mark.id) m0 = hand_mark.marker(points=[(depth_point[0], depth_point[1], depth_point[2])]) hand_point_x = depth_point[0] hand_point_y = depth_point[1] hand_point_z = depth_point[2] if (is_transform_target): ######################################################################### ##convert /camera_color_optical_frame => /world ######################################################################### #transform position from target_frame to reference frame target_ref_camera = PointStamped() target_ref_camera.header.frame_id = target_frame target_ref_camera.header.stamp = rospy.Time(0) target_ref_camera.point = m.points[0] p = listener.transformPoint(reference_frame, target_ref_camera) # p=listener.transformPoint(reference_frame,hand_mark) m = hand_mark.marker(points=[(p.point.x, p.point.y, p.point.z)]) #substitute data for the variable hand_point_x = p.point.x hand_point_y = p.point.y hand_point_z = p.point.z # pub.publish(m) #offset z-axiz # hand_mark.id = 1 # m = hand_mark.marker(points= [(p.point.x, p.point.y, p.point.z + 0.10)]) # pub.publish(m) else: # print('published!') ####append the data if 0.15 <= hand_point_z <= 0.75 and -0.4 <= hand_point_x <= 0.4: print("recorded hand point") hand_pose = [ hand_point_x, hand_point_y, hand_point_z, 0.0, 0.0, 0.0, 1.0 ] print hand_pose hand_pose_list.append(hand_pose) pub.publish(m0) #substitute data for the variable hand_point_x = depth_point[0] hand_point_y = depth_point[1] - 0.08 hand_point_z = depth_point[2] #### offset z-axiz # hand_mark.id = 1 m1 = hand_mark.marker(points=[(depth_point[0], depth_point[1] - 0.08, depth_point[2])]) # m = hand_mark.marker(points= [(p.point.x, p.point.y, p.point.z + 0.10)]) # pub.publish(m1) # 
# Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time #display window if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(float(fps)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) if cv2.waitKey(10) & 0xFF == ord('q'): cv2.destroyAllWindows() break print('save hand_pub.npy') # np.save('./hand_pub.npy',hand_pos) np.save("hand_pub", hand_pose_list)
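# The poses recorded above are written out with np.save("hand_pub", ...). A small
# sketch of reading them back; the 7-element layout (x, y, z, qx, qy, qz, qw)
# follows the hand_pose list built in the loop.
import numpy as np

hand_poses = np.load("hand_pub.npy")  # shape: (num_samples, 7)
for x, y, z, qx, qy, qz, qw in hand_poses:
    print("hand at ({:.3f}, {:.3f}, {:.3f}) m".format(x, y, z))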
#else: #cv2.putText(image_np,'Not Detected',(20,90),font,0.55,(0,0,255),1,cv2.LDR_SIZE) if start == 1: #cnt = 0 if cnt == -1 else cnt+1 #cv2.putText(image_np,'Entry:'+str(en),(20,90+space),font,0.55,(0,0,255),1,cv2.LDR_SIZE) #if cnt==0: # ex = ex+1 #cv2.putText(image_np,'Exit:'+str(ex),(20,90+2*space),font,0.55,(0,0,255),1,cv2.LDR_SIZE) cv2.putText(image_np,'FPS:'+str(int(fps)+1),(20,90+3*space),font,0.55,(0,0,255),1,cv2.LDR_SIZE) #cv2.putText(image_np,'Counter:'+str(int(en-ex)),(20,90+4*space),font,0.55,(0,0,255),1,cv2.LDR_SIZE) # FPS Calculator num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time detector_utils.draw_fps_on_image("Summary",image_np) cv2.imshow('Nymble Task',cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(1) & 0xFF == ord('q'): raise cv2.error except cv2.error: #Releasing the capture cap.release() cv2.destroyAllWindows() print(".") print("Done!")
def main(): global new_image rs_img = rs_process() rospy.init_node('hand_tracking', anonymous=True) rospy.loginfo("Hand Detection Start!") #Marker Publisher Initialize pub = rospy.Publisher('/hand_marker', Marker) hand_mark = MarkerGenerator() hand_mark.type = Marker.SPHERE_LIST hand_mark.scale = [.03] * 3 hand_mark.frame_id = '/camera_color_optical_frame' #hand detect args parser = argparse.ArgumentParser() parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float, default=0.2, help='Score threshold for displaying bounding boxes') parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1, help='Show FPS on detection/display visualization') parser.add_argument('-src', '--source', dest='video_source', default=0, help='Device index of the camera.') parser.add_argument('-wd', '--width', dest='width', type=int, default=640, help='Width of the frames in the video stream.') parser.add_argument('-ht', '--height', dest='height', type=int, default=360, help='Height of the frames in the video stream.') parser.add_argument( '-ds', '--display', dest='display', type=int, default=0, help='Display the detected images using OpenCV. This reduces FPS') parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int, default=4, help='Number of workers.') parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int, default=5, help='Size of the queue.') args = parser.parse_args() im_width, im_height = (args.width, args.height) #time for fps calculation start_time = datetime.datetime.now() num_frames = 0 while not rospy.is_shutdown(): #get rgb,depth frames for synchronized frames if not new_image: continue im_rgb = rs_image_rgb im_depth = rs_image_depth new_image = False #add check # depth_map = np.array(rs_image_depth, dtype=np.uint8) depth_map = cv2.applyColorMap( cv2.convertScaleAbs(rs_image_depth, alpha=0.03), cv2.COLORMAP_JET) # cv2.imshow("Depth Image", depth_map) # cv2.imshow("Color Image", rs_image_rgb) try: image_np = im_rgb #cv2.cvtColor(im_rgb, cv2.COLOR_BGR2RGB) except: print("Error converting to RGB") # cv2.imshow("source image np", image_np) ############## ***** FROM HERE ***** ################### # Remove background - Set pixels further than clipping_distance to grey grey_color = 100 depth_image_3d = np.dstack( (im_depth, im_depth, im_depth)) #depth image is 1 channel, color is 3 channels bg_removed = np.where( (depth_image_3d > clipping_distance) | (depth_image_3d <= 0), grey_color, image_np) cv2.imshow("bg_removed", bg_removed) # img_hsv = cv2.cvtColor(bg_removed, cv2.COLOR_BGR2HSV) # lower_red = np.array([120,150,50]) # upper_red = np.array([255,255,180]) ## Gen lower mask (0-5) and upper mask (175-180) of RED mask1 = cv2.inRange(img_hsv, (0, 50, 20), (8, 255, 255)) # mask2 = cv2.inRange(img_hsv, (175,50,20), (180,255,255)) mask3 = cv2.inRange(img_hsv, (120, 120, 50), (255, 255, 255)) ## Merge the mask and crop the red regions redmask = cv2.bitwise_or(mask1, mask3) croped = cv2.bitwise_and(bg_removed, bg_removed, mask=redmask) # img_bw = 255*(cv2.cvtColor(croped, cv2.COLOR_BGR2GRAY) > 10).astype('uint8') # cv2.imshow("img_bw", img_bw) kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) redmask = cv2.erode(redmask, kernel, iterations=1) redmask = cv2.dilate(redmask, kernel, iterations=1) gray_croped = cv2.bitwise_and(croped, croped, mask=redmask) # mask5 = np.dstack([mask4, mask4, mask4]) / 255 # gray_croped = croped * mask4 # gray_croped = cv2.bitwise_and(croped, croped, mask=mask4) cv2.imshow("gray_croped", gray_croped) # 
print(gray_croped.shape) box_index = np.where(gray_croped != 0) # print "boxindex : ", len(box_index), len(box_index[0]) # print(box_index) if len(box_index[0]) >= 10: # check red exists i_min = np.amin(box_index[1]) i_max = np.amax(box_index[1]) j_min = np.amin(box_index[0]) j_max = np.amax(box_index[0]) i_centor = int(i_min + i_max) / 2 j_centor = int(j_min + j_max) / 2 box_img = croped[i_min:i_max, j_min:j_max] # print(i_centor,j_centor) top = j_min bottom = j_max left = i_min right = i_max # (left, right, top, bottom) = (boxes[0][1] * im_width, boxes[0][3] * im_width, # boxes[0][0] * im_height, boxes[0][2] * im_height) #add l r t b print("detected red") p1 = (int(left), int(top)) p2 = (int(right), int(bottom)) print p1, p2, int(left), int(top), int(right), int(bottom) image_hand = image_np[int(top):int(bottom), int(left):int(right)] cv2.imshow('hand', cv2.cvtColor(image_hand, cv2.COLOR_RGB2BGR)) align_hand = im_rgb[int(top):int(bottom), int(left):int(right)] align_depth = depth_map[int(top):int(bottom), int(left):int(right)] align_hand_detect = np.hstack((align_hand, align_depth)) cv2.namedWindow('align hand', cv2.WINDOW_AUTOSIZE) cv2.imshow('align hand', align_hand_detect) # #skin filtering # converted = cv2.cvtColor(align_hand, cv2.COLOR_BGR2HSV) # skinMask = cv2.inRange(converted, lower, upper) # apply a series of erosions and dilations to the mask # using an elliptical kernel # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)) # skinMask = cv2.erode(skinMask, kernel, iterations = 2) # skinMask = cv2.dilate(skinMask, kernel, iterations = 2) # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)) # skinMask = cv2.erode(skinMask, kernel, iterations = 1) # skinMask = cv2.dilate(skinMask, kernel, iterations = 1) # # blur the mask to help remove noise, then apply the # # mask to the frame # skinMask = cv2.GaussianBlur(skinMask, (3, 3), 0) # skin = cv2.bitwise_and(align_hand, align_hand, mask = skinMask) depth_pixel = [(int(left) + int(right)) / 2, (int(top) + int(bottom)) / 2] depth_point = rs.rs2_deproject_pixel_to_point( depth_intrin, depth_pixel, im_depth[depth_pixel[1], depth_pixel[0]] * depth_scale) print depth_point hand_mark.counter = 0 t = rospy.get_time() hand_mark.color = [1, 0, 0, 1] m = hand_mark.marker(points=[(depth_point[0], depth_point[1], depth_point[2])]) # rospy.loginfo(m) pub.publish(m) # rate.sleep() # show the skin in the image along with the mask # cv2.imshow("images", np.hstack([align_hand, skin])) #end skin if cv2.waitKey(10) & 0xFF == ord('q'): cv2.destroyAllWindows() break continue # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time #display window if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(float(fps)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) if cv2.waitKey(10) & 0xFF == ord('q'): cv2.destroyAllWindows() break
num_detects = deque(maxlen=N) #M of N filter q = deque(maxlen=taps) #rolling average filter ret, path_mask = cv2.threshold(path_mask, 127, 255, cv2.THRESH_BINARY) bounding = bound_binary_image(path_mask,im_width,im_height) ROI = path_mask[int(bounding[0]):int(bounding[1]), int(bounding[2]):int(bounding[3])] x_border = int((bounding[3]-bounding[2])/5) y_border = int((bounding[3]-bounding[2])/5) ROI_padded = cv2.copyMakeBorder(ROI,y_border,y_border,x_border,x_border,cv2.BORDER_CONSTANT, 0) model_input = cv2.resize(ROI_padded, dsize=(28, 28)) cv2.imwrite("dataset/qiao/"+str(j)+".png",model_input) j+=1 prediction = digit_model.predict_classes(model_input.reshape(1,28,28,1)) model_input = cv2.resize(model_input, (int(20 * 28), int(20 * 28)), interpolation=cv2.INTER_AREA) cv2.putText(model_input,"Prediction: "+str(prediction), (150, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (170, 170, 10), 2) cv2.imshow("path",model_input) #int(bounding[3]):int(bounding[2])]) if detected: # draw bounding boxes on frame point1 = (int(bbox[0]),int(bbox[1])) point2 = (int(bbox[0]+bbox[2]),int(bbox[1]+bbox[3])) cv2.rectangle(display_frame, point1, point2, (77, 255, 9), 3, 1) cv2.putText(display_frame, str(np.mean(q)) + " confidence", (150,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50,170,50), 2) fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer); detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), display_frame) cv2.imshow("Tracking", cv2.resize(display_frame, (int(3*im_width),int(3*im_height)), interpolation = cv2.INTER_AREA)) # Exit if ESC pressed k = cv2.waitKey(1) & 0xff if k == 27 : break
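# The digit-recognition block above relies on a bound_binary_image() helper that,
# judging by how its result indexes path_mask, returns (y_min, y_max, x_min,
# x_max) for the drawn path. A minimal sketch under that assumption; the
# original helper may clamp or pad differently.
import numpy as np

def bound_binary_image(mask, im_width, im_height):
    """Bounding box of the non-zero pixels in a binary image."""
    ys, xs = np.nonzero(mask)
    if len(xs) == 0:
        # nothing drawn yet: fall back to the full frame
        return (0, im_height, 0, im_width)
    return (ys.min(), ys.max() + 1, xs.min(), xs.max() + 1)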
def main(): global new_image rs_img = rs_process() rospy.init_node('hand_tracking', anonymous=True) rospy.loginfo("Hand Detection Start!") #Marker Publisher Initialize pub = rospy.Publisher('/hand_marker', Marker) hand_mark = MarkerGenerator() hand_mark.type = Marker.SPHERE_LIST hand_mark.scale = [.04] * 3 hand_mark.frame_id = '/camera_color_optical_frame' # hand_mark.frame_id = 'iiwa_link_0' #hand detect args parser = argparse.ArgumentParser() parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float, default=0.2, help='Score threshold for displaying bounding boxes') parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1, help='Show FPS on detection/display visualization') parser.add_argument('-src', '--source', dest='video_source', default=0, help='Device index of the camera.') parser.add_argument('-wd', '--width', dest='width', type=int, default=640, help='Width of the frames in the video stream.') parser.add_argument('-ht', '--height', dest='height', type=int, default=360, help='Height of the frames in the video stream.') parser.add_argument( '-ds', '--display', dest='display', type=int, default=0, help='Display the detected images using OpenCV. This reduces FPS') parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int, default=4, help='Number of workers.') parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int, default=5, help='Size of the queue.') args = parser.parse_args() num_hands_detect = 2 im_width, im_height = (args.width, args.height) #time for fps calculation start_time = datetime.datetime.now() num_frames = 0 #skin filter color lower = np.array([0, 48, 80], dtype="uint8") upper = np.array([20, 255, 255], dtype="uint8") while not rospy.is_shutdown(): #get rgb,depth frames for synchronized frames if not new_image: continue im_rgb = rs_image_rgb im_depth = rs_image_depth new_image = False #add check # depth_map = np.array(rs_image_depth, dtype=np.uint8) depth_map = cv2.applyColorMap( cv2.convertScaleAbs(rs_image_depth, alpha=0.03), cv2.COLORMAP_JET) cv2.imshow("Depth Image", depth_map) cv2.imshow("Color Image", rs_image_rgb) try: image_np = im_rgb #cv2.cvtColor(im_rgb, cv2.COLOR_BGR2RGB) except: print("Error converting to RGB") # cv2.imshow("source image np", image_np) # actual hand detection boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, image_np) if (scores[0] > args.score_thresh): (left, right, top, bottom) = (boxes[0][1] * im_width, boxes[0][3] * im_width, boxes[0][0] * im_height, boxes[0][2] * im_height) p1 = (int(left), int(top)) p2 = (int(right), int(bottom)) print p1, p2, int(left), int(top), int(right), int(bottom) image_hand = image_np[int(top):int(bottom), int(left):int(right)] cv2.namedWindow("hand", cv2.WINDOW_NORMAL) cv2.imshow('hand', cv2.cvtColor(image_hand, cv2.COLOR_RGB2BGR)) align_hand = im_rgb[int(top):int(bottom), int(left):int(right)] align_depth = depth_map[int(top):int(bottom), int(left):int(right)] align_hand_detect = np.hstack((align_hand, align_depth)) cv2.namedWindow('align hand', cv2.WINDOW_AUTOSIZE) cv2.imshow('align hand', align_hand_detect) #skin filtering converted = cv2.cvtColor(align_hand, cv2.COLOR_BGR2HSV) skinMask = cv2.inRange(converted, lower, upper) # apply a series of erosions and dilations to the mask # using an elliptical kernel # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11)) # 
skinMask = cv2.erode(skinMask, kernel, iterations = 2) # skinMask = cv2.dilate(skinMask, kernel, iterations = 2) kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)) skinMask = cv2.erode(skinMask, kernel, iterations=1) skinMask = cv2.dilate(skinMask, kernel, iterations=1) # blur the mask to help remove noise, then apply the # mask to the frame skinMask = cv2.GaussianBlur(skinMask, (3, 3), 0) skin = cv2.bitwise_and(align_hand, align_hand, mask=skinMask) depth_pixel = [(int(left) + int(right)) / 2, (int(top) + int(bottom)) / 2] dist = im_depth[depth_pixel[1], depth_pixel[0]] * depth_scale print dist depth_point = rs.rs2_deproject_pixel_to_point( depth_intrin, depth_pixel, dist) print depth_point hand_mark.counter = 0 t = rospy.get_time() hand_mark.color = [1, 0, 0, 1] m = hand_mark.marker(points=[(depth_point[0], depth_point[1], depth_point[2])]) # rospy.loginfo(m) pub.publish(m) # rate.sleep() # show the skin in the image along with the mask cv2.imshow("images", np.hstack([align_hand, skin])) #end skin # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time #display window if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(float(fps)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) if cv2.waitKey(10) & 0xFF == ord('q'): cv2.destroyAllWindows() break
def run(self): detection_graph, sess = detector_utils.load_inference_graph() # parameters for loading data and images detection_model_path = 'haarcascade_frontalface_default.xml' emotion_model_path = 'fer2013_mini_XCEPTION.102-0.66.hdf5' emotion_labels = get_labels('fer2013') # hyper-parameters for bounding boxes shape frame_window = 10 emotion_offsets = (20, 40) # loading models face_detection = load_detection_model(detection_model_path) emotion_classifier = load_model(emotion_model_path, compile=False) # getting input model shapes for inference emotion_target_size = emotion_classifier.input_shape[1:3] # starting lists for calculating modes emotion_window = [] start_time = datetime.datetime.now() im_width, im_height = (400, 350) num_hands_detect = 2 # max number of hands we want to detect/track, can scale this up min_threshold = 0.2 old_points = [None] * num_hands_detect cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL) while True: # Expand dimensions since the model expects images to have shape: [1, None, None, 3] if self.image_queue.empty(): continue img_data = base64.b64decode(str(self.image_queue.get())) image_np = np.asarray(Image.open(io.BytesIO(img_data))) image_np = cv2.flip(image_np, 1) gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY) faces = detect_faces(face_detection, gray_image) # Actual detection. Variable boxes contains the bounding box cordinates for hands detected, # while scores contains the confidence for each of these boxes. # Hint: If len(boxes) > 1 , you may assume you have found atleast one hand (within your score threshold) boxes, scores = detector_utils.detect_objects( image_np, detection_graph, sess) valid_hands = 0 calc_displacement = False for score in scores: if score > min_threshold: valid_hands += 1 if valid_hands == num_hands_detect: calc_displacement = True #tell function to return new displacement # draw bounding boxes on frame self.total_displacement += detector_utils.draw_box_on_image( num_hands_detect, min_threshold, scores, boxes, im_width, im_height, image_np, old_points, calc_displacement) #0.2 is the min threshold # Calculate Frames per second (FPS) self.num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = self.num_frames / elapsed_time if self.current_emotion in self.emotions: self.emotions[self.current_emotion] += 1 else: self.emotions[self.current_emotion] = 1 print(self.total_displacement / (10 * self.num_frames), self.current_emotion, self.emotions) for face_coordinates in faces: x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets) gray_face = gray_image[y1:y2, x1:x2] try: gray_face = cv2.resize(gray_face, (emotion_target_size)) except: continue gray_face = preprocess_input(gray_face, True) gray_face = np.expand_dims(gray_face, 0) gray_face = np.expand_dims(gray_face, -1) emotion_prediction = emotion_classifier.predict(gray_face) emotion_probability = np.max(emotion_prediction) emotion_label_arg = np.argmax(emotion_prediction) self.current_emotion = emotion_labels[emotion_label_arg] emotion_window.append(self.current_emotion) if len(emotion_window) > frame_window: emotion_window.pop(0) try: emotion_mode = mode(emotion_window) except: continue draw_bounding_box(face_coordinates, image_np) # Display FPS on frame: detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np) cv2.imshow('Single-Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break
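# run() above pulls base64-encoded JPEG frames from self.image_queue, e.g. frames
# pushed by a websocket client. A local test producer under that assumption; the
# encoding step mirrors what the consumer expects (base64 text it can b64decode).
import base64
import cv2

def feed_queue_from_webcam(image_queue, source=0):
    cap = cv2.VideoCapture(source)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        ok, jpeg = cv2.imencode('.jpg', frame)
        if ok:
            image_queue.put(base64.b64encode(jpeg.tobytes()).decode('ascii'))
    cap.release()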
boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sess) # draw bounding boxes on frame detector_utils.draw_box_on_image(num_hands_detect, args.score_thresh, scores, boxes, im_width, im_height, orig_color_frame, debug_image, ct, num_frames) # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if num_frames % 30 == 0: print("Status:", ct.getStatus()) if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), debug_image) cv2.imshow('Single-Threaded Detection', debug_image) if cv2.waitKey(25) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: if num_frames % 30 == 0: print("Frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps)))
# rospy.loginfo(m) pub.publish(m) rate.sleep() # show the skin in the image along with the mask cv2.imshow("images", np.hstack([align_hand, skin])) #end skin # Calculate Frames per second (FPS) num_frames += 1 elapsed_time = (datetime.datetime.now() - start_time).total_seconds() fps = num_frames / elapsed_time if (args.display > 0): # Display FPS on frame if (args.fps > 0): detector_utils.draw_fps_on_image("FPS : " + str(float(fps)), image_np) cv2.imshow('Single Threaded Detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) if cv2.waitKey(10) & 0xFF == ord('q'): cv2.destroyAllWindows() break else: print("frames processed: ", num_frames, "elapsed time: ", elapsed_time, "fps: ", str(int(fps))) # raw_input('Press enter to continue: ')
if ret == True: if rx > 0 and ry > 0: skinMask(res) flag = True #print(flag) except: print("Did not detect hand, put hand within the camera's frame!") continue #sys.exit(0) if (args.display > 0): if flag == True: cv2.putText(image_np,output[retgesture],(15,40),font,0.75, (77, 255, 9), 2) if (args.fps > 0): detector_utils.draw_fps_on_image(None, image_np) if flag == True: if retgesture == 4 or retgesture == 14: gesture_identifier = 1 elif retgesture == 12 or retgesture == 13: gesture_identifier = 2 elif retgesture == 0 or retgesture == 1 or retgesture == 8 or retgesture == 9 or retgesture == 10 or retgesture == 15: gesture_identifier = 3 else: gesture_identifier = 0 print(gesture_identifier) cv2.imshow('RPS', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)) cv2.moveWindow('RPS',0,0) if cv2.waitKey(5) & 0xFF == ord('q'): cv2.destroyAllWindows()