import cv2
import numpy as np
import asyncio

from cursor_func import cursorControl
from unified_detector import Fingertips
from hand_detector.detector import SOLO, YOLO

status = False
hand_detection_method = 'yolo'

if hand_detection_method == 'solo':
    hand = SOLO(weights='weights/solo.h5', threshold=0.8)
elif hand_detection_method == 'yolo':
    hand = YOLO(weights='weights/yolo.h5', threshold=0.9)
else:
    assert False, ("'" + hand_detection_method + "' hand detection does not exist. "
                   "Use either 'solo' or 'yolo' as the hand detection method.")

fingertips = Fingertips(weights='weights/classes8.h5')

cam = cv2.VideoCapture(0)
print('Finger Tracking Cursor Control')


async def main():
    while True:
        ret, image = cam.read()
        if not ret:
            break

        # hand detection
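        # A minimal sketch of the rest of the loop, modeled on the demo
        # script later in this repo; the original file is truncated here,
        # so treat this continuation as an assumption, not the original code.
        tl, br = hand.detect(image=image)
        if tl is not None and br is not None:
            cropped_image = image[tl[1]:br[1], tl[0]:br[0]]
            height, width, _ = cropped_image.shape

            # gesture classification and fingertips regression
            prob, pos = fingertips.classify(image=cropped_image)
            pos = np.mean(pos, 0)

            # map relative fingertip coordinates back to the full frame
            prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
            for i in range(0, len(pos), 2):
                pos[i] = pos[i] * width + tl[0]
                pos[i + 1] = pos[i + 1] * height + tl[1]

            # hand the detections to the cursor controller
            # (cursorControl's signature, and whether it is a coroutine,
            # are assumptions here)
            await cursorControl(prob, pos)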
import cv2
import time
import numpy as np
from statistics import mean
from hand_detector.detector import YOLO
from unified_detector import Fingertips

images = np.load('../dataset/test/images.npy')
test_x = np.load('../dataset/test/test_x.npy')
test_y_prob = np.load('../dataset/test/test_y_prob.npy')
test_y_keys = np.load('../dataset/test/test_y_keys.npy')
crop_info = np.load('../dataset/test/crop_info.npy')

hand_model = YOLO(weights='../weights/yolo.h5', threshold=0.5)
fingertips = Fingertips(weights='../weights/fingertip.h5')

# classification
ground_truth_class = np.array([0, 0, 0, 0, 0, 0, 0, 0])
prediction_class = np.array([0, 0, 0, 0, 0, 0, 0, 0])

# regression
fingertip_err = np.array([0, 0, 0, 0, 0, 0, 0, 0])

avg_time = 0
iteration = 0
conf_mat = np.zeros(shape=(8, 8))
pr_prob_per_yolo = []  # prediction of probability performance using yolo
pr_pos_per_yolo = []   # prediction of position performance using yolo

for n_image, (info, image, cropped_image, gt_prob, gt_pos) in enumerate(
        zip(crop_info, images, test_x, test_y_prob, test_y_keys), 1):
    print('Images: ', n_image)
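    # A hedged sketch of one evaluation step; the original loop body is
    # truncated above, so the inference timing and confusion-matrix update
    # below are assumptions based on the variables initialised before the loop.
    start = time.time()
    prob, pos = fingertips.classify(image=cropped_image)
    pos = np.mean(pos, 0)
    avg_time += time.time() - start
    iteration += 1

    # accumulate the gesture confusion matrix (assumes gt_prob is one-hot)
    conf_mat[int(np.argmax(gt_prob)), int(np.argmax(prob))] += 1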
import copy
import time

import cv2
import numpy as np

from hand_detector.detector import YOLO
from unified_detector import Fingertips

# `config` and `gstreamer_pipeline` are used below but not defined in this
# excerpt; they are expected to be provided elsewhere in the project
# (a config dict and a CSI-camera pipeline helper).


class AIWhiteboard():
    """AI Whiteboard"""

    def __init__(self, args):
        """
        Initialization of the AI Whiteboard class.
        args.trt                 :boolean: if True, use TensorRT engines for inference
        args.raspberry_pi_camera :boolean: if True, capture images from the Raspberry Pi Camera
        """
        super(AIWhiteboard, self).__init__()
        self.confidence_ft_threshold = config['confidence_ft_threshold']
        self.confidence_hd_threshold = config['confidence_hd_threshold']
        self.colors = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                       (240, 15, 155), (240, 15, 240)]

        # init models
        self.hand_detector = YOLO(
            weights='weights/trained_yolo.h5',
            trt_engine='weights/engines/model_trained_yolo.fp16.engine',
            threshold=self.confidence_hd_threshold,
            trt=args.trt)
        self.fingertips_detector = Fingertips(
            weights='weights/classes8.h5',
            trt_engine='weights/engines/model_classes8.fp16.engine',
            trt=args.trt)

        if args.raspberry_pi_camera:
            self.cam = cv2.VideoCapture(
                gstreamer_pipeline(capture_width=config['cam_w'],
                                   capture_height=config['cam_h'],
                                   display_width=config['cam_w'],
                                   display_height=config['cam_h'],
                                   framerate=config['framerate']),
                cv2.CAP_GSTREAMER)
        else:
            self.cam = cv2.VideoCapture(0)
            self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, config['cam_w'])
            self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, config['cam_h'])

        origin_w = int(self.cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        origin_h = int(self.cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # cropped coordinates (to get a square image)
        self.cropped_x_st = int(origin_w / 2) - int(origin_h / 2)
        self.cropped_x_end = int(origin_w / 2) + int(origin_h / 2)

        # whiteboard_tl - top left corner of the whiteboard on the cropped image
        # whiteboard_br - bottom right corner of the whiteboard on the cropped image
        self.whiteboard_tl = (
            int((self.cropped_x_end - self.cropped_x_st - config['whiteboard_w']) / 2),
            int((origin_h - config['whiteboard_h']) / 2))
        self.whiteboard_br = (
            int((self.cropped_x_end - self.cropped_x_st + config['whiteboard_w']) / 2),
            int((origin_h + config['whiteboard_h']) / 2))

        # create a whiteboard
        self.whiteboard = np.zeros(
            (config['zoom_koef'] * config['whiteboard_h'],
             config['zoom_koef'] * config['whiteboard_w'], 3), np.uint8) + 255
        # create an info whiteboard for demonstration
        self.info_whiteboard = copy.deepcopy(self.whiteboard)

    def draw(self, prob, pos):
        """
        Draw detected fingers on the whiteboard.
        prob :numpy array: confidence score of each finger according to the Fingertips detector
        pos  :numpy array: relative finger positions on the whiteboard according to the Fingertips detector
        """
        # whiteboard shape
        width = config['whiteboard_w'] * config['zoom_koef']
        height = config['whiteboard_h'] * config['zoom_koef']
        # number of detected fingers
        n_fingers = int(np.sum(prob))

        # one finger detected: INDEX | action: paint
        if n_fingers == 1 and prob[1] == 1.0:
            center = (int(pos[2] * width), int(pos[3] * height))
            cv2.circle(self.whiteboard, center, radius=5,
                       color=(0, 0, 0), thickness=-1)
            self.info_whiteboard = copy.deepcopy(self.whiteboard)
            cv2.circle(self.info_whiteboard, center, radius=5,
                       color=(0, 20, 200), thickness=2)
        # two fingers detected: THUMB + INDEX | action: show pointer
        elif n_fingers == 2 and prob[1] == 1.0 and prob[0] == 1.0:
            center = (int(pos[2] * width), int(pos[3] * height))
            self.info_whiteboard = copy.deepcopy(self.whiteboard)
            cv2.circle(self.info_whiteboard, center, radius=5,
                       color=(255, 0, 0), thickness=2)
        # five fingers detected | action: erase
        elif n_fingers == 5:
            center = (int(pos[2] * width), int(pos[3] * height))
            cv2.circle(self.whiteboard, center, radius=10,
                       color=(255, 255, 255), thickness=-1)
            self.info_whiteboard = copy.deepcopy(self.whiteboard)
            cv2.circle(self.info_whiteboard, center, radius=12,
                       color=(0, 255, 0), thickness=2)
        # two fingers detected: THUMB + PINKY | action: clean whiteboard
        elif n_fingers == 2 and prob[0] == 1.0 and prob[4] == 1.0:
            self.whiteboard = np.zeros((height, width, 3), np.uint8) + 255
            self.info_whiteboard = copy.deepcopy(self.whiteboard)
        # three fingers detected: INDEX + MIDDLE + RING | action: save whiteboard
        elif n_fingers == 3 and prob[1] == 1.0 and prob[2] == 1.0 and prob[3] == 1.0:
            cv2.imwrite('saved/whiteboard.jpg', self.whiteboard)
            print('-- whiteboard.jpg saved!')
            self.info_whiteboard = copy.deepcopy(self.whiteboard)
        # three fingers detected: THUMB + INDEX + PINKY | action: exit
        # elif n_fingers == 3 and prob[0] == 1.0 and prob[1] == 1.0 and prob[4] == 1.0:
        #     info_whiteboard = copy.deepcopy(whiteboard)
        #     k = 1
        #     print('=== EXIT ===')
        else:
            self.info_whiteboard = copy.deepcopy(self.whiteboard)

    def run(self):
        """ Run AI Whiteboard """
        try:
            while True:
                ret, image = self.cam.read()
                if not ret:
                    break
                image = image[:, self.cropped_x_st:self.cropped_x_end, :]

                start = time.time()
                # hand detection
                # tl - top left corner of the hand bbox on the cropped image
                # br - bottom right corner of the hand bbox on the cropped image
                tl, br = self.hand_detector.detect(image=image)

                if (tl is not None and br is not None
                        and br[0] - tl[0] >= 5 and br[1] - tl[1] >= 5):
                    cropped_hand = image[tl[1]:br[1], tl[0]:br[0]]
                    height_hand, width_hand, _ = cropped_hand.shape

                    # gesture classification and fingertips regression
                    prob, pos = self.fingertips_detector.classify(image=cropped_hand)
                    pos = np.mean(pos, 0)

                    # post-processing: absolute finger positions on the image
                    prob = np.asarray([(p >= self.confidence_ft_threshold) * 1.0
                                       for p in prob])
                    for i in range(0, len(pos), 2):
                        pos[i] = pos[i] * width_hand + tl[0]
                        pos[i + 1] = pos[i + 1] * height_hand + tl[1]

                    # post-processing: relative finger positions on the whiteboard
                    relative_pos = []
                    for i in range(0, len(pos), 2):
                        tmp_x = max(-5, pos[i] - self.whiteboard_tl[0]) / config['whiteboard_w']
                        tmp_y = max(-5, pos[i + 1] - self.whiteboard_tl[1]) / config['whiteboard_h']
                        relative_pos.append(tmp_x)
                        relative_pos.append(tmp_y)
                    relative_pos = np.array(relative_pos)

                    # draw on the whiteboard
                    self.draw(prob, relative_pos)

                    # drawing fingertips
                    index = 0
                    for c, p in enumerate(prob):
                        if p >= self.confidence_ft_threshold:
                            image = cv2.circle(
                                image, (int(pos[index]), int(pos[index + 1])),
                                radius=5, color=self.colors[c], thickness=-1)
                        index += 2

                k = cv2.waitKey(1)
                if k == 27:  # Esc key to stop
                    break

                end = time.time()
                str_fps = '{:.1f} fps'.format(1 / (end - start))
                # print(str_fps)
                cv2.putText(image, str_fps, (15, 15), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 255, 0), 2, cv2.LINE_AA)
                image = cv2.rectangle(
                    image, (self.whiteboard_tl[0], self.whiteboard_tl[1]),
                    (self.whiteboard_br[0], self.whiteboard_br[1]),
                    (255, 255, 255), 2)

                # display image; note cv2.resize expects dsize as (width, height)
                cv2.imshow(
                    'Fingertips',
                    cv2.resize(image,
                               (config['zoom_koef'] * config['whiteboard_w'],
                                config['zoom_koef'] * config['whiteboard_h'])))
                # display whiteboard
                cv2.imshow('AI_whiteboard', self.info_whiteboard)

            self.cam.release()
            cv2.destroyAllWindows()
        except Exception as e:
            self.cam.release()
            cv2.destroyAllWindows()
            print("Error: {}".format(e))
            exit(1)
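# A minimal entry-point sketch, assuming argparse flags named --trt and
# --raspberry_pi_camera to match the attributes read in __init__; the
# original file's CLI handling is not shown in this excerpt.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='AI Whiteboard')
    parser.add_argument('--trt', action='store_true',
                        help='use TensorRT engines for inference')
    parser.add_argument('--raspberry_pi_camera', action='store_true',
                        help='capture images from the Raspberry Pi Camera')
    args = parser.parse_args()

    AIWhiteboard(args).run()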
import cv2
import numpy as np
from hand_detector.detector import YOLO
from unified_detector import Fingertips

hand = YOLO(weights='weights/yolo.h5', threshold=0.8)
fingertips = Fingertips(weights='weights/classes8.h5')

image = cv2.imread('data/sample.jpg')
tl, br = hand.detect(image=image)

if tl is not None and br is not None:
    cropped_image = image[tl[1]:br[1], tl[0]:br[0]]
    height, width, _ = cropped_image.shape

    # gesture classification and fingertips regression
    prob, pos = fingertips.classify(image=cropped_image)
    pos = np.mean(pos, 0)

    # post-processing
    prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
    for i in range(0, len(pos), 2):
        pos[i] = pos[i] * width + tl[0]
        pos[i + 1] = pos[i + 1] * height + tl[1]

    # drawing
    index = 0
    color = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
             (240, 15, 155), (240, 15, 240)]
    image = cv2.rectangle(image, (tl[0], tl[1]), (br[0], br[1]),
                          (235, 26, 158), 2)
    for c, p in enumerate(prob):
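        # A hedged completion of the truncated drawing loop, mirroring the
        # fingertip drawing in AIWhiteboard.run(); the original file is cut
        # off here, so the loop body and display step below are assumptions.
        if p == 1.0:
            image = cv2.circle(image, (int(pos[index]), int(pos[index + 1])),
                               radius=5, color=color[c], thickness=-1)
        index += 2

    cv2.imshow('Fingertips', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()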