class AIWhiteboard:
    """AI Whiteboard: a virtual whiteboard controlled by hand gestures.

    Every camera frame is cropped to a centred square, passed to the hand
    detector, and the detected hand crop is passed to the fingertips
    detector. The set of raised fingers selects an action (see ``draw``):

        INDEX                 : paint
        THUMB + INDEX         : show pointer only
        five fingers          : erase
        THUMB + PINKY         : clean the whole whiteboard
        INDEX + MIDDLE + RING : save the whiteboard to 'saved/whiteboard.jpg'

    Settings are read from the module-level ``config`` mapping.
    """

    # Key code returned by cv2.waitKey for the Esc key.
    _ESC_KEY = 27
    # Hand boxes narrower or shorter than this many pixels are ignored.
    _MIN_HAND_SIZE = 5
    # Pixel offsets left of / above the whiteboard are clamped to this value.
    _OFFBOARD_CLAMP = -5

    def __init__(self, args):
        """
        Initialization of AI Whiteboard class

        args.trt :boolean : if True - use TensorRT engines for inference
        args.raspberry_pi_camera :boolean : if True - capture images from
            Raspberry Pi Camera (through a GStreamer pipeline), otherwise
            from video device 0
        """
        super().__init__()
        self.confidence_ft_threshold = config['confidence_ft_threshold']
        self.confidence_hd_threshold = config['confidence_hd_threshold']
        # one colour per finger, indexed like the fingertips detector output
        self.colors = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                       (240, 15, 155), (240, 15, 240)]

        # init models
        self.hand_detector = YOLO(
            weights='weights/trained_yolo.h5',
            trt_engine='weights/engines/model_trained_yolo.fp16.engine',
            threshold=self.confidence_hd_threshold,
            trt=args.trt)
        self.fingertips_detector = Fingertips(
            weights='weights/classes8.h5',
            trt_engine='weights/engines/model_classes8.fp16.engine',
            trt=args.trt)

        # init camera
        if args.raspberry_pi_camera:
            self.cam = cv2.VideoCapture(
                gstreamer_pipeline(capture_width=config['cam_w'],
                                   capture_height=config['cam_h'],
                                   display_width=config['cam_w'],
                                   display_height=config['cam_h'],
                                   framerate=config['framerate']),
                cv2.CAP_GSTREAMER)
        else:
            self.cam = cv2.VideoCapture(0)
            self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, config['cam_w'])
            self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, config['cam_h'])

        # the frame size is read back from the capture object rather than
        # taken from config
        origin_w = int(self.cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        origin_h = int(self.cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # cropped coordinates (to get a square image)
        self.cropped_x_st = int(origin_w / 2) - int(origin_h / 2)
        self.cropped_x_end = int(origin_w / 2) + int(origin_h / 2)
        cropped_w = self.cropped_x_end - self.cropped_x_st

        # whiteboard_tl - top left corner of whiteboard on cropped image
        # whiteboard_br - bottom right corner of whiteboard on cropped image
        self.whiteboard_tl = (
            int((cropped_w - config['whiteboard_w']) / 2),
            int((origin_h - config['whiteboard_h']) / 2))
        self.whiteboard_br = (
            int((cropped_w + config['whiteboard_w']) / 2),
            int((origin_h + config['whiteboard_h']) / 2))

        # Create a (white) whiteboard
        self.whiteboard = np.full(
            (config['zoom_koef'] * config['whiteboard_h'],
             config['zoom_koef'] * config['whiteboard_w'], 3),
            255, np.uint8)
        # Create a info whiteboard for demonstration: a copy of the
        # whiteboard on which the cursor markers are drawn
        self.info_whiteboard = self.whiteboard.copy()

    @staticmethod
    def _is_valid_bbox(tl, br, min_size=5):
        """Return True if both corners exist and the box is at least
        ``min_size`` pixels wide and high."""
        if tl is None or br is None:
            return False
        return bool(br[0] - tl[0] >= min_size and br[1] - tl[1] >= min_size)

    @staticmethod
    def _to_absolute(pos, tl, width, height):
        """Convert (x, y) pairs relative to a crop into image coordinates.

        ``pos`` is a flat numpy array [x0, y0, x1, y1, ...]; it is modified
        in place and returned. ``tl`` is the top left corner of the crop,
        ``width``/``height`` is the crop size.
        """
        pos[0::2] = pos[0::2] * width + tl[0]
        pos[1::2] = pos[1::2] * height + tl[1]
        return pos

    @staticmethod
    def _to_relative(pos, origin, board_w, board_h, clamp=-5):
        """Convert image (x, y) pairs into positions relative to the board.

        The pixel offset from ``origin`` (top left corner of the board) is
        clamped from below to ``clamp`` and divided by the board size.
        Returns a new flat float array with the same layout as ``pos``.
        """
        relative = np.empty(len(pos), dtype=float)
        relative[0::2] = np.maximum(clamp, pos[0::2] - origin[0]) / board_w
        relative[1::2] = np.maximum(clamp, pos[1::2] - origin[1]) / board_h
        return relative

    def draw(self, prob, pos):
        """
        Draw detected fingers on whiteboard

        prob :numpy array : array of confidence score of each finger
            according to Fingertips detector; ``run`` passes it binarized
            (1.0 - finger is present, 0.0 - it is not)
        pos :numpy array : array of relative fingers position on whiteboard
            according to Fingertips detector, as flat (x, y) pairs;
            pos[2], pos[3] is the finger used as a cursor

        Updates ``self.whiteboard`` (the drawing) and
        ``self.info_whiteboard`` (the drawing plus a cursor marker).
        """
        # whiteboard shape
        width = config['whiteboard_w'] * config['zoom_koef']
        height = config['whiteboard_h'] * config['zoom_koef']

        # number of detected fingers
        n_fingers = int(np.sum(prob))

        # one finger detected : INDEX | action: paint
        if n_fingers == 1 and prob[1] == 1.0:
            center = (int(pos[2] * width), int(pos[3] * height))
            cv2.circle(self.whiteboard, center, radius=5,
                       color=(0, 0, 0), thickness=-1)
            self.info_whiteboard = self.whiteboard.copy()
            cv2.circle(self.info_whiteboard, center, radius=5,
                       color=(0, 20, 200), thickness=2)

        # two fingers detected: THUMB + INDEX | action: show pointer
        elif n_fingers == 2 and prob[1] == 1.0 and prob[0] == 1.0:
            center = (int(pos[2] * width), int(pos[3] * height))
            self.info_whiteboard = self.whiteboard.copy()
            cv2.circle(self.info_whiteboard, center, radius=5,
                       color=(255, 0, 0), thickness=2)

        # five fingers detected | action: erase
        elif n_fingers == 5:
            center = (int(pos[2] * width), int(pos[3] * height))
            cv2.circle(self.whiteboard, center, radius=10,
                       color=(255, 255, 255), thickness=-1)
            self.info_whiteboard = self.whiteboard.copy()
            cv2.circle(self.info_whiteboard, center, radius=12,
                       color=(0, 255, 0), thickness=2)

        # two fingers detected: THUMB + PINKY | action: clean whiteboard
        elif n_fingers == 2 and prob[0] == 1.0 and prob[4] == 1.0:
            self.whiteboard = np.full((height, width, 3), 255, np.uint8)
            self.info_whiteboard = self.whiteboard.copy()

        # three fingers detected: INDEX + MIDDLE + RING | action: save
        # whiteboard
        elif (n_fingers == 3 and prob[1] == 1.0 and prob[2] == 1.0
              and prob[3] == 1.0):
            # cv2.imwrite reports failure through its return value
            if cv2.imwrite('saved/whiteboard.jpg', self.whiteboard):
                print('-- whiteboard.jpg saved! ')
            else:
                print('-- whiteboard.jpg could not be saved')
            self.info_whiteboard = self.whiteboard.copy()

        # NOTE: an exit gesture (THUMB + INDEX + PINKY) is not implemented;
        # the application is stopped with the Esc key (see ``run``).
        else:
            self.info_whiteboard = self.whiteboard.copy()

    def _process_hand(self, image, tl, br):
        """Detect fingertips inside the hand box, update the whiteboard and
        return ``image`` with the detected fingertips drawn on it."""
        cropped_hand = image[tl[1]:br[1], tl[0]:br[0]]
        height_hand, width_hand, _ = cropped_hand.shape

        # gesture classification and fingertips regression
        prob, pos = self.fingertips_detector.classify(image=cropped_hand)
        pos = np.mean(pos, 0)

        # post-processing: binarize the scores (1.0 - finger is present)
        prob = (np.asarray(prob) >= self.confidence_ft_threshold) * 1.0

        # post-processing: absolute fingers position on an image
        pos = self._to_absolute(pos, tl, width_hand, height_hand)

        # post-processing: relative fingers position on a whiteboard
        relative_pos = self._to_relative(
            pos, self.whiteboard_tl,
            config['whiteboard_w'], config['whiteboard_h'],
            clamp=self._OFFBOARD_CLAMP)

        # draw on whiteboard
        self.draw(prob, relative_pos)

        # drawing fingertips; prob is already binarized, so truthiness
        # selects the detected fingers
        for finger, present in enumerate(prob):
            if present:
                tip = (int(pos[2 * finger]), int(pos[2 * finger + 1]))
                # negative thickness draws a filled circle
                image = cv2.circle(image, tip, radius=5,
                                   color=self.colors[finger], thickness=-1)
        return image

    def run(self):
        """
        Run AI Whiteboard

        Processes camera frames until a frame cannot be read or Esc is
        pressed. The camera and all windows are always released on the way
        out. Any error is printed and the process exits with status 1.
        """
        # NOTE(review): cv2.resize takes dsize as (width, height); the
        # original passes the whiteboard height first. This is harmless for
        # a square whiteboard - confirm the intent for other shapes.
        display_size = (config['zoom_koef'] * config['whiteboard_h'],
                        config['zoom_koef'] * config['whiteboard_w'])
        try:
            while True:
                ret, image = self.cam.read()
                # check the read status before touching the frame: on
                # failure there is no image to crop
                if not ret:
                    break
                image = image[:, self.cropped_x_st:self.cropped_x_end, :]

                start = time.time()

                # hand detection
                # tl - top left corner of hand bbox on cropped image
                # br - bottom right corner of hand bbox on cropped image
                tl, br = self.hand_detector.detect(image=image)
                if self._is_valid_bbox(tl, br, self._MIN_HAND_SIZE):
                    image = self._process_hand(image, tl, br)

                # Esc key to stop; mask to the low byte because some
                # platforms set higher bits in the returned key code
                if cv2.waitKey(1) & 0xFF == self._ESC_KEY:
                    break

                elapsed = time.time() - start
                # guard against a zero interval on coarse clocks
                fps = 1 / elapsed if elapsed > 0 else 0.0
                str_fps = '{:.1f} fps'.format(fps)
                cv2.putText(image, str_fps, (15, 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2,
                            cv2.LINE_AA)

                # outline of the whiteboard area on the camera image
                image = cv2.rectangle(
                    image,
                    (self.whiteboard_tl[0], self.whiteboard_tl[1]),
                    (self.whiteboard_br[0], self.whiteboard_br[1]),
                    (255, 255, 255), 2)

                # display image
                cv2.imshow('Fingertips', cv2.resize(image, display_size))
                # display whiteboard
                cv2.imshow('AI_whiteboard', self.info_whiteboard)
        except Exception as e:
            print("Error: {}".format(e))
            raise SystemExit(1)
        finally:
            # runs on normal exit, on errors and on Ctrl-C alike
            self.cam.release()
            cv2.destroyAllWindows()
# regression fingertip_err = np.array([0, 0, 0, 0, 0, 0, 0, 0]) avg_time = 0 iteration = 0 conf_mat = np.zeros(shape=(8, 8)) pr_prob_per_yolo = [] # prediction of probability performance using yolo pr_pos_per_yolo = [] # prediction of position performance using yolo for n_image, (info, image, cropped_image, gt_prob, gt_pos) in enumerate( zip(crop_info, images, test_x, test_y_prob, test_y_keys), 1): print('Images: ', n_image) tl = [info[0], info[1]] height, width = info[2], info[3] top_left, bottom_right = hand_model.detect(image) if top_left or bottom_right is not None: x1, y1, x2, y2 = int(top_left[0]), int(top_left[1]), int( bottom_right[0]), int(bottom_right[1]) cropped_image = image[y1:y2, x1:x2] height, width, _ = cropped_image.shape """ Predictions """ tic = time.time() prob, pos = fingertips.classify(image=cropped_image) pos = np.mean(pos, 0) """ Post processing """ threshold = 0.5 prob = np.asarray([(p >= threshold) * 1.0 for p in prob]) for i in range(0, len(gt_pos), 2): gt_pos[i] = gt_pos[i] * width / 128. + tl[0]
import cv2 import numpy as np from hand_detector.detector import YOLO from unified_detector import Fingertips hand = YOLO(weights='weights/yolo.h5', threshold=0.8) fingertips = Fingertips(weights='weights/classes8.h5') image = cv2.imread('data/sample.jpg') tl, br = hand.detect(image=image) if tl or br is not None: cropped_image = image[tl[1]:br[1], tl[0]:br[0]] height, width, _ = cropped_image.shape # gesture classification and fingertips regression prob, pos = fingertips.classify(image=cropped_image) pos = np.mean(pos, 0) # post-processing prob = np.asarray([(p >= 0.5) * 1.0 for p in prob]) for i in range(0, len(pos), 2): pos[i] = pos[i] * width + tl[0] pos[i + 1] = pos[i + 1] * height + tl[1] # drawing index = 0 color = [(15, 15, 240), (15, 240, 155), (240, 155, 15), (240, 15, 155), (240, 15, 240)] image = cv2.rectangle(image, (tl[0], tl[1]), (br[0], br[1]), (235, 26, 158), 2) for c, p in enumerate(prob):