def run(self, model_path, video_path=None, openposeJson=None, out_path=None,
        start_frame=0, conf_threshold=0.5, model2=None, model3=None):
    """Detect hands frame by frame, decide touch/non-touch per hand, and
    dump a timeline plot (.jpg) plus per-second annotations (.json)."""
    # BODY_25 keypoint names; index i*3 in pose_keypoints_2d is keypoint i's x.
    openpose_part = [
        "Nose", "Neck", "RShoulder", "RElbow", "RWrist", "LShoulder",
        "LElbow", "LWrist", "MidHip", "RHip", "RKnee", "RAnkle", "LHip",
        "LKnee", "LAnkle", "REye", "LEye", "REar", "LEar", "LBigToe",
        "LSmallToe", "LHeel", "RBigToe", "RSmallToe", "RHeel", "Background"
    ]
    fingertips = Fingertips(weights='model_data/finmodel.h5')
    if video_path is None:
        return None
    video = cv2.VideoCapture(video_path)
    timeline = []
    labelline = []   # per-frame "hand exist" / "hand not exist"
    handStatus = []  # per-frame "touch exist" / "no touch exist"
    if out_path:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(
            out_path, fourcc, 10.0,
            (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
             int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))),
            isColor=True)
    vggmodel = load_model(model_path)  # only used by the disabled verifier below
    if start_frame > 0:
        # start_frame is interpreted as a millisecond offset
        video.set(cv2.CAP_PROP_POS_MSEC, start_frame)
    accum_time = 0
    curr_fps = 0
    prev_time = timer()
    # optical-flow parameters: apparently unused in this method
    feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    color = np.random.randint(0, 255, (100, 3))
    num_frame = 0
    video_info = {}   # seconds -> list of per-frame annotations
    frame_info = []   # [ms, touch flag, fingertip count, xmin, ymin, xmax, ymax]
    lastTime = 0

    while True:
        info, vimage = video.read()
        milliseconds = video.get(cv2.CAP_PROP_POS_MSEC)
        seconds = milliseconds / 1000
        video_info[str(seconds)] = []

        if not info:
            # --- end of video: smooth the label sequences, plot, dump JSON ---
            plt.figure(figsize=(100, 20))
            # flip isolated single-frame outliers to their neighbours' value
            for i in range(len(labelline)):
                if i == 0 or i == (len(labelline) - 1):
                    continue
                if labelline[i] != labelline[i - 1] and labelline[i] != labelline[i + 1]:
                    labelline[i] = labelline[i - 1]
            for i in range(len(handStatus)):
                if i == 0 or i == (len(handStatus) - 1):
                    continue
                if handStatus[i] != handStatus[i - 1] and handStatus[i] != handStatus[i + 1]:
                    handStatus[i] = handStatus[i - 1]
            # majority vote over a 5-frame sliding window
            for i in range(len(labelline)):
                temp = []
                if i - 2 >= 0:
                    temp.append(labelline[i - 2])
                if i - 1 >= 0:
                    temp.append(labelline[i - 1])
                temp.append(labelline[i])
                if i + 1 < len(labelline):
                    temp.append(labelline[i + 1])
                if i + 2 < len(labelline):
                    temp.append(labelline[i + 2])
                labelline[i] = Counter(temp).most_common(1)[0][0]
            for i in range(len(handStatus)):
                temp = []
                if i - 2 >= 0:
                    temp.append(handStatus[i - 2])
                if i - 1 >= 0:
                    temp.append(handStatus[i - 1])
                temp.append(handStatus[i])
                if i + 1 < len(handStatus):
                    temp.append(handStatus[i + 1])
                if i + 2 < len(handStatus):
                    temp.append(handStatus[i + 2])
                handStatus[i] = Counter(temp).most_common(1)[0][0]
            plt.plot(timeline, labelline, label='hand exist', color='r')
            plt.plot(timeline, handStatus, label="hand status", color='b')
            finaltime = int(float(timeline[-1])) + 2
            plt.hlines("hand exist", 0, finaltime, color="green", linestyles="dashed")
            plt.hlines("hand not exist", 0, finaltime, color="blue", linestyles="dashed")
            plt.hlines("touch exist", 0, finaltime, color="red", linestyles="dashed")
            plt.hlines("no touch exist", 0, finaltime, color="green", linestyles="dashed")
            plt.text(finaltime, "hand exist", "hand detected at each time", fontsize=10)
            plt.text(finaltime, "hand not exist", "hand not detected at each time", fontsize=10)
            plt.text(finaltime, "touch exist",
                     "hand detected and touch valid at each time", fontsize=10)
            plt.text(finaltime, "no touch exist",
                     "no hand or no touch valid though hand detected at each time",
                     fontsize=10)
            plt.xlabel("time (ms) per frame", fontsize=20)
            plt.ylabel("hand relative label (blue is touch validation label, "
                       "red is hand detection label)", fontsize=20)
            plt.legend()
            plt.savefig(video_path[:-4] + ".jpg")
            video.release()
            if out_path:
                out.release()
            cv2.destroyAllWindows()
            with open(video_path[:-4] + ".json", "a") as outfile:
                json.dump(video_info, outfile, ensure_ascii=False)
                outfile.write('\n')
            print("Over")
            return

        timeline.append(round(milliseconds, 2))
        input_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(vimage, input_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        inputs = image.img_to_array(rgb)
        input_image = preprocess_input(np.array([inputs]))
        res = [[]]

        if openposeJson:
            # detect hands inside crops centred on the OpenPose wrists
            video_file_name = os.listdir(openposeJson)
            body_info = json.load(
                open(openposeJson + video_file_name[num_frame], "r"))["people"]
            for h in range(len(body_info)):
                keypoints = body_info[h]["pose_keypoints_2d"]
                # crop radius: half the neck-to-midhip distance when available
                if int(keypoints[4]) != 0:
                    if int(keypoints[25]) != 0:
                        distance = int((keypoints[25] - keypoints[4]) / 2)
                    else:
                        distance = int((np.shape(vimage)[0] - keypoints[4]) / 2)
                else:
                    distance = 100
                for x in (12, 21):  # x-indices of RWrist and LWrist
                    # leftovers from a disabled weighted-box-fusion experiment
                    tres = []
                    weightsum = 0
                    xpos = int(keypoints[x])
                    ypos = int(keypoints[x + 1])
                    elxpos = int(keypoints[x - 3])  # elbow of the same arm
                    elypos = int(keypoints[x - 2])
                    if xpos == 0 and ypos == 0:
                        continue
                    # extend the crop away from the elbow, clamped to the frame
                    if elxpos >= xpos:
                        xmin = max(xpos - distance, 0)
                        xmax = min(xpos + int(distance / 2), np.shape(vimage)[1])
                    else:
                        xmin = max(xpos - int(distance / 2), 0)
                        xmax = min(xpos + distance, np.shape(vimage)[1])
                    if elypos >= ypos:
                        ymin = max(ypos - distance, 0)
                        ymax = min(ypos + int(distance / 2), np.shape(vimage)[0])
                    else:
                        ymin = max(ypos - int(distance / 2), 0)
                        ymax = min(ypos + distance, np.shape(vimage)[0])
                    print("distance is", distance, "box is", [xmin, ymin, xmax, ymax])
                    crop_image = vimage[ymin:ymax, xmin:xmax]
                    rgb_crop = cv2.cvtColor(cv2.resize(crop_image, input_size),
                                            cv2.COLOR_BGR2RGB)
                    input_crop = preprocess_input(
                        np.array([image.img_to_array(rgb_crop)]))
                    if model2 is None or model3 is None:
                        if len(res) > 0:
                            res[0].append(self.bbox_util.detection_out(
                                self.model.predict(input_crop))[0][0])
                    else:
                        detections = combine(self, model2, model3, None,
                                             input_crop, crop_image)
                        for each in detections:
                            if each[1] < conf_threshold:
                                continue
                            # map (possibly normalised) crop box back to the full frame
                            if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                                each[2] = int(each[2] * np.shape(crop_image)[1]) + xmin
                                each[3] = int(each[3] * np.shape(crop_image)[0]) + ymin
                                each[4] = int(each[4] * np.shape(crop_image)[1]) + xmin
                                each[5] = int(each[5] * np.shape(crop_image)[0]) + ymin
                            else:
                                each[2] = int(each[2]) + xmin
                                each[3] = int(each[3]) + ymin
                                each[4] = int(each[4]) + xmin
                                each[5] = int(each[5]) + ymin
                            res[0].append(each)
                            print("res is", res)

        if len(res[0]) == 0:
            # no wrist-guided detections: run the detector on the whole frame
            if model2 is None or model3 is None:
                pred = self.model.predict(input_image)
                res = self.bbox_util.detection_out(pred)
            else:
                # SSD ensemble learning
                res = [combine(self, model2, model3, None, input_image, vimage)]

        if len(res) > 0 and len(res[0]) > 0:
            # at least one hand detected in this frame
            temp = {"hand": "exist", "hand status": [], "body part": [],
                    "hand position": []}
            for each in res[0]:
                if each[1] < conf_threshold:
                    continue
                if each[2] <= 1 and each[3] <= 1 and each[4] <= 1 and each[5] <= 1:
                    xmin = int(each[2] * np.shape(vimage)[1])
                    ymin = int(each[3] * np.shape(vimage)[0])
                    xmax = int(each[4] * np.shape(vimage)[1])
                    ymax = int(each[5] * np.shape(vimage)[0])
                else:
                    xmin = int(each[2])
                    ymin = int(each[3])
                    xmax = int(each[4])
                    ymax = int(each[5])
                test_img = vimage[ymin:ymax, xmin:xmax]
                height, width, _ = test_img.shape
                if height < 5 or width < 5:
                    finum = 0
                    continue
                temp["hand position"].append([xmin, ymin, xmax, ymax])

                # gesture classification and fingertip regression
                prob, pos = fingertips.classify(image=test_img)
                pos = np.mean(pos, 0)
                # post-processing: threshold the per-finger probabilities
                prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
                for i in range(0, len(pos), 2):
                    pos[i] = pos[i] * width + xmin
                    pos[i + 1] = pos[i + 1] * height + ymin

                # draw one coloured dot per detected fingertip
                index = 0
                color = [(15, 15, 240), (15, 240, 155), (240, 155, 15),
                         (240, 15, 155), (240, 15, 240)]
                finum = 0  # number of extended fingers
                for c, p in enumerate(prob):
                    if p > 0.5:
                        finum += 1
                        vimage = cv2.circle(
                            vimage, (int(pos[index]), int(pos[index + 1])),
                            radius=12, color=color[c], thickness=-2)
                    index = index + 2

                # (disabled experiments: edge-based post-processing and a
                # rotated-crop VGG sub-model verifier using vggmodel)
                body_in = []
                cv2.rectangle(vimage, (xmin, ymin), (xmax, ymax),
                              color=(255, 0, 0), thickness=2)
                cv2.putText(vimage, "hand", (xmin, ymin - 3),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 0), 1)

                # temporal consistency: a touch needs an overlapping detection
                # in the previous frame and at least one extended finger
                flag = 0
                if flag == 0:  # the disabled verifier used to set flag above
                    vect1 = [xmin, ymin, xmax, ymax]
                    pastTrue = 0
                    for framebefore in range(len(frame_info)):
                        if frame_info[len(frame_info) - 1 - framebefore][0] == lastTime:
                            t = frame_info[len(frame_info) - 1 - framebefore]
                            vect2 = t[3:]
                            vwidth = np.min([xmax, vect2[2]]) - np.max([xmin, vect2[0]]) + 1
                            vheight = np.min([ymax, vect2[3]]) - np.max([ymin, vect2[1]]) + 1
                            if vwidth < 0 or vheight < 0:
                                continue
                            nsq = (ymax - ymin + 1) * (xmax - xmin + 1)
                            print("overlap fraction:", vwidth * vheight / nsq)
                            if vwidth * vheight / nsq > 0.6:
                                pastTrue += 1
                        elif frame_info[len(frame_info) - 1 - framebefore][0] < lastTime:
                            break
                    if pastTrue > 0 and finum >= 1:
                        flag += 1

                if openposeJson:
                    # build coarse body-part boxes from the OpenPose skeleton
                    video_file_name = os.listdir(openposeJson)
                    body_info = json.load(
                        open(openposeJson + video_file_name[num_frame], "r"))["people"]
                    for h in range(len(body_info)):
                        partsplit = {"main body": [], "left hand above": [],
                                     "left hand below": [], "right hand above": [],
                                     "right hand below": [], "left leg above": [],
                                     "left leg below": [], "right leg above": [],
                                     "right leg below": [], "head": []}
                        detail = body_info[h]["pose_keypoints_2d"]
                        if detail[51] != 0 and detail[54] != 0 and detail[4] != 0:
                            xminpos = int(np.minimum(detail[54], detail[51])) - 5
                            yminpos = int(detail[52]) - 50
                            xmaxpos = int(np.maximum(detail[51], detail[54])) + 5
                            ymaxpos = int(detail[4])
                            partsplit["head"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[6] != 0 and detail[15] != 0:
                            xminpos = int(np.minimum(detail[15], detail[6]))
                            yminpos = int(np.minimum(detail[7], detail[16]))
                            xmaxpos = int(np.maximum(detail[6], detail[15]))
                            if detail[24] != 0:
                                ymaxpos = int(detail[25])
                            else:
                                ymaxpos = np.shape(vimage)[0]
                            partsplit["main body"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[9] != 0:
                            xminpos = int(np.minimum(detail[6], detail[9]))
                            yminpos = int(np.minimum(detail[7], detail[10]))
                            xmaxpos = int(np.maximum(detail[6], detail[9]))
                            ymaxpos = int(np.maximum(detail[7], detail[10]))
                            partsplit["right hand above"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[12] != 0:
                            xminpos = int(np.minimum(detail[12], detail[9]))
                            yminpos = int(np.minimum(detail[13], detail[10]))
                            xmaxpos = int(np.maximum(detail[12], detail[9]))
                            ymaxpos = int(np.maximum(detail[13], detail[10]))
                            partsplit["right hand below"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[18] != 0:
                            xminpos = int(np.minimum(detail[15], detail[18]))
                            yminpos = int(np.minimum(detail[16], detail[19]))
                            xmaxpos = int(np.maximum(detail[15], detail[18]))
                            ymaxpos = int(np.maximum(detail[16], detail[19]))
                            partsplit["left hand above"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[21] != 0:
                            xminpos = int(np.minimum(detail[21], detail[18]))
                            yminpos = int(np.minimum(detail[22], detail[19]))
                            xmaxpos = int(np.maximum(detail[21], detail[18]))
                            ymaxpos = int(np.maximum(detail[22], detail[19]))
                            partsplit["left hand below"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[27] != 0 and detail[30] != 0:
                            xminpos = int(np.minimum(detail[24], detail[30]))
                            yminpos = int(np.minimum(detail[28], detail[31]))
                            xmaxpos = int(np.maximum(detail[24], detail[30]))
                            ymaxpos = int(np.maximum(detail[28], detail[31]))
                            partsplit["right leg above"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[33] != 0:
                            xminpos = int(np.minimum(detail[30], detail[33]))
                            yminpos = int(np.minimum(detail[31], detail[34]))
                            xmaxpos = int(np.maximum(detail[30], detail[33]))
                            ymaxpos = int(np.maximum(detail[31], detail[34]))
                            partsplit["right leg below"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[36] != 0 and detail[39] != 0:
                            xminpos = int(np.minimum(detail[24], detail[39]))
                            yminpos = int(np.minimum(detail[37], detail[40]))
                            xmaxpos = int(np.maximum(detail[24], detail[39]))
                            ymaxpos = int(np.maximum(detail[37], detail[40]))
                            partsplit["left leg above"] = [xminpos, yminpos, xmaxpos, ymaxpos]
                        if detail[42] != 0:
                            xminpos = int(np.minimum(detail[39], detail[42]))
                            yminpos = int(np.minimum(detail[40], detail[43]))
                            xmaxpos = int(np.maximum(detail[39], detail[42]))
                            ymaxpos = int(np.maximum(detail[40], detail[43]))
                            partsplit["left leg below"] = [xminpos, yminpos, xmaxpos, ymaxpos]

                        # keypoints (other than the wrists) falling inside the hand box
                        for x in range(len(body_info[h]["pose_keypoints_2d"])):
                            if x % 3 == 0 and x / 3 != 4 and x / 3 != 7:
                                xpos = int(body_info[h]["pose_keypoints_2d"][x])
                                ypos = int(body_info[h]["pose_keypoints_2d"][x + 1])
                                if (xpos >= xmin and xpos <= xmax) and (ypos >= ymin and ypos <= ymax):
                                    body_in.append(openpose_part[x // 3])

                        # body-part boxes covering more than 20 % of the hand box
                        for keyname in partsplit.keys():
                            if partsplit[keyname] != []:
                                btemp = partsplit[keyname]
                                owidth = np.minimum(btemp[2], xmax) - np.maximum(xmin, btemp[0]) + 1
                                oheight = np.minimum(btemp[3], ymax) - np.maximum(ymin, btemp[1]) + 1
                                wholehand = (ymax - ymin + 1) * (xmax - xmin + 1)
                                cv2.rectangle(vimage, (btemp[0], btemp[1]),
                                              (btemp[2], btemp[3]), (0, 0, 255), 1)
                                cv2.putText(vimage, keyname,
                                            (int((btemp[2] + btemp[0]) / 2) - 1, btemp[1] - 3),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 255, 255), 1)
                                if owidth < 0 or oheight < 0:
                                    continue
                                oarea = owidth * oheight
                                print("keyname is", keyname)
                                print("flag is", flag)
                                print("btemp is", btemp, "hand is", [xmin, ymin, xmax, ymax])
                                print("fraction is:", oarea / wholehand)
                                if oarea / wholehand > 0.2:
                                    body_in.append(keyname)

                # two hand detections with IoU > 0.6 count as hand-on-hand contact
                for i in range(len(res[0])):
                    if res[0][i][1] < conf_threshold:
                        continue
                    for j in range(i + 1, len(res[0])):
                        if res[0][j][1] < conf_threshold:
                            continue
                        temp1 = res[0][i]
                        temp2 = res[0][j]
                        width = (np.min([int(temp1[4]), int(temp2[4])])
                                 - np.max([int(temp1[2]), int(temp2[2])]) + 1)
                        height = (np.min([int(temp1[5]), int(temp2[5])])
                                  - np.max([int(temp1[3]), int(temp2[3])]) + 1)
                        if width < 0 or height < 0:
                            continue
                        area1 = (temp1[5] - temp1[3] + 1) * (temp1[4] - temp1[2] + 1)
                        area2 = (temp2[5] - temp2[3] + 1) * (temp2[4] - temp2[2] + 1)
                        overlap = width * height
                        ratio = overlap / (area1 + area2 - overlap)
                        if ratio > 0.6:
                            body_in.append("hand")

                print("body part is", body_in)
                frame_info.append([milliseconds, flag, finum, xmin, ymin, xmax, ymax])
                if flag > 0 and len(body_in) != 0:
                    cv2.putText(vimage, "touch", (xmax, ymin - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
                    temp["hand status"].append("touch")
                else:
                    cv2.putText(vimage, "non - touch", (xmax, ymin - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
                    temp["hand status"].append("non - touch")
                if temp["hand status"][-1] == "touch":
                    temp["body part"].append(body_in)
                else:
                    temp["body part"].append([])

            if len(temp["hand status"]) == 0:
                video_info[str(seconds)].append("hand not exist")
                labelline.append("hand not exist")
            else:
                video_info[str(seconds)].append(temp)
                labelline.append("hand exist")
            # keep handStatus the same length as labelline/timeline
            if "touch" in temp["hand status"]:
                handStatus.append("touch exist")
            else:
                handStatus.append("no touch exist")
        else:
            video_info[str(seconds)].append("hand not exist")
            labelline.append("hand not exist")
            handStatus.append("no touch exist")

        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time += exec_time
        curr_fps = int(1 / exec_time)
        num_frame += 1
        lastTime = milliseconds
        fps = "FPS:" + str(curr_fps)
        curr_fps = 0
        cv2.rectangle(vimage, (0, 0), (50, 17), (255, 255, 255), -1)
        cv2.putText(vimage, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
        cv2.imshow("SSD result", vimage)
        if out_path:  # only write when a VideoWriter was created above
            out.write(vimage)
        cv2.waitKey(1)
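# --------------------------------------------------------------------------
# Usage sketch (not part of the original source). run() is a method, so it
# needs an instance of its detector class; the class name `HandTouchDetector`
# and every path below are hypothetical placeholders for illustration only.
#
#     detector = HandTouchDetector()                       # hypothetical class
#     detector.run(model_path='model_data/vgg_touch.h5',   # assumed weights file
#                  video_path='videos/sample.avi',
#                  openposeJson='openpose_out/sample/',    # per-frame OpenPose JSON dir
#                  out_path='videos/sample_annotated.avi',
#                  conf_threshold=0.5)
#
# Side effects, per the code above: an annotated video (if out_path is set),
# a timeline plot saved as sample.jpg, and per-second hand/touch annotations
# appended to sample.json next to the input video.
# --------------------------------------------------------------------------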
# ---- separate script: fingertip classification & regression evaluation on
# ---- the EgoGesture test folders
import os

import numpy as np
from statistics import mean

from unified_detector import Fingertips
from preprocess.label_gen_test import label_generator_testset

test_image_file = []
directory = '../../EgoGesture Dataset/'
test_folders = ['SingleOneTest', 'SingleTwoTest', 'SingleThreeTest',
                'SingleFourTest', 'SingleFiveTest']
for folder in test_folders:
    test_image_file = test_image_file + os.listdir(directory + folder + '/')

model = Fingertips(weights='../weights/classes5.h5')

# classification
ground_truth_class = np.array([0, 0, 0, 0, 0])
prediction_class = np.array([0, 0, 0, 0, 0])

# regression
fingertip_err = np.array([0, 0, 0, 0, 0])
avg_time = 0
iteration = 0

for image_numbers, image_name in enumerate(test_image_file, 1):
    print('Images: ', image_numbers)
    image, tl, cropped_image, ground_truths = label_generator_testset(
        directory=directory, image_name=image_name, type='Test')
    height, width, _ = cropped_image.shape
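# Hedged sketch (not in the original file): the evaluation loop above is cut
# off here. Using only the counters already defined, a typical closing report
# for this kind of script might look like the following; the exact update
# rules inside the truncated loop are an assumption.
#
#     print('class-wise accuracy:', prediction_class / ground_truth_class)
#     print('overall accuracy:', np.sum(prediction_class) / np.sum(ground_truth_class))
#     print('mean fingertip error (px):', fingertip_err / max(iteration, 1))
#     print('average inference time (s):', avg_time / max(iteration, 1))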
# ---- separate script: evaluation of YOLO hand detection + fingertip model
# ---- on the saved test arrays
import cv2
import time
import numpy as np
from statistics import mean

from hand_detector.detector import YOLO
from unified_detector import Fingertips

images = np.load('../dataset/test/images.npy')
test_x = np.load('../dataset/test/test_x.npy')
test_y_prob = np.load('../dataset/test/test_y_prob.npy')
test_y_keys = np.load('../dataset/test/test_y_keys.npy')
crop_info = np.load('../dataset/test/crop_info.npy')

hand_model = YOLO(weights='../weights/yolo.h5', threshold=0.5)
fingertips = Fingertips(weights='../weights/fingertip.h5')

# classification
ground_truth_class = np.array([0, 0, 0, 0, 0, 0, 0, 0])
prediction_class = np.array([0, 0, 0, 0, 0, 0, 0, 0])

# regression
fingertip_err = np.array([0, 0, 0, 0, 0, 0, 0, 0])
avg_time = 0
iteration = 0
conf_mat = np.zeros(shape=(8, 8))

pr_prob_per_yolo = []  # probability-prediction performance using yolo
pr_pos_per_yolo = []   # position-prediction performance using yolo

for n_image, (info, image, cropped_image, gt_prob, gt_pos) in enumerate(
        zip(crop_info, images, test_x, test_y_prob, test_y_keys), 1):
    print('Images: ', n_image)
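# Hedged sketch (not in the original file): one plausible per-image update for
# the truncated loop above, assuming single-label gesture classes so that
# `gt_prob` one-hot encodes the ground truth, and assuming Fingertips.classify()
# returns (prob, pos) as it does in run() earlier in this section.
# `gt_id` / `pred_id` are illustrative names.
#
#     prob, pos = fingertips.classify(image=cropped_image)
#     gt_id = int(np.argmax(gt_prob))
#     pred_id = int(np.argmax(prob))
#     ground_truth_class[gt_id] += 1
#     if pred_id == gt_id:
#         prediction_class[pred_id] += 1
#     conf_mat[gt_id, pred_id] += 1      # rows: ground truth, cols: prediction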
# ---- separate script: finger-tracking cursor control demo
import cv2

from cursor_func import cursorControl
from unified_detector import Fingertips
from hand_detector.detector import SOLO, YOLO

status = False
hand_detection_method = 'yolo'

if hand_detection_method == 'solo':
    hand = SOLO(weights='weights/solo.h5', threshold=0.8)
elif hand_detection_method == 'yolo':
    hand = YOLO(weights='weights/yolo.h5', threshold=0.9)
else:
    assert False, ("'" + hand_detection_method + "' hand detection does not "
                   "exist. use either 'solo' or 'yolo' as hand detection method")

fingertips = Fingertips(weights='weights/classes8.h5')
cam = cv2.VideoCapture(0)
print('Finger Tracking Cursor Control')


async def main():
    while True:
        ret, image = cam.read()
        if not ret:
            break
        # hand detection
        tl, br = hand.detect(image=image)
        if tl is not None and br is not None:
            cropped_image = image[tl[1]:br[1], tl[0]:br[0]]
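# Hedged sketch (not in the original file): how the truncated loop might
# continue and drive the cursor. cursorControl's real signature is not shown
# in this excerpt, so the call below is an assumption, as is driving main()
# with asyncio.
#
#     prob, pos = fingertips.classify(image=cropped_image)
#     pos = np.mean(pos, 0)                          # requires `import numpy as np`
#     x = pos[0] * cropped_image.shape[1] + tl[0]    # back to full-frame coords
#     y = pos[1] * cropped_image.shape[0] + tl[1]
#     cursorControl(x, y, status)                    # assumed signature
#
# ...and at module level, e.g.:  asyncio.run(main())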
# ---- separate script: real-time unified gesture & fingertip detection demo
import cv2
import numpy as np

from unified_detector import Fingertips
from hand_detector.detector import SOLO, YOLO

hand_detection_method = 'yolo'

if hand_detection_method == 'solo':
    hand = SOLO(weights='weights/solo.h5', threshold=0.8)
elif hand_detection_method == 'yolo':
    hand = YOLO(weights='weights/yolo.h5', threshold=0.8)
else:
    assert False, ("'" + hand_detection_method + "' hand detection does not "
                   "exist. use either 'solo' or 'yolo' as hand detection method")

fingertips = Fingertips(weights='weights/fingertip.h5')
cam = cv2.VideoCapture(0)
print('Unified Gesture & Fingertips Detection')

while True:
    ret, image = cam.read()
    if not ret:
        break
    # hand detection
    tl, br = hand.detect(image=image)
    if tl is not None and br is not None:
        cropped_image = image[tl[1]:br[1], tl[0]:br[0]]
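# Hedged sketch (not in the original file): the remainder of this demo loop
# presumably mirrors the classification/drawing logic of run() earlier in this
# section; the snippet below restates that logic for a single hand crop.
#
#     prob, pos = fingertips.classify(image=cropped_image)
#     pos = np.mean(pos, 0)
#     prob = np.asarray([(p >= 0.5) * 1.0 for p in prob])
#     height, width, _ = cropped_image.shape
#     for i in range(0, len(pos), 2):                # map keypoints to full frame
#         pos[i] = pos[i] * width + tl[0]
#         pos[i + 1] = pos[i + 1] * height + tl[1]
#     index = 0
#     for c, p in enumerate(prob):
#         if p > 0.5:
#             image = cv2.circle(image, (int(pos[index]), int(pos[index + 1])),
#                                radius=12, color=(15, 15, 240), thickness=-2)
#         index = index + 2
#     cv2.imshow('Unified Gesture & Fingertips Detection', image)
#     if cv2.waitKey(1) & 0xff == 27:                # Esc quits
#         break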