def init_models(device="CPU"):
    # Use the module-level model objects rather than defining new ones
    global face_detection
    global facial_landmarks_detection
    global head_pose_estimation
    global gaze_estimation

    start = time.time()
    face_detection = Face_Detection(path_face_detection, device)
    face_detection.load_model()
    fd_load_time = time.time() - start

    start = time.time()
    facial_landmarks_detection = Facial_Landmarks_Detection(
        path_facial_landmarks_detection, device)
    facial_landmarks_detection.load_model()
    fld_load_time = time.time() - start

    start = time.time()
    head_pose_estimation = Head_Pose_Estimation(path_head_pose_estimation, device)
    head_pose_estimation.load_model()
    hpe_load_time = time.time() - start

    start = time.time()
    gaze_estimation = Gaze_Estimation(path_gaze_estimation, device)
    gaze_estimation.load_model()
    ge_load_time = time.time() - start

    return fd_load_time, fld_load_time, hpe_load_time, ge_load_time
def init_models(device="CPU"):
    # Use the module-level model objects rather than defining new ones
    global face_detection
    global facial_landmarks_detection
    global head_pose_estimation
    global gaze_estimation

    log.info("Loading Face Detection model...")
    face_detection = Face_Detection(path_face_detection, device)
    face_detection.load_model()
    log.info("DONE\n")

    log.info("Loading Face Landmarks Detection model...")
    facial_landmarks_detection = Facial_Landmarks_Detection(
        path_facial_landmarks_detection, device)
    facial_landmarks_detection.load_model()
    log.info("DONE\n")

    log.info("Loading Head Pose Estimation model...")
    head_pose_estimation = Head_Pose_Estimation(path_head_pose_estimation, device)
    head_pose_estimation.load_model()
    log.info("DONE\n")

    log.info("Loading Gaze Estimation model...")
    gaze_estimation = Gaze_Estimation(path_gaze_estimation, device)
    gaze_estimation.load_model()
    log.info("DONE\n")
def test_gaze_estimation():
    model = Gaze_Estimation(
        "models/intel/gaze-estimation-adas-0002/FP16-INT8/gaze-estimation-adas-0002.xml")
    model.load_model()
    left_eye = cv2.imread("media/left_eye.jpg")
    right_eye = cv2.imread("media/right_eye.jpg")
    # Head-pose angles (yaw, pitch, roll) matching the sample eye crops
    angles = [-14.323277473449707, -2.0438201427459717, 4.142961502075195]
    gaze_vector = model.predict(left_eye, right_eye, angles)
    print("GazeVector: " + str(gaze_vector))
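# A gaze vector like the one printed above is what drives the pointer in the
# mains below (e.g. `mc.move(gaze_vector[0], gaze_vector[1])`). A minimal,
# hypothetical sketch of that hand-off, assuming this repo's MouseController
# lives in a `mouse_controller` module (the module path is an assumption):
def move_mouse_from_gaze(gaze_vector):
    from mouse_controller import MouseController  # assumed module path
    mc = MouseController('medium', 'fast')
    # Only the x and y components are used; z points out of the screen.
    mc.move(gaze_vector[0], gaze_vector[1])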
def get_gazestimator(args):
    model_gaze = args.model_gaze
    if model_gaze:
        gaze_estimator = Gaze_Estimation(model_name=model_gaze,
                                         device=args.device,
                                         extensions=args.cpu_extension)
        return gaze_estimator
def __init__(self, device='CPU', mouse_con=False, face_dec=None, fac_land=None,
             head_pose=None, gaze=None, show_video=False, save_video=False):
    '''
    All models should be passed in here.
    '''
    if face_dec and fac_land and head_pose and gaze:
        self.face_dec = FaceDetectionModel(face_dec, device=device)
        self.fac_land = FacialLandmarksDetection(fac_land, device=device)
        self.head_pose = Head_Pose_Estimation(head_pose, device=device)
        self.gaze = Gaze_Estimation(gaze, device=device)
        self.face_dec.load_model()
        self.fac_land.load_model()
        self.head_pose.load_model()
        self.gaze.load_model()
    else:
        raise ValueError('Missing Arguments')
    if mouse_con:
        self.mouse_con = MouseController("low", "fast")
    self.show_video, self.save_video = show_video, save_video
def main():
    args = build_argparser().parse_args()
    inference_time = 0
    counter = 0

    # Initialize the inference classes
    fd = FaceDetection()
    fld = Facial_Landmarks_Detection()
    ge = Gaze_Estimation()
    hp = Head_Pose_Estimation()

    # Load the models
    fd.load_model(args.face_detection_model, args.device, args.cpu_extension)
    fld.load_model(args.facial_landmark_model, args.device, args.cpu_extension)
    ge.load_model(args.gaze_estimation_model, args.device, args.cpu_extension)
    hp.load_model(args.head_pose_model, args.device, args.cpu_extension)

    # Mouse controller precision and speed
    mc = MouseController('medium', 'fast')

    # Feed input from an image, webcam, or video to the models
    if args.input == "cam":
        feed = InputFeeder("cam")
    else:
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
        feed = InputFeeder("video", args.input)
    feed.load_data()

    frame_count = 0
    for frame in feed.next_batch():
        frame_count += 1
        if frame is None:
            continue
        try:
            key = cv2.waitKey(60)
            inf_start = time.time()

            # Make predictions
            detected_face, face_coords = fd.predict(frame.copy(), args.prob_threshold)
            hp_output = hp.predict(detected_face.copy())
            left_eye, right_eye, eye_coords = fld.predict(detected_face.copy())
            new_mouse_coord, gaze_vector = ge.predict(left_eye, right_eye, hp_output)

            det_time = time.time() - inf_start
            inference_time += det_time
            counter += 1

            # Visualization
            if args.visualization:
                preview_frame = frame.copy()
                face_frame = detected_face.copy()
                draw_face_bbox(preview_frame, face_coords)
                display_hp(preview_frame, hp_output, face_coords)
                draw_landmarks(face_frame, eye_coords)
                draw_gaze(face_frame, gaze_vector, left_eye.copy(),
                          right_eye.copy(), eye_coords)
                img = np.hstack((cv2.resize(preview_frame, (500, 500)),
                                 cv2.resize(face_frame, (500, 500))))
            else:
                img = cv2.resize(frame, (500, 500))
            cv2.imshow('Visualization', img)

            # Move the mouse every fifth frame
            if frame_count % 5 == 0:
                mc.move(new_mouse_coord[0], new_mouse_coord[1])

            log.info("NUMBER OF FRAMES: {} ".format(frame_count))
            log.info("INFERENCE TIME: {}ms".format(det_time * 1000))

            if key == 27:
                break
        except Exception:
            print('Unsupported image or video file format. '
                  'Please provide a supported video format.')
            exit()
    feed.close()
def benchmark(args):
    print("running benchmark")
    # file = open(args.c)
    # confs = json.loads(file.read())
    input_type = args.t
    input_files = args.l

    face_lt_start = time.time()
    face_detect = face_detection(args.fm, args.d, args.p, args.e)
    face_detect.load_model()
    face_lt = time.time() - face_lt_start

    landmark_lt_start = time.time()
    landmarks_model = LandmarksDetection(args.lm, args.d, args.e)
    landmarks_model.load_model()
    landmark_lt = time.time() - landmark_lt_start

    head_pose_lt_start = time.time()
    head_pose = Head_Pose(args.hm, args.d, args.e)
    head_pose.load_model()
    head_pose_lt = time.time() - head_pose_lt_start

    gaze_lt_start = time.time()
    gaze_estimation = Gaze_Estimation(args.gm, args.d, args.e)
    gaze_estimation.load_model()
    gaze_lt = time.time() - gaze_lt_start

    feed = InputFeeder(input_type='video', input_file=input_files)
    feed.load_data()
    for batch in feed.next_batch():
        face_inf_start = time.time()
        cropped_face = face_detect.predict(batch)
        face_inf_time = time.time() - face_inf_start

        landmark_inf_start = time.time()
        cropped_left_eye, cropped_right_eye = landmarks_model.predict(cropped_face)
        landmark_inf_time = time.time() - landmark_inf_start

        head_pose_inf_start = time.time()
        head_angles = head_pose.predict(cropped_face)
        head_pose_inf_time = time.time() - head_pose_inf_start

        gaze_inf_start = time.time()
        x, y = gaze_estimation.predict(cropped_left_eye, cropped_right_eye, head_angles)
        gaze_inf_time = time.time() - gaze_inf_start

        # Plot loading times
        models = ['Face_detect', 'landmark_detect', 'Head_pose_est', 'Gaze est']
        loading_times = [face_lt, landmark_lt, head_pose_lt, gaze_lt]
        plot_loading_time(models, loading_times, args.b)

        # Plot inference times
        inference_times = [face_inf_time, landmark_inf_time,
                           head_pose_inf_time, gaze_inf_time]
        plot_inf_time(models, inference_times, args.b)

        logging.info("Benchmarking done!")
        # Only the first frame is needed for the benchmark
        break
    feed.close()
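# plot_loading_time() and plot_inf_time() are called above but not defined in
# this snippet. A minimal sketch of what they could look like, assuming
# matplotlib is available and that the third argument (args.b) is an output
# directory for the generated charts (both assumptions, not the original code):
import os
import matplotlib
matplotlib.use("Agg")  # render to file without a display
import matplotlib.pyplot as plt

def plot_loading_time(models, loading_times, out_dir):
    # Bar chart of per-model load time in seconds.
    plt.figure()
    plt.bar(models, loading_times)
    plt.ylabel("load time (s)")
    plt.savefig(os.path.join(out_dir, "loading_time.png"))
    plt.close()

def plot_inf_time(models, inference_times, out_dir):
    # Bar chart of per-model single-frame inference time in seconds.
    plt.figure()
    plt.bar(models, inference_times)
    plt.ylabel("inference time (s)")
    plt.savefig(os.path.join(out_dir, "inference_time.png"))
    plt.close()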
def main(args):
    input_type = args.t
    input_files = args.l
    flags = args.f

    face_detect = Face_Detection(face_model_path, args.d, args.p, args.e)
    face_detect.load_model()
    landmarks_model = LandmarksDetection(landmarks_model_path, args.d, args.e)
    landmarks_model.load_model()
    head_pose = Head_Pose(hpose_model_path, args.d, args.e)
    head_pose.load_model()
    gaze_estimation = Gaze_Estimation(gaze_model_path, args.d, args.e)
    gaze_estimation.load_model()

    if input_type == 'cam':
        feed = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_files):
            logging.error("Could not find the input file")
            exit(1)
        feed = InputFeeder(input_type='video', input_file=input_files)

    try:
        feed.load_data()
    except Exception:
        logging.error("Could not load data from input file", exc_info=True)

    key = None
    for batch in feed.next_batch():
        try:
            cropped_face, coords = face_detect.predict(batch)
            if type(cropped_face) == int:
                logging.info("Face not detected")
                if key == 27:
                    break
                continue
            cropped_left_eye, cropped_right_eye, left_eye_cord, right_eye_cord = \
                landmarks_model.predict(cropped_face)
            head_angles = head_pose.predict(cropped_face)
            x, y = gaze_estimation.predict(cropped_left_eye, cropped_right_eye,
                                           head_angles)
        except Exception:
            logging.error("An error occurred while running predictions", exc_info=True)
            continue

        if flags:
            if flags == 'FD':
                cv2.rectangle(batch, (coords[0], coords[1]), (coords[2], coords[3]),
                              (255, 0, 0), 3)
            if flags == 'FL':
                cv2.rectangle(cropped_face,
                              (left_eye_cord[0], left_eye_cord[1]),
                              (left_eye_cord[2], left_eye_cord[3]), (255, 0, 0), 3)
                cv2.rectangle(cropped_face,
                              (right_eye_cord[0], right_eye_cord[1]),
                              (right_eye_cord[2], right_eye_cord[3]), (255, 0, 0), 3)
            if flags == 'HP':
                cv2.putText(batch,
                            "Head angles: yaw={:.2f}, pitch={:.2f}, roll={:.2f}".format(
                                head_angles[0], head_angles[1], head_angles[2]),
                            (20, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 2)
            if flags == 'GE':
                # Eye centres in the cropped-face coordinate frame
                left_eye_mid_x = (left_eye_cord[2] - left_eye_cord[0]) / 2 + left_eye_cord[0]
                left_eye_mid_y = (left_eye_cord[3] - left_eye_cord[1]) / 2 + left_eye_cord[1]
                right_eye_mid_x = (right_eye_cord[2] - right_eye_cord[0]) / 2 + right_eye_cord[0]
                right_eye_mid_y = (right_eye_cord[3] - right_eye_cord[1]) / 2 + right_eye_cord[1]
                # Project the gaze vector out of each eye; y is flipped because
                # image coordinates grow downwards
                left_eye_new_x = int(left_eye_mid_x + x * 160)
                left_eye_new_y = int(left_eye_mid_y + y * 160 * -1)
                right_eye_new_x = int(right_eye_mid_x + x * 160)
                right_eye_new_y = int(right_eye_mid_y + y * 160 * -1)
                cv2.line(cropped_face,
                         (int(left_eye_mid_x), int(left_eye_mid_y)),
                         (left_eye_new_x, left_eye_new_y), (255, 0, 255), 5)
                cv2.line(cropped_face,
                         (int(right_eye_mid_x), int(right_eye_mid_y)),
                         (right_eye_new_x, right_eye_new_y), (255, 0, 255), 5)

        mouse = MouseController(precision='low', speed='fast')
        mouse.move(x, y)
        batch = imutils.resize(batch, width=500)
        cv2.imshow('frame', batch)
        key = cv2.waitKey(1) & 0xFF
    feed.close()
def main(args):
    # Enable logging for the function
    logger = logging.getLogger(__name__)

    # Grab the parsed parameters
    faceModel = args.m_f
    facial_LandmarksModel = args.m_l
    headPoseEstimationModel = args.m_h
    GazeEstimationModel = args.m_g
    device = args.d
    inputFile = args.i
    output_path = args.o_p
    modelArchitecture = args.modelAr
    visualization_flag = args.vf

    # Initialize the feed
    single_image_format = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if inputFile.split(".")[-1].lower() in single_image_format:
        feed = InputFeeder('image', inputFile)
    elif args.i == 'cam':
        feed = InputFeeder('cam')
    else:
        feed = InputFeeder('video', inputFile)

    # Face detection model load time
    faceStart_model_load_time = time.time()
    faceDetection = FaceDetection(faceModel, device)
    faceModelView = faceDetection.load_model()
    faceDetection.check_model()
    total_facemodel_load_time = time.time() - faceStart_model_load_time

    # Head pose estimation model load time
    headposeStart_model_load_time = time.time()
    headPose = headPoseEstimation(headPoseEstimationModel, device)
    headPoseModelView = headPose.load_model()
    headPose.check_model()
    headposeTotal_model_load_time = time.time() - headposeStart_model_load_time

    # Facial landmarks model load time
    face_landmarksStart_model_load_time = time.time()
    face_landmarks = Face_landmarks(facial_LandmarksModel, device)
    faceLandmarksModelView = face_landmarks.load_model()
    face_landmarks.check_model()
    face_landmarksTotal_model_load_time = time.time() - face_landmarksStart_model_load_time

    # Gaze estimation model load time
    GazeEstimationStart_model_load_time = time.time()
    GazeEstimation = Gaze_Estimation(GazeEstimationModel, device)
    GazeModelView = GazeEstimation.load_model()
    GazeEstimation.check_model()
    GazeEstimationTotal_model_load_time = time.time() - GazeEstimationStart_model_load_time

    if modelArchitecture == 'yes':
        print("The model architecture of the gaze model is", GazeModelView)
        print("The model architecture of the landmarks model is", faceLandmarksModelView)
        print("The model architecture of the head pose model is", headPoseModelView)
        print("The model architecture of the face model is", faceModelView)

    # Count the number of frames
    frameCount = 0
    w, h = feed.load_data()
    for _, frame in feed.next_batch():
        if not _:
            break
        frameCount += 1
        key = cv2.waitKey(60)

        start_imageface_inference_time = time.time()
        imageface = faceDetection.predict(frame, w, h)
        imageface_inference_time = time.time() - start_imageface_inference_time
        if 'm_f' in visualization_flag:
            cv2.imshow('cropped face', imageface)
        if type(imageface) == int:
            logger.info("no face detected")
            if key == 27:
                break
            continue

        start_imagePose_inference_time = time.time()
        imageAngles, imagePose = headPose.predict(imageface)
        imagePose_inference_time = time.time() - start_imagePose_inference_time
        if 'm_h' in visualization_flag:
            cv2.imshow('Head Pose Angles', imagePose)

        start_landmarkImage_inference_time = time.time()
        leftEye, rightEye, landmarkImage = face_landmarks.predict(imageface)
        landmarkImage_inference_time = time.time() - start_landmarkImage_inference_time
        if leftEye is None or rightEye is None:
            logger.info("image probably too dark or eyes covered, "
                        "hence could not detect landmarks")
            continue
        if 'm_l' in visualization_flag:
            cv2.imshow('Face output', landmarkImage)

        start_GazeEstimation_inference_time = time.time()
        x, y = GazeEstimation.predict(leftEye, rightEye, imageAngles)
        GazeEstimation_inference_time = time.time() - start_GazeEstimation_inference_time
        if 'm_g' in visualization_flag:
            cv2.putText(landmarkImage,
                        "Estimated x:{:.2f} | Estimated y:{:.2f}".format(x, y),
                        (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            cv2.imshow('Gaze Estimation', landmarkImage)

        mouseVector = MouseController('medium', 'fast')
        if frameCount % 5 == 0:
            mouseVector.move(x, y)
        if key == 27:
            break

        if (imageface_inference_time != 0 and landmarkImage_inference_time != 0
                and imagePose_inference_time != 0 and GazeEstimation_inference_time != 0):
            fps_face = 1 / imageface_inference_time
            fps_landmark = 1 / landmarkImage_inference_time
            fps_headpose = 1 / imagePose_inference_time
            fps_gaze = 1 / GazeEstimation_inference_time
            with open(os.path.join(output_path, device, 'face', 'face_stats.txt'), 'w') as f:
                f.write(str(imageface_inference_time) + '\n')
                f.write(str(fps_face) + '\n')
                f.write(str(total_facemodel_load_time) + '\n')
            with open(os.path.join(output_path, device, 'landmark',
                                   'landmark_stats.txt'), 'w') as f:
                f.write(str(landmarkImage_inference_time) + '\n')
                f.write(str(fps_landmark) + '\n')
                f.write(str(face_landmarksTotal_model_load_time) + '\n')
            with open(os.path.join(output_path, device, 'headpose',
                                   'headpose_stats.txt'), 'w') as f:
                f.write(str(imagePose_inference_time) + '\n')
                f.write(str(fps_headpose) + '\n')
                f.write(str(headposeTotal_model_load_time) + '\n')
            with open(os.path.join(output_path, device, 'gaze',
                                   'gaze_stats.txt'), 'w') as f:
                f.write(str(GazeEstimation_inference_time) + '\n')
                f.write(str(fps_gaze) + '\n')
                f.write(str(GazeEstimationTotal_model_load_time) + '\n')

    logger.info("The End")
    VIS = visualize(output_path, device)
    VIS.visualize1()
    VIS.visualize2()
    VIS.visualize3()
    cv2.destroyAllWindows()
    feed.close()
def main(args):
    fd = Face_Detection(
        "models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001",
        args.device, args.extensions)
    start = time.time()
    fd.load_model()
    logging.info(f"------Loading Times {args.precision}------")
    logging.info("Face Detection: {:.5f} sec".format(time.time() - start))

    fl = Facial_Landmarks(
        f"models/intel/landmarks-regression-retail-0009/{args.precision}/landmarks-regression-retail-0009",
        args.device, args.extensions)
    start = time.time()
    fl.load_model()
    logging.info("Facial Landmarks: {:.5f} sec".format(time.time() - start))

    hp = Head_Pose_Estimation(
        f"models/intel/head-pose-estimation-adas-0001/{args.precision}/head-pose-estimation-adas-0001",
        args.device, args.extensions)
    start = time.time()
    hp.load_model()
    logging.info("Head Pose Estimation: {:.5f} sec".format(time.time() - start))

    gs = Gaze_Estimation(
        f"models/intel/gaze-estimation-adas-0002/{args.precision}/gaze-estimation-adas-0002",
        args.device, args.extensions)
    start = time.time()
    gs.load_model()
    logging.info("Gaze Estimation: {:.5f} sec".format(time.time() - start))

    input_feed = InputFeeder(args.type, args.input)
    input_feed.load_data()
    mc = MouseController("high", "fast")

    inf_time = [0, 0, 0, 0, 0]  # fd, fl, hp, gs, frames
    for frame in input_feed.next_batch():
        if frame is None:
            break
        inf_time[4] += 1

        # Face detection
        start = time.time()
        face_frame = fd.predict(frame.copy())
        inf_time[0] += time.time() - start

        # Eye detection through facial landmarks
        start = time.time()
        left_eye_image, left_x, left_y, right_eye_image, right_x, right_y = \
            fl.predict(face_frame)
        inf_time[1] += time.time() - start

        # Head pose
        start = time.time()
        yaw, pitch, roll = hp.predict(face_frame)
        inf_time[2] += time.time() - start

        # Gaze estimation
        start = time.time()
        gaze_vector = gs.predict(left_eye_image, right_eye_image, (yaw, pitch, roll))
        inf_time[3] += time.time() - start

        # Mouse move
        mc.move(gaze_vector[0], gaze_vector[1])

        if args.visualize:
            x, y, z = gaze_vector
            face_frame = cv2.circle(face_frame, (right_x, right_y), 5, (255, 0, 0), -5)
            face_frame = cv2.circle(face_frame, (left_x, left_y), 5, (255, 0, 0), -5)
            cv2.putText(face_frame,
                        "yaw:{:.2f} - pitch:{:.2f} - roll:{:.2f}".format(yaw, pitch, roll),
                        (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 0), 1)
            cv2.putText(face_frame,
                        "gaze-vector x:{:.2f} - y:{:.2f} - z:{:.2f}".format(x, y, z),
                        (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 0), 1)
            cv2.imshow('left eye', left_eye_image)
            cv2.imshow('right eye', right_eye_image)
            # Gaze arrows start at the eye centres; y is negated because image
            # coordinates grow downwards
            cv2.arrowedLine(face_frame, (left_x, left_y),
                            (left_x + int(x * 100), left_y + int(-y * 100)),
                            (0, 0, 255), 2)
            cv2.arrowedLine(face_frame, (right_x, right_y),
                            (right_x + int(x * 100), right_y + int(-y * 100)),
                            (0, 0, 255), 2)
            cv2.imshow('face detection', face_frame)
            cv2.waitKey(60)

    # Inference benchmarks (per-frame averages)
    logging.info(f"------Inference Times {args.precision}------")
    logging.info("Face Detection: {:.5f} sec".format(inf_time[0] / inf_time[4]))
    logging.info("Facial Landmarks: {:.5f} sec".format(inf_time[1] / inf_time[4]))
    logging.info("Head Pose Estimation: {:.5f} sec".format(inf_time[2] / inf_time[4]))
    logging.info("Gaze Estimation: {:.5f} sec".format(inf_time[3] / inf_time[4]))

    input_feed.close()
    cv2.destroyAllWindows()
def infer_on_video(args):
    draw_flag = args.b
    device = args.device
    input_path = args.input_path
    input_type = args.input_type
    output_path = args.output_path
    precision = args.accuracy

    locations = {}
    locations[FACE_DETECTION_MODEL] = os.path.join(
        MODEL_PATH, FACE_DETECTION_MODEL, 'INT1', FACE_DETECTION_MODEL + ".xml")
    if precision is not None:
        log.info("The face-detection-adas-binary-0001 model always uses INT1 precision")
    for model_name in [FACIAL_LANDMARKS_DETECTION_MODEL,
                       HEAD_POSE_ESTIMATION_MODEL,
                       GAZE_ESTIMATION_MODEL]:
        locations[model_name] = find_exist_model_file(precision, model_name)

    # Initialize feeder
    feed = InputFeeder(input_type=input_type, input_file=input_path)
    feed.load_data()

    # Grab the shape of the input
    input_width = feed.getWidth()
    input_height = feed.getHeight()

    # Create a video writer for the output video
    # out = cv2.VideoWriter('../out.mp4', CODEC, 30, (input_width, input_height))

    mouse_controller = MouseController(MOUSE_PRECISION, MOUSE_SPEED)

    # Model initialization
    start_model_load_time = time.time()
    face_detection = Face_Detection(locations[FACE_DETECTION_MODEL], device,
                                    extensions=CPU_EXTENSION)
    facial_landmarks_detection = Facial_Landmarks_Detection(
        locations[FACIAL_LANDMARKS_DETECTION_MODEL], device, extensions=CPU_EXTENSION)
    head_pose_estimation = Head_Pose_Estimation(
        locations[HEAD_POSE_ESTIMATION_MODEL], device, extensions=CPU_EXTENSION)
    gaze_estimation = Gaze_Estimation(locations[GAZE_ESTIMATION_MODEL], device,
                                      extensions=CPU_EXTENSION)
    total_model_load_time = time.time() - start_model_load_time

    counter = 0
    start_inference_time = time.time()

    # Process frames until the video ends, or the process is exited
    for ret, batch in feed.next_batch(BATCH_SIZE):
        if not ret:
            break
        counter += 1
        gaze_lines = []
        out_frame = batch.copy()
        key = cv2.waitKey(60)

        # Face detection
        # face_detection_output = [image_id, label, conf, xmin, ymin, xmax, ymax]
        face_detection_output = face_detection.predict(batch)
        face_xmin = abs(int(face_detection_output[3] * input_width))
        face_ymin = abs(int(face_detection_output[4] * input_height))
        face_xmax = abs(int(face_detection_output[5] * input_width))
        face_ymax = abs(int(face_detection_output[6] * input_height))
        if (face_ymax - face_ymin) <= 0 or (face_xmax - face_xmin) <= 0:
            continue

        # Crop the face image
        face = batch[face_ymin:face_ymax, face_xmin:face_xmax]
        if draw_flag:
            cv2.rectangle(out_frame, (face_xmin, face_ymin),
                          (face_xmax, face_ymax), (255, 255, 0), 2)

        # Find facial landmarks (to find the eyes)
        eyes = facial_landmarks_detection.predict(face)

        # Estimate head orientation (yaw=Y, pitch=X, roll=Z)
        yaw, pitch, roll = head_pose_estimation.predict(face)

        eye_images = []
        for eye in eyes:
            face_height, face_width, _ = face.shape
            eye_xmin = int(eye[_X] * face_width - EYE_RADIUS)
            eye_ymin = int(eye[_Y] * face_height - EYE_RADIUS)
            eye_xmax = int(eye[_X] * face_width + EYE_RADIUS)
            eye_ymax = int(eye[_Y] * face_height + EYE_RADIUS)
            if (eye_ymax - eye_ymin) <= 0 or (eye_xmax - eye_xmin) <= 0:
                continue
            # Crop and resize
            eye_images.append(face[eye_ymin:eye_ymax, eye_xmin:eye_xmax].copy())
            # Draw eye boundary boxes
            if draw_flag:
                cv2.rectangle(out_frame,
                              (eye_xmin + face_xmin, eye_ymin + face_ymin),
                              (eye_xmax + face_xmin, eye_ymax + face_ymin),
                              (0, 255, 0), 2)

        # Gaze estimation; roll is passed as 0 and compensated for manually below
        gaze_vec_norm = gaze_estimation.predict(eye_images, [yaw, pitch, 0])
        cos = math.cos(math.radians(roll))
        sin = math.sin(math.radians(roll))
        tmpx = gaze_vec_norm[0] * cos + gaze_vec_norm[1] * sin
        tmpy = -gaze_vec_norm[0] * sin + gaze_vec_norm[1] * cos
        gaze_vec_norm = [tmpx, tmpy]

        # Store gaze line coordinates
        for eye in eyes:
            eye[_X] = int(eye[_X] * face_width)
            eye[_Y] = int(eye[_Y] * face_height)
            gaze_lines.append(get_gaze_line(eye, face_xmin, face_ymin, gaze_vec_norm))

        if draw_flag:
            # Draw gaze lines
            for gaze_line in gaze_lines:
                start_point = (gaze_line[0][_X], gaze_line[0][_Y])
                end_point = (gaze_line[1][_X], gaze_line[1][_Y])
                draw_gaze_line(out_frame, start_point, end_point)

        # Start point of the middle gaze line
        start_point = ((gaze_lines[0][0][_X] + gaze_lines[1][0][_X]) / 2,
                       (gaze_lines[0][0][_Y] + gaze_lines[1][0][_Y]) / 2)
        # End point of the middle gaze line
        end_point = ((gaze_lines[0][1][_X] + gaze_lines[1][1][_X]) / 2,
                     (gaze_lines[0][1][_Y] + gaze_lines[1][1][_Y]) / 2)
        gaze_mid_line = [start_point, end_point]
        mouse_point = get_mouse_point(gaze_mid_line, input_width, input_height)
        log.debug("mouse_point[_X], mouse_point[_Y]: %s, %s",
                  mouse_point[_X], mouse_point[_Y])
        # cv2.circle(out_frame, mouse_point, 10, (255, 255, 255), -1)
        mouse_controller.move(mouse_point[_X], mouse_point[_Y])

        # Write out_frames with batch size
        for _ in range(BATCH_SIZE):
            cv2.imshow("video", out_frame)
            # out.write(out_frame)
        if key == 27:
            break

    total_inference_time = round(time.time() - start_inference_time, 1)
    fps = counter / total_inference_time
    with open(os.path.join(output_path, 'stats.txt'), 'w') as f:
        f.write(str(total_inference_time) + '\n')
        f.write(str(fps) + '\n')
        f.write(str(total_model_load_time) + '\n')

    # Release the out writer, capture, and destroy any OpenCV windows
    log.info("Input stream ended...")
    cv2.destroyAllWindows()
    # out.release()
    feed.close()
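# The cos/sin block in infer_on_video() rotates the (x, y) gaze components by
# the head's roll angle, because the gaze model there is queried with roll
# forced to 0. The same 2D rotation, factored out as a standalone helper for
# clarity (the function name is illustrative, not from the original source):
import math

def rotate_gaze_by_roll(gaze_x, gaze_y, roll_degrees):
    """Rotate a 2D gaze vector back by the head-pose roll angle (in degrees)."""
    cos_r = math.cos(math.radians(roll_degrees))
    sin_r = math.sin(math.radians(roll_degrees))
    return (gaze_x * cos_r + gaze_y * sin_r,
            -gaze_x * sin_r + gaze_y * cos_r)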
def main():
    # Load argparser
    args = build_argparser().parse_args()
    input_type = args.input_type
    input_file = args.video
    output_path = args.output_path
    threshold = args.threshold
    extension = args.extension
    version = args.version
    device = args.device
    face_model = args.fd_model
    facial_model = args.fl_model
    headpose_model = args.hp_model
    gaze_model = args.ga_model
    show_image = args.show_image

    # Start loggers
    # Basic logger
    log = setup_logger('basic_logger', 'log/logging_basic.log')
    log.info("Start computer_pointer.py")
    # Time logger
    log_time = setup_logger('time_logger', "log/logging_time.log")
    log_time.info("Start time logger")

    # Get the OpenVINO install location
    openvino_version = openvino.__file__
    print("Openvino version: " + str(openvino_version))

    # Load face detection
    facedetection = Facedetection(face_model, threshold, device, extension, version)
    print("Load class Facedetection = OK")
    print("--------")
    start_load_time_face = time.time()
    facedetection.load_model()
    print("Load model facedetection = Finished")
    log.info("Load model facedetection = Finished")
    print("--------")
    total_model_load_time_face = (time.time() - start_load_time_face) * 1000
    log_time.info('Facedetection load time: ' + str(round(total_model_load_time_face, 3)))

    # Load facial landmarks
    faciallandmarks = Facial_Landmarks(facial_model, threshold, device, extension, version)
    print("Load class Facial_Landmarks = OK")
    print("--------")
    start_load_time_facial = time.time()
    faciallandmarks.load_model()
    print("Load model Facial_Landmarks = Finished")
    log.info("Load model Facial_Landmarks = Finished")
    print("--------")
    total_model_load_time_facial = (time.time() - start_load_time_facial) * 1000
    log_time.info('Facial_Landmarks load time: ' + str(round(total_model_load_time_facial, 3)))

    # Load head pose estimation
    headposeestimation = Head_Pose_Estimation(headpose_model, device, extension,
                                              version, threshold)
    print("Load class head_pose_estimation = OK")
    print("--------")
    start_load_time_headpose = time.time()
    headposeestimation.load_model()
    print("Load model head_pose_estimation = Finished")
    log.info("Load model head_pose_estimation = Finished")
    print("--------")
    total_model_load_time_headpose = (time.time() - start_load_time_headpose) * 1000
    log_time.info('Headpose load time: ' + str(round(total_model_load_time_headpose, 3)))

    # Load gaze estimation
    gazeestimation = Gaze_Estimation(gaze_model, threshold, device, extension, version)
    print("Load class gaze_estimation = OK")
    print("--------")
    start_load_time_gaze = time.time()
    gazeestimation.load_model()
    print("Load model gaze_estimation = Finished")
    log.info("Load model gaze_estimation = Finished")
    print("--------")
    total_model_load_time_gaze = (time.time() - start_load_time_gaze) * 1000
    total_model_load_time = (time.time() - start_load_time_face) * 1000
    log_time.info('Gaze load time: ' + str(round(total_model_load_time_gaze, 3)))
    log_time.info('Total model load time: ' + str(round(total_model_load_time, 3)))
    log_time.info('##################')
    log.info('All models are loaded!')

    feed = InputFeeder(input_type, input_file)
    log.info('Input Feeder is loaded')
    feed.load_data()

    # Output video
    initial_w = int(feed.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(feed.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(feed.cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_video = cv2.VideoWriter(output_path, fourcc, fps, (initial_w, initial_h))

    inference_time_face_total = []
    inference_time_facial_total = []
    inference_time_headpose_total = []
    inference_time_gaze_total = []
    inference_time_total = []

    try:
        for batch in feed.next_batch():
            if batch is None:
                break

            ## Face detection ##
            # Inference time
            start_inference_time_face = time.time()
            print("Start facedetection")
            log.info("Start facedetection")
            print("Cap is fed to the face detection!")
            face_batch = batch.copy()
            face_image, face_cropped, coords = facedetection.predict(face_batch)
            # Average inference time
            inference_time_face = (time.time() - start_inference_time_face) * 1000
            inference_time_face_total.append(inference_time_face)
            len_face = len(inference_time_face_total)
            avg_inference_time_face = sum(inference_time_face_total) / len_face
            log_time.info('Average face inference time: ' + str(avg_inference_time_face))
            log.info('Inference facedetection is finished')
            if not coords:
                print("No face detected")
                log.debug("No face detected")
                continue
            print("The video from the face detection is written to the output path")
            out_video.write(face_image)
            print("End facedetection")

            ## Facial landmarks ##
            # Inference time
            start_inference_time_facial = time.time()
            if (face_cropped is None) or (len(face_cropped) == 0):
                print("No face above threshold detected")
                log.error("No face above threshold detected")
            else:
                print("Start faciallandmarks")
                log.info("Start faciallandmarks")
                print("The cropped face image is fed to the faciallandmarks detection.")
                left_eye_image, right_eye_image, nose_image, lip_corner_left_image, \
                    lip_corner_right_image = faciallandmarks.predict(face_cropped.copy())
                print("End faciallandmarks")
                log.info("End faciallandmarks")
                # Average inference time
                inference_time_facial = (time.time() - start_inference_time_facial) * 1000
                inference_time_facial_total.append(inference_time_facial)
                len_facial = len(inference_time_facial_total)
                avg_inference_time_facial = sum(inference_time_facial_total) / len_facial
                log_time.info('Average facial inference time: ' + str(avg_inference_time_facial))

                ## Head pose estimation ##
                # Inference time
                start_inference_time_headpose = time.time()
                print("Start headposeestimation")
                log.info("Start headposeestimation")
                print("The cropped face image is fed to the headposeestimation.")
                head_pose_angles = headposeestimation.predict(face_cropped)
                # print("Head pose angles: ", head_pose_angles)
                print("End headposeestimation")
                log.info("End headposeestimation")
                # Average inference time
                inference_time_headpose = (time.time() - start_inference_time_headpose) * 1000
                inference_time_headpose_total.append(inference_time_headpose)
                len_headpose = len(inference_time_headpose_total)
                avg_inference_time_headpose = sum(inference_time_headpose_total) / len_headpose
                log_time.info('Average headpose inference time: ' + str(avg_inference_time_headpose))

                ## Gaze estimation ##
                # Inference time
                start_inference_time_gaze = time.time()
                print("Start gazeestimation")
                log.info("Start gazeestimation")
                gaze_result, tmpX, tmpY, gaze_vector02 = gazeestimation.predict(
                    left_eye_image, right_eye_image, head_pose_angles)
                print("End gazeestimation")
                # print('Gaze results:', gaze_result)
                log.info("Gaze results: ({})".format(str(gaze_result)))
                log.info("End gazeestimation")
                # Average inference time
                inference_time_gaze = (time.time() - start_inference_time_gaze) * 1000
                inference_time_gaze_total.append(inference_time_gaze)
                len_gaze = len(inference_time_gaze_total)
                avg_inference_time_gaze = sum(inference_time_gaze_total) / len_gaze
                log_time.info('Average gaze inference time: ' + str(avg_inference_time_gaze))

                ## Total inference time
                inference_time_total.append((time.time() - start_inference_time_face) * 1000)
                inference_time_all_models = sum(inference_time_total)
                log_time.info('Total inference time: ' + str(inference_time_all_models))
                log_time.info('----')

                # If show_image is 'yes', the output images are displayed
                if show_image == 'yes':
                    cv2.imshow('Cropped Face', face_cropped)
                    cv2.imshow('Left eye', left_eye_image)
                    cv2.imshow('Right eye', right_eye_image)
                    cv2.imshow('Nose', nose_image)
                    cv2.imshow('Lip left', lip_corner_left_image)
                    cv2.imshow('Lip right', lip_corner_right_image)
                    cv2.waitKey(28)

                # Mouse controller
                log.info('Start mousecontroller')
                mousecontroller = MouseController('medium', 'fast')
                mousecontroller.move(tmpX, tmpY)

        feed.close()
        cv2.destroyAllWindows()
        log.info('End of program')
    except Exception as e:
        print("Could not run inference: ", e)
        log.error(e)
def model_pipelines(args):
    # Assign the parsed parameters
    # device = args.dev
    # customLayers = args.lay
    inputFile = args.inp
    visual_flag = args.vf
    faceDetectionModel = args.mfd
    landmarksDetectionModel = args.mld
    headPoseEstimationModel = args.mhp
    gazeDetectionModel = args.mgd

    start_time = time.time()

    # Enable logging
    log = logging.getLogger(__name__)
    log.info('----------THE BEGINNING----------')
    log.info('Start Time: {0}'.format(str(start_time)))

    # Initialize the feed
    single_image = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if inputFile.split(".")[-1].lower() in single_image:
        input_feed = InputFeeder('image', inputFile)
    elif args.inp == 'cam':
        input_feed = InputFeeder('cam')
    else:
        input_feed = InputFeeder('video', inputFile)

    # Load the feed data
    log.info('Loading data...')
    input_feed.load_data()
    log.info('Data Loaded. Beginning inference...')

    # Initialize and load the models; each timer brackets only its own model
    face_model_load_start_time = time.time()
    ppl_fd = Face_Detection(faceDetectionModel)
    face_model_load_time = time.time() - face_model_load_start_time

    landmark_model_load_start_time = time.time()
    ppl_fl = Facial_Landmarks_Detection(landmarksDetectionModel)
    landmark_model_load_time = time.time() - landmark_model_load_start_time

    headpose_model_load_start_time = time.time()
    ppl_hd = Head_Pose_Estimation(headPoseEstimationModel)
    headpose_model_load_time = time.time() - headpose_model_load_start_time

    gaze_model_load_start_time = time.time()
    ppl_ge = Gaze_Estimation(gazeDetectionModel)
    gaze_model_load_time = time.time() - gaze_model_load_start_time

    log.info('Face Detection object initialized')
    log.info('Facial Landmarks object initialized')
    log.info('Head Pose object initialized')
    log.info('Gaze object initialized')
    log.info('All models loaded and checked')

    load_time = [face_model_load_time, landmark_model_load_time,
                 headpose_model_load_time, gaze_model_load_time]

    # Count the number of frames
    frameCount = 0
    key = None

    # Collate frames from the feeder and feed them into the detection pipelines
    for _, frame in input_feed.next_batch():
        if not _:
            break
        frameCount += 1
        if frameCount % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))
            key = cv2.waitKey(100)

        # Time the face detection inference
        face_inference_start_time = time.time()
        face_crop = ppl_fd.predict(frame)
        face_inference_time = time.time() - face_inference_start_time
        if 'mfd' in visual_flag:
            cv2.imshow('The cropped face', face_crop)
        if type(face_crop) == int:
            log.info("No face can be detected")
            if key == 27:
                break
            continue

        # Time the landmarks inference
        landmark_inference_start_time = time.time()
        eye_image_left, eye_image_right, face_landmarked = ppl_fl.predict(face_crop.copy())
        landmark_inference_time = time.time() - landmark_inference_start_time
        # Show the face landmark results
        if 'mld' in visual_flag:
            cv2.imshow('Face output', face_landmarked)
        if eye_image_left is None or eye_image_right is None:
            log.info("Landmarks could not be detected; check that the eyes are "
                     "visible and the image is bright")
            continue

        # Time the head pose inference
        headpose_inference_start_time = time.time()
        head_pose_angles, head_pose_image = ppl_hd.predict(face_crop.copy())
        headpose_inference_time = time.time() - headpose_inference_start_time
        # Show the head pose results
        if 'mhp' in visual_flag:
            cv2.imshow('Head Pose Angles', head_pose_image)

        # Time the gaze estimation inference
        gaze_inference_start_time = time.time()
        coord_x, coord_y = ppl_ge.predict(eye_image_left, eye_image_right,
                                          head_pose_angles)
        gaze_inference_time = time.time() - gaze_inference_start_time
        # Show the gaze detection results
        if 'mgd' in visual_flag:
            cv2.putText(face_landmarked,
                        "Estimated x:{:.2f} | Estimated y:{:.2f}".format(coord_x, coord_y),
                        (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            cv2.imshow('Gaze Estimation', face_landmarked)

        mCoord = MouseController('medium', 'fast')
        # Move the mouse based on the coordinates received
        if frameCount % 5 == 0:
            mCoord.move(coord_x, coord_y)
        if key == 27:
            break

        inference_time = [face_inference_time, landmark_inference_time,
                          headpose_inference_time, gaze_inference_time]
        results(args, inference_time, load_time)

        if key == ord('x'):
            log.warning('KeyboardInterrupt: `X` was pressed')
            results(args, inference_time, load_time)
            sys.exit()

    log.info('Elapsed Time: {0}'.format(str(time.time() - start_time)))
    log.info('----------THE END----------')
    cv2.destroyAllWindows()
    input_feed.close()
def main():
    try:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[
                logging.FileHandler("Computer_Pointer_Controller.log"),
                logging.StreamHandler()
            ])
    except Exception:
        print("Log file cannot be created")

    args = build_argparser().parse_args()
    video_path = args.i
    visualize = args.flags
    count = 0
    fd_inference_time = 0
    fld_inference_time = 0
    hp_inference_time = 0
    ge_inference_time = 0

    MC = MouseController('medium', 'fast')

    logging.info("############## Model Load Time #############")
    start_time = time.time()

    first_model_time = start_time
    FD = Face_Detection(device=args.d, threshold=args.prob, extensions=args.l)
    FD.load_model(model_path=args.f)
    logging.info("Face Detection Model: {:.3f}ms".format(
        1000 * (time.time() - first_model_time)))

    second_model_time = time.time()
    FLD = Facial_Landmarks_Detection(device=args.d, extensions=args.l)
    FLD.load_model(model_path=args.fl)
    logging.info("Facial Landmarks Detection Model: {:.3f}ms".format(
        1000 * (time.time() - second_model_time)))

    third_model_time = time.time()
    HPE = Head_Pose_Estimation(device=args.d, extensions=args.l)
    HPE.load_model(model_path=args.hp)
    logging.info("Head Pose Estimation Model: {:.3f}ms".format(
        1000 * (time.time() - third_model_time)))

    fourth_model_time = time.time()
    GE = Gaze_Estimation(device=args.d, extensions=args.l)
    GE.load_model(model_path=args.g)
    logging.info("Gaze Estimation Model: {:.3f}ms".format(
        1000 * (time.time() - fourth_model_time)))

    logging.info("############## End ######################### ")
    Total_Model_Load_Time = 1000 * (time.time() - start_time)

    ##### LOADING VIDEO FILE #####
    if video_path == "cam":
        IF = InputFeeder("cam")
    else:
        IF = InputFeeder("video", video_path)
    IF.load_data()

    ##### MODEL INFERENCE #####
    start_inf_time = time.time()
    for flag, frame in IF.next_batch():
        if not flag:
            break
        if count % 5 == 0:
            cv2.imshow('frame', cv2.resize(frame, (500, 500)))
        key = cv2.waitKey(60)
        count = count + 1

        start_time_1 = time.time()
        face, face_coordinates = FD.predict(frame, args.it)
        fd_inference_time += time.time() - start_time_1

        start_time_2 = time.time()
        left_eye_image, right_eye_image, eye_coordinates = FLD.predict(face, args.it)
        fld_inference_time += time.time() - start_time_2

        start_time_3 = time.time()
        head_pose_angles = HPE.predict(face, args.it)
        hp_inference_time += time.time() - start_time_3

        start_time_4 = time.time()
        mouse_coordinates, gaze_vector = GE.predict(left_eye_image, right_eye_image,
                                                    head_pose_angles, args.it)
        ge_inference_time += time.time() - start_time_4

        if len(visualize) != 0:
            frame_visualize = frame.copy()
            if "fd" in visualize:
                if len(visualize) == 1:
                    cv2.rectangle(frame_visualize,
                                  (face_coordinates[0], face_coordinates[1]),
                                  (face_coordinates[2], face_coordinates[3]),
                                  (255, 0, 255), 2)
                else:
                    frame_visualize = face.copy()
            if "fld" in visualize:
                if not "fd" in visualize:
                    frame_visualize = face.copy()
                cv2.circle(frame_visualize,
                           (eye_coordinates['left_eye'][0],
                            eye_coordinates['left_eye'][1]),
                           25, (0, 0, 255), 2)
                cv2.circle(frame_visualize,
                           (eye_coordinates['right_eye'][0],
                            eye_coordinates['right_eye'][1]),
                           25, (0, 0, 255), 2)
            if "hp" in visualize:
                cv2.putText(
                    frame_visualize,
                    "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".format(
                        head_pose_angles[0], head_pose_angles[1], head_pose_angles[2]),
                    (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.255, (0, 255, 0), 1)
            if "ge" in visualize:
                h = face.shape[0]
                arrow = h * 0.7
                arrow_X = gaze_vector[0] * arrow
                # y is negated because image coordinates grow downwards
                arrow_Y = -gaze_vector[1] * arrow
                cv2.arrowedLine(
                    frame_visualize,
                    (eye_coordinates['left_eye'][0], eye_coordinates['left_eye'][1]),
                    (int(eye_coordinates['left_eye'][0] + arrow_X),
                     int(eye_coordinates['left_eye'][1] + arrow_Y)),
                    (255, 0, 0), 2)
                cv2.arrowedLine(
                    frame_visualize,
                    (eye_coordinates['right_eye'][0], eye_coordinates['right_eye'][1]),
                    (int(eye_coordinates['right_eye'][0] + arrow_X),
                     int(eye_coordinates['right_eye'][1] + arrow_Y)),
                    (255, 0, 0), 2)
            if count % 5 == 0:
                cv2.imshow('Visualization', cv2.resize(frame_visualize, (500, 500)))

        if count % 5 == 0:
            MC.move(mouse_coordinates[0], mouse_coordinates[1])
        if key == 27:
            break

    Total_Inference_Time = time.time() - start_inf_time
    if count > 0:
        logging.info("############## Models Inference time #######")
        logging.info("Face Detection:{:.3f}ms".format(
            1000 * fd_inference_time / count))
        logging.info("Facial Landmarks Detection:{:.3f}ms".format(
            1000 * fld_inference_time / count))
        logging.info("Headpose Estimation:{:.3f}ms".format(
            1000 * hp_inference_time / count))
        logging.info("Gaze Estimation:{:.3f}ms".format(
            1000 * ge_inference_time / count))
        logging.info("############## End #########################")

    logging.info("############## Summarized Results ##########")
    logging.info("Total Model Load Time: {:.3f}ms".format(Total_Model_Load_Time))
    logging.info("Total Inference Time: {:.3f}s".format(Total_Inference_Time))
    logging.info("FPS:{}".format(count / Total_Inference_Time))
    logging.info("############ End ###########################")
    cv2.destroyAllWindows()
    IF.close()
def main(args):
    ## Load the models
    try:
        input_file = args.input
        mode_visualization = args.mode_visualization

        if input_file == "CAM":
            input_feeder = InputFeeder("cam")
        else:
            if not os.path.isfile(input_file):
                log.error("ERROR: INPUT PATH IS NOT VALID")
                exit(1)
            input_feeder = InputFeeder("video", input_file)

        face_detection_class = Face_Detection(model=args.face_detection,
                                              device=args.device,
                                              extensions=args.cpu_extension)
        face_landmarks_class = Landmarks_Detection(model=args.face_landmark,
                                                   device=args.device,
                                                   extensions=args.cpu_extension)
        head_pose_class = Head_Pose(model=args.head_pose,
                                    device=args.device,
                                    extensions=args.cpu_extension)
        gaze_estimation_class = Gaze_Estimation(model=args.gaze_estimation,
                                                device=args.device,
                                                extensions=args.cpu_extension)

        mouse_control = MouseController('medium', 'fast')

        start_time = time.time()

        ## Load the models one by one, with timing
        face_det_time = time.time()
        face_detection_class.load_model()
        print("Face Detection load time: {:.3f} ms".format(
            (time.time() - face_det_time) * 1000))

        face_land_time = time.time()
        face_landmarks_class.load_model()
        print("Facial landmarks load time: {:.3f} ms".format(
            (time.time() - face_land_time) * 1000))

        head_po_time = time.time()
        head_pose_class.load_model()
        print("Head pose load time: {:.3f} ms".format(
            (time.time() - head_po_time) * 1000))

        gaze_est_time = time.time()
        gaze_estimation_class.load_model()
        print("Gaze estimation load time: {:.3f} ms".format(
            (time.time() - gaze_est_time) * 1000))

        total_time = time.time() - start_time
        print("Total loading time: {:.3f} ms".format(total_time * 1000))
        print("All models are loaded successfully..")

        input_feeder.load_data()
        print("Feeder is loaded")
    except Exception as e:
        print('Error occurred while loading models in app:', e)

    ## Perform inference
    try:
        start_inference_time = time.time()
        frame_count = 0
        for flag, frame in input_feeder.next_batch():
            if not flag:
                break
            frame_count += 1
            if frame_count % 5 == 0:
                cv2.imshow('video', cv2.resize(frame, (700, 700)))
            key = cv2.waitKey(60)

            crop_face, face_coords = face_detection_class.predict(
                frame.copy(), args.conf_threshold)
            if type(crop_face) == int:
                log.error("Unable to detect the face.")
                if key == 27:
                    break
                continue

            ## Run the remaining models
            head_angle = head_pose_class.predict(crop_face.copy())
            left_eye, right_eye, eye_coords = face_landmarks_class.predict(
                crop_face.copy())
            mouse_position, gaze_vector = gaze_estimation_class.predict(
                left_eye, right_eye, head_angle)

            ## Check for extra visualization flags
            if not len(mode_visualization) == 0:
                p_frame = frame.copy()
                if 'fd' in mode_visualization:
                    p_frame = crop_face
                if 'fl' in mode_visualization:
                    cv2.rectangle(crop_face,
                                  (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                                  (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                                  (0, 255, 0), 1)
                    cv2.rectangle(crop_face,
                                  (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                                  (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                                  (0, 255, 0), 1)
                if 'hp' in mode_visualization:
                    cv2.putText(
                        p_frame,
                        "Head Positions: :{:.2f} :{:.2f} :{:.2f}".format(
                            head_angle[0], head_angle[1], head_angle[2]),
                        (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
                if 'ge' in mode_visualization:
                    i, j, k = int(gaze_vector[0] * 12), int(gaze_vector[1] * 12), 160
                    l_eye = cv2.line(left_eye.copy(), (i - k, j - k), (i + k, j + k),
                                     (0, 255, 255), 2)
                    cv2.line(l_eye, (i - k, j + k), (i + k, j - k), (255, 0, 255), 2)
                    r_eye = cv2.line(right_eye.copy(), (i - k, j - k), (i + k, j + k),
                                     (0, 255, 255), 2)
                    cv2.line(r_eye, (i - k, j + k), (i + k, j - k), (0, 255, 255), 2)
                    l_eye = crop_face[eye_coords[0][1]:eye_coords[0][3],
                                      eye_coords[0][0]:eye_coords[0][2]]
                    r_eye = crop_face[eye_coords[1][1]:eye_coords[1][3],
                                      eye_coords[1][0]:eye_coords[1][2]]
                cv2.imshow("visual for client", cv2.resize(p_frame, (700, 700)))

            if frame_count % 1 == 0:
                mouse_control.move(mouse_position[0], mouse_position[1])
            if key == 27:
                break

        ## Compute inference time and frames per second
        total_infer_time = time.time() - start_inference_time
        frames_per_sec = int(frame_count) / total_infer_time
        print("Frames processed: {}".format(frame_count))
        print("Total inference time: {:.3f} seconds".format(total_infer_time))
        print("FPS: {:.3f} fps".format(frames_per_sec))
    except Exception as e:
        print('Error while performing inference in the app file:', e)

    print("All Done...")
    cv2.destroyAllWindows()
    input_feeder.close()
class Computer_Pointer_Controller:

    def __init__(self, args):
        self.args = args
        self.logger = logging.getLogger()

        # Load the objects corresponding to the models
        self.face_detection = Face_Detection(args.face_detection_model, args.device,
                                             args.extensions, args.perf_counts)
        self.gaze_estimation = Gaze_Estimation(args.gaze_estimation_model, args.device,
                                               args.extensions, args.perf_counts)
        self.head_pose_estimation = Head_Pose_Estimation(
            args.head_pose_estimation_model, args.device, args.extensions,
            args.perf_counts)
        self.facial_landmarks_detection = Facial_Landmarks_Detection(
            args.facial_landmarks_detection_model, args.device, args.extensions,
            args.perf_counts)

        start_models_load_time = time.time()
        self.face_detection.load_model()
        self.gaze_estimation.load_model()
        self.head_pose_estimation.load_model()
        self.facial_landmarks_detection.load_model()

        input_T = args.input_type
        input_F = args.input_file
        if input_T.lower() == 'cam':
            # Open the video feed
            self.feed = InputFeeder(args.input_type, args.input_file)
            self.feed.load_data()
        else:
            if not os.path.isfile(input_F):
                self.logger.error('Unable to find specified video file')
                exit(1)
            file_extension = input_F.split(".")[-1]
            if file_extension in ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']:
                self.feed = InputFeeder(args.input_type, args.input_file)
                self.feed.load_data()
            else:
                self.logger.error(
                    "Unsupported file extension. Allowed: ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']")
                exit(1)

        print("Models total loading time:", time.time() - start_models_load_time)

        # Init mouse controller
        self.mouse_controller = MouseController('low', 'fast')

    def run(self):
        inferences_times = []
        face_detections_times = []
        for batch in self.feed.next_batch():
            if batch is None:
                break
            # As we want the webcam to act as a mirror, flip the frame
            batch = cv2.flip(batch, 1)
            inference_time = time.time()

            face = self.face_detection.predict(batch)
            if face is None:
                self.logger.error('Unable to detect the face.')
                continue
            face_detections_times.append(time.time() - inference_time)

            left_eye_image, right_eye_image = self.facial_landmarks_detection.predict(face)
            if left_eye_image is None or right_eye_image is None:
                continue

            head_pose_angles = self.head_pose_estimation.predict(face)
            if head_pose_angles is None:
                continue

            vector = self.gaze_estimation.predict(left_eye_image, right_eye_image,
                                                  head_pose_angles)
            inferences_times.append(time.time() - inference_time)

            if self.args.show_face == "True":
                cv2.imshow("Detected face", face)
                cv2.waitKey(1)

            self.mouse_controller.move(vector[0], vector[1])

        self.feed.close()
        cv2.destroyAllWindows()
        print("Average face detection inference time:",
              sum(face_detections_times) / len(face_detections_times))
        print("Average total inferences time:",
              sum(inferences_times) / len(inferences_times))
def infer_on_stream(args):
    network_fd = Face_Detection(args.face_detection_model, args.device)
    network_hp = Head_Pose_Estimation(args.head_pose_model, args.device)
    network_fl = Facial_Landmarks_Detection(args.facial_landmarks_model, args.device)
    network_ge = Gaze_Estimation(args.gaze_estimation_model, args.device)
    mouse_cont = MouseController(args.mouse_precision, args.mouse_speed)

    starting_loading = time.time()
    network_fd.load_model()
    network_hp.load_model()
    network_fl.load_model()
    network_ge.load_model()
    duration_loading = time.time() - starting_loading

    input_type = handle_input(args.input)
    feed = InputFeeder(input_type=input_type, input_file=args.input)
    feed.load_data()

    starting_inference = time.time()
    for flag, frame in feed.next_batch():
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        out_frame, face, face_coords = network_fd.predict(frame, args.prob_threshold,
                                                          args.display)
        if len(face_coords) == 0:
            log.error("There is no face in the stream!")
            continue
        out_frame, head_angle = network_hp.predict(out_frame, face, face_coords,
                                                   args.display)
        out_frame, eye_left, eye_right, eye_center = network_fl.predict(
            out_frame, face, face_coords, args.display)
        out_frame, gaze = network_ge.predict(out_frame, eye_left, eye_right,
                                             eye_center, head_angle, args.display)
        mouse_cont.move(gaze[0], gaze[1])
        if key_pressed == 27:
            break
        cv2.imshow('Visualization', cv2.resize(out_frame, (600, 400)))
    duration_inference = time.time() - starting_inference

    print("Total loading time is: {}\nTotal inference time is: {} ".format(
        duration_loading, duration_inference))
    feed.close()
    cv2.destroyAllWindows()
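# handle_input() is used above but not defined in this snippet. A plausible
# sketch, assuming the same conventions as the other mains in this collection
# ('cam' selects the webcam, otherwise image vs. video is decided by file
# extension); the body is an assumption, not the original implementation:
def handle_input(input_arg):
    image_extensions = ['jpg', 'jpeg', 'bmp', 'png', 'tif']
    if input_arg == 'cam':
        return 'cam'
    if input_arg.split(".")[-1].lower() in image_extensions:
        return 'image'
    return 'video'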