import datetime
import logging
import pathlib
from typing import Optional

import cv2
import numpy as np
import yacs.config

# Project-internal imports; adjust the paths to this repository's layout.
from gaze_estimation import GazeEstimationMethod, GazeEstimator
from gaze_estimation.gaze_estimator.common import (Face, FacePartsName,
                                                   Visualizer)

logger = logging.getLogger(__name__)


class Demo:
    QUIT_KEYS = {27, ord('q')}

    def __init__(self, config: yacs.config.CfgNode):
        self.config = config
        self.gaze_estimator = GazeEstimator(config)
        self.visualizer = Visualizer(self.gaze_estimator.camera)

        self.cap = self._create_capture()
        self.output_dir = self._create_output_dir()
        self.writer = self._create_video_writer()

        self.stop = False
        self.show_bbox = self.config.demo.show_bbox
        self.show_head_pose = self.config.demo.show_head_pose
        self.show_landmarks = self.config.demo.show_landmarks
        self.show_normalized_image = self.config.demo.show_normalized_image
        self.show_template_model = self.config.demo.show_template_model

    def run(self) -> None:
        while True:
            if self.config.demo.display_on_screen:
                self._wait_key()
                if self.stop:
                    break

            ok, frame = self.cap.read()
            if not ok:
                break

            undistorted = cv2.undistort(
                frame, self.gaze_estimator.camera.camera_matrix,
                self.gaze_estimator.camera.dist_coefficients)

            self.visualizer.set_image(frame.copy())
            faces = self.gaze_estimator.detect_faces(undistorted)
            for face in faces:
                self.gaze_estimator.estimate_gaze(undistorted, face)
                self._draw_face_bbox(face)
                self._draw_head_pose(face)
                self._draw_landmarks(face)
                self._draw_face_template_model(face)
                self._draw_gaze_vector(face)
                self._display_normalized_image(face)

            if self.config.demo.use_camera:
                # Mirror the displayed image for webcam input.
                self.visualizer.image = self.visualizer.image[:, ::-1]
            if self.writer:
                self.writer.write(self.visualizer.image)
            if self.config.demo.display_on_screen:
                cv2.imshow('frame', self.visualizer.image)

        self.cap.release()
        if self.writer:
            self.writer.release()

    def _create_capture(self) -> cv2.VideoCapture:
        if self.config.demo.use_camera:
            cap = cv2.VideoCapture(0)
        elif self.config.demo.video_path:
            cap = cv2.VideoCapture(self.config.demo.video_path)
        else:
            raise ValueError('Either demo.use_camera or demo.video_path must be set.')
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.gaze_estimator.camera.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.gaze_estimator.camera.height)
        return cap

    def _create_output_dir(self) -> Optional[pathlib.Path]:
        if not self.config.demo.output_dir:
            return None
        output_dir = pathlib.Path(self.config.demo.output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        return output_dir

    @staticmethod
    def _create_timestamp() -> str:
        dt = datetime.datetime.now()
        return dt.strftime('%Y%m%d_%H%M%S')

    def _create_video_writer(self) -> Optional[cv2.VideoWriter]:
        if not self.output_dir:
            return None
        ext = self.config.demo.output_file_extension
        if ext == 'mp4':
            fourcc = cv2.VideoWriter_fourcc(*'H264')
        elif ext == 'avi':
            fourcc = cv2.VideoWriter_fourcc(*'PIM1')
        else:
            raise ValueError(f'Unsupported output file extension: {ext}')
        output_path = self.output_dir / f'{self._create_timestamp()}.{ext}'
        writer = cv2.VideoWriter(output_path.as_posix(), fourcc, 30,
                                 (self.gaze_estimator.camera.width,
                                  self.gaze_estimator.camera.height))
        if writer is None:
            raise RuntimeError('Failed to create the video writer.')
        return writer

    def _wait_key(self) -> None:
        key = cv2.waitKey(self.config.demo.wait_time) & 0xff
        if key in self.QUIT_KEYS:
            self.stop = True
        elif key == ord('b'):
            self.show_bbox = not self.show_bbox
        elif key == ord('l'):
            self.show_landmarks = not self.show_landmarks
        elif key == ord('h'):
            self.show_head_pose = not self.show_head_pose
        elif key == ord('n'):
            self.show_normalized_image = not self.show_normalized_image
        elif key == ord('t'):
            self.show_template_model = not self.show_template_model

    def _draw_face_bbox(self, face: Face) -> None:
        if not self.show_bbox:
            return
        self.visualizer.draw_bbox(face.bbox)

    def _draw_head_pose(self, face: Face) -> None:
        if not self.show_head_pose:
            return
        # Draw the axes of the model coordinate system
        length = self.config.demo.head_pose_axis_length
        self.visualizer.draw_model_axes(face, length, lw=2)

        euler_angles = face.head_pose_rot.as_euler('XYZ', degrees=True)
        pitch, yaw, roll = face.change_coordinate_system(euler_angles)
        logger.info(f'[head] pitch: {pitch:.2f}, yaw: {yaw:.2f}, '
                    f'roll: {roll:.2f}, distance: {face.distance:.2f}')

    def _draw_landmarks(self, face: Face) -> None:
        if not self.show_landmarks:
            return
        self.visualizer.draw_points(face.landmarks,
                                    color=(0, 255, 255),
                                    size=1)

    def _draw_face_template_model(self, face: Face) -> None:
        if not self.show_template_model:
            return
        self.visualizer.draw_3d_points(face.model3d,
                                       color=(255, 0, 255),
                                       size=1)

    def _display_normalized_image(self, face: Face) -> None:
        if not self.config.demo.display_on_screen:
            return
        if not self.show_normalized_image:
            return
        if self.config.mode == GazeEstimationMethod.MPIIGaze.name:
            reye = face.reye.normalized_image
            leye = face.leye.normalized_image
            normalized = np.hstack([reye, leye])
        elif self.config.mode == GazeEstimationMethod.MPIIFaceGaze.name:
            normalized = face.normalized_image
        else:
            raise ValueError(f'Unknown mode: {self.config.mode}')
        if self.config.demo.use_camera:
            normalized = normalized[:, ::-1]
        cv2.imshow('normalized', normalized)

    def _draw_gaze_vector(self, face: Face) -> None:
        length = self.config.demo.gaze_visualization_length
        if self.config.mode == GazeEstimationMethod.MPIIGaze.name:
            for key in [FacePartsName.REYE, FacePartsName.LEYE]:
                eye = getattr(face, key.name.lower())
                self.visualizer.draw_3d_line(
                    eye.center, eye.center + length * eye.gaze_vector)
                pitch, yaw = np.rad2deg(eye.vector_to_angle(eye.gaze_vector))
                logger.info(
                    f'[{key.name.lower()}] pitch: {pitch:.2f}, yaw: {yaw:.2f}')
        elif self.config.mode == GazeEstimationMethod.MPIIFaceGaze.name:
            self.visualizer.draw_3d_line(
                face.center, face.center + length * face.gaze_vector)
            pitch, yaw = np.rad2deg(face.vector_to_angle(face.gaze_vector))
            logger.info(f'[face] pitch: {pitch:.2f}, yaw: {yaw:.2f}')
        else:
            raise ValueError(f'Unknown mode: {self.config.mode}')
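# A hedged usage sketch for the Demo class above: a hypothetical entry point
# assuming a load_config() helper that parses the CLI and returns the YACS
# CfgNode the class expects. The import path is an assumption, not this
# repository's confirmed API.
from gaze_estimation.utils import load_config  # hypothetical helper


def main():
    config = load_config()  # builds the CfgNode consumed by Demo
    Demo(config).run()


if __name__ == '__main__':
    main()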
t = -time.time()  # measure model loading time
faceDetector = FaceDetector(precision=args.precision,
                            concurrency=args.concurrency,
                            device=args.device,
                            extensions=args.ext)
eyeDetector = EyeDetector(precision=args.precision,
                          concurrency=args.concurrency,
                          device=args.device,
                          extensions=args.ext)
headPoseEstimator = HeadPoseEstimator(precision=args.precision,
                                      concurrency=args.concurrency,
                                      device=args.device,
                                      extensions=args.ext)
gazeEstimator = GazeEstimator(precision=args.precision,
                              concurrency=args.concurrency,
                              device=args.device,
                              extensions=args.ext)
mouseController = MouseController(precision='high',
                                  speed=args.speed.lower(),
                                  failsafe=args.failsafe)
t += time.time()
logging.info(f'Model Loading Time: {t:.4} s')
logging.info('Running...')

q = deque()  # the processing queue
faces_produced = 0
head_poses_produced = 0
eyes_produced = 0
hpae_consumed = 0
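# The deque and the *_produced / *_consumed counters above suggest an
# asynchronous producer/consumer pipeline. A self-contained toy sketch of
# that bookkeeping pattern follows; FakeRequest is a stand-in, not the
# project's actual async-inference API.
from collections import deque


class FakeRequest:
    """Toy async-inference handle used only to illustrate the queue flow."""

    def __init__(self, frame_id):
        self.frame_id = frame_id

    def ready(self):
        return True  # a real request would report completion status here

    def output(self):
        return f'faces for frame {self.frame_id}'


pipeline = deque()
produced = consumed = 0

# Producer side: enqueue one request per incoming frame.
for frame_id in range(3):
    pipeline.append(FakeRequest(frame_id))
    produced += 1

# Consumer side: pop finished requests from the front to preserve frame order.
while pipeline and pipeline[0].ready():
    print(pipeline.popleft().output())
    consumed += 1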
# Assumes module-level imports: os, time, cv2, logging as log, plus the
# project's model wrappers, InputFeeder, MouseController, and the
# get_crop_image / draw_gaze_line helpers.
def infer_on_stream(args):
    try:
        log.basicConfig(
            level=log.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[log.FileHandler("app.log"),
                      log.StreamHandler()])
        mouse_controller = MouseController(precision="low", speed="fast")

        start_model_load_time = time.time()
        face_detector = FaceDetector(args.model_face_detection)
        facial_landmarks_detector = FacialLandmarksDetector(
            args.model_facial_landmarks_detection)
        head_pose_estimator = HeadPoseEstimator(
            args.model_head_pose_estimation)
        gaze_estimator = GazeEstimator(args.model_gaze_estimation)
        face_detector.load_model()
        facial_landmarks_detector.load_model()
        head_pose_estimator.load_model()
        gaze_estimator.load_model()
        total_model_load_time = time.time() - start_model_load_time
        log.info("Model load time: {:.1f}ms".format(
            1000 * total_model_load_time))

        output_directory = os.path.join(args.output_path, args.device)
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        feed = InputFeeder(args.input_type, args.input_path)
        feed.load_data()
        out_video = feed.get_out_video(output_directory)

        frame_counter = 0
        start_inference_time = time.time()
        total_preprocess_time = 0
        while True:
            try:
                frame = next(feed.next_batch())
            except StopIteration:
                break
            frame_counter += 1
            face_boxes = face_detector.predict(frame)
            for face_box in face_boxes:
                face_image = get_crop_image(frame, face_box)
                eye_boxes, eye_centers = facial_landmarks_detector.predict(
                    face_image)
                left_eye_image, right_eye_image = [
                    get_crop_image(face_image, eye_box)
                    for eye_box in eye_boxes
                ]
                head_pose_angles = head_pose_estimator.predict(face_image)
                gaze_x, gaze_y = gaze_estimator.predict(
                    right_eye_image, head_pose_angles, left_eye_image)
                draw_gaze_line(frame, face_box, eye_centers, gaze_x, gaze_y)
                if args.show_input:
                    cv2.imshow('im', frame)
                if args.move_mouse:
                    mouse_controller.move(gaze_x, gaze_y)
                total_preprocess_time += face_detector.preprocess_time + \
                    facial_landmarks_detector.preprocess_time + \
                    head_pose_estimator.preprocess_time + \
                    gaze_estimator.preprocess_time
                break  # only the first detected face is processed

            if out_video is not None:
                out_video.write(frame)
            if args.input_type == "image":
                cv2.imwrite(
                    os.path.join(output_directory, 'output_image.jpg'),
                    frame)
            key_pressed = cv2.waitKey(60)
            if key_pressed == 27:
                break

        total_time = time.time() - start_inference_time
        total_inference_time = round(total_time, 1)
        fps = frame_counter / total_inference_time
        log.info("Inference time: {:.1f}ms".format(
            1000 * total_inference_time))
        log.info("Input/output preprocess time: {:.1f}ms".format(
            1000 * total_preprocess_time))
        log.info("FPS: {}".format(fps))
        with open(os.path.join(output_directory, 'stats.txt'), 'w') as f:
            f.write(str(total_inference_time) + '\n')
            f.write(str(total_preprocess_time) + '\n')
            f.write(str(fps) + '\n')
            f.write(str(total_model_load_time) + '\n')
        feed.close()
        cv2.destroyAllWindows()
    except Exception as e:
        log.exception("Something went wrong while running inference: " + str(e))
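# get_crop_image and draw_gaze_line are defined elsewhere in this project.
# A minimal sketch of the cropping helper, assuming boxes are pixel
# (xmin, ymin, xmax, ymax) tuples; the signature is inferred from the call
# sites above.
def get_crop_image(image, box):
    """Crop a box from the image, clamped to the image bounds."""
    xmin, ymin, xmax, ymax = [int(v) for v in box]
    h, w = image.shape[:2]
    xmin, ymin = max(0, xmin), max(0, ymin)
    xmax, ymax = min(w, xmax), min(h, ymax)
    return image[ymin:ymax, xmin:xmax]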
# Assumes module-level imports: math, time, cv2, logging as log, plus the
# project's model wrappers, InputFeeder, MouseController, and Drawer.
def main(args):
    fd_infer_time, ld_infer_time, hpe_infer_time, ge_infer_time = 0, 0, 0, 0

    start = time.time()
    face_detector = FaceDetector(args.model_fd, args.device_fd, args.ext_fd)
    fd_load_time = time.time() - start
    start = time.time()
    landmarks_detector = LandmarksDetector(args.model_ld, args.device_ld,
                                           args.ext_ld)
    ld_load_time = time.time() - start
    start = time.time()
    head_pose_estimator = HeadPoseEstimator(args.model_hpe, args.device_hpe,
                                            args.ext_hpe)
    hpe_load_time = time.time() - start
    start = time.time()
    gaze_estimator = GazeEstimator(args.model_ge, args.device_ge, args.ext_ge)
    ge_load_time = time.time() - start

    log.info("Models Loading...")
    log.info("Face detection load time: {:.4f} ms".format(
        1000 * fd_load_time))
    log.info("Landmarks estimation load time: {:.4f} ms".format(
        1000 * ld_load_time))
    log.info("Head pose estimation load time: {:.4f} ms".format(
        1000 * hpe_load_time))
    log.info("Gaze estimation load time: {:.4f} ms".format(
        1000 * ge_load_time))
    log.info('All Models loaded')

    mouse_controller = MouseController('high', 'fast')

    # An input of 0 selects the webcam.
    if str(args.input) == '0':
        input_feeder = InputFeeder('cam', args.input)
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        input_feeder = InputFeeder('image', args.input)
    else:
        input_feeder = InputFeeder('video', args.input)
    input_feeder.load_data()
    init_w = input_feeder.init_w
    init_h = input_feeder.init_h

    counter = 0
    for flag, frame in input_feeder.next_batch():
        if not flag:
            break
        counter += 1
        key = cv2.waitKey(60)
        try:
            start = time.time()
            outputs = face_detector.predict(frame)
            face = face_detector.preprocess_output(frame, outputs, init_w,
                                                   init_h)
            fd_infer_time += time.time() - start

            start = time.time()
            outputs = landmarks_detector.predict(face)
            left_eye, right_eye, real_landmarks = \
                landmarks_detector.preprocess_output(face, outputs)
            ld_infer_time += time.time() - start

            start = time.time()
            outputs = head_pose_estimator.predict(face)
            head_pose_angles = head_pose_estimator.preprocess_output(outputs)
            hpe_infer_time += time.time() - start

            start = time.time()
            outputs = gaze_estimator.predict(left_eye, right_eye,
                                             head_pose_angles)
            gaze = gaze_estimator.preprocess_output(outputs)
            ge_infer_time += time.time() - start

            log.info("Face detection time: {:.4f} ms".format(
                1000 * fd_infer_time / counter))
            log.info("Landmarks estimation time: {:.4f} ms".format(
                1000 * ld_infer_time / counter))
            log.info("Head pose estimation time: {:.4f} ms".format(
                1000 * hpe_infer_time / counter))
            log.info("Gaze estimation time: {:.4f} ms".format(
                1000 * ge_infer_time / counter))

            if str(args.input) != '0':
                drawer = Drawer(face, real_landmarks, head_pose_angles, gaze)
                drawer.draw_landmarks(20)
                drawer.draw_head_pose()
                drawer.draw_gazes()
                drawer.show()

            # Rotate the gaze vector by the head-roll angle so mouse motion
            # stays aligned with the screen axes.
            roll_cos = math.cos(head_pose_angles[2] * math.pi / 180)
            roll_sin = math.sin(head_pose_angles[2] * math.pi / 180)
            mouse_x = gaze[0] * roll_cos + gaze[1] * roll_sin
            mouse_y = -gaze[0] * roll_sin + gaze[1] * roll_cos
            mouse_controller.move(mouse_x, mouse_y)
        except Exception as e:
            log.error(e)
        finally:
            if key == 27:
                break
    input_feeder.close()
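# The mouse mapping above is a 2-D rotation of the gaze vector by the head's
# roll angle. A standalone sketch of the same math; the function name is
# illustrative.
import math


def compensate_roll(gaze_x, gaze_y, roll_deg):
    """Rotate a 2-D gaze vector by the head-roll angle given in degrees."""
    r = math.radians(roll_deg)
    return (gaze_x * math.cos(r) + gaze_y * math.sin(r),
            -gaze_x * math.sin(r) + gaze_y * math.cos(r))


# A 90-degree roll maps a purely horizontal gaze onto the vertical axis:
# compensate_roll(1.0, 0.0, 90.0) -> (~0.0, ~-1.0)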
# Assumes module-level imports: os, cv2, plus the project's model wrappers,
# InputFeeder, and MouseController.
def main(args):
    print("Main script running...")
    log_name = 'stats_' + args.device + '_' + args.hpe + args.fld + args.ge
    if not os.path.exists('output'):
        os.makedirs('output')
    print(f"Logging to: output/{log_name}")
    log = open('output/' + log_name, 'w+')

    print("Initializing models...")
    fd = FaceDetector(
        model_name=
        'models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001',
        device=args.device,
        extensions=None)
    fd.load_model()
    if args.v:
        print(f"Face Detection Load Time: {fd.load_time}")
    hpe = HeadPoseEstimator(
        model_name=
        f'models/intel/head-pose-estimation-adas-0001/{args.hpe}/head-pose-estimation-adas-0001',
        device=args.device,
        extensions=None)
    hpe.load_model()
    if args.v:
        print(f"Head Pose Estimation Load Time: {hpe.load_time}")
    fld = FacialLandmarkDetector(
        model_name=
        f'models/intel/landmarks-regression-retail-0009/{args.fld}/landmarks-regression-retail-0009',
        device=args.device,
        extensions=None)
    fld.load_model()
    if args.v:
        print(f"Facial Landmarks Detection Load Time: {fld.load_time}")
    ge = GazeEstimator(
        model_name=
        f'models/intel/gaze-estimation-adas-0002/{args.ge}/gaze-estimation-adas-0002',
        device=args.device,
        extensions=None)
    ge.load_model()
    if args.v:
        print(f"Gaze Estimation Load Time: {ge.load_time}")

    image = False
    print("Initializing source feed...")
    feed = InputFeeder(input_type=args.input_type, input_file=args.input_file)
    if args.input_type == 'image':
        image = True
    feed.load_data()

    for batch in feed.next_batch():
        if args.v:
            print()
        cv2.imshow('Batch', batch)
        if image:
            cv2.imwrite('output/Batch.png', batch)

        # Face detection
        coords, bounding_face = fd.predict(batch)
        if not coords:
            print("No face")
            continue
        if image:
            cv2.imwrite('output/Face.png', bounding_face)
        box = coords[0]
        face = bounding_face[box[1]:box[3], box[0]:box[2]]
        if args.v:
            print(f"Face Time: {fd.infer_time}")
        log.write("FD_infer: " + str(fd.infer_time) + "\n")
        if image:
            cv2.imshow('Cropped Face', face)

        # Landmark detection
        coords, landmark_detection, landmark_points = fld.predict(face)
        if image:
            cv2.imwrite('output/Landmarks.png', landmark_detection)
            cv2.imshow('Landmark Detection', landmark_detection)
        if args.v:
            print(f"Landmark Time: {fld.infer_time}")
        log.write("FLD_infer: " + str(fld.infer_time) + "\n")
        right_box, left_box = coords[0:2]
        if args.v:
            print(f"Eye Coords: {coords}")
        if left_box is None or right_box is None:
            print("No eyes")
            continue
        left_eye = face[left_box[1]:left_box[3], left_box[0]:left_box[2]]
        cv2.putText(face, 'L', (left_box[0], left_box[3]),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        right_eye = face[right_box[1]:right_box[3], right_box[0]:right_box[2]]
        cv2.putText(face, 'R', (right_box[0], right_box[3]),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        if args.v:
            print(f"Eye Shape: {left_eye.shape} :: {right_eye.shape}")

        # Head pose estimation
        head_yaw, head_pitch, head_roll = hpe.predict(face)
        if args.v:
            print(f"Head Pose Time: {hpe.infer_time}")
        log.write("HPE_infer: " + str(hpe.infer_time) + "\n")
        head_angles = [head_yaw[0][0], head_pitch[0][0], head_roll[0][0]]

        # Gaze estimation -- expects the pose as (yaw, pitch, roll)
        gaze = ge.predict(left_eye, right_eye, head_angles)
        if args.v:
            print(f"Gaze Time: {ge.infer_time}")
        log.write("GE_infer: " + str(ge.infer_time) + "\n")
        gaze_point = (int(gaze[0][0] * 50), int(gaze[0][1] * 50))
        arrows = cv2.arrowedLine(face, landmark_points[0],
                                 (landmark_points[0][0] + gaze_point[0],
                                  landmark_points[0][1] - gaze_point[1]),
                                 (0, 0, 255), 2)
        arrows = cv2.arrowedLine(face, landmark_points[1],
                                 (landmark_points[1][0] + gaze_point[0],
                                  landmark_points[1][1] - gaze_point[1]),
                                 (0, 0, 255), 2)
        if image:
            cv2.imwrite('output/Gaze.png', arrows)
        if not image:
            mouse = MouseController(precision='medium', speed='medium')
            mouse.move(gaze[0][0], gaze[0][1])
        if image:
            cv2.imshow('Arrows', arrows)
            log.write("FD_LoadTime: " + str(fd.load_time) + "\n")
            log.write("FD_PreprocessTime: " + str(fd.preprocess_input_time) + "\n")
            log.write("FD_PostprocessTime: " + str(fd.preprocess_output_time) + "\n")
            log.write("FLD_LoadTime: " + str(fld.load_time) + "\n")
            log.write("FLD_PreprocessTime: " + str(fld.preprocess_input_time) + "\n")
            log.write("FLD_PostprocessTime: " + str(fld.preprocess_output_time) + "\n")
            log.write("HPE_LoadTime: " + str(hpe.load_time) + "\n")
            log.write("HPE_PreprocessTime: " + str(hpe.preprocess_input_time) + "\n")
            log.write("GE_LoadTime: " + str(ge.load_time) + "\n")
            log.write("GE_PreprocessTime: " + str(ge.preprocess_input_time) + "\n")
            cv2.waitKey(0)
        else:
            if cv2.waitKey(15) & 0xFF == ord('q'):
                break

    feed.close()
    log.close()
    cv2.destroyAllWindows()
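# MouseController comes from the course starter code these scripts build on.
# A minimal sketch of the pyautogui-based version they typically assume; the
# precision/speed mappings are illustrative.
import pyautogui


class MouseController:
    """Move the cursor relative to its position based on a gaze vector."""

    def __init__(self, precision='medium', speed='medium'):
        precision_dict = {'high': 100, 'low': 1000, 'medium': 500}
        speed_dict = {'fast': 1, 'slow': 10, 'medium': 5}
        self.precision = precision_dict[precision]
        self.speed = speed_dict[speed]

    def move(self, x, y):
        # Screen y grows downward, so the gaze y component is negated.
        pyautogui.moveRel(x * self.precision, -1 * y * self.precision,
                          duration=self.speed)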
# Assumes module-level imports: os, math, cv2, numpy as np, logging as log,
# plus the project's model wrappers, InputFeeder, and MouseController.
def main():
    args = build_argparser().parse_args()
    input_file = args.input
    logger = log.getLogger()

    if input_file == "CAM":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(input_file):
            logger.error("Input path should be a file")
            exit(1)
        input_feeder = InputFeeder("video", input_file)

    face_detector = FaceDetector(
        args.face_detection_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    face_landmark_detector = FaceLandmarkDetector(
        args.face_landmark_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    head_pose_estimator = HeadPoseEstimator(
        args.head_pose_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    gaze_estimator = GazeEstimator(
        args.gaze_estimation_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    mouse_controller = MouseController("medium", "fast")

    face_detector.load_model()
    face_landmark_detector.load_model()
    head_pose_estimator.load_model()
    gaze_estimator.load_model()
    input_feeder.load_data()

    width = 1000
    height = int(width * 9 / 16)

    for flag, frame in input_feeder.next_batch():
        if not flag:
            break
        pressed_key = cv2.waitKey(60)

        face_detected = face_detector.predict(frame)
        if face_detected:
            face_coordinates, face_image = face_detected
            if not face_coordinates:
                continue
        else:
            continue
        if "fd" in args.visualization:
            cv2.rectangle(
                frame,
                (face_coordinates[0], face_coordinates[1]),
                (face_coordinates[2], face_coordinates[3]),
                (36, 255, 12),
                2,
            )
            cv2.putText(
                frame,
                "Face Detected",
                (face_coordinates[0], face_coordinates[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (36, 255, 12),
                2,
            )

        left_eye_img, right_eye_img, eye_coords = face_landmark_detector.predict(
            face_image
        )
        if "fl" in args.visualization:
            # Eye boxes are relative to the face crop; shift them into
            # full-frame coordinates.
            frame_eye_coords_min = (
                np.array(eye_coords)[:, :2] + np.array(face_coordinates)[:2]
            )
            frame_eye_coords_max = (
                np.array(eye_coords)[:, 2:] + np.array(face_coordinates)[:2]
            )
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[0][0], frame_eye_coords_min[0][1]),
                (frame_eye_coords_max[0][0], frame_eye_coords_max[0][1]),
                (36, 255, 12),
                2,
            )
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[1][0], frame_eye_coords_min[1][1]),
                (frame_eye_coords_max[1][0], frame_eye_coords_max[1][1]),
                (36, 255, 12),
                2,
            )

        head_pose_estimate = head_pose_estimator.predict(face_image)
        if "hp" in args.visualization:
            cv2.putText(
                frame,
                "yaw:{:.1f}|pitch:{:.1f}|roll:{:.1f}".format(*head_pose_estimate),
                (20, 35),
                cv2.FONT_HERSHEY_COMPLEX,
                1.2,
                (36, 255, 12),
                3,
            )

        mouse_coordinate, gaze_vector = gaze_estimator.predict(
            left_eye_img, right_eye_img, head_pose_estimate
        )
        if "ge" in args.visualization:
            # Rotate the camera z-axis by the head pose and project it into
            # the image to draw a gaze arrow from the face center.
            head_pose_estimate = np.array(head_pose_estimate)
            yaw, pitch, roll = head_pose_estimate * np.pi / 180.0
            focal_length = 950
            scale = 100
            origin = (
                int(face_coordinates[0]
                    + (face_coordinates[2] - face_coordinates[0]) / 2),
                int(face_coordinates[1]
                    + (face_coordinates[3] - face_coordinates[1]) / 2),
            )
            r_x = np.array(
                [
                    [1, 0, 0],
                    [0, math.cos(pitch), -math.sin(pitch)],
                    [0, math.sin(pitch), math.cos(pitch)],
                ]
            )
            r_y = np.array(
                [
                    [math.cos(yaw), 0, -math.sin(yaw)],
                    [0, 1, 0],
                    [math.sin(yaw), 0, math.cos(yaw)],
                ]
            )
            r_z = np.array(
                [
                    [math.cos(roll), -math.sin(roll), 0],
                    [math.sin(roll), math.cos(roll), 0],
                    [0, 0, 1],
                ]
            )
            r = r_z @ r_y @ r_x
            zaxis = np.array([0, 0, -1 * scale], dtype="float32")
            offset = np.array([0, 0, focal_length], dtype="float32")
            zaxis = np.dot(r, zaxis) + offset
            tip = (
                int(zaxis[0] / zaxis[2] * focal_length) + origin[0],
                int(zaxis[1] / zaxis[2] * focal_length) + origin[1],
            )
            cv2.arrowedLine(frame, origin, tip, (0, 0, 255), 3, tipLength=0.3)

        cv2.imshow("frame", cv2.resize(frame, (width, height)))
        mouse_controller.move(mouse_coordinate[0], mouse_coordinate[1])
        if pressed_key == 27:
            logger.info("Exit key pressed, stopping.")
            break
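# build_argparser() is not shown above; a sketch reconstructed from the
# attributes main() reads. Flag names, shorthands, and defaults are
# assumptions.
from argparse import ArgumentParser


def build_argparser():
    parser = ArgumentParser()
    parser.add_argument('-i', '--input', default='CAM',
                        help='Path to a video file, or CAM for the webcam')
    parser.add_argument('-d', '--device', default='CPU')
    parser.add_argument('-t', '--threshold', type=float, default=0.6)
    parser.add_argument('-e', '--extensions', default=None)
    parser.add_argument('--face_detection_model', required=True)
    parser.add_argument('--face_landmark_model', required=True)
    parser.add_argument('--head_pose_model', required=True)
    parser.add_argument('--gaze_estimation_model', required=True)
    parser.add_argument('--visualization', nargs='*', default=[],
                        help='Any of: fd, fl, hp, ge')
    return parser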
import os

import cv2
import numpy as np

# Project-internal imports; adjust the paths to this repository's layout.
from gaze_estimation import GazeEstimationMethod, GazeEstimator
from gaze_estimation.config import get_default_config
from gaze_estimation.gaze_estimator.common import (Face, FacePartsName,
                                                   Visualizer)


class GazeModel:
    def __init__(self, **params):
        self.cfg = get_default_config()
        self.cfg.merge_from_file(
            os.path.join(params.get('model'), 'config.yaml'))
        self.cfg.merge_from_list([
            'face_detector.dlib.model',
            os.path.join(os.environ['DLIB_FACE_DIR'],
                         'shape_predictor_68_face_landmarks.dat')
        ])
        self.cfg.merge_from_list([
            'gaze_estimator.checkpoint',
            os.path.join(params.get('model'), 'checkpoint.pth')
        ])
        self.cfg['gaze_estimator']['normalized_camera_params'] = os.path.join(
            params.get('model'), 'normalized_camera_params_eye.yaml')
        self.cfg['gaze_estimator']['camera_params'] = os.path.join(
            params.get('model'), 'sample_params.yaml')
        self.cfg['device'] = 'cpu'
        self.gaze_estimator = GazeEstimator(self.cfg)
        self.visualizer = Visualizer(self.gaze_estimator.camera)

    def _draw_face_bbox(self, face: Face) -> None:
        self.visualizer.draw_bbox(face.bbox)

    def _draw_head_pose(self, face: Face) -> None:
        length = self.cfg.demo.head_pose_axis_length
        self.visualizer.draw_model_axes(face, length, lw=2)
        euler_angles = face.head_pose_rot.as_euler('XYZ', degrees=True)
        pitch, yaw, roll = face.change_coordinate_system(euler_angles)

    def _draw_landmarks(self, face: Face) -> None:
        self.visualizer.draw_points(face.landmarks,
                                    color=(0, 255, 255),
                                    size=1)

    def _draw_face_template_model(self, face: Face) -> None:
        self.visualizer.draw_3d_points(face.model3d,
                                       color=(255, 0, 255),
                                       size=1)

    def _draw_gaze_vector(self, face: Face) -> None:
        length = self.cfg.demo.gaze_visualization_length
        if self.cfg.mode == GazeEstimationMethod.MPIIGaze.name:
            for key in [FacePartsName.REYE, FacePartsName.LEYE]:
                eye = getattr(face, key.name.lower())
                self.visualizer.draw_3d_line(
                    eye.center, eye.center + length * eye.gaze_vector)
                pitch, yaw = np.rad2deg(eye.vector_to_angle(eye.gaze_vector))
        elif self.cfg.mode == GazeEstimationMethod.MPIIFaceGaze.name:
            self.visualizer.draw_3d_line(
                face.center, face.center + length * face.gaze_vector)
            pitch, yaw = np.rad2deg(face.vector_to_angle(face.gaze_vector))
        else:
            raise ValueError(f'Unknown mode: {self.cfg.mode}')

    def process(self, frame):
        # Drop any alpha channel and normalize the frame size before
        # undistortion.
        if frame.shape[2] > 3:
            frame = frame[:, :, 0:3]
        frame = cv2.resize(frame, (640, 480))
        undistorted = cv2.undistort(
            frame, self.gaze_estimator.camera.camera_matrix,
            self.gaze_estimator.camera.dist_coefficients)
        self.visualizer.set_image(frame.copy())
        faces = self.gaze_estimator.detect_faces(undistorted)
        for face in faces:
            self.gaze_estimator.estimate_gaze(undistorted, face)
            self._draw_face_bbox(face)
            self._draw_head_pose(face)
            self._draw_landmarks(face)
            self._draw_face_template_model(face)
            self._draw_gaze_vector(face)
        return self.visualizer.image
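# Hypothetical usage of GazeModel: `model` points at a directory holding
# config.yaml, checkpoint.pth, and the two camera-parameter YAML files, and
# DLIB_FACE_DIR must point at the directory with the dlib landmark file.
# All paths here are illustrative.
if __name__ == '__main__':
    model = GazeModel(model='assets/mpiigaze')
    frame = cv2.imread('person.jpg')
    annotated = model.process(frame)
    cv2.imwrite('person_gaze.jpg', annotated)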
# Assumes module-level imports: os, time, cv2, pyautogui, logging as log,
# plus the project's model wrappers, InputFeeder, MouseController, and the
# display_* / visualize_* / draw_bounding_box / get_mouse_vector helpers.
def main():
    args = get_args()
    log.basicConfig(filename='example.log', level=log.DEBUG)

    inputFile = args.input  # e.g. "./bin/demo.mp4"
    mouse = MouseController("high", "fast")
    frame_count = 0
    focal_length = 950.0
    scale = 50

    if inputFile.lower() == "cam":
        feed = InputFeeder('cam')
        log.info("Video source: " + str(inputFile))
    else:
        if not os.path.isfile(inputFile):
            log.error("Unable to find file: " + inputFile)
            exit(1)
        feed = InputFeeder("video", inputFile)
        log.info("Video source: " + str(inputFile))
    log.info("InputFeeder initialized")
    log.info("Device: " + str(args.device))
    log.info("Face detection model: " + str(args.facedetectionmodel))
    log.info("Facial landmarks model: " + str(args.faciallandmarksmodel))
    log.info("Head pose estimation model: " + str(args.headposemodel))
    log.info("Gaze estimation model: " + str(args.gazeestimationmodel))

    if args.stats == 1:
        print("Running statistics...")
        inference_times = []
        fdm_inference_times = []
        hpm_inference_times = []
        flm_inference_times = []
        gem_inference_times = []
    # Baseline for the overall loading duration; kept separate from the
    # per-model timers below so it is not clobbered.
    overall_start_time = time.time()

    # Create instances of the different models
    fdm = FaceDetector(args.facedetectionmodel, args.device,
                       args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        fdm.load_model()
        fdm_load_time = time.time() - start_time
    else:
        fdm.load_model()
    fdm.check_model()

    hpm = HeadPoseEstimator(args.headposemodel, args.device,
                            args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        hpm.load_model()
        hpm_load_time = time.time() - start_time
    else:
        hpm.load_model()
    hpm.check_model()

    flm = FacialLandmarksDetector(args.faciallandmarksmodel, args.device,
                                  args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        flm.load_model()
        flm_load_time = time.time() - start_time
    else:
        flm.load_model()
    flm.check_model()

    gem = GazeEstimator(args.gazeestimationmodel, args.device,
                        args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        gem.load_model()
        gem_load_time = time.time() - start_time
    else:
        gem.load_model()
    gem.check_model()

    if args.stats == 1:
        duration_loading = time.time() - overall_start_time
        print(f"Duration for loading and checking the models: {duration_loading}")
        log.info(f"Duration for loading and checking the models: {duration_loading}")

    cv2.namedWindow('preview', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('preview', 600, 600)
    feed.load_data()

    for ret, frame in feed.next_batch():
        if not ret:
            break
        if frame is not None:
            frame_count += 1
            key = cv2.waitKey(60)
            if args.stats == 1:
                start_time = time.time()

            # Run face detection
            face_crop, face_coords = fdm.predict(frame.copy())
            print("Face crop shape: " + str(face_crop.shape))
            frame_h, frame_w = frame.shape[:2]
            try:
                # Check if a face was detected (the detector returns an int
                # instead of coordinates when it finds none).
                if type(face_coords) == int:
                    print("Unable to detect face")
                    if key == 27:
                        break
                    continue
                (xmin, ymin, xmax, ymax) = face_coords
                face_frame = frame[ymin:ymax, xmin:xmax]

                # Facial landmark detection
                left_eye_crop, right_eye_crop, landmarks, crop_coords = \
                    flm.predict(face_crop.copy())
                left_eye = (landmarks[0], landmarks[1])
                right_eye = (landmarks[2], landmarks[3])

                # Landmark positions relative to the complete frame
                landmarks_viz = landmarks
                landmarks_viz[0] = landmarks_viz[0] + xmin
                landmarks_viz[1] = landmarks_viz[1] + ymin
                landmarks_viz[2] = landmarks_viz[2] + xmin
                landmarks_viz[3] = landmarks_viz[3] + ymin
                crop_coords_viz = (crop_coords[0] + xmin,
                                   crop_coords[1] + ymin,
                                   crop_coords[2] + xmin,
                                   crop_coords[3] + ymin,
                                   crop_coords[4] + xmin,
                                   crop_coords[5] + ymin,
                                   crop_coords[6] + xmin,
                                   crop_coords[7] + ymin)
                left_eye_viz = (landmarks_viz[0], landmarks_viz[1])
                right_eye_viz = (landmarks_viz[2], landmarks_viz[3])
                # Midpoint between the eyes, used as the origin for the
                # head-pose axes.
                third_eye_viz_x = (landmarks_viz[2] - landmarks_viz[0]) / 2 + landmarks_viz[0]
                third_eye_viz_y = (landmarks_viz[3] - landmarks_viz[1]) / 2 + landmarks_viz[1]
                third_eye_viz = (third_eye_viz_x, third_eye_viz_y)

                # Head pose estimation
                head_pose = hpm.predict(face_crop.copy())
                print("Head pose: " + str(head_pose))
                (yaw, pitch, roll) = head_pose
                frame = display_head_pose(frame, pitch, roll, yaw)

                # Send inputs to the gaze estimator
                gaze_vector = gem.predict(head_pose, left_eye_crop,
                                          right_eye_crop)
                if args.stats == 1:
                    inference_time = time.time() - start_time
                    inference_times.append(inference_time)
                print(gaze_vector)
                frame = display_gaze(frame, gaze_vector)

                # Control the mouse on every fifth frame
                if frame_count % 5 == 0:
                    mouse_x, mouse_y = get_mouse_vector(gaze_vector, roll)
                    print("Mouse vector: " + str(mouse_x) + " - " + str(mouse_y))
                    mouse.move(mouse_x, mouse_y)
                    currentMouseX, currentMouseY = pyautogui.position()
                    print("Mouse coordinates: " + str(currentMouseX) + ", " +
                          str(currentMouseY))

                if args.visual_flag == 1:
                    frame = draw_bounding_box(frame, face_coords)
                    left_eye_frame = crop_coords_viz[0:4]
                    right_eye_frame = crop_coords_viz[4:]
                    frame = draw_bounding_box(frame, left_eye_frame)
                    frame = draw_bounding_box(frame, right_eye_frame)
                    frame = visualize_landmark(frame, left_eye_viz)
                    frame = visualize_landmark(frame, right_eye_viz,
                                               color=(0, 0, 255))
                    frame = visualize_gaze(frame, gaze_vector, landmarks_viz)
                    # Visualize the axes of the HeadPoseEstimator results
                    frame = hpm.draw_axes(frame.copy(), third_eye_viz, yaw,
                                          pitch, roll, scale, focal_length)
                    cv2.imshow('preview', frame)
                    cv2.imshow('left eye', left_eye_crop)
                    cv2.imshow('right eye', right_eye_crop)
            except Exception as e:
                print("Unable to predict using model: " + str(e) +
                      " for frame " + str(frame_count))
                log.error("Unable to predict using model: " + str(e) +
                          " for frame " + str(frame_count))
                continue

    if args.stats == 1:
        avg_inference_time = sum(inference_times) / len(inference_times)
        print("Average inference time: " + str(avg_inference_time))
        log.info("Average inference time: " + str(avg_inference_time))
        log.info("Load time for face detection model: " + str(fdm_load_time))
        log.info("Load time for facial landmarks model: " + str(flm_load_time))
        log.info("Load time for head pose detection model: " + str(hpm_load_time))
        log.info("Load time for gaze estimation model: " + str(gem_load_time))
    cv2.destroyAllWindows()
    feed.close()
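# Minimal sketches of two drawing helpers referenced above; the names and
# signatures are inferred from the call sites, and the colors are
# illustrative.
import cv2


def draw_bounding_box(frame, coords, color=(36, 255, 12)):
    """Draw an (xmin, ymin, xmax, ymax) rectangle and return the frame."""
    xmin, ymin, xmax, ymax = [int(c) for c in coords]
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
    return frame


def visualize_landmark(frame, point, color=(0, 255, 0)):
    """Mark a single landmark with a filled circle and return the frame."""
    cv2.circle(frame, (int(point[0]), int(point[1])), 5, color, -1)
    return frame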