Example #1
0
    def __init__(self, config: yacs.config.CfgNode):
        self.config = config
        self.gaze_estimator = GazeEstimator(config)
        self.visualizer = Visualizer(self.gaze_estimator.camera)

        self.cap = self._create_capture()
        self.output_dir = self._create_output_dir()
        self.writer = self._create_video_writer()

        self.stop = False
        self.show_bbox = self.config.demo.show_bbox
        self.show_head_pose = self.config.demo.show_head_pose
        self.show_landmarks = self.config.demo.show_landmarks
        self.show_normalized_image = self.config.demo.show_normalized_image
        self.show_template_model = self.config.demo.show_template_model
Example #2
0
 def __init__(self, **params):
     self.cfg = get_default_config()
     self.cfg.merge_from_file(
         os.path.join(params.get('model'), 'config.yaml'))
     self.cfg.merge_from_list([
         'face_detector.dlib.model',
         os.path.join(os.environ['DLIB_FACE_DIR'],
                      'shape_predictor_68_face_landmarks.dat')
     ])
     self.cfg.merge_from_list([
         'gaze_estimator.checkpoint',
         os.path.join(params.get('model'), 'checkpoint.pth')
     ])
     self.cfg['gaze_estimator']['normalized_camera_params'] = os.path.join(
         params.get('model'), 'normalized_camera_params_eye.yaml')
     self.cfg['gaze_estimator']['camera_params'] = os.path.join(
         params.get('model'), 'sample_params.yaml')
     self.cfg['device'] = 'cpu'
     self.gaze_estimator = GazeEstimator(self.cfg)
     self.visualizer = Visualizer(self.gaze_estimator.camera)
    t = -time.time()  # measure model loading time
    faceDetector = FaceDetector(precision=args.precision,
                                concurrency=args.concurrency,
                                device=args.device,
                                extensions=args.ext)
    eyeDetector = EyeDetector(precision=args.precision,
                              concurrency=args.concurrency,
                              device=args.device,
                              extensions=args.ext)
    headPoseEstimator = HeadPoseEstimator(precision=args.precision,
                                          concurrency=args.concurrency,
                                          device=args.device,
                                          extensions=args.ext)
    gazeEstimator = GazeEstimator(precision=args.precision,
                                  concurrency=args.concurrency,
                                  device=args.device,
                                  extensions=args.ext)
    mouseController = MouseController(precision='high',
                                      speed=args.speed.lower(),
                                      failsafe=args.failsafe)
    t += time.time()

    logging.info(f'Model Loading Time: {t:.4} s')

    logging.info('Running...')

    q = deque()  # the processing queue
    faces_produced = 0
    head_poses_produced = 0
    eyes_produced = 0
    hpae_consumed = 0
Example #4
0
def infer_on_stream(args):
    try:
        log.basicConfig(
            level=log.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[log.FileHandler("app.log"),
                      log.StreamHandler()])

        mouse_controller = MouseController(precision="low", speed="fast")

        start_model_load_time = time.time()

        face_detector = FaceDetector(args.model_face_detection)
        facial_landmarks_detector = FacialLandmarksDetector(
            args.model_facial_landmarks_detection)
        head_pose_estimator = HeadPoseEstimator(
            args.model_head_pose_estimation)
        gaze_estimator = GazeEstimator(args.model_gaze_estimation)
        face_detector.load_model()
        facial_landmarks_detector.load_model()
        head_pose_estimator.load_model()
        gaze_estimator.load_model()

        total_model_load_time = time.time() - start_model_load_time
        log.info("Model load time: {:.1f}ms".format(1000 *
                                                    total_model_load_time))

        output_directory = os.path.join(args.output_path + '\\' + args.device)
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        feed = InputFeeder(args.input_type, args.input_path)
        feed.load_data()
        out_video = feed.get_out_video(output_directory)

        frame_counter = 0
        start_inference_time = time.time()
        total_prepocess_time = 0

        while True:
            try:
                frame = next(feed.next_batch())
            except StopIteration:
                break
            frame_counter += 1

            face_boxes = face_detector.predict(frame)
            for face_box in face_boxes:
                face_image = get_crop_image(frame, face_box)
                eye_boxes, eye_centers = facial_landmarks_detector.predict(
                    face_image)
                left_eye_image, right_eye_image = [
                    get_crop_image(face_image, eye_box)
                    for eye_box in eye_boxes
                ]
                head_pose_angles = head_pose_estimator.predict(face_image)
                gaze_x, gaze_y = gaze_estimator.predict(
                    right_eye_image, head_pose_angles, left_eye_image)
                draw_gaze_line(frame, face_box, eye_centers, gaze_x, gaze_y)
                if args.show_input:
                    cv2.imshow('im', frame)
                if args.move_mouse:
                    mouse_controller.move(gaze_x, gaze_y)
                total_prepocess_time += face_detector.preprocess_time + facial_landmarks_detector.preprocess_time + \
                    head_pose_estimator.preprocess_time + gaze_estimator.preprocess_time
                break

            if out_video is not None:
                out_video.write(frame)
            if args.input_type == "image":
                cv2.imwrite(os.path.join(output_directory, 'output_image.jpg'),
                            frame)

            key_pressed = cv2.waitKey(60)
            if key_pressed == 27:
                break

        total_time = time.time() - start_inference_time
        total_inference_time = round(total_time, 1)
        fps = frame_counter / total_inference_time
        log.info("Inference time:{:.1f}ms".format(1000 * total_inference_time))
        log.info("Input/output preprocess time:{:.1f}ms".format(
            1000 * total_prepocess_time))
        log.info("FPS:{}".format(fps))

        with open(os.path.join(output_directory, 'stats.txt'), 'w') as f:
            f.write(str(total_inference_time) + '\n')
            f.write(str(total_prepocess_time) + '\n')
            f.write(str(fps) + '\n')
            f.write(str(total_model_load_time) + '\n')

        feed.close()
        cv2.destroyAllWindows()
    except Exception as e:
        log.exception("Something wrong when running inference:" + str(e))
Example #5
0
def main(args):

    fd_infer_time, ld_infer_time, hpe_infer_time, ge_infer_time = 0 ,0 ,0 ,0

    start = time.time()
    face_detector = FaceDetector(args.model_fd, args.device_fd, args.ext_fd)
    fd_load_time = time.time() - start 

    start = time.time()
    landmarks_detector = LandmarksDetector(args.model_ld, args.device_ld, args.ext_ld)
    ld_load_time = time.time() - start 

    start = time.time()
    head_pose_estimator = HeadPoseEstimator(args.model_hpe, args.device_hpe, args.ext_hpe)
    hpe_load_time = time.time() - start 

    start = time.time()
    gaze_estimator = GazeEstimator(args.model_ge, args.device_ge, args.ext_ge)
    ge_load_time = time.time() - start 
    log.info("Models Loading...")
    log.info("Face detection load time       :{:.4f}ms".format(fd_load_time))    
    log.info("Landmarks estimation load time :{:.4f}ms".format(ld_load_time))     
    log.info("Head pose estimation load time :{:.4f}ms".format(hpe_load_time))     
    log.info("Gaze estimation load time      :{:.4f}ms".format(ge_load_time))  
    log.info('All Models loaded')
    mouse_controller = MouseController('high', 'fast')


    if args.input == 0:
        input_feeder = InputFeeder('cam', args.input)
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        input_feeder = InputFeeder('image', args.input)
    else:
        input_feeder = InputFeeder('video', args.input)
    
    input_feeder.load_data()
    init_w  = input_feeder.init_w
    init_h =  input_feeder.init_h
    

    counter = 0

    for flag, frame in input_feeder.next_batch():
        
        if not flag:
            break

        counter +=1

        key = cv2.waitKey(60)
        try:
            start = time.time()
            outputs = face_detector.predict(frame)
            
            face = face_detector.preprocess_output(frame, outputs, init_w, init_h)
            
            fd_infer_time += time.time() - start

            start = time.time()
            outputs = landmarks_detector.predict(face)
            
            left_eye, right_eye, real_landmraks = landmarks_detector.preprocess_output(face, outputs)
           
            ld_infer_time += time.time() - start

            start = time.time()

            outputs = head_pose_estimator.predict(face)
            head_pose_angles = head_pose_estimator.preprocess_output(outputs)
            
            hpe_infer_time += time.time() - start

            
            start = time.time()
            
            outputs = gaze_estimator.predict(left_eye, right_eye, head_pose_angles)
            
            gaze = gaze_estimator.preprocess_output(outputs)
            
            ge_infer_time += time.time() - start
  

            log.info("Face detection time       :{:.4f}ms".format(fd_infer_time/counter))    
            log.info("Landmarks estimation time :{:.4f}ms".format(ld_infer_time/counter))     
            log.info("Head pose estimation time :{:.4f}ms".format(hpe_infer_time/counter))     
            log.info("Gaze estimation time      :{:.4f}ms".format(ge_infer_time/counter))     

            if args.input != 0:
                drawer = Drawer(face, real_landmraks, head_pose_angles, gaze)
                drawer.draw_landmarks(20)
                drawer.draw_head_pose()
                drawer.draw_gazes()
                drawer.show()
            roll_cos = math.cos(head_pose_angles[2] *  math.pi/180)

            roll_sin = math.sin(head_pose_angles[2] *  math.pi/180)

            mouse_x = gaze[0] * roll_cos + gaze[0] * roll_sin
            mouse_y = gaze[1] * roll_cos + gaze[1] * roll_sin

            mouse_controller.move(mouse_x, mouse_y)

        except Exception as e:
            log.error(e)
        finally:
            if key == 27:
                break

    input_feeder.close()
def main(args):
    print("Main script running...")
    log_name = 'stats_' + args.device + '_' + args.hpe + args.fld + args.ge

    if not os.path.exists('output'):
        os.makedirs('output')
    print(f"Logging to: output/{log_name}")
    log = open('output/' + log_name, 'w+')

    print("Initializing models...")

    fd = FaceDetector(
        model_name=
        'models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001',
        device=args.device,
        extensions=None)

    fd.load_model()

    if args.v: print(f"Face Detection Load Time: {fd.load_time}")

    hpe = HeadPoseEstimator(
        model_name=
        f'models/intel/head-pose-estimation-adas-0001/{args.hpe}/head-pose-estimation-adas-0001',
        device=args.device,
        extensions=None)
    hpe.load_model()

    if args.v: print(f"Head Pose Estimation Load Time: {hpe.load_time}")

    fld = FacialLandmarkDetector(
        model_name=
        f'models/intel/landmarks-regression-retail-0009/{args.fld}/landmarks-regression-retail-0009',
        device=args.device,
        extensions=None)
    fld.load_model()

    if args.v: print(f"Facial Landmarks Detection Load Time: {fld.load_time}")

    ge = GazeEstimator(
        model_name=
        f'models/intel/gaze-estimation-adas-0002/{args.ge}/gaze-estimation-adas-0002',
        device=args.device,
        extensions=None)
    ge.load_model()

    if args.v: print(f"Gaze Estimation Load Time: {ge.load_time}")

    image = False

    print("Initializing source feed...")
    feed = InputFeeder(input_type=args.input_type, input_file=args.input_file)
    if args.input_type == 'image':
        image = True

    feed.load_data()

    for batch in feed.next_batch():
        if args.v:
            print()
        cv2.imshow('Batch', batch)
        if image:
            cv2.imwrite('output/Batch.png', batch)

        coords, bounding_face = fd.predict(batch)
        if not coords:
            print("No face")
            continue
        if image: cv2.imwrite('output/Face.png', bounding_face)
        box = coords[0]
        face = bounding_face[box[1]:box[3], box[0]:box[2]]

        if args.v:
            print(f"Face Time: {fd.infer_time}")
        log.write("FD_infer: " + str(fd.infer_time) + "\n")
        if image:
            cv2.imshow('Cropped Face', face)

        # Landmark Detection
        coords, landmark_detection, landmark_points = fld.predict(face)
        if image: cv2.imwrite('output/Landmarks.png', landmark_detection)
        if image: cv2.imshow('Landmark Detection', landmark_detection)
        if args.v: print(f"Landmark Time: {fld.infer_time}")
        log.write("FLD_infer: " + str(fld.infer_time) + "\n")
        right_box, left_box = coords[0:2]
        if args.v: print(f"Eye Coords: {coords}")

        if left_box == None or right_box == None:
            print("No eyes")
            continue

        left_eye = face[left_box[1]:left_box[3], left_box[0]:left_box[2]]
        cv2.putText(face, 'L', (left_box[0], left_box[3]),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

        right_eye = face[right_box[1]:right_box[3], right_box[0]:right_box[2]]
        cv2.putText(face, 'R', (right_box[0], right_box[3]),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

        if args.v:
            print(f"Eye Shape: {left_eye.shape} :: {right_eye.shape}")

        #Head Pose Estimation
        head_yaw, head_pitch, head_roll = hpe.predict(face)
        if args.v: print(f"Head Pose Time: {hpe.infer_time}")
        log.write("HPE_infer: " + str(hpe.infer_time) + "\n")
        head_angles = [head_yaw[0][0], head_pitch[0][0], head_roll[0][0]]

        #Gaze Estimation
        # expects pose as  (yaw, pitch, and roll)
        gaze = ge.predict(left_eye, right_eye, head_angles)

        if args.v:
            print(f"Gaze Time: {ge.infer_time}")
        log.write("GE_infer: " + str(ge.infer_time) + "\n")
        gaze_point = (int(gaze[0][0] * 50), int(gaze[0][1] * 50))

        arrows = cv2.arrowedLine(face, landmark_points[0],
                                 (landmark_points[0][0] + gaze_point[0],
                                  landmark_points[0][1] - gaze_point[1]),
                                 (0, 0, 255), 2)
        arrows = cv2.arrowedLine(face, landmark_points[1],
                                 (landmark_points[1][0] + gaze_point[0],
                                  landmark_points[1][1] - gaze_point[1]),
                                 (0, 0, 255), 2)
        if image:
            cv2.imwrite('output/Gaze.png', arrows)

        if not image:
            mouse = MouseController(precision='medium', speed='medium')
            mouse.move(gaze[0][0], gaze[0][1])

        if image:
            cv2.imshow('Arrows', arrows)

        if image:
            log.write("FD_LoadTime: " + str(fd.load_time) + "\n")
            log.write("FD_PreprocessTime: " + str(fd.preprocess_input_time) +
                      "\n")
            log.write("FD_PostrocessTime: " + str(fd.preprocess_output_time) +
                      "\n")

            log.write("FLD_LoadTime: " + str(fld.load_time) + "\n")
            log.write("FLD_PreprocessTime: " + str(fld.preprocess_input_time) +
                      "\n")
            log.write("FLD_PostprocessTime: " +
                      str(fld.preprocess_output_time) + "\n")

            log.write("HPE_LoadTime: " + str(hpe.load_time) + "\n")
            log.write("HPE_PreprocessTime: " + str(hpe.preprocess_input_time) +
                      "\n")

            log.write("GE_LoadTime: " + str(ge.load_time) + "\n")
            log.write("GE_PreprocessTime: " + str(ge.preprocess_input_time) +
                      "\n")

            cv2.waitKey(0)
        else:
            if cv2.waitKey(15) & 0xFF == ord('q'):
                break

    feed.close()
    log.close()
    cv2.destroyAllWindows
Example #7
0
def main():
    args = build_argparser().parse_args()
    input_file = args.input
    logger = log.getLogger()
    if input_file == "CAM":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(input_file):
            logger.error("Path should be file")
            exit(1)
        input_feeder = InputFeeder("video", input_file)

    face_detector = FaceDetector(
        args.face_detection_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    face_landmark_detector = FaceLandmarkDetector(
        args.face_landmark_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    head_pose_estimator = HeadPoseEstimator(
        args.head_pose_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    gaze_estimator = GazeEstimator(
        args.gaze_estimation_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    mouse_controller = MouseController("medium", "fast")

    face_detector.load_model()
    face_landmark_detector.load_model()
    head_pose_estimator.load_model()
    gaze_estimator.load_model()

    input_feeder.load_data()

    width = 1000
    height = int(width * 9 / 16)

    for flag, frame in input_feeder.next_batch():

        if not flag:
            break
        pressed_key = cv2.waitKey(60)

        face_detected = face_detector.predict(frame)
        if face_detected:
            face_coordinates, face_image = face_detected
            if not face_coordinates:
                continue
        else:
            continue
        if "fd" in args.visualization:
            cv2.rectangle(
                frame,
                (face_coordinates[0], face_coordinates[1]),
                (face_coordinates[2], face_coordinates[3]),
                (36, 255, 12),
                2,
            )
            cv2.putText(
                frame,
                "Face Detected",
                (face_coordinates[0], face_coordinates[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (36, 255, 12),
                2,
            )

        left_eye_img, righ_eye_img, eye_coords = face_landmark_detector.predict(
            face_image
        )
        if "fl" in args.visualization:
            frame_eye_coords_min = (
                np.array(eye_coords)[:, :2] + np.array(face_coordinates)[:2]
            )
            frame_eye_coords_max = (
                np.array(eye_coords)[:, 2:] + np.array(face_coordinates)[:2]
            )
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[0][0], frame_eye_coords_min[0][1]),
                (frame_eye_coords_max[0][0], frame_eye_coords_max[0][1]),
                (36, 255, 12),
                2,
            )
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[1][0], frame_eye_coords_min[1][1]),
                (frame_eye_coords_max[1][0], frame_eye_coords_max[1][1]),
                (36, 255, 12),
                2,
            )

        head_pose_estimate = head_pose_estimator.predict(face_image)
        if "hp" in args.visualization:
            cv2.putText(
                frame,
                "yaw:{:.1f}|pitch:{:.1f}|roll:{:.1f}".format(*head_pose_estimate),
                (20, 35),
                cv2.FONT_HERSHEY_COMPLEX,
                1.2,
                (36, 255, 12),
                3,
            )

        mouse_coordinate, gaze_vector = gaze_estimator.predict(
            left_eye_img, righ_eye_img, head_pose_estimate
        )
        if "ge" in args.visualization:
            head_pose_estimate = np.array(head_pose_estimate)
            yaw, pitch, roll = head_pose_estimate * np.pi / 180.0

            focal_length = 950
            scale = 100

            origin = (
                int(
                    face_coordinates[0]
                    + (face_coordinates[2] - face_coordinates[0]) / 2
                ),
                int(
                    face_coordinates[1]
                    + (face_coordinates[3] - face_coordinates[1]) / 2
                ),
            )

            r_x = np.array(
                [
                    [1, 0, 0],
                    [0, math.cos(pitch), -math.sin(pitch)],
                    [0, math.sin(pitch), math.cos(pitch)],
                ]
            )
            r_y = np.array(
                [
                    [math.cos(yaw), 0, -math.sin(yaw)],
                    [0, 1, 0],
                    [math.sin(yaw), 0, math.cos(yaw)],
                ]
            )
            r_z = np.array(
                [
                    [math.cos(roll), -math.sin(roll), 0],
                    [math.sin(roll), math.cos(roll), 0],
                    [0, 0, 1],
                ]
            )
            r = r_z @ r_y @ r_x

            zaxis = np.array(([0, 0, -1 * scale]), dtype="float32")
            offset = np.array(([0, 0, focal_length]), dtype="float32")
            zaxis = np.dot(r, zaxis) + offset
            tip = (
                int(zaxis[0] / zaxis[2] * focal_length) + origin[0],
                int(zaxis[1] / zaxis[2] * focal_length) + origin[1],
            )

            cv2.arrowedLine(frame, origin, tip, (0, 0, 255), 3, tipLength=0.3)

        cv2.imshow("frame", cv2.resize(frame, (width, height)))
        mouse_controller.move(mouse_coordinate[0], mouse_coordinate[1])

        if pressed_key == 27:
            logger.error("exit key is pressed..")
            break
Example #8
0
def main():
    args = get_args()

    log.basicConfig(filename='example.log', level=log.DEBUG)

    inputFile = args.input
    #inputFile = "./bin/demo.mp4"

    mouse = MouseController("high", "fast")

    frame_count = 0
    focal_length = 950.0
    scale = 50

    #print(f"Visual flag: {args.visual_flag}")

    if inputFile.lower() == "cam":
        feed = InputFeeder('cam')
        log.info("Video source: " + str(inputFile))

    else:
        if not os.path.isfile(inputFile):
            log.error("Unable to find file: " + inputFile)
            exit(1)
        feed = InputFeeder("video", inputFile)
        log.info("Video source: " + str(inputFile))
        log.info("InputFeeder initialized")

    log.info("Device: " + str(args.device))
    log.info("Face detection model: " + str(args.facedetectionmodel))
    log.info("Facial landmarks model: " + str(args.faciallandmarksmodel))
    log.info("Head pose estimation model: " + str(args.headposemodel))
    log.info("Gaze estimation model: " + str(args.gazeestimationmodel))

    if args.stats == 1:
        print("Running statistics...")
        inference_times = []
        fdm_inference_times = []
        hpm_inference_times = []
        flm_inference_times = []
        gem_inference_times = []
        start_time = time.time()

    # Create instances of the different models
    fdm = FaceDetector(args.facedetectionmodel, args.device,
                       args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        fdm.load_model()
        fdm_load_time = time.time() - start_time
    else:
        fdm.load_model()
    fdm.check_model()

    hpm = HeadPoseEstimator(args.headposemodel, args.device,
                            args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        hpm.load_model()
        hpm_load_time = time.time() - start_time
    else:
        hpm.load_model()
    hpm.check_model()

    flm = FacialLandmarksDetector(args.faciallandmarksmodel, args.device,
                                  args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        flm.load_model()
        flm_load_time = time.time() - start_time
    else:
        flm.load_model()
    flm.check_model()

    gem = GazeEstimator(args.gazeestimationmodel, args.device,
                        args.cpu_extension)
    if args.stats == 1:
        start_time = time.time()
        gem.load_model()
        gem_load_time = time.time() - start_time
    else:
        gem.load_model()
    gem.check_model()

    if args.stats == 1:
        duration_loading = time.time() - start_time
        print(
            f"Duration for loading and checking the models: {duration_loading}"
        )
        log.info(
            f"Duration for loading and checking the models: {duration_loading}"
        )

    cv2.namedWindow('preview', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('preview', 600, 600)

    feed.load_data()
    for ret, frame in feed.next_batch():
        if not ret:
            break

        if frame is not None:
            frame_count += 1
            key = cv2.waitKey(60)

            if args.stats == 1:
                start_time = time.time()

            # Run face detection
            face_crop, face_coords = fdm.predict(frame.copy())
            print("Face crop shape: " + str(face_crop.shape))
            frame_h, frame_w = frame.shape[:2]
            (xmin, ymin, xmax, ymax) = face_coords
            face_frame = frame[ymin:ymax, xmin:xmax]
            #center_of_face = (xmin + face_frame.shape[1] / 2, ymin + face_frame.shape[0] / 2, 0) # 0 for colour channel
            #print("Center of face " + str(center_of_face))

            try:
                # Check if face was detected
                if type(face_coords) == int:
                    print("Unable to detect face")
                    if key == 27:
                        break
                    continue

                # Facial landmark detection
                left_eye_crop, right_eye_crop, landmarks, crop_coords = flm.predict(
                    face_crop.copy())
                #print("Landmarks" +str(landmarks))
                left_eye = (landmarks[0], landmarks[1])
                right_eye = (landmarks[2], landmarks[3])

                # Landmark position based on complete frame
                landmarks_viz = landmarks
                landmarks_viz[0] = landmarks_viz[0] + xmin
                landmarks_viz[1] = landmarks_viz[1] + ymin
                landmarks_viz[2] = landmarks_viz[2] + xmin
                landmarks_viz[3] = landmarks_viz[3] + ymin

                crop_coords_viz = (crop_coords[0] + xmin, crop_coords[1] +
                                   ymin, crop_coords[2] + xmin,
                                   crop_coords[3] + ymin, crop_coords[4] +
                                   xmin, crop_coords[5] + ymin,
                                   crop_coords[6] + xmin,
                                   crop_coords[7] + ymin)

                left_eye_viz = (landmarks_viz[0], landmarks_viz[1])
                right_eye_viz = (landmarks_viz[2], landmarks_viz[3])

                third_eye_viz_x = (landmarks_viz[2] -
                                   landmarks_viz[0]) / 2 + landmarks_viz[0]
                third_eye_viz_y = (landmarks_viz[3] -
                                   landmarks_viz[1]) / 2 + landmarks_viz[1]
                third_eye_viz = (third_eye_viz_x, third_eye_viz_y)
                #print(landmarks_viz[0], landmarks_viz[2], third_eye_viz_x)

                # Head pose estimation
                head_pose = hpm.predict(face_crop.copy())
                print("Head pose: " + str(head_pose))
                (yaw, pitch, roll) = head_pose
                frame = display_head_pose(frame, pitch, roll, yaw)

                # Send inputs to GazeEstimator
                gaze_vector = gem.predict(head_pose, left_eye_crop,
                                          right_eye_crop)

                if args.stats == 1:
                    inference_time = time.time() - start_time
                    inference_times.append(inference_time)

                print(gaze_vector)
                frame = display_gaze(frame, gaze_vector)

                # Control the mouse
                if frame_count % 5 == 0:
                    mouse_x, mouse_y = get_mouse_vector(gaze_vector, roll)
                    print("Mouse vector:" + str(mouse_x) + " - " +
                          str(mouse_y))
                    mouse.move(mouse_x, mouse_y)
                    currentMouseX, currentMouseY = pyautogui.position()
                    print("Mouse coordinates: " + str(currentMouseX) + ", " +
                          str(currentMouseY))

                if args.visual_flag == 1:

                    frame = draw_bounding_box(frame, face_coords)

                    left_eye_frame = crop_coords_viz[0:4]
                    right_eye_frame = crop_coords_viz[4:]
                    frame = draw_bounding_box(frame, left_eye_frame)
                    frame = draw_bounding_box(frame, right_eye_frame)

                    frame = visualize_landmark(frame, left_eye_viz)
                    frame = visualize_landmark(frame,
                                               right_eye_viz,
                                               color=(0, 0, 255))

                    frame = visualize_gaze(frame, gaze_vector, landmarks_viz)

                    # visualize the axes of the HeadPoseEstimator results
                    #frame = hpm.draw_axes(frame.copy(), center_of_face, yaw, pitch, roll, scale, focal_length)
                    frame = hpm.draw_axes(frame.copy(), third_eye_viz, yaw,
                                          pitch, roll, scale, focal_length)
                    #hdm.draw_axes(frame.copy(), center_of_face, yaw, pitch, roll, scale, focal_length)

                cv2.imshow('preview', frame)
                cv2.imshow('left eye', left_eye_crop)
                cv2.imshow('right eye', right_eye_crop)

            except Exception as e:
                print("Unable to predict using model" + str(e) +
                      " for frame " + str(frame_count))
                log.error("Unable to predict using model" + str(e) +
                          " for frame " + str(frame_count))
            continue

    if args.stats == 1:
        avg_inference_time = sum(inference_times) / len(inference_times)
        print("Average inference time: " + str(avg_inference_time))
        log.info("Average inference time: " + str(avg_inference_time))
        log.info("Load time for face detection model: " + str(fdm_load_time))
        log.info("Load time for facial landmarks model: " + str(flm_load_time))
        log.info("Load time for head pose detection model: " +
                 str(hpm_load_time))
        log.info("Load time for gaze estimation model: " + str(gem_load_time))
    cv2.destroyAllWindows()
    feed.close()