def init_models(device="CPU"):
    # Assign to the existing module-level globals rather than defining new locals
    global face_detection
    global facial_landmarks_detection
    global head_pose_estimation
    global gaze_estimation

    start = time.time()
    face_detection = Face_Detection(path_face_detection, device)
    face_detection.load_model()
    fd_load_time = (time.time() - start)

    start = time.time()
    facial_landmarks_detection = Facial_Landmarks_Detection(
        path_facial_landmarks_detection, device)
    facial_landmarks_detection.load_model()
    fld_load_time = (time.time() - start)

    start = time.time()
    head_pose_estimation = Head_Pose_Estimation(path_head_pose_estimation,
                                                device)
    head_pose_estimation.load_model()
    hpe_load_time = (time.time() - start)

    start = time.time()
    gaze_estimation = Gaze_Estimation(path_gaze_estimation, device)
    gaze_estimation.load_model()
    ge_load_time = (time.time() - start)

    return (fd_load_time, fld_load_time, hpe_load_time, ge_load_time)
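All of these snippets time or log load_model() on thin wrapper classes around OpenVINO networks, but the wrappers themselves are never shown on this page. Below is a minimal, hypothetical sketch of the pattern they tend to follow, assuming the classic openvino.inference_engine API; the class name OpenVINOModel and the path handling are placeholders, and each repo's real classes differ in detail.

from openvino.inference_engine import IECore


class OpenVINOModel:
    # Hypothetical base wrapper; Face_Detection, Gaze_Estimation, etc.
    # in these examples follow this general shape.
    def __init__(self, model_path, device="CPU", extensions=None):
        # Some repos pass the path with the .xml suffix, some without.
        xml = model_path if model_path.endswith(".xml") else model_path + ".xml"
        self.model_xml = xml
        self.model_bin = xml.replace(".xml", ".bin")
        self.device = device
        self.extensions = extensions
        self.exec_net = None

    def load_model(self):
        ie = IECore()
        if self.extensions and "CPU" in self.device:
            ie.add_extension(self.extensions, "CPU")
        network = ie.read_network(model=self.model_xml, weights=self.model_bin)
        self.input_name = next(iter(network.input_info))
        self.exec_net = ie.load_network(network=network,
                                        device_name=self.device,
                                        num_requests=1)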
def init_models(device="CPU"):
    # Assign to the existing module-level globals rather than defining new locals
    global face_detection
    global facial_landmarks_detection
    global head_pose_estimation
    global gaze_estimation

    log.info("Loading Face Detection model...")
    face_detection = Face_Detection(path_face_detection, device)
    face_detection.load_model()
    log.info("DONE\n")

    log.info("Loading Face Landmarks Detection model...")
    facial_landmarks_detection = Facial_Landmarks_Detection(
        path_facial_landmarks_detection, device)
    facial_landmarks_detection.load_model()
    log.info("DONE\n")

    log.info("Loading Head Pose Estimation model...")
    head_pose_estimation = Head_Pose_Estimation(path_head_pose_estimation,
                                                device)
    head_pose_estimation.load_model()
    log.info("DONE\n")

    log.info("Loading Gaze Estimation model...")
    gaze_estimation = Gaze_Estimation(path_gaze_estimation, device)
    gaze_estimation.load_model()
    log.info("DONE\n")
Example #3
def test_gaze_estimation():
    model = Gaze_Estimation("models/intel/gaze-estimation-adas-0002/FP16-INT8/gaze-estimation-adas-0002.xml")
    model.load_model()
    left_eye = cv2.imread("media/left_eye.jpg")
    right_eye = cv2.imread("media/right_eye.jpg")
    angles = [-14.323277473449707, -2.0438201427459717, 4.142961502075195]
    gaze_vector = model.predict(left_eye, right_eye, angles)
    print("GazeVector: " + str(gaze_vector))
Example #4
    def __init__(self, args):

        # load the objects corresponding to the models
        self.face_detection = Face_Detection(args.face_detection_model,
                                             args.device, args.extensions,
                                             args.perf_counts)
        self.gaze_estimation = Gaze_Estimation(args.gaze_estimation_model,
                                               args.device, args.extensions,
                                               args.perf_counts)
        self.head_pose_estimation = Head_Pose_Estimation(
            args.head_pose_estimation_model, args.device, args.extensions,
            args.perf_counts)
        self.facial_landmarks_detection = Facial_Landmarks_Detection(
            args.facial_landmarks_detection_model, args.device,
            args.extensions, args.perf_counts)

        start_models_load_time = time.time()
        self.face_detection.load_model()
        self.gaze_estimation.load_model()
        self.head_pose_estimation.load_model()
        self.facial_landmarks_detection.load_model()

        logger = logging.getLogger()
        input_T = args.input_type
        input_F = args.input_file

        if input_T.lower() == 'cam':
            # open the video feed
            self.feed = InputFeeder(args.input_type, args.input_file)
            self.feed.load_data()
        else:
            if not os.path.isfile(input_F):
                logger.error('Unable to find specified video file')
                exit(1)
            file_extension = input_F.split(".")[-1]
            # images and videos are handled identically by InputFeeder
            if file_extension in ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']:
                self.feed = InputFeeder(args.input_type, args.input_file)
                self.feed.load_data()
            else:
                logger.error(
                    "Unsupported file Extension. Allowed ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']"
                )
                exit(1)

        print("Models total loading time :",
              time.time() - start_models_load_time)

        # init mouse controller
        self.mouse_controller = MouseController('low', 'fast')
def get_gazestimator(args):
    model_gaze = args.model_gaze
    gaze_estimator = None  # avoid UnboundLocalError when no model is given
    if model_gaze:
        gaze_estimator = Gaze_Estimation(model_name=model_gaze,
                                         device=args.device,
                                         extensions=args.cpu_extension)
    return gaze_estimator
    def __init__(self,
                 device='CPU',
                 mouse_con=False,
                 face_dec=None,
                 fac_land=None,
                 head_pose=None,
                 gaze=None,
                 show_video=False,
                 save_video=False):
        '''
        All four model paths must be provided here.
        '''
        if face_dec and fac_land and head_pose and gaze:
            self.face_dec = FaceDetectionModel(face_dec, device=device)
            self.fac_land = FacialLandmarksDetection(fac_land, device=device)
            self.head_pose = Head_Pose_Estimation(head_pose, device=device)
            self.gaze = Gaze_Estimation(gaze, device=device)
            self.face_dec.load_model()
            self.fac_land.load_model()
            self.head_pose.load_model()
            self.gaze.load_model()
        else:
            raise ValueError('Missing Arguments')

        if mouse_con:
            self.mouse_con = MouseController("low", "fast")

        self.show_video, self.save_video = show_video, save_video
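Several examples construct MouseController('low', 'fast') or similar but never show the class. A hedged sketch in the style of the common Udacity starter code, built on pyautogui; the precision and speed mappings below are assumptions.

import pyautogui

class MouseController:
    # Hypothetical controller: precision scales the normalized gaze
    # components into pixels, speed sets the move duration in seconds.
    def __init__(self, precision, speed):
        precision_dict = {'high': 100, 'low': 1000, 'medium': 500}
        speed_dict = {'fast': 1, 'slow': 10, 'medium': 5}
        self.precision = precision_dict[precision]
        self.speed = speed_dict[speed]

    def move(self, x, y):
        # Screen y grows downward, hence the sign flip on y.
        pyautogui.moveRel(x * self.precision, -1 * y * self.precision,
                          duration=self.speed)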
Example #7
def main():
    args = build_argparser().parse_args()

    inference_time = 0
    counter = 0

    # Initialize the Inference Engine
    fd = FaceDetection()
    fld = Facial_Landmarks_Detection()
    ge = Gaze_Estimation()
    hp = Head_Pose_Estimation()

    # Load Models
    fd.load_model(args.face_detection_model, args.device, args.cpu_extension)
    fld.load_model(args.facial_landmark_model, args.device, args.cpu_extension)
    ge.load_model(args.gaze_estimation_model, args.device, args.cpu_extension)
    hp.load_model(args.head_pose_model, args.device, args.cpu_extension)

    # Mouse Controller precision and speed
    mc = MouseController('medium', 'fast')

    # feed input from an image, webcam, or video to model
    if args.input == "cam":
        feed = InputFeeder("cam")
    else:
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
        feed = InputFeeder("video", args.input)
    feed.load_data()
    frame_count = 0
    for frame in feed.next_batch():
        frame_count += 1
        inf_start = time.time()
        if frame is not None:
            try:
                key = cv2.waitKey(60)

                # make predictions
                detected_face, face_coords = fd.predict(
                    frame.copy(), args.prob_threshold)
                hp_output = hp.predict(detected_face.copy())
                left_eye, right_eye, eye_coords = fld.predict(
                    detected_face.copy())
                new_mouse_coord, gaze_vector = ge.predict(
                    left_eye, right_eye, hp_output)

                # measure inference time only after all four predictions
                stop_inference = time.time()
                det_time = stop_inference - inf_start
                inference_time = inference_time + stop_inference - inf_start
                counter = counter + 1

                # Visualization
                preview = args.visualization
                if preview:
                    preview_frame = frame.copy()
                    face_frame = detected_face.copy()

                    draw_face_bbox(preview_frame, face_coords)
                    display_hp(preview_frame, hp_output, face_coords)
                    draw_landmarks(face_frame, eye_coords)
                    draw_gaze(face_frame, gaze_vector, left_eye.copy(),
                              right_eye.copy(), eye_coords)

                if preview:
                    img = np.hstack((cv2.resize(preview_frame, (500, 500)),
                                     cv2.resize(face_frame, (500, 500))))
                else:
                    img = cv2.resize(frame, (500, 500))

                cv2.imshow('Visualization', img)

                # set speed
                if frame_count % 5 == 0:
                    mc.move(new_mouse_coord[0], new_mouse_coord[1])

                # INFO
                log.info("NUMBER OF FRAMES: {} ".format(frame_count))
                log.info("INFERENCE TIME: {}ms".format(det_time * 1000))

                if key == 27:
                    break
            except Exception as e:
                print('Frame could not be processed; is the image/video '
                      'format supported? Error: {}'.format(e))
                exit()
    feed.close()
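The mains on this page all iterate feed.next_batch() from an InputFeeder, but the snippets disagree on whether it yields plain frames or (ret, frame) tuples. A minimal sketch of the plain-frame variant, assuming cv2.VideoCapture under the hood; the real starter-code class differs per repo.

import cv2

class InputFeeder:
    # Hypothetical feeder: video/cam via VideoCapture, single image via imread.
    def __init__(self, input_type, input_file=None):
        self.input_type = input_type
        self.input_file = input_file

    def load_data(self):
        if self.input_type == 'video':
            self.cap = cv2.VideoCapture(self.input_file)
        elif self.input_type == 'cam':
            self.cap = cv2.VideoCapture(0)
        else:  # single image
            self.cap = cv2.imread(self.input_file)

    def next_batch(self):
        if self.input_type == 'image':
            yield self.cap
            return
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            yield frame
        yield None  # lets callers that check "frame is None" stop cleanly

    def close(self):
        if self.input_type != 'image':
            self.cap.release()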
Example #8
def benchmark(args):
    print("runing benchmark")
    #file=open(args.c)
    #confs=json.loads(file.read())
    

    
    input_type=args.t
    input_files=args.l
    
    
    face_lt_start=time.time()
    face_detect=face_detection(args.fm, args.d, args.p, args.e)

    face_detect.load_model()
    face_lt=time.time()-face_lt_start
    
    
    landmark_lt_start=time.time()
    landmarks_model=LandmarksDetection(args.lm, args.d, args.e)

    landmarks_model.load_model()
    landmark_lt=time.time()-landmark_lt_start

    
    head_pose_lt_start=time.time()
    head_pose=Head_Pose(args.hm, args.d, args.e)
    head_pose.load_model()
    head_pose_lt=time.time()-head_pose_lt_start

    
    gaze_lt_start=time.time()
    gaze_estimation=Gaze_Estimation(args.gm, args.d, args.e)
    gaze_estimation.load_model()
    gaze_lt=time.time()-gaze_lt_start


    feed=InputFeeder(input_type='video', input_file=input_files)

    feed.load_data()

    for batch in feed.next_batch():

        face_inf_start = time.time()
        cropped_face = face_detect.predict(batch)
        face_inf_time = time.time() - face_inf_start

        landmark_inf_start = time.time()
        cropped_left_eye, cropped_right_eye = landmarks_model.predict(cropped_face)
        landmark_inf_time = time.time() - landmark_inf_start

        head_pose_inf_start = time.time()
        head_angles = head_pose.predict(cropped_face)
        head_pose_inf_time = time.time() - head_pose_inf_start

        gaze_inf_start = time.time()
        x, y = gaze_estimation.predict(cropped_left_eye, cropped_right_eye, head_angles)
        gaze_inf_time = time.time() - gaze_inf_start

        # plotting load_time
        models = ['Face_detect', 'landmark_detect', 'Head_pose_est', 'Gaze est']
        loading_times = [face_lt, landmark_lt, head_pose_lt, gaze_lt]
        plot_loading_time(models, loading_times, args.b)

        # plotting inference_time
        inference_times = [face_inf_time, landmark_inf_time, head_pose_inf_time, gaze_inf_time]
        plot_inf_time(models, inference_times, args.b)

        logging.info("Benchmarking done!")

        # only the first frame is needed for the benchmark
        break
    feed.close()
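plot_loading_time and plot_inf_time are not defined in this snippet. A hypothetical implementation consistent with how benchmark() calls them: one bar chart per metric, written to the directory passed as args.b.

import matplotlib
matplotlib.use("Agg")  # write files without needing a display
import matplotlib.pyplot as plt

def plot_loading_time(models, loading_times, out_dir):
    # Hypothetical helper: bar chart of per-model load times, saved under out_dir.
    plt.figure()
    plt.bar(models, loading_times)
    plt.ylabel("load time (s)")
    plt.title("Model loading time")
    plt.savefig("{}/loading_time.png".format(out_dir))
    plt.close()

def plot_inf_time(models, inference_times, out_dir):
    # Hypothetical helper: same chart for single-frame inference times.
    plt.figure()
    plt.bar(models, inference_times)
    plt.ylabel("inference time (s)")
    plt.title("Model inference time")
    plt.savefig("{}/inference_time.png".format(out_dir))
    plt.close()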
Example #9
def main(args):

    input_type = args.t
    input_files = args.l
    flags = args.f

    face_detect = Face_Detection(face_model_path, args.d, args.p, args.e)
    face_detect.load_model()

    landmarks_model = LandmarksDetection(landmarks_model_path, args.d, args.e)
    landmarks_model.load_model()

    head_pose = Head_Pose(hpose_model_path, args.d, args.e)
    head_pose.load_model()

    gaze_estimation = Gaze_Estimation(gaze_model_path, args.d, args.e)
    gaze_estimation.load_model()

    if input_type == 'cam':
        feed = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_files):
            logging.error("Could not find the input file")
            exit(1)
        feed = InputFeeder(input_type='video', input_file=input_files)

    
    try:
        feed.load_data()
    except Exception:
        logging.error("Could not load data from input file", exc_info=True)
        exit(1)

    # create the controller once, outside the frame loop
    mouse = MouseController(precision='low', speed='fast')
    key = None  # last pressed key; updated at the bottom of each frame

    for batch in feed.next_batch():

        try:
            cropped_face, coords = face_detect.predict(batch)

            if type(cropped_face) == int:
                logging.info("Face not detected")
                if key == 27:
                    break
                continue
            
            cropped_left_eye, cropped_right_eye, left_eye_cord, right_eye_cord = landmarks_model.predict(cropped_face)
            head_angles = head_pose.predict(cropped_face)
            x, y = gaze_estimation.predict(cropped_left_eye, cropped_right_eye, head_angles)
        
        except Exception:
            logging.error("An error occurred while running predictions", exc_info=True)
            continue  # skip drawing and mouse movement for this frame
        
        if flags:

            if flags == 'FD':
                cv2.rectangle(batch, (coords[0], coords[1]), (coords[2], coords[3]), (255, 0, 0), 3)
            if flags == 'FL':
                cv2.rectangle(cropped_face, (left_eye_cord[0], left_eye_cord[1]), (left_eye_cord[2], left_eye_cord[3]), (255, 0, 0), 3)
                cv2.rectangle(cropped_face, (right_eye_cord[0], right_eye_cord[1]), (right_eye_cord[2], right_eye_cord[3]), (255, 0, 0), 3)
            if flags == 'HP':
                cv2.putText(batch,
                "Head angles: yaw={:.2f} , pitch={:.2f}, roll={:.2f}".format(
                    head_angles[0], head_angles[1], head_angles[2]),
                            (20, 40),
                            cv2.FONT_HERSHEY_COMPLEX,
                            1, (255, 0, 255), 2)
            if flags == 'GE':
                
                left_eye_mid_x = (left_eye_cord[2] - left_eye_cord[0]) / 2 + left_eye_cord[0]
                left_eye_mid_y = (left_eye_cord[3] - left_eye_cord[1]) / 2 + left_eye_cord[1]

                right_eye_mid_x = (right_eye_cord[2] - right_eye_cord[0]) / 2 + right_eye_cord[0]
                right_eye_mid_y = (right_eye_cord[3] - right_eye_cord[1]) / 2 + right_eye_cord[1]

                left_eye_new_x = int(left_eye_mid_x + x * 160)
                left_eye_new_y = int(left_eye_mid_y - y * 160)
                right_eye_new_x = int(right_eye_mid_x + x * 160)
                right_eye_new_y = int(right_eye_mid_y - y * 160)
                cv2.line(cropped_face, (int(left_eye_mid_x), int(left_eye_mid_y)), (left_eye_new_x, left_eye_new_y), (255, 0, 255), 5)
                cv2.line(cropped_face, (int(right_eye_mid_x), int(right_eye_mid_y)), (right_eye_new_x, right_eye_new_y), (255, 0, 255), 5)
                
        

                
                
        mouse.move(x, y)
        
        
        batch = imutils.resize(batch, width=500)
        cv2.imshow('frame', batch)
        key = cv2.waitKey(1) & 0xFF
    feed.close()
Example #10
def main(args):
    # enable logging for the function
    logger = logging.getLogger(__name__)

    # grab the parsed parameters
    faceModel = args.m_f
    facial_LandmarksModel = args.m_l
    headPoseEstimationModel = args.m_h
    GazeEstimationModel = args.m_g
    device = args.d
    inputFile = args.i
    output_path = args.o_p
    modelArchitecture = args.modelAr
    visualization_flag = args.vf

    # initialize feed
    single_image_format = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if inputFile.split(".")[-1].lower() in single_image_format:
        feed = InputFeeder('image', inputFile)
    elif args.i == 'cam':
        feed = InputFeeder('cam')
    else:
        feed = InputFeeder('video', inputFile)

    ##Load model time face detection
    faceStart_model_load_time = time.time()
    faceDetection = FaceDetection(faceModel, device)
    faceModelView = faceDetection.load_model()
    faceDetection.check_model()
    total_facemodel_load_time = time.time() - faceStart_model_load_time

    ##Load model time headpose estimation
    headposeStart_model_load_time = time.time()
    headPose = headPoseEstimation(headPoseEstimationModel, device)
    headPoseModelView = headPose.load_model()
    headPose.check_model()
    headposeTotal_model_load_time = time.time() - headposeStart_model_load_time

    ##Load model time face_landmarks estimation
    face_landmarksStart_model_load_time = time.time()
    face_landmarks = Face_landmarks(facial_LandmarksModel, device)
    faceLandmarksModelView = face_landmarks.load_model()
    face_landmarks.check_model()
    face_landmarksTotal_model_load_time = time.time(
    ) - face_landmarksStart_model_load_time

    ##Load model time gaze estimation
    GazeEstimationStart_model_load_time = time.time()
    GazeEstimation = Gaze_Estimation(GazeEstimationModel, device)
    GazeModelView = GazeEstimation.load_model()
    GazeEstimation.check_model()
    GazeEstimationTotal_model_load_time = time.time(
    ) - GazeEstimationStart_model_load_time

    if modelArchitecture == 'yes':
        print("The model architecture of gaze mode is ", GazeModelView)
        print("model architecture for landmarks is", faceLandmarksModelView)
        print("model architecture for headpose is", headPoseModelView)
        print("model architecture for face is", faceModelView)

    # count the number of frames
    frameCount = 0
    w, h = feed.load_data()
    for _, frame in feed.next_batch():

        if not _:
            break
        frameCount += 1
        key = cv2.waitKey(60)
        start_imageface_inference_time = time.time()
        imageface = faceDetection.predict(frame, w, h)
        imageface_inference_time = time.time() - start_imageface_inference_time

        if 'm_f' in visualization_flag:
            cv2.imshow('cropped face', imageface)

        if type(imageface) == int:
            logger.info("no face detected")
            if key == 27:
                break
            continue

        start_imagePose_inference_time = time.time()
        imageAngles, imagePose = headPose.predict(imageface)
        imagePose_inference_time = time.time() - start_imagePose_inference_time

        if 'm_h' in visualization_flag:
            cv2.imshow('Head Pose Angles', imagePose)

        start_landmarkImage_inference_time = time.time()
        leftEye, rightEye, landmarkImage = face_landmarks.predict(imageface)
        landmarkImage_inference_time = time.time(
        ) - start_landmarkImage_inference_time

        if leftEye is None or rightEye is None:
            logger.info(
                "image probably too dark or eyes covered, hence could not detect landmarks"
            )
            continue

        if 'm_l' in visualization_flag:
            cv2.imshow('Face output', landmarkImage)

        start_GazeEstimation_inference_time = time.time()
        x, y = GazeEstimation.predict(leftEye, rightEye, imageAngles)
        GazeEstimation_inference_time = time.time(
        ) - start_GazeEstimation_inference_time

        if 'm_g' in visualization_flag:
            #             cv2.putText(landmarkedFace, "Estimated x:{:.2f} | Estimated y:{:.2f}".format(x,y), (10,20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0,255,0),1)
            cv2.imshow('Gaze Estimation', landmarkImage)

        mouseVector = MouseController('medium', 'fast')

        if frameCount % 5 == 0:
            mouseVector.move(x, y)

        if key == 27:
            break

        if imageface_inference_time != 0 and landmarkImage_inference_time != 0 and imagePose_inference_time != 0 and GazeEstimation_inference_time != 0:

            fps_face = 1 / imageface_inference_time
            fps_landmark = 1 / landmarkImage_inference_time
            fps_headpose = 1 / imagePose_inference_time
            fps_gaze = 1 / GazeEstimation_inference_time

            with open(
                    os.path.join(output_path, device, 'face',
                                 'face_stats.txt'), 'w') as f:
                f.write(str(imageface_inference_time) + '\n')
                f.write(str(fps_face) + '\n')
                f.write(str(total_facemodel_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'landmark',
                                 'landmark_stats.txt'), 'w') as f:
                f.write(str(landmarkImage_inference_time) + '\n')
                f.write(str(fps_landmark) + '\n')
                f.write(str(face_landmarksTotal_model_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'headpose',
                                 'headpose_stats.txt'), 'w') as f:
                f.write(str(imagePose_inference_time) + '\n')
                f.write(str(fps_headpose) + '\n')
                f.write(str(headposeTotal_model_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'gaze',
                                 'gaze_stats.txt'), 'w') as f:
                f.write(str(GazeEstimation_inference_time) + '\n')
                f.write(str(fps_gaze) + '\n')
                f.write(str(GazeEstimationTotal_model_load_time) + '\n')

    logger.info("The End")
    VIS = visualize(output_path, device)
    VIS.visualize1()
    VIS.visualize2()
    VIS.visualize3()
    cv2.destroyAllWindows()
    feed.close()
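The visualize class used at the end of this example is not shown. A hypothetical stand-in that matches the calls VIS.visualize1()..visualize3() and the *_stats.txt layout written inside the loop (three lines per file: inference time, FPS, load time).

import os
import matplotlib.pyplot as plt

class visualize:
    # Hypothetical stats plotter; the real class may differ.
    def __init__(self, output_path, device):
        self.output_path = output_path
        self.device = device
        self.models = ['face', 'landmark', 'headpose', 'gaze']

    def _read_stats(self, row):
        # each stats file holds: inference time, fps, load time (one per line)
        values = []
        for model in self.models:
            path = os.path.join(self.output_path, self.device, model,
                                model + '_stats.txt')
            with open(path) as f:
                values.append(float(f.readlines()[row]))
        return values

    def _plot(self, values, title):
        plt.figure()
        plt.bar(self.models, values)
        plt.title(title)
        plt.savefig(os.path.join(self.output_path, title + '.png'))
        plt.close()

    def visualize1(self):
        self._plot(self._read_stats(0), 'inference_time')

    def visualize2(self):
        self._plot(self._read_stats(1), 'fps')

    def visualize3(self):
        self._plot(self._read_stats(2), 'model_load_time')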
def main(args):
    fd = Face_Detection(
        "models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001",
        args.device, args.extensions)
    start = time.time()
    fd.load_model()
    logging.info(f"------Loading Times {args.precision}------")
    logging.info("Face Detection: {:.5f} sec".format(time.time() - start))

    fl = Facial_Landmarks(
        f"models/intel/landmarks-regression-retail-0009/{args.precision}/landmarks-regression-retail-0009",
        args.device, args.extensions)
    start = time.time()
    fl.load_model()
    logging.info("Facial Landmarks: {:.5f} sec".format(time.time() - start))
    hp = Head_Pose_Estimation(
        f"models/intel/head-pose-estimation-adas-0001/{args.precision}/head-pose-estimation-adas-0001",
        args.device, args.extensions)
    start = time.time()
    hp.load_model()
    logging.info("Head Pose Estimation: {:.5f} sec".format(time.time() -
                                                           start))
    gs = Gaze_Estimation(
        f"models/intel/gaze-estimation-adas-0002/{args.precision}/gaze-estimation-adas-0002",
        args.device, args.extensions)
    start = time.time()
    gs.load_model()
    logging.info("Gaze Estimation: {:.5f} sec".format(time.time() - start))

    input_feed = InputFeeder(args.type, args.input)
    input_feed.load_data()

    mc = MouseController("high", "fast")

    inf_time = [0, 0, 0, 0, 0]  # fd, fl, hp, gs, frames
    for frame in input_feed.next_batch():

        if frame is not None:
            inf_time[4] += 1
            # face detection
            start = time.time()
            face_frame = fd.predict(frame.copy())
            inf_time[0] += time.time() - start
            # eye detection through facial landmarks
            start = time.time()
            left_eye_image, left_x, left_y, right_eye_image, right_x, right_y = fl.predict(
                face_frame)
            inf_time[1] += time.time() - start
            # head pose
            start = time.time()
            yaw, pitch, roll = hp.predict(face_frame)
            inf_time[2] += time.time() - start
            # gaze estimation
            start = time.time()
            gaze_vector = gs.predict(left_eye_image, right_eye_image,
                                     (yaw, pitch, roll))
            inf_time[3] += time.time() - start

            # mouse move
            mc.move(gaze_vector[0], gaze_vector[1])

            if args.visualize:
                face_frame = cv2.circle(face_frame, (right_x, right_y), 5,
                                        (255, 0, 0), -5)
                face_frame = cv2.circle(face_frame, (left_x, left_y), 5,
                                        (255, 0, 0), -5)
                cv2.putText(
                    face_frame,
                    "yaw:{:.2f} - pitch:{:.2f} - roll:{:.2f}".format(
                        yaw, pitch, roll), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
                    0.3, (255, 0, 0), 1)
                x, y, z = gaze_vector
                cv2.putText(
                    face_frame,
                    "gaze-vector x:{:.2f} - y:{:.2f} - z:{:.2f}".format(
                        x, y, z), (20, 40), cv2.FONT_HERSHEY_SIMPLEX,
                    0.3, (255, 0, 0), 1)
                cv2.imshow('left eye', left_eye_image)
                cv2.imshow('right eye', right_eye_image)
                cv2.arrowedLine(
                    face_frame, (left_x, left_y),
                    (left_x + int(x * 100), left_y + int(-y * 100)),
                    (0, 0, 255), 2)
                cv2.arrowedLine(
                    face_frame, (right_x, right_y),
                    (right_x + int(x * 100), right_y + int(-y * 100)),
                    (0, 0, 255), 2)
                cv2.imshow('face detection', face_frame)
                cv2.waitKey(60)

        else:
            break
    # inference benchmarks

    logging.info(f"------Inference Times {args.precision}------")
    logging.info("Face Detection: {:.5f} sec".format(inf_time[0] /
                                                     inf_time[4]))
    logging.info("Facial Landmarks: {:.5f} sec".format(inf_time[1] /
                                                       inf_time[4]))
    logging.info("Head Pose Estimation: {:.5f} sec".format(inf_time[2] /
                                                           inf_time[4]))
    logging.info("Gaze Estimation: {:.5f} sec".format(inf_time[3] /
                                                      inf_time[4]))
    input_feed.close()
    cv2.destroyAllWindows()
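This main() reads args.precision, args.device, args.extensions, args.type, args.input, and args.visualize, but its build_argparser() is not shown. A hypothetical parser consistent with those accesses; the flag names and defaults are assumptions.

import argparse

def build_argparser():
    # Hypothetical parser matching the attributes this main() reads.
    parser = argparse.ArgumentParser()
    parser.add_argument("--precision", default="FP32",
                        choices=["FP32", "FP16", "FP16-INT8"])
    parser.add_argument("--device", default="CPU")
    parser.add_argument("--extensions", default=None)
    parser.add_argument("--type", default="video",
                        choices=["video", "cam", "image"])
    parser.add_argument("--input", default="bin/demo.mp4")  # assumed default
    parser.add_argument("--visualize", action="store_true")
    return parser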
Example #12
def infer_on_video(args):
    draw_flag = args.b
    device = args.device
    input_path = args.input_path
    input_type = args.input_type
    output_path = args.output_path
    precision = args.accuracy

    locations = {}

    locations[FACE_DETECTION_MODEL] = os.path.join(
        MODEL_PATH, FACE_DETECTION_MODEL, 'INT1',
        FACE_DETECTION_MODEL + ".xml")

    if precision is not None:
        log.info(
            "The face-detection-adas-binary-0001 always use INT1 precision")

    for model_name in [
            FACIAL_LANDMARKS_DETECTION_MODEL, HEAD_POSE_ESTIMATION_MODEL,
            GAZE_ESTIMATION_MODEL
    ]:
        locations[model_name] = find_exist_model_file(precision, model_name)

    # Initialize feeder
    feed = InputFeeder(input_type=input_type, input_file=input_path)
    feed.load_data()

    # Grab the shape of the input
    input_width = feed.getWidth()
    input_height = feed.getHeight()

    # Create a video writer for the output video
    # out = cv2.VideoWriter('../out.mp4', CODEC, 30, (input_width,input_height))

    mouse_controller = MouseController(MOUSE_PRECISION, MOUSE_SPEED)

    start_model_load_time = time.time()

    # model initialization
    face_detection = Face_Detection(locations[FACE_DETECTION_MODEL],
                                    device,
                                    extensions=CPU_EXTENSION)
    facial_landmarks_detection = Facial_Landmarks_Detection(
        locations[FACIAL_LANDMARKS_DETECTION_MODEL],
        device,
        extensions=CPU_EXTENSION)
    head_pose_estimation = Head_Pose_Estimation(
        locations[HEAD_POSE_ESTIMATION_MODEL],
        device,
        extensions=CPU_EXTENSION)
    gaze_estimation = Gaze_Estimation(locations[GAZE_ESTIMATION_MODEL],
                                      device,
                                      extensions=CPU_EXTENSION)

    total_model_load_time = time.time() - start_model_load_time

    counter = 0
    start_inference_time = time.time()

    # Process frames until the video ends, or process is exited
    for ret, batch in feed.next_batch(BATCH_SIZE):
        if not ret:
            break
        counter += 1
        gaze_lines = []
        out_frame = batch.copy()

        key = cv2.waitKey(60)

        # Face detection
        face_detection_output = face_detection.predict(batch)

        # face_detection_output = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
        face_xmin = abs(int(face_detection_output[3] * input_width))
        face_ymin = abs(int(face_detection_output[4] * input_height))
        face_xmax = abs(int(face_detection_output[5] * input_width))
        face_ymax = abs(int(face_detection_output[6] * input_height))

        if (face_ymax - face_ymin) <= 0 or (face_xmax - face_xmin) <= 0:
            continue

        # Crop the face image
        face = batch[face_ymin:face_ymax, face_xmin:face_xmax]

        if draw_flag:
            cv2.rectangle(out_frame, (face_xmin, face_ymin),
                          (face_xmax, face_ymax), (255, 255, 0), 2)

        # Find facial landmarks (to find eyes)
        eyes = facial_landmarks_detection.predict(face)

        # Estimate head orientation (yaw=Y, pitch=X, roll=Z)
        yaw, pitch, roll = head_pose_estimation.predict(face)

        eye_images = []
        for eye in eyes:
            face_height, face_width, _ = face.shape
            eye_xmin = int(eye[_X] * face_width - EYE_RADIUS)
            eye_ymin = int(eye[_Y] * face_height - EYE_RADIUS)
            eye_xmax = int(eye[_X] * face_width + EYE_RADIUS)
            eye_ymax = int(eye[_Y] * face_height + EYE_RADIUS)

            if (eye_ymax - eye_ymin) <= 0 or (eye_xmax - eye_xmin) <= 0:
                continue

            # crop and resize
            eye_images.append(face[eye_ymin:eye_ymax,
                                   eye_xmin:eye_xmax].copy())

            # Draw eye boundary boxes
            if draw_flag:
                cv2.rectangle(out_frame,
                              (eye_xmin + face_xmin, eye_ymin + face_ymin),
                              (eye_xmax + face_xmin, eye_ymax + face_ymin),
                              (0, 255, 0), 2)

        # gaze estimation
        gaze_vec_norm = gaze_estimation.predict(eye_images, [yaw, pitch, 0])

        cos = math.cos(math.radians(roll))
        sin = math.sin(math.radians(roll))
        tmpx = gaze_vec_norm[0] * cos + gaze_vec_norm[1] * sin
        tmpy = -gaze_vec_norm[0] * sin + gaze_vec_norm[1] * cos
        gaze_vec_norm = [tmpx, tmpy]

        # Store gaze line coordinates
        for eye in eyes:
            eye[_X] = int(eye[_X] * face_width)
            eye[_Y] = int(eye[_Y] * face_height)
            gaze_lines.append(
                get_gaze_line(eye, face_xmin, face_ymin, gaze_vec_norm))

        if draw_flag:
            # Drawing gaze lines
            for gaze_line in gaze_lines:
                start_point = (gaze_line[0][_X], gaze_line[0][_Y])
                end_point = (gaze_line[1][_X], gaze_line[1][_Y])

                draw_gaze_line(out_frame, start_point, end_point)

        # start point of middle gaze line
        start_point = ((gaze_lines[0][0][_X] + gaze_lines[1][0][_X]) / 2,
                       (gaze_lines[0][0][_Y] + gaze_lines[1][0][_Y]) / 2)

        # end point of middle gaze line
        end_point = ((gaze_lines[0][1][_X] + gaze_lines[1][1][_X]) / 2,
                     (gaze_lines[0][1][_Y] + gaze_lines[1][1][_Y]) / 2)

        gaze_mid_line = [start_point, end_point]

        mouse_point = get_mouse_point(gaze_mid_line, input_width, input_height)

        log.debug("mouse_point[_X], mouse_point[_Y]: %s, %s", mouse_point[_X],
                  mouse_point[_Y])

        # cv2.circle(out_frame, mouse_point, 10, (255, 255, 255), -1)
        mouse_controller.move(mouse_point[_X], mouse_point[_Y])

        # write out_frames with batch size
        for _ in range(BATCH_SIZE):
            cv2.imshow("video", out_frame)
            # out.write(out_frame)

        if key == 27:
            break

    total_inference_time = time.time() - start_inference_time
    total_inference_time = round(total_inference_time, 1)
    fps = counter / total_inference_time

    with open(os.path.join(output_path, 'stats.txt'), 'w') as f:
        f.write(str(total_inference_time) + '\n')
        f.write(str(fps) + '\n')
        f.write(str(total_model_load_time) + '\n')

    # Release the out writer, capture, and destroy any OpenCV windows
    log.info("Input stream ended...")
    cv2.destroyAllWindows()
    # out.release()
    feed.close()
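The rotation applied to gaze_vec_norm above compensates for head roll: the model's gaze vector is expressed relative to the head, so the (x, y) components are rotated by the roll angle before the gaze lines are drawn. The same step, factored into a standalone helper for clarity:

import math

def compensate_roll(gaze_vec, roll_deg):
    # Rotate the (x, y) gaze components by the head's roll angle so the
    # drawn gaze line stays aligned with the image when the head is tilted.
    cos = math.cos(math.radians(roll_deg))
    sin = math.sin(math.radians(roll_deg))
    return [gaze_vec[0] * cos + gaze_vec[1] * sin,
            -gaze_vec[0] * sin + gaze_vec[1] * cos]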
Example #13
def main():
    
    # loads argparser
    args = build_argparser().parse_args()
    input_type = args.input_type
    input_file = args.video
    output_path = args.output_path
    threshold = args.threshold
    extension = args.extension
    version = args.version
    device = args.device
    face_model = args.fd_model
    facial_model = args.fl_model
    headpose_model = args.hp_model
    gaze_model = args.ga_model
    show_image = args.show_image

    # Start logger
    # Basic logger
    log = setup_logger('basic_logger', 'log/logging_basic.log')
    log.info("Start computer_pointer.py")
    # Time logger
    log_time = setup_logger('time_logger', "log/logging_time.log")
    log_time.info("Start time logger")

    # Get the OpenVINO module path (used here as a version hint)
    openvino_version = openvino.__file__
    print("OpenVINO version: " + str(openvino_version))
        
    # Load Facedetection
    facedetection = Facedetection(face_model, threshold, device, extension, version)
    print("Load class Facedetection = OK")
    print("--------")
    start_load_time_face = time.time()
    facedetection.load_model()
    print("Load model facedetection = Finished")
    log.info("Load model facedetection = Finished")
    print("--------")
    total_model_load_time_face = (time.time() - start_load_time_face)*1000
    log_time.info('Facedetection load time: ' + str(round(total_model_load_time_face, 3)))
    
    # Load facial landmark
    faciallandmarks = Facial_Landmarks(facial_model, threshold, device, extension, version)
    print("Load class Facial_Landmarks = OK")
    print("--------")
    start_load_time_facial = time.time()
    faciallandmarks.load_model()
    print("Load model Facial_Landmarks = Finished")
    log.info("Load model Facial_Landmarks = Finished")
    print("--------")
    total_model_load_time_facial = (time.time() - start_load_time_facial)*1000
    log_time.info('Facial_Landmarks load time: ' + str(round(total_model_load_time_facial, 3)))
    
    # Load head_pose_estimation
    headposeestimation = Head_Pose_Estimation(headpose_model, device, extension, version, threshold)
    print("Load class head_pose_estimation = OK")
    print("--------")
    start_load_time_headpose = time.time()
    headposeestimation.load_model()
    print("Load model head_pose_estimation = Finished")
    log.info("Load model head_pose_estimation = Finished")
    print("--------")
    total_model_load_time_headpose = (time.time() - start_load_time_headpose)*1000
    log_time.info('Headpose load time: ' + str(round(total_model_load_time_headpose, 3)))
    
    # Load gaze_estimation
    gazeestimation = Gaze_Estimation(gaze_model, threshold, device, extension, version)
    print("Load class gaze_estimation = OK")
    print("--------")
    start_load_time_gaze = time.time()
    gazeestimation.load_model()
    print("Load model gaze_estimation = Finished")
    log.info("Load model gaze_estimation = Finished")
    print("--------")
    total_model_load_time_gaze = (time.time() - start_load_time_gaze)*1000
    total_model_load_time = (time.time() - start_load_time_face)*1000
    
    log_time.info('Gaze load time: ' + str(round(total_model_load_time_gaze, 3)))
    log_time.info('Total model load time: ' + str(round(total_model_load_time, 3)))
    log_time.info('##################')
    log.info('All models are loaded!')
    
    feed = InputFeeder(input_type, input_file)
    log.info('Input Feeder is loaded')
    feed.load_data()

    # Output video
    initial_w = int(feed.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(feed.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(feed.cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_video = cv2.VideoWriter(output_path, fourcc, fps, (initial_w, initial_h))

    inference_time_face_total = []
    inference_time_facial_total = []
    inference_time_headpose_total = []
    inference_time_gaze_total = []
    inference_time_total = []
    
    try:
        for batch in feed.next_batch():
            if batch is None:
                break
            
            
            ## facedetection ##
                ## Inference time
            start_inference_time_face = time.time()
            print("Start facedetection")
            log.info("Start facedetection")
            print("Cap is feeded to the face detection!")
            face_batch = batch.copy()
            face_image, face_cropped, coords= facedetection.predict(face_batch)
                ## Average inference time
            inference_time_face = (time.time() - start_inference_time_face)*1000
            inference_time_face_total.append(inference_time_face)
            len_face = len(inference_time_face_total)
            avg_inference_time_face = sum(inference_time_face_total)/len_face
            log_time.info(('Average face inference time: ' + str(avg_inference_time_face)))
            log.info('Inference facedetetion is finished')
            
            if not coords:
                print("No face detected")
                log.debug("No face detected")
                continue
            
            print("The video from the face detection is writen to the output path")
            out_video.write(face_image)
            print("End facedetection")

            ## faciallandmark ##
                ## Inference time
            start_inference_time_facial = time.time()
            if (face_cropped is None) or (len(face_cropped)==0):
                print("No Face above threshold detected")
                log.error("No Face above threshold detected")
            else:
                print("Start faciallandmark")
                log.info("Start faciallandmark")
                print("The cropped face image is feeded to the faciallandmarks detection.")
                left_eye_image, right_eye_image, nose_image, lip_corner_left_image, lip_corner_right_image= faciallandmarks.predict(face_cropped.copy())
                print("End faciallandmarks")
                log.info("End faciallandmarks")

                ## Average inference time
                inference_time_facial = (time.time() - start_inference_time_facial)*1000
                inference_time_facial_total.append(inference_time_facial)
                len_facial = len(inference_time_facial_total)
                avg_inference_time_facial = sum(inference_time_facial_total)/len_facial
                log_time.info(('Average facial inference time: ' + str(avg_inference_time_facial)))

                # headposeestimation
                ## Inference time
                
                start_inference_time_headpose = time.time()
                print("Start headposeestimation")
                log.info("Start headposeestimation")
                print("The cropped face image is feeded to the headposeestimation.")
                head_pose_angles = headposeestimation.predict(face_cropped)
                #print("Head pose angeles: ", head_pose_angles)
                print("End faciallheadposeestimationandmarks")
                log.info("End faciallheadposeestimationandmarks")

                ## Average inference time
                inference_time_headpose = (time.time() - start_inference_time_headpose)*1000
                inference_time_headpose_total.append(inference_time_headpose)
                len_headpose = len(inference_time_headpose_total)
                avg_inference_time_headpose = sum(inference_time_headpose_total)/len_headpose
                log_time.info(('Average headpose inference time: ' + str(avg_inference_time_headpose)))

                # gazeestimation
                ## Inference time
                
                start_inference_time_gaze = time.time()
                print("Start gazeestimation")
                log.info("Start gazeestimation")
                gaze_result, tmpX, tmpY, gaze_vector02 = gazeestimation.predict(left_eye_image, right_eye_image,
                                                                                head_pose_angles)
                print("End gazeestimation")
                #print('Gaze results:', gaze_result)
                log.info("Gaze results: ({})".format(str(gaze_result)))
                log.info("End gazeestimation")

                ## Average inference time
                inference_time_gaze = (time.time() - start_inference_time_gaze)*1000
                inference_time_gaze_total.append(inference_time_gaze)
                len_gaze = len(inference_time_gaze_total)
                avg_inference_time_gaze = sum(inference_time_gaze_total)/len_gaze
                log_time.info(('Average gaze inference time: ' + str(avg_inference_time_gaze)))

                ## Total Inference time
                inference_time_total.append((time.time() - start_inference_time_face)*1000)
                inference_time_all_models = sum(inference_time_total)
                log_time.info(('Total inference time: ' + str(inference_time_all_models)))
                log_time.info('----')

                # If show_image is 'yes' then the output images are displayed
                if show_image == 'yes':
                    cv2.imshow('Cropped Face', face_cropped)
                    cv2.imshow('Left eye', left_eye_image)
                    cv2.imshow('right eye', right_eye_image)
                    cv2.imshow('Nose', nose_image)
                    cv2.imshow('Lip left', lip_corner_left_image)
                    cv2.imshow('Lip right', lip_corner_right_image)
                    
                    cv2.waitKey(28)
                
                # mouse controller
                log.info('Start mousecontroller')
                mousecontroller = MouseController('medium', 'fast')
                mousecontroller.move(tmpX, tmpY)

        feed.close()
        cv2.destroyAllWindows()
        log.info('End of program')

    except Exception as e:
        print("Could not run inference: ", e)
        log.error(e)
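setup_logger() is not included in the snippet above. A hypothetical version consistent with its use: a named logger with its own file handler, so the basic and timing logs stay separate.

import logging

def setup_logger(name, log_file, level=logging.INFO):
    # Hypothetical helper: one named logger per file so the two logs don't mix.
    handler = logging.FileHandler(log_file)
    handler.setFormatter(
        logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger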
Example #14
def model_pipelines(args):
    
    # Parameters which were parsed are assigned
    
    #device = args.dev
    #customLayers = args.lay
    inputFile = args.inp
    visual_flag = args.vf
    
    faceDetectionModel = args.mfd
    landmarksDetectionModel = args.mld
    headPoseEstimationModel = args.mhp
    gazeDetectionModel = args.mgd
    start_time = time.time()
    # Logging is enabled 
    log = logging.getLogger(__name__)
    
    log.info('----------THE BEGINNING----------')
    log.info('Start Time: {0}'.format(str(start_time)))

    # The feed is initialised
    single_image = ['jpg','tif','png','jpeg', 'bmp']
    if inputFile.split(".")[-1].lower() in single_image:
        input_feed = InputFeeder('image', inputFile)
    elif args.inp == 'cam':
        input_feed = InputFeeder('cam')
    else:
        input_feed = InputFeeder('video', inputFile)

    # Feed data is loaded
    log.info('Loading data...')
    input_feed.load_data()
    log.info('Data Loaded. Beginning inference...')

    # The models are initialised and loaded here

    # time each model's initialization separately so the numbers don't overlap
    face_model_load_start_time = time.time()
    ppl_fd = Face_Detection(faceDetectionModel)
    face_model_load_time = time.time() - face_model_load_start_time

    landmark_model_load_start_time = time.time()
    ppl_fl = Facial_Landmarks_Detection(landmarksDetectionModel)
    landmark_model_load_time = time.time() - landmark_model_load_start_time

    headpose_model_load_start_time = time.time()
    ppl_hd = Head_Pose_Estimation(headPoseEstimationModel)
    headpose_model_load_time = time.time() - headpose_model_load_start_time

    gaze_model_load_start_time = time.time()
    ppl_ge = Gaze_Estimation(gazeDetectionModel)
    gaze_model_load_time = time.time() - gaze_model_load_start_time
    
    log.info('Face Detection object initialized')
    log.info('Facial Landmarks object initialized')
    log.info('Head Pose object initialized')
    log.info('Gaze object initialized')
    
    log.info('All models loaded and checked')
    
    load_time = [face_model_load_time, landmark_model_load_time, headpose_model_load_time, gaze_model_load_time]
      
    # count the number of frames
    frameCount = 0

    # collate frames from the feeder and feed into the detection pipelines
    for _, frame in input_feed.next_batch():

        if not _:
            break
        frameCount += 1
        
        if frameCount % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))

        key = cv2.waitKey(100)
        
        # Get the time for the model inference
        face_inference_start_time = time.time()
        face_crop = ppl_fd.predict(frame)
        face_inference_time = time.time() - face_inference_start_time
        
        if 'mfd' in visual_flag:
            cv2.imshow('The cropped face', face_crop)
            
        if type(face_crop) == int:
            log.info("No face can be detected")
            
            if key == 27:
                break
            
            continue
        
        # Get the time for the model inference
        landmark_inference_start_time = time.time()
        eye_image_left, eye_image_right, face_landmarked = ppl_fl.predict(face_crop.copy())
        landmark_inference_time = time.time() - landmark_inference_start_time
       
        # Get face landmark results
        if 'mld' in visual_flag:
            cv2.imshow('Face output', face_landmarked)
            
        if eye_image_left is None or eye_image_right is None:
            log.info("Landmarks could not be detected, check that the eyes are visible and the image is bright")
            continue
        
        # Get the time for the model inference
        headpose_inference_start_time = time.time()
        head_pose_angles, head_pose_image = ppl_hd.predict(face_crop.copy())   
        headpose_inference_time = time.time() - headpose_inference_start_time
        
        # Get head pose results
        if 'mhp' in visual_flag:
            cv2.imshow('Head Pose Angles', head_pose_image)
        
        # Get the time for the model inference
        gaze_inference_start_time = time.time()
        coord_x, coord_y = ppl_ge.predict(eye_image_left ,eye_image_right, head_pose_angles)
        gaze_inference_time = time.time() - gaze_inference_start_time

        # Get gaze detection results
        if 'mgd' in visual_flag:
            cv2.putText(face_landmarked, "Estimated x:{:.2f} | Estimated y:{:.2f}".format(coord_x, coord_y), (10,20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0,255,0),1)
            cv2.imshow('Gaze Estimation', face_landmarked)


        mCoord = MouseController('medium','fast')
        
        # Move the mouse based on the coordinates received
        if frameCount % 5 == 0:
            mCoord.move(coord_x, coord_y)

        if key == 27:
            break
        
        inference_time = [face_inference_time, landmark_inference_time, headpose_inference_time, gaze_inference_time]
        results(args, inference_time, load_time)
        
        if key == ord('x'):
            log.warning('KeyboardInterrupt: `X` was pressed')
            results(args, inference_time, load_time)
            sys.exit()
        
        
    log.info('Elapsed Time: {0}'.format(str(time.time() - start_time)))
    log.info('----------THE END----------')
    cv2.destroyAllWindows()
    input_feed.close()
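results() is called every frame with the four per-model inference times and load times, but it is not defined here. A hypothetical sketch that just logs them; the real helper presumably writes files or plots, a detail the snippet does not show.

import logging

def results(args, inference_time, load_time):
    # Hypothetical results() matching the calls in model_pipelines().
    names = ['face_detection', 'landmarks', 'head_pose', 'gaze']
    for name, inf, load in zip(names, inference_time, load_time):
        logging.info("%s: inference %.4fs, load %.4fs", name, inf, load)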
def main():

    try:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[
                logging.FileHandler("Computer_Pointer_Controller.log"),
                logging.StreamHandler()
            ])
    except Exception:
        print("Log file cannot be created")

    args = build_argparser()
    video_path = args.i
    visualize = args.flags
    count = 0
    fd_inference_time = 0
    fld_inference_time = 0
    hp_inference_time = 0
    ge_inference_time = 0

    MC = MouseController('medium', 'fast')

    logging.info("############## Model Load Time #############")

    start_time = time.time()
    first_model_time = start_time
    FD = Face_Detection(device=args.d, threshold=args.prob, extensions=args.l)
    FD.load_model(model_path=args.f)
    logging.info("Face Detection Model: {:.3f}ms".format(
        1000 * (time.time() - first_model_time)))

    second_model_time = time.time()
    FLD = Facial_Landmarks_Detection(device=args.d, extensions=args.l)
    FLD.load_model(model_path=args.fl)
    logging.info("Facial Landmarks Detection Model: {:.3f}ms".format(
        1000 * (time.time() - second_model_time)))

    third_model_time = time.time()
    HPE = Head_Pose_Estimation(device=args.d, extensions=args.l)
    HPE.load_model(model_path=args.hp)
    logging.info("Head Pose Estimation Model: {:.3f}ms".format(
        1000 * (time.time() - third_model_time)))

    fourth_model_time = time.time()
    GE = Gaze_Estimation(device=args.d, extensions=args.l)
    GE.load_model(model_path=args.g)
    logging.info("Gaze Estimation Model: {:.3f}ms".format(
        1000 * (time.time() - fourth_model_time)))
    logging.info("############## End ######################### ")

    Total_Model_Load_Time = 1000 * (time.time() - start_time)

    ##### LOADING VIDEO FILE #####

    if (video_path == "cam"):
        IF = InputFeeder("cam")
    else:
        IF = InputFeeder("video", video_path)
    IF.load_data()

    ##### MODEL INFERENCE #####

    start_inf_time = time.time()
    for flag, frame in IF.next_batch():

        if not flag:
            break

        if (count % 5 == 0):
            cv2.imshow('frame', cv2.resize(frame, (500, 500)))

        key = cv2.waitKey(60)

        count = count + 1

        start_time_1 = time.time()
        face, face_coordinates = FD.predict(frame, args.it)
        fd_inference_time += (time.time() - start_time_1)

        start_time_2 = time.time()
        left_eye_image, right_eye_image, eye_coordinates = FLD.predict(
            face, args.it)
        fld_inference_time += (time.time() - start_time_2)

        start_time_3 = time.time()
        head_pose_angles = HPE.predict(face, args.it)
        hp_inference_time += (time.time() - start_time_3)

        start_time_4 = time.time()
        mouse_coordinates, gaze_vector = GE.predict(left_eye_image,
                                                    right_eye_image,
                                                    head_pose_angles, args.it)
        ge_inference_time += (time.time() - start_time_4)

        if (len(visualize) != 0):
            frame_visualize = frame.copy()

            if ("fd" in visualize):
                if (len(visualize) == 1):
                    cv2.rectangle(frame_visualize,
                                  (face_coordinates[0], face_coordinates[1]),
                                  (face_coordinates[2], face_coordinates[3]),
                                  (255, 0, 255), 2)
                else:
                    frame_visualize = face.copy()

            if ("fld" in visualize):
                if not "fd" in visualize:
                    frame_visualize = face.copy()

                cv2.circle(frame_visualize, (eye_coordinates['left_eye'][0],
                                             eye_coordinates['left_eye'][1]),
                           25, (0, 0, 255), 2)
                cv2.circle(frame_visualize, (eye_coordinates['right_eye'][0],
                                             eye_coordinates['right_eye'][1]),
                           25, (0, 0, 255), 2)

            if ("hp" in visualize):
                cv2.putText(
                    frame_visualize,
                    "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                    format(head_pose_angles[0], head_pose_angles[1],
                           head_pose_angles[2]), (10, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.255, (0, 255, 0), 1)

            if ("ge" in visualize):
                h = face.shape[0]
                arrow = h * 0.7
                arrow_X = gaze_vector[0] * arrow
                arrow_Y = -gaze_vector[1] * arrow
                cv2.arrowedLine(
                    frame_visualize, (eye_coordinates['left_eye'][0],
                                      eye_coordinates['left_eye'][1]),
                    (int(eye_coordinates['left_eye'][0] + arrow_X),
                     int(eye_coordinates['left_eye'][1] + arrow_Y)),
                    (255, 0, 0), 2)
                cv2.arrowedLine(
                    frame_visualize, (eye_coordinates['right_eye'][0],
                                      eye_coordinates['right_eye'][1]),
                    (int(eye_coordinates['right_eye'][0] + arrow_X),
                     int(eye_coordinates['right_eye'][1] + arrow_Y)),
                    (255, 0, 0), 2)
            if (count % 5 == 0):

                cv2.imshow('Visualization',
                           cv2.resize(frame_visualize, (500, 500)))

        if (count % 5 == 0):
            MC.move(mouse_coordinates[0], mouse_coordinates[1])

        if key == 27:
            break

    Total_Inference_Time = time.time() - start_inf_time
    if (count > 0):
        logging.info("############## Models Inference time #######")
        logging.info("Face Detection:{:.3f}ms".format(
            1000 * fd_inference_time / count))
        logging.info("Facial Landmarks Detection:{:.3f}ms".format(
            1000 * fld_inference_time / count))
        logging.info("Headpose Estimation:{:.3f}ms".format(
            1000 * hp_inference_time / count))
        logging.info("Gaze Estimation:{:.3f}ms".format(
            1000 * ge_inference_time / count))
        logging.info("############## End #########################")

    logging.info("############## Summarized Results ##########")
    logging.info(
        "Total Model Load Time: {:.3f}ms".format(Total_Model_Load_Time))
    logging.info("Total Inference Time: {:.3f}s".format(Total_Inference_Time))
    logging.info("FPS:{}".format(count / Total_Inference_Time))
    logging.info("############ End ###########################")
    cv2.destroyAllWindows()
    IF.close()
def main(args):
    ## loading models
    try:
        input_file = args.input
        mode_visualization = args.mode_visualization

        if input_file == "CAM":
            input_feeder = InputFeeder("cam")
        else:
            if not os.path.isfile(input_file):
                log.error("ERROR: INPUT PATH IS NOT VALID")
                exit(1)
            input_feeder = InputFeeder("video", input_file)

        face_detection_class = Face_Detection(
            model=args.face_detection,
            device=args.device,
            extensions=args.cpu_extension)
        face_landmarks_class = Landmarks_Detection(
            model=args.face_landmark,
            device=args.device,
            extensions=args.cpu_extension)
        head_pose_class = Head_Pose(model=args.head_pose,
                                    device=args.device,
                                    extensions=args.cpu_extension)
        gaze_estimation_class = Gaze_Estimation(
            model=args.gaze_estimation,
            device=args.device,
            extensions=args.cpu_extension)

        mouse_control = MouseController('medium', 'fast')
        start_time = time.time()

        ## Load the models one by one and report each load time

        face_det_time = time.time()
        face_detection_class.load_model()
        print("Face Detection load time: {:.3f} ms".format(
            (time.time() - face_det_time) * 1000))

        face_land_time = time.time()
        face_landmarks_class.load_model()
        print("Facial Landmarks load time: {:.3f} ms".format(
            (time.time() - face_land_time) * 1000))

        head_po_time = time.time()
        head_pose_class.load_model()
        print("Head Pose load time: {:.3f} ms".format(
            (time.time() - head_po_time) * 1000))

        gaze_est_time = time.time()
        gaze_estimation_class.load_model()
        print("Gaze Estimation load time: {:.3f} ms".format(
            (time.time() - gaze_est_time) * 1000))

        total_time = time.time() - start_time
        print("Total loading time: {:.3f} ms".format(total_time * 1000))

        print("All models are loaded successfully..")

        input_feeder.load_data()
        print("Feeder is loaded")
    except Exception as e:
        print('Error occurred while loading models in app: {}'.format(e))
        exit(1)

    ## performing inference
    try:
        start_inference_time = time.time()
        frame_count = 0
        for flag, frame in input_feeder.next_batch():
            if not flag:
                break
            frame_count += 1
            # Show the raw frame only every fifth frame to keep the UI light.
            if frame_count % 5 == 0:
                cv2.imshow('video', cv2.resize(frame, (700, 700)))

            key = cv2.waitKey(60)
            crop_face, face_coords = face_detection_class.predict(
                frame.copy(), args.conf_threshold)
            if isinstance(crop_face, int):
                log.error("Unable to detect the face.")
                if key == 27:
                    break
                continue

            ## perform inference
            head_angle = head_pose_class.predict(crop_face.copy())
            left_eye, right_eye, eye_coords = face_landmarks_class.predict(
                crop_face.copy())
            mouse_position, gaze_vector = gaze_estimation_class.predict(
                left_eye, right_eye, head_angle)

            ## checking for extra visualization flags
            if len(mode_visualization) != 0:
                p_frame = frame.copy()
                if 'fd' in mode_visualization:
                    p_frame = crop_face
                if 'fl' in mode_visualization:
                    cv2.rectangle(
                        crop_face,
                        (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                        (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                        (0, 255, 0), 1)
                    cv2.rectangle(
                        crop_face,
                        (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                        (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                        (0, 255, 0), 1)

                if 'hp' in mode_visualization:
                    cv2.putText(
                        p_frame,
                        "Head Positions: {:.2f} {:.2f} {:.2f}".format(
                            head_angle[0], head_angle[1],
                            head_angle[2]), (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)

                if 'ge' in mode_visualization:
                    i, j, k = int(gaze_vector[0] * 12), int(
                        gaze_vector[1] * 12), 160

                    # Draw the gaze cross directly on the eye regions of the
                    # cropped face so it is visible in the preview window.
                    l_eye = crop_face[eye_coords[0][1]:eye_coords[0][3],
                                      eye_coords[0][0]:eye_coords[0][2]]
                    r_eye = crop_face[eye_coords[1][1]:eye_coords[1][3],
                                      eye_coords[1][0]:eye_coords[1][2]]
                    cv2.line(l_eye, (i - k, j - k), (i + k, j + k),
                             (0, 255, 255), 2)
                    cv2.line(l_eye, (i - k, j + k), (i + k, j - k),
                             (255, 0, 255), 2)
                    cv2.line(r_eye, (i - k, j - k), (i + k, j + k),
                             (0, 255, 255), 2)
                    cv2.line(r_eye, (i - k, j + k), (i + k, j - k),
                             (0, 255, 255), 2)

                cv2.imshow("visual for client",
                           cv2.resize(p_frame, (700, 700)))

            mouse_control.move(mouse_position[0], mouse_position[1])
            if key == 27:
                break
        ## report total inference time and frames per second
        total_infer_time = time.time() - start_inference_time
        frames_per_sec = frame_count / total_infer_time

        print("Frames processed: {}".format(frame_count))
        print("Total inference time: {:.3f} seconds".format(
            total_infer_time))
        print("FPS: {:.3f}".format(frames_per_sec))
    except Exception as e:
        print('Error while performing inference in app: {}'.format(e))

    print("All Done...")

    cv2.destroyAllWindows()
    input_feeder.close()
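# A hedged sketch of the roll-compensation step that typically produces the
# (mouse_position, gaze_vector) pair returned by gaze_estimation_class.predict
# above; the formula is the one commonly paired with
# gaze-estimation-adas-0002 and is an assumption, not this repository's code.
import math

def gaze_to_mouse(gaze_vector, roll_angle):
    # Rotate the raw gaze vector by the head's roll angle so that pointer
    # motion stays aligned with the screen axes.
    cos_t = math.cos(roll_angle * math.pi / 180.0)
    sin_t = math.sin(roll_angle * math.pi / 180.0)
    x = gaze_vector[0] * cos_t + gaze_vector[1] * sin_t
    y = -gaze_vector[0] * sin_t + gaze_vector[1] * cos_t
    return (x, y), gaze_vector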
Exemple #17
0
class Computer_Pointer_Controller:
    def __init__(self, args):

        # load the objects corresponding to the models
        self.face_detection = Face_Detection(args.face_detection_model,
                                             args.device, args.extensions,
                                             args.perf_counts)
        self.gaze_estimation = Gaze_Estimation(args.gaze_estimation_model,
                                               args.device, args.extensions,
                                               args.perf_counts)
        self.head_pose_estimation = Head_Pose_Estimation(
            args.head_pose_estimation_model, args.device, args.extensions,
            args.perf_counts)
        self.facial_landmarks_detection = Facial_Landmarks_Detection(
            args.facial_landmarks_detection_model, args.device,
            args.extensions, args.perf_counts)

        start_models_load_time = time.time()
        self.face_detection.load_model()
        self.gaze_estimation.load_model()
        self.head_pose_estimation.load_model()
        self.facial_landmarks_detection.load_model()

        # keep what run() needs as attributes; a bare local would be
        # out of scope there
        self.logger = logging.getLogger()
        self.show_face = args.show_face
        input_T = args.input_type
        input_F = args.input_file

        if input_T.lower() == 'cam':
            # open the video feed
            self.feed = InputFeeder(args.input_type, args.input_file)
            self.feed.load_data()
        else:
            if not os.path.isfile(input_F):
                self.logger.error('Unable to find specified video file')
                exit(1)
            file_extension = input_F.split(".")[-1]
            if file_extension in ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']:
                self.feed = InputFeeder(args.input_type, args.input_file)
                self.feed.load_data()
            else:
                self.logger.error(
                    "Unsupported file extension. Allowed: ['jpg', 'jpeg', 'bmp', 'avi', 'mp4']"
                )
                exit(1)

        print("Models total loading time :",
              time.time() - start_models_load_time)

        # init mouse controller
        self.mouse_controller = MouseController('low', 'fast')

    def run(self):
        inferences_times = []
        face_detections_times = []
        for batch in self.feed.next_batch():
            if batch is None:
                break

            # as we want the webcam to act as a mirror, flip the frame
            batch = cv2.flip(batch, 1)

            inference_time = time.time()
            face = self.face_detection.predict(batch)
            if face is None:
                self.logger.error('Unable to detect the face.')
                continue
            else:
                face_detections_times.append(time.time() - inference_time)

                left_eye_image, right_eye_image = self.facial_landmarks_detection.predict(
                    face)
                if left_eye_image is None or right_eye_image is None:
                    continue
                head_pose_angles = self.head_pose_estimation.predict(face)
                if head_pose_angles is None:
                    continue
                vector = self.gaze_estimation.predict(left_eye_image,
                                                      right_eye_image,
                                                      head_pose_angles)
                inferences_times.append(time.time() - inference_time)
                if self.show_face == "True":
                    cv2.imshow("Detected face", face)
                    cv2.waitKey(1)
                self.mouse_controller.move(vector[0], vector[1])

        self.feed.close()
        cv2.destroyAllWindows()
        print("Average face detection inference time:",
              sum(face_detections_times) / len(face_detections_times))
        print("Average total inferences time:",
              sum(inferences_times) / len(inferences_times))
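# A hedged usage sketch for Computer_Pointer_Controller; the flag names simply
# mirror the attributes its __init__ reads from args and are assumptions, not
# the repository's actual CLI definition.
import argparse

def build_argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--face_detection_model', required=True)
    parser.add_argument('--facial_landmarks_detection_model', required=True)
    parser.add_argument('--head_pose_estimation_model', required=True)
    parser.add_argument('--gaze_estimation_model', required=True)
    parser.add_argument('--device', default='CPU')
    parser.add_argument('--extensions', default=None)
    parser.add_argument('--perf_counts', default='False')
    parser.add_argument('--input_type', default='cam')
    parser.add_argument('--input_file', default=None)
    parser.add_argument('--show_face', default='False')
    return parser

if __name__ == '__main__':
    controller = Computer_Pointer_Controller(build_argparser().parse_args())
    controller.run()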
def infer_on_stream(args):

    network_fd = Face_Detection(args.face_detection_model, args.device)
    network_hp = Head_Pose_Estimation(args.head_pose_model, args.device)
    network_fl = Facial_Landmarks_Detection(args.facial_landmarks_model,
                                            args.device)
    network_ge = Gaze_Estimation(args.gaze_estimation_model, args.device)

    mouse_cont = MouseController(args.mouse_precision, args.mouse_speed)

    starting_loading = time.time()

    network_fd.load_model()
    network_hp.load_model()
    network_fl.load_model()
    network_ge.load_model()

    duration_loading = time.time() - starting_loading

    input_type = handle_input(args.input)

    feed = InputFeeder(input_type=input_type, input_file=args.input)

    feed.load_data()

    starting_inference = time.time()

    for flag, frame in feed.next_batch():
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        out_frame, face, face_coords = network_fd.predict(
            frame, args.prob_threshold, args.display)

        if len(face_coords) == 0:
            log.error("There is no face in the stream!")
            continue

        out_frame, head_angle = network_hp.predict(out_frame, face,
                                                   face_coords, args.display)
        out_frame, eye_left, eye_right, eye_center = network_fl.predict(
            out_frame, face, face_coords, args.display)
        out_frame, gaze = network_ge.predict(out_frame, eye_left, eye_right,
                                             eye_center, head_angle,
                                             args.display)

        mouse_cont.move(gaze[0], gaze[1])

        if key_pressed == 27:
            break

        cv2.imshow('Visualization', cv2.resize(out_frame, (600, 400)))

    duration_inference = time.time() - starting_inference

    print("Total loading time is: {}\nTotal inference time is: {} ".format(
        duration_loading, duration_inference))

    feed.close()
    cv2.destroyAllWindows()
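# A minimal sketch of the handle_input helper called above, assuming it only
# maps the --input argument onto the InputFeeder types used in these examples
# ('cam', 'image', 'video'); the exact rules are an assumption.
def handle_input(input_path):
    if input_path.upper() == 'CAM':
        return 'cam'
    if input_path.lower().endswith(('.jpg', '.jpeg', '.bmp', '.png')):
        return 'image'
    return 'video'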