Example #1
 def __init__(self):
     if self.recognition_object is None and self.detection_object is None:
         self.recognition_object = FaceRecognition()
         self.detection_object = FaceDetection()
     self.recognition_object.initialize()
     self.create_main_window(self.initialize_window_size(400, 400))
     self.set_main_window_buttons()
Example #2
def compare(root, f1, f2):
    global face_det
    global face_recon
    global face_align
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)
    time_start = time.time()
    img_a = cv2.imread(root + '/' + f1)
    img_b = cv2.imread(root + '/' + f2)

    bbox_list1, a_point = face_det.get_max_bounding_box_by_image(img_a)
    bbox_list2, b_point = face_det.get_max_bounding_box_by_image(img_b)
    similarity = 0
    time_use = 0  # default when no face is found in either image
    if bbox_list1 and bbox_list2:
        a_aligned_faces = face_align.affine_face(img_a, a_point)
        b_aligned_faces = face_align.affine_face(img_b, b_point)
        similarity = face_recon.face_compare(a_aligned_faces, b_aligned_faces)
        #print similarity
        time_end = time.time()
        time_use = int(1000 * (time_end - time_start))
        #print 'time_used:' + str(time_use)
    return similarity, time_use
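A minimal calling sketch for compare(), assuming the module-level globals it relies on (face_det, face_recon, face_align, gpu_id) are defined as below and that the FaceDetection, FaceRecogniton and FaceAlignment classes are importable; all file names are placeholders:

import time
import cv2

gpu_id = 0
face_det = None      # lazily created inside compare()
face_recon = None
face_align = None

if __name__ == '__main__':
    score, ms = compare('./pairs', 'a.jpg', 'b.jpg')   # placeholder directory and file names
    print('similarity: {:.4f}, time: {} ms'.format(score, ms))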
Example #3
 def __init__(self, num_of_cameras=1, record_video=False):
     self.is_capturing_video = True
     self.video_cameras = VideoCameras(num_of_cameras)
     self.record_video = record_video
     self.face_detection = FaceDetection()
     self.face_frame_morpher = FaceFrameMorpher()
     self.video_recorder = VideoRecorder()
Example #4
def run_inference(args):

    feed = InputFeeder(input_type='video', input_file=args.input)
    feed.load_data()

    # load all models once, outside the frame loop
    faceDetection = FaceDetection(model_name=args.face_detection_model)
    faceDetection.load_model()
    facialLandmarksDetection = FacialLandmarksDetection(
        args.facial_landmarks_detection_model)
    facialLandmarksDetection.load_model()
    headPoseEstimation = HeadPoseEstimation(
        args.head_pose_estimation_model)
    headPoseEstimation.load_model()
    gazeEstimation = GazeEstimation(args.gaze_estimation_model)
    gazeEstimation.load_model()

    for batch in feed.next_batch():
        cv2.imshow("Output", cv2.resize(batch, (500, 500)))
        key = cv2.waitKey(60)

        if key == 27:
            break

        # getting face
        face = faceDetection.predict(batch)

        # getting eyes
        left_eye, right_eye = facialLandmarksDetection.predict(face)

        # getting head pose angles
        head_pose = headPoseEstimation.predict(face)
        print("head pose angles: ", head_pose)

        # get mouse points
        mouse_coords = gazeEstimation.predict(left_eye, right_eye, head_pose)
        print("gaze output: ", mouse_coords)
    feed.close()
Example #5
def models_handler(logger, args):
    ## put all model paths from args into a dict
    model_paths = {
        'Face': args.face_detection_path,
        'Landmarks': args.facial_landmarks_path,
        'Headpose': args.head_pose_path,
        'Gaze': args.gaze_estimation_path
    }

    ## check that each model file exists at the given path
    for model_key in model_paths.keys():
        if not os.path.isfile(model_paths[model_key]):
            print("\n## " + model_key + " model path does not exist: " + model_paths[model_key] + ' Please try again.')
            logger.error("## " + model_key + " model path does not exist: " + model_paths[model_key] + ' Please try again.')
            exit(1)
        else:
            print('## ' + model_key + " model path is correct: " + model_paths[model_key] + '\n')
            logger.info('## ' + model_key + " model path is correct: " + model_paths[model_key])

    ## initialize face detection model
    model_fd = FaceDetection(model_paths['Face'], args.device, args.cpu_extension)
    ## initialize facial landmarks detection model
    model_fld = FacialLandmarkDetection(model_paths['Landmarks'], args.device, args.cpu_extension)
    ## initialize head pose estimation model
    model_hpe = HeadPoseEstimation(model_paths['Headpose'], args.device, args.cpu_extension)
    ## initialize gaze estimation model
    model_ge = GazeEstimation(model_paths['Gaze'], args.device, args.cpu_extension)

    return model_fd, model_fld, model_hpe, model_ge
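A hedged usage sketch for models_handler(): args stands in for the argparse.Namespace the function reads, the paths are placeholders, and the load_model() calls mirror the convention of the later OpenVINO examples rather than anything guaranteed by this snippet:

import logging
from types import SimpleNamespace

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

args = SimpleNamespace(
    face_detection_path='models/face.xml',            # placeholder paths
    facial_landmarks_path='models/landmarks.xml',
    head_pose_path='models/head_pose.xml',
    gaze_estimation_path='models/gaze.xml',
    device='CPU',
    cpu_extension=None,
)

model_fd, model_fld, model_hpe, model_ge = models_handler(logger, args)
for model in (model_fd, model_fld, model_hpe, model_ge):
    model.load_model()   # assumed API, following the other examples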
Example #6
    def __init__(self, args):
        '''
        This method initializes instance variables and all required models for the application.

        Args:
        args = All arguments parsed by the arguments parser function

        Return:
        None
        '''

        init_start_time = time.time()
        self.output_path = args.output_path
        self.show_output = args.show_output
        self.total_processing_time = 0
        self.count_batch = 0
        self.inference_speed = []
        self.avg_inference_speed = 0

        if args.all_devices != 'CPU':
            args.face_device = args.all_devices
            args.face_landmark_device = args.all_devices
            args.head_pose_device = args.all_devices
            args.gaze_device = args.all_devices

        model_init_start = time.time()
        self.face_model = FaceDetection(args.face_model, args.face_device,
                                        args.face_device_ext,
                                        args.face_prob_threshold)
        self.landmarks_model = FacialLandmarksDetection(
            args.face_landmark_model, args.face_landmark_device,
            args.face_landmark_device_ext, args.face_landmark_prob_threshold)
        self.head_pose_model = HeadPoseEstimation(
            args.head_pose_model, args.head_pose_device,
            args.head_pose_device_ext, args.head_pose_prob_threshold)
        self.gaze_model = GazeEstimation(args.gaze_model, args.gaze_device,
                                         args.gaze_device_ext,
                                         args.gaze_prob_threshold)
        self.model_init_time = time.time() - model_init_start
        log.info('[ Main ] All required models initialized')

        self.mouse_control = MouseController(args.precision, args.speed)
        log.info('[ Main ] Mouse controller successfully initialized')

        self.input_feeder = InputFeeder(args.batch_size, args.input_type,
                                        args.input_file)
        log.info('[ Main ] Initialized input feeder')

        model_load_start = time.time()
        self.face_model.load_model()
        self.landmarks_model.load_model()
        self.head_pose_model.load_model()
        self.gaze_model.load_model()

        self.model_load_time = time.time() - model_load_start
        self.app_init_time = time.time() - init_start_time
        log.info('[ Main ] All models loaded to the Inference Engine\n')

        return None
Example #7
def load_models(path):
    global gender, expression, multiple, face_detection, landmarks2d, landmarks3d
    gender = Gender(os.path.join(path, "gender.zip"))
    expression = Expression(os.path.join(path, "expression.zip"))
    multiple = Multiple(os.path.join(path, "multiple"))
    face_detection = FaceDetection(os.path.join(path, "face_detection"))
    landmarks2d = LandMarks2D(path)
    landmarks3d = LandMarks3D(path)
Example #8
    def __init__(self):
        self.serial_transmitter = SerialTransmitter()
        self.face_detection = FaceDetection()
        self.json = Json()

        self.settings = self.json.get_json()
        self.camera_fov = camera_fov
        self.resolution = self.face_detection.resolution
        self.pos = ()
Example #9
 def load_modules(self):
     self._speech = Speech(self.session)
     self._motion = Motion(self.session)
     self._tablet = Tablet(self.session)
     self._face_detection = FaceDetection(self.session, self)
     self._wave_detection = WavingDetection(self.session)
     self._audio_player = AudioPlayer(self.session)
     self._speech_recognition = SpeechRecognition(self.session)
     self._system = System(self.session)
Example #10
def load_models():
    print("loading face detection model")
    face_detection_model = FaceDetection()

    # load face validation model
    print("loading face validation model")
    face_validation_model = FaceValidation()

    # SyncNet
    print("loading speaker validation model")
    speaker_validation = SpeakerValidation()
    return face_detection_model, face_validation_model, speaker_validation
Example #11
    def preprocess_output_face_detection(self, outputs, width, height,
                                         threshold, frame):
        """
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        """
        face_detection = FaceDetection()

        coords = []
        coords, frame = face_detection.preprocess_output(
            outputs, width, height, threshold, frame, self.output_name)
        return coords, frame
Example #12
    def __init__(self, mode=None):
        self.servo_pos = (90, 90)
        self.allow_save = True
        
        self.json = Json('settings.json')
        self.settings = self.json.get_json()

        if mode == 'debug':
            self.serial_transmitter = SerialTransmitter(arduino_connect=False)
        else:
            self.serial_transmitter = SerialTransmitter(arduino_connect=True, move_threshold=0.05)
        self.ui = UICallibrate(self.settings, (640, 480), move_factor=0.5)
        self.face_detection = FaceDetection()

        self.calibrate()
Example #13
def prepare_processing_engines():
    """
    Loads all machine learning models for processing an image.
    Returns:
        Image processor, which detects faces in an image and classifies their life stage.
    """
    res10_face_model = Res10FaceDetection(
        'models/caffe/res10_300x300_ssd_iter_140000.caffemodel',
        'models/caffe/deploy.prototxt')
    face_detection_backend = FaceDetection(res10_face_model)
    life_stage_backend = LifeStagePrediction('models/life_stage_model.h5')
    image_processor = FaceDetectionAndLifeStageClassification(
        face_detection_backend, life_stage_backend)
    return image_processor
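For context, a sketch of how the returned processor might be consumed; the process() method name is hypothetical and is not established by the snippet above:

import cv2

image = cv2.imread('group_photo.jpg')            # placeholder input image
image_processor = prepare_processing_engines()
result = image_processor.process(image)          # hypothetical method name
print(result)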
Example #14
 def __init__(self):
     self._isOn = False
     self._isRun = False
     self._on = False
     self._run = False
     self._face = False
     self._forward = 0.0
     self._rotation = 0.0
     self._srvCmd = rospy.Service('pimouse_cmd', PiMouseCmd, self.CommandCallback)
     self._srvClientOn = rospy.ServiceProxy('motor_on', Trigger)
     self._srvClientOff = rospy.ServiceProxy('motor_off', Trigger)
     rospy.on_shutdown(self._srvClientOff.call)
     self._wallAround = WallAround()
     self._faceToFace = FaceToFace()
     self._faceDetection = FaceDetection()
Example #15
 def __init__(self):
     self.frame_in = np.zeros((10, 10, 3), np.uint8)
     self.frame_ROI = np.zeros((10, 10, 3), np.uint8)
     self.frame_out = np.zeros((10, 10, 3), np.uint8)
     self.samples = []
     self.buffer_size = 100
     self.times = []
     self.data_buffer = []
     self.fps = 0
     self.fft = []
     self.freqs = []
     self.t0 = time.time()
     self.bpm = 0
     self.fd = FaceDetection()
     self.bpms = []
     self.peaks = []
Example #16
def main():
    video_stream = cv.VideoCapture(0)
    detector = FaceDetection()

    while True:
        ret, frame = video_stream.read()
        rects, labels = detector.process_frame(frame)
        for (x, y, w, h), label in zip(rects, labels):
            cv.rectangle(frame, (x, y), (x + w, y + h), colourDict[label], 2)
            cv.putText(frame, label, (x, y), cv.FONT_HERSHEY_SIMPLEX, 1.0,
                       (255, 255, 255))
        cv.imshow("EyeTracker", frame)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    video_stream.release()
    cv.destroyAllWindows()
Example #17
def main():
    global GUI_HAS_FACE
    ap = argparse.ArgumentParser()
    ap.add_argument("-p", "--shape-predictor", required=True, #ili p ili shape predictor se koriste u commandlineu, true jer je obavezno
        help="path to facial landmark predictor") #kad upisemo help u cl nam to ispise
    args = ap.parse_args()

    p = Printer(value = False)
    p.start()

    gui = Gui(value = False)  # Gui constructor; the initial value of the face flag is passed in
    #gui.start()

    face = FaceDetection(args.shape_predictor, face_callbacks=[p.face_update, gui.check_face])  # FaceDetection constructor; the face flag is forwarded to the listed callbacks (face_update, check_face)
    face.start()


    gui.mainloop()  # must always be called last
Example #18
def load_models(p):
    """
    Load the OpenVINO models in a dictionary to handle them more easily

    Input: `p`, a dictionary with the models' paths
    """
    # Get the device ('CPU' will be selected if None)
    models = {}
    models['fd'] = FaceDetection()
    models['lm'] = Landmarks()
    models['hp'] = HeadPose()
    models['ge'] = GazeEstimator()

    # Load all the files with the relative device
    for label in ['fd','lm','hp','ge']:
        start = time.time()
        models[label].load_model(p[f'mod_{label}'], device=p[f'device_{label}'])    
        print(f'Model: {MODELS[label]} --- Loading time: {1000*(time.time()-start):.1f} ms')

    return models
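A sketch of the inputs load_models() expects, inferred from the f-strings above: a paths dictionary p keyed mod_*/device_* plus a module-level MODELS mapping used only in the log message. The file names below are placeholders, not part of the original:

MODELS = {'fd': 'Face Detection', 'lm': 'Landmarks', 'hp': 'Head Pose', 'ge': 'Gaze Estimation'}

p = {
    'mod_fd': 'models/face-detection.xml',       # placeholder model paths
    'mod_lm': 'models/landmarks.xml',
    'mod_hp': 'models/head-pose.xml',
    'mod_ge': 'models/gaze.xml',
    'device_fd': 'CPU', 'device_lm': 'CPU', 'device_hp': 'CPU', 'device_ge': 'CPU',
}

models = load_models(p)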
Example #19
def face_detection(environ, start_response):
    from face_detection import FaceDetection
    face = FaceDetection()
    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=utf-8')]

    start_response(status, headers)

    params = environ['params']
    image = params.get('image')

    try:
        faces = face.face_detection(image)
        if isinstance(faces, numpy.ndarray):
            data = faces.tolist()
            return [json.dumps(dict(status=0, data=data)).encode('utf-8')]
        else:
            return ['{"status":0,"data":[]}'.encode('utf-8')]
    except Exception as error:
        res = '{"status":-1,"data":"%s"}' % (str(error))
        return [res.encode('utf-8')]
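In the original project environ['params'] is filled in by surrounding framework code that is not shown; the wrapper below is only a stand-in for local testing, and every name in it is an assumption:

from urllib.parse import parse_qs
from wsgiref.simple_server import make_server

def with_params(app):
    # minimal stand-in for the middleware that populates environ['params']
    def wrapper(environ, start_response):
        qs = parse_qs(environ.get('QUERY_STRING', ''))
        environ['params'] = {k: v[0] for k, v in qs.items()}
        return app(environ, start_response)
    return wrapper

if __name__ == '__main__':
    # serves the face_detection endpoint above on http://localhost:8000/?image=<path>
    make_server('', 8000, with_params(face_detection)).serve_forever()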
Example #20
 def copy_all2(self, file_infos, dst_dir):
     if self.face_detection:
         from face_detection import FaceDetection
         fd = FaceDetection()

     classes = []
     counter = 0
     mean_image = np.zeros(self.input_shape, np.float32)
     for path, class_feature in file_infos:
         img = cv2.imread(path, cv2.IMREAD_COLOR)
         if self.face_detection:
             img = cv2.resize(img, (self.img_resize, self.img_resize), cv2.INTER_AREA)
             img = fd.crop_face(img)
             if img is not None:
                 img = cv2.resize(img, (self.input_width, self.input_height), cv2.INTER_AREA)
                 if dst_dir == self.db_age_train_folder_path or dst_dir == self.db_sex_train_folder_path:
                     mean_image += img
                     counter += 1
                 cv2.imwrite(os.path.join(os.path.join(dst_dir, class_feature), os.path.basename(path)), img)
                 classes.append(class_feature)
         else:
             img = cv2.resize(img, (self.input_width, self.input_height), cv2.INTER_AREA)
             if dst_dir == self.db_age_train_folder_path or dst_dir == self.db_sex_train_folder_path:
                 mean_image += img
                 counter += 1
             cv2.imwrite(os.path.join(os.path.join(dst_dir, class_feature), os.path.basename(path)), img)
             classes.append(class_feature)

     if dst_dir == self.db_age_train_folder_path:
         mean_image /= counter
         mean_image = np.asarray(mean_image, np.uint8)
         cv2.imwrite(self.age_mean_image_path, mean_image)
     elif dst_dir == self.db_sex_train_folder_path:
         mean_image /= counter
         mean_image = np.asarray(mean_image, np.uint8)
         cv2.imwrite(self.sex_mean_image_path, mean_image)

     return classes
Example #21
def main():

    args = get_args().parse_args()
    path_filender = args.input
    four_flags = args.flags_checker
    loger = logging.getLogger()
    feeder_in = None
    out_path = args.out_path

    if path_filender.lower() == "cam":
        feeder_in = InputFeeder("cam")
    else:
        if not os.path.isfile(path_filender):
            loger.error("The video was not found")
            exit(1)
        feeder_in = InputFeeder("video", path_filender)

    model_locations = {
        'FaceDetection': args.face_detection_model,
        'HeadPoseEstimation': args.head_pose_estimation_model,
        'FacialLandmarksDetection': args.facial_landmarks_detection_model,
        'GazeEstimation': args.gaze_estimation_model
    }

    for key_name in model_locations.keys():
        if not os.path.isfile(model_locations[key_name]):
            loger.error("The system cannot find the " + key_name + " xml file")
            exit(1)

    dt = FaceDetection(model_locations['FaceDetection'], args.device,
                       args.cpu_extension)
    pe = HeadPoseEstimation(model_locations['HeadPoseEstimation'], args.device,
                            args.cpu_extension)
    ld = FacialLandmarksDetection(model_locations['FacialLandmarksDetection'],
                                  args.device, args.cpu_extension)
    ge = GazeEstimation(model_locations['GazeEstimation'], args.device,
                        args.cpu_extension)

    cursor = MouseController('medium', 'fast')

    feeder_in.load_data()
    model_load_time_start = time.time()
    dt.load_model()
    pe.load_model()
    ld.load_model()
    ge.load_model()
    total_load_time = time.time() - model_load_time_start

    frame_counter = 0
    inference_time_start = time.time()
    for ret, frame in feeder_in.next_batch():
        if not ret:
            break
        frame_counter = frame_counter + 1
        if frame_counter % 1 == 0:
            cv2.imshow('video', cv2.resize(frame, (600, 600)))

        key = cv2.waitKey(60)

        face_detected, coords_face = dt.predict(frame, args.p_th)
        if type(face_detected) == int:
            loger.error("The system cannot detect any face.")
            if key == 27:
                break
            continue

        head_pose_output = pe.predict(face_detected)
        eye_left_detect, eye_right_detect, eye_coordinates_detect = ld.predict(
            face_detected)
        coordi_update_pointer, coordi_gaze = ge.predict(
            eye_left_detect, eye_right_detect, head_pose_output)

        if (not len(four_flags) == 0):
            result_app = frame
            if 'fad' in four_flags:
                result_app = face_detected
            if 'hpe' in four_flags:
                cv2.putText(
                    result_app,
                    "HP Angles: YAW:{:.3f} * PITCH:{:.3f} * ROLL:{:.3f}".
                    format(head_pose_output[0], head_pose_output[1],
                           head_pose_output[2]), (5, 40),
                    cv2.FONT_HERSHEY_COMPLEX, 0.25, (153, 76, 0), 0)
            if 'fld' in four_flags:
                cv2.rectangle(face_detected,
                              (eye_coordinates_detect[0][0] - 4,
                               eye_coordinates_detect[0][1] - 4),
                              (eye_coordinates_detect[0][2] + 4,
                               eye_coordinates_detect[0][3] + 4),
                              (255, 255, 0), 4)
                cv2.rectangle(face_detected,
                              (eye_coordinates_detect[1][0] - 4,
                               eye_coordinates_detect[1][1] - 4),
                              (eye_coordinates_detect[1][2] + 4,
                               eye_coordinates_detect[1][3] + 4),
                              (255, 255, 0), 4)
            if 'gae' in four_flags:
                x = int(coordi_gaze[0] * 2)
                y = int(coordi_gaze[1] * 2)
                w = 150
                right_E = cv2.line(eye_right_detect, (x - w, y - w),
                                   (x + w, y + w), (51, 255, 153), 1)
                cv2.line(right_E, (x - w, y + w), (x + w, y - w),
                         (51, 255, 253), 1)
                left_E = cv2.line(eye_left_detect, (x - w, y - w),
                                  (x + w, y + w), (51, 255, 153), 1)
                cv2.line(left_E, (x - w, y + w), (x + w, y - w),
                         (51, 255, 253), 1)
                face_detected[
                    eye_coordinates_detect[1][1]:eye_coordinates_detect[1][3],
                    eye_coordinates_detect[1][0]:eye_coordinates_detect[1]
                    [2]] = right_E
                face_detected[
                    eye_coordinates_detect[0][1]:eye_coordinates_detect[0][3],
                    eye_coordinates_detect[0][0]:eye_coordinates_detect[0]
                    [2]] = left_E

            cv2.imshow("Result of the App", cv2.resize(result_app, (600, 600)))

        if frame_counter % 5 == 0:
            cursor.move(coordi_update_pointer[0], coordi_update_pointer[1])
        if key == 27:
            break

    total_time = time.time() - inference_time_start
    total_time_for_inference = round(total_time, 1)
    fps = frame_counter / total_time_for_inference

    with open(out_path + 'stats.txt', 'w') as f:
        f.write('Inference time: ' + str(total_time_for_inference) + '\n')
        f.write('FPS: ' + str(fps) + '\n')
        f.write('Model load time: ' + str(total_load_time) + '\n')

    loger.error("The video stream is over...")
    cv2.destroyAllWindows()
    feeder_in.close()
Example #22
def main():
    args = build_argparser().parse_args()
    device_name = args.device
    prob_threshold = args.prob_threshold
    logger_object = log.getLogger()

    # Initialize variables with the input arguments
    model_path_dict = {
        'FaceDetectionModel': args.faceDetectionModel,
        'FacialLandmarkModel': args.facialLandmarksModel,
        'HeadPoseEstimationModel': args.headPoseEstimationModel,
        'GazeEstimationModel': args.gazeEstimationModel
    }

    # Instantiate model
    face_model = FaceDetection(model_path_dict['FaceDetectionModel'], device_name, threshold=prob_threshold)
    landmark_model = FacialLandmarksDetection(model_path_dict['FacialLandmarkModel'], device_name,
                                              threshold=prob_threshold)
    head_pose_model = HeadPoseEstimation(model_path_dict['HeadPoseEstimationModel'], device_name,
                                         threshold=prob_threshold)
    gaze_model = GazeEstimation(model_path_dict['GazeEstimationModel'], device_name, threshold=prob_threshold)
    mouse_controller = MouseController('medium', 'fast')

    # Load Models and get time
    start_time = time.time()
    face_model.load_model()
    logger_object.error("Face detection model loaded: time: {:.3f} ms".format((time.time() - start_time) * 1000))

    first_mark = time.time()
    landmark_model.load_model()
    logger_object.error(
        "Facial landmarks detection model loaded: time: {:.3f} ms".format((time.time() - first_mark) * 1000))

    second_mark = time.time()
    head_pose_model.load_model()
    logger_object.error("Head pose estimation model loaded: time: {:.3f} ms".format((time.time() - second_mark) * 1000))

    third_mark = time.time()
    gaze_model.load_model()
    logger_object.error("Gaze estimation model loaded: time: {:.3f} ms".format((time.time() - third_mark) * 1000))
    load_total_time = time.time() - start_time
    logger_object.error("Total loading time: time: {:.3f} ms".format(load_total_time * 1000))
    logger_object.error("All models are loaded successfully..")

    # Check the models for unsupported layers / missing extensions
    face_model.check_model()
    landmark_model.check_model()
    head_pose_model.check_model()
    gaze_model.check_model()

    preview_flags = args.previewFlags
    input_filename = args.input
    output_path = args.output_path
    prob_threshold = args.prob_threshold

    if input_filename.lower() == 'cam':
        input_feeder = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_filename):
            logger_object.error("Unable to find specified video file")
            exit(1)
        input_feeder = InputFeeder(input_type='video', input_file=input_filename)

    for model_path in list(model_path_dict.values()):
        if not os.path.isfile(model_path):
            logger_object.error("Unable to find specified model file" + str(model_path))
            exit(1)

    input_feeder.load_data()
    width = int(input_feeder.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(input_feeder.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(input_feeder.cap.get(cv2.CAP_PROP_FPS))
    out_video = cv2.VideoWriter(os.path.join('output_video.mp4'), cv2.VideoWriter_fourcc(*'avc1'), fps,
                                (width, height), True)

    frame_counter = 0
    start_inf_time = time.time()
    for ret, frame in input_feeder.next_batch():
        if not ret:
            break
        frame_counter += 1
        key = cv2.waitKey(60)

        try:
            cropped_image, face_cords = face_model.predict(frame, prob_threshold)

            if type(cropped_image) == int:
                print("Unable to detect the face")
                if key == 27:
                    break
                continue

            left_eye, right_eye, eye_cords = landmark_model.predict(cropped_image)
            pose_output = head_pose_model.predict(cropped_image)
            x, y, z = gaze_model.predict(left_eye, right_eye, pose_output, cropped_image, eye_cords)

            mouse_controller.move(x, y)
        except Exception as e:
            print(str(e) + " for frame " + str(frame_counter))
            continue

        image = cv2.resize(frame, (width, height))
        if not len(preview_flags) == 0:
            preview_frame = frame.copy()

            if 'fd' in preview_flags:
                if len(preview_flags) != 1:
                    preview_frame = cropped_image
                    cv2.rectangle(frame, (face_cords[0], face_cords[1]), (face_cords[2], face_cords[3]), (0, 0, 255), 3)

            if 'hp' in preview_flags:
                cv2.putText(
                    frame,
                    "Pose Angles: yaw= {:.2f} , pitch= {:.2f} , roll= {:.2f}".format(
                        pose_output[0], pose_output[1], pose_output[2]),
                    (20, 40),
                    cv2.FONT_HERSHEY_DUPLEX,
                    1, (255, 0, 0), 3)

            if 'ge' in preview_flags:
                cv2.putText(
                    frame,
                    "Gaze vector: x= {:.2f} , y= {:.2f} , z= {:.2f}".format(
                        x, y, z),
                    (15, 100),
                    cv2.FONT_HERSHEY_COMPLEX,
                    1, (0, 255, 0), 3)

            image = np.hstack((cv2.resize(frame, (500, 500)), cv2.resize(preview_frame, (500, 500))))

        cv2.imshow('preview', image)
        out_video.write(image)

        if frame_counter % 5 == 0:
            mouse_controller.move(x, y)

        if key == 27:
            break

    inference_time = round(time.time() - start_inf_time, 1)
    fps = int(frame_counter) / inference_time
    logger_object.error("counter {} seconds".format(frame_counter))
    logger_object.error("total inference time {} seconds".format(inference_time))
    logger_object.error("fps {} frame/second".format(fps))
    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'stats.txt'), 'w') as f:
        f.write('inference time : ' + str(inference_time) + '\n')
        f.write('fps: ' + str(fps) + '\n')
        f.write('Models Loading: '+ str(load_total_time) + '\n')
    logger_object.error('Video stream ended')
    cv2.destroyAllWindows()
    input_feeder.close()
Example #23
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED
    #controller = MouseController()

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    out = cv2.VideoWriter(os.path.join(args.output_dir, "shopper.mp4"),
                          cv2.VideoWriter_fourcc(*"MP4V"), fps,
                          (initial_w, initial_h), True)
    frame_count = 0

    job_id = 1  #os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')

    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    # Initialise the class
    if args.cpu_extension:
        facedet = FaceDetection(args.facemodel,
                                args.confidence,
                                extensions=args.cpu_extension)
        posest = HeadPoseEstimation(args.posemodel,
                                    args.confidence,
                                    extensions=args.cpu_extension)
        landest = FaceLandmarksDetection(args.landmarksmodel,
                                         args.confidence,
                                         extensions=args.cpu_extension)
        gazeest = GazeEstimation(args.gazemodel,
                                 args.confidence,
                                 extensions=args.cpu_extension)
    else:
        facedet = FaceDetection(args.facemodel, args.confidence)
        posest = HeadPoseEstimation(args.posemodel, args.confidence)
        landest = FaceLandmarksDetection(args.landmarksmodel, args.confidence)
        gazeest = GazeEstimation(args.gazemodel, args.confidence)

    # infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer
    facedet.load_model()
    posest.load_model()
    landest.load_model()
    gazeest.load_model()
    print("loaded models")

    ret, frame = cap.read()
    while ret:
        looking = 0
        POSE_CHECKED = False
        ret, frame = cap.read()
        frame_count += 1
        if not ret:
            print("checkpoint *BREAKING")
            break

        if frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break

        initial_w = int(cap.get(3))
        initial_h = int(cap.get(4))

        # Start asynchronous inference for specified request
        inf_start_fd = time.time()
        # Results of the output layer of the network
        coords, frame = facedet.predict(frame)
        det_time_fd = time.time() - inf_start_fd
        if len(coords) > 0:
            [xmin, ymin, xmax,
             ymax] = coords[0]  # use only the first detected face
            head_pose = frame[ymin:ymax, xmin:xmax]
            inf_start_hp = time.time()
            is_looking, pose_angles = posest.predict(head_pose)
            if is_looking:
                det_time_hp = time.time() - inf_start_hp
                POSE_CHECKED = True
                #print(is_looking)
                inf_start_lm = time.time()
                coords, f = landest.predict(head_pose)
                frame[ymin:ymax, xmin:xmax] = f
                det_time_lm = time.time() - inf_start_lm

                [[xlmin, ylmin, xlmax, ylmax], [xrmin, yrmin, xrmax,
                                                yrmax]] = coords
                left_eye_image = frame[ylmin:ylmax, xlmin:xlmax]
                right_eye_image = frame[yrmin:yrmax, xrmin:xrmax]
                output = gazeest.predict(left_eye_image, right_eye_image,
                                         pose_angles)
        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        if POSE_CHECKED:
            cv2.putText(
                frame, "Head pose Inference time: {:.3f} ms.".format(
                    det_time_hp * 1000), (0, 35), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
            cv2.putText(frame, inf_time_message, (0, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        out.write(frame)
        print("frame", frame_count)
        if frame_count % 10 == 0:
            print(time.time() - infer_time_start)
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start), frame_count,
                           video_len)
        if args.output_dir:
            total_time = time.time() - infer_time_start
            with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
                f.write(str(round(total_time, 1)) + '\n')
                f.write(str(frame_count) + '\n')
    facedet.clean()
    posest.clean()
    landest.clean()
    gazeest.clean()
    out.release()
    cap.release()
    cv2.destroyAllWindows()
Example #24
def infer_on_stream(args):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :return: None
    """
    try:
        logging.basicConfig(level=logging.INFO,
                            format="%(asctime)s [%(levelname)s] %(message)s",
                            handlers=[
                                logging.FileHandler("gaze-app.log"),
                                logging.StreamHandler()
                            ])

        # Initialise the class
        mc = MouseController("low", "fast")
        #mc.move(100,100)
        fdnet = FaceDetection(args.fdmodel)
        lmnet = FacialLandmarks(args.lmmodel)
        hpnet = HeadPoseEstimation(args.hpmodel)
        genet = GazeEstimation(args.gemodel)

        ### Load the model through ###
        logging.info("============== Models Load time ===============")
        start_time = time.time()
        fdnet.load_model()
        logging.info("Face Detection Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))
        fdnet.check_model()
        logging.info("Face Detection estimation layers loaded correctly")

        start_time = time.time()
        lmnet.load_model()
        logging.info("Facial Landmarks Detection Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))
        lmnet.check_model()
        logging.info("Facial Landmarks estimation layers loaded correctly")

        start_time = time.time()
        hpnet.load_model()
        logging.info("Headpose Estimation Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))
        hpnet.check_model()
        logging.info("Head pose estimation layers loaded correctly")

        start_time = time.time()
        genet.load_model()
        logging.info("Gaze Estimation Model: {:.1f}ms".format(
            1000 * (time.time() - start_time)))

        genet.check_model()
        logging.info("Gaze estimation layers loaded correctly")
        logging.info("==============  End =====================")
        # Get and open video capture
        feeder = InputFeeder('video', args.input)
        feeder.load_data()
        # FPS = feeder.get_fps()

        # Grab the shape of the input
        # width = feeder.get_width()
        # height = feeder.get_height()

        # init scene variables
        frame_count = 0

        ### Loop until stream is over ###
        fd_infertime = 0
        lm_infertime = 0
        hp_infertime = 0
        ge_infertime = 0
        while True:
            # Read the next frame
            try:
                frame = next(feeder.next_batch())
            except StopIteration:
                break

            key_pressed = cv2.waitKey(60)
            frame_count += 1
            #print(int((frame_count) % int(FPS)))

            # face detection
            fd_process_time = time.time()
            p_frame = fdnet.preprocess_input(frame)
            start_time = time.time()
            fnoutput = fdnet.predict(p_frame)
            fd_infertime += time.time() - start_time
            out_frame, fboxes = fdnet.preprocess_output(
                fnoutput, frame, args.print)
            logging.info(
                "Face Detection Model processing time : {:.1f}ms".format(
                    1000 * (time.time() - fd_process_time)))

            #for each face
            for fbox in fboxes:

                # fbox = (xmin,ymin,xmax,ymax)
                # get face landmarks
                # crop face from frame
                face = frame[fbox[1]:fbox[3], fbox[0]:fbox[2]]
                lm_process_time = time.time()
                p_frame = lmnet.preprocess_input(face)
                start_time = time.time()
                lmoutput = lmnet.predict(p_frame)
                lm_infertime += time.time() - start_time
                out_frame, left_eye_point, right_eye_point = lmnet.preprocess_output(
                    lmoutput, fbox, out_frame, args.print)
                logging.info(
                    "Landmarks model processing time : {:.1f}ms".format(
                        1000 * (time.time() - lm_process_time)))

                # get head pose estimation
                hp_process_time = time.time()
                p_frame = hpnet.preprocess_input(face)
                start_time = time.time()
                hpoutput = hpnet.predict(p_frame)
                hp_infertime += time.time() - start_time
                out_frame, headpose_angels = hpnet.preprocess_output(
                    hpoutput, out_frame, face, fbox, args.print)
                logging.info(
                    "Headpose estimation model processing time : {:.1f}ms".
                    format(1000 * (time.time() - hp_process_time)))

                # get gaze  estimation
                gaze_process_time = time.time()
                out_frame, left_eye, right_eye = genet.preprocess_input(
                    out_frame, face, left_eye_point, right_eye_point,
                    args.print)
                start_time = time.time()
                geoutput = genet.predict(left_eye, right_eye, headpose_angels)
                ge_infertime += time.time() - start_time
                out_frame, gazevector = genet.preprocess_output(
                    geoutput, out_frame, fbox, left_eye_point, right_eye_point,
                    args.print)
                logging.info(
                    "Gaze estimation model processing time : {:.1f}ms".format(
                        1000 * (time.time() - gaze_process_time)))

                if (not args.no_video):
                    cv2.imshow('im', out_frame)

                if (not args.no_move):
                    mc.move(gazevector[0], gazevector[1])

                #consider only first detected face in the frame
                break

            # Break if escape key pressed
            if key_pressed == 27:
                break

        #logging inference times
        if (frame_count > 0):
            logging.info(
                "============== Models Inference time ===============")
            logging.info("Face Detection:{:.1f}ms".format(1000 * fd_infertime /
                                                          frame_count))
            logging.info("Facial Landmarks Detection:{:.1f}ms".format(
                1000 * lm_infertime / frame_count))
            logging.info("Headpose Estimation:{:.1f}ms".format(
                1000 * hp_infertime / frame_count))
            logging.info("Gaze Estimation:{:.1f}ms".format(
                1000 * ge_infertime / frame_count))
            logging.info("============== End ===============================")

        # Release the capture and destroy any OpenCV windows
        feeder.close()
        cv2.destroyAllWindows()
    except Exception as ex:
        logging.exception("Error in inference:" + str(ex))
Example #25
def inference(args):

    time_sheet = {
        'face_infr': [],
        'landmark_infr': [],
        'head_infr': [],
        'gaze_infr': [],
        'infr_per_frame': []
    }

    logging.basicConfig(filename='result.log', level=logging.INFO)
    logging.info(
        "================================================================================="
    )
    logging.info("Precision(face,landmark,head,gaze): FP32-INT1,FP{0},FP{1},FP{2}".format(\
            args.landmark_model.split("FP")[1].split("\\")[0],
            args.head_model.split("FP")[1].split("\\")[0],
            args.gaze_model.split("FP")[1].split("\\")[0]))

    model_load_start = time.time()

    face_detection = FaceDetection(args.face_model)
    face_detection.load_model()
    landmark_regression = LandmarkRegression(args.landmark_model)
    landmark_regression.load_model()
    head_pose = HeadPose(args.head_model)
    head_pose.load_model()
    gaze_estimation = GazeEstimation(args.gaze_model)
    gaze_estimation.load_model()

    logging.info("4 models load time: {0:.4f}sec".format(time.time() -
                                                         model_load_start))

    mouse_controller = MouseController('high', 'fast')

    cv2.namedWindow('preview', cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty('preview', cv2.WND_PROP_FULLSCREEN,
                          cv2.WINDOW_FULLSCREEN)

    input_feeder = InputFeeder(args.input_type, args.input_file)
    input_feeder.load_data()

    total_infr_start = time.time()

    for image in input_feeder.next_batch():
        if image is None:
            break
        face_infr_start = time.time()
        face_image = face_detection.predict(image)
        time_sheet['face_infr'].append(time.time() - face_infr_start)

        landmark_infr_start = time.time()
        left_eye_image, right_eye_image = landmark_regression.predict(
            np.copy(face_image))
        time_sheet['landmark_infr'].append(time.time() - landmark_infr_start)

        head_infr_start = time.time()
        head_pose_angles = head_pose.predict(np.copy(face_image))
        time_sheet['head_infr'].append(time.time() - head_infr_start)

        gaze_infr_start = time.time()
        x, y, z = gaze_estimation.predict(left_eye_image, right_eye_image,
                                          head_pose_angles)
        time_sheet['gaze_infr'].append(time.time() - gaze_infr_start)
        time_sheet['infr_per_frame'].append(time.time() - face_infr_start)
        cv2.imshow('preview', image)
        mouse_controller.move(x, y)
        key = cv2.waitKey(20)
        if key == 27:  # exit on ESC
            break

    logging.info("Face model avg inference per frame: {0:.4f}sec".format(
        np.mean(time_sheet['face_infr'])))
    logging.info("Landmark model avg inference per frame: {0:.4f}sec".format(
        np.mean(time_sheet['landmark_infr'])))
    logging.info("Head model avg inference per frame: {0:.4f}sec".format(
        np.mean(time_sheet['head_infr'])))
    logging.info("Gaze model avg inference per frame: {0:.4f}sec".format(
        np.mean(time_sheet['gaze_infr'])))
    logging.info("4 Model avg inference per frame: {0:.4f}sec".format(
        np.mean(time_sheet['infr_per_frame'])))
    logging.info("Total inference time: {0:.4f}sec".format(time.time() -
                                                           total_infr_start))
    logging.info(
        "====================================END==========================================\n"
    )

    input_feeder.close()
    cv2.destroyAllWindows()
Example #26
from face_detection import FaceDetection
from face_recognition import FaceRecognition
from utils import read_image

if __name__ == "__main__":
    # Init the Face Detection and Face Recognition classes
    detection = FaceDetection()
    recognition = FaceRecognition()

    # Read the image
    image = read_image('./yolov3/data/samples/person.jpg')

    # Detect a face in the image (if many, returns the biggest one; if none, returns None)
    bounding_box = detection.detect(image)

    # bounding_box is a dictionary with parameters: x1, y1, x2, y2, width, height
    print(bounding_box)

    if bounding_box is not None:
        # Plot the bounding box on the image
        detection.plot(image, bounding_box)

        # Extract the face from the image
        face = recognition.extract(image, bounding_box)

        # Check if the face is from an employee, return True or False
        is_employee = recognition.recognize(face)

        if is_employee:
            print('Opening Door')
Example #27
def main():

    # Grab command line args
    args = build_argparser().parse_args()
    flags = args.models_outputs_flags

    logger = logging.getLogger()
    input_file_path = args.input
    input_feeder = None
    if input_file_path.lower() == "cam":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(input_file_path):
            logger.error("Unable to find specified video file")
            exit(1)
        input_feeder = InputFeeder("video", input_file_path)

    model_path_dict = {
        'FaceDetection': args.face_detection_model,
        'FacialLandmarks': args.facial_landmarks_model,
        'GazeEstimation': args.gaze_estimation_model,
        'HeadPoseEstimation': args.head_pose_estimation_model
    }

    for file_name_key in model_path_dict.keys():
        if not os.path.isfile(model_path_dict[file_name_key]):
            logger.error("Unable to find specified " + file_name_key +
                         " xml file")
            exit(1)

    fdm = FaceDetection(model_path_dict['FaceDetection'], args.device,
                        args.cpu_extension)
    flm = FacialLandmarks(model_path_dict['FacialLandmarks'], args.device,
                          args.cpu_extension)
    gem = GazeEstimation(model_path_dict['GazeEstimation'], args.device,
                         args.cpu_extension)
    hpem = HeadPoseEstimation(model_path_dict['HeadPoseEstimation'],
                              args.device, args.cpu_extension)

    mc = MouseController('medium', 'fast')

    input_feeder.load_data()
    fdm.load_model()
    flm.load_model()
    hpem.load_model()
    gem.load_model()

    frame_count = 0
    for ret, frame in input_feeder.next_batch():
        if not ret:
            break
        frame_count += 1
        if frame_count % 5 == 0:
            cv2.imshow('video', cv2.resize(frame, (500, 500)))

        key = cv2.waitKey(60)
        cropped_face, face_coords = fdm.predict(frame, args.prob_threshold)
        if type(cropped_face) == int:
            logger.error("Unable to detect any face.")
            if key == 27:
                break
            continue

        hp_output = hpem.predict(cropped_face)

        left_eye_img, right_eye_img, eye_coords = flm.predict(cropped_face)

        new_mouse_coord, gaze_vector = gem.predict(left_eye_img, right_eye_img,
                                                   hp_output)

        if (not len(flags) == 0):
            preview_frame = frame
            if 'fd' in flags:
                preview_frame = cropped_face
            if 'fld' in flags:
                cv2.rectangle(cropped_face,
                              (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                              (0, 255, 0), 3)
                cv2.rectangle(cropped_face,
                              (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                              (0, 255, 0), 3)

            if 'hp' in flags:
                cv2.putText(
                    preview_frame,
                    "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                    format(hp_output[0], hp_output[1], hp_output[2]), (10, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
            if 'ge' in flags:
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                        12), 160
                left_eye = cv2.line(left_eye_img, (x - w, y - w),
                                    (x + w, y + w), (255, 0, 255), 2)
                cv2.line(left_eye, (x - w, y + w), (x + w, y - w),
                         (255, 0, 255), 2)
                right_eye = cv2.line(right_eye_img, (x - w, y - w),
                                     (x + w, y + w), (255, 0, 255), 2)
                cv2.line(right_eye, (x - w, y + w), (x + w, y - w),
                         (255, 0, 255), 2)
                cropped_face[eye_coords[0][1]:eye_coords[0][3],
                             eye_coords[0][0]:eye_coords[0][2]] = left_eye
                cropped_face[eye_coords[1][1]:eye_coords[1][3],
                             eye_coords[1][0]:eye_coords[1][2]] = right_eye

            cv2.imshow("Visualization", cv2.resize(preview_frame, (500, 500)))

        if frame_count % 5 == 0:
            mc.move(new_mouse_coord[0], new_mouse_coord[1])
        if key == 27:
            break
    logger.error("VideoStream ended...")
    cv2.destroyAllWindows()
    input_feeder.close()
Example #28
def compare_faces(train_enc, test_enc, tol=0.3):
    train_enc = np.array(train_enc)
    train_enc = train_enc.reshape(train_enc.shape[0], train_enc.shape[2])
    test_enc = np.array(list(test_enc))
    test_enc = test_enc.reshape(test_enc.shape[0], test_enc.shape[2])
    euc_dist = np.linalg.norm(train_enc - test_enc, axis=1)
    #print(euc_dist)
    return np.where(euc_dist <= tol)[0]
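A small self-contained check of the distance/threshold logic in compare_faces(), using made-up 128-dimensional encodings; the (n, 1, 128) shape matches what the reshapes above expect and 0.3 is the function's default tolerance:

import numpy as np

rng = np.random.default_rng(0)
probe = rng.normal(size=(1, 128))
train_enc = [probe, rng.normal(size=(1, 128))]   # first stored encoding is identical to the probe
test_enc = [probe, probe]                        # one copy of the probe per stored encoding

matches = compare_faces(train_enc, test_enc, tol=0.3)
print(matches)   # -> [0]: only the identical encoding is within tolerance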


# Load required models
face_detection_model_path = args['model']
landmarks_model_path = args['landmarks']

print('Info : Loading required models ...')
face_detection_model = FaceDetection(face_detection_model_path)
face_alignment_model = AlignFace(landmarks_model_path)
print('Done.')

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# use gui to let user choose train images
main_win = tkinter.Tk()
main_win.withdraw()
images = filedialog.askopenfilenames(title='choose train images')

if len(images) > 0:
    encoder = GetFaceEncodings(detection_model=face_detection_model,
                               landmarks_model=face_alignment_model)
Example #29
def main():
    args = build_argparser().parse_args()

    frame_num = 0
    inference_time = 0
    counter = 0

    # Initialize the Inference Engine
    fd = FaceDetection()
    fld = Facial_Landmarks_Detection()
    ge = Gaze_Estimation()
    hp = Head_Pose_Estimation()

    # Load Models
    fd.load_model(args.face_detection_model, args.device, args.cpu_extension)
    fld.load_model(args.facial_landmark_model, args.device, args.cpu_extension)
    ge.load_model(args.gaze_estimation_model, args.device, args.cpu_extension)
    hp.load_model(args.head_pose_model, args.device, args.cpu_extension)

    # Mouse Controller precision and speed
    mc = MouseController('medium', 'fast')

    # feed input from an image, webcam, or video to model
    if args.input == "cam":
        feed = InputFeeder("cam")
    else:
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
        feed = InputFeeder("video", args.input)
    feed.load_data()
    frame_count = 0
    for frame in feed.next_batch():
        frame_count += 1
        inf_start = time.time()
        if frame is not None:
            try:
                key = cv2.waitKey(60)

                det_time = time.time() - inf_start

                # make predictions
                detected_face, face_coords = fd.predict(
                    frame.copy(), args.prob_threshold)
                hp_output = hp.predict(detected_face.copy())
                left_eye, right_eye, eye_coords = fld.predict(
                    detected_face.copy())
                new_mouse_coord, gaze_vector = ge.predict(
                    left_eye, right_eye, hp_output)

                stop_inference = time.time()
                inference_time = inference_time + stop_inference - inf_start
                counter = counter + 1

                # Visualization
                preview = args.visualization
                if preview:
                    preview_frame = frame.copy()
                    face_frame = detected_face.copy()

                    draw_face_bbox(preview_frame, face_coords)
                    display_hp(preview_frame, hp_output, face_coords)
                    draw_landmarks(face_frame, eye_coords)
                    draw_gaze(face_frame, gaze_vector, left_eye.copy(),
                              right_eye.copy(), eye_coords)

                if preview:
                    img = np.hstack((cv2.resize(preview_frame, (500, 500)),
                                     cv2.resize(face_frame, (500, 500))))
                else:
                    img = cv2.resize(frame, (500, 500))

                cv2.imshow('Visualization', img)

                # set speed
                if frame_count % 5 == 0:
                    mc.move(new_mouse_coord[0], new_mouse_coord[1])

                # INFO
                log.info("NUMBER OF FRAMES: {} ".format(frame_num))
                log.info("INFERENCE TIME: {}ms".format(det_time * 1000))

                frame_num += 1

                if key == 27:
                    break
            except Exception:
                print(
                    'Unsupported image or video file format. Please provide a supported format.'
                )
                exit()
    feed.close()
Example #30
def main(args):
    # enable logging for the function
    logger = logging.getLogger(__name__)

    # grab the parsed parameters
    faceModel = args.m_f
    facial_LandmarksModel = args.m_l
    headPoseEstimationModel = args.m_h
    GazeEstimationModel = args.m_g
    device = args.d
    inputFile = args.i
    output_path = args.o_p
    modelArchitecture = args.modelAr
    visualization_flag = args.vf

    # initialize feed
    single_image_format = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if inputFile.split(".")[-1].lower() in single_image_format:
        feed = InputFeeder('image', inputFile)
    elif args.i == 'cam':
        feed = InputFeeder('cam')
    else:
        feed = InputFeeder('video', inputFile)

    ##Load model time face detection
    faceStart_model_load_time = time.time()
    faceDetection = FaceDetection(faceModel, device)
    faceModelView = faceDetection.load_model()
    faceDetection.check_model()
    total_facemodel_load_time = time.time() - faceStart_model_load_time

    ##Load model time headpose estimatiom
    heaadposeStart_model_load_time = time.time()
    headPose = headPoseEstimation(headPoseEstimationModel, device)
    headPoseModelView = headPose.load_model()
    headPose.check_model()
    heaadposeTotal_model_load_time = time.time(
    ) - heaadposeStart_model_load_time

    ##Load model time face_landmarks estimation
    face_landmarksStart_model_load_time = time.time()
    face_landmarks = Face_landmarks(facial_LandmarksModel, device)
    faceLandmarksModelView = face_landmarks.load_model()
    face_landmarks.check_model()
    face_landmarksTotal_model_load_time = time.time(
    ) - face_landmarksStart_model_load_time

    ##Load model time face_landmarks estimation
    GazeEstimationStart_model_load_time = time.time()
    GazeEstimation = Gaze_Estimation(GazeEstimationModel, device)
    GazeModelView = GazeEstimation.load_model()
    GazeEstimation.check_model()
    GazeEstimationTotal_model_load_time = time.time(
    ) - GazeEstimationStart_model_load_time

    if modelArchitecture == 'yes':
        print("The model architecture of gaze mode is ", GazeModelView)
        print("model architecture for landmarks is", faceLandmarksModelView)
        print("model architecture for headpose is", headPoseModelView)
        print("model architecture for face is", faceModelView)

        # count the number of frames
    frameCount = 0
    input_feeder = InputFeeder('video', inputFile)
    w, h = feed.load_data()
    for _, frame in feed.next_batch():

        if not _:
            break
        frameCount += 1
        key = cv2.waitKey(60)
        start_imageface_inference_time = time.time()
        imageface = faceDetection.predict(frame, w, h)
        imageface_inference_time = time.time() - start_imageface_inference_time

        if 'm_f' in visualization_flag:
            cv2.imshow('cropped face', imageface)

        if type(imageface) == int:
            logger.info("no face detected")
            if key == 27:
                break
            continue

        start_imagePose_inference_time = time.time()
        imageAngles, imagePose = headPose.predict(imageface)
        imagePose_inference_time = time.time() - start_imagePose_inference_time

        if 'm_h' in visualization_flag:
            cv2.imshow('Head Pose Angles', imagePose)

        start_landmarkImage_inference_time = time.time()
        leftEye, rightEye, landmarkImage = face_landmarks.predict(imageface)
        landmarkImage_inference_time = time.time(
        ) - start_landmarkImage_inference_time

        if leftEye.any() == None or rightEye.any() == None:
            logger.info(
                "image probably too dark or eyes covered, hence could not detect landmarks"
            )
            continue

        if 'm_l' in visualization_flag:
            cv2.imshow('Face output', landmarkImage)

        start_GazeEstimation_inference_time = time.time()
        x, y = GazeEstimation.predict(leftEye, rightEye, imageAngles)
        GazeEstimation_inference_time = time.time(
        ) - start_GazeEstimation_inference_time

        if 'm_g' in visualization_flag:
            #             cv2.putText(landmarkedFace, "Estimated x:{:.2f} | Estimated y:{:.2f}".format(x,y), (10,20), cv2.FONT_HERSHEY_COMPLEX, 0.25, (0,255,0),1)
            cv2.imshow('Gaze Estimation', landmarkImage)

        mouseVector = MouseController('medium', 'fast')

        if frameCount % 5 == 0:
            mouseVector.move(x, y)

        if key == 27:
            break

        if imageface_inference_time != 0 and landmarkImage_inference_time != 0 and imagePose_inference_time != 0 and GazeEstimation_inference_time != 0:

            fps_face = 1 / imageface_inference_time
            fps_landmark = 1 / landmarkImage_inference_time
            fps_headpose = 1 / imagePose_inference_time
            fps_gaze = 1 / GazeEstimation_inference_time

            with open(
                    os.path.join(output_path, device, 'face',
                                 'face_stats.txt'), 'w') as f:
                f.write(str(imageface_inference_time) + '\n')
                f.write(str(fps_face) + '\n')
                f.write(str(total_facemodel_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'landmark',
                                 'landmark_stats.txt'), 'w') as f:
                f.write(str(landmarkImage_inference_time) + '\n')
                f.write(str(fps_landmark) + '\n')
                f.write(str(face_landmarksTotal_model_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'headpose',
                                 'headpose_stats.txt'), 'w') as f:
                f.write(str(imagePose_inference_time) + '\n')
                f.write(str(fps_headpose) + '\n')
                f.write(str(heaadposeTotal_model_load_time) + '\n')

            with open(
                    os.path.join(output_path, device, 'gaze',
                                 'gaze_stats.txt'), 'w') as f:
                f.write(str(GazeEstimation_inference_time) + '\n')
                f.write(str(fps_gaze) + '\n')
                f.write(str(GazeEstimationTotal_model_load_time) + '\n')

    logger.info("The End")
    VIS = visualize(output_path, device)
    VIS.visualize1()
    VIS.visualize2()
    VIS.visualize3()
    cv2.destroyAllWindows()
    feed.close()