Code Example #1
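    # Summary: loads the four OpenVINO IR models of the gaze pipeline
    # (face detection, facial landmarks, head pose, gaze estimation) and
    # records each model's load time for later reporting.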
    def initialize_models(self):
        try:
            model_precision = self.args.model.upper()

            self.face_detection_model = Model_Face_Detection(
                "models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001"
            )
            start = time.time()
            self.face_detection_model.load_model()
            self.face_detection_load_time = time.time() - start

            self.facial_landmark_detection_model = Model_Facial_Landmark_Detection(
                f"models/intel/landmarks-regression-retail-0009/{model_precision}/landmarks-regression-retail-0009",
                self.args.device.upper())
            start = time.time()
            self.facial_landmark_detection_model.load_model()
            self.facial_landmark_detection_load_time = time.time() - start

            self.head_pose_estimation_model = Model_Head_Pose_estimation(
                f"models/intel/head-pose-estimation-adas-0001/{model_precision}/head-pose-estimation-adas-0001",
                self.args.device.upper())
            start = time.time()
            self.head_pose_estimation_model.load_model()
            self.head_pose_estimation_load_time = time.time() - start

            self.gaze_estimation_model = Model_Gaze_Estimation(
                f"models/intel/gaze-estimation-adas-0002/{model_precision}/gaze-estimation-adas-0002",
                self.args.device.upper())
            start = time.time()
            self.gaze_estimation_model.load_model()
            self.gaze_estimation_load_time = time.time() - start
        except Exception as err:
            log.error("Could not load model. Cause: ", str(err))
Code Example #2
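# Summary: end-to-end inference loop. Each frame is passed through face
# detection; the cropped face feeds the head-pose and landmark models, whose
# outputs drive gaze estimation and, every fifth frame, the mouse pointer.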
def infer(args):
    """
    Initialize the inference network, and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :return: None
    """
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    if args.input.lower() == "cam":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(args.input):
            logger.error("Unable to find input file")
            exit(1)

        input_feeder = InputFeeder("video", args.input)

    start_time = time.time()
    model_fd = Model_Face_Detection(args.facedetectionmodel, args.device,
                                    args.cpu_extension)
    model_fld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel,
                                                 args.device,
                                                 args.cpu_extension)
    model_ge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device,
                                     args.cpu_extension)
    model_hp = Model_Head_Pose_Estimation(args.headposemodel, args.device,
                                          args.cpu_extension)

    mc = MouseController('medium', 'fast')

    input_feeder.load_data()

    model_fd.load_model()
    model_fld.load_model()
    model_ge.load_model()
    model_hp.load_model()

    loading_time = time.time() - start_time
    logger.info("Loading time of the models: " + str(loading_time) + " s")

    frame_count = 0
    inference_time = 0
    for flag, frame in input_feeder.next_batch():
        if not flag:
            break
        if frame is None:
            continue
        key = cv2.waitKey(60)
        if key == 27:
            break
        frame_count += 1

        start_inference = time.time()
        face, face_coords = model_fd.predict(frame, prob_threshold)
        if isinstance(face, int):
            logger.error("No face detected.")
            continue

        out_hp = model_hp.predict(face)
        left_eye, right_eye, eye_coords = model_fld.predict(face)
        mouse_coord, gaze_vector = model_ge.predict(left_eye, right_eye,
                                                    out_hp)

        inference_time += time.time() - start_inference
        if len(args.flags) != 0:
            frame_p = frame.copy()
            if 'fd' in args.flags:
                frame_p = face

            if 'fld' in args.flags:
                cv2.rectangle(face,
                              (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                              (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                              (0, 255, 0), 3)
                cv2.rectangle(face,
                              (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                              (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                              (0, 255, 0), 3)

            if 'hp' in args.flags:
                cv2.putText(
                    frame_p,
                    "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                    format(out_hp[0], out_hp[1], out_hp[2]), (10, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.2, (255, 255, 255), 1)
            if 'ge' in args.flags:
                x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                        12), 160
                le = cv2.line(left_eye, (x - w, y - w), (x + w, y + w),
                              (255, 0, 255), 2)
                cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                re = cv2.line(right_eye, (x - w, y - w), (x + w, y + w),
                              (255, 0, 255), 2)
                cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255), 2)
                face[eye_coords[0][1]:eye_coords[0][3],
                     eye_coords[0][0]:eye_coords[0][2]] = le
                face[eye_coords[1][1]:eye_coords[1][3],
                     eye_coords[1][0]:eye_coords[1][2]] = re

            cv2.imshow("visualization", cv2.resize(preview_frame, (500, 500)))

        # mouse move at 5 FPS
        if frame_count % 5 == 0:
            mc.move(mouse_coord[0], mouse_coord[1])

    logger.info("Total inference time {} s".format(inference_time))
    logger.info("Average inference time {} s".format(inference_time /
                                                     frame_count))
    logger.info("FPS {} frame/second".format(frame_count /
                                             (inference_time * 5)))

    cv2.destroyAllWindows()
    input_feeder.close()
Code Example #3
File: main.py  Project: ahirsarthak/Computer-Pointer
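# Summary: same pipeline as example #2, with optional preview overlays
# (face box, eye boxes, pose angles, gaze lines) shown side by side with
# the raw frame via np.hstack.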
def main():
    args = build_argparser().parse_args()
    previewFlags = args.previewFlags

    logger = logging.getLogger()
    inputFile = args.input
    inputFeeder = None

    if inputFile.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputFile):
            logger.error("Unable to find input file")
            exit(1)

        inputFeeder = InputFeeder("video", inputFile)

    start_loading = time.time()

    mfd = Model_Face_Detection(args.facedetectionmodel, args.device,
                               args.cpu_extension)
    mfld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel,
                                            args.device, args.cpu_extension)
    mge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device,
                                args.cpu_extension)
    mhpe = Model_Head_Pose_Estimation(args.headposemodel, args.device,
                                      args.cpu_extension)

    mc = MouseController('medium', 'fast')

    inputFeeder.load_data()

    mfd.load_model()
    mfld.load_model()
    mge.load_model()
    mhpe.load_model()

    model_loading_time = time.time() - start_loading

    frame_count = 0
    inference_time = 0
    for ret, frame in inputFeeder.next_batch():
        if not ret:
            break

        if frame is not None:
            frame_count += 1
            if frame_count % 5 == 0:
                cv2.imshow('video', cv2.resize(frame, (500, 500)))

            key = cv2.waitKey(60)
            start_inference = time.time()

            croppedFace, face_coords = mfd.predict(frame.copy(),
                                                   args.prob_threshold)
            if isinstance(croppedFace, int):
                logger.error("No face detected.")
                if key == 27:
                    break

                continue

            hp_out = mhpe.predict(croppedFace.copy())

            left_eye, right_eye, eye_coords = mfld.predict(croppedFace.copy())

            new_mouse_coord, gaze_vector = mge.predict(left_eye, right_eye,
                                                       hp_out)

            inference_time += time.time() - start_inference
            if len(previewFlags) != 0:
                preview_window = frame.copy()

                if 'fd' in previewFlags:
                    if len(previewFlags) != 1:
                        preview_window = croppedFace
                    else:
                        cv2.rectangle(preview_window,
                                      (face_coords[0], face_coords[1]),
                                      (face_coords[2], face_coords[3]),
                                      (0, 150, 0), 3)

                if 'fld' in previewFlags:
                    if 'fd' not in previewFlags:
                        preview_window = croppedFace.copy()

                    cv2.rectangle(
                        preview_window,
                        (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                        (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                        (0, 255, 0), 3)
                    cv2.rectangle(
                        preview_window,
                        (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                        (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                        (0, 255, 0), 3)

                if 'hp' in previewFlags:
                    cv2.putText(
                        preview_window,
                        "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                        format(hp_out[0], hp_out[1],
                               hp_out[2]), (50, 50), cv2.FONT_HERSHEY_COMPLEX,
                        1, (0, 255, 0), 1, cv2.LINE_AA)

                if 'ge' in previewFlags:
                    if 'fd' not in previewFlags:
                        preview_window = croppedFace.copy()

                    x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                            12), 160

                    le = cv2.line(left_eye.copy(), (x - w, y - w),
                                  (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                             2)

                    re = cv2.line(right_eye.copy(), (x - w, y - w),
                                  (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                             2)

                    preview_window[eye_coords[0][1]:eye_coords[0][3],
                                   eye_coords[0][0]:eye_coords[0][2]] = le
                    preview_window[eye_coords[1][1]:eye_coords[1][3],
                                   eye_coords[1][0]:eye_coords[1][2]] = re

            if len(previewFlags) != 0:
                img_hor = np.hstack((cv2.resize(frame, (500, 500)),
                                     cv2.resize(preview_window, (500, 500))))
            else:
                img_hor = cv2.resize(frame, (500, 500))

            cv2.imshow('Visualization', img_hor)

            if frame_count % 5 == 0:
                mc.move(new_mouse_coord[0], new_mouse_coord[1])

            if key == 27:
                break

    fps = frame_count / inference_time

    logger.error("video ended...")
    logger.error("Total loading time of the models: " +
                 str(model_loading_time) + " s")
    logger.error("total inference time {} seconds".format(inference_time))
    logger.error("Average inference time: " +
                 str(inference_time / frame_count) + " s")
    logger.error("fps {} frame/second".format(fps / 5))

    cv2.destroyAllWindows()
    inputFeeder.close()
Code Example #4
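# Summary: variant that times each model separately, both at load time and
# per inference, and reports average latency and FPS per model at the end.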
def main():

    # get arguments
    args = get_args()
    visualization_list = args.visualize
    prob = args.prob
    if prob is None:
        prob = 0.5
    input_type = args.input_file
    input_path = args.input_path

    #logging config

    logging.basicConfig(filename="app.log",
                        level=logging.DEBUG,
                        format='%(asctime)s:%(levelname)s:%(message)s')

    # Initialize models
    try:
        fd = Model_Face_Detection(args.face_detection, args.device,
                                  args.extention)
        ld = Model_Facial_Landmark_Detection(args.landmark_detection,
                                             args.device, args.extention)
        hp = Model_Head_Pose_Estimation(args.head_pose_detection, args.device,
                                        args.extention)
        gd = Model_Gaze_Estimation(args.gaze_detection, args.device,
                                   args.extention)
    except Exception as err:
        logging.error("Error in initializing models: %s", err)
        exit(1)

    # load models
    try:
        start_loading_time_fd = time.time()
        fd.load_model()
        fd_time_diff = time.time() - start_loading_time_fd
        start_loading_time_ld = time.time()
        ld.load_model()
        ld_time_diff = time.time() - start_loading_time_ld
        start_loading_time_hp = time.time()
        hp.load_model()
        hp_time_diff = time.time() - start_loading_time_hp
        start_loading_time_gd = time.time()
        gd.load_model()
        gd_time_diff = time.time() - start_loading_time_gd
    except Exception as err:
        logging.error("Error in loading the models: %s", err)
        exit(1)

    logging.debug(
        "Loading times are facial detection : {} , landmark detection : {} , head pose detection : {} , gaze estimation : {} "
        .format(fd_time_diff, ld_time_diff, hp_time_diff, gd_time_diff))

    if input_type.lower() != "cam":
        if not os.path.isfile(input_path):
            logging.error("Unable to find specified video file")
            exit(1)
    else:
        input_path = None

    # Initialize input feed and load data
    input_feed = InputFeeder(input_type, input_path)
    input_feed.load_data()

    # Create the mouse controller once, outside the frame loop
    mc = MouseController("high", "fast")

    avg_inf_time = {"fd": [], "ld": [], "hp": [], "gd": []}

    for ret, frame in input_feed.next_batch():

        if not ret:
            break

        outs_fd, fd_inf_time = fd.predict(frame.copy(), prob)

        if len(outs_fd) == 0:
            continue

        start_point = outs_fd[0]
        end_point = outs_fd[1]

        cropped_face = crop_face(start_point, end_point, frame)

        # predict facial landmark on cropped image
        outs_ld, ld_inf_time = ld.predict(cropped_face.copy())
        if len(outs_ld) == 0:
            continue

        # extract coordinates for left and right eye
        p1 = tuple(sum(x) for x in zip(outs_ld[0][0], start_point))
        p2 = tuple(sum(x) for x in zip(outs_ld[0][1], start_point))
        p3 = tuple(sum(x) for x in zip(outs_ld[0][2], start_point))
        p4 = tuple(sum(x) for x in zip(outs_ld[0][3], start_point))

        start_left_bb = tuple(sum(x) for x in zip(outs_ld[0][4], start_point))
        end_left_bb = tuple(sum(x) for x in zip(outs_ld[0][5], start_point))
        start_right_bb = tuple(sum(x) for x in zip(outs_ld[0][6], start_point))
        end_right_bb = tuple(sum(x) for x in zip(outs_ld[0][7], start_point))

        left_eye, right_eye = crop_eyes(
            frame.copy(),
            (start_left_bb, end_left_bb, start_right_bb, end_right_bb))

        # pitch, roll and yaw estimation on cropped face
        outs_hp, hp_inf_time = hp.predict(cropped_face.copy())
        p, r, y = outs_hp

        # gaze estimation
        outs_gd, gd_inf_time = gd.predict(left_eye, right_eye,
                                          np.array([[y, p, r]]))

        # adding inference time to dictionary
        avg_inf_time["fd"].append(fd_inf_time)
        avg_inf_time["ld"].append(ld_inf_time)
        avg_inf_time["hp"].append(hp_inf_time)
        avg_inf_time["gd"].append(gd_inf_time)

        ## Control Mouse pointer
        if len(outs_gd) == 0:
            continue

        mc.move(outs_gd[0], outs_gd[1])

        if len(visualization_list) != 0:
            show_visualization(
                frame, visualization_list, start_point, end_point,
                (start_left_bb, end_left_bb, start_right_bb, end_right_bb),
                [p1, p2, p3, p4], (p, r, y), outs_gd)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    logging.debug(
        "Average inf. time are fd : {}, ld : {}, hp : {}, gd : {}".format(
            sum(avg_inf_time["fd"]) / len(avg_inf_time["fd"]),
            sum(avg_inf_time["ld"]) / len(avg_inf_time["ld"]),
            sum(avg_inf_time["hp"]) / len(avg_inf_time["hp"]),
            sum(avg_inf_time["gd"]) / len(avg_inf_time["gd"])))
    logging.debug(
        "Total inf. time are fd : {}, ld : {}, hp : {}, gd : {}".format(
            sum(avg_inf_time["fd"]), sum(avg_inf_time["ld"]),
            sum(avg_inf_time["hp"]), sum(avg_inf_time["gd"])))
    logging.debug("FPS time are fd : {}, ld : {}, hp : {}, gd : {}".format(
        1 / (sum(avg_inf_time["fd"]) / len(avg_inf_time["fd"])),
        1 / (sum(avg_inf_time["ld"]) / len(avg_inf_time["ld"])),
        1 / (sum(avg_inf_time["hp"]) / len(avg_inf_time["hp"])),
        1 / (sum(avg_inf_time["gd"]) / len(avg_inf_time["gd"]))))
    logging.info("Stream Ended")
    cv2.destroyAllWindows()
    input_feed.close()
Code Example #5
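# Summary: class-based variant. Application wires up argument parsing,
# logging, model loading, the input feed, and per-frame inference, keeping
# cumulative load and inference timers as instance attributes.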
class Application:
    def __init__(self):
        self.args = None
        self.feed = None
        self.face_detection_model = None
        self.facial_landmark_detection_model = None
        self.gaze_estimation_model = None
        self.head_pose_estimation_model = None
        self.frame = None
        self.width = None
        self.height = None
        self.mc = MouseController("high", "fast")
        self.face_detection_load_time = 0
        self.facial_landmark_detection_load_time = 0
        self.gaze_estimation_load_time = 0
        self.head_pose_estimation_load_time = 0
        self.face_detection_infer_time = 0
        self.facial_landmark_detection_infer_time = 0
        self.gaze_estimation_infer_time = 0
        self.head_pose_estimation_infer_time = 0
        self.frames = 0

    def initialize_argparser(self):
        """
        Parse command line arguments.

        :return: command line arguments
        """
        parser = ArgumentParser()
        parser.add_argument("-t",
                            "--input-type",
                            required=True,
                            type=str,
                            help="Type of input (video or cam)")
        parser.add_argument("-i",
                            "--input",
                            required=True,
                            type=str,
                            help="Input file")
        parser.add_argument("-o",
                            "--out",
                            type=str,
                            default=None,
                            help="Output file with the processed content")
        parser.add_argument("-p",
                            "--preview",
                            action='store_true',
                            default=False,
                            help="Should preview face and eyes")
        parser.add_argument("--notmove",
                            action='store_true',
                            default=False,
                            help="Should not move mouse")
        parser.add_argument(
            "-m",
            "--model",
            type=str,
            default="FP32",
            help="Model precision to use. One of FP32, FP16 or FP16-INT8")
        parser.add_argument(
            "-d",
            "--device",
            type=str,
            default="CPU",
            help="Device used to process model. One or CPU or GPU")
        parser.add_argument("-v",
                            "--verbose",
                            action='store_true',
                            default=False,
                            help="Enable DEBUG messages")

        self.args = parser.parse_args()

    def initialize_logging(self):
        if self.args.verbose:
            log.basicConfig(level=log.DEBUG)
        else:
            log.basicConfig(level=log.ERROR)

    def initialize_feed(self):
        self.feed = InputFeeder(self.args.input_type, self.args.input)
        self.feed.load_data()

    def initialize_window(self):
        if self.args.preview:
            cv2.namedWindow('preview')
            cv2.namedWindow('face')
            cv2.namedWindow('left eye')
            cv2.namedWindow('right eye')
            cv2.namedWindow('gaze')

    def show_main_frame(self):
        cv2.imshow('preview', self.frame)

    def esc_key_pressed(self):
        return cv2.waitKey(1) == 27

    def infer_face(self):
        start = time.time()
        face_frame = self.face_detection_model.predict(self.frame)
        self.face_detection_infer_time += time.time() - start
        return face_frame

    def infer_eyes(self, face_frame, show=False):
        start = time.time()
        left_eye_pos, right_eye_pos, left_eye, right_eye = self.facial_landmark_detection_model.predict(
            face_frame)
        self.facial_landmark_detection_infer_time += time.time() - start

        if show:
            tmp_face = face_frame.copy()
            cv2.circle(tmp_face, (left_eye_pos[0], left_eye_pos[1]), 5,
                       (0, 255, 0))
            cv2.circle(tmp_face, (right_eye_pos[0], right_eye_pos[1]), 5,
                       (0, 255, 0))
            cv2.imshow('face', tmp_face)
            cv2.imshow('left eye', left_eye)
            cv2.imshow('right eye', right_eye)

        return left_eye, right_eye

    def infer_pose(self, face_frame, show=False):
        start = time.time()
        yaw, pitch, roll = self.head_pose_estimation_model.predict(face_frame)
        self.head_pose_estimation_infer_time += time.time() - start
        return yaw, pitch, roll

    def infer_gaze(self,
                   cropped_left_eye,
                   cropped_right_eye,
                   yaw,
                   pitch,
                   roll,
                   show=False):
        start = time.time()
        gaze = self.gaze_estimation_model.predict(cropped_left_eye,
                                                  cropped_right_eye, yaw,
                                                  pitch, roll)
        self.gaze_estimation_infer_time += time.time() - start
        if show:
            img = np.zeros([100, 100, 3], dtype=np.uint8)
            img.fill(255)
            cv2.circle(img, (50, 50), 50, (0, 255, 0))
            cv2.arrowedLine(img, (50, 50),
                            (50 + int(gaze[0] * 70), 50 + int(-gaze[1] * 70)),
                            (255, 0, 0), 2)
            cv2.imshow('gaze', img)
        return gaze

    def infer_frame(self):
        self.show_main_frame()
        if self.esc_key_pressed():
            return False
        self.frames += 1
        face_frame = self.infer_face()
        if face_frame is not None:
            cropped_left_eye, cropped_right_eye = self.infer_eyes(
                face_frame, self.args.preview)
            yaw, pitch, roll = self.infer_pose(face_frame, self.args.preview)
            gaze = self.infer_gaze(cropped_left_eye, cropped_right_eye, yaw,
                                   pitch, roll, self.args.preview)
            if not self.args.notmove:
                self.mc.move(gaze[0], gaze[1])

    def process_feed(self):
        try:
            for batch in self.feed.next_batch():
                self.frame = batch
                if batch is not None:
                    if self.infer_frame() is False:
                        break
                else:
                    break

            log.info("Face detection model load time: {:.2f}ms".format(
                1000 * self.face_detection_infer_time))
            log.info(
                "Facial landmark detection model load time: {:.2f}ms".format(
                    1000 * self.facial_landmark_detection_infer_time))
            log.info("Head Pose estimation model load: {:.2f}ms".format(
                1000 * self.head_pose_estimation_infer_time))
            log.info("Gaze estimation model load time: {:.2f}ms".format(
                1000 * self.gaze_estimation_infer_time))

            log.info(
                "Face detection model inference mean time: {:.2f}ms".format(
                    1000 * self.face_detection_infer_time / self.frames))
            log.info(
                "Facial landmark detection model inference mean time: {:.2f}ms"
                .format(1000 * self.facial_landmark_detection_infer_time /
                        self.frames))
            log.info(
                "Head Pose estimation model inference mean time: {:.2f}ms".
                format(1000 * self.head_pose_estimation_infer_time /
                       self.frames))
            log.info(
                "Gaze estimation model inference mean time: {:.2f}ms".format(
                    1000 * self.gaze_estimation_infer_time / self.frames))

        except Exception as err:
            log.error("Could not infer. Cause: ", str(err))

    def initialize_models(self):
        try:
            model_precision = self.args.model.upper()

            self.face_detection_model = Model_Face_Detection(
                "models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001"
            )
            start = time.time()
            self.face_detection_model.load_model()
            self.face_detection_load_time = time.time() - start

            self.facial_landmark_detection_model = Model_Facial_Landmark_Detection(
                f"models/intel/landmarks-regression-retail-0009/{model_precision}/landmarks-regression-retail-0009",
                self.args.device.upper())
            start = time.time()
            self.facial_landmark_detection_model.load_model()
            self.facial_landmark_detection_load_time = time.time() - start

            self.head_pose_estimation_model = Model_Head_Pose_estimation(
                f"models/intel/head-pose-estimation-adas-0001/{model_precision}/head-pose-estimation-adas-0001",
                self.args.device.upper())
            start = time.time()
            self.head_pose_estimation_model.load_model()
            self.head_pose_estimation_load_time = time.time() - start

            self.gaze_estimation_model = Model_Gaze_Estimation(
                f"models/intel/gaze-estimation-adas-0002/{model_precision}/gaze-estimation-adas-0002",
                self.args.device.upper())
            start = time.time()
            self.gaze_estimation_model.load_model()
            self.gaze_estimation_load_time = time.time() - start
        except Exception as err:
            log.error("Could not load model. Cause: ", str(err))

    def run(self):
        self.initialize_argparser()
        self.initialize_logging()
        self.initialize_models()
        self.initialize_feed()
        self.initialize_window()
        self.process_feed()
        self.feed.close()
Code Example #6
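# Summary: compact variant of the same loop; visualization flags draw
# directly on the cropped face, and the pointer is moved every fifth frame.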
def main():

    args = build_argparser().parse_args()
    previewFlags = args.previewFlags
    
    logger = logging.getLogger()
    inputFile = args.input
    inputFeeder = None

    if inputFile.lower() == "cam":
        inputFeeder = InputFeeder("cam")
    else:
        if not os.path.isfile(inputFile):
            logger.error("Unable to find input file")
            exit(1)

        inputFeeder = InputFeeder("video", inputFile)


    mfd = Model_Face_Detection(args.facedetectionmodel, args.device,
                               args.cpu_extension)
    mfld = Model_Facial_Landmarks_Detection(args.faciallandmarkmodel,
                                            args.device, args.cpu_extension)
    mge = Model_Gaze_Estimation(args.gazeestimationmodel, args.device,
                                args.cpu_extension)
    mhpe = Model_Head_Pose_Estimation(args.headposemodel, args.device,
                                      args.cpu_extension)

    mc = MouseController('medium', 'fast')
    inputFeeder.load_data()

    mfd.load_model()
    mfld.load_model()
    mge.load_model()
    mhpe.load_model()
    frame_count = 0
    for ret, frame in inputFeeder.next_batch():
        if not ret:
            break

        if frame is not None:

            frame_count += 1
            if frame_count % 5 == 0:
                cv2.imshow('video', cv2.resize(frame, (500, 500)))

            key = cv2.waitKey(60)
            croppedFace, face_coords = mfd.predict(frame.copy(),
                                                   args.prob_threshold)
            if isinstance(croppedFace, int):
                logger.error("No face detected.")
                if key == 27:
                    break
                continue
            
            hp_out = mhpe.predict(croppedFace.copy())
            
            left_eye, right_eye, eye_coords = mfld.predict(croppedFace.copy())

            new_mouse_coord, gaze_vector = mge.predict(left_eye, right_eye,
                                                       hp_out)
            
            if len(previewFlags) != 0:
                preview_frame = frame.copy()
                if 'fd' in previewFlags:
                    preview_frame = croppedFace
                if 'fld' in previewFlags:
                    cv2.rectangle(croppedFace,
                                  (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                                  (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                                  (0, 255, 0), 3)
                    cv2.rectangle(croppedFace,
                                  (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                                  (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                                  (0, 255, 0), 3)
                if 'hp' in previewFlags:
                    cv2.putText(
                        preview_frame,
                        "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                        format(hp_out[0], hp_out[1], hp_out[2]), (10, 20),
                        cv2.FONT_HERSHEY_COMPLEX, 0.25, (0, 255, 0), 1)
                if 'ge' in previewFlags:
                    x, y, w = int(gaze_vector[0] * 12), int(gaze_vector[1] *
                                                            12), 160
                    le = cv2.line(left_eye.copy(), (x - w, y - w),
                                  (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(le, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                             2)
                    re = cv2.line(right_eye.copy(), (x - w, y - w),
                                  (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(re, (x - w, y + w), (x + w, y - w), (255, 0, 255),
                             2)
                    croppedFace[eye_coords[0][1]:eye_coords[0][3],
                                eye_coords[0][0]:eye_coords[0][2]] = le
                    croppedFace[eye_coords[1][1]:eye_coords[1][3],
                                eye_coords[1][0]:eye_coords[1][2]] = re

                cv2.imshow("visualization",
                           cv2.resize(preview_frame, (500, 500)))
            
            if frame_count % 5 == 0:
                mc.move(new_mouse_coord[0], new_mouse_coord[1])
            if key == 27:
                break
    logger.error("video ended...")
    cv2.destroyAllWindows()
    inputFeeder.close()
Code Example #7
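# Summary: variant with helper functions (write_text_img, visualization,
# display_head_pose) for overlays; the gaze output moves the pointer every
# fifth frame, and ESC (key 27) exits the loop.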
def run_app(args):
    face_detection_model = Model_Face_Detection(args.model_path_fd,
                                                args.device,
                                                args.cpu_extension,
                                                threshold=args.threshold)
    face_detection_model.load_model()
    head_pose_model = Model_Head_Pose_Estimation(args.model_path_hp,
                                                 args.device,
                                                 args.cpu_extension)
    head_pose_model.load_model()
    face_landmark_model = Model_Facial_Landmarks(args.model_path_fl,
                                                 args.device,
                                                 args.cpu_extension)
    face_landmark_model.load_model()
    gaze_model = Model_Gaze_Estimation(args.model_path_ge, args.device,
                                       args.cpu_extension)
    gaze_model.load_model()

    input_feeder = InputFeeder(
        args.input_type,
        args.input_file,
    )
    input_feeder.load_data()
    mouse_controller = MouseController("medium", "fast")

    frame_count = 0
    custom = args.toggle

    for frame in input_feeder.next_batch():

        if frame is None:
            break
        key_pressed = cv2.waitKey(60)
        frame_count += 1
        face_out, cords = face_detection_model.predict(frame.copy())

        # When no face was detected
        if cords == 0:
            inf_info = "No Face Detected in the Frame"
            write_text_img(frame, inf_info, 400)
            continue

        eyes_cords, left_eye, right_eye = face_landmark_model.predict(
            face_out.copy())
        head_pose_out = head_pose_model.predict(face_out.copy())
        gaze_out = gaze_model.predict(left_eye, right_eye, head_pose_out)

        # Failure in processing both eyes
        if gaze_out is None:
            continue
        x, y = gaze_out
        if frame_count % 5 == 0:
            mouse_controller.move(x, y)
        inf_info = "Head Pose (y: {:.2f}, p: {:.2f}, r: {:.2f})".format(
            head_pose_out[0], head_pose_out[1], head_pose_out[2])
        # Process Visualization
        if 'frame' in custom:
            visualization(frame, cords, face_out, eyes_cords)

        if 'stats' in custom:
            write_text_img(face_out, inf_info, 400)
            inf_info = "Gaze Angle: x: {:.2f}, y: {:.2f}".format(x, y)
            log.info("Statistic " + inf_info)
            write_text_img(face_out, inf_info, 400, 15)
        if 'gaze' in custom:
            display_head_pose(frame, head_pose_out, cords)

        out_f = np.hstack(
            (cv2.resize(frame, (400, 400)), cv2.resize(face_out, (400, 400))))
        cv2.imshow('Visualization', out_f)
        if key_pressed == 27:
            break
    input_feeder.close()
    cv2.destroyAllWindows()