def main():
    # Construct the argument parser and parse the arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
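    # Prime the image queue with the sample frame so the detection worker has
    # an image to work on as soon as it starts.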
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
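    # One scalar Kalman filter per pose value: 3 rotation-vector components
    # plus 3 translation-vector components, hence six stabilizers.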
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read a frame; crop and flip it below to suit your needs.
        frame_got, frame = cam.read()
        if not frame_got:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 1)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # Sanitize the facebox: if a coordinate pair came back inverted,
            # clamp the box origin to 0.
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))
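            # stable_pose[0] is the smoothed rotation vector and stable_pose[1]
            # the smoothed translation vector.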

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
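
# A minimal entry-point sketch (an assumption, not part of the original
# listing); guarding main() behind __main__ keeps multiprocessing.Process
# spawning safe on platforms that use the spawn start method.
if __name__ == "__main__":
    main()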
Example #2
def thread_func(args, detector, predictor, img_queue, result_queue):
    """Get face from image queue. This function is used for multiprocessing"""

    # Introduce mark_detector to detect landmarks.
    gaze_model = args["gaze_net"]
    eye_size = args["eye_size"]
    face_size = args["face_size"]
    inputs = args["inputs"]
    outputs = args["outputs"]
    print("[INFO] loading gaze predictor...")
    gaze_detector = GazeEstimator(gaze_model=gaze_model,
                                  eye_image_size=eye_size,
                                  face_image_size=face_size,
                                  inputs=inputs,
                                  outputs=outputs)

    # Width the frame is downscaled to for faster face detection, and the
    # scale factor applied when converting the dlib facebox to a plain box.
    detectorWidth = 400
    faceBoxScale = 0.15

    while True:
        # get the image
        try:
            frame = img_queue.get(timeout=1)
        except Q.Empty:
            print("Image Q empty, thread exiting!")
            return
        # Compute the factor that maps detector-resolution coordinates back to
        # the original frame size.
        originalWidth = frame.shape[1]
        factor = originalWidth / detectorWidth
        # resize for face detection
        image = imutils.resize(frame, width=detectorWidth)
        # convert to grayscale for the landmark predictor
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # run the dlib face detector; it returns rectangles, confidence scores
        # and sub-detector indices
        faceboxes, scores, idx = detector.run(image, 0)
        if faceboxes is not None and len(faceboxes) > 0:
            facebox = faceboxes[0]
            confidence = scores[0]
            # get 5 landmarks
            marks = predictor(gray, facebox)
            # convert marks to np array
            marks = face_utils.shape_to_np(marks)
            leftEyeMarks = []
            rightEyeMarks = []
            # pull out left and right eye marks
            for (i, (x, y)) in enumerate(marks):
                [x, y] = [int(x * factor), int(y * factor)]
                if i == 0 or i == 1:
                    leftEyeMarks.append([x, y])
                if i == 2 or i == 3:
                    rightEyeMarks.append([x, y])

            # convert the facebox from dlib format to regular BB and
            # rescale it back to original image size
            facebox = utils.dlib_to_box(facebox, factor, faceBoxScale)
            # segment the image based on markers and facebox
            seg = Segmenter(facebox, leftEyeMarks, rightEyeMarks,
                            frame.shape[1], frame.shape[0])
            segments = seg.getSegmentJSON()
            # detect gaze
            gaze = gaze_detector.detect_gaze(frame, segments["leftEye"],
                                             segments["rightEye"],
                                             segments["face"],
                                             segments["faceGrid"])
            # pack result
            result = [gaze, frame]
            result_queue.put(result)
        else:
            result_queue.put(None)
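
# A minimal, hedged sketch of how thread_func could be driven from a caller.
# The model paths and the contents of the args dict below are placeholders
# inferred from how thread_func uses them above; they are not taken from the
# original script.
if __name__ == "__main__":
    import queue as Q
    import threading

    import cv2
    import dlib

    # Keys mirror the lookups inside thread_func; values are placeholders that
    # must match whatever GazeEstimator actually expects.
    args = {
        "gaze_net": "path/to/gaze_model",
        "eye_size": 224,
        "face_size": 224,
        "inputs": None,
        "outputs": None,
    }
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_5_face_landmarks.dat")

    img_queue, result_queue = Q.Queue(), Q.Queue()

    # Grab a single webcam frame and queue it before starting the worker so
    # the worker's 1-second queue timeout is not hit.
    cam = cv2.VideoCapture(0)
    got, frame = cam.read()
    cam.release()
    if got:
        img_queue.put(frame)

    worker = threading.Thread(
        target=thread_func,
        args=(args, detector, predictor, img_queue, result_queue),
        daemon=True)
    worker.start()

    if got:
        print(result_queue.get())  # [gaze, frame] on success, None otherwise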