import os

import pandas as pd


def do_experiments():
    face_detectors = [HogFaceDetector(), HaarcascadeFaceDetector()]
    landmarks_detectors = [LbfLandmarksDetector(), KazemiLandmarksDetector()]
    equalize_hist_values = [False]  # extend to [True, False] to compare both settings
    skip_face_detection_values = [False]  # extend to [True, False] to compare both settings

    rows = []
    results_dir = os.path.join(RESULTS_PATH, "face_landmarks")
    os.makedirs(results_dir, exist_ok=True)
    results_path = os.path.join(results_dir, "face_landmarks_correct_rgb.csv")

    for face_detector in face_detectors:
        for landmarks_detector in landmarks_detectors:
            for equalize_hist in equalize_hist_values:
                for skip_face_detection in skip_face_detection_values:
                    row = {"face_detector": str(face_detector),
                           "landmarks_detector": str(landmarks_detector),
                           "equalize_hist": str(equalize_hist),
                           "skip_face_detection": str(skip_face_detection)}

                    results = do_landmarks_experiment(face_detector, landmarks_detector, equalize_hist,
                                                      skip_face_detection)
                    row.update(results)
                    # DataFrame.append was removed in pandas 2.0; rebuild the frame from the row list
                    rows.append(row)
                    pd.DataFrame(rows).to_csv(results_path, index=False)
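

# do_landmarks_experiment is defined elsewhere in this repo. A minimal stub
# illustrating the contract assumed above: it must return a mapping that
# row.update() can merge (the metric names here are hypothetical):
def do_landmarks_experiment_stub(face_detector, landmarks_detector,
                                 equalize_hist, skip_face_detection):
    return {"mean_error_px": 0.0, "detection_rate": 1.0}
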
def haarcascade_lbf(image):
    # Note: despite its name, this variant uses the HOG face detector and the
    # Kazemi landmarks detector, not Haar cascade + LBF.
    face_detector = HogFaceDetector()
    landmarks_detector = KazemiLandmarksDetector()

    # convert to grayscale
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # detect face
    faces = face_detector.detect(image_gray)

    # Take only the first detected face
    if len(faces) > 1:
        faces = faces[0].reshape(1, -1)
    elif len(faces) == 0:  # no face detected -> nothing to visualize
        return None

    # detect landmarks
    landmarks = landmarks_detector.detect(image_gray, faces)
    visualize_landmarks_mpii_gaze_format(landmarks[:, EYES_LANDMARKS, :], image, numbers=False)
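
# Usage sketch, assuming an RGB image loaded with the same helpers the other
# examples use (the path below is hypothetical):
if __name__ == "__main__":
    image = load_image_by_cv2(mpii_face_gaze_path_wrapper("p00/day01/0001.jpg"))
    haarcascade_lbf(image)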
Example #3
def hog_kazemi():
    person_id = 0
    file_names, landmarks = extract_landmarks_from_annotation_file(person_id)
    image = load_mpii_face_gaze_image(person_id, file_names[417])

    # Convert image to grayscale
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    face_detector = HogFaceDetector()
    landmark_detector = KazemiLandmarksDetector()

    # Detect faces with the HOG detector on the grayscale image
    faces = face_detector.detect(image_gray)
    if len(faces) > 1:  # Take only first face
        faces = faces[0].reshape(1, -1)
    visualize_faces(faces, image.copy())

    # Detect landmarks on the grayscale image
    landmarks = landmark_detector.detect(image_gray, faces)
    landmarks = filter_landmarks(landmarks, indices=MOUTH_EYES_CORNERS)
    print("Landmarks:")
    print(landmarks)

    visualize_landmarks([landmarks], image.copy(), numbers=True)
Example #4
def create_pipeline(model_name="best",
                    face_detector: str = "hog",
                    landmarks_detector="kazemi",
                    screen_size=None):
    eye_image_width = 60
    eye_image_height = 36

    camera_matrix = get_avg_camera_matrix()
    if screen_size is None:
        screen_size = get_screen_size()
    print(f"Screen size: {screen_size}")

    if face_detector == "hog":
        face_detector = HogFaceDetector()
    elif face_detector == "haarcascade":
        face_detector = HaarcascadeFaceDetector()

    if landmarks_detector == "kazemi":
        landmarks_detector = KazemiLandmarksDetector()
    elif landmarks_detector == "lbf":
        landmarks_detector = LbfLandmarksDetector()

    if model_name == "best":
        model = load_best_modal3_conv_net(test=False)
    elif model_name == "own_mpiigaze":
        model = load_best_modal3_conv_net(
            test=False, file_name="modal3_conv_net_own_mpiigaze.h5")
    elif model_name == "modal3_conv_net_own_24_25":
        model = load_best_modal3_conv_net(
            test=False, file_name="modal3_conv_net_own_24_25.h5")
    elif model_name == "modal3_conv_net_mean_camera_matrix":
        model = load_best_modal3_conv_net(
            test=False, file_name="modal3_conv_net_mean_camera_matrix.h5")
    else:
        # Any other model_name is treated as a weights file name
        model = load_best_modal3_conv_net(test=False, file_name=model_name)

    pipeline = Pipeline(gaze_estimation_model=model,
                        face_detector=face_detector,
                        landmarks_detector=landmarks_detector,
                        eye_image_width=eye_image_width,
                        eye_image_height=eye_image_height,
                        camera_matrix=camera_matrix,
                        screen_size=screen_size)

    return pipeline
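
# Usage sketch: build a pipeline with the defaults and run it on one frame.
# The process()/None contract matches process_mpii_face_gaze() below; the
# image path is hypothetical:
if __name__ == "__main__":
    pipeline = create_pipeline(model_name="best")
    im = load_image_by_cv2("frame.jpg")
    data = pipeline.process(im)
    if data is not None:
        print(data["pose"])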
Example #5
def process_mpii_face_gaze():
    mean_camera_matrix = False

    right_images = list()
    left_images = list()
    poses = list()
    coords = list()
    people_ids = list()

    eye_image_width = 60
    eye_image_height = 36

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"Person id: {person_id_str}")

        if mean_camera_matrix:
            camera_matrix = get_avg_camera_matrix()
        else:
            camera_matrix = load_camera_matrix(
                path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        ann_path = mpii_face_gaze_path_wrapper(
            f"p{person_id_str}/p{person_id_str}.txt")
        annotation = np.loadtxt(ann_path, dtype=str)
        if len(annotation.shape) == 1:
            # a single-line annotation file loads as a 1-D array; make it 2-D
            annotation = np.reshape(annotation, (1, -1))

        face_detector = HogFaceDetector()
        landmarks_detector = KazemiLandmarksDetector()
        model = load_best_modal3_conv_net()
        pipeline = Pipeline(gaze_estimation_model=model,
                            face_detector=face_detector,
                            landmarks_detector=landmarks_detector,
                            eye_image_width=eye_image_width,
                            eye_image_height=eye_image_height,
                            camera_matrix=camera_matrix,
                            screen_size=screen_size)

        file_paths = annotation[:, 0]

        # wrap the list (not the enumerate iterator) so tqdm can show a total
        for i, file_path in enumerate(tqdm(file_paths)):
            im = load_image_by_cv2(
                mpii_face_gaze_path_wrapper(f"p{person_id_str}/{file_path}"))
            data = pipeline.process(im)
            if data is None:
                continue
            right_images.append(data["right_image"])
            left_images.append(data["left_image"])
            poses.append(data["pose"])

            # coords
            coords_row = annotation[i, 1:3].astype(float)  # x, y
            coords_row = [coords_row[1], coords_row[0]]  # y, x
            coords_row = [
                coords_row[0] / screen_size[0], coords_row[1] / screen_size[1]
            ]
            coords.append(coords_row)

            people_ids.append(person_id)

        # Save everything collected so far after finishing each person
        save_data_to_file(right_images, left_images, poses, coords, people_ids)
Example #6
import os

import numpy as np

from models.landmarks_detectors.kazemi_landmarks_detector import KazemiLandmarksDetector
from settings import PHOTO_TAKER_DATA_PATH


def read_metadata():
    file_path = os.path.join(PHOTO_TAKER_DATA_PATH, "metadata.txt")
    return np.loadtxt(file_path, dtype='U100')
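
# Expected metadata.txt layout, inferred from the loop below: each row holds
# an image file name followed by integer screen coordinates, e.g.
# (values hypothetical):
#
#   photo_000.jpg 512 384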


if __name__ == "__main__":
    eye_image_width = 60
    eye_image_height = 36

    camera_matrix = get_avg_camera_matrix()
    screen_size = get_screen_size()

    face_detector = HogFaceDetector()
    landmarks_detector = KazemiLandmarksDetector()

    pipeline = Pipeline(face_detector=face_detector,
                        landmarks_detector=landmarks_detector,
                        eye_image_width=eye_image_width,
                        eye_image_height=eye_image_height,
                        camera_matrix=camera_matrix,
                        screen_size=screen_size)

    metadata = read_metadata()
    for row in metadata:
        file_path = os.path.join(PHOTO_TAKER_DATA_PATH, row[0])
        coords = row[1:].astype(int)  # np.int was removed in NumPy 1.24
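        # The original snippet ends here; a plausible continuation, mirroring
        # process_mpii_face_gaze() above, would run the pipeline on each photo:
        im = load_image_by_cv2(file_path)
        data = pipeline.process(im)
        if data is not None:
            print(coords, data["pose"])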
Example #7
from scripts.create_dataset.create_dataset_mpiigaze_processed_both_rgb import load_camera_matrix, load_screen_size, \
    load_face_model

if __name__ == "__main__":
    face_model = load_face_model()

    person_id_str = "14"
    day = "day04"
    im_file = "0008.jpg"
    camera_matrix = load_camera_matrix(path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
    screen_size = load_screen_size(path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
    im = load_image_by_cv2(mpii_face_gaze_path_wrapper(f"p{person_id_str}/{day}/{im_file}"))
    img = Image.fromarray(im, 'RGB')
    img.show()

    # Convert image to grayscale
    image_gray = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)

    face_detector = HogFaceDetector()
    landmark_detector = KazemiLandmarksDetector()

    # Detect faces with the HOG detector on the grayscale image
    faces = face_detector.detect(image_gray)
    if len(faces) > 1:      # Take only first face
        faces = faces[0].reshape(1, -1)

    # Detect landmarks on the grayscale image
    landmarks = landmark_detector.detect(image_gray, faces)
    landmarks = filter_landmarks(landmarks, indices=MOUTH_EYES_CORNERS)

    print(estimate_head_pose(im, landmarks, camera_matrix, face_model_points=np.transpose(face_model), show=True))