def experiment_detect_faces():
    """Save a random image sample annotated by both face detectors.

    A subset of 200 paths is drawn from the metadata. Each image is processed
    twice: once as plain grayscale (output sub-directory ``raw_grayscale``)
    and once after histogram equalisation (``equal_hist``). For every image
    the Haar-cascade detections are visualised with colour ``(255, 0, 0)``
    and the HOG detections with ``(0, 0, 255)``; the image is then written
    as a zero-padded ``NNNN.jpg`` under
    ``RESULTS_PATH/manual_face_detection/<variant>``.
    """
    sample_size = 200
    base_dir = os.path.join(RESULTS_PATH, "manual_face_detection")
    os.makedirs(base_dir, exist_ok=True)

    haar_detector = HaarcascadeFaceDetector()
    hog_detector = HogFaceDetector()
    # Detectors are applied in this order, each with its own colour.
    detectors_with_colors = (
        (haar_detector, (255, 0, 0)),
        (hog_detector, (0, 0, 255)),
    )

    all_metadata = _load_metadata()
    sampled_paths = get_random_subset_paths(all_metadata, sample_size)

    for equalize in (False, True):
        variant_dir = os.path.join(
            base_dir, "equal_hist" if equalize else "raw_grayscale")
        os.makedirs(variant_dir, exist_ok=True)

        for index, relative_path in tqdm(enumerate(sampled_paths)):
            image = load_image_by_cv2(own_dataset_path_wrapper(relative_path))
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            if equalize:
                gray = cv2.equalizeHist(gray)

            # NOTE(review): visualize_faces presumably draws onto `image`
            # in place (its return value is not used here) - confirm.
            for detector, color in detectors_with_colors:
                detections = detector.detect(gray)
                visualize_faces(detections, image, color=color, show=False)

            target_path = os.path.join(variant_dir,
                                       str(index).zfill(4) + ".jpg")
            Image.fromarray(image).save(target_path)
def parse_mpiigaze_landmark_coords(person_id: int, day: int, img_n: int, eye_image_width=60, eye_image_height=36):
    """Load one MPIIGaze sample and return per-eye data, landmarks and coordinates.

    :param person_id: participant number; zero-padded to two digits in paths.
    :param day: day number; zero-padded to two digits in paths.
    :param img_n: image number; image ``NNNN.jpg`` is paired with annotation
        row ``img_n - 1``.
    :param eye_image_width: width forwarded to ``get_img_gaze_headpose_per_eye``.
    :param eye_image_height: height forwarded to ``get_img_gaze_headpose_per_eye``.
    :return: ``(right_eye, left_eye, landmarks, coordinates)`` where the eye
        entries are whatever ``get_img_gaze_headpose_per_eye`` returns,
        ``landmarks`` is the output of ``norm_landmarks`` on annotation
        columns 0..23, and ``coordinates`` is an array of annotation columns
        25 and 24 (in that order).
    """
    face_model = load_face_model()

    person_id_str = str(person_id).zfill(2)
    day_str = str(day).zfill(2)
    img_n_str = str(img_n).zfill(4)

    im = load_image_by_cv2(mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/day{day_str}/{img_n_str}.jpg"))
    annotation = load_annotation(
        mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/day{day_str}/annotation.txt"))
    camera_matrix = load_camera_matrix(path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")

    im_height, im_width, _ = im.shape
    row = img_n - 1  # annotation row that belongs to image number img_n

    # Head pose: columns 29..31 go through cv2.Rodrigues, 32..34 are added
    # as a translation to the rotated face model.
    rotation_vector = np.reshape(annotation[row, 29:32], (1, -1))
    translation = np.reshape(annotation[row, 32:35], (1, -1))
    h_r, _ = cv2.Rodrigues(rotation_vector)
    face_points = np.dot(h_r, face_model) + np.reshape(translation, (-1, 1))

    gaze_target = np.reshape(annotation[row, 26:29], (-1, 1))

    def _process_eye(first_col, second_col):
        # Eye centre is the midpoint of two face-model points.
        center = 0.5 * (face_points[:, first_col] + face_points[:, second_col])
        return get_img_gaze_headpose_per_eye(im, center, h_r, gaze_target,
                                             eye_image_width, eye_image_height,
                                             camera_matrix)

    right_eye = _process_eye(0, 1)
    left_eye = _process_eye(2, 3)

    landmarks = norm_landmarks(np.reshape(annotation[row, 0:24], (1, -1)),
                               height=im_height, width=im_width)
    coordinates = np.array([annotation[row, 25], annotation[row, 24]])
    return right_eye, left_eye, landmarks, coordinates
def load_mpii_face_gaze_image(person_id: int, person_image_path: str, verbose=False):
    """Load an image of one participant from the MPIIFaceGaze directory.

    :param person_id: participant number; mapped to the ``pNN`` sub-directory.
    :param person_image_path: image path relative to the participant directory.
    :param verbose: when true, print a message if the file is missing.
    :return: the result of ``load_image_by_cv2`` for the file, or ``None``
        when the file does not exist.
    """
    image_path = os.path.join(MPII_FACE_GAZE_PATH,
                              "p" + str(person_id).zfill(2),
                              person_image_path)
    if not os.path.exists(image_path):
        if verbose:
            print(f"Image file {image_path} does not exist.")
        return None
    return load_image_by_cv2(image_path)
def _prepare_data_for_calibration(pipeline, dir_name):
    """Collect target coordinates and pipeline predictions for a calibration set.

    Every metadata row of ``DATA_PATH/application/calibration/<dir_name>`` is
    visited; rows whose image file is missing or fails to load are skipped.

    :param pipeline: object whose ``predict(image)`` result supports ``squeeze()``.
    :param dir_name: name of the calibration sub-directory.
    :return: ``(coords, predictions)`` as arrays, row-aligned; coordinates
        are stored as ``(y, x)``.
    """
    calibration_dir = os.path.join(DATA_PATH, "application", "calibration", dir_name)
    records = _load_metadata(calibration_dir)

    targets = []
    outputs = []
    for record in tqdm(records):
        image_path = _get_file_path(calibration_dir, record)
        xy = _get_coords(record)  # x, y
        yx = (xy[1], xy[0])  # y, x

        # A missing file and an unreadable file are treated the same way.
        image = load_image_by_cv2(image_path) if os.path.exists(image_path) else None
        if image is None:
            continue

        outputs.append(pipeline.predict(image).squeeze())
        targets.append(yx)

    return np.asarray(targets), np.asarray(outputs)
def process_mpii_face_gaze(mean_camera_matrix=False):
    """Run the processing pipeline over MPIIFaceGaze and save the results.

    For each of the 15 participants the annotation file ``pNN/pNN.txt`` is
    read, every listed image is passed through ``Pipeline.process`` and the
    resulting eye images and pose are collected together with the on-screen
    target coordinates and the participant id. Images that cannot be loaded,
    or for which the pipeline returns ``None``, are skipped. Everything is
    written once at the end via ``save_data_to_file``.

    :param mean_camera_matrix: when true, use ``get_avg_camera_matrix()`` for
        every participant instead of the per-participant calibration file.
    """
    eye_image_width = 60
    eye_image_height = 36

    right_images = []
    left_images = []
    poses = []
    coords = []
    people_ids = []

    # These do not depend on the participant, so build them once instead of
    # reloading the detectors and the network on every loop iteration.
    face_detector = HogFaceDetector()
    landmarks_detector = KazemiLandmarksDetector()
    model = load_best_modal3_conv_net()
    avg_camera_matrix = get_avg_camera_matrix() if mean_camera_matrix else None

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"Person id: {person_id_str}")

        if mean_camera_matrix:
            camera_matrix = avg_camera_matrix
        else:
            camera_matrix = load_camera_matrix(
                path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")

        ann_path = mpii_face_gaze_path_wrapper(
            f"p{person_id_str}/p{person_id_str}.txt")
        annotation = np.loadtxt(ann_path, dtype=str)
        if annotation.ndim == 1:
            # A single-row file is loaded as 1-D; restore the row axis.
            annotation = np.reshape(annotation, (1, -1))

        pipeline = Pipeline(gaze_estimation_model=model,
                            face_detector=face_detector,
                            landmarks_detector=landmarks_detector,
                            eye_image_width=eye_image_width,
                            eye_image_height=eye_image_height,
                            camera_matrix=camera_matrix,
                            screen_size=screen_size)

        file_paths = annotation[:, 0]
        # tqdm wraps the sequence itself so that the progress bar knows the
        # total; wrapping enumerate() would hide the length.
        for i, file_path in enumerate(tqdm(file_paths)):
            im = load_image_by_cv2(
                mpii_face_gaze_path_wrapper(f"p{person_id_str}/{file_path}"))
            if im is None:
                # Same handling as the other loaders in this file: skip
                # images that could not be read.
                continue

            data = pipeline.process(im)
            if data is None:
                continue

            right_images.append(data["right_image"])
            left_images.append(data["left_image"])
            poses.append(data["pose"])

            # Annotation columns 1..2 are (x, y); stored as normalised (y, x).
            # NOTE(review): assumes screen_size is ordered to match (y, x),
            # i.e. (height, width) - confirm against load_screen_size.
            target_x, target_y = annotation[i, 1:3].astype(float)
            coords.append([target_y / screen_size[0],
                           target_x / screen_size[1]])
            people_ids.append(person_id)

    save_data_to_file(right_images, left_images, poses, coords, people_ids)
import os

import numpy as np
from PIL import Image

from data_processing.utils import load_image_by_cv2, mpii_face_gaze_path_wrapper
from settings import FOR_THESIS_DIR
from utils.landmarks import visualize_landmarks_mpii_gaze_format

if __name__ == "__main__":
    # Draws the annotated landmarks of one MPIIFaceGaze image, with their
    # indices, onto the photo so that it is visible which landmark is which.
    # The result is saved as "eye_landmarks.png" in FOR_THESIS_DIR.
    person_id_str = "14"
    day = "day04"
    im_file = "0008.jpg"
    # NOTE(review): presumably the number of annotation rows that precede
    # day04 for this participant - confirm against the annotation file.
    offset = 740

    im = load_image_by_cv2(
        mpii_face_gaze_path_wrapper(f"p{person_id_str}/{day}/{im_file}"))
    annotation = np.loadtxt(
        mpii_face_gaze_path_wrapper(f"p{person_id_str}/p{person_id_str}.txt"),
        dtype=str)

    row = offset + 8 - 1
    # Printed so the row can be checked by eye against the image loaded above.
    print(f"Real file path: {annotation[row, 0]}")

    # Columns 3..14 hold 12 values, reshaped into 6 (x, y)-style pairs.
    landmarks = np.reshape(annotation[row, 3:15], (1, -1, 2))
    print(landmarks)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    landmarks = landmarks.astype(int)

    visualize_landmarks_mpii_gaze_format(landmarks, im, numbers=True)
    im = Image.fromarray(im)
    im.save(os.path.join(FOR_THESIS_DIR, "eye_landmarks.png"))
def create_own_dataset(dir_name: str):
    """Build and save a dataset from the images recorded in ``dir_name``.

    Each metadata row is resolved to an image file and processed by the
    pipeline. Rows whose file does not exist are skipped silently; rows whose
    image cannot be loaded, or in which the pipeline finds nothing
    (``process`` returns ``None``), are skipped and counted. Summary
    statistics are printed, coordinates are normalised with ``norm_coords``
    and the result is passed to ``_save_own_dataset``.

    If no row yields a valid record, the statistics are printed and the
    function returns without saving anything.

    :param dir_name: directory identifier passed to the metadata, screen
        resolution and file-path helpers.
    """
    screen_size = _load_screen_resolution(dir_name)

    counter = 0  # valid records
    broken_path_counter = 0
    no_face_detected_counter = 0

    right_image_list = []
    left_image_list = []
    pose_list = []
    coords_list = []

    pipeline = create_pipeline(screen_size=screen_size)
    metadata = _load_metadata(dir_name)

    # Accumulated pipeline time over successfully processed images only.
    time_sum = 0
    for row in tqdm(metadata):
        file_path = _get_file_path(dir_name, row)
        coords = _get_coords(row)  # x, y
        coords = (coords[1], coords[0])  # y, x

        if not os.path.exists(file_path):
            continue
        im = load_image_by_cv2(file_path)
        if im is None:
            broken_path_counter += 1
            continue

        start_time = time()
        data = pipeline.process(im)
        if data is None:
            no_face_detected_counter += 1
            continue
        time_sum += time() - start_time

        counter += 1
        right_image_list.append(data["right_image"])
        left_image_list.append(data["left_image"])
        pose_list.append(data["pose"])
        coords_list.append(coords)

    print(f"Valid records: {counter}")
    print(f"Broken paths: {broken_path_counter}")
    print(f"No face detected: {no_face_detected_counter}")

    if counter == 0:
        # Avoid ZeroDivisionError below and np.concatenate on empty lists.
        print("Average time: n/a")
        print("No valid records found; nothing was saved.")
        return
    print(f"Average time: {time_sum / counter}")

    data = {
        "right_image": np.concatenate(right_image_list),
        "left_image": np.concatenate(left_image_list),
        "pose_landmarks": np.concatenate(pose_list),
        "coordinates": norm_coords(np.asarray(coords_list), screen_size),
    }
    _save_own_dataset(data)
def create_dataset_mpiigaze_processed_both_rgb_full_transformation(
        dataset_name):
    """Create an MPIIGaze dataset using ``mpii_gaze_normalize_image`` for both eyes.

    For each of the 15 participants and each of their day directories, every
    image is paired with annotation row ``i`` (its index in the list returned
    by ``get_all_jpg_files``). Both eyes are passed through
    ``mpii_gaze_normalize_image``; the head-pose entry of a sample is the
    concatenation of ``count_headpose_angles`` for the right-eye head pose,
    the left-eye head pose and the raw rotation vector (each divided by its
    norm). Results are handed to ``save_dataset_mpiigaze_processed_both_rgb``
    once per (participant, day).

    :param dataset_name: forwarded to ``create_dirs`` and to the save function.
    """
    create_dirs(dataset_name)
    face_model = load_face_model()
    eye_size = (60, 36)  # (width, height) of the eye image

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"--------------\nperson_id_str: {person_id_str}")

        camera_matrix = load_camera_matrix(
            path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        print(screen_size)

        for day in get_all_days(
                path=mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/")):
            left_eyes, right_eyes = [], []
            headposes, gazes, coordinates = [], [], []
            print(day)

            annotation = load_annotation(mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/annotation.txt"))
            im_filenames = get_all_jpg_files(mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/"))

            for i, im_file in enumerate(tqdm(im_filenames)):
                im = load_image_by_cv2(mpiigaze_path_wrapper(
                    f"Data/Original/p{person_id_str}/{day}/{im_file}"))

                headpose_hr = np.reshape(annotation[i, 29:32], (1, -1))
                headpose_ht = np.reshape(annotation[i, 32:35], (1, -1))
                h_r, _ = cv2.Rodrigues(headpose_hr)
                # Face model rotated by the head rotation, then translated.
                fc = np.dot(h_r, face_model) + np.reshape(headpose_ht, (-1, 1))

                gaze_target = np.reshape(annotation[i, 26:29], (-1, 1))

                # Eye centres are midpoints of face-model point pairs.
                right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])
                left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])

                # The per-eye gaze returned by the normalisation is not used.
                right_eye_img, right_eye_headpose, _ = mpii_gaze_normalize_image(
                    im, right_eye_center, h_r, gaze_target, eye_size,
                    camera_matrix)
                left_eye_img, left_eye_headpose, _ = mpii_gaze_normalize_image(
                    im, left_eye_center, h_r, gaze_target, eye_size,
                    camera_matrix)
                right_eyes.append(right_eye_img)
                left_eyes.append(left_eye_img)

                # Order matters: right eye, left eye, raw rotation vector.
                headpose_angles = []
                for vector in (right_eye_headpose, left_eye_headpose,
                               headpose_hr):
                    headpose_angles.extend(
                        count_headpose_angles(vector / np.linalg.norm(vector)))
                headposes.append(np.array(headpose_angles))

                gazes.append(count_gaze_angles(
                    gaze_target / np.linalg.norm(gaze_target)))
                # Column 25 first, then 24.
                coordinates.append([annotation[i, 25], annotation[i, 24]])

            coordinates = norm_coords(coordinates, screen_size)
            save_dataset_mpiigaze_processed_both_rgb(
                person_id_str, day, right_eyes, left_eyes, headposes, gazes,
                coordinates, dataset_name=dataset_name)
def create_dataset_mpiigaze_processed_both_rgb(dataset_name, headpose_type):
    """
    Create an MPIIGaze dataset with eye crops produced by ``cut_eye``.

    For each of the 15 participants and each of their day directories, the
    right and left eye images, a head-pose entry, the gaze angles and the
    normalised screen coordinates are collected per image and passed to
    ``save_dataset_mpiigaze_processed_both_rgb`` once per (participant, day).

    :param dataset_name: forwarded to ``create_dirs`` and to the save function.
    :param headpose_type: ``"2_3_dim_vectors"`` stores the concatenated
        rotation and translation vectors; ``"2_angles"`` stores
        ``count_headpose_angles`` of the normalised rotation vector.
        NOTE(review): any other value appends nothing, so ``headposes`` is
        saved empty while the other lists are filled - confirm whether that
        is intended.
    :return:
    """
    create_dirs(dataset_name)
    face_model = load_face_model()
    eye_size = (60, 36)  # (width, height) of the eye image

    for person_id in range(15):
        person_id_str = str(person_id).zfill(2)
        print(f"--------------\nperson_id_str: {person_id_str}")

        camera_matrix = load_camera_matrix(
            path=f"Data/Original/p{person_id_str}/Calibration/Camera.mat")
        screen_size = load_screen_size(
            path=f"Data/Original/p{person_id_str}/Calibration/screenSize.mat")
        print(screen_size)

        for day in get_all_days(
                path=mpiigaze_path_wrapper(f"Data/Original/p{person_id_str}/")):
            left_eyes, right_eyes = [], []
            headposes, gazes, coordinates = [], [], []
            print(day)

            annotation = load_annotation(mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/annotation.txt"))
            im_filenames = get_all_jpg_files(mpiigaze_path_wrapper(
                f"Data/Original/p{person_id_str}/{day}/"))

            for i, im_file in enumerate(tqdm(im_filenames)):
                im = load_image_by_cv2(mpiigaze_path_wrapper(
                    f"Data/Original/p{person_id_str}/{day}/{im_file}"))

                headpose_hr = np.reshape(annotation[i, 29:32], (1, -1))
                headpose_ht = np.reshape(annotation[i, 32:35], (1, -1))
                h_r, _ = cv2.Rodrigues(headpose_hr)
                # Face model rotated by the head rotation, then translated.
                fc = np.dot(h_r, face_model) + np.reshape(headpose_ht, (-1, 1))

                gaze_target = np.reshape(annotation[i, 26:29], (-1, 1))

                # Eye centres are midpoints of face-model point pairs.
                right_eye_center = 0.5 * (fc[:, 0] + fc[:, 1])
                left_eye_center = 0.5 * (fc[:, 2] + fc[:, 3])

                right_eyes.append(
                    cut_eye(im, right_eye_center, h_r, eye_size, camera_matrix))
                left_eyes.append(
                    cut_eye(im, left_eye_center, h_r, eye_size, camera_matrix))

                if headpose_type == "2_3_dim_vectors":
                    stacked = np.concatenate((headpose_hr, headpose_ht), axis=1)
                    headposes.append(stacked.squeeze())
                elif headpose_type == "2_angles":
                    unit_rotation = headpose_hr / np.linalg.norm(headpose_hr)
                    headposes.append(count_headpose_angles(unit_rotation))

                gazes.append(count_gaze_angles(
                    gaze_target / np.linalg.norm(gaze_target)))
                # Column 25 first, then 24.
                coordinates.append([annotation[i, 25], annotation[i, 24]])

            coordinates = norm_coords(coordinates, screen_size)
            save_dataset_mpiigaze_processed_both_rgb(
                person_id_str, day, right_eyes, left_eyes, headposes, gazes,
                coordinates, dataset_name=dataset_name)