COUNTER_ORIGIN = 0

######
if args.eye_gaze == 1:
    # Rotate the tracker's rotation matrix into the camera frame.
    # NOTE: the axis permutation is applied twice here, first to rmat and
    # then to the intermediate result.
    _rotation_matrix = np.matmul(rmat, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
    _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))

    # Embed the 3x3 rotation in a 4x4 homogeneous transform.
    _m = np.zeros((4, 4))
    _m[:3, :3] = _rotation_matrix
    _m[3, 3] = 1

    # Go from camera space to ROS space
    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                      [-1.0, 0.0, 0.0, 0.0],
                      [0.0, -1.0, 0.0, 0.0],
                      [0.0, 0.0, 0.0, 1.0]]
    roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
    roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
    phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

    # le_c, re_c, _, _ = get_eye_image_from_landmarks(f.bbox, get_image_from_bb(frame, f.bbox), f.lms, (60, 36))
    # Crop each eye region from the 68-point landmark set (36-41: right eye, 42-47: left eye).
    re_c = get_eye_area(eye_gaze_frame, [f.lms[36], f.lms[37], f.lms[38], f.lms[39], f.lms[40], f.lms[41]])
    le_c = get_eye_area(eye_gaze_frame, [f.lms[42], f.lms[43], f.lms[44], f.lms[45], f.lms[46], f.lms[47]])
    r_eye_roi_resize.append(input_from_image(re_c))
    l_eye_roi_resize.append(input_from_image(le_c))
    head_list.append([phi_head, theta_head])
#########

frame_count += 1
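# Illustrative sanity check (an addition, not part of the original pipeline):
# the _camera_to_ros matrix above maps optical-frame axes (x right, y down,
# z forward) onto ROS axes (x forward, y left, z up), so the camera's forward
# axis should land on ROS x. The helper name below is hypothetical.
def _sanity_check_camera_to_ros():
    cam_to_ros = np.array([[0.0, 0.0, 1.0],
                           [-1.0, 0.0, 0.0],
                           [0.0, -1.0, 0.0]])
    forward_cam = np.array([0.0, 0.0, 1.0])  # camera z: viewing direction
    up_cam = np.array([0.0, -1.0, 0.0])      # camera -y: up
    assert np.allclose(cam_to_ros @ forward_cam, [1.0, 0.0, 0.0])  # ROS x: forward
    assert np.allclose(cam_to_ros @ up_cam, [0.0, 0.0, 1.0])       # ROS z: up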
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients,
                                                   flags=cv2.SOLVEPNP_DLS)
        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1

        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(os.path.join(args.output_path,
                                     os.path.splitext(base_name)[0] + '_headpose.jpg'),
                        head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]

        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(os.path.join(args.output_path,
                                     os.path.splitext(base_name)[0] + '_gaze.jpg'),
                        s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args.save_estimate:
            with open(os.path.join(args.output_path,
                                   os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
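# Hypothetical usage sketch (an assumption, not part of the original script):
# when no calibration is available, approximate the camera intrinsics from the
# image size and run the single-image estimate_gaze defined above. The
# focal-length ~= image-width heuristic, the helper name, and the zero
# distortion coefficients are assumptions for illustration. Note that a later
# definition in this file overrides estimate_gaze with a 5-argument variant;
# this sketch targets the 4-argument version above.
def example_estimate_gaze_from_image(image_path):
    img = cv2.imread(image_path)
    h, w = img.shape[:2]
    camera_matrix = np.array([[w, 0.0, w / 2.0],
                              [0.0, w, h / 2.0],
                              [0.0, 0.0, 1.0]])
    dist_coefficients = np.zeros((1, 5))  # assume an undistorted image
    estimate_gaze(os.path.basename(image_path), img, dist_coefficients, camera_matrix)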
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix, label):
    global FPS
    cv2.putText(color_img, "FPS : " + FPS, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    start = time.time()

    # Returns the face-box position of every detected subject as a list:
    # [left_x, top_y, right_x, bottom_y]
    faceboxes = landmark_estimator.get_face_bb(color_img)

    # Draw a bounding box around each detected face
    for facebox in faceboxes:
        left = int(facebox[0])
        top = int(facebox[1])
        right = int(facebox[2])
        bottom = int(facebox[3])
        cv2.rectangle(color_img, (left, top), (right, bottom), (0, 255, 0), 2)

    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # if EVAL and not SHIFT:
        #     head_phi.append(-100)
        #     head_theta.append(-100)
        #     gaze_phi.append(-100)
        #     gaze_theta.append(-100)
        return

    # check_people(faceboxes)
    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    people_count = 1
    frame_img = color_img

    for idx, subject in enumerate(subjects):
        people_idx = get_people(faceboxes[idx][0])
        people_list[people_idx].set_bbox_l(faceboxes[idx][0])

        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients,
                                                   flags=cv2.SOLVEPNP_DLS)
        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1

        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        # if EVAL:
        #     head_phi.append(phi_head)
        #     head_theta.append(theta_head)

        # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        # Draw the head-pose vector onto the face box of the color image
        if EVAL:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(
                frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx], label)
        else:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(
                frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx])
        frame_img = head_pose_image

        if EVAL:
            headpose_error.append(headpose_err)
            print("head pose error:", headpose_err)

        if args.mode == 'image':
            # show headpose
            # if args.vis_headpose:
            #     plt.axis("off")
            #     plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            #     plt.show()

            if args.save_headpose:
                cv2.imwrite(os.path.join(args.output_path,
                                         os.path.splitext(base_name)[0] + str(people_count) + '_headpose.jpg'),
                            head_pose_image)
            people_count += 1

        # Convert the eye patches to the network input format (resize, etc.)
        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    # if args.mode == 'video':
    #     # plt.axis("off")
    #     # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
    #     # plt.show()
    #     headpose_out_video.write(frame_img)

    if len(valid_subject_list) == 0:
        return

    # returns [subject : [gaze_pose]]
    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    people_count = 1
    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        facebox = faceboxes[subject_id]
        people_idx = get_people(facebox[0])

        # Build visualizations
        # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        # if EVAL:
        #     gaze_theta.append(gaze[0])
        #     gaze_phi.append(gaze[1])

        if EVAL:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(
                frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r", label)
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(
                r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l", label)
        else:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(
                frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r")
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(
                r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l")
        frame_img = l_gaze_img

        if EVAL:
            print("right gaze error:", r_gaze_err)
            print("left gaze error:", l_gaze_err)
            gaze_error.append(r_gaze_err)
            gaze_error.append(l_gaze_err)

        # show gaze image
        # if args.vis_gaze:
        #     plt.axis("off")
        #     plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
        #     plt.show()

        if args.mode == 'image':
            if args.save_gaze:
                cv2.imwrite(os.path.join(args.output_path,
                                         os.path.splitext(base_name)[0] + str(people_count) + '_gaze.jpg'),
                            frame_img)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

            if args.save_estimate:
                with open(os.path.join(args.output_path,
                                       os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                    f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                            ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')

        people_count += 1

    if args.mode == 'video':
        out_video.write(frame_img)

    end = time.time()
    delay_time = end - start
    if delay_time > 0:  # guard against a zero time delta on coarse timers
        FPS = str(int(1 / delay_time))
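# Hypothetical video driver (illustration only, not part of the original
# script): read frames and run the video-mode estimate_gaze above on each,
# relying on it to write annotated frames to out_video and to update the FPS
# overlay. The helper name is an assumption; camera_matrix, dist_coefficients,
# out_video, and the global FPS string are assumed to be initialized elsewhere.
def example_process_video(video_path, camera_matrix, dist_coefficients, label=None):
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        estimate_gaze('frame_{:06d}'.format(frame_idx), frame, dist_coefficients, camera_matrix, label)
        frame_idx += 1
    cap.release()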