def image_callback(self, subject_image_list):
    """This method is called whenever new input arrives. The input is first converted into a format
    suitable for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated
    (see :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image
    (see :meth:`visualize_eye_result`), and this image is published along with the estimated gaze vector
    (see :meth:`publish_image` and :func:`publish_gaze`)."""
    timestamp = subject_image_list.header.stamp
    subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    for subject_id, s in subjects_dict.items():
        try:
            transform_msg = self.tf2_buffer.lookup_transform(self.ros_tf_frame,
                                                             self.headpose_frame + str(subject_id),
                                                             timestamp)
            rot_head = transform_msg.transform.rotation
            euler_angles_head = list(transformations.euler_from_quaternion(
                [rot_head.x, rot_head.y, rot_head.z, rot_head.w]))
            euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)
            phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
            input_head_list.append([theta_head, phi_head])
            input_r_list.append(self.input_from_image(s.right))
            input_l_list.append(self.input_from_image(s.left))
            valid_subject_list.append(subject_id)
        except (tf2_ros.LookupException, tf2_ros.ConnectivityException,
                tf2_ros.ExtrapolationException, tf2_ros.TransformException):
            pass

    if len(valid_subject_list) == 0:
        return

    gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                          inference_input_right_list=input_r_list,
                                          inference_headpose_list=input_head_list)

    subjects_gaze_img_list = []
    for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
        self.publish_gaze(gaze, timestamp, subject_id)

        if self.visualise_eyepose:
            s = subjects_dict[subject_id]
            r_gaze_img = self.visualize_eye_result(s.right, gaze)
            l_gaze_img = self.visualize_eye_result(s.left, gaze)
            s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
            subjects_gaze_img_list.append(s_gaze_img)

    if len(subjects_gaze_img_list) > 0:
        gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
        gaze_img_msg.header.stamp = timestamp
        self.subjects_gaze_img.publish(gaze_img_msg)

    _now = rospy.Time().now()
    _freq = 1.0 / (_now - self._last_time).to_sec()
    self._freq_deque.append(_freq)
    self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
    self._last_time = _now
    tqdm.write(
        '\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'.format(
            _now.to_sec(), timestamp.to_sec(), np.mean(self._latency_deque),
            len(valid_subject_list), np.mean(self._freq_deque)),
        end="\r")
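
# The publish_gaze method referenced in the docstring above is not shown in this section.
# Below is a minimal sketch of what such a publisher could look like, assuming the gaze
# (theta, phi) pair is converted into a quaternion and broadcast as a TF frame. The frame
# names, the broadcaster attribute and the angle-to-quaternion convention are assumptions
# for illustration, not the repository's confirmed implementation.
from geometry_msgs.msg import TransformStamped

def publish_gaze_sketch(self, est_gaze, msg_stamp, subject_id):
    """Hypothetical helper: broadcast the estimated gaze as a TF frame."""
    theta_gaze, phi_gaze = est_gaze[0], est_gaze[1]  # assumed output order of estimate_gaze_twoeyes
    # Treat theta as pitch and phi as yaw of the gaze ray (assumed convention).
    quaternion_gaze = transformations.quaternion_from_euler(0.0, theta_gaze, phi_gaze)
    t = TransformStamped()
    t.header.stamp = msg_stamp
    t.header.frame_id = self.headpose_frame + str(subject_id)
    t.child_frame_id = "estimated_gaze_" + str(subject_id)  # hypothetical frame name
    (t.transform.rotation.x, t.transform.rotation.y,
     t.transform.rotation.z, t.transform.rotation.w) = quaternion_gaze
    self.tf2_broadcaster.sendTransform(t)  # assumes a tf2_ros.TransformBroadcaster attribute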
def process_image(self, color_msg):
    color_img = gaze_tools.convert_image(color_msg, "bgr8")
    timestamp = color_msg.header.stamp

    self.update_subject_tracker(color_img)

    if not self.subject_tracker.get_tracked_elements():
        tqdm.write("\033[2K\033[1;31mNo face found\033[0m", end="\r")
        return

    self.subject_tracker.update_eye_images(self.eye_image_size)

    final_head_pose_images = []
    for subject_id, subject in self.subject_tracker.get_tracked_elements().items():
        if subject.left_eye_color is None or subject.right_eye_color is None:
            continue
        if subject_id not in self.pose_stabilizers:
            self.pose_stabilizers[subject_id] = [
                Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
                for _ in range(6)
            ]

        success, head_rpy, translation_vector = self.get_head_pose(subject.marks, subject_id)

        if success:
            # Publish all the data
            self.publish_pose(timestamp, translation_vector, head_rpy, subject_id)

            if self.visualise_headpose:
                roll_pitch_yaw = gaze_tools.limit_yaw(head_rpy)
                face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224),
                                                interpolation=cv2.INTER_CUBIC)
                final_head_pose_images.append(
                    LandmarkMethodROS.visualize_headpose_result(
                        face_image_resized,
                        gaze_tools.get_phi_theta_from_euler(roll_pitch_yaw)))

    if len(self.subject_tracker.get_tracked_elements().items()) > 0:
        self.publish_subject_list(timestamp, self.subject_tracker.get_tracked_elements())

    if len(final_head_pose_images) > 0:
        headpose_image_ros = self.bridge.cv2_to_imgmsg(np.hstack(final_head_pose_images), "bgr8")
        headpose_image_ros.header.stamp = timestamp
        self.subject_faces_pub.publish(headpose_image_ros)
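
# get_head_pose is not shown in this section. Judging from the standalone estimate_gaze
# variants further below, it presumably solves a PnP problem between the 3D face model
# points and the detected 2D landmarks and converts the result to roll/pitch/yaw. The
# sketch below is an illustration only: the signature matches the call above, but the
# attribute names (self.model_points, self.camera_matrix, self.dist_coefficients) are
# assumptions, and the Kalman stabilizer smoothing step is omitted.
def get_head_pose_sketch(self, landmarks, subject_id):
    success, rotation_vector, translation_vector = cv2.solvePnP(
        self.model_points,
        landmarks.reshape(len(landmarks), 1, 2),
        cameraMatrix=self.camera_matrix,
        distCoeffs=self.dist_coefficients,
        flags=cv2.SOLVEPNP_DLS)
    if not success:
        return False, None, None

    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    m = np.eye(4)
    m[:3, :3] = np.matmul(rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
    # Same camera-to-ROS axis change as in the standalone scripts below
    camera_to_ros = np.array([[0.0, 0.0, 1.0, 0.0],
                              [-1.0, 0.0, 0.0, 0.0],
                              [0.0, -1.0, 0.0, 0.0],
                              [0.0, 0.0, 0.0, 1.0]])
    head_rpy = list(transformations.euler_from_matrix(np.dot(camera_to_ros, m)))
    return True, head_rpy, translation_vector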
def image_callback(self, subject_image_list):
    """This method is called whenever new input arrives. The input is first converted into a format
    suitable for the gaze estimation network (see :meth:`input_from_image`), then the gaze is estimated
    (see :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image
    (see :meth:`visualize_eye_result`), and this image is published along with the estimated gaze vector
    (see :meth:`publish_image` and :func:`publish_gaze`)."""
    timestamp = subject_image_list.header.stamp
    subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    for subject_id, s in subjects_dict.items():
        try:
            (trans_head, rot_head) = self.tf_listener.lookupTransform(self.ros_tf_frame,
                                                                      self.headpose_frame + str(subject_id),
                                                                      timestamp)
            euler_angles_head = list(tf.transformations.euler_from_quaternion(rot_head))
            euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)
            phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
            input_head_list.append([theta_head, phi_head])
            input_r_list.append(self.input_from_image(s.right))
            input_l_list.append(self.input_from_image(s.left))
            valid_subject_list.append(subject_id)
        except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException, tf.Exception):
            pass

    if len(valid_subject_list) == 0:
        return

    gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                          inference_input_right_list=input_r_list,
                                          inference_headpose_list=input_head_list)

    subjects_gaze_img_list = []
    for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
        self.publish_gaze(gaze, timestamp, subject_id)

        if self.visualise_eyepose:
            s = subjects_dict[subject_id]
            r_gaze_img = self.visualize_eye_result(s.right, gaze)
            l_gaze_img = self.visualize_eye_result(s.left, gaze)
            s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
            subjects_gaze_img_list.append(s_gaze_img)

    if len(subjects_gaze_img_list) > 0:
        gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
        gaze_img_msg.header.stamp = timestamp
        self.subjects_gaze_img.publish(gaze_img_msg)
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients,
                                                   flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_headpose.jpg'),
                        head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_gaze.jpg'),
                        s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args.save_estimate:
            with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
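
# A small self-contained check of the _camera_to_ros matrix used above. In the usual
# conventions (camera optical frame: x right, y down, z forward; ROS body frame:
# x forward, y left, z up), the rotation part of that matrix maps a point expressed in
# camera coordinates to the same point expressed in ROS coordinates. Illustration only.
import numpy as np

camera_to_ros = np.array([[0.0, 0.0, 1.0],
                          [-1.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0]])

point_camera = np.array([0.1, -0.2, 2.0])   # 0.1 m right, 0.2 m up, 2 m in front of the camera
point_ros = camera_to_ros.dot(point_camera)
# Expected [2.0, -0.1, 0.2]: 2 m forward, 0.1 m to the right (negative "left"), 0.2 m up
print(point_ros)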
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.marks.reshape(len(subject.marks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients,
                                                   flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        roll_pitch_yaw = [-rotation_vector[2], -rotation_vector[0], rotation_vector[1] + np.pi]
        roll_pitch_yaw = limit_yaw(np.array(roll_pitch_yaw).flatten().tolist())

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_headpose.jpg'),
                        head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
######
if args.eye_gaze == 1:
    _rotation_matrix = np.matmul(rmat, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
    _m = np.zeros((4, 4))
    _m[:3, :3] = _rotation_matrix
    _m[3, 3] = 1
    # Go from camera space to ROS space
    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                      [-1.0, 0.0, 0.0, 0.0],
                      [0.0, -1.0, 0.0, 0.0],
                      [0.0, 0.0, 0.0, 1.0]]
    roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
    roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
    phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

    # le_c, re_c, _, _ = get_eye_image_from_landmarks(f.bbox, get_image_from_bb(frame, f.bbox), f.lms, (60, 36))
    re_c = get_eye_area(eye_gaze_frame, [f.lms[36], f.lms[37], f.lms[38], f.lms[39], f.lms[40], f.lms[41]])
    le_c = get_eye_area(eye_gaze_frame, [f.lms[42], f.lms[43], f.lms[44], f.lms[45], f.lms[46], f.lms[47]])

    r_eye_roi_resize.append(input_from_image(re_c))
    l_eye_roi_resize.append(input_from_image(le_c))
    head_list.append([phi_head, theta_head])
#########

frame_count += 1
if log is not None:
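
# get_eye_area is not defined in this fragment. A plausible sketch of such a helper,
# assuming it crops a padded, axis-aligned box around the six eye landmarks of the
# 68-point scheme (36-41 right eye, 42-47 left eye); the padding factor and the (x, y)
# landmark layout are assumptions for illustration, not the fragment's actual helper.
def get_eye_area_sketch(frame, eye_landmarks, padding=0.5):
    pts = np.array(eye_landmarks, dtype=np.float32)
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    pad_x = (x_max - x_min) * padding
    pad_y = (y_max - y_min) * padding
    x0 = max(int(x_min - pad_x), 0)
    y0 = max(int(y_min - pad_y), 0)
    x1 = min(int(x_max + pad_x), frame.shape[1])
    y1 = min(int(y_max + pad_y), frame.shape[0])
    return frame[y0:y1, x0:x1]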
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix, label):
    global FPS
    cv2.putText(color_img, "FPS : " + FPS, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    start = time.time()

    # Returns the face box location of every subject as a list: [left_x, top_y, right_x, bottom_y]
    faceboxes = landmark_estimator.get_face_bb(color_img)

    # Draw the bounding boxes
    for facebox in faceboxes:
        left = int(facebox[0])
        top = int(facebox[1])
        right = int(facebox[2])
        bottom = int(facebox[3])
        cv2.rectangle(color_img, (left, top), (right, bottom), (0, 255, 0), 2)

    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # if EVAL and not SHIFT:
        #     head_phi.append(-100)
        #     head_theta.append(-100)
        #     gaze_phi.append(-100)
        #     gaze_theta.append(-100)
        return

    # check_people(faceboxes)
    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    people_count = 1
    frame_img = color_img

    for idx, subject in enumerate(subjects):
        people_idx = get_people(faceboxes[idx][0])
        people_list[people_idx].set_bbox_l(faceboxes[idx][0])

        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients,
                                                   flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        # if EVAL:
        #     head_phi.append(phi_head)
        #     head_theta.append(theta_head)
        # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        # Draw the head pose vector onto the face box of color_img
        if EVAL:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(
                frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx], label)
        else:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(
                frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx])
        frame_img = head_pose_image

        if EVAL:
            headpose_error.append(headpose_err)
            print("head pose error:", headpose_err)

        if args.mode == 'image':
            # Show the head pose
            # if args.vis_headpose:
            #     plt.axis("off")
            #     plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            #     plt.show()

            if args.save_headpose:
                cv2.imwrite(os.path.join(args.output_path,
                                         os.path.splitext(base_name)[0] + str(people_count) + '_headpose.jpg'),
                            head_pose_image)
            people_count += 1

        # Convert the eye patches to the network input format (resize, etc.)
        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

        # if args.mode == 'video':
        #     # plt.axis("off")
        #     # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
        #     # plt.show()
        #     headpose_out_video.write(frame_img)

    if len(valid_subject_list) == 0:
        return

    # returns [subject : [gaze_pose]]
    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    people_count = 1
    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        facebox = faceboxes[subject_id]
        people_idx = get_people(facebox[0])

        # Build visualizations
        # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        # if EVAL:
        #     gaze_theta.append(gaze[0])
        #     gaze_phi.append(gaze[1])
        if EVAL:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(
                frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r", label)
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(
                r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l", label)
        else:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(
                frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r")
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(
                r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l")
        frame_img = l_gaze_img

        if EVAL:
            print("right gaze error:", r_gaze_err)
            print("left gaze error:", l_gaze_err)
            gaze_error.append(r_gaze_err)
            gaze_error.append(l_gaze_err)

        # Show the gaze image
        # if args.vis_gaze:
        #     plt.axis("off")
        #     plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
        #     plt.show()

        if args.mode == 'image':
            if args.save_gaze:
                cv2.imwrite(os.path.join(args.output_path,
                                         os.path.splitext(base_name)[0] + str(people_count) + '_gaze.jpg'),
                            frame_img)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

            if args.save_estimate:
                with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                    f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                            ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
            people_count += 1

    if args.mode == 'video':
        out_video.write(frame_img)

    end = time.time()
    delay_time = end - start
    FPS = str(int(1 / delay_time))
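
# out_video and FPS are globals used above but not initialized in this excerpt. A minimal
# sketch of how they might be set up with OpenCV before the first call to estimate_gaze;
# the file name, codec, frame rate and frame size are placeholder values (the frame size
# must match the frames passed to out_video.write), not taken from the original script.
import cv2

FPS = "0"  # overwritten each frame by estimate_gaze above
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_video = cv2.VideoWriter('gaze_output.mp4', fourcc, 30.0, (1280, 720))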