class GazeEstimatorROS(GazeEstimatorBase):
    def __init__(self, device_id_gaze, model_files):
        super(GazeEstimatorROS, self).__init__(device_id_gaze, model_files)
        self.bridge = CvBridge()
        self.subjects_bridge = SubjectListBridge()

        self.tf_broadcaster = TransformBroadcaster()
        self.tf_listener = TransformListener()

        self.tf_prefix = rospy.get_param("~tf_prefix", "gaze")
        self.headpose_frame = self.tf_prefix + "/head_pose_estimated"
        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_ros_frame")

        self.image_subscriber = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList,
                                                 self.image_callback, queue_size=3, buff_size=2**24)
        self.subjects_gaze_img = rospy.Publisher("/subjects/gazeimages", Image, queue_size=3)

        self.visualise_eyepose = rospy.get_param("~visualise_eyepose", default=True)

        self._last_time = rospy.Time().now()

    def publish_image(self, image, image_publisher, timestamp):
        """This method publishes the `image` to the `image_publisher` with the given `timestamp`."""
        image_ros = self.bridge.cv2_to_imgmsg(image, "rgb8")
        image_ros.header.stamp = timestamp
        image_publisher.publish(image_ros)

    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted into a format
        suitable for the gaze estimation network (see :meth:`input_from_image`), then the gaze is
        estimated (see :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image
        (see :meth:`visualize_eye_result`), and this image is published along with the estimated gaze
        vector (see :meth:`publish_image` and :func:`publish_gaze`)."""
        timestamp = subject_image_list.header.stamp

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []
        for subject_id, s in subjects_dict.items():
            try:
                (trans_head, rot_head) = self.tf_listener.lookupTransform(
                    self.ros_tf_frame, self.headpose_frame + str(subject_id), timestamp)
                euler_angles_head = list(tf.transformations.euler_from_quaternion(rot_head))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self.input_from_image(s.right))
                input_l_list.append(self.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException, tf.Exception):
                pass

        if len(valid_subject_list) == 0:
            return

        gaze_est = self.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                              inference_input_right_list=input_r_list,
                                              inference_headpose_list=input_head_list)

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self.visualize_eye_result(s.right, gaze)
                l_gaze_img = self.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)

        _now = rospy.Time().now()
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._last_time = _now
        tqdm.write('\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} diff: {:.2f}s for {} subjects {:.0f}Hz\033[0m'
                   .format(_now.to_sec(), timestamp.to_sec(), _now.to_sec() - timestamp.to_sec(),
                           len(valid_subject_list), _freq), end="\r")

    def publish_gaze(self, est_gaze, msg_stamp, subject_id):
        """Publish the estimated gaze direction as a TF frame below the estimated head pose."""
        theta_gaze = est_gaze[0]
        phi_gaze = est_gaze[1]
        euler_angle_gaze = gaze_tools.get_euler_from_phi_theta(phi_gaze, theta_gaze)
        quaternion_gaze = tf.transformations.quaternion_from_euler(*euler_angle_gaze)
        self.tf_broadcaster.sendTransform((0, 0, 0.05),  # publish it 5cm above the head pose's origin (nose tip)
                                          quaternion_gaze, msg_stamp,
                                          self.tf_prefix + "/world_gaze" + str(subject_id),
                                          self.headpose_frame + str(subject_id))
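# A quick self-contained sketch of how a (theta, phi) gaze estimate maps to a 3D
# direction vector. The axis convention below is an assumption for illustration
# only; gaze_tools.get_endpoint defines the convention the package actually uses.
import numpy as np


def gaze_angles_to_vector(theta, phi):
    """Map spherical gaze angles to a unit direction vector (assumed convention:
    theta = pitch, phi = yaw, with (0, 0) looking straight along -z)."""
    return np.array([-np.cos(theta) * np.sin(phi),   # x: positive to the right
                     -np.sin(theta),                 # y: positive downwards
                     -np.cos(theta) * np.cos(phi)])  # z: into the scene


if __name__ == '__main__':
    print(gaze_angles_to_vector(0.0, 0.0))  # -> [-0. -0. -1.], looking straight ahead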
class LandmarkMethod(object):
    def __init__(self):
        self.subjects = dict()
        self.bridge = CvBridge()
        self.__subject_bridge = SubjectListBridge()
        self.model_size_rescale = 30.0
        self.head_pitch = 0.0
        self.margin = rospy.get_param("~margin", 42)
        self.margin_eyes_height = rospy.get_param("~margin_eyes_height", 36)
        self.margin_eyes_width = rospy.get_param("~margin_eyes_width", 60)
        self.interpupillary_distance = 0.058
        self.cropped_face_size = (rospy.get_param("~face_size_height", 224),
                                  rospy.get_param("~face_size_width", 224))

        self.gpu_id = rospy.get_param("~gpu_id", default="0")
        torch.cuda.set_device(self.gpu_id)
        rospy.loginfo("Using GPU for landmark: {}".format(self.gpu_id))

        self.rgb_frame_id = rospy.get_param("~rgb_frame_id", "/kinect2_link")
        self.rgb_frame_id_ros = rospy.get_param("~rgb_frame_id_ros", "/kinect2_nonrotated_link")

        self.model_points = None
        self.eye_image_size = (rospy.get_param("~eye_image_height", 36),
                               rospy.get_param("~eye_image_width", 60))

        self.tf_broadcaster = TransformBroadcaster()
        self.tf_listener = TransformListener()
        self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")

        self.use_previous_headpose_estimate = True
        self.last_rvec = {}
        self.last_tvec = {}
        self.pose_stabilizers = {}  # Introduce scalar stabilizers for pose.

        try:
            tqdm.write("Wait for camera message")
            cam_info = rospy.wait_for_message("/camera_info", CameraInfo, timeout=None)
            self.img_proc = PinholeCameraModel()
            # noinspection PyTypeChecker
            self.img_proc.fromCameraInfo(cam_info)
            if np.array_equal(self.img_proc.intrinsicMatrix(),
                              np.matrix([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])):
                raise Exception('Camera matrix is zero-matrix. Did you calibrate '
                                'the camera and link it to the yaml file in the launch file?')
            tqdm.write("Camera message received")
        except rospy.ROSException:
            raise Exception("Could not get camera info")

        # multiple person images publication
        self.subject_pub = rospy.Publisher("/subjects/images", MSG_SubjectImagesList, queue_size=1)
        # multiple person faces publication for visualisation
        self.subject_faces_pub = rospy.Publisher("/subjects/faces", Image, queue_size=1)

        self.model_points = self._get_full_model_points()

        self.sess_bb = None
        self.face_net = FaceDetector(device="cuda:{}".format(self.gpu_id))

        self.color_sub = rospy.Subscriber("/image", Image, self.callback, buff_size=2**24, queue_size=1)

        self.facial_landmark_nn = face_alignment.FaceAlignment(
            landmarks_type=face_alignment.LandmarksType._2D,
            device="cuda:{}".format(self.gpu_id), flip_input=False)

        Server(ModelSizeConfig, self._dyn_reconfig_callback)

    def _dyn_reconfig_callback(self, config, level):
        self.model_points /= (self.model_size_rescale * self.interpupillary_distance)
        self.model_size_rescale = config["model_size"]
        self.interpupillary_distance = config["interpupillary_distance"]
        self.model_points *= (self.model_size_rescale * self.interpupillary_distance)
        self.head_pitch = config["head_pitch"]
        return config

    def _get_full_model_points(self):
        """Get all 68 3D model points from file"""
        raw_value = []
        filename = rospkg.RosPack().get_path('rt_gene') + '/model_nets/face_model_68.txt'
        with open(filename) as f:
            for line in f:
                raw_value.append(line)
        model_points = np.array(raw_value, dtype=np.float32)
        model_points = np.reshape(model_points, (3, -1)).T
        # model_points *= 4
        model_points[:, -1] *= -1  # flip the sign of the z coordinates
        # scale the expansion of the model based on the interpupillary distance
        model_points = model_points * (self.interpupillary_distance * self.model_size_rescale)
        return model_points

    def get_face_bb(self, image):
        faceboxes = []
        start_time = time.time()
        fraction = 4
        # scipy.misc.imresize is a legacy SciPy API; the detection runs on a downscaled image
        image = scipy.misc.imresize(image, 1.0 / fraction)
        detections = self.face_net.detect_from_image(image)
        tqdm.write("Face Detector Frequency: {:.2f}Hz".format(1 / (time.time() - start_time)))

        for result in detections:
            # scale the box (and, as a side effect, the confidence) back up by `fraction`;
            # the confidence threshold is scaled by the same factor to compensate
            x_left_top, y_left_top, x_right_bottom, y_right_bottom, confidence = result * fraction
            if confidence > 0.8 * fraction:
                box = [x_left_top, y_left_top, x_right_bottom, y_right_bottom]
                diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
                offset_y = int(abs(diff_height_width / 2))
                box_moved = self.move_box(box, [0, offset_y])

                # Make box square.
                facebox = self.get_square_box(box_moved)
                faceboxes.append(facebox)

        return faceboxes

    @staticmethod
    def move_box(box, offset):
        """Move the box in the direction specified by vector offset"""
        left_x = box[0] + offset[0]
        top_y = box[1] + offset[1]
        right_x = box[2] + offset[0]
        bottom_y = box[3] + offset[1]
        return [left_x, top_y, right_x, bottom_y]

    @staticmethod
    def box_in_image(box, image):
        """Check if the box is in image"""
        rows = image.shape[0]
        cols = image.shape[1]
        return box[0] >= 0 and box[1] >= 0 and box[2] <= cols and box[3] <= rows

    @staticmethod
    def get_square_box(box):
        """Get a square box out of the given box, by expanding it."""
        left_x = box[0]
        top_y = box[1]
        right_x = box[2]
        bottom_y = box[3]

        box_width = right_x - left_x
        box_height = bottom_y - top_y

        # Check if box is already a square. If not, make it a square.
        diff = box_height - box_width
        delta = int(abs(diff) / 2)

        if diff == 0:  # Already a square.
            return box
        elif diff > 0:  # Height > width, a slim box.
            left_x -= delta
            right_x += delta
            if diff % 2 == 1:
                right_x += 1
        else:  # Width > height, a short box.
            top_y -= delta
            bottom_y += delta
            if diff % 2 == 1:
                bottom_y += 1

        return [left_x, top_y, right_x, bottom_y]

    def process_image(self, color_msg):
        tqdm.write('Time now: ' + str(rospy.Time.now().to_sec()) +
                   ' message color: ' + str(color_msg.header.stamp.to_sec()) +
                   ' diff: ' + str(rospy.Time.now().to_sec() - color_msg.header.stamp.to_sec()))
        start_time = time.time()

        color_img = gaze_tools.convert_image(color_msg, "bgr8")
        timestamp = color_msg.header.stamp

        self.detect_landmarks(color_img, timestamp)  # update self.subjects

        tqdm.write('Elapsed after detecting transformed_landmarks: ' + str(time.time() - start_time))

        if not self.subjects:
            tqdm.write("No face found")
            return

        self.get_eye_image()  # update self.subjects

        final_head_pose_images = None
        for subject_id, subject in self.subjects.items():
            if subject.left_eye_color is None or subject.right_eye_color is None:
                continue
            if subject_id not in self.last_rvec:
                self.last_rvec[subject_id] = np.array([[0.01891013], [0.08560084], [-3.14392813]])
            if subject_id not in self.last_tvec:
                self.last_tvec[subject_id] = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])
            if subject_id not in self.pose_stabilizers:
                self.pose_stabilizers[subject_id] = [Stabilizer(state_num=2, measure_num=1,
                                                                cov_process=0.1, cov_measure=0.1)
                                                     for _ in range(6)]

            success, rotation_vector, translation_vector = self.get_head_pose(subject.marks, subject_id)

            # Publish all the data
            translation_headpose_tf = self.get_head_translation(timestamp, subject_id)

            if success:
                if translation_headpose_tf is not None:
                    euler_angles_head = self.publish_pose(timestamp, translation_headpose_tf,
                                                          rotation_vector, subject_id)

                    if euler_angles_head is not None:
                        head_pose_image = self.visualize_headpose_result(
                            subject.face_color,
                            gaze_tools.get_phi_theta_from_euler(euler_angles_head))
                        head_pose_image_resized = cv2.resize(head_pose_image, dsize=(224, 224),
                                                             interpolation=cv2.INTER_CUBIC)

                        if final_head_pose_images is None:
                            final_head_pose_images = head_pose_image_resized
                        else:
                            final_head_pose_images = np.concatenate(
                                (final_head_pose_images, head_pose_image_resized), axis=1)
            else:
                tqdm.write("Could not get head pose properly")

        if final_head_pose_images is not None:
            self.publish_subject_list(timestamp, self.subjects)
            headpose_image_ros = self.bridge.cv2_to_imgmsg(final_head_pose_images, "bgr8")
            headpose_image_ros.header.stamp = timestamp
            self.subject_faces_pub.publish(headpose_image_ros)

        tqdm.write('Elapsed total: ' + str(time.time() - start_time) + '\n\n')

    def get_head_pose(self, landmarks, subject_id):
        """
        We are given a set of 2D points in the form of landmarks. The basic idea is that we assume a
        generic 3D head model, and try to fit the 2D points to the 3D model based on the
        Levenberg-Marquardt optimization. We can use OpenCV's implementation of SolvePnP for this.
        This method is inspired by http://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
        We are planning to replace this with our own head pose estimator.
        :param landmarks: Landmark positions to be used to determine head pose
        :return: success - Whether the pose was successfully extracted
                 rotation_vector - rotation vector that along with translation vector brings points from
                 the model coordinate system to the camera coordinate system
                 translation_vector - see rotation_vector
        """
        image_points_headpose = landmarks
        camera_matrix = self.img_proc.intrinsicMatrix()
        dist_coeffs = self.img_proc.distortionCoeffs()

        try:
            if self.last_rvec[subject_id] is not None and self.last_tvec[subject_id] is not None \
                    and self.use_previous_headpose_estimate:
                (success, rotation_vector_unstable, translation_vector_unstable) = \
                    cv2.solvePnP(self.model_points, image_points_headpose, camera_matrix, dist_coeffs,
                                 flags=cv2.SOLVEPNP_ITERATIVE, useExtrinsicGuess=True,
                                 rvec=self.last_rvec[subject_id], tvec=self.last_tvec[subject_id])
            else:
                (success, rotation_vector_unstable, translation_vector_unstable) = \
                    cv2.solvePnP(self.model_points, image_points_headpose, camera_matrix, dist_coeffs,
                                 flags=cv2.SOLVEPNP_ITERATIVE)
        except Exception:
            print('Could not estimate head pose')
            return False, None, None

        if not success:
            print('Could not estimate head pose')
            return False, None, None

        # Apply Kalman filter
        stable_pose = []
        pose_np = np.array((rotation_vector_unstable, translation_vector_unstable)).flatten()
        for value, ps_stb in zip(pose_np, self.pose_stabilizers[subject_id]):
            ps_stb.update([value])
            stable_pose.append(ps_stb.state[0])

        stable_pose = np.reshape(stable_pose, (-1, 3))
        rotation_vector = stable_pose[0]
        translation_vector = stable_pose[1]

        self.last_rvec[subject_id] = rotation_vector
        self.last_tvec[subject_id] = translation_vector

        rotation_vector_swapped = [-rotation_vector[2],
                                   -rotation_vector[1] + np.pi + self.head_pitch,
                                   rotation_vector[0]]
        rot_head = tf_transformations.quaternion_from_euler(*rotation_vector_swapped)

        return success, rot_head, translation_vector

    def publish_subject_list(self, timestamp, subjects):
        assert (subjects is not None)
        subject_list_message = self.__subject_bridge.images_to_msg(subjects, timestamp)
        self.subject_pub.publish(subject_list_message)

    @staticmethod
    def visualize_headpose_result(face_image, est_headpose):
        """Here, we take the original face image and overlay the estimated head pose."""
        output_image = np.copy(face_image)
        center_x = face_image.shape[1] / 2
        center_y = face_image.shape[0] / 2
        endpoint_x, endpoint_y = gaze_tools.get_endpoint(est_headpose[1], est_headpose[0],
                                                         center_x, center_y, 100)
        cv2.line(output_image, (int(center_x), int(center_y)),
                 (int(endpoint_x), int(endpoint_y)), (0, 0, 255), 3)
        return output_image

    def get_head_translation(self, timestamp, subject_id):
        trans_reshaped = self.last_tvec[subject_id].reshape(3, 1)
        nose_center_3d_rot = [-float(trans_reshaped[0] / 1000.0),
                              -float(trans_reshaped[1] / 1000.0),
                              -float(trans_reshaped[2] / 1000.0)]

        nose_center_3d_rot_frame = self.rgb_frame_id

        try:
            nose_center_3d_rot_pt = PointStamped()
            nose_center_3d_rot_pt.header.frame_id = nose_center_3d_rot_frame
            nose_center_3d_rot_pt.header.stamp = timestamp
            nose_center_3d_rot_pt.point = Point(x=nose_center_3d_rot[0],
                                                y=nose_center_3d_rot[1],
                                                z=nose_center_3d_rot[2])

            nose_center_3d = self.tf_listener.transformPoint(self.rgb_frame_id_ros, nose_center_3d_rot_pt)
            nose_center_3d.header.stamp = timestamp
            nose_center_3d_tf = gaze_tools.position_ros_to_tf(nose_center_3d.point)

            print('Translation based on landmarks', nose_center_3d_tf)
            return nose_center_3d_tf
        except ExtrapolationException as e:
            print(e)
            return None

    def publish_pose(self, timestamp, nose_center_3d_tf, rot_head, subject_id):
        self.tf_broadcaster.sendTransform(nose_center_3d_tf, rot_head, timestamp,
                                          self.tf_prefix + "/head_pose_estimated" + str(subject_id),
                                          self.rgb_frame_id_ros)
        return gaze_tools.get_head_pose(nose_center_3d_tf, rot_head)

    def callback(self, color_msg):
        """Simply call process_image."""
        try:
            self.process_image(color_msg)
        except CvBridgeError as e:
            print(e)
        except rospy.ROSException as e:
            if str(e) == "publish() to a closed topic":
                pass
            else:
                raise e

    def __update_subjects(self, new_faceboxes):
        """
        Assign the new faces to the existing subjects (id tracking)
        :param new_faceboxes: new faceboxes detected
        :return: update self.subjects
        """
        assert (self.subjects is not None)
        assert (new_faceboxes is not None)

        if len(new_faceboxes) == 0:
            self.subjects = dict()
            return

        if len(self.subjects) == 0:
            for j, b_new in enumerate(new_faceboxes):
                self.subjects[j] = SubjectDetected(b_new)
            return

        distance_matrix = np.ones((len(self.subjects), len(new_faceboxes)))
        for i, subject in enumerate(self.subjects.values()):
            for j, b_new in enumerate(new_faceboxes):
                distance_matrix[i][j] = np.sqrt(np.mean(((np.array(subject.face_bb) - np.array(b_new)) ** 2)))
        ids_to_assign = range(len(new_faceboxes))

        self.subjects = dict()
        for id_to_assign in ids_to_assign:
            subject = np.argmin(distance_matrix[:, id_to_assign])
            while subject in self.subjects:
                subject += 1
            self.subjects[subject] = SubjectDetected(new_faceboxes[id_to_assign])

    def __detect_landmarks_one_box(self, facebox, color_img):
        try:
            _bb = list(map(int, facebox))  # map() is not subscriptable in Python 3
            face_img = color_img[_bb[1]:_bb[3], _bb[0]:_bb[2]]
            marks_orig = np.array(self.__detect_facial_landmarks(color_img, facebox)[0])

            eye_indices = np.array([36, 39, 42, 45])

            transformed_landmarks = marks_orig[eye_indices]
            transformed_landmarks[:, 0] -= facebox[0]
            transformed_landmarks[:, 1] -= facebox[1]

            return face_img, transformed_landmarks, marks_orig
        except Exception:
            return None, None, None

    def __detect_facial_landmarks(self, color_img, facebox):
        marks = self.facial_landmark_nn.get_landmarks(color_img, detected_faces=[facebox])
        return marks

    def detect_landmarks(self, color_img, timestamp):
        faceboxes = self.get_face_bb(color_img)
        self.__update_subjects(faceboxes)

        for subject in self.subjects.values():
            res = self.__detect_landmarks_one_box(subject.face_bb, color_img)
            subject.face_color = res[0]
            subject.transformed_landmarks = res[1]
            subject.marks = res[2]

    def __get_eye_image_one(self, transformed_landmarks, face_aligned_color):
        margin_ratio = 1.0

        try:
            # Get the width of the eye, and compute how big the margin should be according to the width
            lefteye_width = transformed_landmarks[3][0] - transformed_landmarks[2][0]
            righteye_width = transformed_landmarks[1][0] - transformed_landmarks[0][0]
            lefteye_margin, righteye_margin = lefteye_width * margin_ratio, righteye_width * margin_ratio

            # lefteye_center_x = transformed_landmarks[2][0] + lefteye_width / 2
            # righteye_center_x = transformed_landmarks[0][0] + righteye_width / 2
            lefteye_center_y = (transformed_landmarks[2][1] + transformed_landmarks[3][1]) / 2
            righteye_center_y = (transformed_landmarks[1][1] + transformed_landmarks[0][1]) / 2

            desired_ratio = self.eye_image_size[0] / self.eye_image_size[1] / 2

            # Now compute the bounding boxes.
            # The left / right x-coordinates are computed as the landmark position plus/minus the margin.
            # The bottom / top y-coordinates are computed according to the desired ratio, as the width
            # of the image is known.
            left_bb = np.zeros(4, dtype=np.int32)
            left_bb[0] = transformed_landmarks[2][0] - lefteye_margin / 2
            left_bb[1] = lefteye_center_y - (lefteye_width + lefteye_margin) * desired_ratio * 1.25
            left_bb[2] = transformed_landmarks[3][0] + lefteye_margin / 2
            left_bb[3] = lefteye_center_y + (lefteye_width + lefteye_margin) * desired_ratio * 1.25

            right_bb = np.zeros(4, dtype=np.int32)
            right_bb[0] = transformed_landmarks[0][0] - righteye_margin / 2
            right_bb[1] = righteye_center_y - (righteye_width + righteye_margin) * desired_ratio * 1.25
            right_bb[2] = transformed_landmarks[1][0] + righteye_margin / 2
            right_bb[3] = righteye_center_y + (righteye_width + righteye_margin) * desired_ratio * 1.25

            # Extract the eye images from the aligned image
            left_eye_color = face_aligned_color[left_bb[1]:left_bb[3], left_bb[0]:left_bb[2], :]
            right_eye_color = face_aligned_color[right_bb[1]:right_bb[3], right_bb[0]:right_bb[2], :]

            # for p in transformed_landmarks:  # For debug visualization only
            #     cv2.circle(face_aligned_color, (int(p[0]), int(p[1])), 3, (0, 0, 255), -1)

            # So far, we have only ensured that the ratio is correct. Now, resize it to the desired size.
            left_eye_color_resized = scipy.misc.imresize(left_eye_color, self.eye_image_size,
                                                         interp='lanczos')
            right_eye_color_resized = scipy.misc.imresize(right_eye_color, self.eye_image_size,
                                                          interp='lanczos')
        except (ValueError, TypeError) as e:
            print(e)
            return None, None, None, None
        return left_eye_color_resized, right_eye_color_resized, left_bb, right_bb

    # noinspection PyUnusedLocal
    def get_eye_image(self):
        """Extract the left and right eye images given the (dlib) transformed_landmarks and the source image.
        First, align the face. Then, extract the width of the eyes given the landmark positions.
        The height of the images is computed according to the desired ratio of the eye images."""
        start_time = time.time()
        for subject in self.subjects.values():
            res = self.__get_eye_image_one(subject.transformed_landmarks, subject.face_color)
            subject.left_eye_color = res[0]
            subject.right_eye_color = res[1]
            subject.left_eye_bb = res[2]
            subject.right_eye_bb = res[3]
        tqdm.write('New get_eye_image time: ' + str(time.time() - start_time))

    @staticmethod
    def get_image_points_eyes_nose(landmarks):
        landmarks_x, landmarks_y = landmarks.T[0], landmarks.T[1]

        left_eye_center_x = landmarks_x[42] + (landmarks_x[45] - landmarks_x[42]) / 2.0
        left_eye_center_y = (landmarks_y[42] + landmarks_y[45]) / 2.0
        right_eye_center_x = landmarks_x[36] + (landmarks_x[40] - landmarks_x[36]) / 2.0
        right_eye_center_y = (landmarks_y[36] + landmarks_y[40]) / 2.0

        nose_center_x = (landmarks_x[33] + landmarks_x[31] + landmarks_x[35]) / 3.0
        nose_center_y = (landmarks_y[33] + landmarks_y[31] + landmarks_y[35]) / 3.0

        return (nose_center_x, nose_center_y), \
               (left_eye_center_x, left_eye_center_y), \
               (right_eye_center_x, right_eye_center_y)
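# get_head_pose above fits the generic 3D face model to the 2D landmarks with
# cv2.solvePnP. A minimal, self-contained round trip on synthetic data (all
# numbers below are made up for illustration): project 3D points under a known
# pose, then recover that pose from the projections.
import cv2
import numpy as np

if __name__ == '__main__':
    object_points = np.array([[0., 0., 0.], [0., -30., -6.],
                              [-22., 17., -12.], [22., 17., -12.],
                              [-15., -15., -10.], [15., -15., -10.]])
    camera_matrix = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
    dist_coeffs = np.zeros((4, 1))

    rvec_true = np.array([[0.1], [0.2], [0.05]])
    tvec_true = np.array([[10.], [-5.], [800.]])
    image_points, _ = cv2.projectPoints(object_points, rvec_true, tvec_true,
                                        camera_matrix, dist_coeffs)

    ok, rvec_est, tvec_est = cv2.solvePnP(object_points, image_points, camera_matrix,
                                          dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
    # the estimates should match rvec_true / tvec_true up to numerics
    print(ok, np.round(rvec_est.ravel(), 4), np.round(tvec_est.ravel(), 2))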
class LandmarkMethodROS(LandmarkMethodBase):
    def __init__(self, img_proc=None):
        super(LandmarkMethodROS, self).__init__(
            device_id_facedetection=rospy.get_param("~device_id_facedetection", default="cuda:0"))
        self.subject_tracker = FaceEncodingTracker() if rospy.get_param(
            "~use_face_encoding_tracker", default=True) else SequentialTracker()
        self.bridge = CvBridge()
        self.__subject_bridge = SubjectListBridge()
        self.camera_frame = rospy.get_param("~camera_frame", "kinect2_link")
        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "kinect2_ros_frame")

        self.tf2_broadcaster = TransformBroadcaster()
        self.tf2_buffer = Buffer()
        self.tf2_listener = TransformListener(self.tf2_buffer)
        self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")
        self.visualise_headpose = rospy.get_param("~visualise_headpose", default=True)

        self.pose_stabilizers = {}  # Introduce scalar stabilizers for pose.

        try:
            if img_proc is None:
                tqdm.write("Wait for camera message")
                cam_info = rospy.wait_for_message("/camera_info", CameraInfo, timeout=None)
                self.img_proc = PinholeCameraModel()
                # noinspection PyTypeChecker
                self.img_proc.fromCameraInfo(cam_info)
            else:
                self.img_proc = img_proc

            if np.array_equal(self.img_proc.intrinsicMatrix().A,
                              np.array([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])):
                raise Exception('Camera matrix is zero-matrix. Did you calibrate '
                                'the camera and link it to the yaml file in the launch file?')
            tqdm.write("Camera message received")
        except rospy.ROSException:
            raise Exception("Could not get camera info")

        # multiple person images publication
        self.subject_pub = rospy.Publisher("/subjects/images", MSG_SubjectImagesList, queue_size=3)
        # multiple person faces publication for visualisation
        self.subject_faces_pub = rospy.Publisher("/subjects/faces", Image, queue_size=3)

        Server(ModelSizeConfig, self._dyn_reconfig_callback)

        # set up the subscriber last so that no callbacks fire before initialisation is complete
        self.color_sub = rospy.Subscriber("/image", Image, self.process_image,
                                          buff_size=2**24, queue_size=3)

    def _dyn_reconfig_callback(self, config, _):
        self.model_points /= (self.model_size_rescale * self.interpupillary_distance)
        self.model_size_rescale = config["model_size"]
        self.interpupillary_distance = config["interpupillary_distance"]
        self.model_points *= (self.model_size_rescale * self.interpupillary_distance)
        self.head_pitch = config["head_pitch"]
        return config

    def process_image(self, color_msg):
        color_img = gaze_tools.convert_image(color_msg, "bgr8")
        timestamp = color_msg.header.stamp

        self.update_subject_tracker(color_img)

        if not self.subject_tracker.get_tracked_elements():
            tqdm.write("\033[2K\033[1;31mNo face found\033[0m", end="\r")
            return

        self.subject_tracker.update_eye_images(self.eye_image_size)

        final_head_pose_images = []
        for subject_id, subject in self.subject_tracker.get_tracked_elements().items():
            if subject.left_eye_color is None or subject.right_eye_color is None:
                continue
            if subject_id not in self.pose_stabilizers:
                self.pose_stabilizers[subject_id] = [Stabilizer(state_num=2, measure_num=1,
                                                                cov_process=0.1, cov_measure=0.1)
                                                     for _ in range(6)]

            success, head_rpy, translation_vector = self.get_head_pose(subject.marks, subject_id)

            if success:
                # Publish all the data
                self.publish_pose(timestamp, translation_vector, head_rpy, subject_id)

                if self.visualise_headpose:
                    # pitch roll yaw
                    trans_msg = self.tf2_buffer.lookup_transform(
                        self.ros_tf_frame,
                        self.tf_prefix + "/head_pose_estimated" + str(subject_id),
                        timestamp)
                    rotation = trans_msg.transform.rotation
                    euler_angles_head = list(transformations.euler_from_quaternion(
                        [rotation.x, rotation.y, rotation.z, rotation.w]))
                    euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                    face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224),
                                                    interpolation=cv2.INTER_CUBIC)

                    final_head_pose_images.append(
                        LandmarkMethodROS.visualize_headpose_result(
                            face_image_resized,
                            gaze_tools.get_phi_theta_from_euler(euler_angles_head)))

        if len(self.subject_tracker.get_tracked_elements().items()) > 0:
            self.publish_subject_list(timestamp, self.subject_tracker.get_tracked_elements())

        if len(final_head_pose_images) > 0:
            headpose_image_ros = self.bridge.cv2_to_imgmsg(np.hstack(final_head_pose_images), "bgr8")
            headpose_image_ros.header.stamp = timestamp
            self.subject_faces_pub.publish(headpose_image_ros)

    def get_head_pose(self, landmarks, subject_id):
        """
        We are given a set of 2D points in the form of landmarks. The basic idea is that we assume a
        generic 3D head model, and try to fit the 2D points to the 3D model based on the
        Levenberg-Marquardt optimization. We can use OpenCV's implementation of SolvePnP for this.
        This method is inspired by http://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
        We are planning to replace this with our own head pose estimator.
        :param landmarks: Landmark positions to be used to determine head pose
        :param subject_id: ID of the subject that corresponds to the given landmarks
        :return: success - Whether the pose was successfully extracted
                 rotation_vector - rotation vector that along with translation vector brings points from
                 the model coordinate system to the camera coordinate system
                 translation_vector - see rotation_vector
        """
        camera_matrix = self.img_proc.intrinsicMatrix()
        dist_coeffs = self.img_proc.distortionCoeffs()

        try:
            success, rodrigues_rotation, translation_vector, _ = cv2.solvePnPRansac(
                self.model_points,
                landmarks.reshape(len(self.model_points), 1, 2),
                cameraMatrix=camera_matrix,
                distCoeffs=dist_coeffs,
                flags=cv2.SOLVEPNP_DLS)
        except cv2.error as e:
            tqdm.write('\033[2K\033[1;31mCould not estimate head pose: {}\033[0m'.format(e), end="\r")
            return False, None, None

        if not success:
            tqdm.write('\033[2K\033[1;31mUnsuccessful in solvePnPRansac\033[0m', end="\r")
            return False, None, None

        # this is a generic point stabiliser, the underlying representation doesn't matter
        rotation_vector, translation_vector = self.apply_kalman_filter_head_pose(
            subject_id, rodrigues_rotation, translation_vector / 1000.0)

        rotation_vector[0] += self.head_pitch

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        _rpy_rotation = np.array(transformations.euler_from_matrix(_m))

        return success, _rpy_rotation, translation_vector

    def apply_kalman_filter_head_pose(self, subject_id, rotation_vector_unstable, translation_vector_unstable):
        stable_pose = []
        pose_np = np.array((rotation_vector_unstable, translation_vector_unstable)).flatten()
        for value, ps_stb in zip(pose_np, self.pose_stabilizers[subject_id]):
            ps_stb.update([value])
            stable_pose.append(ps_stb.state[0])
        stable_pose = np.reshape(stable_pose, (-1, 3))
        rotation_vector = stable_pose[0]
        translation_vector = stable_pose[1]
        return rotation_vector, translation_vector

    def publish_subject_list(self, timestamp, subjects):
        assert (subjects is not None)
        subject_list_message = self.__subject_bridge.images_to_msg(subjects, timestamp)
        self.subject_pub.publish(subject_list_message)

    def publish_pose(self, timestamp, nose_center_3d_tf, head_rpy, subject_id):
        t = TransformStamped()
        t.header.frame_id = self.camera_frame
        t.header.stamp = timestamp
        t.child_frame_id = self.tf_prefix + "/head_pose_estimated" + str(subject_id)
        t.transform.translation.x = nose_center_3d_tf[0]
        t.transform.translation.y = nose_center_3d_tf[1]
        t.transform.translation.z = nose_center_3d_tf[2]

        rotation = transformations.quaternion_from_euler(*head_rpy)
        t.transform.rotation.x = rotation[0]
        t.transform.rotation.y = rotation[1]
        t.transform.rotation.z = rotation[2]
        t.transform.rotation.w = rotation[3]

        try:
            self.tf2_broadcaster.sendTransform([t])
        except rospy.ROSException as exc:
            if str(exc) == "publish() to a closed topic":
                pass
            else:
                raise exc

        self.tf2_buffer.set_transform(t, 'extract_landmarks')

    def update_subject_tracker(self, color_img):
        faceboxes = self.get_face_bb(color_img)
        if len(faceboxes) == 0:
            self.subject_tracker.clear_elements()
            return

        tracked_subjects = self.get_subjects_from_faceboxes(color_img, faceboxes)

        # track the new faceboxes according to the previous ones
        self.subject_tracker.track(tracked_subjects)
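# apply_kalman_filter_head_pose above smooths the six pose scalars (3 rotation +
# 3 translation) with one Stabilizer each. A stand-in with the same
# update()/state interface (a plain exponential filter, purely illustrative, not
# the package's Kalman-based Stabilizer) shows the packing/unpacking pattern
# without the ROS dependencies:
import numpy as np


class FakeStabilizer(object):
    """Stand-in for the package's Kalman-based Stabilizer (illustration only)."""

    def __init__(self, alpha=0.5):
        self.alpha = alpha
        self.state = [None]

    def update(self, measurement):
        value = measurement[0]
        self.state[0] = value if self.state[0] is None \
            else self.alpha * value + (1.0 - self.alpha) * self.state[0]


if __name__ == '__main__':
    stabilizers = [FakeStabilizer() for _ in range(6)]  # 3 rotation + 3 translation scalars
    pose_true = np.array([0.1, 0.2, 0.05, 10.0, -5.0, 800.0])
    for _ in range(20):
        noisy_pose = pose_true + np.random.normal(scale=0.01, size=6)
        smoothed = []
        for value, stabilizer in zip(noisy_pose, stabilizers):
            stabilizer.update([value])
            smoothed.append(stabilizer.state[0])
    print(np.reshape(smoothed, (-1, 3)))  # row 0 ~ rotation vector, row 1 ~ translation vector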
class GazeEstimatorROS(object):
    def __init__(self, device_id_gaze, model_files):
        self.bridge = CvBridge()
        self.subjects_bridge = SubjectListBridge()

        self.tf2_broadcaster = tf2_ros.TransformBroadcaster()
        self.tf2_buffer = tf2_ros.Buffer()
        self.tf2_listener = tf2_ros.TransformListener(self.tf2_buffer)

        self.tf_prefix = rospy.get_param("~tf_prefix", "gaze")
        self.headpose_frame = self.tf_prefix + "/head_pose_estimated"

        self.gaze_backend = rospy.get_param("~gaze_backend", "tensorflow")
        if self.gaze_backend == "tensorflow":
            from rt_gene.estimate_gaze_tensorflow import GazeEstimator
            self._gaze_estimator = GazeEstimator(device_id_gaze, model_files)
        elif self.gaze_backend == "pytorch":
            from rt_gene.estimate_gaze_pytorch import GazeEstimator
            self._gaze_estimator = GazeEstimator(device_id_gaze, model_files)
        else:
            raise ValueError("Incorrect gaze_backend, choices are: tensorflow or pytorch")

        self.image_subscriber = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList,
                                                 self.image_callback, queue_size=3, buff_size=2**24)
        self.subjects_gaze_img = rospy.Publisher("/subjects/gazeimages", Image, queue_size=3)
        self.gaze_publishers = rospy.Publisher("/subjects/gaze", MSG_GazeList, queue_size=3)

        self.visualise_eyepose = rospy.get_param("~visualise_eyepose", default=True)

        self._last_time = rospy.Time().now()
        self._freq_deque = collections.deque(maxlen=30)  # average frequency statistic over roughly one second
        self._latency_deque = collections.deque(maxlen=30)

    def publish_image(self, image, image_publisher, timestamp):
        """This method publishes the `image` to the `image_publisher` with the given `timestamp`."""
        image_ros = self.bridge.cv2_to_imgmsg(image, "rgb8")
        image_ros.header.stamp = timestamp
        image_publisher.publish(image_ros)

    def image_callback(self, subject_image_list):
        """This method is called whenever new input arrives. The input is first converted into a format
        suitable for the gaze estimation network (see :meth:`input_from_image`), then the gaze is
        estimated (see :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image
        (see :meth:`visualize_eye_result`), and this image is published along with the estimated gaze
        vector (see :meth:`publish_image` and :func:`publish_gaze`)."""
        timestamp = subject_image_list.header.stamp
        camera_frame = subject_image_list.header.frame_id

        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        input_r_list = []
        input_l_list = []
        input_head_list = []
        valid_subject_list = []

        for subject_id, s in subjects_dict.items():
            try:
                transform_msg = self.tf2_buffer.lookup_transform(
                    camera_frame, self.headpose_frame + str(subject_id), timestamp)
                rot_head = transform_msg.transform.rotation
                _m = transformations.quaternion_matrix([rot_head.x, rot_head.y, rot_head.z, rot_head.w])
                euler_angles_head = list(transformations.euler_from_matrix(
                    np.dot(ros_tools.camera_to_ros, _m)))
                euler_angles_head = gaze_tools.limit_yaw(euler_angles_head)

                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                input_head_list.append([theta_head, phi_head])
                input_r_list.append(self._gaze_estimator.input_from_image(s.right))
                input_l_list.append(self._gaze_estimator.input_from_image(s.left))
                valid_subject_list.append(subject_id)
            except (tf2_ros.LookupException, tf2_ros.ConnectivityException,
                    tf2_ros.ExtrapolationException, tf2_ros.TransformException):
                pass

        if len(valid_subject_list) == 0:
            return

        gaze_est = self._gaze_estimator.estimate_gaze_twoeyes(
            inference_input_left_list=input_l_list,
            inference_input_right_list=input_r_list,
            inference_headpose_list=input_head_list)

        subject_subset = dict((k, subjects_dict[k]) for k in valid_subject_list if k in subjects_dict)
        self.publish_gaze_msg(subject_image_list.header, subject_subset, gaze_est.tolist())

        subjects_gaze_img_list = []
        for subject_id, gaze in zip(valid_subject_list, gaze_est.tolist()):
            subjects_dict[subject_id].gaze = gaze
            self.publish_gaze(gaze, timestamp, subject_id)

            if self.visualise_eyepose:
                s = subjects_dict[subject_id]
                r_gaze_img = self._gaze_estimator.visualize_eye_result(s.right, gaze)
                l_gaze_img = self._gaze_estimator.visualize_eye_result(s.left, gaze)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                subjects_gaze_img_list.append(s_gaze_img)

        if len(subjects_gaze_img_list) > 0:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(np.hstack(subjects_gaze_img_list).astype(np.uint8), "bgr8")
            gaze_img_msg.header.stamp = timestamp
            self.subjects_gaze_img.publish(gaze_img_msg)

        _now = rospy.Time().now()
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._freq_deque.append(_freq)
        self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
        self._last_time = _now
        tqdm.write('\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'
                   .format(_now.to_sec(), timestamp.to_sec(), np.mean(self._latency_deque),
                           len(valid_subject_list), np.mean(self._freq_deque)), end="\r")

    def publish_gaze_msg(self, header, subjects, gazes):
        gaze_msg_list = MSG_GazeList()
        gaze_msg_list.header = header
        for subject_id, gaze in zip(subjects.keys(), gazes):
            gaze_msg = MSG_Gaze()
            gaze_msg.subject_id = subject_id
            gaze_msg.theta = gaze[0]
            gaze_msg.phi = gaze[1]
            gaze_msg_list.subjects.append(gaze_msg)
        self.gaze_publishers.publish(gaze_msg_list)

    def publish_gaze(self, est_gaze, msg_stamp, subject_id):
        """Publish the estimated gaze direction as a TF frame below the estimated head pose."""
        theta_gaze = est_gaze[0]
        phi_gaze = est_gaze[1]
        euler_angle_gaze = gaze_tools.get_euler_from_phi_theta(phi_gaze, theta_gaze)
        quaternion_gaze = transformations.quaternion_from_euler(*euler_angle_gaze)

        t = TransformStamped()
        t.header.stamp = msg_stamp
        t.header.frame_id = self.headpose_frame + str(subject_id)
        t.child_frame_id = self.tf_prefix + "/world_gaze" + str(subject_id)
        t.transform.translation.x = 0
        t.transform.translation.y = 0
        t.transform.translation.z = 0.05  # publish it 5cm above the head pose's origin (nose tip)
        t.transform.rotation.x = quaternion_gaze[0]
        t.transform.rotation.y = quaternion_gaze[1]
        t.transform.rotation.z = quaternion_gaze[2]
        t.transform.rotation.w = quaternion_gaze[3]

        try:
            self.tf2_broadcaster.sendTransform([t])
        except rospy.ROSException as exc:
            if str(exc) == "publish() to a closed topic":
                pass
            else:
                raise exc
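# A hypothetical bring-up for the node above, assuming the module's existing
# imports (rospy etc.). The parameter names mirror those read elsewhere in the
# package, but the repository's launch files define the authoritative ones.
if __name__ == '__main__':
    rospy.init_node('estimate_gaze')
    gaze_estimator = GazeEstimatorROS(rospy.get_param('~device_id_gazeestimation', default='/gpu:0'),
                                      rospy.get_param('~model_files'))
    rospy.spin()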
class BlinkEstimatorROS(object):
    def __init__(self, device_id_blink, model_files, threshold):
        self.cv_bridge = CvBridge()
        self.bridge = SubjectListBridge()
        self.viz = rospy.get_param("~viz", True)

        blink_backend = rospy.get_param("~blink_backend", default="pytorch")
        model_type = rospy.get_param("~model_type", default="resnet18")

        if blink_backend == "tensorflow":
            from rt_bene.estimate_blink_tensorflow import BlinkEstimatorTensorflow
            self._blink_estimator = BlinkEstimatorTensorflow(device_id_blink, model_files,
                                                             model_type, threshold)
        elif blink_backend == "pytorch":
            from rt_bene.estimate_blink_pytorch import BlinkEstimatorPytorch
            self._blink_estimator = BlinkEstimatorPytorch(device_id_blink, model_files,
                                                          model_type, threshold)
        else:
            raise ValueError("Incorrect blink_backend, choices are: tensorflow or pytorch")

        self._last_time = rospy.Time().now()
        self._freq_deque = collections.deque(maxlen=30)  # average frequency statistic over roughly one second
        self._latency_deque = collections.deque(maxlen=30)

        self.blink_publisher = rospy.Publisher("/subjects/blink", MSG_BlinkList, queue_size=3)
        if self.viz:
            self.viz_pub = rospy.Publisher(rospy.get_param("~viz_topic", "/subjects/blink_images"),
                                           Image, queue_size=3)

        # set up the subscriber last so that no callbacks fire before initialisation is complete
        self.sub = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList, self.callback,
                                    queue_size=1, buff_size=2**24)

    def callback(self, msg):
        subjects = self.bridge.msg_to_images(msg)
        left_eyes = []
        right_eyes = []

        for subject in subjects.values():
            _left, _right = self._blink_estimator.inputs_from_images(subject.left, subject.right)
            left_eyes.append(_left)
            right_eyes.append(_right)

        if len(left_eyes) == 0:
            return

        probs = self._blink_estimator.predict(left_eyes, right_eyes)

        self.publish_msg(msg.header, subjects, probs)

        if self.viz:
            blink_image_list = []
            for subject, p in zip(subjects.values(), probs):
                resized_face = cv2.resize(subject.face, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
                blink_image_list.append(self._blink_estimator.overlay_prediction_over_img(resized_face, p))

            if len(blink_image_list) > 0:
                blink_viz_img = self.cv_bridge.cv2_to_imgmsg(np.hstack(blink_image_list), encoding="bgr8")
                blink_viz_img.header.stamp = msg.header.stamp
                self.viz_pub.publish(blink_viz_img)

        _now = rospy.Time().now()
        timestamp = msg.header.stamp
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._freq_deque.append(_freq)
        self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
        self._last_time = _now
        tqdm.write('\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'
                   .format(_now.to_sec(), timestamp.to_sec(), np.mean(self._latency_deque),
                           len(subjects), np.mean(self._freq_deque)), end="\r")

    def publish_msg(self, header, subjects, probabilities):
        blink_msg_list = MSG_BlinkList()
        blink_msg_list.header = header
        for subject_id, p in zip(subjects.keys(), probabilities):
            blink_msg = MSG_Blink()
            blink_msg.subject_id = str(subject_id)
            blink_msg.blink = bool(p >= self._blink_estimator.threshold)
            blink_msg.probability = p
            blink_msg_list.subjects.append(blink_msg)

        self.blink_publisher.publish(blink_msg_list)
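# The Hz/latency readout in the callback above is a rolling mean over the last
# 30 callbacks (roughly one second at sensor rate). The same bookkeeping,
# stripped of ROS, as a self-contained sketch:
import collections
import time

import numpy as np

if __name__ == '__main__':
    freq_deque = collections.deque(maxlen=30)
    last_time = time.time()
    for _ in range(60):
        time.sleep(0.03)  # stand-in for the work done in one callback
        now = time.time()
        freq_deque.append(1.0 / (now - last_time))
        last_time = now
    print('{:.0f}Hz'.format(np.mean(freq_deque)))  # ~33Hz for the sleep above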
class GazeEstimator(object):
    """This class encapsulates a deep neural network for gaze estimation.

    It retrieves two image streams, one containing the left eye and another containing the right eye.
    It synchronizes these two images with the estimated head pose. The images are then converted into
    a suitable format, and a forward pass (one per eye) of the deep neural network results in the
    estimated gaze for this frame. The estimated gaze is then published in the (theta, phi) notation.
    Additionally, two images with the gaze overlaid on the eye images are published."""

    def __init__(self):
        self.image_height = rospy.get_param("~image_height", 36)
        self.image_width = rospy.get_param("~image_width", 60)
        self.bridge = CvBridge()
        self.subjects_bridge = SubjectListBridge()

        self.tf_broadcaster = TransformBroadcaster()
        self.tf_listener = TransformListener()

        self.use_last_headpose = rospy.get_param("~use_last_headpose", True)
        self.tf_prefix = rospy.get_param("~tf_prefix", "gaze")
        self.last_phi_head, self.last_theta_head = None, None

        self.rgb_frame_id_ros = rospy.get_param("~rgb_frame_id_ros", "/kinect2_nonrotated_link")
        self.headpose_frame = self.tf_prefix + "/head_pose_estimated"

        config = tensorflow.ConfigProto(inter_op_parallelism_threads=1,
                                        intra_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.3
        config.log_device_placement = False
        self.sess = tensorflow.Session(config=config)
        set_session(self.sess)

        model_files = rospy.get_param("~model_files")

        self.models = []
        for model_file in model_files:
            tqdm.write('Load model ' + model_file)
            model = load_model(os.path.join(rospkg.RosPack().get_path('rt_gene'), model_file),
                               custom_objects={'accuracy_angle': accuracy_angle,
                                               'angle_loss': angle_loss})
            # noinspection PyProtectedMember
            model._make_predict_function()  # have to initialize before threading
            self.models.append(model)
        tqdm.write('Loaded ' + str(len(self.models)) + ' models')

        self.graph = tensorflow.get_default_graph()

        self.image_subscriber = rospy.Subscriber('/subjects/images', MSG_SubjectImagesList,
                                                 self.image_callback, queue_size=3)
        self.subjects_gaze_img = rospy.Publisher('/subjects/gazeimages', Image, queue_size=3)

        self.average_weights = np.array([0.1, 0.125, 0.175, 0.2, 0.4])
        self.gaze_buffer_c = {}

    def __del__(self):
        if self.sess is not None:
            self.sess.close()

    def estimate_gaze_twoeyes(self, test_input_left, test_input_right, headpose):
        test_headpose = headpose.reshape(1, 2)
        with self.graph.as_default():
            predictions = []
            for model in self.models:
                predictions.append(model.predict({'img_input_L': test_input_left,
                                                  'img_input_R': test_input_right,
                                                  'headpose_input': test_headpose})[0])
            mean_prediction = np.mean(np.array(predictions), axis=0)
            if len(self.models) == 1:  # only apply offset for single model, not for ensemble models
                mean_prediction[1] += 0.11
            return mean_prediction

    def visualize_eye_result(self, eye_image, est_gaze):
        """Here, we take the original eye image and overlay the estimated gaze."""
        output_image = np.copy(eye_image)
        center_x = self.image_width / 2
        center_y = self.image_height / 2
        endpoint_x, endpoint_y = gaze_tools.get_endpoint(est_gaze[0], est_gaze[1],
                                                         center_x, center_y, 50)
        cv2.line(output_image, (int(center_x), int(center_y)),
                 (int(endpoint_x), int(endpoint_y)), (255, 0, 0))
        return output_image

    def publish_image(self, image, image_publisher, timestamp):
        """This method publishes the `image` to the `image_publisher` with the given `timestamp`."""
        image_ros = self.bridge.cv2_to_imgmsg(image, "rgb8")
        image_ros.header.stamp = timestamp
        image_publisher.publish(image_ros)

    def input_from_image(self, eye_img_msg, flip=False):
        """This method converts an eye image provided by the landmark estimator into a format
        suitable for the gaze network."""
        cv_image = eye_img_msg
        # cv_image = self.bridge.imgmsg_to_cv2(eye_img_msg, "rgb8")
        if flip:
            cv_image = cv2.flip(cv_image, 1)
        currimg = cv_image.reshape(self.image_height, self.image_width, 3, order='F')
        currimg = currimg.astype(np.float32)
        # print('currimg.dtype', currimg.dtype)
        # cv2.imwrite('/home/tobias/test_inplace.png', currimg)
        testimg = np.zeros((1, self.image_height, self.image_width, 3))
        testimg[0, :, :, 0] = currimg[:, :, 0] - 103.939
        testimg[0, :, :, 1] = currimg[:, :, 1] - 116.779
        testimg[0, :, :, 2] = currimg[:, :, 2] - 123.68
        return testimg

    def compute_eye_gaze_estimation(self, subject_id, timestamp, input_r, input_l):
        """
        subject_id : integer, id of the subject
        input_x    : cv_image, input image of x eye
        (phi_x)    : double, phi angle estimated using pupil detection
        (theta_x)  : double, theta angle estimated using pupil detection
        """
        try:
            lct = self.tf_listener.getLatestCommonTime(self.rgb_frame_id_ros,
                                                       self.headpose_frame + str(subject_id))
            if (timestamp - lct).to_sec() < 0.25:
                tqdm.write('Time diff: ' + str((timestamp - lct).to_sec()))

                (trans_head, rot_head) = self.tf_listener.lookupTransform(
                    self.rgb_frame_id_ros, self.headpose_frame + str(subject_id), lct)
                euler_angles_head = gaze_tools.get_head_pose(trans_head, rot_head)
                phi_head, theta_head = gaze_tools.get_phi_theta_from_euler(euler_angles_head)
                self.last_phi_head, self.last_theta_head = phi_head, theta_head
            else:
                if self.use_last_headpose and self.last_phi_head is not None:
                    tqdm.write('Big time diff, use last known headpose! '
                               + str((timestamp - lct).to_sec()))
                    phi_head, theta_head = self.last_phi_head, self.last_theta_head
                else:
                    tqdm.write('Too big time diff for head pose, do not estimate gaze! '
                               + str((timestamp - lct).to_sec()))
                    return

            start_time = time.time()
            est_gaze_c = self.estimate_gaze_twoeyes(input_l, input_r,
                                                    np.array([theta_head, phi_head]))
            self.gaze_buffer_c[subject_id].append(est_gaze_c)

            if len(self.average_weights) == len(self.gaze_buffer_c[subject_id]):
                est_gaze_c_med = np.average(np.array(self.gaze_buffer_c[subject_id]),
                                            axis=0, weights=self.average_weights)
                self.publish_gaze(est_gaze_c_med, timestamp, subject_id)
                tqdm.write('est_gaze_c: ' + str(est_gaze_c_med))
                return est_gaze_c_med

            tqdm.write('Elapsed: ' + str(time.time() - start_time))
        except (tf.LookupException, tf.ConnectivityException,
                tf.ExtrapolationException, tf.Exception) as tf_e:
            print(tf_e)
        except rospy.ROSException as ros_e:
            if str(ros_e) == "publish() to a closed topic":
                print("See ya")
        return None

    def image_callback(self, subject_image_list, masked_list=None):
        """This method is called whenever new input arrives. The input is first converted into a format
        suitable for the gaze estimation network (see :meth:`input_from_image`), then the gaze is
        estimated (see :meth:`estimate_gaze`). The estimated gaze is overlaid on the input image
        (see :meth:`visualize_eye_result`), and this image is published along with the estimated gaze
        vector (see :meth:`publish_image` and :func:`publish_gaze`)."""
        timestamp = subject_image_list.header.stamp

        subjects_gaze_img = None
        subjects_dict = self.subjects_bridge.msg_to_images(subject_image_list)
        for subject_id, s in subjects_dict.items():
            if subject_id not in self.gaze_buffer_c.keys():
                self.gaze_buffer_c[subject_id] = collections.deque(maxlen=5)

            input_r = self.input_from_image(s.right, flip=False)
            input_l = self.input_from_image(s.left, flip=False)
            gaze_est = self.compute_eye_gaze_estimation(subject_id, timestamp, input_r, input_l)

            if gaze_est is not None:
                r_gaze_img = self.visualize_eye_result(s.right, gaze_est)
                l_gaze_img = self.visualize_eye_result(s.left, gaze_est)
                s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
                if subjects_gaze_img is None:
                    subjects_gaze_img = s_gaze_img
                else:
                    subjects_gaze_img = np.concatenate((subjects_gaze_img, s_gaze_img), axis=0)

        if subjects_gaze_img is not None:
            gaze_img_msg = self.bridge.cv2_to_imgmsg(subjects_gaze_img.astype(np.uint8), "bgr8")
            self.subjects_gaze_img.publish(gaze_img_msg)

    def publish_gaze(self, est_gaze, msg_stamp, subject_id):
        """Publish the estimated gaze direction as a TF frame below the estimated head pose."""
        theta_gaze = est_gaze[0]
        phi_gaze = est_gaze[1]
        euler_angle_gaze = gaze_tools.get_euler_from_phi_theta(phi_gaze, theta_gaze)
        quaternion_gaze = tf.transformations.quaternion_from_euler(*euler_angle_gaze)
        self.tf_broadcaster.sendTransform((0, 0, 0.05),  # publish it 5cm above the head pose's origin (nose tip)
                                          quaternion_gaze, msg_stamp,
                                          self.tf_prefix + "/world_gaze" + str(subject_id),
                                          self.headpose_frame + str(subject_id))
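# input_from_image above subtracts the per-channel VGG/ImageNet means
# (103.939, 116.779, 123.68) so eye patches match the distribution the networks
# were trained on. A numpy-only sketch of that normalisation step:
import numpy as np

CHANNEL_MEANS = (103.939, 116.779, 123.68)


def normalise_eye_patch(patch):
    """Subtract per-channel means and add the batch dimension; `patch` is an
    HxWx3 image in the channel order the models were trained with."""
    out = patch.astype(np.float32)
    for channel, mean in enumerate(CHANNEL_MEANS):
        out[:, :, channel] -= mean
    return out[np.newaxis, ...]  # add the batch dimension, as input_from_image does


if __name__ == '__main__':
    eye_patch = np.random.randint(0, 256, size=(36, 60, 3)).astype(np.float32)
    print(normalise_eye_patch(eye_patch).shape)  # (1, 36, 60, 3)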
class BlinkEstimatorNode(BlinkEstimatorBase):
    def __init__(self, device_id_blink, model_files, threshold):
        super(BlinkEstimatorNode, self).__init__(device_id_blink, model_files, threshold, (96, 96))
        self.sub = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList, self.callback,
                                    queue_size=1, buff_size=2**24)
        self.cv_bridge = CvBridge()
        self.bridge = SubjectListBridge()
        self.viz = rospy.get_param("~viz", True)

        self._last_time = rospy.Time().now()
        self._freq_deque = collections.deque(maxlen=30)  # average frequency statistic over roughly one second
        self._latency_deque = collections.deque(maxlen=30)

        self.blink_publisher = rospy.Publisher("/subjects/blink", MSG_BlinkList, queue_size=3)
        if self.viz:
            self.viz_pub = rospy.Publisher(rospy.get_param("~viz_topic", "/subjects/blink_images"),
                                           Image, queue_size=3)

    def callback(self, msg):
        subjects = self.bridge.msg_to_images(msg)
        left_eyes = []
        right_eyes = []

        for subject in subjects.values():
            left_eyes.append(self.resize_img(subject.left))
            right_eyes.append(cv2.flip(self.resize_img(subject.right), 1))

        if len(left_eyes) == 0:
            return

        probs, _ = self.predict(left_eyes, right_eyes)

        self.publish_msg(msg.header.stamp, subjects, probs)

        if self.viz:
            blink_image_list = []
            for subject, p in zip(subjects.values(), probs):
                resized_face = cv2.resize(subject.face, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
                blink_image_list.append(self.overlay_prediction_over_img(resized_face, p))

            if len(blink_image_list) > 0:
                blink_viz_img = self.cv_bridge.cv2_to_imgmsg(np.hstack(blink_image_list), encoding="bgr8")
                blink_viz_img.header.stamp = msg.header.stamp
                self.viz_pub.publish(blink_viz_img)

        _now = rospy.Time().now()
        timestamp = msg.header.stamp
        _freq = 1.0 / (_now - self._last_time).to_sec()
        self._freq_deque.append(_freq)
        self._latency_deque.append(_now.to_sec() - timestamp.to_sec())
        self._last_time = _now
        tqdm.write('\033[2K\033[1;32mTime now: {:.2f} message color: {:.2f} latency: {:.2f}s for {} subject(s) {:.0f}Hz\033[0m'
                   .format(_now.to_sec(), timestamp.to_sec(), np.mean(self._latency_deque),
                           len(subjects), np.mean(self._freq_deque)), end="\r")

    def publish_msg(self, timestamp, subjects, probabilities):
        blink_msg_list = MSG_BlinkList()
        blink_msg_list.header.stamp = timestamp
        blink_msg_list.header.frame_id = '0'
        for subject_id, p in zip(subjects.keys(), probabilities):
            blink_msg = MSG_Blink()
            blink_msg.subject_id = str(subject_id)
            blink_msg.blink = p >= self.threshold
            blink_msg.probability = p
            blink_msg_list.subjects.append(blink_msg)

        self.blink_publisher.publish(blink_msg_list)
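# A hypothetical bring-up for the blink node, analogous to the gaze node above;
# the parameter names here are assumptions, and the package's launch files
# define the real ones.
if __name__ == '__main__':
    rospy.init_node('estimate_blink')
    blink_estimator = BlinkEstimatorNode(
        device_id_blink=rospy.get_param('~device_id_blinkestimation', '/gpu:0'),
        model_files=rospy.get_param('~model_files'),
        threshold=rospy.get_param('~threshold', 0.5))
    rospy.spin()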