def _process_image(self, img_file_path: str) -> tuple:
    """
    Detect every face in an image file and collect
    one description vector plus one metadata record per face.

    :param img_file_path: str - Path to image file.
    :return: (list, list) - Description vectors and, in the same order,
                            the metadata structures belonging to them.
                            Both lists are empty when the image could
                            not be read or no face was described.
    """

    # Load pixel data (BGR) from the given file.
    bgr_pixels = image.read_image_from_file(img_file_path)

    # Guard clause: an image without any pixel data means
    # the read failed, so there is nothing to describe.
    if bgr_pixels.size < 1:
        return [], []

    # Face description operates on RGB encoded pixel data.
    rgb_pixels = image.swap_color_encoding(bgr_pixels)

    # Only the bare file name (without directories) goes into the metadata.
    file_name = os.path.basename(img_file_path)

    descriptors = []
    records = []

    # Each entry holds the description vector followed by the top left
    # and bottom right corner of the bounding box of one detected face.
    for descriptor, top_left, bottom_right in \
            self._get_face_descriptions(rgb_pixels):
        descriptors.append(descriptor)
        records.append(
            DataSet.create_metadata(file_name, top_left, bottom_right))

    return descriptors, records
def _process_video(self, video_file_path: str,
                   num_skipped_frames: int) -> tuple:
    """
    Extract all faces in given video file and
    compute corresponding description vectors.

    The video is sampled every (num_skipped_frames + 1) frames. A face
    whose description vector lies within
    settings.DISTANCE_THRESHOLD_RECOGNITION of any vector found in the
    previously processed sample is treated as already known and is not
    stored again.

    :param video_file_path: str - Path to video file.
    :param num_skipped_frames: int - Number of skipped frames
                                     between two consecutive samples.
                                     Values below 0 are treated as 0.
    :return: (list, list) - List of data points and another list of
                            corresponding metadata structures. Return
                            empty lists in case no description vectors
                            could be retrieved.
    """

    # Convert the number of skipped frames into the forward step
    # between two samples. Clamp it to at least one frame, otherwise
    # the loop below would never advance (or seek backwards).
    frame_step = max(num_skipped_frames + 1, 1)

    # Only the bare file name is stored in the metadata;
    # it is the same for every frame, so compute it once.
    video_file_name = os.path.basename(video_file_path)

    # Description vectors and their metadata structures (same order).
    vector_list = []
    metadata_list = []

    # Description vectors of the last processed sample. Used as cache
    # to avoid multiple insertions of description vectors
    # corresponding to the same person.
    vectors_in_last_sample = []

    # Open video file.
    cap = cv2.VideoCapture(video_file_path)

    try:
        # Read frames with specified sample rate.
        next_frame_index = 0
        ret = True

        while ret:
            # Jump to next frame index and read the frame there.
            cap.set(cv2.CAP_PROP_POS_FRAMES, next_frame_index)
            ret, frame_bgr = cap.read()

            # Advance to the following sample *before* any 'continue',
            # so an invalid frame can never be re-read endlessly.
            next_frame_index += frame_step

            if frame_bgr is None:
                # Current frame is invalid, therefore skip it. The loop
                # ends here if the read itself failed (ret is False),
                # otherwise it carries on with the next sample.
                continue

            # Convert retrieved frame to RGB color encoding.
            frame_rgb = image.swap_color_encoding(frame_bgr)

            # After a read the capture position refers to the following
            # frame, hence the index of the frame just read is one less.
            frame_index = int(cap.get(cv2.CAP_PROP_POS_FRAMES)) - 1

            # Get description vector and bounding box
            # of all detected faces in current frame.
            face_descriptions = self._get_face_descriptions(frame_rgb)

            for vector, top_left, bottom_right in face_descriptions:
                # Compare current vector to those detected in the last
                # sample. Only when its distance to all of them exceeds
                # the threshold, assume a new face which needs storing.
                present_in_last_sample = any(
                    numpy.less_equal(
                        calculation.get_distance(last_vector, vector),
                        settings.DISTANCE_THRESHOLD_RECOGNITION)
                    for last_vector in vectors_in_last_sample)

                if present_in_last_sample:
                    continue

                # Store description vector of current face.
                vector_list.append(vector)

                # Create metadata structure for current face in
                # specified video file frame: file name, frame index
                # and top left / bottom right bounding box corners.
                metadata_list.append(DataSet.create_metadata(
                    video_file_name, frame_index,
                    top_left, bottom_right))

            # Update description vectors of last
            # sample to those currently retrieved.
            vectors_in_last_sample = [
                vector
                for vector, _top_left, _bottom_right in face_descriptions]
    finally:
        # Close video file, even if processing a frame raised.
        cap.release()

    return vector_list, metadata_list