class Mask(Masker): """ Neural network to process face image into a segmentation mask of the face """ def __init__(self, **kwargs): git_model_id = 8 model_filename = "Nirkin_300_softmax_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "VGG Clear" self.input_size = 300 self.vram = 2944 self.vram_warnings = 1088 # at BS 1. OOMs at higher batch sizes self.vram_per_batch = 400 self.batchsize = self.config["batch-size"] def init_model(self): self.model = KSession(self.name, self.model_path, model_kwargs=dict(), allow_growth=self.config["allow_growth"], exclude_gpus=self._exclude_gpus) self.model.load_model() self.model.append_softmax_activation(layer_index=-1) placeholder = np.zeros( (self.batchsize, self.input_size, self.input_size, 3), dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ input_ = np.array( [face.feed_face[..., :3] for face in batch["detected_faces"]], dtype="float32") batch["feed"] = input_ - np.mean(input_, axis=(1, 2))[:, None, None, :] logger.trace("feed shape: %s", batch["feed"].shape) return batch def predict(self, batch): """ Run model to get predictions """ predictions = self.model.predict(batch["feed"]) batch["prediction"] = predictions[..., -1] return batch def process_output(self, batch): """ Compile found faces for output """ return batch
class Mask(Masker): """ Neural network to process face image into a segmentation mask of the face """ def __init__(self, **kwargs): git_model_id = 6 model_filename = "DFL_256_sigmoid_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "U-Net" self.input_size = 256 self.vram = 3424 self.vram_warnings = 256 self.vram_per_batch = 80 self.batchsize = self.config["batch-size"] self._storage_centering = "legacy" def init_model(self): self.model = KSession(self.name, self.model_path, model_kwargs=dict(), allow_growth=self.config["allow_growth"], exclude_gpus=self._exclude_gpus) self.model.load_model() placeholder = np.zeros( (self.batchsize, self.input_size, self.input_size, 3), dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ batch["feed"] = np.array( [feed.face[..., :3] for feed in batch["feed_faces"]], dtype="float32") / 255.0 logger.trace("feed shape: %s", batch["feed"].shape) return batch def predict(self, batch): """ Run model to get predictions """ batch["prediction"] = self.model.predict(batch["feed"]) return batch def process_output(self, batch): """ Compile found faces for output """ return batch
class Mask(Masker): """ Perform transformation to align and get landmarks """ def __init__(self, **kwargs): git_model_id = 6 model_filename = "DFL_256_sigmoid_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "U-Net" self.input_size = 256 self.vram = 3424 self.vram_warnings = 256 self.vram_per_batch = 80 self.batchsize = self.config["batch-size"] def init_model(self): self.model = KSession(self.name, self.model_path, model_kwargs=dict()) self.model.load_model() placeholder = np.zeros( (self.batchsize, self.input_size, self.input_size, 3), dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ batch["feed"] = np.array( [face.feed_face[..., :3] for face in batch["detected_faces"]], dtype="float32") / 255.0 logger.trace("feed shape: %s", batch["feed"].shape) return batch def predict(self, batch): """ Run model to get predictions """ batch["prediction"] = self.model.predict(batch["feed"]) return batch def process_output(self, batch): """ Compile found faces for output """ return batch
class Mask(Masker): """ Perform transformation to align and get landmarks """ def __init__(self, **kwargs): git_model_id = 5 model_filename = "Nirkin_500_softmax_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "VGG Obstructed" self.input_size = 500 self.vram = 3936 self.vram_warnings = 1088 # at BS 1. OOMs at higher batchsizes self.vram_per_batch = 304 self.batchsize = self.config["batch-size"] def init_model(self): self.model = KSession(self.name, self.model_path, model_kwargs=dict()) self.model.load_model() self.model.append_softmax_activation(layer_index=-1) placeholder = np.zeros((self.batchsize, self.input_size, self.input_size, 3), dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ input_ = [face.feed_face[..., :3] for face in batch["detected_faces"]] batch["feed"] = input_ - np.mean(input_, axis=(1, 2))[:, None, None, :] logger.trace("feed shape: %s", batch["feed"].shape) return batch def predict(self, batch): """ Run model to get predictions """ predictions = self.model.predict(batch["feed"]) batch["prediction"] = predictions[..., 0] * -1.0 + 1.0 return batch def process_output(self, batch): """ Compile found faces for output """ return batch
class Align(Aligner): """ Perform transformation to align and get landmarks """ def __init__(self, **kwargs): git_model_id = 9 model_filename = "face-alignment-network_2d4_keras_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "FAN" self.input_size = 256 self.colorformat = "RGB" self.vram = 2240 self.vram_warnings = 512 # Will run at this with warnings self.vram_per_batch = 64 self.batchsize = self.config["batch-size"] self.reference_scale = 195 def init_model(self): """ Initialize FAN model """ model_kwargs = dict( custom_objects={'TorchBatchNorm2D': TorchBatchNorm2D}) self.model = KSession(self.name, self.model_path, model_kwargs=model_kwargs, allow_growth=self.config["allow_growth"]) self.model.load_model() # Feed a placeholder so Aligner is primed for Manual tool placeholder = np.zeros( (self.batchsize, 3, self.input_size, self.input_size), dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ # TODO Batching logger.trace("Aligning faces around center") batch["center_scale"] = self.get_center_scale(batch["detected_faces"]) faces = self.crop(batch) logger.trace("Aligned image around center") faces = self._normalize_faces(faces) batch["feed"] = np.array(faces, dtype="float32").transpose( (0, 3, 1, 2)) / 255.0 return batch def get_center_scale(self, detected_faces): """ Get the center and set scale of bounding box """ logger.trace("Calculating center and scale") l_center = [] l_scale = [] for face in detected_faces: center = np.array([(face.left + face.right) / 2.0, (face.top + face.bottom) / 2.0]) center[1] -= face.h * 0.12 l_center.append(center) l_scale.append((face.w + face.h) / self.reference_scale) logger.trace("Calculated center and scale: %s, %s", l_center, l_scale) return l_center, l_scale def crop(self, batch): # pylint:disable=too-many-locals """ Crop image around the center point """ logger.trace("Cropping images") new_images = [] for face, center, scale in zip(batch["detected_faces"], *batch["center_scale"]): is_color = face.image.ndim > 2 v_ul = self.transform([1, 1], center, scale, self.input_size).astype(np.int) v_br = self.transform([self.input_size, self.input_size], center, scale, self.input_size).astype(np.int) if is_color: new_dim = np.array([ v_br[1] - v_ul[1], v_br[0] - v_ul[0], face.image.shape[2] ], dtype=np.int32) new_img = np.zeros(new_dim, dtype=np.uint8) else: new_dim = np.array([v_br[1] - v_ul[1], v_br[0] - v_ul[0]], dtype=np.int) new_img = np.zeros(new_dim, dtype=np.uint8) height = face.image.shape[0] width = face.image.shape[1] new_x = np.array( [max(1, -v_ul[0] + 1), min(v_br[0], width) - v_ul[0]], dtype=np.int32) new_y = np.array( [max(1, -v_ul[1] + 1), min(v_br[1], height) - v_ul[1]], dtype=np.int32) old_x = np.array( [max(1, v_ul[0] + 1), min(v_br[0], width)], dtype=np.int32) old_y = np.array([max(1, v_ul[1] + 1), min(v_br[1], height)], dtype=np.int32) if is_color: new_img[new_y[0] - 1:new_y[1], new_x[0] - 1:new_x[1]] = face.image[old_y[0] - 1:old_y[1], old_x[0] - 1:old_x[1], :] else: new_img[new_y[0] - 1:new_y[1], new_x[0] - 1:new_x[1]] = face.image[old_y[0] - 1:old_y[1], old_x[0] - 1:old_x[1]] if new_img.shape[0] < self.input_size: interpolation = cv2.INTER_CUBIC # pylint:disable=no-member else: interpolation = cv2.INTER_AREA # pylint:disable=no-member new_images.append( cv2.resize( new_img, # pylint:disable=no-member dsize=(int(self.input_size), int(self.input_size)), interpolation=interpolation)) logger.trace("Cropped 
images") return new_images @staticmethod def transform(point, center, scale, resolution): """ Transform Image """ logger.trace("Transforming Points") pnt = np.array([point[0], point[1], 1.0]) hscl = 200.0 * scale eye = np.eye(3) eye[0, 0] = resolution / hscl eye[1, 1] = resolution / hscl eye[0, 2] = resolution * (-center[0] / hscl + 0.5) eye[1, 2] = resolution * (-center[1] / hscl + 0.5) eye = np.linalg.inv(eye) retval = np.matmul(eye, pnt)[0:2] logger.trace("Transformed Points: %s", retval) return retval def predict(self, batch): """ Predict the 68 point landmarks """ logger.trace("Predicting Landmarks") batch["prediction"] = self.model.predict(batch["feed"])[-1] logger.trace([pred.shape for pred in batch["prediction"]]) return batch def process_output(self, batch): """ Process the output from the model """ self.get_pts_from_predict(batch) return batch def get_pts_from_predict(self, batch): """ Get points from predictor """ logger.trace("Obtain points from prediction") landmarks = [] for prediction, center, scale in zip(batch["prediction"], *batch["center_scale"]): var_b = prediction.reshape( (prediction.shape[0], prediction.shape[1] * prediction.shape[2])) var_c = var_b.argmax(1).reshape( (prediction.shape[0], 1)).repeat(2, axis=1).astype(np.float) var_c[:, 0] %= prediction.shape[2] var_c[:, 1] = np.apply_along_axis( lambda x: np.floor(x / prediction.shape[2]), 0, var_c[:, 1]) for i in range(prediction.shape[0]): pt_x, pt_y = int(var_c[i, 0]), int(var_c[i, 1]) if 63 > pt_x > 0 and 63 > pt_y > 0: diff = np.array([ prediction[i, pt_y, pt_x + 1] - prediction[i, pt_y, pt_x - 1], prediction[i, pt_y + 1, pt_x] - prediction[i, pt_y - 1, pt_x] ]) var_c[i] += np.sign(diff) * 0.25 var_c += 0.5 landmarks = [ self.transform(var_c[i], center, scale, prediction.shape[2]) for i in range(prediction.shape[0]) ] batch.setdefault("landmarks", []).append(landmarks) logger.trace("Obtained points from prediction: %s", batch["landmarks"])
class Align(Aligner): """ Perform transformation to align and get landmarks """ def __init__(self, **kwargs): git_model_id = 9 model_filename = "face-alignment-network_2d4_keras_v1.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "FAN" self.input_size = 256 self.color_format = "RGB" self.vram = 2240 self.vram_warnings = 512 # Will run at this with warnings self.vram_per_batch = 64 self.batchsize = self.config["batch-size"] self.reference_scale = 200. / 195. def init_model(self): """ Initialize FAN model """ model_kwargs = dict( custom_objects={'TorchBatchNorm2D': TorchBatchNorm2D}) self.model = KSession(self.name, self.model_path, model_kwargs=model_kwargs, allow_growth=self.config["allow_growth"]) self.model.load_model() # Feed a placeholder so Aligner is primed for Manual tool placeholder_shape = (self.batchsize, 3, self.input_size, self.input_size) placeholder = np.zeros(placeholder_shape, dtype="float32") self.model.predict(placeholder) def process_input(self, batch): """ Compile the detected faces for prediction """ logger.debug("Aligning faces around center") batch["center_scale"] = self.get_center_scale(batch["detected_faces"]) faces = self.crop(batch) logger.trace("Aligned image around center") faces = self._normalize_faces(faces) batch["feed"] = np.array(faces, dtype="float32")[..., :3].transpose( (0, 3, 1, 2)) / 255.0 return batch def get_center_scale(self, detected_faces): """ Get the center and set scale of bounding box """ logger.debug("Calculating center and scale") center_scale = np.empty((len(detected_faces), 68, 3), dtype='float32') for index, face in enumerate(detected_faces): x_center = (face.left + face.right) / 2.0 y_center = (face.top + face.bottom) / 2.0 - face.h * 0.12 scale = (face.w + face.h) * self.reference_scale center_scale[index, :, 0] = np.full(68, x_center, dtype='float32') center_scale[index, :, 1] = np.full(68, y_center, dtype='float32') center_scale[index, :, 2] = np.full(68, scale, dtype='float32') logger.trace("Calculated center and scale: %s", center_scale) return center_scale def crop(self, batch): # pylint:disable=too-many-locals """ Crop image around the center point """ logger.debug("Cropping images") sizes = (self.input_size, self.input_size) batch_shape = batch["center_scale"].shape[:2] resolutions = np.full(batch_shape, self.input_size, dtype='float32') matrix_ones = np.ones(batch_shape + (3, ), dtype='float32') matrix_size = np.full(batch_shape + (3, ), self.input_size, dtype='float32') matrix_size[..., 2] = 1.0 upper_left = self.transform(matrix_ones, batch["center_scale"], resolutions) bot_right = self.transform(matrix_size, batch["center_scale"], resolutions) # TODO second pass .. 
convert to matrix new_images = [] for image, top_left, bottom_right in zip(batch["image"], upper_left, bot_right): height, width = image.shape[:2] channels = 3 if image.ndim > 2 else 1 bottom_right_width, bottom_right_height = bottom_right[0].astype( 'int32') top_left_width, top_left_height = top_left[0].astype('int32') new_dim = (bottom_right_height - top_left_height, bottom_right_width - top_left_width, channels) new_img = np.empty(new_dim, dtype=np.uint8) new_x = slice(max(0, -top_left_width), min(bottom_right_width, width) - top_left_width) new_y = slice(max(0, -top_left_height), min(bottom_right_height, height) - top_left_height) old_x = slice(max(0, top_left_width), min(bottom_right_width, width)) old_y = slice(max(0, top_left_height), min(bottom_right_height, height)) new_img[new_y, new_x] = image[old_y, old_x] interp = cv2.INTER_CUBIC if new_dim[ 0] < self.input_size else cv2.INTER_AREA new_images.append( cv2.resize(new_img, dsize=sizes, interpolation=interp)) logger.trace("Cropped images") return new_images @staticmethod def transform(points, center_scales, resolutions): """ Transform Image """ logger.debug("Transforming Points") num_images, num_landmarks = points.shape[:2] transform_matrix = np.eye(3, dtype='float32') transform_matrix = np.repeat(transform_matrix[None, :], num_landmarks, axis=0) transform_matrix = np.repeat(transform_matrix[None, :, :], num_images, axis=0) scales = center_scales[:, :, 2] / resolutions translations = center_scales[..., 2:3] * -0.5 + center_scales[..., :2] transform_matrix[:, :, 0, 0] = scales # x scale transform_matrix[:, :, 1, 1] = scales # y scale transform_matrix[:, :, 0, 2] = translations[:, :, 0] # x translation transform_matrix[:, :, 1, 2] = translations[:, :, 1] # y translation new_points = np.einsum('abij, abj -> abi', transform_matrix, points, optimize='greedy') retval = new_points[:, :, :2].astype('float32') logger.trace("Transformed Points: %s", retval) return retval def predict(self, batch): """ Predict the 68 point landmarks """ logger.debug("Predicting Landmarks") batch["prediction"] = self.model.predict(batch["feed"])[-1] logger.trace([pred.shape for pred in batch["prediction"]]) return batch def process_output(self, batch): """ Process the output from the model """ self.get_pts_from_predict(batch) return batch def get_pts_from_predict(self, batch): """ Get points from predictor """ logger.debug("Obtain points from prediction") num_images, num_landmarks, height, width = batch["prediction"].shape image_slice = np.repeat(np.arange(num_images)[:, None], num_landmarks, axis=1) landmark_slice = np.repeat(np.arange(num_landmarks)[None, :], num_images, axis=0) resolution = np.full((num_images, num_landmarks), 64, dtype='int32') subpixel_landmarks = np.ones((num_images, num_landmarks, 3), dtype='float32') flat_indices = batch["prediction"].reshape(num_images, num_landmarks, -1).argmax(-1) indices = np.array(np.unravel_index(flat_indices, (height, width))) min_clipped = np.minimum(indices + 1, height - 1) max_clipped = np.maximum(indices - 1, 0) offsets = [(image_slice, landmark_slice, indices[0], min_clipped[1]), (image_slice, landmark_slice, indices[0], max_clipped[1]), (image_slice, landmark_slice, min_clipped[0], indices[1]), (image_slice, landmark_slice, max_clipped[0], indices[1])] x_subpixel_shift = batch["prediction"][ offsets[0]] - batch["prediction"][offsets[1]] y_subpixel_shift = batch["prediction"][ offsets[2]] - batch["prediction"][offsets[3]] # TODO improve rudimentary sub-pixel logic to centroid of 3x3 window algorithm 
subpixel_landmarks[:, :, 0] = indices[1] + np.sign( x_subpixel_shift) * 0.25 + 0.5 subpixel_landmarks[:, :, 1] = indices[0] + np.sign( y_subpixel_shift) * 0.25 + 0.5 batch["landmarks"] = self.transform(subpixel_landmarks, batch["center_scale"], resolution) logger.trace("Obtained points from prediction: %s", batch["landmarks"])
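
# --- Hedged example (not part of the plugin) --------------------------------
# ``get_pts_from_predict`` above takes the argmax of each 64x64 heatmap and
# then shifts the peak a quarter pixel towards the higher-valued neighbour.
# A single-heatmap sketch of the same peak extraction on synthetic data:
def _demo_heatmap_peak():
    import numpy as np

    heatmap = np.zeros((64, 64), dtype="float32")
    heatmap[30, 40] = 1.0   # the peak
    heatmap[30, 41] = 0.6   # stronger right neighbour pulls the shift +x
    heatmap[31, 40] = 0.2   # stronger lower neighbour pulls the shift +y

    peak_y, peak_x = np.unravel_index(heatmap.argmax(), heatmap.shape)
    x_shift = heatmap[peak_y, min(peak_x + 1, 63)] - heatmap[peak_y, max(peak_x - 1, 0)]
    y_shift = heatmap[min(peak_y + 1, 63), peak_x] - heatmap[max(peak_y - 1, 0), peak_x]

    landmark = (peak_x + np.sign(x_shift) * 0.25 + 0.5,
                peak_y + np.sign(y_shift) * 0.25 + 0.5)
    assert landmark == (40.75, 30.75)  # heatmap coordinates, pre-transform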
class VGGFace2(Extractor):  # pylint:disable=abstract-method
    """ VGG Face feature extraction.

    Extracts feature vectors from faces in order to compare similarity.

    Notes
    -----
    Input images should be in BGR order

    Model exported from: https://github.com/WeidiXie/Keras-VGGFace2-ResNet50 which is based on:
    https://www.robots.ox.ac.uk/~vgg/software/vgg_face/

    Licensed under the Creative Commons Attribution-NonCommercial 4.0 License:
    https://creativecommons.org/licenses/by-nc/4.0/
    """

    def __init__(self, *args, **kwargs):  # pylint:disable=unused-argument
        logger.debug("Initializing %s", self.__class__.__name__)
        git_model_id = 10
        model_filename = ["vggface2_resnet50_v2.h5"]
        super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs)
        self._plugin_type = "recognition"
        self.name = "VGG_Face2"
        self.input_size = 224
        # Average image provided in https://github.com/ox-vgg/vgg_face2
        self._average_img = np.array([91.4953, 103.8827, 131.0912])
        logger.debug("Initialized %s", self.__class__.__name__)

    # <<< GET MODEL >>> #
    def init_model(self):
        """ Initialize VGG Face 2 Model. """
        model_kwargs = dict(custom_objects={'L2_normalize': L2_normalize})
        self.model = KSession(self.name,
                              self.model_path,
                              model_kwargs=model_kwargs,
                              allow_growth=self.config["allow_growth"],
                              exclude_gpus=self._exclude_gpus)
        self.model.load_model()

    def predict(self, batch):
        """ Return encodings for given image from vgg_face2.

        Parameters
        ----------
        batch: numpy.ndarray
            The face to be fed through the predictor. Should be in BGR channel order

        Returns
        -------
        numpy.ndarray
            The encodings for the face
        """
        face = batch
        if face.shape[0] != self.input_size:
            face = self._resize_face(face)
        face = face[None, :, :, :3] - self._average_img
        preds = self.model.predict(face)
        return preds[0, :]

    def _resize_face(self, face):
        """ Resize incoming face to model_input_size.

        Parameters
        ----------
        face: numpy.ndarray
            The face to be fed through the predictor. Should be in BGR channel order

        Returns
        -------
        numpy.ndarray
            The face resized to model input size
        """
        sizes = (self.input_size, self.input_size)
        interpolation = cv2.INTER_CUBIC if face.shape[0] < self.input_size else cv2.INTER_AREA
        face = cv2.resize(face, dsize=sizes, interpolation=interpolation)
        return face

    @staticmethod
    def find_cosine_similiarity(source_face, test_face):
        """ Find the cosine similarity between two faces.

        Parameters
        ----------
        source_face: numpy.ndarray
            The first face to test against :attr:`test_face`
        test_face: numpy.ndarray
            The second face to test against :attr:`source_face`

        Returns
        -------
        float:
            The cosine similarity between the two faces
        """
        var_a = np.matmul(np.transpose(source_face), test_face)
        var_b = np.sum(np.multiply(source_face, source_face))
        var_c = np.sum(np.multiply(test_face, test_face))
        return 1 - (var_a / (np.sqrt(var_b) * np.sqrt(var_c)))

    def sorted_similarity(self, predictions, method="ward"):
        """ Sort a matrix of predictions by similarity.

        Transforms a distance matrix into a sorted distance matrix according to the order
        implied by the hierarchical tree (dendrogram).

        Parameters
        ----------
        predictions: numpy.ndarray
            A stacked matrix of vgg_face2 predictions of the shape (`N`, `D`) where `N` is the
            number of observations and `D` is the number of dimensions. NB: The given
            :attr:`predictions` will be overwritten to save memory. If you still require the
            original values you should take a copy prior to running this method
        method: ['single', 'centroid', 'median', 'ward']
            The clustering method to use.

        Returns
        -------
        list:
            List of indices with the order implied by the hierarchical tree
        """
        logger.info("Sorting face distances. Depending on your dataset this may take some "
                    "time...")
        num_predictions, dims = predictions.shape
        kwargs = dict(method=method)
        if self._use_vector_linkage(num_predictions, dims):
            func = linkage_vector
        else:
            kwargs["preserve_input"] = False
            func = linkage
        result_linkage = func(predictions, **kwargs)
        result_order = self._seriation(result_linkage,
                                       num_predictions,
                                       num_predictions + num_predictions - 2)
        return result_order

    @staticmethod
    def _use_vector_linkage(item_count, dims):
        """ Calculate the RAM that will be required to sort these images and select the
        appropriate clustering method.

        From fastcluster documentation:
            "While the linkage method requires Θ(N²) memory for clustering of N points, this
            [vector] method needs Θ(ND) for N points in R^D, which is usually much smaller."
        also:
            "half the memory can be saved by specifying :attr:`preserve_input`=``False``"

        To avoid under-estimating, the memory calculation is divided by 1.8 instead of 2.

        Parameters
        ----------
        item_count: int
            The number of images that are to be processed
        dims: int
            The number of dimensions in the vgg_face output

        Returns
        -------
        bool:
            ``True`` if vector_linkage should be used. ``False`` if linkage should be used
        """
        np_float = 24  # bytes size of a numpy float
        divider = 1024 * 1024  # bytes to MB
        free_ram = psutil.virtual_memory().available / divider
        linkage_required = (((item_count ** 2) * np_float) / 1.8) / divider
        vector_required = ((item_count * dims) * np_float) / divider
        logger.debug("free_ram: %sMB, linkage_required: %sMB, vector_required: %sMB",
                     int(free_ram), int(linkage_required), int(vector_required))
        if linkage_required < free_ram:
            logger.verbose("Using linkage method")
            retval = False
        elif vector_required < free_ram:
            logger.warning("Not enough RAM to perform linkage clustering. Using vector "
                           "clustering. This will be significantly slower. Free RAM: %sMB. "
                           "Required for linkage method: %sMB",
                           int(free_ram), int(linkage_required))
            retval = True
        else:
            raise FaceswapError("Not enough RAM available to sort faces. Try reducing "
                                "the size of your dataset. Free RAM: {}MB. "
                                "Required RAM: {}MB".format(int(free_ram), int(vector_required)))
        logger.debug(retval)
        return retval

    def _seriation(self, tree, points, current_index):
        """ Seriation method for sorted similarity.

        Seriation computes the order implied by a hierarchical tree (dendrogram).

        Parameters
        ----------
        tree: numpy.ndarray
            A hierarchical tree (dendrogram)
        points: int
            The number of points given to the clustering process
        current_index: int
            The position in the tree for the recursive traversal

        Returns
        -------
        list:
            The indices in the order implied by the hierarchical tree
        """
        if current_index < points:
            return [current_index]
        left = int(tree[current_index - points, 0])
        right = int(tree[current_index - points, 1])
        return self._seriation(tree, points, left) + self._seriation(tree, points, right)
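
# --- Hedged example (not part of the plugin) --------------------------------
# ``find_cosine_similiarity`` above returns a cosine *distance*: identical
# embeddings score 0.0 and orthogonal ones score 1.0. A self-contained usage
# sketch with made-up 2-D embeddings (real vgg_face2 vectors are much wider):
def _demo_cosine_distance():
    import numpy as np

    def cosine_distance(source_face, test_face):
        # Same formula as VGGFace2.find_cosine_similiarity above.
        dot = np.matmul(np.transpose(source_face), test_face)
        norm_sq_a = np.sum(source_face * source_face)
        norm_sq_b = np.sum(test_face * test_face)
        return 1 - dot / (np.sqrt(norm_sq_a) * np.sqrt(norm_sq_b))

    emb_a = np.array([0.6, 0.8])
    emb_b = np.array([0.8, -0.6])
    assert np.isclose(cosine_distance(emb_a, emb_a), 0.0)  # identical
    assert np.isclose(cosine_distance(emb_a, emb_b), 1.0)  # orthogonal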