def VideoMain(args):
    TRAINED_MODEL = '../TrainedModels/MobileNetV2/' + args.TRAINED_MODEL + '/facenet.ckpt'
    model.load_weights(TRAINED_MODEL).expect_partial()

    if args.MODE == 'VIDEO':
        cap = cv2.VideoCapture(args.video_path)
    elif args.MODE == 'WEBCAM':
        cap = cv2.VideoCapture("http://192.168.1.6:4747/video")
    if not cap.isOpened():
        print("Could not open video device")
        return

    while True:
        ret, frame = cap.read()
        if not ret:  # end of stream or read error
            break
        start = time.time()
        image = cv2.flip(frame, 1)  # mirror for a natural webcam view
        boxes, points = detector.detect(image, select_largest=args.select_largest,
                                        proba=False, landmarks=True)
        if boxes is not None:
            for box in boxes:
                # Draw the detection box, then classify the cropped face.
                cv2.rectangle(image,
                              (np.int32(box[0]), np.int32(box[1])),
                              (np.int32(box[2]), np.int32(box[3])),
                              (255, 255, 255), 1)
                face = extract_face(image, box, image_size=160)
                face_embedding = model(np.float32(face), True)
                predictions = svm_model.predict_proba(face_embedding)
                best_class_idxs = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_idxs)),
                                                       best_class_idxs]
                for i in range(len(best_class_idxs)):
                    print('%4d %s: %.3f' % (i, class_names[best_class_idxs[i]],
                                            best_class_probabilities[i]))
                    predict_str = '%s: %.3f' % (class_names[best_class_idxs[i]],
                                                best_class_probabilities[i])
                    pos = (np.int32(box[0]), np.int32(box[1]))
                    cv2.putText(image, predict_str, pos, FONT, 0.5,
                                (255, 255, 255), 1, cv2.LINE_AA)
        fps_str = 'fps: %3d' % int(1 / (time.time() - start))
        cv2.putText(image, fps_str, (10, 50), FONT, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow('preview', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
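
# VideoMain above and ImageMain below rely on module-level globals (model,
# detector, svm_model, class_names, FONT) and helpers (extract_face,
# img_resize) defined elsewhere in this script. A minimal sketch of that
# setup follows; the import paths, builder names, and classifier pickle
# layout are assumptions for illustration, not the repo's actual ones.
import pickle
import time

import cv2
import numpy as np

from models import build_facenet_model      # assumption: project-local Keras model builder
from detector import FaceDetector           # assumption: MTCNN wrapper exposing .detect()/.align()
from utils import extract_face, img_resize  # assumption: crop/resize helpers

FONT = cv2.FONT_HERSHEY_SIMPLEX             # assumption: font constant used by putText

model = build_facenet_model()               # MobileNetV2-backed FaceNet (tf.keras)
detector = FaceDetector(image_size=160)

# Assumption: the SVM classifier and its label names were pickled together.
with open('../TrainedModels/SVM/classifier.pkl', 'rb') as f:
    svm_model, class_names = pickle.load(f)
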
def align(self, img, select_largest=True, save_path=None):
    assert img is not None
    boxes, points = self.detect(img, select_largest=select_largest,
                                proba=False, landmarks=True)
    if boxes is None:
        return None
    thumbnails = []
    # Crop one aligned thumbnail per detected face, optionally saving each.
    for i, (box, point) in enumerate(zip(boxes, points), start=1):
        thumbnail = detect_face.extract_face(img, box, self.image_size)
        thumbnails.append(thumbnail)
        if save_path is not None:
            save_img(thumbnail, save_path + 'face_' + str(i) + '.jpg')
    return thumbnails
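
# Usage sketch for align(): detect every face in an image and save aligned
# crops. save_path acts as a filename prefix, so the files below land at
# out/aligned_face_1.jpg, out/aligned_face_2.jpg, and so on. The detector
# instance and image path are assumptions for illustration.
img = cv2.imread('group_photo.jpg')
thumbnails = detector.align(img, select_largest=False, save_path='out/aligned_')
if thumbnails is None:
    print('no faces found')
else:
    print('aligned %d face(s)' % len(thumbnails))
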
def ImageMain(args):
    TRAINED_MODEL = '../TrainedModels/MobileNetV2/' + args.TRAINED_MODEL + '/facenet.ckpt'
    model.load_weights(TRAINED_MODEL).expect_partial()

    image = cv2.imread(args.image_path)
    if image is None:
        print('Could not read image: %s' % args.image_path)
        return
    # Downscale so the longest side is at most 900 px before detection.
    if max(image.shape[0], image.shape[1]) > 900:
        scale_percent = 900 / max(image.shape[0], image.shape[1])
        image = img_resize(image, scale_percent)

    boxes = detector.detect(img=image, select_largest=args.select_largest,
                            proba=False, landmarks=False)
    if boxes is not None:
        for box in boxes:
            # Draw the detection box, then classify the cropped face.
            cv2.rectangle(image,
                          (np.int32(box[0]), np.int32(box[1])),
                          (np.int32(box[2]), np.int32(box[3])),
                          (255, 255, 255), 1)
            face = extract_face(image, box, image_size=160)
            face_embedding = model(np.float32(face), True)
            predictions = svm_model.predict_proba(face_embedding)
            best_class_idxs = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[np.arange(len(best_class_idxs)),
                                                   best_class_idxs]
            for i in range(len(best_class_idxs)):
                print('%4d %s: %.3f' % (i, class_names[best_class_idxs[i]],
                                        best_class_probabilities[i]))
                predict_str = '%s: %.3f' % (class_names[best_class_idxs[i]],
                                            best_class_probabilities[i])
                pos = (np.int32(box[0]), np.int32(box[1]))
                cv2.putText(image, predict_str, pos, FONT, 0.5,
                            (255, 255, 255), 1, cv2.LINE_AA)
    cv2.imshow('preview', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
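
# Sketch of an entry point wiring up the argparse attributes the two mains
# read (TRAINED_MODEL, MODE, image_path, video_path, select_largest). The
# flag spellings, defaults, and MODE choices are assumptions; only the
# attribute names are taken from the code above.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='FaceNet + SVM recognition demo')
    parser.add_argument('--TRAINED_MODEL', required=True,
                        help='checkpoint folder under ../TrainedModels/MobileNetV2/')
    parser.add_argument('--MODE', default='IMAGE',
                        choices=['IMAGE', 'VIDEO', 'WEBCAM'])
    parser.add_argument('--image_path', default=None)
    parser.add_argument('--video_path', default=None)
    parser.add_argument('--select_largest', action='store_true',
                        help='keep only the largest detected face')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    if args.MODE == 'IMAGE':
        ImageMain(args)
    else:
        VideoMain(args)
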
def forward(self, img, save_path=None, return_prob=False, remove_eyes=False):
    """Run MTCNN face detection on a PIL image.

    Arguments:
        img {PIL.Image} -- A PIL image.

    Keyword Arguments:
        save_path {str} -- An optional save path for the cropped image. Note that
            when self.prewhiten=True, although the returned tensor is prewhitened,
            the saved face image is not, so it is a true representation of the
            face in the input image. (default: {None})
        return_prob {bool} -- Whether or not to return the detection probability.
            (default: {False})
        remove_eyes {bool} -- Forwarded to extract_face when cropping.
            (default: {False})

    Returns:
        Union[torch.Tensor, (torch.tensor, float)] -- If detected, cropped image
            of a single face with dimensions 3 x image_size x image_size.
            Optionally, the probability that a face was detected. If
            self.keep_all is True, n detected faces are returned in an
            n x 3 x image_size x image_size tensor.
    """
    with torch.no_grad():
        boxes = detect_face(
            img, self.min_face_size,
            self.pnet, self.rnet, self.onet,
            self.thresholds, self.factor,
            self.device
        )

    if len(boxes) == 0:
        print('Face not found')
        if return_prob:
            return None, ([None] if self.keep_all else None)
        else:
            return None

    if self.select_largest:
        # Sort boxes by area, largest first.
        boxes = boxes[
            np.argsort(
                (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            )[::-1]
        ]
    if not self.keep_all:
        boxes = boxes[[0]]

    faces = []
    probs = []
    for i, box in enumerate(boxes):
        face_path = save_path
        if save_path is not None and i > 0:
            # Number additional faces: face.jpg, face_2.jpg, face_3.jpg, ...
            save_name, ext = os.path.splitext(save_path)
            face_path = save_name + '_' + str(i + 1) + ext
        face, prob = extract_face(img, box, self.image_size, self.margin,
                                  face_path, remove_eyes)
        if self.prewhiten:
            face = prewhiten(face)
        faces.append(face)
        probs.append(prob)

    if self.keep_all:
        faces = torch.stack(faces)
    else:
        faces = faces[0]
        probs = probs[0]

    if return_prob:
        return faces, probs
    else:
        return faces
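
# Usage sketch for the forward() variant above, assuming it belongs to an
# MTCNN-style torch module (so calling the instance dispatches to forward)
# constructed with keep_all=False, in which case a single face tensor comes
# back. The instance name and image path are assumptions.
from PIL import Image

img = Image.open('test.jpg')
face, prob = mtcnn(img, save_path='face.jpg', return_prob=True, remove_eyes=False)
if face is not None:
    print('face tensor %s, prob %.3f' % (tuple(face.shape), prob))
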
def forward(self, img, save_path=None, return_prob=False):
    """Run MTCNN face detection on a PIL image or numpy array.

    This method performs both detection and extraction of faces, returning
    tensors representing detected faces rather than the bounding boxes. To
    access bounding boxes, see the MTCNN.detect() method below.

    Arguments:
        img {PIL.Image, np.ndarray, or list} -- A PIL image, np.ndarray, or list.

    Keyword Arguments:
        save_path {str} -- An optional save path for the cropped image. Note that
            when self.post_process=True, although the returned tensor is post
            processed, the saved face image is not, so it is a true
            representation of the face in the input image. If `img` is a list of
            images, `save_path` should be a list of equal length.
            (default: {None})
        return_prob {bool} -- Whether or not to return the detection probability.
            (default: {False})

    Returns:
        Union[torch.Tensor, tuple(torch.tensor, float)] -- If detected, cropped
            image of a face with dimensions 3 x image_size x image_size.
            Optionally, the probability that a face was detected. If
            self.keep_all is True, n detected faces are returned in an
            n x 3 x image_size x image_size tensor with an optional list of
            detection probabilities. If `img` is a list of images, the item(s)
            returned have an extra dimension (batch) as the first dimension.

    Example:
        >>> from facenet_pytorch import MTCNN
        >>> mtcnn = MTCNN()
        >>> face_tensor, prob = mtcnn(img, save_path='face.png', return_prob=True)
    """
    # Detect faces
    with torch.no_grad():
        batch_boxes, batch_probs = self.detect(img)

    # Determine if a batch or single image was passed
    batch_mode = True
    if not isinstance(img, (list, tuple)) and not (isinstance(img, np.ndarray) and len(img.shape) == 4):
        img = [img]
        batch_boxes = [batch_boxes]
        batch_probs = [batch_probs]
        batch_mode = False

    # Parse save path(s)
    if save_path is not None:
        if isinstance(save_path, str):
            save_path = [save_path]
    else:
        save_path = [None for _ in range(len(img))]

    # Process all bounding boxes and probabilities
    faces, probs = [], []
    for im, box_im, prob_im, path_im in zip(img, batch_boxes, batch_probs, save_path):
        if box_im is None:
            faces.append(None)
            probs.append([None] if self.keep_all else None)
            continue

        if not self.keep_all:
            box_im = box_im[[0]]

        faces_im = []
        for i, box in enumerate(box_im):
            face_path = path_im
            if path_im is not None and i > 0:
                save_name, ext = os.path.splitext(path_im)
                face_path = save_name + '_' + str(i + 1) + ext

            face = extract_face(im, box, self.image_size, self.margin, face_path)
            if self.post_process:
                face = fixed_image_standardization(face)
            faces_im.append(face)

        if self.keep_all:
            faces_im = torch.stack(faces_im)
        else:
            faces_im = faces_im[0]
            prob_im = prob_im[0]

        faces.append(faces_im)
        probs.append(prob_im)

    if not batch_mode:
        faces = faces[0]
        probs = probs[0]

    if return_prob:
        return faces, probs
    else:
        return faces
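
# Batch-mode sketch for the forward() above: passing a list of equal-sized
# images returns per-image face tensors and probabilities, and a list
# save_path is split one entry per image. File names here are assumptions.
from PIL import Image
from facenet_pytorch import MTCNN

mtcnn = MTCNN(keep_all=True)
frames = [Image.open('frame1.jpg'), Image.open('frame2.jpg')]  # must share a size
faces, probs = mtcnn(frames, save_path=['out1.png', 'out2.png'], return_prob=True)
for i, (face_im, prob_im) in enumerate(zip(faces, probs)):
    if face_im is None:
        print('frame %d: no face detected' % i)
    else:
        print('frame %d: %d face(s)' % (i, len(face_im)))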