import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from keras import backend as K

import mtcnn_detect_face

# helper functions (process_mtcnn_bbox, get_src_landmarks, get_tar_landmarks,
# landmarks_match_mtcnn, calibrate_coord) are assumed to be defined elsewhere
# in the project


def process_img(img):
    """Detect, crop, align, and save all faces found in a single image file."""
    global pnet, rnet, onet
    minsize = 50                             # minimum size of face
    detec_threshold = 0.7
    threshold = [0.6, 0.7, detec_threshold]  # three steps' threshold
    factor = 0.709                           # scale factor

    im_name = img.split('/')[-1].split('.')[0]
    input_img = plt.imread(img)
    faces, pnts = mtcnn_detect_face.detect_face(
        input_img, minsize, pnet, rnet, onet, threshold, factor)
    print(faces)
    print(pnts)
    faces = process_mtcnn_bbox(faces, input_img.shape)

    for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):
        det_face_im = input_img[int(x0):int(x1), int(y0):int(y1), :]

        # get src/tar landmarks
        src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts, idx)
        tar_landmarks = get_tar_landmarks(det_face_im)

        # align detected face
        aligned_det_face_im = landmarks_match_mtcnn(
            det_face_im, src_landmarks, tar_landmarks)

        fname = f"./faces/aligned_faces/{im_name}face{str(idx)}.jpg"
        plt.imsave(fname, aligned_det_face_im, format="jpg")
        fname = f"./faces/raw_faces/{im_name}face{str(idx)}.jpg"
        plt.imsave(fname, det_face_im, format="jpg")
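# A minimal setup sketch, assuming `mtcnn_detect_face` follows the
# facenet-style API and also exposes create_mtcnn(session, model_path) for
# building the three sub-networks; the weight directory and the image path
# below are hypothetical.
import tensorflow as tf

sess = tf.Session()
with sess.as_default():
    pnet, rnet, onet = mtcnn_detect_face.create_mtcnn(sess, "./mtcnn_weights/")

process_img("./faces/raw_input/example.jpg")       # hypothetical input file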
def get_faces_bbox(image):
    """Detect faces, downscaling high-resolution frames first for speed."""
    # detect_threshold and the three sub-networks are module-level globals
    minsize = 20                              # minimum size of face
    threshold = [0.6, 0.7, detect_threshold]  # three steps' threshold
    factor = 0.709                            # scale factor

    video_scaling_factor = 1
    resized_image = image
    if is_higher_than_1080p(image):
        video_scaling_factor = 4
        resized_image = cv2.resize(image,
                                   (image.shape[1] // video_scaling_factor,
                                    image.shape[0] // video_scaling_factor))
    elif is_higher_than_720p(image):
        video_scaling_factor = 3
        resized_image = cv2.resize(image,
                                   (image.shape[1] // video_scaling_factor,
                                    image.shape[0] // video_scaling_factor))
    elif is_higher_than_480p(image):
        video_scaling_factor = 2
        resized_image = cv2.resize(image,
                                   (image.shape[1] // video_scaling_factor,
                                    image.shape[0] // video_scaling_factor))

    faces, pnts = mtcnn_detect_face.detect_face(
        resized_image, minsize, pnet, rnet, onet, threshold, factor)
    faces = process_mtcnn_bbox(faces, image.shape)
    faces = calibrate_coord(faces, video_scaling_factor)
    return faces
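# The resolution predicates above are assumed helpers; plausible definitions
# follow (the exact pixel thresholds are inferred from the names, not taken
# from the original source):
def is_higher_than_1080p(image):
    h, w = image.shape[:2]
    return h * w > 1920 * 1080

def is_higher_than_720p(image):
    h, w = image.shape[:2]
    return h * w > 1280 * 720

def is_higher_than_480p(image):
    h, w = image.shape[:2]
    return h * w > 858 * 480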
def process_video(input_img):
    """Frame callback: every `save_interval`-th frame, extract, align, and
    save faces plus a binary eye mask; the returned frame is a dummy since
    only the files written as side effects matter."""
    global frames, save_interval
    global pnet, rnet, onet
    minsize = 30                             # minimum size of face
    detec_threshold = 0.7
    threshold = [0.6, 0.7, detec_threshold]  # three steps' threshold
    factor = 0.709                           # scale factor

    frames += 1
    if frames % save_interval == 0:
        faces, pnts = mtcnn_detect_face.detect_face(
            input_img, minsize, pnet, rnet, onet, threshold, factor)
        faces = process_mtcnn_bbox(faces, input_img.shape)

        for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):
            det_face_im = input_img[int(x0):int(x1), int(y0):int(y1), :]

            # get src/tar landmarks
            src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts)
            tar_landmarks = get_tar_landmarks(det_face_im)

            # align detected face
            aligned_det_face_im = landmarks_match_mtcnn(
                det_face_im, src_landmarks, tar_landmarks)

            fname = f"./faces/aligned_faces/frame{frames}face{str(idx)}.jpg"
            #plt.imsave(fname, aligned_det_face_im, format="jpg")
            cv2.imwrite(fname, aligned_det_face_im)
            fname = f"./faces/raw_faces/frame{frames}face{str(idx)}.jpg"
            #plt.imsave(fname, det_face_im, format="jpg")
            cv2.imwrite(fname, det_face_im)

            # binary mask: white rectangles over the two eye landmarks,
            # warped into the aligned-face coordinate frame
            bm = np.zeros_like(aligned_det_face_im)
            h, w = bm.shape[:2]
            bm[int(src_landmarks[0][0] - h / 15):int(src_landmarks[0][0] + h / 15),
               int(src_landmarks[0][1] - w / 8):int(src_landmarks[0][1] + w / 8), :] = 255
            bm[int(src_landmarks[1][0] - h / 15):int(src_landmarks[1][0] + h / 15),
               int(src_landmarks[1][1] - w / 8):int(src_landmarks[1][1] + w / 8), :] = 255
            bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks)
            fname = f"./faces/binary_masks_eyes/frame{frames}face{str(idx)}.jpg"
            #plt.imsave(fname, bm, format="jpg")
            cv2.imwrite(fname, bm)

    return np.zeros((3, 3, 3))
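# process_video returns a constant dummy frame, which fits moviepy's
# fl_image interface: the output clip is a throwaway and only the face files
# written above matter. A usage sketch (paths and interval are illustrative):
from moviepy.editor import VideoFileClip

frames = 0
save_interval = 6   # extract faces from every 6th frame
clip = VideoFileClip("./input_video.mp4")          # hypothetical input path
clip.fl_image(process_video).write_videofile("./dummy.mp4", audio=False)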
def detect_face(self, image, minsize=20, threshold=0.7, factor=0.709,
                use_auto_downscaling=True, min_face_area=25 * 25):
    if use_auto_downscaling:
        image, scale_factor = self.auto_downscale(image)

    faces, pnts = mtcnn_detect_face.detect_face(
        image, minsize,
        self.pnet, self.rnet, self.onet,
        [0.6, 0.7, threshold], factor)
    faces = self.process_mtcnn_bbox(faces, image.shape)
    faces, pnts = self.remove_small_faces(faces, pnts, min_face_area)

    if use_auto_downscaling:
        faces = self.calibrate_coord(faces, scale_factor)
        pnts = self.calibrate_landmarks(pnts, scale_factor)
    return faces, pnts
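# Hypothetical usage: the method above is excerpted from a detector class
# (called MTCNN here, a name assumed for illustration) that owns
# self.pnet/self.rnet/self.onet and the helper methods it calls. Boxes come
# back in the same (x0, y1, x1, y0, score) order used throughout this file.
detector = MTCNN()
image = plt.imread("./example.jpg")                # hypothetical input path
faces, pnts = detector.detect_face(image, minsize=20, threshold=0.7)
for x0, y1, x1, y0, score in faces:
    print(f"face rows {int(x0)}-{int(x1)}, cols {int(y0)}-{int(y1)}, "
          f"score {score:.2f}")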
def extract_face_from_img(img, params):
    """Return per-face lists of aligned crops, raw crops, and binary eye masks."""
    pnet, rnet, onet = params['pnet'], params['rnet'], params['onet']
    minsize = params['minsize']
    threshold = params['threshold']
    factor = params['factor']

    faces, pnts = mtcnn_detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    faces = process_mtcnn_bbox(faces, img.shape)

    aligned_list = []
    det_faces = []
    bm_list = []
    for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):
        det_face_im = img[int(x0):int(x1), int(y0):int(y1), :]

        # get src/tar landmarks
        src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts)
        tar_landmarks = get_tar_landmarks(det_face_im)

        # align detected face
        aligned_det_face_im = landmarks_match_mtcnn(
            det_face_im, src_landmarks, tar_landmarks)

        # binary mask: white rectangles over the two eye landmarks,
        # warped into the aligned-face coordinate frame
        bm = np.zeros_like(aligned_det_face_im)
        h, w = bm.shape[:2]
        bm[int(src_landmarks[0][0] - h / 15):int(src_landmarks[0][0] + h / 15),
           int(src_landmarks[0][1] - w / 8):int(src_landmarks[0][1] + w / 8), :] = 255
        bm[int(src_landmarks[1][0] - h / 15):int(src_landmarks[1][0] + h / 15),
           int(src_landmarks[1][1] - w / 8):int(src_landmarks[1][1] + w / 8), :] = 255
        bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks)

        aligned_list.append(aligned_det_face_im)
        det_faces.append(det_face_im)
        bm_list.append(bm)

    return aligned_list, det_faces, bm_list
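# Usage sketch for extract_face_from_img: `params` just bundles the three
# sub-networks with the detection hyperparameters; the values shown mirror
# the ones hard-coded in the other functions in this file.
params = {
    'pnet': pnet, 'rnet': rnet, 'onet': onet,
    'minsize': 20,
    'threshold': [0.6, 0.7, 0.7],
    'factor': 0.709,
}
img = plt.imread("./example.jpg")                  # hypothetical input path
aligned_list, det_faces, bm_list = extract_face_from_img(img, params)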
# build a callable mapping ONet's input to its bbox-regression, landmark,
# and face-probability outputs (onet.layers is assumed to be name-indexable
# in this model format)
onet = K.function(
    [onet.layers['data']],
    [onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']])

er_model = emotion_recognition_model('weights/mobilenet_0.4379_0.8605.hdf5')
labels = [
    'Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral'
]
face_input = np.zeros((1, 128, 128, 3))

cap = cv2.VideoCapture(0)
while True:
    ret, img = cap.read()
    input_img = img.copy()
    # this detect_face wrapper is called without explicit minsize/threshold/
    # factor, so it presumably falls back on defaults
    faces, pnts = mtcnn_detect_face.detect_face(input_img, pnet, rnet, onet)
    faces = process_mtcnn_bbox(faces, input_img.shape)
    figure_right = np.zeros((input_img.shape[0], 140, 3), np.uint8)

    # keep only boxes fully inside the frame and remember the largest one
    biggest_face_idx = -1
    biggest_area = 0
    faces_boxes = []
    i = 0
    for (y0, x1, y1, x0, conf_score) in faces:
        if y0 > 0 and x1 > 0 and y1 > 0 and x0 > 0:
            faces_boxes.append((y0, x1, y1, x0))
            if biggest_area < (y1 - y0) * (x1 - x0):
                biggest_area = (y1 - y0) * (x1 - x0)
                biggest_face_idx = i
        i += 1
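    # A sketch of what presumably comes next: crop the largest valid box,
    # resize it to the classifier's 128x128 input, and predict an emotion
    # label (the 1/255 preprocessing is an assumption, not from the source):
    if faces_boxes:
        y0, x1, y1, x0 = max(faces_boxes,
                             key=lambda b: (b[2] - b[0]) * (b[1] - b[3]))
        crop = input_img[int(y0):int(y1), int(x0):int(x1), :]
        face_input[0] = cv2.resize(crop, (128, 128)) / 255.0
        preds = er_model.predict(face_input)
        print(labels[int(np.argmax(preds))])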
labels = [
    'Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral'
]
face_input = np.zeros((1, 128, 128, 3))

t_start = time.time()
fps = 0
scale = 4                                 # detect on a 1/4-resolution frame

cap = cv2.VideoCapture(0)
ret, input_img = cap.read()
height, width, _ = input_img.shape
while True:
    ret, img = cap.read()
    input_img = img.copy()
    img_small = cv2.resize(input_img, (width // scale, height // scale))
    faces, pnts = mtcnn_detect_face.detect_face(img_small, pnet, rnet, onet)
    faces = process_mtcnn_bbox(faces, img_small.shape)
    figure_right = np.zeros((input_img.shape[0], 140, 3), np.uint8)

    # keep only boxes fully inside the frame and remember the largest one
    biggest_face_idx = -1
    biggest_area = 0
    faces_boxes = []
    i = 0
    for (y0, x1, y1, x0, conf_score) in faces:
        if y0 > 0 and x1 > 0 and y1 > 0 and x0 > 0:
            faces_boxes.append((y0, x1, y1, x0))
            if biggest_area < (y1 - y0) * (x1 - x0):
                biggest_area = (y1 - y0) * (x1 - x0)
                biggest_face_idx = i
        i += 1
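    # A sketch of a plausible tail for this loop (not from the source): the
    # boxes live in img_small coordinates, so scale them back up by `scale`
    # before drawing, then report the average FPS since t_start.
    for (y0, x1, y1, x0) in faces_boxes:
        cv2.rectangle(img, (int(x0 * scale), int(y0 * scale)),
                      (int(x1 * scale), int(y1 * scale)), (0, 255, 0), 2)
    fps += 1
    print(f"avg fps: {fps / (time.time() - t_start):.1f}")
    cv2.imshow('frame', np.hstack([img, figure_right]))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()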