def image_deep_alignment(img, transform_kind="crop"):
    """Detect the largest face in *img* and rotate, warp, or crop around it.

    NOTE: this file defines a second function with the same name further
    down; that later definition replaces this one when the module is loaded.

    Args:
        img: Image convertible with ``np.array``. It is converted from RGB
            to BGR before detection and back to RGB before being returned.
        transform_kind: ``FaceAlignTransform.ROTATION``,
            ``FaceAlignTransform.AFFINE`` or the string ``"crop"``. Any other
            value leaves the image untransformed.

    Returns:
        A PIL image. When no face is detected the input image is returned
        (after the RGB -> BGR -> RGB round trip).
    """
    # convert image to np array, then to OpenCV's BGR channel order
    img = np.array(img)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # initialize detector
    face_detector = MTCNN()
    face_detector.select_largest = True

    # detect bounding boxes and landmark points
    # NOTE(review): detection runs on the BGR-converted array - confirm the
    # detector is meant to receive BGR input.
    detections, probs, landmarks = face_detector.detect(img, landmarks=True)

    transformed = img
    if detections is not None:
        x, y, x2, y2 = (int(detections[0][0]), int(detections[0][1]),
                        int(detections[0][2]), int(detections[0][3]))
        left_eye = landmarks[0][0]
        right_eye = landmarks[0][1]
        nose = landmarks[0][4]
        h, w = img.shape[0], img.shape[1]
        # cv2.warpAffine takes dsize as (width, height), the reverse of the
        # (rows, cols) order of ndarray.shape.
        out_size = (w, h)

        if transform_kind == FaceAlignTransform.ROTATION:
            rotation = get_rotation_matrix(left_eye, right_eye)
            transformed = cv2.warpAffine(img, rotation, out_size,
                                         flags=cv2.INTER_CUBIC)
        elif transform_kind == FaceAlignTransform.AFFINE:
            matrix = get_affine_transform_matrix(
                left_eye, right_eye, nose,
                LEFT_EYE_POS(w, h), RIGHT_EYE_POS(w, h), NOSE_POS(w, h))
            transformed = cv2.warpAffine(img, matrix, out_size,
                                         flags=cv2.INTER_CUBIC)
        elif transform_kind == "crop":
            # Grow the box by one third of its original size on every side.
            # Both margins are computed from the detected box, so the far
            # edge is not enlarged by the already-moved near edge.
            pad_y = int((y2 - y) / 3)
            pad_x = int((x2 - x) / 3)
            top = max(y - pad_y, 0)
            left = max(x - pad_x, 0)
            # Slice ends are exclusive, so the image size itself is the
            # largest valid bound.
            bottom = min(y2 + pad_y, h)
            right = min(x2 + pad_x, w)
            return Image.fromarray(
                cv2.cvtColor(img[top:bottom, left:right, :],
                             cv2.COLOR_BGR2RGB))

    transformed = cv2.cvtColor(transformed, cv2.COLOR_BGR2RGB)
    return Image.fromarray(transformed)
def base64arr_to_tensor(frames):
    """Decode base64 frames, crop the detected face and stack them as tensors.

    For every frame the image is decoded, converted to RGB, cropped to the
    first detection returned by the detector (the whole frame is kept when
    nothing is detected), resized with ``resize`` and converted to a tensor.

    Side effect: each processed image is written to ``last_image.jpg`` in the
    current working directory, overwriting the previous one.

    Args:
        frames: Iterable of base64-encoded image payloads.

    Returns:
        The result of ``torch.stack`` over the per-frame tensors.
    """
    model = model_init()
    face_detector = MTCNN()
    face_detector.select_largest = True

    # The transform does not depend on the frame, so build it once.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((model.input_size, model.input_size))
    ])

    tensor_arr = []
    for frame in frames:
        # decode the payload into an RGB pixel array
        buf = io.BytesIO(base64.b64decode(frame))
        pixels = np.array(Image.open(buf).convert('RGB'))

        # extract face
        detections, probs = face_detector.detect(pixels)
        if detections is not None:
            height, width = pixels.shape[0], pixels.shape[1]
            # Clamp the box to the frame: a negative start would otherwise be
            # interpreted as an index counted from the end of the axis.
            x = max(int(detections[0][0]), 0)
            y = max(int(detections[0][1]), 0)
            x2 = min(int(detections[0][2]), width)
            y2 = min(int(detections[0][3]), height)
            # Keep the whole frame if the clamped box is empty.
            if x2 > x and y2 > y:
                pixels = pixels[y:y2, x:x2, :]

        # Always hand a PIL image downstream; without a detection the frame
        # used to stay a NumPy array, which has no ``save`` method.
        img = Image.fromarray(pixels)

        # resize face
        img = resize(img, model.input_size)

        # save sample
        img.save("last_image.jpg")

        tensor_arr.append(transform(img))

    return torch.stack(tensor_arr)
def set_mtcnn(mtcnn, all, largest, post=False, factor=0.6, min=20):
    """Configure an MTCNN detector, creating one if *mtcnn* is not an MTCNN.

    The parameter names ``all`` and ``min`` shadow builtins; they are kept
    because they are part of the function's keyword interface.

    Args:
        mtcnn: Detector to configure. Anything that is not an ``MTCNN``
            instance is replaced by a freshly constructed ``MTCNN()``.
        all: Value assigned to ``keep_all``.
        largest: Value assigned to ``select_largest``.
        post: Value assigned to ``post_process``.
        factor: Value assigned to ``factor``.
        min: Value assigned to ``min_face_size``.

    Returns:
        The configured detector (the one passed in, or the new one).
    """
    # isinstance instead of an exact type comparison, so instances of MTCNN
    # subclasses are configured in place rather than discarded.
    if not isinstance(mtcnn, MTCNN):
        mtcnn = MTCNN()

    mtcnn.keep_all = all
    mtcnn.post_process = post
    mtcnn.factor = factor
    mtcnn.select_largest = largest
    mtcnn.min_face_size = min
    return mtcnn
def image_deep_alignment(img, transform_kind="crop",
                         precomputed_detection=None,
                         precomputed_landmarks=None,
                         compute_landmarks=True):
    """Rotate or crop *img* around the first detected face.

    Args:
        img: Image convertible with ``np.array``.
        transform_kind: ``FaceAlignTransform.ROTATION`` or the string
            ``"crop"``. Any other value leaves the image untransformed.
        precomputed_detection: Bounding boxes to reuse. They are only used
            when ``precomputed_landmarks`` is supplied as well; otherwise the
            detector is run again.
        precomputed_landmarks: Landmarks to reuse (see above).
        compute_landmarks: When the detector is run, whether it should also
            return landmarks. The rotation transform indexes into the
            landmarks, so it fails if they were neither computed nor
            supplied.

    Returns:
        A tuple ``(image, detections, landmarks)`` where ``image`` is a PIL
        image. ``detections`` is ``None`` when no face was found, in which
        case the image is returned untransformed; ``landmarks`` is ``None``
        when landmarks were not computed.
    """
    # convert image to np array
    img = np.array(img)

    # compute bounding box and landmarks unless both were supplied
    if precomputed_detection is None or precomputed_landmarks is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        face_detector = MTCNN(device=device)
        face_detector.select_largest = True
        if compute_landmarks:
            detections, _, landmarks = face_detector.detect(img,
                                                            landmarks=True)
        else:
            detections, _ = face_detector.detect(img, landmarks=False)
            landmarks = None
    else:
        detections = precomputed_detection
        landmarks = precomputed_landmarks

    transformed = img
    if detections is not None:
        x, y, x2, y2 = (int(detections[0][0]), int(detections[0][1]),
                        int(detections[0][2]), int(detections[0][3]))
        h, w = img.shape[0], img.shape[1]

        # rotation transformation
        if transform_kind == FaceAlignTransform.ROTATION:
            left_eye = landmarks[0][0]
            right_eye = landmarks[0][1]
            rotation = get_rotation_matrix(left_eye, right_eye)
            # cv2.warpAffine takes dsize as (width, height), the reverse of
            # the (rows, cols) order of ndarray.shape.
            transformed = cv2.warpAffine(img, rotation, (w, h),
                                         flags=cv2.INTER_CUBIC)

        # crop the bounding box, expanded by 1/3 of its size on every side
        elif transform_kind == "crop":
            # Both margins are computed from the detected box, so the far
            # edge is not enlarged by the already-moved near edge.
            pad_y = int((y2 - y) / 3)
            pad_x = int((x2 - x) / 3)
            top = max(y - pad_y, 0)
            left = max(x - pad_x, 0)
            # Slice ends are exclusive, so the image size itself is the
            # largest valid bound.
            bottom = min(y2 + pad_y, h)
            right = min(x2 + pad_x, w)
            return (Image.fromarray(img[top:bottom, left:right, :]),
                    detections, landmarks)

    return Image.fromarray(transformed), detections, landmarks