class ModelWrapper(MAXModelWrapper):
    """Model wrapper for ONNX image classification model"""

    MODEL_META_DATA = {
        'id': MODEL_ID,
        'name': MODEL_NAME,
        'description': 'Emotion classifier trained on the FER+ face dataset',
        'type': 'Facial Recognition',
        'license': 'MIT',
        'source': 'https://developer.ibm.com/exchanges/models/all/max-facial-emotion-classifier/'
    }

    DETECTION_THRESHOLD = 0.95

    def __init__(self, model_name='emotion_ferplus', path=DEFAULT_MODEL_PATH):
        self.input_shape = (1, 1, 64, 64)
        self.img_size = 64
        self.detector = MTCNN()
        logger.info('Loading model from: {}...'.format(path))

        # Load the ONNX graph into an inference session
        self.sess = rt.InferenceSession('{}/{}.onnx'.format(path, model_name))
        self.input_name = self.sess.get_inputs()[0].name
        self.output_name = self.sess.get_outputs()[0].name
        logger.info('Loaded model')

        with open('{}/idx_to_label.pkl'.format(path), 'rb') as f:
            self.idx_to_label = pickle.load(f)

    def _pre_process(self, input_img):
        const = 0.35
        img_h, img_w, _ = np.shape(input_img)

        # downscale the image if its width or height exceeds 1024
        input_img = img_resize(input_img)
        img_h, img_w, _ = np.shape(input_img)

        detected = self.detector.detect_faces(input_img)
        # keep only detections above the confidence threshold
        detected = [d for d in detected if d['confidence'] > self.DETECTION_THRESHOLD]

        faces = np.empty((len(detected), self.img_size, self.img_size))
        for i, d in enumerate(detected):
            x1, y1, w, h = d['box']
            x2 = x1 + w
            y2 = y1 + h
            # convert box from [x, y, w, h] in pixels to [y1, x1, y2, x2] in normalized coordinates
            d['box'] = [float(y1) / img_h, float(x1) / img_w,
                        float(y2) / img_h, float(x2) / img_w]
            # expand the crop by `const` on each side, clamped to the image bounds
            xw1 = max(int(x1 - const * w), 0)
            yw1 = max(int(y1 - const * h), 0)
            xw2 = min(int(x2 + const * w), img_w - 1)
            yw2 = min(int(y2 + const * h), img_h - 1)
            f = cv2.resize(input_img[yw1:yw2 + 1, xw1:xw2 + 1, :],
                           (self.img_size, self.img_size))
            f = cv2.cvtColor(f, code=cv2.COLOR_RGBA2GRAY)
            faces[i, :, :] = f
        return (faces, detected)

    def _post_process(self, post_scores):
        scores = post_scores[0]
        result = []
        for i, d in enumerate(post_scores[1]):
            bbox = d['box']
            probs = softmax(scores[i])
            probs = np.squeeze(probs)
            idxs = np.argsort(probs)[::-1]
            result.append(
                post_process_result(probs, idxs, self.idx_to_label, bbox, topk=8))
        return result

    def _predict(self, pre_x):
        faces, detected = pre_x
        predict_result = []
        # feed each detected face to the emotion model
        for i in range(faces.shape[0]):
            img_2d = np.array(faces[i, :, :])
            img_2d = np.resize(img_2d, self.input_shape)
            img_2d = img_2d.astype(np.float32)
            predict_result.append(
                self.sess.run([self.output_name],
                              {self.input_name: img_2d})[0].ravel())
        return (predict_result, detected)
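
# `softmax`, `img_resize`, and `post_process_result` above are helpers imported
# elsewhere in the repo. A minimal sketch of a numerically stable softmax of the
# kind `_post_process` expects -- an assumption, not the repo's actual helper:
import numpy as np

def softmax(x):
    # subtract the max before exponentiating for numerical stability
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()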
class FaceDetector:
    # constant index ranges into the 68-point dlib landmarks
    jaw_points = np.arange(0, 17)  # face contour points
    eyebrow_dx_points = np.arange(17, 22)
    eyebrow_sx_points = np.arange(22, 27)
    nose_points = np.arange(27, 36)
    nosecenter_points = np.array([30, 33])
    right_eye = np.arange(36, 42)
    left_eye = np.arange(42, 48)

    def __init__(self, config):
        self.config = config
        # if specified, use the MTCNN model
        self.mtcnn_model_path = config.get('mtcnn_model_path', None)
        if self.mtcnn_model_path:
            from mtcnn.mtcnn import MTCNN
            self.detector = MTCNN()
        # otherwise rely on a dlib detector
        else:
            self.detector_model_path = config.get('detector_path', None)
            if self.detector_model_path:
                self.detector = dlib.cnn_face_detection_model_v1(
                    self.detector_model_path)
            else:
                self.detector = dlib.get_frontal_face_detector()
        # always instantiate the landmark predictor
        self.predictor = dlib.shape_predictor(
            config.get('shape_predictor_path'))

    def _mtcnn_detect_faces(self, img):
        face_confidence_threshold = self.config['mtcnn_confidence_threshold']
        rects = self.detector.detect_faces(img)
        # MTCNN returns boxes as [x, y, width, height]; clamp to 0 because the
        # library sometimes returns negative values (library bug)
        faces = [
            Face(img.copy(),
                 Face.Rectangle(left=max(r['box'][0], 0),
                                top=max(r['box'][1], 0),
                                right=max(r['box'][0], 0) + max(r['box'][2], 0),
                                bottom=max(r['box'][1], 0) + max(r['box'][3], 0)))
            for r in rects if r['confidence'] > face_confidence_threshold
        ]
        return faces

    def detect_faces(self, img):
        if self.mtcnn_model_path:
            faces = self._mtcnn_detect_faces(img)
        else:
            rects = self.detector(img, 1)
            # the dlib CNN detector wraps its results, so unwrap the rect attribute
            if self.detector_model_path:
                rects = [r.rect for r in rects]
            faces = [
                Face(img.copy(),
                     Face.Rectangle(top=max(r.top(), 0),
                                    right=max(r.right(), 0),
                                    bottom=max(r.bottom(), 0),
                                    left=max(r.left(), 0)))
                for r in rects
            ]
        # continue only if we detected at least one face
        if len(faces) == 0:
            logging.debug("No face detected")
            raise FaceExtractException("No face detected.")
        for face in faces:
            face.landmarks = self.get_landmarks(face)
        return faces

    def get_landmarks(self, face: Face, recompute=False):
        # if landmarks are already present, return them unless a recompute is requested
        if face.landmarks is not None and not recompute:
            return face.landmarks
        # we need a dlib rectangle to get the landmarks
        dlib_rect = dlib.rectangle(left=face.rect.left, top=face.rect.top,
                                   right=face.rect.right, bottom=face.rect.bottom)
        shape = self.predictor(face.img, dlib_rect)
        return np.array([(p.x, p.y) for p in shape.parts()])

    @staticmethod
    def get_eyes(face: Face):
        lx_eye = face.landmarks[FaceDetector.left_eye]
        rx_eye = face.landmarks[FaceDetector.right_eye]
        return lx_eye, rx_eye

    @staticmethod
    def get_contour_points(shape):
        # convert the dlib shape to numpy points
        points = np.array([(p.x, p.y) for p in shape.parts()])
        face_boundary = points[np.concatenate([
            FaceDetector.jaw_points,
            FaceDetector.eyebrow_dx_points,
            FaceDetector.eyebrow_sx_points
        ])]
        return face_boundary, shape.rect

    def extract_face(self, face: Face):
        """
        Utility method that applies the current detector configuration to the
        generic extraction operation.
        :param face: the Face to extract
        :return: the extracted face image
        """
        # size is a tuple, so it must be evaluated from its string representation in the config
        size = literal_eval(self.config['extract']['size'])
        border_expand = literal_eval(self.config['extract']['border_expand'])
        align = self.config['extract']['align']
        maintain_proportion = self.config['extract']['maintain_proportion']
        masked = self.config['extract']['masked']
        return self._extract_face(face, size,
                                  border_expand=border_expand,
                                  align=align,
                                  maintain_proportion=maintain_proportion,
                                  masked=masked)

    def _extract_face(self, face: Face, out_size=None, border_expand=(0., 0.),
                      align=False, maintain_proportion=False, masked=False):
        face_size = face.get_face_size()
        border_expand = (int(border_expand[0] * face_size[0]),
                         int(border_expand[1] * face_size[1]))
        # unless specified otherwise, the extracted face keeps the input face size
        if not out_size:
            out_size = face_size
        face.landmarks = self.get_landmarks(face)
        if masked:
            mask = utils.get_face_mask(
                face, 'hull',
                erosion_size=literal_eval(self.config['extract'].get(
                    'erosion_size', 'None')),
                dilation_kernel=literal_eval(self.config['extract'].get(
                    'dilation_kernel', 'None')),
                blur_size=int(self.config['extract']['blur_size']))
            # black out all pixels outside the mask
            face.img = cv2.bitwise_and(face.img, face.img, mask=mask[:, :, 1])
        # keep the proportions of the original rect in the extracted image;
        # otherwise resizing might stretch the content
        if maintain_proportion:
            border_delta = self._get_maintain_proportion_delta(face_size, out_size)
            border_expand = (border_expand[0] + int(border_delta[0] // 2),
                             border_expand[1] + int(border_delta[1] // 2))
        if align:
            cut_face = utils.ffhq_align(face, output_size=out_size[0],
                                        boundary_resize_factor=border_expand)
        else:
            cut_face = cv2.resize(face.get_face_img(), out_size,
                                  interpolation=cv2.INTER_CUBIC)
        return cut_face

    def _get_maintain_proportion_delta(self, src_size, dest_size):
        """
        Return the delta needed to maintain the destination proportion given
        the source size. Tuples are ordered (w, h).
        :param src_size: source (w, h)
        :param dest_size: destination (w, h)
        :return: (delta_w, delta_h)
        """
        dest_ratio = max(dest_size) / min(dest_size)
        delta_h = delta_w = 0
        w, h = src_size
        if w > h:
            delta_h = w * dest_ratio - h
        else:
            delta_w = h * dest_ratio - w
        return delta_w, delta_h
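
# A minimal usage sketch for FaceDetector, assuming a config dict shaped like
# the keys the class reads; all paths and values below are illustrative assumptions.
config = {
    'shape_predictor_path': 'models/shape_predictor_68_face_landmarks.dat',
    'mtcnn_confidence_threshold': 0.9,
    'extract': {
        'size': '(256, 256)',           # literal_eval'd into a tuple
        'border_expand': '(0.1, 0.1)',  # fraction of the face size per side
        'align': False,
        'maintain_proportion': True,
        'masked': False,
        'blur_size': 15,
    },
}

detector = FaceDetector(config)
img = cv2.imread('photo.jpg')
faces = detector.detect_faces(img)          # raises FaceExtractException if none found
extracted = detector.extract_face(faces[0])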
def main():
    K.set_learning_phase(0)  # make sure the model runs in test mode
    weight_file = "../pre-trained/wiki/ssrnet_3_3_3_64_1.0_1.0/ssrnet_3_3_3_64_1.0_1.0.h5"

    # for face detection
    # detector = dlib.get_frontal_face_detector()
    detector = MTCNN()
    try:
        os.mkdir('./img')
    except OSError:
        pass

    # load model and weights
    img_size = 64
    stage_num = [3, 3, 3]
    lambda_local = 1
    lambda_d = 1
    model = SSR_net(img_size, stage_num, lambda_local, lambda_d)()
    model.load_weights(weight_file)

    clip = VideoFileClip(sys.argv[1])  # can be a gif or a movie

    # python version flag
    pyFlag = ''
    if len(sys.argv) < 3:
        pyFlag = '2'  # default to moviepy display, which works on Python 2.7 and 3.5
    elif len(sys.argv) == 3:
        pyFlag = sys.argv[2]  # python version
    else:
        print('Wrong input!')
        sys.exit()

    img_idx = 0
    detected = ''  # keep as a non-local variable across frames
    time_detection = 0
    time_network = 0
    time_plot = 0
    ad = 0.4
    skip_frame = 5  # run detection and forward propagation once every 5 frames

    for img in clip.iter_frames():
        img_idx = img_idx + 1
        input_img = img

        # with Python 3 and OpenCV display, flip the channel order
        if pyFlag == '3':
            input_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img_h, img_w, _ = np.shape(input_img)
        input_img = cv2.resize(input_img, (1024, int(1024 * img_h / img_w)))
        img_h, img_w, _ = np.shape(input_img)

        if img_idx == 1 or img_idx % skip_frame == 0:
            # detect faces with MTCNN
            start_time = timeit.default_timer()
            detected = detector.detect_faces(input_img)
            elapsed_time = timeit.default_timer() - start_time
            time_detection = time_detection + elapsed_time
            faces = np.empty((len(detected), img_size, img_size, 3))

            for i, d in enumerate(detected):
                print(i)
                print(d['confidence'])
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    x2 = x1 + w
                    y2 = y1 + h
                    xw1 = max(int(x1 - ad * w), 0)
                    yw1 = max(int(y1 - ad * h), 0)
                    xw2 = min(int(x2 + ad * w), img_w - 1)
                    yw2 = min(int(y2 + ad * h), img_h - 1)
                    cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
                    # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
                    faces[i, :, :, :] = cv2.resize(
                        input_img[yw1:yw2 + 1, xw1:xw2 + 1, :],
                        (img_size, img_size))

            start_time = timeit.default_timer()
            if len(detected) > 0:
                # predict the ages of the detected faces
                results = model.predict(faces)
                predicted_ages = results

            # draw results
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    label = "{}".format(int(predicted_ages[i]))
                    draw_label(input_img, (x1, y1), label)

            elapsed_time = timeit.default_timer() - start_time
            time_network = time_network + elapsed_time

            start_time = timeit.default_timer()
            if pyFlag == '2':
                img_clip = ImageClip(input_img)
                img_clip.show()
                cv2.imwrite('img/' + str(img_idx) + '.png',
                            cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR))
            elif pyFlag == '3':
                cv2.imshow("result", input_img)
                cv2.imwrite('img/' + str(img_idx) + '.png',
                            cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR))
            elapsed_time = timeit.default_timer() - start_time
            time_plot = time_plot + elapsed_time
        else:
            # reuse the last detections on skipped frames
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    x2 = x1 + w
                    y2 = y1 + h
                    xw1 = max(int(x1 - ad * w), 0)
                    yw1 = max(int(y1 - ad * h), 0)
                    xw2 = min(int(x2 + ad * w), img_w - 1)
                    yw2 = min(int(y2 + ad * h), img_h - 1)
                    cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
                    # cv2.rectangle(img, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
                    faces[i, :, :, :] = cv2.resize(
                        input_img[yw1:yw2 + 1, xw1:xw2 + 1, :],
                        (img_size, img_size))

            # draw results
            for i, d in enumerate(detected):
                if d['confidence'] > 0.95:
                    x1, y1, w, h = d['box']
                    label = "{}".format(int(predicted_ages[i]))
                    draw_label(input_img, (x1, y1), label)

            start_time = timeit.default_timer()
            if pyFlag == '2':
                img_clip = ImageClip(input_img)
                img_clip.show()
            elif pyFlag == '3':
                cv2.imshow("result", input_img)
            elapsed_time = timeit.default_timer() - start_time
            time_plot = time_plot + elapsed_time

        # report the average fps of each stage
        print('avefps_time_detection:', img_idx / time_detection)
        print('avefps_time_network:', img_idx / time_network)
        print('avefps_time_plot:', img_idx / time_plot)
        print('===============================')
        if pyFlag == '3':
            key = cv2.waitKey(30)
            if key == 27:
                break
focalLength = 350.0
video_capture = cv2.VideoCapture(0)
# create the detector, using default weights
detector = MTCNN()

while True:
    ret, frame = video_capture.read()
    h, w, _ = frame.shape
    # preprocess the acquired frame
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (640, 480))
    faces = detector.detect_faces(img)
    for index, i in enumerate(faces):
        x, y, width, height = i['box']
        x1 = int(x)
        x2 = int(x) + int(width)
        y1 = int(y)
        y2 = int(y) + int(height)
        # estimate the distance to the camera
        R = calculate_R_bbox(width, height)
        # NOTE: `r` (the known real-world size) is not defined in this snippet
        cm = distance_to_camera(r, focalLength, R)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (80, 18, 236), 2)
        cv2.rectangle(frame, (x1, y2 - 20), (x2, y2), (80, 18, 236), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        text = f"face: {index + 1}"
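
# `calculate_R_bbox` and `distance_to_camera` are not defined in this snippet.
# A minimal sketch of the pinhole-camera estimate they likely implement, assuming
# a known average face width; the names and the 16 cm constant are assumptions:
import math

KNOWN_FACE_WIDTH_CM = 16.0  # assumed average face width; calibrate for your setup

def calculate_R_bbox(width, height):
    # use the bounding-box diagonal as the perceived size in pixels
    return math.sqrt(width ** 2 + height ** 2)

def distance_to_camera(known_size_cm, focal_length, perceived_size_px):
    # pinhole model: distance = known_size * focal_length / perceived_size
    return known_size_cm * focal_length / perceived_size_px

# example: a face whose box diagonal spans 200 px with focalLength = 350.0
print(distance_to_camera(KNOWN_FACE_WIDTH_CM, 350.0, 200.0))  # -> 28.0 cm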
class FacenetEngine(object):
    """
    Facenet engine class
    """
    # class variables
    # the encode vector has 128 dimensions, each value in the range -2..2,
    # so the maximum possible distance is 11.3
    __distance_threshold = 11.0
    __debug_mode = True
    __bound = 18
    __encode_features_vector_length = 128

    def __init__(self):
        """
        Constructor
        """
        cur_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))

        # facenet model path
        model_path = "./model/keras-facenet/model/facenet_keras.h5"
        model_path = os.path.join(cur_dir, model_path)

        # check that the model path exists
        if osp.exists(model_path) is False:
            raise ValueError('{} not Exist'.format(model_path))

        # set the facenet model
        self.__model_path = model_path
        self.model = load_model(self.__model_path, compile=False)

        # transfer learning model
        self.transfer_model = self.make_transfer_learning_model()
        print(self.transfer_model.input)
        print(self.transfer_model.output)

        # create a new detector, using the default mtcnn weights
        self.__detector = MTCNN()

        # set the classifier model
        classifier_filename = "./model/SVM_classifer.pkl"
        classifier_filename = os.path.join(cur_dir, classifier_filename)
        self.__classifier_filename = classifier_filename

        # TODO: remove these
        self.__data_set_path = "../../../dataset/train/japanese/"
        sub_dirs = glob(self.__data_set_path + '*/')
        self.__people = [os.path.dirname(sub_dir) for sub_dir in sub_dirs]

        # get the current data from the DB
        self.all_anchors = db_util.get_all_encode()

    # --------------------------------------------------------------------------------
    # Public functions
    # --------------------------------------------------------------------------------
    def recognize(self, image_path, image_data=None):
        """
        Recognize the given image
        :param image_path:
        :return: name
        """
        errcode, name, user_id, department = 0, 'Unknown', -1, 'Unknown'

        # check arguments
        if image_data is None:
            if osp.exists(image_path) is False:
                raise ValueError('file not found {}'.format(image_path))

        # make the encoding from image_path
        if image_data is None:
            errcode, img_encode = self.make_encode(image_path)
        else:
            errcode, img_encode = self.make_encode(image_path, image_data=image_data)

        if errcode == 0:
            # get all encodings from the DB
            all_anchors = self.all_anchors
            distances = list()
            for anchor in all_anchors:
                if len(anchor['encode']) == FacenetEngine.__encode_features_vector_length:
                    distances.append(distance.euclidean(img_encode, anchor['encode']))

            if FacenetEngine.__debug_mode is True:
                print('img_encode = {}'.format(type(img_encode)))
                print("length of all encodes in database: {}".format(len(all_anchors)))
                print("min of distances = {}".format(min(distances)))
                print("max of distances = {}".format(max(distances)))

            if np.max(distances) < FacenetEngine.__bound:
                if min(distances) < FacenetEngine.__distance_threshold:
                    anchor_idx = distances.index(min(distances))
                    name = all_anchors[anchor_idx]['name']
                    user_id = all_anchors[anchor_idx]['id']
                    department = all_anchors[anchor_idx]['department']
                else:
                    print(distances)
                    print(FacenetEngine.__distance_threshold)

        if FacenetEngine.__debug_mode is True:
            print('name = {}, id = {}, department = {}'.format(name, user_id, department))

        return errcode, name, user_id, department

    '''
    Training SVM
    '''
    def extract_face(self, file_path, image_data=None, required_size=(160, 160)):
        """
        Extract a face for the further steps

        Calling::
            faces = extract_face(file_path)
        Args::
            - file_path: path of the image file
            - required_size: size required by the training model
        Returns::
            - face_array: Numpy array containing the face pixels
        Details::
            - get_trained_data
        """
        errcode, face_array = 0, np.array([])
        if image_data is None:
            # load the image from file
            image = Image.open(file_path)
        else:
            image = image_data

        # convert to RGB, if needed
        img = image.convert('RGB')
        # convert to array
        pixels = asarray(img)
        # detect faces in the image
        results = self.__detector.detect_faces(pixels)
        if len(results) < 1:
            errcode = -1
        else:
            # extract the bounding box, clamped to non-negative coordinates
            x1, y1, width, height = results[0]['box']
            x1, y1 = abs(x1), abs(y1)
            x2, y2 = x1 + width, y1 + height
            # extract the face
            face = pixels[y1:y2, x1:x2]
            # TODO: write out the image below when debugging
            # cv2.imwrite("check.jpg", face)
            # resize pixels to the size required by the further steps
            img = Image.fromarray(face)
            img = img.resize(required_size)
            face_array = asarray(img)
        return errcode, face_array

    def extract_face_for_preprocessing(self, file_path, required_size=(160, 160)):
        """
        Extract a face for the further steps; same behavior as extract_face,
        but always loads the image from file.
        """
        errcode, face_array = 0, np.array([])
        # load the image from file
        image = Image.open(file_path)
        # convert to RGB, if needed
        img = image.convert('RGB')
        # convert to array
        pixels = asarray(img)
        # detect faces in the image
        results = self.__detector.detect_faces(pixels)
        if len(results) < 1:
            errcode = -1
        else:
            # extract the bounding box, clamped to non-negative coordinates
            x1, y1, width, height = results[0]['box']
            x1, y1 = abs(x1), abs(y1)
            x2, y2 = x1 + width, y1 + height
            # extract the face
            face = pixels[y1:y2, x1:x2]
            # resize pixels to the size required by the further steps
            img = Image.fromarray(face)
            img = img.resize(required_size)
            face_array = asarray(img)
        return errcode, face_array

    def load_data_set(self, require_size=(160, 160)):
        """
        Load face images from the data set

        Calling::
            faces = load_faces(directory)
        Returns::
            - asarray(X): Numpy array containing the face pixels
            - asarray(Y): Numpy array containing the labels
        """
        X, Y = list(), list()
        # enumerate folders, one per class
        for subdir in listdir(self.__data_set_path):
            faces = list()
            path = self.__data_set_path + subdir + '/'
            # skip any files that might be in the dir
            if not isdir(path):
                continue
            for name in listdir(path):
                file_path = path + name
                print(file_path)
                # extract the face
                face = self.extract_face(file_path)
                faces.append(face)
            # create labels
            labels = [subdir for _ in range(len(faces))]
            # summarize progress
            print('>loaded %d examples for class: %s' % (len(faces), subdir))
            # store faces and labels
            X.extend(faces)
            Y.extend(labels)
        return asarray(X), asarray(Y)

    def convert(self, faces):
        """
        Encode a faces dataset (160, 160, 3) into 128-d embedding vectors
        """
        new = list()
        # convert each face to an encoding
        for face in faces:
            embed = self.encoding(self.model, face)
            new.append(embed)
        new = np.asarray(new)
        return new

    @staticmethod
    def encoding(model, faces):
        """
        Encode a face with the pretrained facenet model using Keras predict
        """
        # scale pixel values
        faces = faces.astype('float32')
        # standardize pixel values across channels (global)
        mean, std = faces.mean(), faces.std()
        faces = (faces - mean) / std
        # turn the face into a single-sample batch
        samples = np.expand_dims(faces, axis=0)
        # make a prediction to get the encoding
        Y_hat = model.predict(samples)
        # TODO: decide whether normalization is needed here
        # Y_hat_norm = [((i - min(Y_hat[0])) / (max(Y_hat[0]) - min(Y_hat[0]))) for i in Y_hat[0]]
        return Y_hat[0]

    @staticmethod
    def l2_normalizer(x, axis=-1, epsilon=1e-10):
        """
        L2 normalization
        """
        output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
        return output

    def make_faces_encoding_labels(self):
        """
        Convert images into 128-dimensional feature vectors
        """
        faces, labels = self.load_data_set()
        print(faces.shape)   # number of samples, 160, 160, 3
        print(labels.shape)  # number of samples

        # encode the faces
        faces_encoding = self.convert(faces)
        print(faces_encoding.shape)  # number of samples, 128

        # normalize
        faces_encoding = self.l2_normalizer(faces_encoding)
        return faces_encoding, labels

    def train(self):
        """
        Train the SVM model on the given dataset
        """
        encodes = db_util.get_all_encode()
        faces_encoding = []
        labels = []
        for encode in encodes:
            if len(encode['encode']) == FacenetEngine.__encode_features_vector_length:
                faces_encoding.append(encode['encode'])
                labels.append(encode['id'])
            else:
                print("length is not {} encode: {}".format(
                    FacenetEngine.__encode_features_vector_length,
                    len(encode['encode'])))

        # label-encode the targets
        encoder = LabelEncoder()
        encoder.fit(labels)
        normalized_labels = encoder.transform(labels)
        normalized_labels = np.array(normalized_labels)
        faces_encoding = np.array(faces_encoding)

        # fit the SVM model
        model = SVC(kernel='linear', probability=True)
        model.fit(faces_encoding, normalized_labels)
        joblib.dump(model, self.__classifier_filename)
        print('Save')

    def preprocessing(self, input_folder, output_folder):
        """
        Extract a face from each input image and save it as an output image

        Args:
            - input_folder(str): path of the input data folder (all images in
              all sub dirs of the input folder are processed)
            - output_folder(str): path of the output data folder (its structure
              mirrors the input data folder)
        Details:
            - input image size: any
            - output image size: 160*160*3 (RGB)
        """
        for cur, dirs, _ in os.walk(input_folder):
            for sub_dir in dirs:
                for curDir, subDirs, files in os.walk(os.path.join(input_folder, sub_dir)):
                    for file in files:
                        file_path = os.path.join(curDir, file)
                        filename, file_extension = os.path.splitext(file_path)
                        out_path = os.path.join(output_folder, sub_dir)
                        if not os.path.exists(out_path):
                            os.mkdir(out_path)
                        output_file_path = os.path.join(out_path, file)
                        if 'jpeg' in file_extension:
                            errcode, face = self.extract_face_for_preprocessing(file_path)
                            if errcode == 0:
                                try:
                                    pil_img = Image.fromarray(face)
                                    pil_img.save(output_file_path)
                                except Exception as e:
                                    print("process image {} get error {}".format(file, e))
                            else:
                                print("process image {} get error when extracting face".format(file))

    def make_transfer_learning_model(self):
        """
        Make a transfer learning model from facenet
        input: 160,160,3
        output: 128
        """
        model = self.model
        # freeze the layers
        for layer in model.layers[:424]:
            layer.trainable = False
        model.layers.pop()

        # add custom layers
        x = model.layers[-1].output
        predictions = Dense(26, activation="softmax",
                            kernel_regularizer=regularizers.l2(0.01))(x)

        # create the final model
        model_final = Model(inputs=model.input, outputs=predictions)
        return model_final

    def transfer_learning(self, train_data_dir, validation_data_dir, epochs):
        # compile the model
        self.transfer_model.compile(loss="categorical_crossentropy",
                                    optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                                    metrics=["accuracy"])

        # initiate the train and test generators with data augmentation;
        # save the model according to the conditions below
        checkpoint = ModelCheckpoint("facenet_transfer_weight.h5",
                                     monitor='val_accuracy',
                                     verbose=2,
                                     save_best_only=True,
                                     save_weights_only=False,
                                     mode='auto',
                                     period=1)
        early = EarlyStopping(monitor='val_accuracy',
                              min_delta=0,
                              patience=100,
                              verbose=1,
                              mode='auto')

        temp_path = os.path.join(os.getcwd(), "temp")
        train_data_path = os.path.join(temp_path, "train")
        val_data_path = os.path.join(temp_path, "val")

        # run preprocessing when the temp dir does not exist
        if not os.path.exists(temp_path):
            os.mkdir(temp_path)
        if not os.path.exists(train_data_path):
            os.mkdir(train_data_path)
        if not os.path.exists(val_data_path):
            os.mkdir(val_data_path)
        self.preprocessing(train_data_dir, train_data_path)
        self.preprocessing(validation_data_dir, val_data_path)

        train_datagen = ImageDataGenerator(
            featurewise_center=True,
            featurewise_std_normalization=True,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True)
        test_datagen = ImageDataGenerator(rescale=1. / 255)

        train_generator = train_datagen.flow_from_directory(
            train_data_path,
            target_size=(160, 160),
            batch_size=32,
            class_mode="categorical")
        validation_generator = test_datagen.flow_from_directory(
            val_data_path,
            target_size=(160, 160),
            class_mode="categorical")

        # train the model
        history = self.transfer_model.fit_generator(
            train_generator,
            steps_per_epoch=2,
            epochs=epochs,
            validation_data=validation_generator,
            validation_steps=2,
            callbacks=[checkpoint, early])
        return history

    '''
    Predicting
    '''
    def make_encode(self, input_image, image_data=None):
        """
        Make an embedding vector (128 dimensions) from one image
        """
        errcode, embed, face_img_receiver_mode = 0, np.array([]), True
        if image_data is None:
            errcode, face = self.extract_face(input_image)
            if errcode == 0:
                embed = self.convert([face])
        else:
            # TODO: (CongThanh) Consider when merging with facenet_engine.py
            # add-in for the face data receiver
            # NOTE: assumes the image shape equals the face shape
            if face_img_receiver_mode:
                img = image_data.convert('RGB')
                # convert to array
                pixels = asarray(img)
                embed = self.convert([pixels])
            else:
                errcode, face = self.extract_face(input_image, image_data=image_data)
                if errcode == 0:
                    embed = self.convert([face])
        return errcode, embed

    def predict(self, input_image):
        """
        Predict the class of the input image using the model pretrained on the
        Japanese dataset
        """
        errcode, predictions = 0, None
        errcode, embed = self.make_encode(input_image)
        if errcode == 0:
            model = joblib.load(self.__classifier_filename)
            predictions = model.predict_proba(embed)
        return errcode, predictions
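
# A minimal driver sketch for FacenetEngine, assuming the model files and the
# db_util encode database referenced in the constructor are in place; the image
# path below is illustrative.
engine = FacenetEngine()

# fit the SVM on the encodings already stored in the DB
engine.train()

# recognize a new image from disk
errcode, name, user_id, department = engine.recognize('sample/face.jpg')
if errcode == 0:
    print('matched {} (id={}, department={})'.format(name, user_id, department))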
def get_pixels(image):
    # average the R, G, B values over all pixels
    r, g, b = 0, 0, 0
    count = 0
    for img in image:
        colors = image[img]
        r += colors[0]
        g += colors[1]
        b += colors[2]
        count += 1
    return (r / count), (g / count), (b / count), count


while True:
    ret, frame = cap.read()
    detect_face = detector.detect_faces(frame)
    if len(detect_face) > 0:
        x = detect_face[0]['box'][0]
        y = detect_face[0]['box'][1]
        width = detect_face[0]['box'][2]
        height = detect_face[0]['box'][3]
        # crop the face with a small margin around the box
        crop_face = frame[y - 14:y + height + 14, x - 6:x + width + 10]
        cv2.imwrite('Rostro.jpg', crop_face)
        break
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
# min_YCrCb = np.array([0,133,77],np.uint8)
    return image


with progressbar.ProgressBar(maxval=frames) as bar:
    n = 0
    while vidin.isOpened():
        ret, frame = vidin.read()
        if ret is True:
            rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            bgr_frame = rgb_to_gray(rgb_image)

            # start timer
            start_time = time.time()
            dets = detector.detect_faces(rgb_image)
            for det in dets:
                bgr_frame = draw_result(bgr_frame, det)
                results[n] = det
                break
            # calculate elapsed time
            elapsed_time = time.time() - start_time

            cv2.putText(bgr_frame, "Video FPS rate is {}".format(fps),
                        (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                        (0, 125, 0), 1, cv2.LINE_AA)
            cv2.putText(bgr_frame, "{:d} total frames".format(int(frames)),
                        (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                        (0, 125, 0), 1, cv2.LINE_AA)
    async def _real_time_recognize(self, width, height, dist_metric, logging,
                                   use_dynamic, use_picam, use_graphics,
                                   use_lcd, use_keypad, framerate, resize,
                                   flip, device):
        """Real-time facial recognition under the hood (dev use only)

        :param width: width of frame (only matters if use_graphics is True)
        :param height: height of frame (only matters if use_graphics is True)
        :param dist_metric: DistMetric object
        :param logging: logging type-- None, "firebase", or "mysql"
        :param use_dynamic: use dynamic database for visitors or not
        :param use_picam: use picamera or not
        :param use_graphics: display video feed or not
        :param use_lcd: use LCD or not. If LCD is not connected, will default to LCD simulation and warn
        :param use_keypad: use keypad or not. If keypad is not connected, will default to False and warn
        :param framerate: frame rate (recommended <120)
        :param resize: resize scale (float between 0. and 1.)
        :param flip: flip method: +1 = +90º rotation
        :param device: camera device (/dev/video{device})
        :returns: number of frames elapsed
        """

        # INITS
        db_types = ["static"]
        if use_dynamic:
            db_types.append("dynamic")
        if logging:
            log.init(logging, flush=True)
            log.server_init()
        if use_lcd:
            lcd.init()
        if use_keypad:
            keypad.init()
        if dist_metric:
            self.set_dist_metric(dist_metric)

        if resize:
            mtcnn_width, mtcnn_height = width * resize, height * resize
        else:
            mtcnn_width, mtcnn_height = width, height

        cap = self.get_video_cap(width, height, picamera=use_picam,
                                 framerate=framerate, flip=flip, device=device)
        assert cap.isOpened(), "video capture failed to initialize"

        # face needs to fill at least 1/3 of the frame
        mtcnn = MTCNN(min_face_size=0.5 * (mtcnn_width + mtcnn_height) / 3)

        missed_frames = 0
        frames = 0
        last_gpu_checkup = time.time()

        # CAM LOOP
        while True:
            _, frame = cap.read()
            original_frame = frame.copy()

            if resize:
                frame = cv2.resize(frame, (0, 0), fx=resize, fy=resize)

            if use_picam:
                # make sure computation is performed periodically to keep the GPU
                # "warm" (i.e., constantly active); otherwise, recognition times
                # can be slow when spaced out by several minutes
                last_gpu_checkup = self.keep_gpu_warm(frame, frames,
                                                      last_gpu_checkup, use_lcd)

            # use MTCNN to detect faces
            result = mtcnn.detect_faces(frame)

            if result:
                overlay = original_frame.copy()
                person = max(result, key=lambda person: person["confidence"])
                face = person["box"]

                if person["confidence"] < self.HYPERPARAMS["mtcnn_alpha"]:
                    print("{}% face detection confidence is too low".format(
                        round(person["confidence"] * 100, 2)))
                    continue

                # facial recognition
                try:
                    embedding, is_recognized, best_match, dist = self._recognize(
                        frame, db_types, faces=face)
                    print("{}: {} ({}){}".format(
                        self.dist_metric, round(dist, 4), best_match,
                        " !" if not is_recognized else ""))
                except (ValueError, cv2.error) as error:
                    # error handling by message matching is unstable-- change later
                    if "query data dimension" in str(error):
                        raise ValueError("Current model incompatible with database")
                    elif "empty" in str(error):
                        print("Image refresh rate too high")
                    elif "opencv" in str(error):
                        print("Failed to capture frame")
                    else:
                        raise error
                    continue

                # add graphics, LCD output, and logging
                if use_graphics:
                    self.add_graphics(original_frame, overlay, person, width,
                                      height, is_recognized, best_match, resize)

                if use_lcd and is_recognized:
                    lcd.PROGRESS_BAR.update(previous_msg="Recognizing...")

                if use_keypad:
                    if is_recognized:
                        run_async_method(keypad.monitor)
                    elif last_best_match != best_match:
                        keypad.CONFIG["continue"] = False
                        # FIXME:
                        # 1. the lines above should use log.current_log instead
                        #    of making another local variable
                        # 2. the use of 3 is ambiguous-- add it to keypad.CONFIG
                        # 3. keypad.monitor(0) should be replaced with a reset or
                        #    flush function if that's what it does

                if logging and frames > 5:  # five frames before logging starts
                    self.log_activity(is_recognized, best_match, use_dynamic, embedding)
                    log.DISTS.append(dist)

            else:
                missed_frames += 1
                if missed_frames > log.THRESHOLDS["missed_frames"]:
                    missed_frames = 0
                    log.flush_current(mode=["known", "unknown"], flush_times=False)
                print("No face detected")

            if use_graphics:
                cv2.imshow("AI Security v0.9a", original_frame)

            # FIXME: doesn't escape when 'q' is pressed-- maybe because of async?
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

            frames += 1
            await asyncio.sleep(1e-6)

        cap.release()
        cv2.destroyAllWindows()

        return frames
from mtcnn.mtcnn import MTCNN

# draw each face separately
def draw_faces(filename, result_list):
    # load the image
    data = pyplot.imread(filename)
    # plot each face as a subplot
    for i in range(len(result_list)):
        # get coordinates
        x1, y1, width, height = result_list[i]['box']
        x2, y2 = x1 + width, y1 + height
        # define subplot
        pyplot.subplot(1, len(result_list), i + 1)
        pyplot.axis('off')
        # plot face
        pyplot.imshow(data[y1:y2, x1:x2])
    # show the plot
    pyplot.show()

filename = 'images/jun2.jpg'
# load the image from file
pixels = pyplot.imread(filename)
# create the detector
detector = MTCNN()
# detect faces in the image
faces = detector.detect_faces(pixels)
# display the faces
draw_faces(filename, faces)
def main(sess, x, y_smile_conv, y_gender_conv, y_glasses_conv, phase_train, keep_prob):
    detector = MTCNN()
    if int(args['usecamera']) == 1:
        cap = cv2.VideoCapture(0)
        # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        while True:
            # get a video frame
            ret, img = cap.read()
            if not ret:
                print("error: failed to capture image")
                return -1

            # detect and crop the face, convert to gray, resize to 48x48
            original_img = img
            cv2.imshow("result", original_img)
            result = detector.detect_faces(original_img)
            if not result:
                cv2.imshow("result", original_img)
                continue
            face_position = result[0].get('box')
            x_coordinate = face_position[0]
            y_coordinate = face_position[1]
            w_coordinate = face_position[2]
            h_coordinate = face_position[3]
            img = original_img[y_coordinate:y_coordinate + h_coordinate,
                               x_coordinate:x_coordinate + w_coordinate]
            if img.size == 0:
                cv2.imshow("result", original_img)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (48, 48))
            img = (img - 128) / 255.0
            T = np.zeros([48, 48, 1])
            T[:, :, 0] = img
            test_img = []
            test_img.append(T)
            test_img = np.asarray(test_img)

            predict_y_smile_conv = sess.run(y_smile_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})
            predict_y_gender_conv = sess.run(y_gender_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})
            predict_y_glasses_conv = sess.run(y_glasses_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})

            smile_label = "-_-" if np.argmax(predict_y_smile_conv) == 0 else ":)"
            gender_label = "Female" if np.argmax(predict_y_gender_conv) == 0 else "Male"
            glasses_label = 'On Glasses' if np.argmax(predict_y_glasses_conv) == 1 else 'No Glasses'
            label = "{}, {}, {}".format(smile_label, gender_label, glasses_label)

            draw_label(original_img, x_coordinate, y_coordinate, w_coordinate, h_coordinate, label)
            cv2.imshow("result", original_img)
            key = cv2.waitKey(1)
            if key == 27:
                break
    else:
        img_list = os.listdir(args['image'])
        with open('label.csv', 'a') as csv_file:
            writer = csv.writer(csv_file, delimiter=',')
            for img_name in img_list:
                label_list = []
                original_img = cv2.imread(os.path.join(args['image'], img_name))
                result = detector.detect_faces(original_img)
                if not result:
                    print('cannot detect a face in the photo')
                    print(img_name)
                    continue
                face_position = result[0].get('box')
                x_coordinate = face_position[0]
                y_coordinate = face_position[1]
                w_coordinate = face_position[2]
                h_coordinate = face_position[3]
                img = original_img[y_coordinate:y_coordinate + h_coordinate,
                                   x_coordinate:x_coordinate + w_coordinate]
                if img.size == 0:
                    print('cannot crop the face from the photo')
                    continue
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, (48, 48))
                img = (img - 128) / 255.0
                T = np.zeros([48, 48, 1])
                T[:, :, 0] = img
                test_img = []
                test_img.append(T)
                test_img = np.asarray(test_img)

                predict_y_smile_conv = sess.run(y_smile_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})
                predict_y_gender_conv = sess.run(y_gender_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})
                predict_y_glasses_conv = sess.run(y_glasses_conv, feed_dict={x: test_img, phase_train: False, keep_prob: 1})

                label_list.append(img_name)
                label_list.append('-_-' if np.argmax(predict_y_smile_conv) == 0 else ':)')
                label_list.append('Female' if np.argmax(predict_y_gender_conv) == 0 else 'Male')
                label_list.append('On Glasses' if np.argmax(predict_y_glasses_conv) == 1 else 'No Glasses')
                writer.writerow(label_list)

                label = "{}, {}, {}".format(label_list[1], label_list[2], label_list[3])
                draw_label(original_img, x_coordinate, y_coordinate, w_coordinate, h_coordinate, label)
                cv2.imshow("result", original_img)
                key = cv2.waitKey(1)
class Ui_MainWindow(QMainWindow):
    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        # database access
        self.dbWidge = DBWidge()
        self.dbWidge.setHidden(True)
        self.db = PyMySQL('localhost', 'root', 'CockTail', 'TESTDATABASE')

        # camera area
        # face recognition and logging thread
        self.detector = MTCNN()
        self.FaceThread = DetectionThread(self.detector, net)
        # thread for adding a new face
        self.AddFaceThread = AddFaceThread(self.detector, net)

        # timers
        self.timer_camera = QTimer()
        self.timer_camera_counter = 0
        self.timer_clear_label = QTimer()
        self.timer_dynamic_recog = QTimer()
        self.timer_long_name = QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0  # camera used
        self.dynamic_draw_flag = False

        # initialization
        # self.setBackGround()
        self.facelabel_list = []
        self.textlabel_list = []
        self.name_list = []
        self.long_name_list = []
        self.set_ui()
        self.slot_init()
        self.__flag_work = 0
        self.initMenu()
        self.initAnimation()
        self.setStyleSheet(qdarkstyle.load_stylesheet_pyqt5())
        self.timer_clear_label.start(5000)
        self.timer_long_name.start(60000)

    def set_ui(self):
        self.resize(1600, 900)
        self.textBrowser = QtWidgets.QTextBrowser(self)
        self.textBrowser.setGeometry(QtCore.QRect(10, 650, 661, 151))
        self.textBrowser.setObjectName("textBrowser")
        self.textBrowser.setFont(QFont("Timers", 14))

        self.tabWidget = QtWidgets.QScrollArea(self)
        self.tabWidget.setGeometry(QtCore.QRect(670, 40, 500, 800))
        self.tabWidget.setObjectName("tabWidget")
        self.tab = QtWidgets.QWidget()
        self.tab.setMinimumSize(400, 2000)
        self.tab.setObjectName("tab")
        self.tabWidget.setWidget(self.tab)

        self.gridLayoutWidget = QtWidgets.QWidget(self.tab)
        self.gridLayoutWidget.setGeometry(QtCore.QRect(0, 0, 400, 1200))
        self.gridLayoutWidget.setObjectName("gridLayoutWidget")
        self.gridLayout = QtWidgets.QGridLayout(self.gridLayoutWidget)
        self.gridLayout.setContentsMargins(0, 0, 0, 0)
        self.gridLayout.setObjectName("gridLayout")
        self.append_label()
        self.append_label()
        self.append_label()
        self.append_label()

        self.lcdNumber = QtWidgets.QLCDNumber(self)
        self.lcdNumber.setGeometry(QtCore.QRect(470, 40, 201, 41))
        self.lcdNumber.setObjectName("lcdNumber")
        self.lcdNumber.setDigitCount(2)

        self.camera_label = QtWidgets.QLabel(self)
        self.camera_label.setGeometry(QtCore.QRect(10, 90, 661, 551))
        self.camera_label.setObjectName("camera_label")

        self.horizontalLayoutWidget = QtWidgets.QWidget(self)
        self.horizontalLayoutWidget.setGeometry(QtCore.QRect(10, 10, 395, 81))
        self.horizontalLayoutWidget.setObjectName("horizontalLayoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")

        self.pushButton_4 = QtWidgets.QPushButton(self.horizontalLayoutWidget)
        self.pushButton_4.setObjectName("pushButton_4")
        self.pushButton_4.setText("人脸显示")  # "show faces"
        self.horizontalLayout.addWidget(self.pushButton_4)
        self.pushButton_3 = QtWidgets.QPushButton(self.horizontalLayoutWidget)
        self.pushButton_3.setObjectName("pushButton_3")
        self.pushButton_3.setText("手动签到")  # "manual check-in"
        self.horizontalLayout.addWidget(self.pushButton_3)
        self.pushButton_2 = QtWidgets.QPushButton(self.horizontalLayoutWidget)
        self.pushButton_2.setObjectName("pushButton_2")
        self.pushButton_2.setText("清除列表")  # "clear list"
        self.horizontalLayout.addWidget(self.pushButton_2)
        self.pushButton = QtWidgets.QPushButton(self.horizontalLayoutWidget)
        self.pushButton.setObjectName("pushButton")
        self.pushButton.setText("开启相机")  # "open camera"
        self.horizontalLayout.addWidget(self.pushButton)

    def append_label(self):
        label_num = len(self.facelabel_list)
        temp_text = QLabel(self.gridLayoutWidget)
        temp_text.setFont(QFont("Timers", 12))
        self.facelabel_list.append(QLabel(self.gridLayoutWidget))
        self.textlabel_list.append(temp_text)
        self.gridLayout.addWidget(self.facelabel_list[-1], label_num, 0, 1, 1)
        self.gridLayout.addWidget(self.textlabel_list[-1], label_num, 1, 1, 1)

    def contextMenuEvent(self, event):
        pos = event.globalPos()
        size = self._contextMenu.sizeHint()
        x, y, w, h = pos.x(), pos.y(), size.width(), size.height()
        self._animation.stop()
        self._animation.setStartValue(QRect(x, y, 0, 0))
        self._animation.setEndValue(QRect(x, y, w, h))
        self._animation.start()
        self._contextMenu.exec_(event.globalPos())

    def initMenu(self):
        self._contextMenu = QMenu(self)
        self.ac_open_cama = self._contextMenu.addAction('打开相机', self.CameraOperation)
        self.ac_detection = self._contextMenu.addAction('一键签到', self.Checkin)
        self.ac_Addface = self._contextMenu.addAction('添加新人脸', self.AddFace)
        self.ac_DynamicRecog = self._contextMenu.addAction('关闭动态识别', self.DynamicRecogOn)
        self.ac_dbManager = self._contextMenu.addAction('数据库操作', self.openDBmanager)
        self.ac_delete_text = self._contextMenu.addAction('删除信息显示', self.clear_all_text)

    def initAnimation(self):
        # context-menu animation; change easingCurve for different effects
        self._animation = QPropertyAnimation(self._contextMenu,
                                             b'geometry',
                                             self,
                                             easingCurve=QEasingCurve.Linear,
                                             duration=300)

    # wire up the signal/slot connections
    def slot_init(self):
        self.timer_camera.timeout.connect(self.show_camera)
        self.timer_camera.timeout.connect(self.frame_count)
        self.timer_clear_label.timeout.connect(self.del_instant_label)
        self.timer_dynamic_recog.timeout.connect(self.Checkin)
        self.timer_long_name.timeout.connect(self.del_long_name)
        # once the recognition thread finishes, show the result in the tab widget on the right
        self.AddFaceThread.No_face.connect(self.TextShowNoFace)
        self.FaceThread.Bound_Name.connect(self.ShowInTab)
        self.FaceThread.Face_Count.connect(self.ShowInLCD)
        self.pushButton.clicked.connect(self.CameraOperation)
        self.pushButton_2.clicked.connect(self.clear_all_label)
        self.pushButton_3.clicked.connect(self.Checkin)
        self.pushButton_4.clicked.connect(self.OpenDraw)

    def OpenDraw(self):
        self.dynamic_draw_flag = 1 - self.dynamic_draw_flag

    def frame_count(self):
        if self.timer_camera_counter is None:
            self.timer_camera_counter = 0
        else:
            self.timer_camera_counter = self.timer_camera_counter + 1
        if self.timer_camera_counter >= 5:
            self.timer_camera_counter = 0

    def ShowInLCD(self, number):
        self.lcdNumber.display(number)

    def TextShowNoFace(self):
        self.textBrowser.insertPlainText("未检测到人脸,请重试")  # "no face detected, please retry"

    def openDBmanager(self):
        if self.dbWidge.isHidden():
            self.dbWidge.setHidden(False)

    def del_instant_label(self):
        # remove the first label; the remaining labels shift up
        if not self.textlabel_list[0].text():
            return
        self.facelabel_list[0].clear()
        name = self.textlabel_list[0].text().split('#')[1]
        self.textlabel_list[0].clear()
        self.name_list.remove(name)
        for i in range(len(self.textlabel_list) - 1):
            print(self.textlabel_list[i].text())
            if self.textlabel_list[i + 1].text():
                print('p2')
                self.facelabel_list[i].setPixmap(self.facelabel_list[i + 1].pixmap())
                self.textlabel_list[i].setText(self.textlabel_list[i + 1].text())
                self.textlabel_list[i + 1].clear()
                self.facelabel_list[i + 1].clear()

    def AddFace(self):
        if self.timer_camera.isActive() == False:
            flag = self.cap.open(self.CAM_NUM)
            if flag == False:
                msg = QtWidgets.QMessageBox.warning(
                    self, u"Warning",
                    u"Please check you have connected your camera",
                    buttons=QtWidgets.QMessageBox.Ok,
                    defaultButton=QtWidgets.QMessageBox.Ok)
        else:
            self.Addface_img = self.image.copy()
            try:
                self.AddFaceThread.SetImg(self.Addface_img)
            except:
                pass

    def DynamicRecogOn(self):
        if self.timer_camera.isActive() == False:
            msg = QtWidgets.QMessageBox.warning(
                self, u"warning", u"没有检测到摄像头",
                buttons=QtWidgets.QMessageBox.Ok,
                defaultButton=QtWidgets.QMessageBox.Ok)
        else:
            if self.timer_dynamic_recog.isActive() == False:
                self.timer_dynamic_recog.start(400)
                self.ac_DynamicRecog.setText('关闭动态识别')
            else:
                self.timer_dynamic_recog.stop()
                self.ac_DynamicRecog.setText('开启动态识别')

    def CameraOperation(self):
        if self.timer_camera.isActive() == False:
            flag = self.cap.open(self.CAM_NUM)
            if flag == False:
                msg = QtWidgets.QMessageBox.warning(
                    self, u"Warning",
                    u"Please check you have connected your camera",
                    buttons=QtWidgets.QMessageBox.Ok,
                    defaultButton=QtWidgets.QMessageBox.Ok)
            else:
                self.timer_camera.start(75)
                self.timer_dynamic_recog.start(400)
                self.ac_DynamicRecog.setText('关闭动态识别')
                self.ac_open_cama.setText('关闭摄像头')
                self.pushButton.setText('关闭摄像头')
        else:
            if self.timer_dynamic_recog.isActive():
                self.timer_dynamic_recog.stop()
                self.ac_DynamicRecog.setText('开启动态识别')
            self.timer_camera.stop()
            self.cap.release()
            self.camera_label.clear()
            self.ac_open_cama.setText('打开摄像头')
            self.pushButton.setText('打开摄像头')

    # camera display
    def show_camera(self):
        flag, self.image = self.cap.read()
        if self.dynamic_draw_flag:
            if self.timer_camera_counter == 0:
                self.draw_face_rec()
        show = cv2.resize(self.image, (800, 600))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QImage.Format_RGB888)
        self.camera_label.setPixmap(QtGui.QPixmap.fromImage(showImage))

    def draw_face_rec(self):
        result = self.detector.detect_faces(self.image)
        if len(result) == 0:
            return
        for face in result:
            bouding_boxes = face['box']
            for axis in bouding_boxes:
                if (axis <= 0 or axis >= self.image.shape[0] - 1
                        or axis >= self.image.shape[1] - 1):
                    return
            cv2.rectangle(self.image,
                          (bouding_boxes[0], bouding_boxes[1]),
                          (bouding_boxes[0] + bouding_boxes[2],
                           bouding_boxes[1] + bouding_boxes[3]),
                          (255, 0, 0), 2)

    def Checkin(self):
        if self.timer_camera.isActive() == False:
            msg = QtWidgets.QMessageBox.warning(
                self, u"warning", u"没有检测到摄像头",
                buttons=QtWidgets.QMessageBox.Ok,
                defaultButton=QtWidgets.QMessageBox.Ok)
        else:
            # start the recognition thread
            self.RecogImage = self.image.copy()
            try:
                self.FaceThread.SetImg(self.image)
            except:
                pass

    # def button_wrtieface_click(self):
    #     if self.timer_camera.isActive() == False:
    #         msg = QtWidgets.QMessageBox.warning(self, u"Warning", u"Please open your camara ",
    #                                             buttons=QtWidgets.QMessageBox.Ok,
    #                                             defaultButton=QtWidgets.QMessageBox.Ok)
    #     else:
    #         name, ok = QInputDialog.getText(self, "Your name ", "Your name",
    #                                         QLineEdit.Normal, self.nameLable.text())
    #         if ok and (len(name) != 0):
    #             add_new_face(self.image, name)

    def ShowInTab(self, bound0, bound1, bound2, bound3, name):
        try:
            face = self.RecogImage[bound1:bound1 + bound3, bound0:bound0 + bound2]
            show = cv2.resize(face, (200, 200))
            show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                     QtGui.QImage.Format_RGB888)
            pix = mask_image(show)
            if len(self.textlabel_list) == 0:
                self.append_label()
        except:
            return
        try:
            if self.check_name(name) == True:
                for i, text_label in enumerate(self.textlabel_list):
                    if not text_label.text():
                        print('doing')
                        self.facelabel_list[i].setPixmap(pix)
                        tx = time.strftime('%Y-%m-%d\n%H:%M:%S')
                        all_str = '姓名:#' + name + '#\n' + '时间:' + tx
                        text_label.setText(all_str)
                        break
                    if i == len(self.textlabel_list) - 1 and text_label.text():
                        self.append_label()
                        print('1')
                        self.facelabel_list[-1].setPixmap(pix)
                        tx = time.strftime('%Y-%m-%d\n%H:%M:%S')
                        all_str = '姓名:#' + name + '#\n' + '时间:' + tx
                        self.textlabel_list[-1].setText(all_str)
        except:
            return

    def check_name(self, name):
        if name not in self.long_name_list:
            if name == 'Unknown':
                tx1 = time.strftime('%Y-%m-%d %H:%M:%S')
                str_1 = '检测到未知人员于' + tx1 + '出现\n'
                # self.textBrowser.insertPlainText(str_1)
                # self.textBrowser.verticalScrollBar().setValue(self.textBrowser.verticalScrollBar().maximum())
            else:
                self.long_name_list.append(name)
                tx1 = time.strftime('%Y-%m-%d %H:%M:%S')
                str_1 = '检测到#' + name + '#于' + tx1 + '出现\n'
                self.textBrowser.insertPlainText(str_1)
                self.textBrowser.verticalScrollBar().setValue(
                    self.textBrowser.verticalScrollBar().maximum())
        if name not in self.name_list:
            self.name_list.append(name)
            return True
        else:
            return False

    def del_long_name(self):
        self.long_name_list.clear()

    def closeEvent(self, event):
        ok = QtWidgets.QPushButton()
        cacel = QtWidgets.QPushButton()
        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, u"关闭", u"关闭?")
        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cacel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'是')    # "yes"
        cacel.setText(u'否')  # "no"
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()

    def clear_all_label(self):
        self.facelabel_list = []
        self.textlabel_list = []

    def clear_all_text(self):
        self.textBrowser.clear()

    def draw_face(self, src, radius):
        if src is None:
            return
        si = QSize(2 * radius, 2 * radius)
        mask = QBitmap(si)
        painter = QPainter(mask)
        painter.setRenderHint(QPainter.Antialiasing)
        painter.setRenderHint(QPainter.SmoothPixmapTransform)
        painter.fillRect(0, 0, si.width(), si.height(), Qt.white)
        painter.setBrush(QColor(0, 0, 0))
        painter.drawRoundedRect(0, 0, si.width(), si.height(), 99, 99)
        image = QPixmap(src.scaled(si))
        image.setMask(mask)
        return image

    def label_draw_face(self, label, image):
        label.setMaximumSize(200, 200)
        label.setMinimumSize(200, 200)
        label.radius = 100
        target = QPixmap(label.size())
        target.fill(Qt.transparent)
        p = QPixmap.fromImage(image).scaled(200, 200,
                                            Qt.KeepAspectRatioByExpanding,
                                            Qt.SmoothTransformation)
        painter = QPainter(target)
        painter.setRenderHint(QPainter.Antialiasing, True)
        painter.setRenderHint(QPainter.HighQualityAntialiasing, True)
        painter.setRenderHint(QPainter.SmoothPixmapTransform, True)
        path = QPainterPath()
        path.addRoundedRect(0, 0, label.width(), label.height(),
                            label.radius, label.radius)
        painter.setClipPath(path)
        painter.drawPixmap(0, 0, p)
        label.setPixmap(target)
        print('ck1')
        return label
def detect_face_mtcnn(img):
    pixels = plt.imread(img)
    detector = MTCNN()
    faces = detector.detect_faces(pixels)
    draw_image_with_boxes(img, faces)
    draw_faces(img, faces)
save_width = 800
save_height = int(800 / frame_width * frame_height)
video_out = cv2.VideoWriter(args.video_out,
                            cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                            24, (save_width, save_height))

while True:
    ret, frame = cap.read()
    frames += 1
    frame = cv2.resize(frame, (save_width, save_height))
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # run detection every third frame
    if frames % 3 == 0:
        trackers = []
        texts = []

        detect_tick = time.time()
        bboxes = detector.detect_faces(frame)
        detect_tock = time.time()

        if len(bboxes) != 0:
            reco_tick = time.time()
            for bboxe in bboxes:
                bbox = bboxe['box']
                bbox = np.array([bbox[0], bbox[1],
                                 bbox[0] + bbox[2], bbox[1] + bbox[3]])
                landmarks = bboxe['keypoints']
                # flatten the five keypoints into [x1..x5, y1..y5], then reshape to (5, 2)
                landmarks = np.array([landmarks["left_eye"][0], landmarks["right_eye"][0],
                                      landmarks["nose"][0], landmarks["mouth_left"][0],
                                      landmarks["mouth_right"][0],
                                      landmarks["left_eye"][1], landmarks["right_eye"][1],
                                      landmarks["nose"][1], landmarks["mouth_left"][1],
                                      landmarks["mouth_right"][1]])
                landmarks = landmarks.reshape((2, 5)).T
                nimg = face_preprocess.preprocess(frame, bbox, landmarks,
                                                  image_size='112,112')
                nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
                nimg = np.transpose(nimg, (2, 0, 1))
                embedding = embedding_model.get_feature(nimg).reshape(1, -1)
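
# The loop above stops at the embedding. A minimal sketch of how such an
# embedding is typically matched against a gallery with cosine similarity;
# the gallery layout and the 0.5 threshold are assumptions, not this repo's code.
import numpy as np

def cosine_similarity(a, b):
    # cosine of the angle between two embedding vectors
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def match_embedding(embedding, gallery, threshold=0.5):
    # `gallery` maps name -> reference embedding (1-D numpy array);
    # returns 'Unknown' when nothing clears the threshold
    best_name, best_score = "Unknown", threshold
    for name, ref in gallery.items():
        score = cosine_similarity(embedding.ravel(), ref)
        if score > best_score:
            best_name, best_score = name, score
    return best_name, best_score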
def frames():
    # create the face detection network
    detector = MTCNN()
    # load the face embedding model
    embedder = keras.models.load_model(
        'static/bin/python_facedars_master/model/keras/facenet_keras.h5',
        compile=False)

    # compute the embedding for a face
    def get_distance(model, face):
        face = face.astype('float32')
        face = (face - face.mean()) / face.std()
        face = numpy.expand_dims(face, axis=0)
        return embedder.predict(face)[0]

    '''
    Build the database of known faces
    '''
    base = {}
    file = open('static/bin/python_facedars_master/demo/people/names.txt',
                'r', encoding='utf-8')
    names = file.read().split(',')
    file.close()
    for dirname in names:
        base[dirname] = []
        for file in os.listdir('static/bin/python_facedars_master/demo/people/' + dirname):
            if file.endswith('.jpg'):
                # load the image containing a face
                image = cv2.imread('static/bin/python_facedars_master/demo/people/'
                                   + dirname + '/' + file)
                # get the image dimensions
                image_size = numpy.asarray(image.shape)[0:2]
                # get the list of faces with coordinates and confidence values
                faces_boxes = detector.detect_faces(image)

                # work with the faces
                if faces_boxes:
                    # face coordinates
                    x, y, w, h = faces_boxes[0]['box']

                    # square up the face crop
                    d = h - w  # difference between height and width
                    w = w + d  # make the crop square
                    x = numpy.maximum(x - round(d / 2), 0)
                    x1 = numpy.maximum(x, 0)
                    y1 = numpy.maximum(y, 0)
                    x2 = numpy.minimum(x + w, image_size[1])
                    y2 = numpy.minimum(y + h, image_size[0])

                    # get the image of the face
                    cropped = image[y1:y2, x1:x2, :]

                    # store the embedding
                    # NOTE: this encodes the full image; `cropped` was probably intended
                    base[dirname].append(get_distance(embedder, image))

    '''
    FACE RECOGNITION
    '''
    def faces(name_new_face):
        # load the photo
        frame = cv2.imread(
            'static/bin/python_facedars_master/demo/recognition_video/input/objects.jpg')

        # scale the smallest side of the image to 1000 pixels
        if frame.shape[0] < frame.shape[1]:
            frame = imutils.resize(frame, height=1000)
        else:
            frame = imutils.resize(frame, width=1000)

        # get the image dimensions
        image_size = numpy.asarray(frame.shape)[0:2]

        # get the list of faces with coordinates and confidence values
        faces_boxes = detector.detect_faces(frame)

        # copy of the image to draw frames on
        image_detected = frame.copy()

        # swap BGR for RGB (this way roughly twice as many faces are found)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # work with the faces
        if faces_boxes:
            face_kol1 = 0
            for face_box in faces_boxes:
                face_kol1 += 1

                # increment the file counter
                global face_n
                face_n += 1

                # face coordinates
                x, y, w, h = face_box['box']

                # square up the face crop
                d = h - w  # difference between height and width
                w = w + d  # make the crop square
                x = numpy.maximum(x - round(d / 2), 0)
                x1 = numpy.maximum(x, 0)
                y1 = numpy.maximum(y, 0)
                x2 = numpy.minimum(x + w, image_size[1])
                y2 = numpy.minimum(y + h, image_size[0])

                # get the image of the face
                cropped = frame[y1:y2, x1:x2, :]
                face_image = cv2.resize(cropped, (160, 160),
                                        interpolation=cv2.INTER_AREA)

                # face coordinates again, with margins to enlarge the frame
                x, y, w, h = face_box['box']
                d = h - w
                w = w + d
                x = numpy.maximum(x - round(d / 2), 0)
                x1 = numpy.maximum(x - round(w / 4), 0)
                y1 = numpy.maximum(y - round(h / 4), 0)
                x2 = numpy.minimum(x + w + round(w / 4), image_size[1])
                y2 = numpy.minimum(y + h + round(h / 4), image_size[0])

                # filter faces {selected|rejected}
                # confidence is the network's certainty that this is a face
                if face_box['confidence'] > 0.80:
                    cv2.putText(image_detected, 'None', (x1 + 10, y2 + 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
                    # draw a red rectangle on the image at the coordinates
                    cv2.rectangle(image_detected, (x1, y1), (x2, y2),
                                  (0, 0, 255, 1), 1)

                    # save the image of the new face
                    cv2.imwrite('static/bin/python_facedars_master/demo/people/'
                                + name_new_face + '/' + '(' + str(face_n) + ')' + '.jpg',
                                face_image)

    '''
    Capture images from the camera
    '''
    global face_n
    face_n = 0
    file = open('static/bin/python_facedars_master/demo/people/new_name.txt',
                'r', encoding='utf-8')
    name_new_face = file.read()
    file.close()
    os.mkdir("static/bin/python_facedars_master/demo/people/" + name_new_face)
    file = open('static/bin/python_facedars_master/demo/people/names.txt',
                'a', encoding='utf-8')
    file.write(',' + name_new_face)
    file.close()

    # turn on the camera
    cap = cv2.VideoCapture(0)
    while True:
        yield 'Fuuuu'  # a yield is needed so the generator keeps running
        # take a snapshot
        ret, cadr = cap.read()
        # write it to a file
        cv2.imwrite('static/bin/python_facedars_master/demo/recognition_video/input/objects.jpg',
                    cadr)
        faces(name_new_face)
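
# The generator above builds `base` but never shows the matching step. A minimal
# sketch of nearest-neighbor matching by Euclidean distance over that structure;
# the threshold value is an assumption.
from scipy.spatial import distance as scipy_distance

def match_face(embedding, base, threshold=11.0):
    # `base` maps name -> list of reference embeddings;
    # returns (None, threshold) when no reference is close enough
    best_name, best_dist = None, threshold
    for name, refs in base.items():
        for ref in refs:
            d = scipy_distance.euclidean(embedding, ref)
            if d < best_dist:
                best_name, best_dist = name, d
    return best_name, best_dist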
args = vars(ap.parse_args())

# initialize the neural network for face detection
net = MTCNN()

# load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
model = load_model(args["model"])

# load the input image from disk, clone it, and grab the image spatial
# dimensions
image = cv2.imread(args["image"])
orig = image.copy()
(h, w) = image.shape[:2]

detections = net.detect_faces(image)

# loop over the detections
for i in range(0, len(detections)):
    # extract the confidence (i.e., probability) associated with
    # the detection
    confidence = detections[i]['confidence']

    # filter out weak detections by ensuring the confidence is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        # compute the (x, y)-coordinates of the bounding box for
        # the object
        (startX, startY, w, h) = detections[i]['box']
        (endX, endY) = (startX + w, startY + h)

        # extract the face ROI, convert it from BGR to RGB channel
        # ordering, resize it to 224x224, and preprocess it
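        # The snippet ends mid-step. A minimal sketch of the ROI extraction the
        # final comment describes, assuming MobileNetV2-style preprocessing and a
        # two-class mask/no-mask output head -- both assumptions, not confirmed
        # by the snippet (imports shown inline for self-containment).
        from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
        from tensorflow.keras.preprocessing.image import img_to_array
        import numpy as np

        face = image[startY:endY, startX:endX]
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = preprocess_input(face)
        face = np.expand_dims(face, axis=0)

        # run the mask detector on the face ROI (assumed two-class output)
        (mask, withoutMask) = model.predict(face)[0]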
parser = argparse.ArgumentParser()
parser.add_argument("--source_video", "-i", type=str)
parser.add_argument("--output_blur_img_folder", "-o", type=str)
args = parser.parse_args()

blur_path = args.output_blur_img_folder
reset(blur_path)
reset(img_path)
video_to_images(args.source_video, img_path)
print(args.source_video)

source_path = sorted(glob.glob("./img/*.png"))
# create the detector once instead of once per frame
detector = MTCNN()
for i, fi in enumerate(source_path):
    print(fi)
    img = cv2.imread(fi)
    blur = img.copy()
    # detect once per frame and reuse the result
    faces = detector.detect_faces(img)
    for face in faces:
        pos = face["box"]
        # blur each detected face region
        blur[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]] = cv2.blur(
            blur[pos[1]:pos[1] + pos[3], pos[0]:pos[0] + pos[2]], (40, 40))
    cv2.imwrite(blur_path + "/%05d.jpg" % i, blur)
# key = embd.keys(i)
if e == 1:
    k = 0
    for j in res.keys():
        if k == i:
            key = j
        k += 1
    print(key)
    # load the image from file
    pixels = pyplot.imread(user)
    # create the detector, using default weights
    detector = MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    img = cv2.imread(user)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    name = key
    img = cv2.putText(img, name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX,
                      1, (255, 0, 255), 2, cv2.LINE_AA)
    cv2.imshow('123', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if e == 0:
    print('the person is not recognized')
class FaceRecognition(object):
    """ Face Recognition object class """

    def __init__(self):
        """ Initialize Face Recognition model """
        # GRAPH
        self.graph = tf.get_default_graph()
        # Load Face Detector
        self.face_detector = MTCNN()
        # Load FaceNet
        self.facenet = FaceNet()
        # Euclidean Classifier
        self.clf = None

    def predict(self, path, threshold=None):
        """
        Find faces and recognize them, return predicted people in the image
        :param path: Source image path
        :param threshold: cutoff threshold
        :return: predictions and base64-encoded image with rectangles drawn
        """
        if not self.clf:
            raise RuntimeError("No classifier found. Please load classifier")

        start_at = time.time()
        bounding_boxes = []

        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.uint8)

        for person, confidence, box in self.__predict__(image, threshold=threshold):
            # Draw rectangle with person name
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                          (0, 255, 0), 2)
            cv2.putText(image, person, (box[0], box[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))
            bounding_boxes.append({
                "person": person,
                "confidence": confidence,
                "box": box,
            })

        # encode frame
        _, buffer = cv2.imencode('.jpg', image)
        return {
            "frame": base64.b64encode(buffer).decode('ascii'),
            "elapsed_time": (time.time() - start_at),
            "predictions": bounding_boxes
        }

    def __predict__(self, image, threshold=None):
        """
        Extract faces and evaluate each one
        :param image: Source image
        :param threshold: decision threshold
        :return: yield (person, confidence, box)
        """
        for encoding, face, box in self.face_encoding(image):
            # Skip faces that are too small to classify reliably
            if (box[2] - box[0]) < config.MIN_FACE_SIZE[0] or \
                    (box[3] - box[1]) < config.MIN_FACE_SIZE[1]:
                yield (config.UNKNOWN_LABEL, 0.0, box)
            else:
                results = self.clf.predict(encoding)
                person, confidence = results["person"], results["confidence"]
                if threshold and confidence < threshold:
                    person = config.UNKNOWN_LABEL
                yield (person, confidence, box)

    def face_detection(self, image):
        """
        Face detection from source image
        :param image: Source image
        :return: yield extracted face and bounding box
        """
        image_to_detect = image.copy()
        # detect faces in the image
        for face_attributes in self.face_detector.detect_faces(image_to_detect):
            if face_attributes["confidence"] > config.FACE_CONFIDENCE:
                # extract the bounding box, clamping negative values to zero
                x1, y1, w, h = [max(point, 0) for point in face_attributes["box"]]
                x2, y2 = x1 + w, y1 + h
                face = image[y1:y2, x1:x2]
                # Align face
                face = FaceRecognition.align_face(face_attributes, face.copy())
                yield (cv2.resize(face, config.FACE_SIZE), (x1, y1, x2, y2))

    def face_encoding(self, source_image):
        """
        Extract face encodings from image
        :param source_image: Source image
        :return: yield 512-d encoding, face and bounding box
        """
        for face, box in self.face_detection(source_image):
            with self.graph.as_default():
                # Face encoding
                encoding = self.facenet.embeddings(np.expand_dims(face, axis=0))[0]
            yield (encoding, face, box)

    @staticmethod
    def align_face(face_attribute, image):
        """Rotate the face so the eyes lie on a horizontal line."""
        if not face_attribute:
            return image
        # Get left and right eyes
        left_eye = face_attribute["keypoints"]["left_eye"]
        right_eye = face_attribute["keypoints"]["right_eye"]
        # Distance between the eyes
        d = math.sqrt(
            math.pow(right_eye[0] - left_eye[0], 2) +
            math.pow(right_eye[1] - left_eye[1], 2))
        a = left_eye[1] - right_eye[1]
        # angle of the eye line in degrees
        alpha = (math.asin(a / d) * 180.0) / math.pi
        return imutils.rotate(image, -alpha)

    def load(self, path):
        """
        Load classifier from pickle file
        :param path: path
        """
        clf = EuclideanClassifier()
        clf.load(path)
        self.clf = clf

    def save(self, path):
        """
        Save classifier as pickle file
        :param path: path
        """
        self.clf.save(path)

    def fit(self, folder):
        """
        Fit classifier from a directory with this structure:
            Person 1/
                file.jpg ... file.jpg
            Person 2/
                file.jpg ... file.jpg
        :param folder: root folder path
        """
        # Initialize classifier
        clf = EuclideanClassifier()
        # Load all files
        files = []
        for ext in config.ALLOWED_IMAGE_TYPES:
            files.extend(glob.glob(os.path.join(folder, "*", ext), recursive=True))

        for path in tqdm.tqdm(files):
            # Load image
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Get person name from the parent folder
            person = os.path.split(os.path.split(path)[0])[1]
            # Get encodings and add them to the classifier
            for encoding, face, box in self.face_encoding(image):
                clf.fit([encoding], [person])

        self.clf = clf

    def fit_from_dataframe(self, df, person_col="person", path_col="path"):
        """
        Fit classifier from a dataframe.
        :param df: Pandas dataframe
        :param person_col: Dataframe column with person id
        :param path_col: Dataframe column with image path
        """
        # Initialize classifier
        clf = EuclideanClassifier()
        for index, row in tqdm.tqdm(df.iterrows(), total=df.shape[0]):
            # Load image
            image = cv2.imread(row[path_col])
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Get person name from the dataframe
            person = row[person_col]
            # Get encodings and add them to the classifier
            for encoding, face, box in self.face_encoding(image):
                clf.fit([encoding], [person])

        self.clf = clf
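# A minimal usage sketch for the FaceRecognition class above (the folder layout,
# file names, and threshold are illustrative assumptions, not from the original):
fr = FaceRecognition()
fr.fit("dataset/")           # one sub-folder of images per person
fr.save("classifier.pkl")    # persist the fitted Euclidean classifier

fr2 = FaceRecognition()
fr2.load("classifier.pkl")
result = fr2.predict("group_photo.jpg", threshold=0.5)
for pred in result["predictions"]:
    print(pred["person"], pred["confidence"], pred["box"])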
cv2.rectangle(image, upper_left, bottom_right, (0, 255, 0), 3)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image = capture.frame
cv2.imshow('video stream...', image)
# image2 = np.copy(image)
# cv2.resizeWindow("frame", 800, 600)
# inner_image = image[upper_left[1]:bottom_right[1], upper_left[0]:bottom_right[0]]
# image2[0:, 0:upper_left[0] + 3] = 0
# image2[0:, bottom_right[0] - 3:] = 0
# image2[0:upper_left[1] + 3, 0:] = 0
# image2[bottom_right[1] - 3:, 0:] = 0
# cv2.imwrite(test + "1.png", inner_image)
# image = frame
# if cv2.waitKey(1) & 0xFF == 32:
result = detector.detect_faces(inner_image)
# print(i)
print("Found {0} faces!".format(len(result)))

if len(result) != 1:
    count = 0
    name2 = ""
    vote = []

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
t1 = time.perf_counter()
t2 = time.perf_counter()
t3 = t2 - t1
t3 = int(t3 * 100)
t3 = t3 / 100  # round down to two decimal places

if len(result) == 1:
    bounding_box = result[0]['box']
class ImageAndVideo(object):
    """
    This class provides the core functionality required for facial computer
    vision: for instance, detecting faces in images or starting a video stream.
    """

    def __init__(self):
        self.serialize_model = MTCNN(min_face_size=60)

    def load_image_from_file(self, image_location):
        """
        Load an image from disk and convert it to the format used by the models.

        Arguments:
            image_location {str}: Image location on the file system
        """
        # load the input image from disk and grab its spatial dimensions
        self.image = cv2.imread(image_location)
        (self.h, self.w) = self.image.shape[:2]

        # downscale very large images
        if self.h >= 1000 or self.w >= 1000:
            self.w = int(self.w * 0.3)
            self.h = int(self.h * 0.3)
            self.image = cv2.resize(self.image, (self.w, self.h))

        self.frame_picture = False

    def load_image_from_frame(self, frame):
        """
        Take an image from a video stream and convert it to the format used by
        the models.

        Arguments:
            frame {opencv2 frame}: Frame captured by opencv2
        """
        self.image = frame
        # ensure the color format matches what the models were trained on
        self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)
        (self.h, self.w) = self.image.shape[:2]
        self.frame_picture = True

    def detect_faces(self, probability=0.5, face_size=(224, 224)):
        """
        Scan the loaded image and collect all detected faces.

        Arguments:
            probability {float, default=0.5}: Minimum detection probability
                required to keep a face
            face_size {tuple, default=(224, 224)}: Width and height the face
                image should be resized to
        """
        if self.frame_picture:
            # frames are already RGB after load_image_from_frame
            detections = self.serialize_model.detect_faces(self.image)
        else:
            detections = self.serialize_model.detect_faces(
                cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))

        self.faces = []
        self.bounding_boxes = []

        # loop over the detections
        for detection in detections:
            confidence = detection['confidence']
            if confidence >= probability:
                (startX, startY, width, height) = detection['box']
                (endX, endY) = (startX + width, startY + height)

                # ensure the bounding boxes fall within the dimensions of the frame
                (startX, startY) = (max(0, startX), max(0, startY))
                (endX, endY) = (min(self.w - 1, endX), min(self.h - 1, endY))
                self.bounding_boxes.append([startX, startY, endX, endY])

                # extract the face ROI, put it in RGB channel ordering,
                # resize it, and preprocess it
                face = self.image[startY:endY, startX:endX]
                try:
                    if not self.frame_picture:
                        # file images are BGR; frames were converted on load
                        # (the original converted unconditionally, which swapped
                        # the channels of already-RGB frames)
                        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                    face = cv2.resize(face, face_size)
                    face = img_to_array(face)
                    face = np.expand_dims(face, axis=0)
                    face = face / 255
                    self.faces.append(face)
                except cv2.error:
                    # skip degenerate (empty) face crops
                    pass

        return len(self.faces)

    def draw_boxes_around_faces(self, labels=[], colors=[]):
        """
        Draw a box around each detected face in the original image and label it
        (e.g. to indicate whether the person is wearing a face mask). Labels
        and colors can be provided; otherwise the class uses its own.

        Arguments:
            labels {list(str), default=[]}: Labels to put on top of the boxes
            colors {list((r,g,b)), default=[]}: RGB colors for the boxes
        """
        if len(labels) < 1 or len(colors) < 1:
            labels = self.labels
            colors = self.colors

        for bounding_box, label, color in zip(self.bounding_boxes, labels, colors):
            cv2.putText(self.image, str(label),
                        (bounding_box[0], bounding_box[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(self.image, (bounding_box[0], bounding_box[1]),
                          (bounding_box[2], bounding_box[3]), color, 2)

    def display_predictions(self):
        """
        Display the current prediction. Only works once all the previous steps
        have been done.
        """
        if self.frame_picture:
            img = Image.fromarray(self.image)
        else:
            img = Image.fromarray(cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))
        img.show()

    def start_video_stream(self):
        """ Start the video stream. """
        self.vs = VideoStream(src=0).start()
        # allow the camera to warm up
        time.sleep(2.0)

    def capture_frame_and_load_image(self, stop=True, vs=None):
        """
        Capture the current frame of the video stream and load it into the class.

        Arguments:
            stop {boolean, default=True}: If True, stop the video stream;
                otherwise it keeps running.
        """
        # grab the frame from the threaded video stream and resize it to a
        # maximum width of 400 pixels
        if vs is None:
            frame = self.vs.read()
        else:
            frame = vs.read()
        frame = resize(frame, width=400)

        if stop:
            cv2.destroyAllWindows()
            self.vs.stop()

        self.load_image_from_frame(frame)

    def get_video_stream(self):
        """
        Return the video stream object created.

        Returns:
            video stream object
        """
        return self.vs

    def get_faces(self):
        """
        Return the currently identified faces and their bounding boxes.

        Returns:
            faces {list}: image values for the detected faces
            bounding_boxes {list}: coordinates of the faces in the original image
        """
        return self.faces, self.bounding_boxes

    def get_image(self):
        """
        Return the image as it currently is in the process.

        Returns:
            image {cv2 instance of image}
        """
        return self.image
# Add the color variable
img = cv2.imdecode(img, cv2.IMREAD_COLOR)

# Show the image
cv2_imshow(img)

"""# Step 2: Face detection"""

# Initialize mtcnn detector
detector = MTCNN()

# set face extraction parameters
target_size = (224, 224)  # output image size
border_rel = 0  # increase or decrease zoom on the face

# detect faces in the image
detections = detector.detect_faces(img)
print(detections)

x1, y1, width, height = detections[0]['box']
dw = round(width * border_rel)
dh = round(height * border_rel)
x2, y2 = x1 + width + dw, y1 + height + dh
face = img[y1:y2, x1:x2]

# resize pixels to the model size
face = PIL.Image.fromarray(face)
face = face.resize(target_size)
face = np.asarray(face)

# show face
cv2_imshow(face)
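# The extraction above grows the crop only toward the bottom-right corner and can
# read past the image bounds for faces near the edge. A sketch of a symmetric,
# clamped variant (the helper name `expand_box` is an assumption):
def expand_box(box, border_rel, img_w, img_h):
    """Expand an [x, y, w, h] box by border_rel on every side, clamped to the image."""
    x, y, w, h = box
    dw, dh = round(w * border_rel), round(h * border_rel)
    x1, y1 = max(x - dw, 0), max(y - dh, 0)
    x2, y2 = min(x + w + dw, img_w), min(y + h + dh, img_h)
    return x1, y1, x2, y2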
def mtcnn(path_images):
    # note: this creates a new MTCNN detector on every call; for batch use,
    # construct the detector once and reuse it
    img = cv2.imread(path_images)
    detector = MTCNN()
    return detector.detect_faces(img)
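# A quick usage sketch for mtcnn() above; the file name is an assumption. Each
# detection is a dict with 'box' ([x, y, w, h]), 'confidence', and 'keypoints':
for det in mtcnn("group.jpg"):
    x, y, w, h = det["box"]
    print(det["confidence"], (x, y, w, h), det["keypoints"]["nose"])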
# os.makedirs('faces/dlib')
# os.makedirs('faces/mtcnn')
# os.makedirs('faces/dnn')
# os.makedirs('faces/haar')

for image in images:
    img = cv2.imread(os.path.join('faces', image))
    # img = cv2.resize(img, None, fx=2, fy=2)
    height, width = img.shape[:2]
    img1 = img.copy()
    img2 = img.copy()
    img3 = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # detect faces with each of the four detectors
    faces1 = detector1.detect_faces(img_rgb)       # MTCNN
    faces2 = detector2(gray, 2)                    # dlib
    blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0,
                                 (300, 300), (104.0, 117.0, 123.0))
    net.setInput(blob)
    faces3 = net.forward()                         # OpenCV DNN
    faces4 = classifier2.detectMultiScale(img)     # Haar cascade

    # MTCNN
    for result in faces1:
        x, y, w, h = result['box']
        x1, y1 = x + w, y + h
        cv2.rectangle(img, (x, y), (x1, y1), (0, 0, 255), 2)

    # DLIB
    for result in faces2:
class DeepFace():

    def __init__(self, model_name, pooling, image_size, mtcnn_model_path,
                 vgg_model_path):
        self.model_name = model_name
        self.pooling = pooling
        self.image_size = image_size
        self.mtcnn_model_path = mtcnn_model_path
        self.vgg_model_path = vgg_model_path
        self.model = VGGFace(model=self.model_name,
                             include_top=False,
                             input_shape=(self.image_size, self.image_size, 3),
                             pooling=self.pooling,
                             weights_path=self.vgg_model_path)
        self.detector = MTCNN(weights_file=self.mtcnn_model_path)

    def extract_face(self, img, required_size=(224, 224)):
        results = self.detector.detect_faces(img)

        return_data = {
            "Status": False,
            "Face": np.zeros((224, 224, 3)).astype(np.uint8),
            "X": -1,
            "Y": -1,
            "Width": -1,
            "Height": -1,
        }
        if len(results) == 0:
            return return_data

        # pick the largest detected face
        face_size = 0
        face_id = 0
        for i in range(len(results)):
            x1, y1, width, height = results[i]['box']
            if face_size < width * height:
                face_size = width * height
                face_id = i

        # clamp the box to the image bounds
        x1, y1, width, height = results[face_id]['box']
        x2, y2 = x1 + width, y1 + height
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(x2, img.shape[1])
        y2 = min(y2, img.shape[0])

        face = img[y1:y2, x1:x2]
        image = Image.fromarray(face)
        image = image.resize(required_size)
        face_array = asarray(image)

        return_data["Status"] = True
        return_data["Face"] = face_array
        return_data["X"] = x1
        return_data["Y"] = y1
        return_data["Width"] = width
        return_data["Height"] = height
        return return_data

    def read_face(self, filename):
        face = pyplot.imread(filename)
        image = Image.fromarray(face)
        face_array = asarray(image)
        return face_array

    def get_embeddings(self, filenames):
        faces = [self.read_face(f) for f in filenames]
        samples = asarray(faces, 'float32')
        samples = preprocess_input(samples, version=2)
        yhat = self.model.predict(samples)
        return yhat

    def is_match(self, known_embedding, candidate_embedding, thresh=0.5):
        # cosine distance at or below the threshold counts as the same person
        score = cosine(known_embedding, candidate_embedding)
        return (score <= thresh, score)
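# A minimal verification sketch for the DeepFace class above. The model name,
# weight paths, and image files are illustrative assumptions, and get_embeddings
# assumes its inputs are pre-cropped face images of equal size (e.g. 224x224):
df = DeepFace(model_name="resnet50", pooling="avg", image_size=224,
              mtcnn_model_path="weights/mtcnn_weights.npy",
              vgg_model_path="weights/vggface_resnet50_notop.h5")
emb_a, emb_b = df.get_embeddings(["person_a.jpg", "person_b.jpg"])
same, score = df.is_match(emb_a, emb_b)
print("same person:", same, "cosine distance:", score)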
video_height = int(vid.get(4))
video_fps = int(vid.get(5))

record_video = True
# record_video = False
if record_video:
    # video writer for saving the output
    out = cv2.VideoWriter('data/outvideo.avi',
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          video_fps, (video_width, video_height))

face_detector = MTCNN()          # initialize the MTCNN detector object
face_tracker = Sort(max_age=50)  # initialize the SORT tracker object

ret, frame = vid.read()
while ret:
    try:
        ret, frame = vid.read()
        original_frame = frame.copy()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = face_detector.detect_faces(frame)

        min_confidence = 0.4
        box = []
        for i in range(len(result)):
            # skip weak detections (the original defined min_confidence but
            # never applied it)
            if result[i]["confidence"] < min_confidence:
                continue
            box_ = result[i]["box"]
            print("Face detected: box =", box_)
            # SORT expects detections as [x1, y1, x2, y2, score]
            box.append([box_[0], box_[1], box_[0] + box_[2],
                        box_[1] + box_[3], result[i]["confidence"]])
        dets = np.array(box)
        track_bbx, pts = face_tracker.update(dets)
from mtcnn.mtcnn import MTCNN
import cv2
import tensorflow as tf

# enable GPU memory growth (assumes at least one GPU is present)
gpu = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

detector = MTCNN()
image = cv2.imread("images/be_iu.jpg")
result = detector.detect_faces(image)

for person in result:
    bounding_box = person['box']
    keypoints = person['keypoints']

    # draw the face box and the five facial keypoints
    cv2.rectangle(image,
                  (bounding_box[0], bounding_box[1]),
                  (bounding_box[0] + bounding_box[2],
                   bounding_box[1] + bounding_box[3]),
                  (0, 155, 255), 2)
    cv2.circle(image, (keypoints['left_eye']), 2, (0, 155, 255), 2)
    cv2.circle(image, (keypoints['right_eye']), 2, (0, 155, 255), 2)
    cv2.circle(image, (keypoints['nose']), 2, (0, 155, 255), 2)
    cv2.circle(image, (keypoints['mouth_left']), 2, (0, 155, 255), 2)
    cv2.circle(image, (keypoints['mouth_right']), 2, (0, 155, 255), 2)

    # show the cropped face; waitKey(0) advances to the next face on a keypress
    cv2.imshow("image",
               image[bounding_box[1]:bounding_box[1] + bounding_box[3],
                     bounding_box[0]:bounding_box[0] + bounding_box[2]])
    cv2.waitKey(0)
_CAMERA_WIDTH = 640
_CAMERA_HEIGHT = 480

cap = cv2.VideoCapture(0)
# cap = cv2.VideoCapture(1)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, _CAMERA_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, _CAMERA_HEIGHT)

while True:
    # Capture frame-by-frame
    __, frame = cap.read()

    # Use MTCNN to detect faces
    result = detector.detect_faces(frame)
    if result != []:
        k = 0
        for person in result:
            k += 1
            bounding_box = person['box']
            keypoints = person['keypoints']
            x = bounding_box[0]
            y = bounding_box[1]
            w = bounding_box[2]
            h = bounding_box[3]
            n = 5
            t = int(time.time())
            padding = 20
class FaceAnalysis:

    def __init__(self):
        # TF1-style session setup with capped GPU memory
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.8
        set_session(tf.Session(config=config))
        self.detector = MTCNN()
        self.pre_train_model = load_model('/test/facenet_keras.h5')
        global graph
        graph = tf.get_default_graph()
        print("load OK")

    def extract_face_get_embedding(self, pixels, required_size=(160, 160)):
        # detect faces using default MTCNN weights and embed each crop
        try:
            with graph.as_default():
                crop_face = []
                yhat_list = []
                bound_box_list = []

                results = self.detector.detect_faces(pixels)
                if not results:
                    # no face detected
                    return False, False, False, False

                for result in results:
                    # extract the bounding box from the detection
                    x1, y1, width, height = result['box']
                    # bug fix: MTCNN can return negative coordinates
                    x1, y1 = abs(x1), abs(y1)
                    x2, y2 = x1 + width, y1 + height
                    bound_box_list.append([x1, y1, x2, y2])

                    # extract the face and resize it to the model input size
                    face = pixels[y1:y2, x1:x2]
                    image = Image.fromarray(face)
                    image = image.resize(required_size)  # was hard-coded to (160, 160)
                    face_array = asarray(image)
                    crop_face.append(face_array)

                    # scale pixel values and standardize across channels (global)
                    face_pixels = face_array.astype('float32')
                    mean, std = face_pixels.mean(), face_pixels.std()
                    face_pixels = (face_pixels - mean) / std

                    # transform the face into one sample and get the embedding
                    samples = expand_dims(face_pixels, axis=0)
                    yhat = self.pre_train_model.predict(samples)
                    yhat_list.append(yhat[0])

                return yhat_list, bound_box_list, crop_face, True
        except Exception as e:
            print(e)
            return False, False, False, False
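# A minimal usage sketch for FaceAnalysis above (the image path is an assumption;
# the model path '/test/facenet_keras.h5' comes from the class itself):
fa = FaceAnalysis()
pixels = cv2.cvtColor(cv2.imread("visitor.jpg"), cv2.COLOR_BGR2RGB)
embeddings, boxes, crops, ok = fa.extract_face_get_embedding(pixels)
if ok:
    for emb, (x1, y1, x2, y2) in zip(embeddings, boxes):
        print("face at", (x1, y1, x2, y2), "embedding dim:", emb.shape)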
def get_frame(self):
    _, fr = self.video.read()
    # fr = imutils.resize(fr, width=400)

    # Alternative detector kept for reference: OpenCV's Caffe-based SSD face model.
    # modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
    # configFile = "deploy.txt"
    # net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    # (h, w) = fr.shape[:2]
    # blob = cv2.dnn.blobFromImage(cv2.resize(fr, (300, 300)), 1.0,
    #                              (300, 300), (104.0, 177.0, 123.0))
    # net.setInput(blob)
    # detections = net.forward()
    # # loop over the detections
    # for i in range(0, detections.shape[2]):
    #     # extract the confidence (i.e., probability) associated with the prediction
    #     confidence = detections[0, 0, i, 2]
    #     # filter out weak detections by ensuring the `confidence` is
    #     # greater than the minimum confidence
    #     if confidence < 0.75:
    #         continue
    #     # compute the (x, y)-coordinates of the bounding box for the object
    #     box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
    #     (startX, startY, endX, endY) = box.astype("int")
    #     # draw the bounding box of the face along with the associated probability
    #     text = "{:.2f}%".format(confidence * 100)
    #     y = startY - 10 if startY - 10 > 10 else startY + 10
    #     cv2.rectangle(fr, (startX, startY), (endX, endY), (0, 0, 255), 2)
    #     cv2.putText(fr, text, (startX, y),
    #                 cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

    pixels = np.asarray(fr)
    # note: constructing MTCNN here runs once per frame; for real use it should
    # be created once (e.g. in __init__) and reused
    detector = MTCNN()
    result = detector.detect_faces(pixels)
    if result:
        for person in result:
            bounding_box = person['box']
            cv2.rectangle(fr,
                          (bounding_box[0], bounding_box[1]),
                          (bounding_box[0] + bounding_box[2],
                           bounding_box[1] + bounding_box[3]),
                          (0, 155, 255), 2)
            # crop the face and classify its emotion
            fc = pixels[bounding_box[1]:bounding_box[1] + bounding_box[3],
                        bounding_box[0]:bounding_box[0] + bounding_box[2]]
            roi = cv2.resize(fc, (224, 224))
            with session.graph.as_default():
                k.backend.set_session(session)
                pred = model.predict_emotion(roi[np.newaxis, :, :])
            cv2.putText(fr, pred, (bounding_box[0], bounding_box[1]),
                        font, 2, (0, 0, 255), 3)

    _, jpeg = cv2.imencode('.jpg', fr)
    return jpeg.tobytes()
def main(character, mode_num=0):
    if mode_num == 0:
        in_dir = character.align_png_dir
        out_dir = character.align_crop_dir
    else:
        in_dir = character.imgB_dir
        out_dir = character.imgB_crop_dir

    if os.path.exists(out_dir):
        shutil.rmtree(out_dir, ignore_errors=True)
    os.makedirs(out_dir)
    if not os.path.exists(in_dir):
        os.makedirs(in_dir)

    # save_name = character.full_name
    print("\ninput directory:", in_dir, '\noutput directory:', out_dir)

    # collect all input images
    extensions = ['.png', '.jpg', '.jpeg']
    image_path_list = []
    for extension in extensions:
        image_path_list.extend(glob.glob('%s/*%s' % (in_dir, extension)))
    image_path_list = sorted(image_path_list)
    # print(image_path_list)

    # dictionary of filename -> (coordinates, rotation, input path), written to
    # a json file for the replacer script on the other side
    info_dict = {}

    detector = MTCNN()

    # check the marker file *before* creating it, so the face-crop pass and the
    # mouth-only pass are mutually exclusive (the original created the marker and
    # then ran both passes on the first invocation)
    already_cropped = os.path.isfile('%s/already_cropped.json' % in_dir)

    def crop_mouth(image):
        """Align an image by its mouth line and return (roi, coords, degrees).

        Detects the mouth keypoints, rotates the image so the mouth is level,
        re-detects the keypoints, and crops a padded square around the mouth.
        """
        keypoints = detector.detect_faces(image)[0]['keypoints']
        mleft, mright = keypoints['mouth_left'], keypoints['mouth_right']
        # degrees needed to rotate the image into alignment
        degrees = get_rot_angle(mleft, mright)
        image_r = rotate_image(image, degrees)
        # detect new mouth points on the rotated image (could be replaced with math)
        keypoints = detector.detect_faces(image_r)[0]['keypoints']
        mleft, mright = keypoints['mouth_left'], keypoints['mouth_right']
        # bounding box around the mouth
        w = mright[0] - mleft[0]
        h = w
        x = mleft[0]
        y = mleft[1] - h / 2
        # pad by 60% of the width, clamped so the crop stays inside the image
        pad = int(round(.6 * w))
        x = max(int(x - pad), 0)
        y = max(int(y - pad), 0)
        h = int(h + 2 * pad)
        w = int(w + 2 * pad)
        roi = image_r[y:y + h, x:x + w]
        roi = cv2.resize(roi, (128, 128))
        # roi = imutils.resize(roi, 128, inter=cv2.INTER_CUBIC)
        return roi, [x, y, h, w], degrees

    if not already_cropped:
        # first pass: crop each image to the padded face box, then extract the mouth
        with open('%s/already_cropped.json' % in_dir, 'w') as outfile:
            json.dump('already cropped!', outfile, indent=4)
            outfile.write("\n")
        for image_path in image_path_list:
            try:
                image = cv2.imread(str(image_path))
                x, y, w, h = detector.detect_faces(image)[0]['box']
                # pad the face box by 30% of its height, clamped to the image
                # (the original allowed negative coordinates, which silently
                # produced wrong crops via negative slicing)
                pad = int(.3 * h)
                x = max(x - pad, 0)
                y = max(y - pad, 0)
                w += 2 * pad
                h += 2 * pad
                image = image[y:y + h, x:x + w]
                # write over the existing image with the cropped one
                cv2.imwrite(image_path, image)

                roi, coords, degrees = crop_mouth(image)
                print(image_path)
                filename = os.path.basename(image_path)
                # add cropped output path, coordinates, and input path to dictionary
                info_dict[filename] = coords, degrees, '%s/%s' % (in_dir, filename)
                cv2.imwrite('%s/%s' % (out_dir, filename), roi)
            except Exception as e:
                print('face/mouth detection failed for %s: %s' % (image_path, e))
    else:
        print('Already Cropped!')
        # second pass: the face crops already exist, so only extract the mouths
        for image_path in image_path_list:
            try:
                image = cv2.imread(str(image_path))
                roi, coords, degrees = crop_mouth(image)
                print(image_path)
                filename = os.path.basename(image_path)
                info_dict[filename] = coords, degrees, '%s/%s' % (in_dir, filename)
                cv2.imwrite('%s/%s' % (out_dir, filename), roi)
            except Exception as e:
                print('face/mouth detection failed for %s: %s' % (image_path, e))

    try:
        with open('%s/alignments.json' % out_dir, 'w') as outfile:
            json.dump(info_dict, outfile, indent=4)
            outfile.write("\n")
    except OSError as e:
        print('could not write alignments.json: %s' % e)
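# get_rot_angle and rotate_image are referenced above but not defined in this
# excerpt. A plausible sketch of both, under the assumption that the angle is the
# slope of the mouth line in degrees (the sign convention may need flipping for a
# given setup):
import math
import cv2

def get_rot_angle(mleft, mright):
    """Angle of the line from the left to the right mouth corner, in degrees."""
    dx = mright[0] - mleft[0]
    dy = mright[1] - mleft[1]
    return math.degrees(math.atan2(dy, dx))

def rotate_image(image, degrees):
    """Rotate an image around its center without changing its size."""
    h, w = image.shape[:2]
    m = cv2.getRotationMatrix2D((w / 2, h / 2), degrees, 1.0)
    return cv2.warpAffine(image, m, (w, h))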