def _setup_mtcnn(self):
    """Create the MTCNN networks inside a dedicated graph and session.

    The session is configured to use at most 30% of the GPU memory and is
    not closed here.

    Returns:
        The value returned by ``align_detect_face.create_mtcnn`` for the
        newly created session.
    """
    memory_cap = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    session_config = tf.ConfigProto(gpu_options=memory_cap,
                                    log_device_placement=False)
    with tf.Graph().as_default():
        mtcnn_session = tf.Session(config=session_config)
        with mtcnn_session.as_default():
            return align_detect_face.create_mtcnn(mtcnn_session, None)
def load_and_align_image(image, image_size, margin, gpu_memory_fraction):
    """Detect the first face in ``image`` and return it cropped and prewhitened.

    Args:
        image: Image array indexed as ``[row, column, channel]``.
        image_size: Side length of the square output crop.
        margin: Total margin added around the detected box (half on each
            side), clipped to the image borders.
        gpu_memory_fraction: Upper bound of GPU memory the detection session
            may use.

    Returns:
        The resized crop after ``facenet.prewhiten``, or ``None`` when no
        face is detected.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        try:
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            bounding_boxes, _ = detect_face.detect_face(
                image, minsize, pnet, rnet, onet, threshold, factor)
        finally:
            # A new session is created on every call; release it once the
            # detection is done so repeated calls do not accumulate sessions.
            sess.close()

    if len(bounding_boxes) < 1:
        print("can't detect face, remove ", image)
        # Previously execution fell through and indexing the empty result
        # raised IndexError.
        return None

    img_size = np.asarray(image.shape)[0:2]
    det = np.squeeze(bounding_boxes[0, 0:4])
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
    cropped = image[bb[1]:bb[3], bb[0]:bb[2], :]
    aligned = misc.imresize(cropped, (image_size, image_size),
                            interp='bilinear')
    return facenet.prewhiten(aligned)
def __init__(self):
    """Create the MTCNN networks and keep them on the instance.

    Reads ``self.gpu_memory_fraction`` to cap the GPU memory of the session
    and stores the three results of ``detect_face.create_mtcnn`` as
    ``self.pnet``, ``self.rnet`` and ``self.onet``. The session is not
    closed here.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_memory_fraction),
        log_device_placement=False)
    with tf.Graph().as_default():
        mtcnn_session = tf.Session(config=session_config)
        with mtcnn_session.as_default():
            networks = detect_face.create_mtcnn(mtcnn_session, None)
    self.pnet, self.rnet, self.onet = networks
def create_network_face_detection(gpu_memory_fraction):
    """Create the three MTCNN networks in their own graph and session.

    Args:
        gpu_memory_fraction: Upper bound of GPU memory the session may use.

    Returns:
        A ``(pnet, rnet, onet)`` tuple as produced by
        ``detect_face.create_mtcnn``. The session is not closed here.
    """
    memory_cap = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    session_config = tf.ConfigProto(gpu_options=memory_cap,
                                    log_device_placement=False)
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        detection_session = tf.Session(config=session_config)
        with detection_session.as_default():
            p_net, r_net, o_net = detect_face.create_mtcnn(
                detection_session, None)
    return p_net, r_net, o_net
def load_model(model, sess, input_map=None):
    """Create the MTCNN networks and import the frozen graph at MODEL_PATH.

    Args:
        model: Not used by this function; the graph is always read from the
            module-level ``MODEL_PATH``.
            NOTE(review): confirm whether ``model`` was meant to be the path.
        sess: Session handed to ``detect_face.create_mtcnn``.
        input_map: Optional mapping forwarded to ``tf.import_graph_def``.

    Returns:
        The ``(pnet, rnet, onet)`` tuple from ``detect_face.create_mtcnn``.
    """
    # The original built a tf.GPUOptions object here that was never passed to
    # any session; that dead code has been removed.
    pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    model_exp = os.path.expanduser(MODEL_PATH)
    print('Model filename: %s' % model_exp)
    with gfile.FastGFile(model_exp, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import into the current default graph without a name prefix.
    tf.import_graph_def(graph_def, input_map=input_map, name='')
    return pnet, rnet, onet
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Detect, crop and prewhiten every face found in the given image files.

    Args:
        image_paths: Sequence of image file paths (``~`` is expanded).
        image_size: Side length of each square output crop.
        margin: Total margin added around each detected box (half on each
            side), clipped to the image borders.
        gpu_memory_fraction: Upper bound of GPU memory the detection session
            may use.

    Returns:
        A tuple ``(images, count_per_image, nrof_samples)``: the stacked
        prewhitened crops, the number of faces detected in each input image,
        and the number of input images. When no face is found at all,
        ``images`` is an empty batch of shape
        ``(0, image_size, image_size, 3)`` (three channels assumed) instead of
        the ValueError ``np.stack`` raises on an empty list.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    nrof_samples = len(image_paths)
    img_list = []
    count_per_image = []

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        try:
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

            # Iterate directly instead of indexing through xrange, which only
            # exists on Python 2.
            for image_path in image_paths:
                img = misc.imread(os.path.expanduser(image_path))
                img_size = np.asarray(img.shape)[0:2]
                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                count_per_image.append(len(bounding_boxes))
                for box in bounding_boxes:
                    det = np.squeeze(box[0:4])
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - margin / 2, 0)
                    bb[1] = np.maximum(det[1] - margin / 2, 0)
                    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    aligned = misc.imresize(cropped, (image_size, image_size),
                                            interp='bilinear')
                    img_list.append(facenet.prewhiten(aligned))
        finally:
            # The session is private to this call; release it when done.
            sess.close()

    if img_list:
        images = np.stack(img_list)
    else:
        images = np.zeros((0, image_size, image_size, 3))
    return images, count_per_image, nrof_samples
# Build the recognition pipeline (FaceNet embeddings + MTCNN detector) inside
# its own graph/session so it stays separate from the liveness model below.
with recog_graph.as_default():
    with recog_sess.as_default():
        # NOTE(review): gpu_options is not passed to any session in the
        # visible code; kept in case later code reads it.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)

        # Load the model
        print('Loading feature extraction model')
        facenet.load_model(FACENET_MODEL_PATH)

        # Get input and output tensors
        images_placeholder = tf.get_default_graph().get_tensor_by_name(
            "input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
            "phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        pnet, rnet, onet = detect_face.create_mtcnn(recog_sess, "src/align")

        people_detected = set()
        person_detected = collections.Counter()
        # Created while recog_graph is the default graph.
        recog_init = tf.local_variables_initializer()

# Load the fake/real (liveness) detection model
print("[INFO] loading liveness detector...")
with liveness_graph.as_default():
    with liveness_sess.as_default():
        liveness_model = load_model(args["model"])
        # Created while liveness_graph is the default graph.
        liveness_init = tf.local_variables_initializer()
        # Read the pickled object through a context manager so the file
        # handle is closed deterministically.
        with open(args["le"], "rb") as le_file:
            le = pickle.loads(le_file.read())
def _ensure_dir(directory):
    """Create ``directory`` if it is missing; return True if it was missing.

    Tolerates another process creating the directory between the existence
    check and the ``os.makedirs`` call.
    """
    if os.path.exists(directory):
        return False
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise
    return True


def main(args):
    """Detect, crop and resize the faces of every image in a dataset.

    Reads the dataset from ``args.input_dir``, writes each crop as a PNG under
    ``args.output_dir/<class name>/`` and records the bounding boxes in a
    ``bounding_boxes_<random key>.txt`` file in the output directory. Images
    whose output file already exists are skipped, which lets several
    processes work on the same output directory.

    Args:
        args: Namespace providing ``input_dir``, ``output_dir``,
            ``gpu_memory_fraction``, ``random_order``,
            ``detect_multiple_faces``, ``margin`` and ``image_size``.
    """
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    _ensure_dir(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            # NOTE(review): the per-class shuffle is only performed when the
            # class directory did not exist yet; confirm this is intended.
            if _ensure_dir(output_class_dir) and args.random_order:
                random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if os.path.exists(output_filename):
                    # Already produced, possibly by another process.
                    continue
                try:
                    img = imageio.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                    continue

                if img.ndim < 2:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                # Keep the first three channels only.
                img = img[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if nrof_faces <= 0:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                det = bounding_boxes[:, 0:4]
                det_arr = []
                img_size = np.asarray(img.shape)[0:2]
                if nrof_faces > 1:
                    if args.detect_multiple_faces:
                        for i in range(nrof_faces):
                            det_arr.append(np.squeeze(det[i]))
                    else:
                        # Pick a single face: prefer large boxes close to the
                        # image center.
                        bounding_box_size = (
                            det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                        img_center = img_size / 2
                        offsets = np.vstack([
                            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                        ])
                        offset_dist_squared = np.sum(
                            np.power(offsets, 2.0), 0)
                        index = np.argmax(
                            bounding_box_size - offset_dist_squared * 2.0
                        )  # some extra weight on the centering
                        det_arr.append(det[index, :])
                else:
                    det_arr.append(np.squeeze(det))

                for i, det in enumerate(det_arr):
                    det = np.squeeze(det)
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                    bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                    bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = np.array(
                        Image.fromarray(cropped).resize(
                            (args.image_size, args.image_size)))
                    nrof_successfully_aligned += 1
                    filename_base, file_extension = os.path.splitext(
                        output_filename)
                    if args.detect_multiple_faces:
                        output_filename_n = "{}_{}{}".format(
                            filename_base, i, file_extension)
                    else:
                        output_filename_n = "{}{}".format(
                            filename_base, file_extension)
                    imageio.imwrite(output_filename_n, scaled)
                    text_file.write('%s %d %d %d %d\n' %
                                    (output_filename_n, bb[0], bb[1], bb[2],
                                     bb[3]))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
def main():
    """Recognise faces in a video stream and announce the recognised name.

    Each frame is passed through MTCNN detection; every detected face is
    embedded with FaceNet and classified with the pickled classifier. The
    annotated frame is shown in a window until the stream ends or ``q`` is
    pressed. When ``read_bool()`` returns ``'1'`` the flag is cleared with
    ``update_bool(False)`` and ``text_to_speech.py`` is started with the
    recognised name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path',
                        help='Path of the video you want to test on.',
                        default=0)
    args = parser.parse_args()

    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.709
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = './Models/custom/custom.pkl'
    VIDEO_PATH = args.path
    FACENET_MODEL_PATH = './Models/facenet/20180402-114759.pb'

    # Load The Custom Classifier
    with open(CLASSIFIER_PATH, 'rb') as file:
        model, class_names = pickle.load(file)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))

        with sess.as_default():
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(FACENET_MODEL_PATH)
            update_bool(True)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")

            pnet, rnet, onet = detect_face.create_mtcnn(sess, "src/align")

            person_detected = collections.Counter()

            cap = cv2.VideoCapture(VIDEO_PATH)
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # No frame was returned (end of the stream or a read
                        # failure); passing the missing frame to the detector
                        # would raise outside the try block below.
                        break

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]
                    try:
                        if faces_found > 0:
                            # Truncate the float box corners to int pixels.
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            for i in range(faces_found):
                                cropped = frame[bb[i][1]:bb[i][3],
                                                bb[i][0]:bb[i][2], :]
                                scaled = cv2.resize(
                                    cropped,
                                    (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                    interpolation=cv2.INTER_CUBIC)
                                scaled = facenet.prewhiten(scaled)
                                scaled_reshape = scaled.reshape(
                                    -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                                feed_dict = {
                                    images_placeholder: scaled_reshape,
                                    phase_train_placeholder: False
                                }
                                emb_array = sess.run(embeddings,
                                                     feed_dict=feed_dict)

                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions,
                                                               axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)),
                                    best_class_indices]
                                best_name = class_names[best_class_indices[0]]
                                print("Name: {}, Probability: {}".format(
                                    best_name, best_class_probabilities))

                                cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                              (bb[i][2], bb[i][3]),
                                              (0, 255, 0), 2)
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20

                                # One face is classified at a time, so the
                                # probability array holds a single value.
                                if best_class_probabilities[0] > 0.3:
                                    name = class_names[best_class_indices[0]]
                                else:
                                    name = "Unknown"

                                cv2.putText(frame,
                                            name, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                cv2.putText(frame,
                                            str(
                                                round(
                                                    best_class_probabilities[0],
                                                    3)),
                                            (text_x, text_y + 17),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                person_detected[best_name] += 1

                                # NOTE(review): assumed to run once per
                                # detected face; confirm against the original
                                # indentation.
                                is_open_to_speak = read_bool()
                                if is_open_to_speak == '1':
                                    try:
                                        update_bool(False)
                                        subprocess.Popen([
                                            'python', 'text_to_speech.py',
                                            best_name
                                        ])
                                    except Exception as e:
                                        print(e)
                                        print("Error: unable to start thread")
                    except Exception as e:
                        # Best effort: a failure on one frame must not stop
                        # the stream.
                        print(e)

                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the capture and the window even when an error
                # escapes the loop.
                cap.release()
                cv2.destroyAllWindows()
import matplotlib.pyplot as plt

# Scratch script: run MTCNN face detection on one hard-coded image and print
# the detected boxes.
minsize = 20  # minimum size of face
threshold = [0.6, 0.7, 0.7]  # three steps's threshold
factor = 0.709  # scale factor
gpu_memory_fraction = 1.0

print('Creating networks and loading parameters')

# Build the detector networks in a dedicated graph and session.
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

image_path = '/Users/chenlinzhong/Downloads/4834d0e4-c9e1-46ba-b7b9-8d5a25e135eb'

img = misc.imread(image_path)
bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                            threshold, factor)
nrof_faces = bounding_boxes.shape[0]  # number of faces
print('找到人脸数目为:{}'.format(nrof_faces))

print(bounding_boxes)

# NOTE(review): crop_faces is not filled in the visible code.
crop_faces = []
for face_position in bounding_boxes:
    face_position = face_position.astype(int)
    # The first four values of each row are printed as the box coordinates.
    print(face_position[0:4])
def main_app(user_name, user_id):
    """Capture face samples from the webcam and save them for ``user_id``.

    The user is registered with ``insertOrUpdate``; then frames are read from
    camera 0, faces are detected with MTCNN and every detected face is saved
    as a grayscale crop under
    ``<cwd>/Dataset/FaceData/processed/<user_id>/``. Capturing stops once more
    than ``MAX_IMG`` faces were saved, when ``q`` is pressed, or when the
    camera stops delivering frames.

    Args:
        user_name: Name stored together with the id.
        user_id: Identifier used in the output directory and file names; it is
            concatenated with strings, so it must be a ``str``.
    """
    cam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    try:
        insertOrUpdate(user_id, user_name)

        sampleNum = 0
        MAX_IMG = 10
        MINSIZE = 20
        THRESHOLD = [0.6, 0.7, 0.7]
        FACTOR = 0.709
        FACENET_MODEL_PATH = 'src/Models/20180402-114759.pb'
        # Loop-invariant output location.
        output_dir = os.getcwd() + "/Dataset/FaceData/processed/" + user_id

        recog_graph = tf.Graph()
        recog_sess = tf.Session(graph=recog_graph)

        with recog_graph.as_default():
            with recog_sess.as_default():
                # Load the model
                print('Loading feature extraction model')
                facenet.load_model(FACENET_MODEL_PATH)
                pnet, rnet, onet = detect_face.create_mtcnn(
                    recog_sess, "src/align")

                img_num = 0
                while img_num <= MAX_IMG:
                    ret, img = cam.read()
                    if not ret:
                        # No frame was delivered; flipping a missing frame
                        # would raise.
                        print("Unable to read a frame from the camera")
                        break

                    # Flip the image horizontally so the preview is not
                    # mirrored.
                    img = cv2.flip(img, 1)

                    # Draw a box in the middle of the screen so the user can
                    # place the face inside it.
                    centerH = img.shape[0] // 2
                    centerW = img.shape[1] // 2
                    sizeboxW = 300
                    sizeboxH = 400
                    cv2.rectangle(
                        img,
                        (centerW - sizeboxW // 2, centerH - sizeboxH // 2),
                        (centerW + sizeboxW // 2, centerH + sizeboxH // 2),
                        (255, 255, 255), 5)

                    # Convert the image to grayscale.
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                    # Detect faces.
                    bounding_boxes, _ = detect_face.detect_face(
                        img, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces = bounding_boxes[:, 0:4]

                    for (x1, y1, x2, y2) in faces:
                        # Clamp to the image origin: a negative start index
                        # would wrap around when slicing.
                        x1 = max(int(x1), 0)
                        y1 = max(int(y1), 0)
                        x2 = int(x2)
                        y2 = int(y2)
                        if x2 <= x1 or y2 <= y1:
                            # Empty crop; writing it would fail.
                            continue

                        img_num += 1
                        # Draw a rectangle around the detected face.
                        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
                        sampleNum = sampleNum + 1

                        # Write the face crop into the dataset folder.
                        if not path.isdir(output_dir):
                            os.makedirs(output_dir)
                        cv2.imwrite(
                            output_dir + "/User." + user_id + '.' +
                            str(sampleNum) + ".jpg", gray[y1:y2, x1:x2])

                    cv2.imshow('frame', img)
                    # Stop when q is pressed or after more than 100 samples.
                    if cv2.waitKey(100) & 0xFF == ord('q'):
                        break
                    elif sampleNum > 100:
                        break
    finally:
        # Always free the camera and close the preview window.
        cam.release()
        cv2.destroyAllWindows()
# Load the FaceNet model into its own session and log how long it took.
facenet_session = tf.Session()
facenet.load_model_with_session(facenet_session, MODEL_DIR, meta_file,
                                ckpt_file)
time_check_2 = time.time()
logger.info("Loading facenet taken {} seconds".format(time_check_2 -
                                                     time_check_1))

# set a session for mtcnn face detection neural network
logger.info(
    'Creating Multi-task Cascaded Convolutional Neural Networks and loading parameters'
)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
mtcnn_sess = tf.Session(
    config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
with mtcnn_sess.as_default():
    # './data/' is passed as the second argument of create_mtcnn.
    pnet, rnet, onet = detect_face.create_mtcnn(mtcnn_sess, './data/')
time_check_3 = time.time()
logger.info("Loading MTCNN taken {} seconds".format(time_check_3 -
                                                   time_check_2))
logger.info("Get functions for face detection: {}, {}, {}".format(
    pnet, rnet, onet))


def compare_two_faces(face_1, face_2):
    # Get input and output tensors
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
        "phase_train:0")
def AttendenceCheck(url):
    """Recognise the face(s) in one image and return the predicted name.

    Args:
        url: Image URL to download with ``dwl.download_image``, or the string
            ``"None"`` to use the bundled ``test_image/d1.jpg``.

    Returns:
        A tuple ``(best_name, probability)`` where ``probability`` is the
        string form of the classifier's probability array for the last face
        processed. ``("", "0")`` is returned when the image cannot be read,
        no face is found, or recognition fails before the first face is
        classified.
    """
    # Required parameters
    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.709
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = 'Models/facemodel.pkl'
    FACENET_MODEL_PATH = 'Models/20180402-114759.pb'

    if url != "None":
        image_path = dwl.download_image(url)
        print("Download done")
    else:
        image_path = "test_image/d1.jpg"

    best_name = ""
    best_class_probabilities = 0

    with open(CLASSIFIER_PATH, 'rb') as file:
        model, class_names = pickle.load(file)
    print("Custom Classifier, Successfully loaded")

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        try:
            with sess.as_default():
                print('Loading feature extraction model')
                facenet.load_model(FACENET_MODEL_PATH)
                images_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name(
                    "embeddings:0")
                phase_train_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("phase_train:0")
                pnet, rnet, onet = detect_face.create_mtcnn(sess, "src/align")
                person_detected = collections.Counter()

                frame = cv2.imread(image_path)
                if frame is None:
                    # cv2.imread signals an unreadable file with None; the
                    # detector would raise on it.
                    print("Unable to read image: {}".format(image_path))
                else:
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]
                    try:
                        if faces_found > 0:
                            # Truncate the float box corners to int pixels.
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            for i in range(faces_found):
                                cropped = frame[bb[i][1]:bb[i][3],
                                                bb[i][0]:bb[i][2], :]
                                scaled = cv2.resize(
                                    cropped,
                                    (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                    interpolation=cv2.INTER_CUBIC)
                                scaled = facenet.prewhiten(scaled)
                                scaled_reshape = scaled.reshape(
                                    -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                                feed_dict = {
                                    images_placeholder: scaled_reshape,
                                    phase_train_placeholder: False
                                }
                                emb_array = sess.run(embeddings,
                                                     feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions,
                                                               axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)),
                                    best_class_indices]
                                best_name = class_names[best_class_indices[0]]

                                cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                              (bb[i][2], bb[i][3]),
                                              (0, 255, 0), 2)
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20

                                # One face is classified at a time, so the
                                # probability array holds a single value.
                                if best_class_probabilities[0] > 0.8:
                                    name = class_names[best_class_indices[0]]
                                else:
                                    name = "Unknown"
                                cv2.putText(frame,
                                            name, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                cv2.putText(frame,
                                            str(
                                                round(
                                                    best_class_probabilities[0],
                                                    3)),
                                            (text_x, text_y + 17),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                person_detected[best_name] += 1
                    except Exception as e:
                        # Still best effort, but report the failure instead
                        # of hiding it behind a bare except.
                        print(e)
        finally:
            # A new session is created on every call; release it.
            sess.close()

    print(best_name, str(best_class_probabilities))
    return best_name, str(best_class_probabilities)
def main(args):
    """Align FaceScrub images to 112x112 crops and write an index file.

    Bounding boxes are read from ``facescrub_actors.txt`` and
    ``facescrub_actresses.txt`` and the set of images to process from
    ``facescrub_uncropped_features_list.json`` (all inside
    ``args.input_dir``). For each selected image the following strategies are
    tried in order until one produces a crop; ``nrof`` counts how often each
    one was used:

    0. a landmark-based pre-warp followed by MTCNN detection,
    1. MTCNN detection on the full image, keeping the box with the highest
       IOU against the labelled box (must exceed 0.3),
    2. ``detect_face.detect_face_force`` on the labelled box (score >= 0.3),
    3. a plain crop around the labelled box,
    4. a center crop removing a 6% border.

    Crops are written as PNG files under ``args.output_dir/<class>/`` and one
    line per image is appended to ``args.output_dir/lst``.

    Args:
        args: Namespace providing ``input_dir`` and ``output_dir``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    # facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    image_dir = os.path.join(args.input_dir, 'facescrub')
    dataset = face_image.get_dataset('facescrub', image_dir)
    print('dataset size', len(dataset))

    # Labelled bounding boxes keyed by (field 0, int(field 2)) of each row.
    bbox = {}
    for label_file in ['facescrub_actors.txt', 'facescrub_actresses.txt']:
        label_file = os.path.join(args.input_dir, label_file)
        # Context manager so the label file is closed after parsing.
        with open(label_file, 'r') as label_handle:
            for pp, line in enumerate(label_handle, 1):
                if pp == 1:
                    # Skip the first line of the file.
                    continue
                vec = line.split("\t")
                key = (vec[0], int(vec[2]))
                value = [int(x) for x in vec[4].split(',')]
                bbox[key] = value
    print('bbox size', len(bbox))

    # Keys of the images that should be processed.
    valid_key = {}
    features_list_path = os.path.join(
        args.input_dir, 'facescrub_uncropped_features_list.json')
    with open(features_list_path) as json_file:
        json_data = json.loads(json_file.read())['path']
    for _data in json_data:
        key = _data.split('/')[-1]
        pos = key.rfind('.')
        if pos < 0:
            print(_data)
        else:
            key = key[0:pos]
        keys = key.split('_')
        if len(keys) != 2:
            print('err', key, _data)
            continue
        key = (keys[0], int(keys[1]))
        valid_key[key] = 1
    print('valid keys', len(valid_key))

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    # Output size as [height, width]. The original first assigned [112, 96]
    # and immediately overwrote it with this value.
    image_size = [112, 112]
    # Reference landmark positions used as the alignment target.
    src = np.array([[30.2946, 51.6963], [65.5318, 51.5014],
                    [48.0252, 71.7366], [33.5493, 92.3655],
                    [62.7299, 92.2041]],
                   dtype=np.float32)

    if image_size[1] == 112:
        # Shift the reference points horizontally for the 112-wide output.
        src[:, 0] += 8.0

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    output_filename = os.path.join(args.output_dir, 'lst')

    with open(output_filename, "w") as text_file:
        nrof_images_total = 0
        nrof = np.zeros((5, ), dtype=np.int32)
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%s)" % (nrof_images_total, nrof))
            nrof_images_total += 1
            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue

            # Derive the (class directory, image id) key from the path.
            _paths = fimage.image_path.split('/')
            print(fimage.image_path)
            a, b = _paths[-2], _paths[-1]
            pb = b.rfind('.')
            bname = b[0:pb]
            pb = bname.rfind('_')
            body = bname[(pb + 1):]
            img_id = int(body)
            key = (a, img_id)
            if key not in valid_key:
                continue
            assert key in bbox
            fimage.bbox = bbox[key]

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(image_path, e)
                print(errorMessage)
            else:
                if img.ndim < 2:
                    print('Unable to align "%s", img dim error' % image_path)
                    continue
                if img.ndim == 2:
                    img = to_rgb(img)
                img = img[:, :, 0:3]

                tb = bname.replace(' ', '_') + ".png"
                ta = a.replace(' ', '_')
                target_dir = os.path.join(args.output_dir, ta)
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                target_file = os.path.join(target_dir, tb)

                warped = None
                # Strategy 0: coarse warp from the stored landmarks, then
                # detect on the warped image.
                if fimage.landmark is not None:
                    dst = fimage.landmark.astype(np.float32)

                    tform = trans.SimilarityTransform()
                    tform.estimate(dst,
                                   src[0:3, :] * 1.5 + image_size[0] * 0.25)
                    M = tform.params[0:2, :]
                    warped0 = cv2.warpAffine(
                        img,
                        M, (image_size[1] * 2, image_size[0] * 2),
                        borderValue=0.0)
                    _minsize = image_size[0]
                    bounding_boxes, points = detect_face.detect_face(
                        warped0, _minsize, pnet, rnet, onet, threshold,
                        factor)
                    if bounding_boxes.shape[0] > 0:
                        bindex = 0
                        # points need to be transpose,
                        # points = points.reshape( (5,2) ).transpose()
                        dst = points[:, bindex].reshape((2, 5)).T
                        tform = trans.SimilarityTransform()
                        tform.estimate(dst, src)
                        M = tform.params[0:2, :]
                        warped = cv2.warpAffine(
                            warped0,
                            M, (image_size[1], image_size[0]),
                            borderValue=0.0)
                        nrof[0] += 1

                # Strategy 1: detect on the full image and keep the box that
                # overlaps the labelled box best.
                if warped is None and fimage.bbox is not None:
                    _minsize = img.shape[0] // 4
                    bounding_boxes, points = detect_face.detect_face(
                        img, _minsize, pnet, rnet, onet, threshold, factor)
                    if bounding_boxes.shape[0] > 0:
                        det = bounding_boxes[:, 0:4]
                        bindex = -1
                        # [best IOU so far, index of that detection]
                        index2 = [0.0, 0]
                        for i in range(det.shape[0]):
                            _det = det[i]
                            iou = IOU(fimage.bbox, _det)
                            if iou > index2[0]:
                                index2[0] = iou
                                index2[1] = i
                        if index2[0] > 0.3:
                            bindex = index2[1]
                        if bindex >= 0:
                            dst = points[:, bindex].reshape((2, 5)).T
                            tform = trans.SimilarityTransform()
                            tform.estimate(dst, src)
                            M = tform.params[0:2, :]
                            warped = cv2.warpAffine(
                                img,
                                M, (image_size[1], image_size[0]),
                                borderValue=0.0)
                            nrof[1] += 1

                # Strategy 2: force detection on the labelled box.
                if warped is None and fimage.bbox is not None:
                    bb = fimage.bbox
                    bounding_boxes, points = detect_face.detect_face_force(
                        img, bb, pnet, rnet, onet)
                    assert bounding_boxes.shape[0] == 1
                    _box = bounding_boxes[0]
                    if _box[4] >= 0.3:
                        dst = points[:, 0].reshape((2, 5)).T
                        tform = trans.SimilarityTransform()
                        tform.estimate(dst, src)
                        M = tform.params[0:2, :]
                        warped = cv2.warpAffine(
                            img,
                            M, (image_size[1], image_size[0]),
                            borderValue=0.0)
                        nrof[2] += 1

                # Strategies 3 and 4: plain crop without landmark alignment.
                if warped is None:
                    # Default region: the image minus a 6% border.
                    roi = np.zeros((4, ), dtype=np.int32)
                    roi[0] = int(img.shape[1] * 0.06)
                    roi[1] = int(img.shape[0] * 0.06)
                    roi[2] = img.shape[1] - roi[0]
                    roi[3] = img.shape[0] - roi[1]
                    if fimage.bbox is not None:
                        bb = fimage.bbox
                        h = bb[3] - bb[1]
                        w = bb[2] - bb[0]
                        x = bb[0]
                        y = bb[1]
                        # Width that gives the box the output aspect ratio,
                        # centered horizontally on the labelled box.
                        _w = int((float(h) / image_size[0]) * image_size[1])
                        x += (w - _w) // 2
                        x = max(0, x)
                        xw = x + _w
                        xw = min(xw, img.shape[1])
                        roi = np.array((x, y, xw, y + h), dtype=np.int32)
                        nrof[3] += 1
                    else:
                        nrof[4] += 1
                    warped = img[roi[1]:roi[3], roi[0]:roi[2], :]
                    warped = cv2.resize(warped,
                                        (image_size[1], image_size[0]))

                # Reverse the channel order before writing with OpenCV.
                bgr = warped[..., ::-1]
                cv2.imwrite(target_file, bgr)
                oline = '%d\t%s\t%d\n' % (1, target_file,
                                          int(fimage.classname))
                text_file.write(oline)