def handle_image_array_faces(image, base_image_name="", output_directory="manual_filter"):
    face_locations = face_recognition.face_locations(
        image, number_of_times_to_upsample=0, model="cnn")
    face_landmarks_list = face_recognition.face_landmarks(image)

    if len(face_locations) != len(face_landmarks_list):
        print("landmarks and face_locations do not match! Found faces: {}".format(
            len(face_locations)))
        save_faces(Image.fromarray(image), face_locations,
                   output_path=output_directory)
        return

    aligner = FaceAligner(output_directory=output_directory)
    for i in range(len(face_landmarks_list)):
        face_location = face_locations[i]
        face_landmarks = face_landmarks_list[i]
        if base_image_name:
            aligner.save_rotated_face(face_location, face_landmarks, image,
                                      file_name="{}_{}.jpg".format(base_image_name, i))
        else:
            aligner.save_rotated_face(face_location, face_landmarks, image,
                                      file_name="{}_{}.jpg".format(get_new_file_name(), i))
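# The snippet above assumes two helpers that are not shown. The versions below are
# hypothetical sketches (not the original implementations): save_faces() crops each
# detected box out of a PIL image, and get_new_file_name() produces a unique base name.
import os
import uuid

def save_faces(pil_image, face_locations, output_path="manual_filter"):
    # face_recognition returns boxes as (top, right, bottom, left) tuples
    os.makedirs(output_path, exist_ok=True)
    for i, (top, right, bottom, left) in enumerate(face_locations):
        face = pil_image.crop((left, top, right, bottom))
        face.save(os.path.join(output_path, "{}_{}.jpg".format(get_new_file_name(), i)))

def get_new_file_name():
    # assumption: any collision-resistant name works here
    return uuid.uuid4().hex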
def __init__(self,
             cpu_lib="/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so",
             landmarks_xml="openvino_detectors/landmarks-regression/FP32/model.xml",
             features_xml="openvino_detectors/face-reidentification/FP32/model.xml"):
    # Plugin initialization for specified device and load extensions library if specified
    plugin = IEPlugin(device="CPU")
    plugin.add_cpu_extension(cpu_lib)

    # Read landmarks IR
    landmarks_bin = os.path.splitext(landmarks_xml)[0] + ".bin"
    log.info("Loading landmarks network files:\n\t{}\n\t{}".format(landmarks_xml, landmarks_bin))
    landmarks_net = IENetwork.from_ir(model=landmarks_xml, weights=landmarks_bin)

    # Read features IR
    features_bin = os.path.splitext(features_xml)[0] + ".bin"
    log.info("Loading features network files:\n\t{}\n\t{}".format(features_xml, features_bin))
    features_net = IENetwork.from_ir(model=features_xml, weights=features_bin)

    self.l_in = next(iter(landmarks_net.inputs))
    self.l_out = next(iter(landmarks_net.outputs))
    landmarks_net.batch_size = 1

    self.f_in = next(iter(features_net.inputs))
    self.f_out = next(iter(features_net.outputs))
    features_net.batch_size = 1

    cur = landmarks_net.inputs[self.l_in]
    self.l_n = cur.layout
    self.l_c, self.l_h, self.l_w = cur.shape[1:]  # self.l_n = NCHW, batch dim is 1
    self.l_images = np.ndarray(shape=(1, self.l_c, self.l_h, self.l_w))

    cur = features_net.inputs[self.f_in]
    self.f_n = cur.layout
    self.f_c, self.f_h, self.f_w = cur.shape[1:]
    self.f_images = np.ndarray(shape=(1, self.f_c, self.f_h, self.f_w))

    # Loading models to the plugin
    log.info("Loading models to the plugin")
    self.l_exec_net = plugin.load(network=landmarks_net)
    self.f_exec_net = plugin.load(network=features_net)

    self.face_aligner = FaceAligner(face_width=self.f_w, face_height=self.f_h)
    self.vectors = {}
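# A minimal sketch (not part of the original class) of how the loaded re-identification
# network might be driven per face: resize to the network's HxW, reorder HWC -> CHW, and
# run a synchronous inference. The method name extract_descriptor() and the cv2 import
# are assumptions for illustration.
def extract_descriptor(self, face_bgr):
    resized = cv2.resize(face_bgr, (self.f_w, self.f_h))
    self.f_images[0] = resized.transpose((2, 0, 1))  # HWC -> CHW
    result = self.f_exec_net.infer(inputs={self.f_in: self.f_images})
    return result[self.f_out].flatten()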
def main():
    args = parse_args()
    det_json = args.det_json
    save_dir = args.save_dir
    model_dir = args.mtcnn_model_dir
    gpu_id = args.gpu_id

    aligner = FaceAligner(model_dir, gpu_id=gpu_id)

    index = 0
    with open(det_json, "r") as f:
        for line in f:
            index += 1
            print("Processing img %d" % index)
            line = json.loads(line.strip())
            # one url corresponds to one pts
            url = str(line["url"])
            if not line['det']:
                continue
            pts = line['det'][0]['boundingBox']['pts']
            # image names are prefixed with the person's name; if missing, the prefix is "neg"
            name = str(url.split('/')[-2])
            img_name = url.split('/')[-1]
            sub_save_dir = os.path.join(save_dir, name)
            if not os.path.exists(sub_save_dir):
                os.makedirs(sub_save_dir)
            img = _pull_img(url)
            if img is None:
                continue
            # crop only one face
            face_chip = aligner.get_face_chips(img, [pts], output_square=default_square)
            save_name = os.path.join(sub_save_dir, img_name)
            cv2.imwrite(save_name, face_chip[0])
def main(img_list_file, root_dir, mtcnn_model_dir, save_dir=None):
    if not save_dir:
        save_dir = './aligned_images'
    if not osp.exists(save_dir):
        print('mkdir for aligned faces, aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    aligned_save_dir = osp.join(save_dir, 'aligned_faces')
    if not osp.exists(aligned_save_dir):
        print('mkdir for aligned faces, aligned images dir: ', aligned_save_dir)
        os.makedirs(aligned_save_dir)

    # aligner = MtcnnAligner(mtcnn_model_dir, False)
    aligner = FaceAligner(mtcnn_model_dir)

    fp = open(img_list_file, 'r')
    fn_rlt = osp.join(save_dir, 'fd_rlt.json')
    fp_rlt = open(fn_rlt, 'w')
    fp_rlt.write('[\n')

    count = 0
    for line in fp:
        print line
        line_split = line.split()
        img_fn = line_split[0]
        id_num = line_split[1]

        img_fn_split = img_fn.split('/')
        img_fn = osp.join(root_dir, img_fn)
        print 'process image: ', img_fn, " id_num: ", id_num
        # for root,dirs,files in path_walk:
        err_msg = ''

        # write a separating comma before every record except the first
        if count:
            fp_rlt.write(',\n')
        count = count + 1
        print 'count: ', count

        overlap_thresh_0 = overlap_thresh

        save_subdir = osp.join(aligned_save_dir, img_fn_split[-2])
        save_img_fn = osp.join(save_subdir, img_fn_split[-1])
        if not osp.exists(save_subdir):
            os.makedirs(save_subdir)

        image = cv2.imread(img_fn)
        print image.shape

        boxes, points = aligner.align_face(image, [GT_RECT])
        box = boxes[0]
        pts = points[0]

        facial5points = np.reshape(points, (2, -1))
        # dst_img = warp_and_crop_face(image, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(image, [box], [pts])[0]
        cv2.imwrite(save_img_fn, dst_img)

        item = {}
        tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts, 'id': id_num}
        item['faces'] = tmp
        # item['id'] = data[u'url'].line_splitit('/')[-3]
        item['shape'] = image.shape

        json_str = json.dumps(item, indent=2)
        fp_rlt.write(json_str + '\n')
        fp_rlt.flush()

    fp_rlt.write(']\n')
    fp_rlt.close()
    fp.close()
# import the necessary packages
from face_aligner import FaceAligner
from helpers import rect_to_bb
import argparse
import glob
import imutils
import dlib
import cv2

count = 0
uid = 69

# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor and the face aligner
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
fa = FaceAligner(predictor, desiredFaceWidth=256)

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--class", type=str, default="all",
                help="test-images or base-image")
args = vars(ap.parse_args())

if args["class"] == "base":
    images = glob.glob("test_images/*")
else:
    images = glob.glob("input_dir/*")

for img in images:
    print(img)
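# The per-image loop body is cut off above. The loop below is a plausible continuation,
# not the original code: it assumes the pyimagesearch-style FaceAligner.align(image, gray, rect)
# signature and the rect_to_bb() helper already imported above.
for img_path in images:
    image = cv2.imread(img_path)
    image = imutils.resize(image, width=800)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image, then align each one
    rects = detector(gray, 2)
    for rect in rects:
        (x, y, w, h) = rect_to_bb(rect)
        face_aligned = fa.align(image, gray, rect)
        cv2.imwrite("aligned_{}_{}.jpg".format(uid, count), face_aligned)
        count += 1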
# Path Arguments
parser.add_argument(
    '--predictor_path',
    type=str,
    required=True,
    help='location of the dlib facial landmark predictor where shape_predictor_68_face_landmarks.dat is located')
parser.add_argument('--gallery_path',
                    type=str,
                    required=True,
                    help='location of the gallery')
parser.add_argument('--port', type=int, default=8000, help='which port to use')
args = parser.parse_args()

face_aligner = FaceAligner(args.predictor_path)
face_recognizer = FaceRecognizer(args.gallery_path, OpenCVAlgorithm, face_aligner)
register_handler = RegisterHandler(args.gallery_path, face_aligner)
recognize_handler = RecognizeHandler(args.gallery_path, face_aligner, face_recognizer)


class S(BaseHTTPRequestHandler):
    def _set_response(self, message=None):
        self.send_response(200)
        if message is not None:
            # self.send_header('Content-type', 'text/html')
            self.send_header('Content-type', 'application/json')
        self.end_headers()
def main(args):
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.39, 0.39),
                         desiredFaceWidth=256,
                         desiredFaceHeight=256)

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 1280)
    vs.set(4, 720)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                   onet, threshold, factor)
        # print("here they are \n")
        # print(points)

        # See if a face is detected
        if bb.shape[0] > 0:
            # Draw rectangles on the faces and circles on the landmarks
            for i in range(bb.shape[0]):
                cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                              (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

            # loop over the (x, y)-coordinates for the facial landmarks
            # and draw each of them
            for col in range(points.shape[1]):
                for i in range(5):
                    cv2.circle(img, (int(points[i][col]), int(points[i + 5][col])),
                               1, (255, 0, 0), -1)

            # ALIGNMENT - use the bounding boxes and facial landmarks to align images
            aligned_image = affine.align(img, points)

            # Show the image only if alignment is there
            cv2.imshow("Alignment", aligned_image)

        cv2.imshow("Output", img)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
def create_net(configs):
    use_gpu = False
    roi_scale = 1.0

    CTX.logger.info("===> Input app configs: %s\n", str(configs))
    if not configs:
        configs = {}

    CTX.logger.info(
        "===> Try to load default app configs from: %s and use them to update configs\n",
        DEFAULT_APP_CONFIG_FNAME)
    try:
        fp = open(DEFAULT_APP_CONFIG_FNAME, 'r')
        _configs = json.load(fp)
        fp.close()

        CTX.logger.info("===> Loaded default app configs: %s\n", str(_configs))
        _configs.update(configs)
        configs = _configs
        CTX.logger.info("===> Updated app configs: %s\n", str(configs))

        mtcnn_model_path = ''
        feature_model_path = ''

        if "model_files" in configs:
            # print 'configs["model_files"]: ', configs["model_files"]
            for k, v in configs["model_files"].iteritems():
                if not mtcnn_model_path and k.startswith("mtcnn"):
                    if osp.isfile(v):
                        mtcnn_model_path = osp.dirname(v)
                    elif osp.isdir(v):
                        mtcnn_model_path = v
                if not feature_model_path and k.startswith("feature"):
                    if osp.isfile(v):
                        feature_model_path = osp.dirname(v)
                    elif osp.isdir(v):
                        feature_model_path = v

        if not mtcnn_model_path:
            raise Exception("Error: empty mtcnn_model_path\n")
        if not feature_model_path:
            raise Exception("Error: empty feature_model_path\n")

        configs["model_params"]["mtcnn_model_path"] = mtcnn_model_path
        configs["model_params"]["feature_model_path"] = feature_model_path
        configs["model_params"]["network_model"] = osp.join(
            feature_model_path, 'model,0')

        use_gpu = configs["use_device"].upper() == 'GPU'
        CTX.logger.info("===> use_gpu: %s", str(use_gpu))

        if 'gpu_id' not in configs["model_params"]:
            configs["model_params"]["gpu_id"] = 0
        if use_gpu:
            CTX.logger.info("===> gpu_id: %s",
                            str(configs["model_params"]["gpu_id"]))

        if 'roi_scale' in configs["model_params"]:
            roi_scale = configs["model_params"]['roi_scale']
    except Exception as e:
        CTX.logger.error("Error when load and update app configs: %s\n",
                         traceback.format_exc())
        return {}, 521, str(e)

    CTX.logger.info("===> Updated app configs: %s\n", str(configs))
    CTX.logger.info(
        "===> Try to load default extractor_config from: %s and update it by configs['model_params']\n",
        DEFAULT_EXTRACTOR_CONFIG_FNAME)
    try:
        fp = open(DEFAULT_EXTRACTOR_CONFIG_FNAME, 'r')
        extractor_config = json.load(fp)
        fp.close()

        CTX.logger.info("===> Loaded feature extractor configs: %s\n",
                        str(extractor_config))
        if 'model_params' in configs:
            extractor_config.update(configs["model_params"])
        # if 'feature_model' in configs["model_params"]:
        #     extractor_config["network_model"] = configs["model_params"]["feature_model"]
        if 'batch_size' in configs:
            extractor_config["batch_size"] = configs["batch_size"]

        if use_gpu:
            extractor_config["cpu_only"] = False
        else:
            extractor_config["cpu_only"] = True
    except Exception as e:
        CTX.logger.error("Error when load and update extractor configs: %s\n",
                         traceback.format_exc())
        return {}, 522, str(e)

    CTX.logger.info("===> Updated feature extractor configs: %s",
                    str(extractor_config))

    try:
        feature_extractor = MxnetFeatureExtractor(extractor_config)
    except Exception as e:
        CTX.logger.error("Error when init face feature extractor: %s\n",
                         traceback.format_exc())
        return {}, 523, str(e)

    try:
        face_aligner = FaceAligner(
            str(configs["model_params"]["mtcnn_model_path"]),
            configs["model_params"]["gpu_id"] if use_gpu else -1)
    except Exception as e:
        CTX.logger.error("Error when init face aligner: %s\n",
                         traceback.format_exc())
        return {}, 524, str(e)

    model = {
        "feature_extractor": feature_extractor,
        "face_aligner": face_aligner,
        "batch_size": configs["batch_size"],
        "input_height": extractor_config["input_height"],
        "input_width": extractor_config["input_width"],
        "workspace": configs["workspace"],
        "roi_scale": roi_scale
    }
    return model, 0, 'Success'
def dataset_creation():
    path = input("\nEnter the output folder location or simply press ENTER to create a dataset folder in this directory only: ").rstrip()

    if os.path.isdir(path):
        # User-given path is present.
        path += '/output'
        if os.path.isdir(path):
            print("Directory already exists. Using it \n")
        else:
            if not os.makedirs(path):
                print("Directory successfully made in: " + path + "\n")
    # either the user pressed ENTER or gave a wrong location.
    else:
        if path == "":
            print("Making an output folder in this directory only. \n")
        else:
            print("No such directory exists. Making an output folder in this current code directory only. \n")
        path = 'output'
        if os.path.isdir(path):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(path):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + path + "\n")

    # Ask for webcam resolution
    res = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
    if res == "":
        res = (640, 480)
    else:
        res = tuple(map(int, res.split('x')))

    # Start MTCNN face detection and pose estimation module.
    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    # Some more MTCNN parameters
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Create an object of the face aligner module
    face_size = input("\nEnter desired face width and height in WidthxHeight format OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if face_size == "":
        face_size = (160, 160)
    else:
        face_size = tuple(map(int, face_size.split('x')))

    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=face_size[0],
                         desiredFaceHeight=face_size[1])

    # Dataset creation was chosen before, so start collecting the dataset.
    while True:
        ask = input("\nEnter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ").rstrip()
        # replace all spaces with underscores
        ask = ask.replace(" ", "_")
        if ask == "":
            folder_name = 'person' + str(personNo)
        else:
            folder_name = ask

        # Creating new user-specific variables
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 1

        # Create the folder at the given location with the given username.
        if os.path.isdir(users_folder):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(users_folder):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + users_folder + "\n")

        # Start webcam or video file according to the user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ").rstrip()

        # default webcam which uses an infinite loop; the video variable is used to find total frames
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        # Initialize webcam or video
        device = cv2.VideoCapture(data_type)

        # If webcam, set resolution
        if data_type == 0:
            device.set(3, res[0])
            device.set(4, res[1])
        else:
            # Finding total number of frames of the video.
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
            # Shutting down the webcam loop
            loop_type = False

        # Start the webcam or video and create the dataset for this user.
        while loop_type or (total_frames > 0):
            # If a video was selected, decrement the counter
            if loop_type == False:
                total_frames -= 1

            ret, image = device.read()

            # Run MTCNN and do face detection while the 's' key is pressed
            if (cv2.waitKey(1) & 0xFF) == ord("s"):
                # DETECT FACES. We get the bounding boxes as well as the points for the face
                bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                # See if a face is detected
                if bb.shape[0] > 0:
                    # align the detected faces
                    for col in range(points.shape[1]):
                        aligned_image = affine.align(image, points[:, col])
                        # Save the image
                        image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(4) + ".png"
                        cv2.imwrite(image_name, aligned_image)
                        image_no += 1

                    # Draw the bounding boxes and pose landmarks on the image
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

            # Show the output video to the user
            cv2.imshow("Output", image)

            # Break this loop if the 'q' key is pressed, to go to the next user.
            if (cv2.waitKey(20) & 0xFF) == ord("q"):
                device.release()
                cv2.destroyAllWindows()
                break

        # Ask whether to add more users (webcam or video) or exit.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        ask = ask.rstrip().lstrip().lower()
        if ask != "":
            if ask[0] == 'q':
                break

    # Dataset creation is complete. Ask the user to train now or exit.
    ask = input("Press ENTER to exit or \nPress T keyword to TRAIN and 'maybe' TEST later by creating a classifier on the facenet model OR \nPress W to test the dataset folder on a classifier model: ").rstrip().lstrip().lower()
    if ask == 't':
        train()
    elif ask == 'w':
        test()
    else:
        if ask == "":
            print("Cleaning and exiting. Thank You \n")
        else:
            print("\n wrong keyword pressed. Cleaning and exiting. \n Thank You \n")
def main(json_file, save_dir=None, save_img=True, show_img=True):
    if not osp.exists(json_file):
        print 'Cannot find json file: ' + json_file
        return

    if save_dir is None:
        save_dir = './fa_facex_rlt'

    save_json = 'mtcnn_align_rlt.json'
    model_path = "../../model"

    fp_json = open(json_file, 'r')
    facex_response = json.load(fp_json)
    fp_json.close()

    if (not facex_response or not isinstance(facex_response, dict)
            or 'facex_det' not in facex_response):
        print 'Invalid json file: ' + json_file
        return

    facex_det_response = facex_response['facex_det']

    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    fp_rlt = open(osp.join(save_dir, save_json), 'w')
    results = []

    for item in facex_det_response:
        img_path = item['name']
        print '===> Processing image: ' + img_path

        if 'detections' not in item:
            continue

        face_rects = []
        for face in item['detections']:
            face_rects.append(face['pts'])

        img = cv2.imread(img_path)

        aligner = FaceAligner(model_path, False)

        rlt = {}
        rlt["filename"] = img_path
        rlt["faces"] = []
        rlt['face_count'] = 0

        t1 = time.clock()
        bboxes, points = aligner.align_face(img, face_rects)
        t2 = time.clock()

        n_boxes = len(face_rects)
        print("-->Alignment cost %f seconds, processed %d face rects, avg time: %f seconds"
              % ((t2 - t1), n_boxes, (t2 - t1) / n_boxes))

        if bboxes is not None and len(bboxes) > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = len(bboxes)

        rlt['message'] = 'success'
        results.append(rlt)

        spl = osp.split(img_path)
        sub_dir = osp.split(spl[0])[1]
        base_name = spl[1]

        save_img_subdir = osp.join(save_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        # save_rect_subdir = osp.join(save_dir, sub_dir)
        # if not osp.exists(save_rect_subdir):
        #     os.mkdir(save_rect_subdir)
        # print pts

        save_img_fn = osp.join(save_img_subdir, base_name)
        print 'save face chip into ', save_img_fn

        # facial5points = np.reshape(pts, (2, -1))
        # dst_img = warp_and_crop_face(
        #     img, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(img, [box], [pts], True)[0]
        cv2.imwrite(save_img_fn, dst_img)

    json.dump(results, fp_rlt, indent=2)
    fp_rlt.close()
def main(args):
    print('Creating networks and loading parameters')

    # Building separate graphs for both the networks
    g1 = tf.Graph()
    g2 = tf.Graph()

    # images_placeholder = tf.placeholder(tf.int32)
    # embeddings = tf.Variable()
    # phase_train_placeholder = tf.placeholder(tf.bool)

    with g1.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(args.model)

    # with tf.Graph().as_default():
    #     with tf.Session() as sess:
    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Load the model for FaceNet image recognition and get the tensors
    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(3, 640)
    vs.set(4, 480)
    time.sleep(2.0)

    while True:
        ret, img = vs.read()

        # we get the bounding boxes as well as the points for the face
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                       onet, threshold, factor)
            # print("here they are \n")
            # print(points)

            # See if a face is detected
            if bb.shape[0] > 0:
                # Draw rectangles on the faces and circles on the landmarks
                for i in range(bb.shape[0]):
                    cv2.rectangle(img, (int(bb[i][0]), int(bb[i][1])),
                                  (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                # loop over the (x, y)-coordinates for the facial landmarks
                # and draw each of them
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(img, (int(points[i][col]), int(points[i + 5][col])),
                                   1, (255, 0, 0), -1)

                # ALIGNMENT - use the bounding boxes and facial landmarks to align images
                aligned_image = affine.align(img, points)

                # Show the image only if alignment is there
                cv2.imshow("Alignment", aligned_image)

                # Prewhiten the image for the facenet architecture to give better results
                mean = np.mean(aligned_image)
                std = np.std(aligned_image)
                std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
                facenet_image = np.multiply(np.subtract(aligned_image, mean), 1 / std_adj)

                img_list = []
                img_list.append(facenet_image)
                img_list.append(facenet_image)
                images = np.stack(img_list)

                g1.as_default()
                with tf.Session(graph=g1) as sess:
                    # Run forward pass on FaceNet to get the embeddings
                    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                    feed_dict = {
                        images_placeholder: images,
                        phase_train_placeholder: False
                    }
                    embedding = sess.run(embeddings, feed_dict=feed_dict)
                    print("Here is the embedding \n")
                    print(embedding)
                    print("\n")

        cv2.imshow("Output", img)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
def main():
    print("\n*********************************************************************************************** \n")
    print("          Welcome to the Face detection and recognition program. \n")
    print("\n*********************************************************************************************** \n")
    print("GUIDELINES TO USE THIS SOFTWARE: \n\nThis code lets the user:\n\n1) CREATE DATASET using MTCNN face detection and alignment. or\n2) TRAIN FaceNet for face recognition. or \n3) Do both.\n\n The user will repeatedly get the option to choose webcam (default option) or video file to do face detection and will be asked for output folder, username on folder and image files etc also (default options exist for that too)\n\n ************** IMPORTANT *************\n1) Whenever webcam or video starts press 's' keyword to start face detection in video or webcam frames and save the faces in the folder for a single user. This dataset creation will stop the moment you release the 's' key. This can be done multiple times.\n\n2) Press 'q' to close it when you are done with one person, and want to detect faces for another person. \n\n3) Make sure you press the keywords on the image window and not the terminal window.\n")

    mode = input("Press T to train the facenet for recognition OR \nPress D to first create dataset and then 'maybe' train later: ")

    # Some variables that will be used throughout the code
    path = ""
    res = ()
    personNo = 1
    folder_name = ""

    # This means the user chose dataset creation
    if mode == 'D':
        path = input("Enter the output folder location or simply press ENTER to create a dataset folder in this directory only: ")
        if os.path.isdir(path):
            # User-given path is present.
            path += '/output'
            if os.path.isdir(path):
                print("Directory already exists. Using it \n")
            else:
                if not os.makedirs(path):
                    print("Directory successfully made in: " + path + "\n")
        # either the user pressed ENTER or gave a wrong location.
        else:
            if path == "":
                print("Making an output folder in this directory only. \n")
            else:
                print("No such directory exists. Making an output folder in this current code directory only. \n")
            path = 'output'
            if os.path.isdir(path):
                print("Directory already exists. Using it \n")
            else:
                if os.makedirs(path):
                    print("error in making directory. \n")
                    sys.exit()
                else:
                    print("Directory successfully made: " + path + "\n")

        # Ask for webcam resolution
        res = input("Enter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ")
        if res == "":
            res = (640, 480)
        else:
            res = tuple(map(int, res.split("x")))

        # Start MTCNN face detection and pose estimation module.
        # Take gpu fraction values
        gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ")
        if gpu_fraction == "":
            gpu_fraction = 0.8
        else:
            gpu_fraction = round(float(gpu_fraction), 1)

        # Some more MTCNN parameters
        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps' threshold
        factor = 0.709               # scale factor

        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                    log_device_placement=False))
            with sess.as_default():
                pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

        # Create an object of the face aligner module
        face_size = input("Enter desired face width and height in widthxheight format OR press ENTER for default 160x160 pixel: ")
        if face_size == "":
            face_size = (160, 160)
        else:
            face_size = tuple(map(int, face_size.split("x")))

        affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                             desiredFaceWidth=face_size[0],
                             desiredFaceHeight=face_size[1])

    # This means the user chose the training part
    elif mode == 'T':
        train()
    else:
        print("No correct keyword entered. Exiting")
        sys.exit()

    # Dataset creation was chosen before, so start collecting the dataset.
    while True:
        ask = input("\n Enter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ")
        # replace all spaces with underscores
        ask = ask.replace(" ", "_")
        if ask == "":
            folder_name = 'person_' + str(personNo)
        else:
            folder_name = ask

        # Creating new user-specific variables
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 0

        # Create the folder at the given location with the given username.
        if os.path.isdir(users_folder):
            print("Directory already exists. Using it \n")
        else:
            if os.makedirs(users_folder):
                print("error in making directory. \n")
                sys.exit()
            else:
                print("Directory successfully made: " + users_folder + "\n")

        # Start webcam or video file according to the user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ")

        # default webcam which uses an infinite loop; the video variable is used to find total frames
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        # Initialize webcam or video
        device = cv2.VideoCapture(data_type)

        # If webcam, set resolution
        if data_type == 0:
            device.set(3, res[0])
            device.set(4, res[1])
        else:
            # Finding total number of frames of the video.
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))

        # Start the webcam or video and create the dataset for this user.
        while loop_type or (total_frames > 0):
            total_frames -= 1
            ret, image = device.read()

            # Run MTCNN and do face detection while the 's' key is pressed
            if (cv2.waitKey(1) & 0xFF) == ord("s"):
                # DETECT FACES. We get the bounding boxes as well as the points for the face
                bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                # See if a face is detected
                if bb.shape[0] > 0:
                    # align the detected faces
                    for col in range(points.shape[1]):
                        aligned_image = affine.align(image, points[:, col])
                        # Save the image
                        image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(3) + ".png"
                        cv2.imwrite(image_name, aligned_image)
                        image_no += 1

                    # Draw the bounding boxes and pose landmarks on the image
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

            # Show the output video to the user
            cv2.imshow("Output", image)

            # Break this loop if the 'q' key is pressed, to go to the next user.
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                device.release()
                cv2.destroyAllWindows()
                break

        # Ask whether to add more users (webcam or video) or exit.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        if ask == 'q':
            break

    # Dataset creation is complete. Ask the user to train now or exit.
    ask = input("Press ENTER to exit or press T keyword to train the data by Facenet model on dataset: ")
    if ask == "T":
        train()
img_path = r'C:\zyf\00_Ataraxia\facex\facex_cluster_test_imgs-wlc\3\3.jpg'
face_rect1 = [[490, 353], [767, 353], [767, 757], [490, 757]]
face_rects = [face_rect1]

base_name = osp.basename(img_path)
name, ext = osp.splitext(base_name)
ext = '.png'

# fp_rlt = open(osp.join(save_dir, save_json), 'w')
# results = []

img = cv2.imread(img_path)
aligner = FaceAligner(caffe_model_path)

t1 = time.clock()
# bboxes, points = aligner.align_face(img, face_rects)
# face_chips = aligner.get_face_chips(img, bboxes, points)
face_chips = aligner.get_face_chips(img, face_rects)
t2 = time.clock()

for i, chip in enumerate(face_chips):
    save_name = osp.join(save_dir, 'face_chip_%s_%d' % (name, i) + ext)
    cv2.imwrite(save_name, chip)
    if show_img:
        cv2.imshow('face_chip', chip)
def main(argv):
    args = parse_arguments(argv)
    print '===> args:\n', args

    config = load_config(args.config)
    print '===> config:\n', config

    max_faces = config['max_faces']
    extractor_config = config['face_feature']
    mtcnn_model_path = str(config['mtcnn_model_dir'])

    do_detect = not args.no_detect
    do_align = not args.no_align

    save_dir = args.save_dir
    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    pair_save_dir = osp.join(save_dir, 'img_pairs')
    if not osp.exists(pair_save_dir):
        os.mkdir(pair_save_dir)

    save_img = args.save_image
    show_img = args.show_image

    detector = None
    aligner = None

    if do_detect:
        detector = MtcnnDetector(mtcnn_model_path)

    if do_align:
        if not do_detect:
            aligner = FaceAligner(mtcnn_model_path)
        else:
            aligner = FaceAligner(None)
    else:
        aligner = None

    feature_extractor = CaffeFeatureExtractor(extractor_config)

    ctx_static = {}
    # ctx_static['args'] = args
    ctx_static['detector'] = detector
    ctx_static['aligner'] = aligner
    ctx_static['feature_extractor'] = feature_extractor
    ctx_static['do_detect'] = do_detect
    ctx_static['do_align'] = do_align
    ctx_static['save_img'] = save_img
    ctx_static['show_img'] = show_img
    ctx_static['save_dir'] = save_dir
    ctx_static['max_faces'] = max_faces

    # result_list = []
    img_cnt = 0
    faces_cnt = 0
    ttl_det_time = 0.0
    ttl_feat_time = 0.0

    ctx_active = {}
    # ctx_active['result_list'] = result_list
    ctx_active['img_cnt'] = img_cnt
    ctx_active['faces_cnt'] = faces_cnt
    ctx_active['ttl_det_time'] = ttl_det_time
    ctx_active['ttl_feat_time'] = ttl_feat_time

    fp = open(args.img_list_file, 'r')
    fp_rlt = open(osp.join(save_dir, 'face_feature.json'), 'w')
    fp_rlt.write('[\n')
    write_comma_flag = False

    while True:
        line = fp.readline().strip()
        print '---> line: ', line
        if not line:
            break

        img_path = get_image_path(line, args.image_root_dir)
        print '---> img_path: ', img_path

        (rlt, features, face_chips) = detect_faces_and_extract_features(
            img_path, ctx_static, ctx_active)
        # print 'features: ', features
        # print 'id(features): ', id(features)

        # result_list.append(rlt)
        if write_comma_flag:
            fp_rlt.write(',\n')
        else:
            write_comma_flag = True
        json_str = json.dumps(rlt, indent=2)
        fp_rlt.write(json_str)
        fp_rlt.flush()

        line = fp.readline().strip()
        print '---> line: ', line
        if not line:
            break

        img_path2 = get_image_path(line, args.image_root_dir)
        print '---> img_path2: ', img_path2

        (rlt2, features2, face_chips2) = detect_faces_and_extract_features(
            img_path2, ctx_static, ctx_active)
        # print 'features2: ', features2
        # print 'features: ', features
        # print 'id(features): ', id(features)
        # print 'id(features2): ', id(features2)
        # print 'features.data: ', id(features.data)
        # print 'features2.data: ', id(features2.data)

        # result_list.append(rlt2)
        json_str = json.dumps(rlt2, indent=2)
        fp_rlt.write(',\n' + json_str)
        fp_rlt.flush()

        if rlt['face_count'] and rlt2['face_count']:
            # sim = calc_similarity(features[0], features2[0])
            # img_pair = np.hstack((face_chips[0], face_chips2[0]))
            # img_pair_fn = '%s_%d_vs_%s_%d_%5.4f.jpg' % (osp.basename(img_path), 0, osp.basename(img_path2), 0, sim)
            # img_pair_fn = osp.join(pair_save_dir, img_pair_fn)
            # cv2.imwrite(img_pair_fn, img_pair)
            # print '---> similarity: ', sim
            for j in range(rlt['face_count']):
                for i in range(rlt2['face_count']):
                    sim = calc_similarity(features[j], features2[i])
                    print 'features[%d]: ' % j, features[j]
                    print 'features2[%d]: ' % i, features2[i]

                    img_pair = np.hstack((face_chips[j], face_chips2[i]))
                    img_pair_fn = '%s_%d_vs_%s_%d_%5.4f.jpg' % (osp.basename(img_path), j,
                                                                osp.basename(img_path2), i, sim)
                    img_pair_fn = osp.join(pair_save_dir, img_pair_fn)

                    sim_txt = '%5.4f' % sim
                    cv2_put_text_to_image(img_pair, sim_txt, 40, 5, 30, (0, 0, 255))
                    cv2.imwrite(img_pair_fn, img_pair)

                    print '---> similarity: ', sim

    # json.dump(result_list, fp_rlt, indent=2)
    fp_rlt.write('\n]\n')
    fp_rlt.close()
    fp.close()

    if show_img:
        cv2.destroyAllWindows()
def main(args):
    print('Creating networks and loading parameters')

    # Building separate graphs for both the tf architectures
    # g1 = tf.Graph()
    g2 = tf.Graph()

    '''
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with tf.Session() as sess:
            # Load the model for FaceNet image recognition
            facenet.load_model(args.model)
    '''

    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709               # scale factor

    # Create an object of the face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Taking the video and creating an object of it.
    print("[INFO] Taking the video input.")
    vs = cv2.VideoCapture(os.path.expanduser(args.video))

    # Finding the file format, size and the fps rate
    fps = vs.get(cv2.CAP_PROP_FPS)
    video_format = int(vs.get(cv2.CAP_PROP_FOURCC))
    frame_size = (int(vs.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    total_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))

    output_video = cv2.VideoWriter("Output_" + args.video, video_format, fps, frame_size)

    # Create the output_faces directory from the user-given or default arguments
    path = os.path.expanduser(args.output)
    path = path + "/output_faces"
    if not os.path.isdir(path):
        os.makedirs(path)

    image_numbers = 0

    print("Total number of frames \n" + str(total_frames) + "\n")

    # for i in range(total_frames):
    for i in range(total_frames):
        # Print the current frame / total frames to track progress
        print("\n" + str(i) + " / " + str(total_frames) + "\n")

        ret, image = vs.read()

        # Run the MTCNN model to detect faces
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            # we get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet,
                                                       onet, threshold, factor)

            # See if a face is detected
            if bb.shape[0] > 0:
                # ALIGNMENT - use the bounding boxes and facial landmarks to align images
                # create a numpy array to feed the network
                img_list = []
                images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])
                for col in range(points.shape[1]):
                    aligned_image = affine.align(image, points[:, col])

                    if args.show_video == True:
                        cv2.imshow("aligned", aligned_image)

                    # Prewhiten the image for the facenet architecture to give better results
                    # mean = np.mean(aligned_image)
                    # std = np.std(aligned_image)
                    # std_adj = np.maximum(std, 1.0/np.sqrt(aligned_image.size))
                    # ready_image = np.multiply(np.subtract(aligned_image, mean), 1/std_adj)

                    # Save the detected face images
                    place = path + "/" + "output_faces_" + str(image_numbers) + ".png"
                    print("saved to: " + place + "\n")
                    cv2.imwrite(place, aligned_image)
                    image_numbers += 1

                # if we want to show or save the video then draw the boxes and the points on the image
                if args.show_video == True or args.save_video == True:
                    for i in range(bb.shape[0]):
                        cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                      (int(bb[i][2]), int(bb[i][3])), (0, 255, 0), 2)

                    # loop over the (x, y)-coordinates for the facial landmarks
                    # and draw each of them
                    for col in range(points.shape[1]):
                        for i in range(5):
                            cv2.circle(image,
                                       (int(points[i][col]), int(points[i + 5][col])),
                                       1, (255, 0, 0), -1)

                    if args.save_video == True:
                        output_video.write(image)
                    if args.show_video == True:
                        cv2.imshow("Output", image)

        # Save the final aligned face image in given format
        """
        # Show the image
        # cv2.imshow(str(col), aligned_image)
        img_list.append(ready_image)
        images = np.stack(img_list)

        g1.as_default()
        with tf.Session(graph=g1) as sess:
            # Run forward pass on FaceNet to get the embeddings
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            feed_dict = {images_placeholder: images, phase_train_placeholder: False}
            embedding = sess.run(embeddings, feed_dict=feed_dict)
            print("Here is the embedding \n")
            print(embedding.shape)
            print("\n")
        """

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            # if keyboard.is_pressed('q'):
            # do a bit of cleanup
            vs.release()
            output_video.release()
            cv2.destroyAllWindows()
            break
img_path = '../../test_imgs/Marilyn_Monroe_0002.jpg'
face_rect1 = [[91, 57], [173, 57], [173, 180], [91, 180]]
face_rects = [face_rect1]

base_name = osp.basename(img_path)
name, ext = osp.splitext(base_name)
ext = '.png'

# fp_rlt = open(osp.join(save_dir, save_json), 'w')
# results = []

img = cv2.imread(img_path)
aligner = FaceAligner(model_path)

t1 = time.clock()
# You can align the faces in two steps like this:
#     bboxes, points = aligner.align_face(img, face_rects)
#     face_chips = aligner.get_face_chips(img, bboxes, points)
# OR just align them in one step by calling the following function,
# which combines the last two functions
face_chips = aligner.get_face_chips(img, face_rects)
t2 = time.clock()

for i, chip in enumerate(face_chips):
    print('---> chip.shape: ', chip.shape)
    save_name = osp.join(save_dir, 'face_chip_%s_%d' % (name, i) + ext)
    cv2.imwrite(save_name, chip)
def main(argv):
    args = parse_arguments(argv)
    print '===> args:\n', args

    config = load_config(args.config)
    print '===> config:\n', config

    extractor_config = config['face_feature']
    mtcnn_model_path = str(config['mtcnn_model_dir'])

    do_detect = not args.no_detect
    do_align = not args.no_align

    save_dir = args.save_dir
    if not osp.exists(save_dir):
        os.makedirs(save_dir)

    save_img = args.save_image
    show_img = args.show_image

    detector = None
    aligner = None

    if do_detect:
        detector = MtcnnDetector(mtcnn_model_path)

    if do_align:
        if not do_detect:
            aligner = FaceAligner(mtcnn_model_path)
        else:
            aligner = FaceAligner(None)
    else:
        aligner = None

    feature_extractor = CaffeFeatureExtractor(extractor_config)
    feat_layer = feature_extractor.get_feature_layers()[0]

    fp = open(args.img_list_file, 'r')
    fp_rlt = open(osp.join(save_dir, 'face_feature.json'), 'w')
    fp_rlt.write('[\n')
    write_comma_flag = False

    # result_list = []
    img_cnt = 0
    faces_cnt = 0
    ttl_det_time = 0.0
    ttl_feat_time = 0.0

    for line in fp:
        img_path = line.strip()
        print("\n===>" + img_path)
        if img_path == '':
            print 'empty line, not a file name, skip to next'
            continue
        if img_path[0] == '#':
            print 'skip line starts with #, skip to next'
            continue

        # result_list.append(rlt)
        if write_comma_flag:
            fp_rlt.write(',\n')
        else:
            write_comma_flag = True

        rlt = {}
        rlt["filename"] = img_path
        rlt["faces"] = []
        rlt['face_count'] = 0

        try:
            if args.image_root_dir:
                img = cv2.imread(osp.join(args.image_root_dir, img_path))
            else:
                img = cv2.imread(img_path)
            print '\n---> img.shape: ', img.shape
        except:
            print('failed to load image: ' + img_path)
            # rlt["message"] = "failed to load"
            json_str = json.dumps(rlt, indent=2)
            fp_rlt.write(json_str)
            fp_rlt.flush()
            continue

        if img is None:
            print('failed to load image: ' + img_path)
            rlt["message"] = "failed to load"
            # result_list.append(rlt)
            json_str = json.dumps(rlt, indent=2)
            fp_rlt.write(json_str)
            fp_rlt.flush()
            continue

        img_cnt += 1

        if do_detect:
            t1 = time.clock()
            bboxes, points = detector.detect_face(img)
            t2 = time.clock()
            ttl_det_time += t2 - t1
            print("detect_face() costs %f seconds" % (t2 - t1))
        else:
            print '---> Will not do detection because of option "--no_detect"'
            shp = img.shape
            rect = [0, 0, shp[1] - 1, shp[0] - 1, 1.0]
            bboxes = [rect]
            points = [None]

        n_faces = 0
        if bboxes is not None:
            n_faces = len(bboxes)

        if n_faces > 0:
            for (box, pts) in zip(bboxes, points):
                # box = box.tolist()
                # pts = pts.tolist()
                tmp = {'rect': box[0:4], 'score': box[4], 'pts': pts}
                rlt['faces'].append(tmp)
            rlt['face_count'] = n_faces

        # print('output bboxes: ' + str(bboxes))
        # print('output points: ' + str(points))
        # toc()

        if do_detect:
            print("\n===> Detect %d images, costs %f seconds, avg time: %f seconds"
                  % (img_cnt, ttl_det_time, ttl_det_time / img_cnt))
        print "---> %d faces detected" % n_faces

        if not n_faces:
            continue

        t1 = time.clock()
        if do_align:
            if points is None or points[0] is None:
                face_chips = aligner.get_face_chips(img, bboxes, None)
            else:
                face_chips = aligner.get_face_chips(img, bboxes, points)
            # face_chips = aligner.get_face_chips(img, bboxes, None)
            # face_chips = [im.astype(np.float) for im in face_chips_ubyte]
        else:
            print '---> Will not do alignment because of option "--no_align"'
            face_chips = [img.astype(np.float)]

        features = feature_extractor.extract_features_batch(face_chips)[feat_layer]
        t2 = time.clock()
        ttl_feat_time += t2 - t1
        print("Cropping and extracting features for %d faces cost %f seconds"
              % (n_faces, t2 - t1))

        faces_cnt += n_faces
        print("\n===> Extracting features for %d faces, costs %f seconds, avg time: %f seconds"
              % (faces_cnt, ttl_feat_time, ttl_feat_time / faces_cnt))

        for i, box in enumerate(bboxes):
            # feat_file = '%s_%d_rect[%d_%d_%d_%d].npy' % (
            #     osp.basename(img_path), i, box[0], box[1], box[2], box[3])
            # feat_file = osp.join(save_dir, feat_file)
            # np.save(feat_file, features[i])
            base_name = osp.basename(img_path)
            face_fn_prefix = '%s_face_%d' % (osp.splitext(base_name)[0], i)

            feat_file = face_fn_prefix + '.npy'
            np.save(osp.join(save_dir, feat_file), features[i])

            face_chip_fn = face_fn_prefix + '.jpg'
            cv2.imwrite(osp.join(save_dir, face_chip_fn), face_chips[i])

            rlt['faces'][i]['feat'] = feat_file
            rlt['faces'][i]['face_chip'] = face_chip_fn

        rlt['message'] = 'success'
        # result_list.append(rlt)
        json_str = json.dumps(rlt, indent=2)
        fp_rlt.write(json_str)
        fp_rlt.flush()

        if save_img or show_img:
            draw_faces(img, bboxes, points)

        if save_img:
            save_name = osp.join(save_dir, osp.basename(img_path))
            cv2.imwrite(save_name, img)

        if show_img:
            cv2.imshow('img', img)
            ch = cv2.waitKey(0) & 0xFF
            if ch == 27:
                break

    # json.dump(result_list, fp_rlt, indent=4)
    fp_rlt.write('\n]\n')
    fp_rlt.close()
    fp.close()

    if show_img:
        cv2.destroyAllWindows()
def recognize():
    # Taking the parameters for recognition from the user
    classifier_filename = input("\nEnter the path of the classifier .pkl file or press ENTER if a filename 'classifier.pkl' is present in this code directory itself: ")
    if classifier_filename == "":
        classifier_filename = 'classifier.pkl'
    classifier_filename = os.path.expanduser(classifier_filename)

    model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
    if model == "":
        model = "20180402-114759/20180402-114759.pb"

    # Create an object of the face aligner module
    image_size = (160, 160)
    ask = input("\nEnter desired face width and height in WidthxHeight format for the face aligner OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if ask != "":
        image_size = tuple(map(int, ask.split('x')))

    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction you want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    input_type = input("\nPress I for image input OR\nPress V for video input OR\nPress W for webcam input OR\nPress ENTER for default webcam: ").lstrip().rstrip().lower()
    if input_type == "":
        input_type = 'w'

    # Load the face aligner model
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=image_size[0],
                         desiredFaceHeight=image_size[1])

    # Building separate graphs for both the tf architectures
    g1 = tf.Graph()
    g2 = tf.Graph()

    # Load the model for FaceNet image recognition
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with tf.Session() as sess:
            facenet.load_model(model)

    # Load the model of MTCNN face detection.
    with g2.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Some MTCNN network parameters
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.8]  # three steps' threshold
    factor = 0.709               # scale factor

    ask = input("\nEnter the threshold FACE DETECTION CONFIDENCE SCORE to consider detection by MTCNN OR press ENTER for default 0.80: ")
    if ask != "" and float(ask) < 1:
        threshold[2] = round(float(ask), 2)

    classifier_threshold = 0.50
    ask = input("\nEnter the threshold FACE RECOGNITION CONFIDENCE SCORE to consider face is recognised OR press ENTER for default 0.50: ")
    if ask != "":
        classifier_threshold = float(ask)

    # Loading the classifier model
    with open(classifier_filename, 'rb') as infile:
        (modelSVM, class_names) = pickle.load(infile)
    print('\nLoaded classifier model from file "%s"' % classifier_filename)

    # default webcam which uses an infinite loop, or video/image settings
    loop_type = False
    image_input = 0
    total_frames = 0
    save_video = False
    frame_no = 1
    output_video = []
    image = []
    display_output = True
    res = (640, 480)

    # If webcam is selected
    if input_type == "w":
        data_type = 0
        loop_type = True

        # Ask for webcam resolution
        ask = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
        if ask != "":
            res = tuple(map(int, ask.split('x')))

    # If image is selected, try to treat it as a video with a single frame
    elif input_type == "i":
        loop_type = False
        total_frames = 0
        data_type = input("\nWrite the image path file to open: ").rstrip().lstrip()
        image = cv2.imread(data_type)
        # Jump directly into the code to go through a single pass
        goto(581)

    # Video is selected
    else:
        loop_type = False
        data_type = input("\nWrite the video path file to open: ").rstrip().lstrip()

        ask = input("\nPress y to save the output video OR simply press ENTER to ignore it: ").lstrip().rstrip().lower()
        if ask == "y":
            save_video = True

        ask = input("\nSimply press ENTER to see the output video frames OR press N to switch off the output display: ").lstrip().rstrip().lower()
        if ask == "n":
            display_output = False

    # Initialize webcam or video
    device = cv2.VideoCapture(data_type)

    # If webcam, set resolution
    if input_type == "w":
        device.set(3, res[0])
        device.set(4, res[1])
    elif input_type == "v":
        # Finding total number of frames of the video.
        total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
        # Shutting down the webcam loop
        loop_type = False

        # save video feature.
        if save_video:
            # Finding the file format, size and the fps rate
            fps = device.get(cv2.CAP_PROP_FPS)
            video_format = int(device.get(cv2.CAP_PROP_FOURCC))
            frame_size = (int(device.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(device.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            # Creating a video writer to save the processed video if needed
            output_video = cv2.VideoWriter("Output_" + data_type, video_format, fps, frame_size)

    # Start the webcam or video and run recognition frame by frame.
    while loop_type or (frame_no <= total_frames):
        # If a video was selected, advance the frame counter
        if loop_type == False:
            frame_no += 1
            # Display the progress
            print("\nProgress: %.2f" % (100 * frame_no / total_frames) + "%")

        ret, image = device.read()

        # Run the MTCNN model to detect faces
        g2.as_default()
        with tf.Session(graph=g2) as sess:
            # we get the bounding boxes as well as the points for the face
            bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

        # See if a face is detected
        if bb.shape[0] > 0:
            # ALIGNMENT - use the bounding boxes and facial landmark points to align images
            # create a numpy array to feed the network
            img_list = []
            images = np.empty([bb.shape[0], image.shape[0], image.shape[1]])
            for col in range(points.shape[1]):
                aligned_image = affine.align(image, points[:, col])

                # Prewhiten the image for the facenet architecture to give better results
                mean = np.mean(aligned_image)
                std = np.std(aligned_image)
                std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
                ready_image = np.multiply(np.subtract(aligned_image, mean), 1 / std_adj)
                img_list.append(ready_image)
            images = np.stack(img_list)

            # EMBEDDINGS: Use the processed aligned images for Facenet embeddings
            g1.as_default()
            with tf.Session(graph=g1) as sess:
                # Run forward pass on FaceNet to get the embeddings
                images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
                phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                embedding = sess.run(embeddings, feed_dict=feed_dict)

            # PREDICTION: use the classifier to predict the most likely class (person).
            predictions = modelSVM.predict_proba(embedding)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]

            # DRAW: draw bounding boxes, landmarks and predicted names
            if save_video or display_output:
                for i in range(bb.shape[0]):
                    cv2.rectangle(image, (int(bb[i][0]), int(bb[i][1])),
                                  (int(bb[i][2]), int(bb[i][3])), (255, 0, 0), 1)

                    # Put the name and probability of detection only if the given threshold is crossed
                    if best_class_probabilities[i] > classifier_threshold:
                        cv2.putText(image, class_names[best_class_indices[i]],
                                    (int(bb[i][0]), int(bb[i][1]) - 7),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)
                        cv2.putText(image, str(round(best_class_probabilities[i] * 100, 2)) + "%",
                                    (int(bb[i][0]), int(bb[i][3]) + 7),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)

                # loop over the (x, y)-coordinates for the facial landmarks
                for col in range(points.shape[1]):
                    for i in range(5):
                        cv2.circle(image, (int(points[i][col]), int(points[i + 5][col])), 1, (0, 255, 0), -1)

        if display_output:
            cv2.imshow("Output", image)
        if save_video:
            output_video.write(image)

        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            # do a bit of cleanup
            device.release()
            if save_video:
                output_video.release()
            cv2.destroyAllWindows()
            break
def main(nsplits, split_id, list_file, img_root_dir, mtcnn_model_dir, save_dir=None):
    if not save_dir:
        save_dir = './aligned_root_dir'

    if not osp.exists(save_dir):
        print('mkdir for aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    save_aligned_dir = osp.join(save_dir, 'aligned_imgs')
    if not osp.exists(save_aligned_dir):
        print('mkdir for aligned/cropped face imgs: ', save_aligned_dir)
        os.makedirs(save_aligned_dir)

    save_rects_dir = osp.join(save_dir, 'face_rects')
    if not osp.exists(save_rects_dir):
        print('mkdir for face rects/landmarks: ', save_rects_dir)
        os.makedirs(save_rects_dir)

    # aligner = MtcnnAligner(mtcnn_model_dir, False)
    aligner = FaceAligner(mtcnn_model_dir, False)

    fp = open(list_file, 'r')
    all_lines = fp.readlines()
    fp.close()

    total_line_cnt = len(all_lines)
    print('--->%d imgs in total' % total_line_cnt)

    if nsplits < 2:
        if split_id > 0:
            print('===> Will only process first %d imgs' % split_id)
            start_line = 0
            end_line = split_id
        else:
            print('===> Will process all of the images')
            start_line = 0
            end_line = total_line_cnt
    else:
        assert (split_id < nsplits)
        lines_per_split = float(total_line_cnt) / nsplits
        start_line = int(lines_per_split * split_id)
        end_line = int(lines_per_split * (split_id + 1))
        if end_line + 1 >= total_line_cnt:
            end_line = total_line_cnt
        print('===> Will only process imgs in the range [%d, %d)' % (start_line, end_line))

    count = start_line
    for line in all_lines[start_line:end_line]:
        line = line.strip()
        print count
        count = count + 1

        img_fn = osp.join(img_root_dir, line)
        print('===> Processing img: ' + img_fn)
        img = cv2.imread(img_fn)

        ht = img.shape[0]
        wd = img.shape[1]
        print 'image.shape:', img.shape

        # GT_RECT = [0, 0, img.shape[0], img.shape[1]]
        GT_RECT = [
            int(wd * 0.25), int(ht * 0.25),
            int(wd * 0.75), int(ht * 0.72)
        ]
        # print 'face rect: ', GT_RECT

        boxes, points = aligner.align_face(img, [GT_RECT])
        box = boxes[0]
        pts = points[0]

        spl = osp.split(line)
        sub_dir = spl[0]
        base_name = spl[1]

        save_img_subdir = osp.join(save_aligned_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        save_rect_subdir = osp.join(save_rects_dir, sub_dir)
        if not osp.exists(save_rect_subdir):
            os.mkdir(save_rect_subdir)
        # print pts

        save_img_fn = osp.join(save_img_subdir, base_name)
        facial5points = np.reshape(pts, (2, -1))
        # dst_img = warp_and_crop_face(
        #     img, facial5points, reference_5pts, output_size)
        dst_img = aligner.get_face_chips(img, [box], [pts])[0]
        cv2.imwrite(save_img_fn, dst_img)

        save_rect_fn = osp.join(save_rect_subdir,
                                osp.splitext(base_name)[0] + '.txt')
        fp_rect = open(save_rect_fn, 'w')
        for it in box:
            fp_rect.write('%5.2f\t' % it)
        fp_rect.write('\n')
        for i in range(5):
            fp_rect.write('%5.2f\t%5.2f\n' %
                          (facial5points[0][i], facial5points[1][i]))
        fp_rect.close()
def main(nsplits, split_id, list_file, img_root_dir, mtcnn_model_dir,
         save_dir=None, rects_fn=None):
    if not save_dir:
        save_dir = './facescrub_mtcnn_aligned'

    if not osp.exists(save_dir):
        print('mkdir for aligned root dir: ', save_dir)
        os.makedirs(save_dir)

    save_aligned_dir = osp.join(save_dir, 'aligned_imgs')
    if not osp.exists(save_aligned_dir):
        print('mkdir for aligned/cropped face imgs: ', save_aligned_dir)
        os.makedirs(save_aligned_dir)

    save_rects_dir = osp.join(save_dir, 'face_rects')
    if not osp.exists(save_rects_dir):
        print('mkdir for face rects/landmarks: ', save_rects_dir)
        os.makedirs(save_rects_dir)

    aligner = FaceAligner(mtcnn_model_dir)

    # fp = open(list_file, 'r')
    # all_lines = fp.readlines()
    # fp.close()

    rects_list = load_rect_list(rects_fn)
    all_lines = rects_list

    total_line_cnt = len(all_lines)
    print('--->%d imgs in total' % total_line_cnt)

    if nsplits < 2:
        if split_id > 0:
            print('===> Will only process first %d imgs' % split_id)
            start_line = 0
            end_line = split_id
        else:
            print('===> Will process all of the images')
            start_line = 0
            end_line = total_line_cnt
    else:
        assert (split_id < nsplits)
        lines_per_split = float(total_line_cnt) / nsplits
        start_line = int(lines_per_split * split_id)
        end_line = int(lines_per_split * (split_id + 1))
        if end_line + 1 >= total_line_cnt:
            end_line = total_line_cnt
        print('===> Will only process imgs in the range [%d, %d)' % (start_line, end_line))

    count = start_line
    fp_log = open(osp.join(save_dir, 'missing_imgs_split_%d.txt' % split_id), 'w')

    for line in all_lines[start_line:end_line]:
        # line = line.strip()
        print count
        count = count + 1

        img_fn = osp.join(img_root_dir, line['image'])
        print('===> Processing img: ' + img_fn)
        img = cv2.imread(img_fn)
        if img is None:
            print 'failed to read image: ', img_fn
            fp_log.write(img_fn + '\n')
            continue

        ht = img.shape[0]
        wd = img.shape[1]
        print 'image.shape:', img.shape

        spl = osp.split(line['image'])
        # sub_dir = osp.split(spl[0])[1]
        sub_dir = spl[0]
        print 'sub_dir: ', sub_dir

        if CHINESE_2_PINYIN:
            sub_dir = pinyin.get(sub_dir, format="strip")
            # replace the dot sign in names
            sub_dir = sub_dir.replace(u'\xb7', '-').encode('utf-8')

        base_name = osp.splitext(spl[1])[0]

        save_img_subdir = osp.join(save_aligned_dir, sub_dir)
        if not osp.exists(save_img_subdir):
            os.mkdir(save_img_subdir)

        save_rect_subdir = osp.join(save_rects_dir, sub_dir)
        if not osp.exists(save_rect_subdir):
            os.mkdir(save_rect_subdir)
        # print pts

        save_rects_fn = osp.join(save_rect_subdir, base_name + '.txt')
        fp_rect = open(save_rects_fn, 'w')

        # rect = get_rects_for_image(rects_list, base_name)
        rect = line['pts']
        boxes, points = aligner.align_face(img, [rect])

        nfaces = len(boxes)
        fp_rect.write('%d\n' % nfaces)

        for i in range(nfaces):
            box = boxes[i]
            pts = points[i]

            if i:
                save_img_fn = osp.join(save_img_subdir, base_name + '_%d.jpg' % (i + 1))
            else:
                save_img_fn = osp.join(save_img_subdir, base_name + '.jpg')

            facial5points = np.reshape(pts, (2, -1))
            # dst_img = warp_and_crop_face(
            #     img, facial5points, reference_5pts, output_size)
            dst_img = aligner.get_face_chips(img, [box], [pts])[0]
            cv2.imwrite(save_img_fn, dst_img)
            print 'aligned face saved into: ', save_img_fn

            for it in box:
                fp_rect.write('%5.2f\t' % it)
            fp_rect.write('\n')
            for j in range(5):
                fp_rect.write('%5.2f\t%5.2f\n' %
                              (facial5points[0][j], facial5points[1][j]))
        fp_rect.close()

    fp_log.close()
def PatchExtraction(video_path, landmarks_path, output_dir, patch_size=32):
    print("Input: ", video_path)
    print("Output:", output_dir)

    frames = []
    frame_number = []

    if os.path.exists(landmarks_path) == False:
        return

    df = pd.read_csv(landmarks_path)
    cap = cv2.VideoCapture(video_path)

    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # if count % 6 == 0 and df[' success'][count] == 1:
        # if df[' success'][count] == 1:
        if count % 6 == 0 and len(df[' success']) > count:
            if df[' success'][count] == 1:
                frame = frame[:, :, ::-1]
                frames.append(frame)
                frame_number.append(count)
        count += 1
    cap.release()

    folders = ["aligned_face", "left_eye", "right_eye", "mouth", "nose"]
    for folder in folders:
        directory = os.path.join(output_dir, folder)
        if not os.path.exists(directory):
            os.makedirs(directory)

    for idx, frame in enumerate(frames):
        x = np.array(df.iloc[frame_number[idx], 299:299 + 68]).reshape(68, -1)
        y = np.array(df.iloc[frame_number[idx], 299 + 68:299 + 68 * 2]).reshape(68, -1)
        z = np.ones(68).reshape(68, -1)
        landmarks = np.concatenate((x, y), axis=1)

        aligner = FaceAligner(desiredLeftEye=(0.35, 0.35),
                              desiredFaceWidth=128,
                              desiredFaceHeight=int(128 * 2))
        aligned_face, M = aligner.align(frame, landmarks)

        landmarks_z = np.concatenate((landmarks, z), axis=1)
        affined_landmarks = np.matmul(landmarks_z, M.transpose())

        regions = ["left_eye", "right_eye", "mouth", "nose"]
        regions_image = []
        for region in regions:
            start, end = FACIAL_LANDMARKS_68_IDXS[region]
            Pts = affined_landmarks[start:end]
            Center = Pts.mean(axis=0)
            try:
                img = extract_patch(aligned_face, Center, patch_size)
            except:
                break
            if img.shape != (32, 32, 3):
                break
            regions_image.append(img)

        if len(regions_image) == len(regions):
            for i, region in enumerate(regions):
                filename = os.path.join(output_dir, region,
                                        str(frame_number[idx]).zfill(4) + '.bmp')
                img = regions_image[i]
                save(img, filename)

            filename = os.path.join(output_dir, 'aligned_face',
                                    str(frame_number[idx]).zfill(4) + '.bmp')
            np.save(os.path.join(output_dir, 'aligned_face',
                                 str(frame_number[idx]).zfill(4) + '.npy'),
                    affined_landmarks)
            save(aligned_face, filename)
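# The helpers extract_patch() and save() are referenced above but not shown. The
# following are hypothetical stand-ins for illustration only: extract_patch() crops a
# patch_size x patch_size window centred on a landmark point, and save() writes an RGB
# array to disk with OpenCV (which expects BGR).
import cv2
import numpy as np

def extract_patch(image, center, patch_size=32):
    # assumption: center is (x, y) in pixel coordinates of the aligned face
    half = patch_size // 2
    cx, cy = int(round(center[0])), int(round(center[1]))
    return image[cy - half:cy + half, cx - half:cx + half]

def save(img, filename):
    # frames were converted to RGB above, so convert back to BGR before writing
    cv2.imwrite(filename, cv2.cvtColor(np.asarray(img, dtype=np.uint8), cv2.COLOR_RGB2BGR))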