def load_and_align_data(config, imgs):
    """Crop, resize and prewhiten the first detected face of every image.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction``,
            ``mtcnn.margin`` and ``mtcnn.image_size``; it is also passed
            to ``MTCNN``.
        imgs: Sequence of image paths (a leading ``~`` is expanded).

    Returns:
        The prewhitened crops combined with ``np.stack`` along a new
        leading axis, in the same order as ``imgs``.

    Raises:
        IndexError: If the detector returns no bounding box for an image.
    """
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # Using the session as a context manager makes it the default
        # session and closes it on exit, releasing its resources.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)
            half_margin = config.mtcnn.margin / 2
            out_size = (config.mtcnn.image_size, config.mtcnn.image_size)

            img_list = []
            for path in imgs:
                img = misc.imread(os.path.expanduser(path))
                img_size = np.asarray(img.shape)[0:2]  # (rows, cols)
                bounding_boxes, _ = detector.detect_face(img)
                if bounding_boxes.shape[0] == 0:
                    # Same exception type the bare index below would raise,
                    # but with a message that names the offending file.
                    raise IndexError('No face detected in "%s"' % path)

                # Only the first bounding box is used.
                det = np.squeeze(bounding_boxes[0, 0:4])

                # Grow the box by half the margin per side, clipped to the
                # image so the slice below never wraps around.
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - half_margin, 0)
                bb[1] = np.maximum(det[1] - half_margin, 0)
                bb[2] = np.minimum(det[2] + half_margin, img_size[1])
                bb[3] = np.minimum(det[3] + half_margin, img_size[0])

                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                aligned = misc.imresize(cropped, out_size, interp='bilinear')
                img_list.append(prewhiten(aligned))

            return np.stack(img_list)
def main(config):
    """Download images per name and delete those unlike the first face.

    Reads one entry per line from ``config.parse_path``. For each entry,
    result pages are fetched until the entry's directory holds at least 20
    files or there is no next page. Every downloaded image that contains a
    face is embedded with the model loaded from
    ``config.lfw.valid_model_path``; its Euclidean distance to the first
    embedding collected for that entry is printed, and the file is deleted
    when the distance exceeds ``config.max_dist``. Any image whose
    processing raises is deleted as well.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction``,
            ``lfw.valid_model_path``, ``parse_path``, ``output_dir`` and
            ``max_dist``; it is also passed to ``MTCNN`` and ``detect``.
    """
    # ``quote`` lives in different modules on Python 2 and Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager makes the session the default one and closes
        # it when the crawl is finished.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)

            # Load the embedding model and look up its input/output tensors.
            load_model(config.lfw.valid_model_path)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            with open(config.parse_path) as f:
                names = f.readlines()

            for line in names:
                keyword, name, saved_dir = get_keyword(line,
                                                       config.output_dir)
                fanye_url = url_init_first + quote(keyword, safe='/')
                feats = []  # feats[0] is the reference embedding
                page = 0
                while len(os.listdir(saved_dir)) < 20 and fanye_url != '':
                    onepage_urls, fanye_url = get_onepage_urls(fanye_url)
                    for index, url in enumerate(onepage_urls):
                        img_path = download(
                            url, page * len(onepage_urls) + index, saved_dir)
                        try:
                            img = misc.imread(img_path)
                            nrof_faces, face = detect(img, detector, config)
                            face = np.expand_dims(face, 0)
                            if nrof_faces:
                                feed_dict = {
                                    images_placeholder: face,
                                    phase_train_placeholder: False,
                                }
                                feats.append(
                                    sess.run(embeddings, feed_dict=feed_dict))
                                # Compare every embedding except the
                                # reference itself. Testing ``index != 0``
                                # here would let the first image of each
                                # later page skip the filter.
                                if len(feats) > 1:
                                    dist = np.sqrt(np.sum(np.square(
                                        np.subtract(feats[0], feats[-1]))))
                                    print('the %d-th %s image dist: %f' %
                                          (index, name, dist))
                                    if dist > config.max_dist:
                                        os.remove(img_path)
                        except Exception as e:
                            # Best effort: one bad download must not stop
                            # the crawl. Drop the file if it still exists.
                            print(e)
                            if img_path and os.path.exists(img_path):
                                os.remove(img_path)
                    page += 1
def main(config):
    """Detect faces in one image and display it with green boxes drawn.

    The image at ``config.image`` is read, faces are detected, and a
    rectangle is drawn around every face whose box lies strictly inside
    the image. The result is shown in a window until a key is pressed.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction`` and
            ``image``; it is also passed to ``MTCNN``.

    Raises:
        IOError: If the image cannot be read.
    """
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager closes the session once the window is gone.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)

            frame = cv2.imread(config.image)
            if frame is None:
                # cv2.imread signals failure by returning None, not raising.
                raise IOError('Unable to read image "%s"' % config.image)

            if frame.ndim == 2:
                frame = to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detector.detect_face(frame)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:
                # Truncate the float coordinates to integer pixel positions.
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                frame_height = len(frame)
                frame_width = len(frame[0])
                for left, top, right, bottom in bb:
                    # Skip boxes that touch or cross the image border.
                    if (left <= 0 or top <= 0 or right >= frame_width
                            or bottom >= frame_height):
                        print('face is inner of range!')
                        continue
                    cv2.rectangle(frame, (left, top), (right, bottom),
                                  (0, 255, 0), 2)
            else:
                print('Unable to align')

            cv2.imshow('Image', frame)
            # Always clean up after the key press; the returned key code
            # may be 0, so it must not gate the cleanup.
            cv2.waitKey(0)
            cv2.destroyAllWindows()
def main(config):
    """Show live face detection from capture device 0 until 'q' is pressed.

    Each frame is halved in size, faces are detected, a green rectangle is
    drawn around every face whose box lies strictly inside the frame, and
    an FPS read-out is overlaid near the top-right corner.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction`` and
            ``mtcnn.frame_interval``; it is also passed to ``MTCNN``.
    """
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager closes the session when the loop ends.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)
            video_capture = cv2.VideoCapture(0)
            timeF = config.mtcnn.frame_interval
            prevTime = 0
            # NOTE: the frame counter is never incremented in this script,
            # so the interval test below passes on every frame.
            c = 0
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        # No frame delivered (camera missing or unplugged).
                        print('Unable to read frame from camera')
                        break

                    # Halve the frame size before detection.
                    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
                    curTime = time.time()

                    if c % timeF == 0:
                        if frame.ndim == 2:
                            frame = to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detector.detect_face(frame)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Detected_FaceNum: %d' % nrof_faces)

                        if nrof_faces > 0:
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            frame_height = len(frame)
                            frame_width = len(frame[0])
                            for left, top, right, bottom in bb:
                                # Skip boxes touching or crossing the border.
                                if (left <= 0 or top <= 0
                                        or right >= frame_width
                                        or bottom >= frame_height):
                                    print('face is inner of range!')
                                    continue
                                cv2.rectangle(frame, (left, top),
                                              (right, bottom),
                                              (0, 255, 0), 2)
                        else:
                            print('Unable to align')

                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two frames can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    string = 'FPS: %2.3f' % fps
                    text_fps_x = len(frame[0]) - 150
                    text_fps_y = 20
                    cv2.putText(frame, string, (text_fps_x, text_fps_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (0, 255, 0), thickness=1, lineType=2)

                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the camera even if detection raised.
                video_capture.release()
                cv2.destroyAllWindows()
def main(config):
    """Crop one face per dataset image and save it as a resized PNG.

    Every image of every class in ``DataGenerator(config).dataset`` is
    read, halved in size and passed to the detector. When several faces
    are found, the one maximising ``box area - 2 * squared distance from
    the image centre`` is kept. The crop is resized to
    ``config.mtcnn.image_size`` squared and written to
    ``<output_dir>/<class name>/<image name>.png``. Images whose output
    file already exists are skipped.

    A ``bounding_boxes_<random>.txt`` file in ``config.output_dir`` gets
    one line per processed image: the output path followed by the box
    coordinates, or the output path alone when alignment failed.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction``,
            ``output_dir`` and ``mtcnn.image_size``; it is also passed to
            ``DataGenerator`` and ``MTCNN``.
    """
    print('Creating networks and loading parameters')
    Data = DataGenerator(config)
    out_size = (config.mtcnn.image_size, config.mtcnn.image_size)
    nrof_images_total = 0
    nrof_successfully_aligned = 0

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager closes the session when alignment is done.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)

            # A random suffix avoids overwriting the log of an earlier run.
            random_key = np.random.randint(0, high=99999)
            bounding_boxes_filename = os.path.join(
                config.output_dir, 'bounding_boxes_%05d.txt' % random_key)

            with open(bounding_boxes_filename, "w") as text_file:
                for cls in Data.dataset:
                    output_class_dir = os.path.join(config.output_dir,
                                                    cls.name)
                    create_dirs([output_class_dir])
                    for image_path in cls.image_paths:
                        nrof_images_total += 1
                        filename = os.path.splitext(
                            os.path.split(image_path)[1])[0]
                        output_filename = os.path.join(output_class_dir,
                                                       filename + '.png')
                        print(image_path)
                        if os.path.exists(output_filename):
                            continue  # already aligned on a previous run

                        try:
                            img = misc.imread(image_path)
                            img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
                        except (IOError, ValueError, IndexError) as e:
                            print('{}: {}'.format(image_path, e))
                            continue

                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = to_rgb(img)
                            print('to_rgb data dimension: ', img.ndim)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = detector.detect_face(img)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces == 0:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue

                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(img.shape)[0:2]  # (rows, cols)
                        if nrof_faces > 1:
                            # Prefer large boxes, penalising distance from
                            # the image centre (weighted by 2).
                            bounding_box_size = (
                                (det[:, 2] - det[:, 0]) *
                                (det[:, 3] - det[:, 1]))
                            img_center = img_size / 2
                            offsets = np.vstack([
                                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                (det[:, 1] + det[:, 3]) / 2 - img_center[0],
                            ])
                            offset_dist_squared = np.sum(
                                np.power(offsets, 2.0), 0)
                            index = np.argmax(
                                bounding_box_size - offset_dist_squared * 2.0)
                            det = det[index, :]
                        det = np.squeeze(det)

                        # Clip the box to the image. A negative coordinate
                        # would otherwise wrap around in the slice below
                        # and give a wrong or empty crop.
                        bb_temp = np.zeros(4, dtype=np.int32)
                        bb_temp[0] = np.maximum(det[0], 0)
                        bb_temp[1] = np.maximum(det[1], 0)
                        bb_temp[2] = np.minimum(det[2], img_size[1])
                        bb_temp[3] = np.minimum(det[3], img_size[0])
                        cropped_temp = img[bb_temp[1]:bb_temp[3],
                                           bb_temp[0]:bb_temp[2], :]
                        try:
                            scaled_temp = misc.imresize(
                                cropped_temp, out_size, interp='bilinear')
                        except (IOError, ValueError, IndexError):
                            # Record the failure as the other failure paths
                            # do, rather than dropping the image silently.
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue

                        nrof_successfully_aligned += 1
                        misc.imsave(output_filename, scaled_temp)
                        text_file.write('%s %d %d %d %d\n' % (
                            output_filename, bb_temp[0], bb_temp[1],
                            bb_temp[2], bb_temp[3]))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
def main(config):
    """Crop every detected face of every dataset image and save it as PNG.

    Each image of every class in ``DataGenerator(config, False).dataset``
    is passed to the detector. Every face is cropped, resized to
    ``config.mtcnn.image_size`` squared and saved under
    ``<output_dir>/<class name>/``. An image with one face is saved as
    ``<image name>.png``; with several faces, each is saved as
    ``<image name>_<face index>.png``. Images whose ``<image name>.png``
    already exists are skipped.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction``,
            ``output_dir`` and ``mtcnn.image_size``; it is also passed to
            ``DataGenerator`` and ``MTCNN``.
    """
    print('Creating networks and loading parameters')
    Data = DataGenerator(config, False)
    out_size = (config.mtcnn.image_size, config.mtcnn.image_size)
    nrof_images_total = 0
    nrof_successfully_aligned = 0  # counts saved faces, not images

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager closes the session when alignment is done.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)

            for cls in Data.dataset:
                output_class_dir = os.path.join(config.output_dir, cls.name)
                create_dirs([output_class_dir])
                for image_path in cls.image_paths:
                    nrof_images_total += 1
                    filename = os.path.splitext(
                        os.path.split(image_path)[1])[0]
                    output_filename = os.path.join(output_class_dir,
                                                   filename + '.png')
                    print(image_path)
                    if os.path.exists(output_filename):
                        continue  # already aligned on a previous run

                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        print('{}: {}'.format(image_path, e))
                        continue

                    if img.ndim < 2:
                        print('Unable to align "%s"' % image_path)
                        continue
                    if img.ndim == 2:
                        img = to_rgb(img)
                        print('to_rgb data dimension: ', img.ndim)
                    img = img[:, :, 0:3]

                    bounding_boxes, _ = detector.detect_face(img)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces == 0:
                        print('Unable to align "%s"' % image_path)
                        continue

                    det = bounding_boxes[:, 0:4]
                    img_height, img_width = img.shape[0:2]
                    # Clip the boxes to the image. A negative coordinate
                    # would otherwise wrap around in the slice below and
                    # give a wrong or empty crop.
                    bb_temp = np.zeros((nrof_faces, 4), dtype=np.int32)
                    bb_temp[:, 0] = np.maximum(det[:, 0], 0)
                    bb_temp[:, 1] = np.maximum(det[:, 1], 0)
                    bb_temp[:, 2] = np.minimum(det[:, 2], img_width)
                    bb_temp[:, 3] = np.minimum(det[:, 3], img_height)

                    # splitext strips only the final extension; splitting
                    # on the first '.' would truncate paths such as
                    # './out/...' or directories containing a dot.
                    output_bodyname = os.path.splitext(output_filename)[0]

                    for i, (left, top, right, bottom) in enumerate(bb_temp):
                        cropped_temp = img[top:bottom, left:right, :]
                        try:
                            scaled_temp = misc.imresize(
                                cropped_temp, out_size, interp='bilinear')
                        except (IOError, ValueError, IndexError):
                            # Report the skipped face instead of dropping
                            # it silently.
                            print('Unable to align "%s"' % image_path)
                            continue

                        nrof_successfully_aligned += 1
                        if nrof_faces == 1:
                            misc.imsave(output_filename, scaled_temp)
                        else:
                            output_indexname = '_%d' % i
                            misc.imsave(
                                output_bodyname + output_indexname + '.png',
                                scaled_temp)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
def main(config):
    """Recognise faces live from capture device 0 until 'q' is pressed.

    Loads the embedding model from ``config.lfw.valid_model_path`` and a
    pickled ``(model, class_names)`` classifier from
    ``config.classifier_path``. Every ``config.mtcnn.frame_interval``-th
    frame is halved in size and searched for faces. Each face lying
    strictly inside the frame is embedded and classified, then boxed in
    green and labelled in red with the entry of
    ``os.listdir(config.input_dir)`` at the predicted class index. An FPS
    read-out is overlaid on every frame.

    Args:
        config: Settings object. Reads ``gpu_memory_fraction``,
            ``lfw.valid_model_path``, ``classifier_path``, ``input_dir``
            and ``mtcnn.frame_interval``; it is also passed to ``MTCNN``.
    """
    print('Creating networks and loading parameters')
    graph_detect = tf.Graph()
    with graph_detect.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=config.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # The context manager closes the session when the loop ends.
        with tf.Session(config=session_config) as sess:
            detector = MTCNN(config, sess)

            load_model(config.lfw.valid_model_path)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = config.classifier_path
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            print('load classifier file-> %s' % classifier_filename_exp)
            # SECURITY: unpickling executes arbitrary code; only load
            # classifier files from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            # NOTE(review): labels come from the directory listing, whose
            # order presumably has to match the classifier's class order -
            # confirm against the training script.
            HumanNames = os.listdir(config.input_dir)

            video_capture = cv2.VideoCapture(0)
            timeF = config.mtcnn.frame_interval
            prevTime = 0
            c = 0
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret:
                        # No frame delivered (camera missing or unplugged).
                        print('Unable to read frame from camera')
                        break

                    # Halve the frame size before detection.
                    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
                    curTime = time.time()

                    if c % timeF == 0:
                        if frame.ndim == 2:
                            frame = to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detector.detect_face(frame)
                        nrof_faces = bounding_boxes.shape[0]
                        print('Detected_FaceNum: %d' % nrof_faces)

                        if nrof_faces > 0:
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            frame_height = len(frame)
                            frame_width = len(frame[0])
                            for left, top, right, bottom in bb:
                                # Skip boxes touching or crossing the border.
                                if (left <= 0 or top <= 0
                                        or right >= frame_width
                                        or bottom >= frame_height):
                                    print('face is inner of range!')
                                    continue

                                # Per-face locals replace the former
                                # face-indexed lists, which went out of
                                # step as soon as a face was skipped above.
                                face = frame[top:bottom, left:right, :]
                                face = flip(face, False)
                                face = misc.imresize(face, (182, 182),
                                                     interp='bilinear')
                                face = cv2.resize(
                                    face, (160, 160),
                                    interpolation=cv2.INTER_CUBIC)
                                face = prewhiten(face)
                                feed_dict = {
                                    images_placeholder:
                                        face.reshape(-1, 160, 160, 3),
                                    phase_train_placeholder: False,
                                }
                                emb_array = np.zeros((1, embedding_size))
                                emb_array[0, :] = sess.run(
                                    embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions,
                                                               axis=1)

                                cv2.rectangle(frame, (left, top),
                                              (right, bottom),
                                              (0, 255, 0), 2)

                                # Write the predicted name below the box.
                                text_x = left
                                text_y = bottom + 20
                                print('result: ', best_class_indices[0])
                                if HumanNames:
                                    result_names = HumanNames[
                                        best_class_indices[0]]
                                    cv2.putText(
                                        frame, result_names,
                                        (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                        (0, 0, 255), thickness=1, lineType=2)
                        else:
                            print('Unable to align')

                    sec = curTime - prevTime
                    prevTime = curTime
                    # Two frames can share a timestamp on coarse clocks.
                    fps = 1 / sec if sec > 0 else 0.0
                    string = 'FPS: %2.3f' % fps
                    text_fps_x = len(frame[0]) - 150
                    text_fps_y = 20
                    cv2.putText(frame, string, (text_fps_x, text_fps_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (0, 255, 0), thickness=1, lineType=2)
                    c += 1

                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release the camera even if recognition raised.
                video_capture.release()
                cv2.destroyAllWindows()