def ReadDetectAndEncode(self, imgPath, sess, n_jitters=0):
    """Embed the image at ``imgPath`` when exactly one face is detected.

    The image is read as RGB and handed to the face detector. Unless the
    detector reports exactly one bounding box, an empty list is returned.
    Otherwise the whole prewhitened image (not a crop) is evaluated through
    ``self.embeddings``.

    Args:
        imgPath: Path of the image file to read.
        sess: Object whose ``run`` evaluates ``self.embeddings``.
        n_jitters: Number of ``dlib.jitter_image`` variants to embed as well.
            When non-zero, the result is the average over the original
            embedding and every jittered embedding.

    Returns:
        ``[]`` when the number of detections is not one, otherwise a single
        embedding vector.
    """
    picture = misc.imread(imgPath, mode='RGB')
    boxes, _landmarks = detect_face.detect_face(
        picture, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)
    if len(boxes) != 1:
        return []

    # Fixed normalization
    control = np.expand_dims(np.zeros(1, dtype=np.int32), 1)
    control += (np.expand_dims(np.ones(1, dtype=np.int32), 1)
                * facenet.FIXED_STANDARDIZATION)

    def embed(pixels):
        # Run forward pass to calculate embeddings
        return sess.run(self.embeddings, feed_dict={
            self.images_placeholder: [facenet.prewhiten(pixels)],
            self.phase_train_placeholder: False,
            self.control_placeholder: control,
        })

    base_encoding = embed(picture)
    if not n_jitters:
        return base_encoding[0]

    # Average the plain embedding together with one embedding per jitter.
    encodings = [base_encoding]
    reloaded = dlib.load_rgb_image(imgPath)
    for jittered in dlib.jitter_image(reloaded, num_jitters=n_jitters):
        encodings.append(embed(jittered))
    return np.average(np.concatenate(encodings, axis=0), axis=0)
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Crop, resize and prewhiten the first detected face of every image.

    Uses the module-level ``pnet``, ``rnet`` and ``onet`` detector networks.
    Paths in which no face is found are removed from ``image_paths`` in
    place, so on return the list lines up with the rows of the result.

    Args:
        image_paths: Mutable list of image file paths (``~`` is expanded).
        image_size: Edge length of the square output crops.
        margin: Total margin around the detected box (half on each side),
            clipped to the image borders.
        gpu_memory_fraction: Not used by this function.

    Returns:
        ``np.stack`` of the prewhitened crops, one per remaining path.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')

    faces = []
    # Walk a snapshot because image_paths is edited inside the loop.
    for path in copy.copy(image_paths):
        picture = misc.imread(os.path.expanduser(path), mode='RGB')
        picture_size = np.asarray(picture.shape)[0:2]
        found, _ = align.detect_face.detect_face(
            picture, minsize, pnet, rnet, onet, threshold, factor)
        if len(found) < 1:
            image_paths.remove(path)
            print("can't detect face, remove ", path)
            continue

        # Only the first detection is used.
        det = np.squeeze(found[0, 0:4])
        box = np.zeros(4, dtype=np.int32)
        box[0] = np.maximum(det[0] - margin / 2, 0)
        box[1] = np.maximum(det[1] - margin / 2, 0)
        box[2] = np.minimum(det[2] + margin / 2, picture_size[1])
        box[3] = np.minimum(det[3] + margin / 2, picture_size[0])

        crop = picture[box[1]:box[3], box[0]:box[2], :]
        scaled = misc.imresize(crop, (image_size, image_size),
                               interp='bilinear')
        # misc.imsave("oops", aligned)
        faces.append(facenet.prewhiten(scaled))

    return np.stack(faces)
def align_data(image_list, image_size, margin, pnet, rnet, onet):
    """Return prewhitened crops of every confidently detected face.

    Each image is passed through the detector; every detection whose score
    (column 4 of the bounding box) is above 0.95 is cropped with ``margin``
    (half on each side, clipped to the image), resized to
    ``image_size`` x ``image_size`` and prewhitened.

    Args:
        image_list: Sequence of image arrays.
        image_size: Edge length of the square output crops.
        margin: Total margin added around each detected box.
        pnet, rnet, onet: Detector networks forwarded to ``detect_face``.

    Returns:
        ``np.stack`` of all crops, or ``None`` when no face qualified.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    faces = []
    for picture in image_list:
        picture_size = np.asarray(picture.shape)[0:2]
        detections, _ = src.align.detect_face.detect_face(
            picture, minsize, pnet, rnet, onet, threshold, factor)
        for detection in detections:
            if not detection[4] > 0.95:
                continue
            det = np.squeeze(detection[0:4])
            box = np.zeros(4, dtype=np.int32)
            box[0] = np.maximum(det[0] - margin / 2, 0)
            box[1] = np.maximum(det[1] - margin / 2, 0)
            box[2] = np.minimum(det[2] + margin / 2, picture_size[1])
            box[3] = np.minimum(det[3] + margin / 2, picture_size[0])
            crop = picture[box[1]:box[3], box[0]:box[2], :]
            scaled = misc.imresize(crop, (image_size, image_size),
                                   interp='bilinear')
            faces.append(facenet.prewhiten(scaled))

    if not faces:
        return None
    return np.stack(faces)
def encode(self, frame, face_rect):
    """Return the embedding of the face located at ``face_rect`` in ``frame``.

    Args:
        frame: Image array indexed as ``frame[row, column, channel]``.
        face_rect: ``(x, y, w, h)`` rectangle of the face inside ``frame``.

    Returns:
        The first row of the ``embeddings:0`` tensor evaluated on the
        cropped, resized and prewhitened face.
    """
    import tensorflow as tf  # lazy loading
    import facenet.src.facenet as facenet  # lazy loading

    (x, y, w, h) = face_rect
    if self._face_crop_margin:
        half_margin = int(self._face_crop_margin / 2)
        left = max(x - half_margin, 0)
        top = max(y - half_margin, 0)
        right = min(x + w + half_margin, frame.shape[1])
        bottom = min(y + h + half_margin, frame.shape[0])
        # Width/height must be measured from the SHIFTED origin; measuring
        # them from the original x/y (as before) dropped the right and
        # bottom margins entirely.
        (x, y, w, h) = (left, top, right - left, bottom - top)

    face = misc.imresize(frame[y:y + h, x:x + w, :],
                         (self._face_crop_size, self._face_crop_size),
                         interp='bilinear')

    graph = tf.get_default_graph()
    images_placeholder = graph.get_tensor_by_name("input:0")
    embeddings = graph.get_tensor_by_name("embeddings:0")
    phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

    prewhiten_face = facenet.prewhiten(face)
    feed_dict = {
        images_placeholder: [prewhiten_face],
        phase_train_placeholder: False
    }
    return self._sess.run(embeddings, feed_dict=feed_dict)[0]
def creat_npy(crop_dir, npy_dir, model):
    '''
    Embed every image found in the sub-folders of crop_dir and store each
    embedding through store_img_as_npy (npy files for the tfrecord step).

    crop_dir: directory holding one sub-folder per class.
    npy_dir:  output root; embeddings go to <npy_dir>/<sub-folder name>.
    model:    model argument forwarded to facenet's load_model.

    Files for which decoding/prewhitening raises TypeError are reported and
    skipped; they do not advance the per-folder counter.
    '''
    from facenet.src.facenet import load_model, prewhiten

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    with tf.Graph().as_default(), tf.Session(config = config) as sess:
        print("Now loading the model...")
        load_model(model)

        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

        class_dirs = [entry.path for entry in os.scandir(crop_dir) if entry.is_dir()]
        for class_dir in class_dirs:
            class_name = os.path.basename(class_dir)
            print("CreatNpy step, Now is processing: {} ...".format(class_name))

            file_names = [
                name for name in os.listdir(class_dir)
                if os.path.isfile(os.path.join(class_dir, name))
            ]
            saved = 0
            for file_name in file_names:
                image_name = os.path.join(class_dir, file_name)
                print(image_name)
                try:
                    raw = np.fromfile(os.path.expanduser(image_name), dtype=np.uint8)
                    img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
                    emb = sess.run(embeddings, feed_dict={
                        images_placeholder: [prewhiten(img)],
                        phase_train_placeholder: False,
                    })
                    store_img_as_npy(os.path.join(npy_dir, class_name),
                                     class_name + '_' + str(saved), emb)
                    saved += 1
                except TypeError as e:
                    print("having the {}, so passing it".format(e))
def save(image, simpleName, image_size=160, margin=44, gpu_memory_fraction=1.0):
    """Detect, crop and store the first face found in an image file.

    Uses the module-level ``pnet``, ``rnet`` and ``onet`` detector networks.
    The resized crop is written to ``static/facedb/<simpleName>`` and its
    prewhitened version to ``static/facedbpw/<simpleName>``. When no face is
    detected, a message is printed and nothing is written.

    Args:
        image: Path of the image file (``~`` is expanded).
        simpleName: File name used for both output images.
        image_size: Edge length of the square crop.
        margin: Total margin around the detected box (half on each side).
        gpu_memory_fraction: Not used by this function.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')

    picture = misc.imread(os.path.expanduser(image), mode='RGB')
    picture_size = np.asarray(picture.shape)[0:2]
    found, _ = align.detect_face.detect_face(picture, minsize, pnet, rnet,
                                             onet, threshold, factor)
    if len(found) < 1:
        print("can't detect face from ", image)
        return

    det = np.squeeze(found[0, 0:4])
    box = np.zeros(4, dtype=np.int32)
    box[0] = np.maximum(det[0] - margin / 2, 0)
    box[1] = np.maximum(det[1] - margin / 2, 0)
    box[2] = np.minimum(det[2] + margin / 2, picture_size[1])
    box[3] = np.minimum(det[3] + margin / 2, picture_size[0])

    crop = picture[box[1]:box[3], box[0]:box[2], :]
    aligned = misc.imresize(crop, (image_size, image_size), interp='bilinear')
    whitened = facenet.prewhiten(aligned)

    misc.imsave("static/facedb/" + simpleName, aligned)
    misc.imsave("static/facedbpw/" + simpleName, whitened)
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Crop, resize and prewhiten the first detected face of every image.

    Builds the MTCNN detector in a fresh graph/session, processes every path
    in order and closes the session before returning.

    Args:
        image_paths: Sequence of image file paths (``~`` is expanded).
        image_size: Edge length of the square output crops.
        margin: Total margin around the detected box (half on each side),
            clipped to the image borders.
        gpu_memory_fraction: Value passed as
            ``per_process_gpu_memory_fraction`` for the detector session.

    Returns:
        ``np.stack`` of the prewhitened crops, one per input path.

    Raises:
        IndexError: when the detector returns no box for an image (the first
            box is indexed unconditionally).
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)  # noqa: E501
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img_list = []
    try:
        # Plain iteration replaces the Python-2-only xrange index loop.
        for image_path in image_paths:
            print(image_path)
            img = imageio.imread(os.path.expanduser(image_path))
            img_size = np.asarray(img.shape)[0:2]
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            det = np.squeeze(bounding_boxes[0, 0:4])
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            aligned = resize(cropped, (image_size, image_size))
            img_list.append(facenet.prewhiten(aligned))
    finally:
        # The detector session was previously leaked on every call.
        sess.close()

    images = np.stack(img_list)
    return images
def transform_images(inpath, output, image_size=160):
    """Resize and prewhiten every ``*0.png`` under ``inpath`` into ``.npy``.

    Args:
        inpath: Path-like directory object providing ``glob``.
        output: Path-like directory object; each result is saved as
            ``output / "<stem>.npy"``.
        image_size: Edge length of the square the image is resized to
            (bicubic interpolation).
    """
    target_shape = (image_size, image_size)
    for source in inpath.glob("*0.png"):
        resized = cv2.resize(load_image(source), target_shape,
                             interpolation=cv2.INTER_CUBIC)
        whitened = facenet.prewhiten(resized)
        np.save(output / f"{source.stem}.npy", whitened)
def encode(self, frame, bounding_boxes=None, num_face=1):
    """Return embedding(s) for the faces in ``frame``.

    Args:
        frame: Image array. With ``bounding_boxes=None`` it is treated as an
            already aligned face and embedded as a whole.
        bounding_boxes: Optional iterable of ``(x, y, w, h)`` or
            ``(x, y, w, h, confidence)`` boxes. If any 5-element box has a
            confidence below 0.50, the whole call returns ``None``.
        num_face: When 1, only the first embedding is returned; otherwise the
            full embedding array is returned.

    Returns:
        One embedding, an array of embeddings, or ``None`` when a box is
        rejected or any exception occurs (the exception is printed).
    """
    try:
        import tensorflow as tf  # lazy loading
        import facenet.src.facenet as facenet  # lazy loading

        img_list = []
        if bounding_boxes is not None:
            # have multiple face in photo
            for bounding_boxe in bounding_boxes:
                if len(bounding_boxe) == 5 and bounding_boxe[4] < 0.50:
                    return None
                # Only the first four entries are geometry. Unpacking the
                # whole box used to fail for every accepted 5-element box.
                x, y, w, h = (int(v) for v in bounding_boxe[:4])
                if self._face_crop_margin:
                    half_margin = int(self._face_crop_margin / 2)
                    left = max(x - half_margin, 0)
                    top = max(y - half_margin, 0)
                    right = min(x + w + half_margin, frame.shape[1])
                    bottom = min(y + h + half_margin, frame.shape[0])
                    # Measure the size from the shifted origin so the
                    # right/bottom margins are actually included.
                    x, y, w, h = left, top, right - left, bottom - top
                face = misc.imresize(
                    frame[y:y + h, x:x + w, :],
                    (self._face_crop_size, self._face_crop_size),
                    interp='bilinear')
                img_list.append(facenet.prewhiten(face))
        else:
            # face is aligned
            img_list.append(facenet.prewhiten(frame))

        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        feed_dict = {
            images_placeholder: img_list,
            phase_train_placeholder: False
        }
        result = self._sess.run(embeddings, feed_dict=feed_dict)
        return result[0] if num_face == 1 else result
    except Exception as ex:
        # Best-effort API: report the problem and signal failure with None.
        print(ex)
        return None
def run(self):
    """Detect faces on queued frames and publish outlines plus crops.

    Builds the MTCNN networks once, then loops forever: takes an image from
    ``self.inq``, detects faces, paints a white one-pixel rectangle for every
    detection onto a copy of the image, and puts
    ``{"src": outlined_copy, "dst": [prewhitened 160x160 crops]}`` on
    ``self.out_q``. The method never returns.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160
    gpu_memory_fraction = 1.0

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            p_net, r_net, o_net = detect_face.create_mtcnn(sess, None)

    white = 255
    while True:
        frame = self.inq.get()
        frame_size = np.asarray(frame.shape)[0:2]
        detections, _ = detect_face.detect_face(
            frame, minsize, p_net, r_net, o_net, threshold, factor)

        outlined = frame.copy()
        rows, cols = outlined.shape[0], outlined.shape[1]
        crops = []
        for detection in detections:
            det = np.squeeze(detection[0:4])
            box = np.zeros(4, dtype=np.int32)
            box[0] = np.maximum(det[0] - margin / 2, 0)
            box[1] = np.maximum(det[1] - margin / 2, 0)
            box[2] = np.minimum(det[2] + margin / 2, frame_size[1])
            box[3] = np.minimum(det[3] + margin / 2, frame_size[0])
            x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
            crop = frame[y0:y1, x0:x1, :]

            # An edge coordinate outside the image is drawn on the last
            # column / row instead (box[2]/box[3] may equal the image size).
            left_col = x0 if 0 <= x0 < cols else cols - 1
            outlined[y0:y1, left_col, :] = white
            right_col = x1 if 0 <= x1 < cols else cols - 1
            outlined[y0:y1, right_col, :] = white
            top_row = y0 if 0 <= y0 < rows else rows - 1
            outlined[top_row, x0:x1, :] = white
            bottom_row = y1 if 0 <= y1 < rows else rows - 1
            outlined[bottom_row, x0:x1, :] = white

            resized = Image.fromarray(crop).resize(
                (image_size, image_size), Image.BILINEAR)
            crops.append(facenet.prewhiten(np.array(resized)))

        self.out_q.put({"src": outlined, "dst": crops})
def vectorize(self, image):
    """Return the flattened embedding of a single image.

    The image is prewhitened, given a leading batch axis and evaluated
    through ``self.embeddings`` with the train phase switched off.

    Args:
        image: Image array with three dimensions.

    Returns:
        One-dimensional array holding the embedding values.
    """
    whitened = facenet.prewhiten(image)
    dims = whitened.shape
    batch = whitened.reshape(-1, dims[0], dims[1], dims[2])
    result = self.sess.run(self.embeddings, feed_dict={
        self.images_placeholder: batch,
        self.phase_train_placeholder: False,
    })
    return result.flatten()
def extract_features(faces):
    """Return the embeddings of a batch of face images.

    Every face is resized to the module-level ``image_size``, prewhitened and
    passed through ``facenet.flip(..., False)`` before the batch is evaluated
    with the module-level ``sess``/``embeddings``.

    The preprocessed faces are collected in a new list. Writing them back
    into ``faces`` (as before) mutated the caller's array and cast the
    prewhitened floats to the dtype of ``faces``.

    Args:
        faces: Array of face images, one face per entry along the first axis.

    Returns:
        The value of ``embeddings`` evaluated on the preprocessed batch.
    """
    prepared = []
    for face in faces:
        resized = cv2.resize(face, (image_size, image_size))
        whitened = facenet.prewhiten(resized)
        prepared.append(facenet.flip(whitened, False))

    # An empty batch is fed through unchanged, as it was before.
    batch = prepared if prepared else faces
    feed_dict = {
        images_placeholder: batch,
        phase_train_placeholder: False
    }
    return sess.run(embeddings, feed_dict=feed_dict)
def generate_embedding(self, face):
    """Return the embedding of ``face.image``.

    Tensors are looked up by name (``input:0``, ``embeddings:0``,
    ``phase_train:0``) in the graph of ``self.sess``.

    Args:
        face: Object whose ``image`` attribute holds the face image.

    Returns:
        The first row of the evaluated embeddings tensor.
    """
    graph = self.sess.graph
    input_tensor = graph.get_tensor_by_name("input:0")
    output_tensor = graph.get_tensor_by_name("embeddings:0")
    train_flag = graph.get_tensor_by_name("phase_train:0")

    batch = [facenet.prewhiten(face.image)]
    result = self.sess.run(output_tensor, feed_dict={
        input_tensor: batch,
        train_flag: False,
    })
    return result[0]
def generate_embedding(self, face):
    """Return the embedding of ``face.image`` using the default graph.

    Args:
        face: Object whose ``image`` attribute holds the face image.

    Returns:
        The first row of the ``embeddings:0`` tensor evaluated by
        ``self.sess`` on the prewhitened image.
    """
    # Get input and output tensors
    graph = tf.get_default_graph()
    input_tensor = graph.get_tensor_by_name("input:0")
    output_tensor = graph.get_tensor_by_name("embeddings:0")
    train_flag = graph.get_tensor_by_name("phase_train:0")

    whitened = facenet.prewhiten(face.image)

    # Run forward pass to calculate embeddings
    result = self.sess.run(
        output_tensor,
        feed_dict={input_tensor: [whitened], train_flag: False})
    return result[0]
def align_img(self, img, bounding_boxes, image_size, margin):
    """Crop the first bounding box out of ``img`` and prewhiten it.

    Args:
        img: Image array indexed as ``img[row, column, channel]``.
        bounding_boxes: Two-dimensional array of boxes; only row 0,
            columns 0-3 are used.
        image_size: Edge length of the square output.
        margin: Total margin around the box (half on each side), clipped to
            the image borders.

    Returns:
        The prewhitened, bilinearly resized crop.
    """
    height, width = np.asarray(img.shape)[0:2]
    det = np.squeeze(bounding_boxes[0, 0:4])

    box = np.zeros(4, dtype=np.int32)
    box[0] = np.maximum(det[0] - margin / 2, 0)
    box[1] = np.maximum(det[1] - margin / 2, 0)
    box[2] = np.minimum(det[2] + margin / 2, width)
    box[3] = np.minimum(det[3] + margin / 2, height)

    crop = img[box[1]:box[3], box[0]:box[2], :]
    resized = misc.imresize(crop, (image_size, image_size),
                            interp='bilinear')
    return facenet.prewhiten(resized)
def find_closest_mean(self, image, unknown_bound=0.85):
    """
    Find the person whose stored embeddings are on average closest to the face.

    :param image: Detected face.
    :param unknown_bound: If the smallest mean distance is greater than
        unknown_bound, the face is reported as unknown.
    :return: "unknown" or the name of the most similar person. An empty
        embeddings base also yields "unknown" instead of raising ValueError.
    """
    image_embedding = self.get_embedding(facenet.prewhiten(image))
    confidences = {
        name: np.mean([
            distance.euclidean(emb, image_embedding) for emb in embeddings
        ])
        for name, embeddings in self.embeddings.items()
    }
    print(confidences)

    if not confidences:
        # min() over an empty base would raise; nothing can match.
        return "unknown"

    closest = min(confidences, key=confidences.get)
    if confidences[closest] > unknown_bound:
        return "unknown"
    return closest
def __face_embedding_thread(self, in_queue, out_queue, model):
    """Worker loop: embed every face image taken from ``in_queue``.

    Loads the model once in a fresh graph/session, then loops forever:
    takes a face image from ``in_queue``, prewhitens it, evaluates the
    ``embeddings:0`` tensor and puts the result on ``out_queue``.

    Args:
        in_queue: Queue-like object providing face images through ``get``.
        out_queue: Queue-like object receiving the embedding arrays.
        model: Model argument forwarded to ``facenet.load_model``.
    """
    with tf.Graph().as_default():
        # The context manager closes the session if the loop ever exits.
        with tf.Session() as sess:
            # Load the model
            facenet.load_model(model)

            # Get input and output tensors once; they do not change per item.
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            while True:
                face_image = in_queue.get()
                prewhiten_face = facenet.prewhiten(face_image)

                # Run forward pass to calculate embeddings
                feed_dict = {
                    images_placeholder: [prewhiten_face],
                    phase_train_placeholder: False,
                }
                out_queue.put(sess.run(embeddings, feed_dict=feed_dict))
def crop_and_align_image(img, bounding_box, confidence, image_size=(182, 182), margin=44):
    """Crop, resize and prewhiten one detection if it is confident enough.

    Args:
        img: Image array indexed as ``img[row, column, channel]``.
        bounding_box: Sequence whose first four entries are the box corners.
        confidence: Detection score; anything not above 0.95 is rejected.
        image_size: ``size`` argument passed to ``misc.imresize``.
        margin: Total margin around the box (half on each side), clipped to
            the image borders.

    Returns:
        The prewhitened crop, or ``None`` when the detection is rejected.
    """
    if not confidence > 0.95:
        return None

    height, width = np.asarray(img.shape)[0:2]
    det = np.squeeze(bounding_box[0:4])

    box = np.zeros(4, dtype=np.int32)
    box[0] = np.maximum(det[0] - margin / 2, 0)
    box[1] = np.maximum(det[1] - margin / 2, 0)
    box[2] = np.minimum(det[2] + margin / 2, width)
    box[3] = np.minimum(det[3] + margin / 2, height)

    crop = img[box[1]:box[3], box[0]:box[2], :]
    resized = misc.imresize(crop, size=image_size, interp='bilinear')
    return facenet.prewhiten(resized)
def __init__(self, image_size=(160, 160), data_path=r".\data\ms_faces",
             embeddings_path=r".\data\data_facenet.json", train=False):
    """
    Load the model and either build or read the base of embeddings.

    :param image_size: Image size for which the embedding is calculated.
    :param data_path: Path to the base of faces (one sub-directory per
        person). It is assumed that all faces are frontal. Only read when
        train is True.
    :param embeddings_path: Path to the JSON base of embeddings.
    :param train: If True, compute a new database of embeddings from
        data_path and write it to embeddings_path; otherwise load the
        database from embeddings_path.
    """
    # The default paths are raw strings: "\d" and "\m" are invalid escape
    # sequences in a normal literal. The runtime values are unchanged.
    self.image_size = image_size
    facenet.load_model("facenet_model/20180408-102900.pb")
    self.sess = tf.Session()

    if not train:
        with open(embeddings_path, mode="r") as f:
            self.embeddings = json.load(f)
        return

    # The face directory is only needed (and only listed) when training.
    name_face_path = {}
    for name in os.listdir(data_path):
        person_dir = os.path.join(data_path, name)
        paths = [os.path.join(person_dir, face)
                 for face in os.listdir(person_dir)]
        if paths:  # people without any image are left out, as before
            name_face_path[name] = paths

    self.embeddings = dict.fromkeys(name_face_path)
    for name, paths in name_face_path.items():
        self.embeddings[name] = [
            self.get_embedding(facenet.prewhiten(read_rgb(img_path))).tolist()
            for img_path in paths
        ]
    with open(embeddings_path, mode="w") as f:
        json.dump(self.embeddings, f, indent=4)
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Crop, resize and prewhiten the first detected face of every image.

    Builds the MTCNN detector in a fresh graph/session, processes every path
    and closes the session before returning. Paths in which no face is found
    are removed from ``image_paths`` in place, so on return the list lines up
    with the rows of the result.

    Args:
        image_paths: Mutable list of image file paths (``~`` is expanded).
        image_size: Edge length of the square output crops.
        margin: Total margin around the detected box (half on each side),
            clipped to the image borders.
        gpu_memory_fraction: Value passed as
            ``per_process_gpu_memory_fraction`` for the detector session.

    Returns:
        ``np.stack`` of the prewhitened crops, one per remaining path.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    img_list = []
    try:
        # Iterate over a copy because image_paths is edited inside the loop.
        for image in copy.copy(image_paths):
            img = misc.imread(os.path.expanduser(image), mode='RGB')
            img_size = np.asarray(img.shape)[0:2]
            bounding_boxes, _ = align.detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            if len(bounding_boxes) < 1:
                image_paths.remove(image)
                print("can't detect face, remove ", image)
                continue
            det = np.squeeze(bounding_boxes[0, 0:4])
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            aligned = misc.imresize(cropped, (image_size, image_size),
                                    interp='bilinear')
            img_list.append(facenet.prewhiten(aligned))
    finally:
        # The detector session was previously leaked on every call.
        sess.close()

    images = np.stack(img_list)
    return images
def find_faces(self, image):
    """Detect faces in ``image`` and return one ``Face`` per detection.

    For every detected box a ``Face`` is created with ``container_image``
    (the input image), ``bounding_box`` (the box enlarged by
    ``self.face_crop_margin``, half on each side, clipped to the image) and
    ``image`` (the crop resized to ``self.face_crop_size``). The crop is
    stored un-prewhitened; the prewhitened copy that used to be computed here
    was never used and has been dropped.

    Args:
        image: Image array indexed as ``image[row, column, channel]``.

    Returns:
        List of ``Face`` objects, empty when nothing is detected.
    """
    faces = []
    print(image.shape)
    bounding_boxes, _ = align_detect_face.detect_face(
        image, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)

    # Loop-invariant values.
    img_size = np.asarray(image.shape)[0:2]
    half_margin = self.face_crop_margin / 2
    crop_shape = (self.face_crop_size, self.face_crop_size)

    for bb in bounding_boxes:
        face = Face()
        face.container_image = image
        face.bounding_box = np.zeros(4, dtype=np.int32)
        face.bounding_box[0] = np.maximum(bb[0] - half_margin, 0)
        face.bounding_box[1] = np.maximum(bb[1] - half_margin, 0)
        face.bounding_box[2] = np.minimum(bb[2] + half_margin, img_size[1])
        face.bounding_box[3] = np.minimum(bb[3] + half_margin, img_size[0])
        cropped = image[face.bounding_box[1]:face.bounding_box[3],
                        face.bounding_box[0]:face.bounding_box[2], :]
        face.image = misc.imresize(cropped, crop_shape, interp='bilinear')
        faces.append(face)
    return faces
def _calculate_embeddings(self, cropped_images):
    """Run forward pass to calculate embeddings, ``BATCH_SIZE`` images at a time.

    Args:
        cropped_images: Sequence of face crops; each one is prewhitened
            before being fed to the model.

    Returns:
        Array of shape ``(len(cropped_images), embedding_size)`` with one
        embedding per input image, in input order.
    """
    prewhitened_images = [prewhiten(img) for img in cropped_images]
    calc_model = self._embedding_calculator
    graph = calc_model.graph
    graph_images_placeholder = graph.get_tensor_by_name("input:0")
    graph_embeddings = graph.get_tensor_by_name("embeddings:0")
    graph_phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

    embedding_size = graph_embeddings.get_shape()[1]
    image_count = len(prewhitened_images)
    embeddings = np.zeros((image_count, embedding_size))

    for start_index in range(0, image_count, self.BATCH_SIZE):
        end_index = min(start_index + self.BATCH_SIZE, image_count)
        feed_dict = {
            # Feed only the current batch. Feeding the full list returned
            # image_count rows, which cannot be stored in a batch-sized
            # slice once there is more than one batch.
            graph_images_placeholder:
                prewhitened_images[start_index:end_index],
            graph_phase_train_placeholder: False
        }
        embeddings[start_index:end_index, :] = calc_model.sess.run(
            graph_embeddings, feed_dict=feed_dict)
    return embeddings
def inference(model, tfRecord, img_paths, top, av):
    """Print the ``top`` classes most similar to the faces in ``img_paths``.

    Steps:
      1. Locate and crop the face of every image (images without a face are
         reported and skipped).
      2. Embed every crop with the model.
      3. Accumulate, per label, the Euclidean distance between every stored
         embedding read from ``tfRecord`` and every query embedding, turn the
         per-label mean distance into a percentage score and print the best
         ``top`` labels.

    Args:
        model: Model argument forwarded to facenet's ``load_model``.
        tfRecord: Record source forwarded to ``read_and_decodeEmb``.
        img_paths: Iterable of query image paths.
        top: Number of best matches to print (capped at the number of names).
        av: Settings object; ``img_size``, ``img_type``, ``batch_size``,
            ``list_name`` and ``rdict_name`` are read.
    """
    # step1 crop the face in the list
    img_list = []
    for image_name in img_paths:
        face_loc = find_rectangle_of_face(image_name)
        if not face_loc:
            print("Not found the Face in the Picture")
        else:
            img_list.append(crop_resize_image(image_name, face_loc, "", av.img_size, av.img_type, store_ = 0))

    # step 2 store img as the np format
    from facenet.src.facenet import load_model, prewhiten
    emb_list = []
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    if img_list:
        # Load the model once for all crops instead of rebuilding the graph
        # and session for every single image.
        with tf.Graph().as_default():
            with tf.Session(config = config) as sess:
                load_model(model)
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
                for img in img_list:
                    feed_dict = { images_placeholder: [prewhiten(img)], phase_train_placeholder:False }
                    emb_list.append(sess.run(embeddings, feed_dict=feed_dict))

    dataset = read_and_decodeEmb(tfRecord, av.batch_size, 512)  # 512 is face vector of the model
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # step 3 calculate the confidence level
    dist_dict = {}   # label -> summed distance
    label_dict = {}  # label -> number of distances summed
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    with tf.Session(config = config) as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run(iterator.initializer)
        while True:
            try:
                next_example, next_label = sess.run(next_element)
            except tf.errors.OutOfRangeError:
                break
            for emb in emb_list:
                for i, ele in enumerate(next_example):
                    label = str(next_label[i])
                    dist = np.sqrt(np.sum(np.square(np.subtract(ele, emb))))
                    dist_dict[label] = dist + dist_dict.get(label, 0)
                    label_dict[label] = 1 + label_dict.get(label, 0)
        coord.request_stop()
        coord.join(threads)

    for i in range(len(av.list_name)):
        label = str(i)
        if label not in label_dict:
            # No record carried this label; there is nothing to score.
            continue
        dist_dict[label] = (2 * label_dict[label] - dist_dict[label]) * 50 / label_dict[label]

    from collections import Counter
    k = Counter(dist_dict)
    # Cap at the NUMBER of names; the list itself used to be assigned here,
    # which made most_common() fail.
    top = min(top, len(av.list_name))
    high = k.most_common(top)
    for i, ele in enumerate(high):
        print("第 {} 相似為: {}, 相似度: {:.1f}%".format(i + 1, av.rdict_name[ele[0]], ele[1]))
def main():
    """Recognise faces on webcam frames and show the annotated stream.

    Loads the pickled classifier and the FaceNet graph, starts the webcam
    feed, then for every frame taken from the image queue: detects faces with
    ``MTCNN``, embeds each crop, classifies it and draws the box, the name
    ("Unknown" when the best probability is not above 0.45) and the
    probability. Pressing ``x`` in the window ends the loop.
    """
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = 'facenet/Models/Own/Own.pkl'
    VIDEO_PATH = args.path  # unused below; still evaluated so a missing args.path fails here
    FACENET_MODEL_PATH = 'facenet/Models/facenet/20180402-114759.pb'

    # Load The Custom Classifier
    # NOTE: pickle.load executes arbitrary code; the classifier file must
    # come from a trusted source.
    with open(CLASSIFIER_PATH, 'rb') as file:
        model, class_names = pickle.load(file)
    print("Custom Classifier, Successfully loaded")

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        # The context manager makes the session the default one and closes
        # it when main() exits.
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options,
                log_device_placement=False)) as sess:
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(FACENET_MODEL_PATH)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            # Results are not used below (detection goes through MTCNN());
            # the call is kept as it was.
            pnet, rnet, onet = detect_face.create_mtcnn(
                sess, "facenet/src/align")

            person_detected = collections.Counter()

            from Config import webcam, feed
            cam = webcam()
            cam.init("Microsoft® LifeCam HD-3000 - 1")
            cam.set_callback()
            cam.set_callback_properties()
            cam.grabber_cb.grab_sample()
            cam.run()

            images_queue = queue.Queue()
            lock = Lock()
            thread1 = feed(lock, cam.queue, images_queue)

            detector = MTCNN()
            while True:
                # Block for a frame instead of spinning on an empty queue.
                try:
                    img = images_queue.get(timeout=1)
                except queue.Empty:
                    print("empty images queue")
                    continue
                print("heeererea")
                result = detector.detect_faces(img)
                try:
                    if result != []:
                        for person in result:
                            bounding_box = person['box']
                            print(person['confidence'])
                            print(result)
                            x1 = bounding_box[0]
                            y1 = bounding_box[1]
                            x2 = bounding_box[0] + bounding_box[2]
                            y2 = bounding_box[1] + bounding_box[3]

                            cropped = img[y1:y2, x1:x2]
                            scaled = cv2.resize(
                                cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(
                                -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings,
                                                 feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            best_name = class_names[best_class_indices[0]]
                            print("Name: {}, Probability: {}".format(
                                best_name, best_class_probabilities))

                            cv2.rectangle(img, (x1, y1), (x2, y2),
                                          (0, 155, 255), 2)
                            text_x = x1
                            text_y = y2 + 20

                            # One crop is embedded at a time, so compare the
                            # single probability explicitly.
                            if best_class_probabilities[0] > 0.45:
                                name = class_names[best_class_indices[0]]
                            else:
                                name = "Unknown"

                            cv2.putText(img, name, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                        (255, 255, 255), thickness=1,
                                        lineType=2)
                            cv2.putText(img, str(
                                round(best_class_probabilities[0], 3)),
                                (text_x, text_y + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (255, 255, 255), thickness=1, lineType=2)
                            person_detected[best_name] += 1
                except Exception as exc:
                    # Still best-effort per frame, but no longer silent, and
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("face recognition failed: {}".format(exc))

                cv2.imshow('Face Recognition', img)
                if cv2.waitKey(1) & 0xFF == ord('x'):
                    break

            cv2.destroyAllWindows()
def main(classifierpath, slotid, imagepath, pretrained_model):
    """Recognise the faces in a set of images and save the detected crops.

    Every face detected in every image is embedded and classified. A face
    whose best probability is above 0.20 is recorded as a known face (with
    its best, second-best and third-best class names); any other face is
    recorded as ``Unknown<n>``. Each crop is written to ``./static/`` and the
    known faces are printed as JSON.

    Args:
        classifierpath: Path of the pickled ``(model, class_names)`` pair.
        slotid: Prefix used in the names of the saved crops.
        imagepath: Iterable of image paths to process.
        pretrained_model: Model argument forwarded to ``facenet.load_model``.

    Returns:
        ``{"regno": [...]}`` holding, for every known face, the part of its
        class name that precedes the first space.
    """
    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.709
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = classifierpath
    SLOTID = slotid
    IMAGE_PATH = imagepath
    FACENET_MODEL_PATH = pretrained_model

    # Load The Custom Classifier
    # NOTE: pickle.load executes arbitrary code; the classifier file must
    # come from a trusted source.
    with open(CLASSIFIER_PATH, "rb") as file:
        model, class_names = pickle.load(file)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        # The context manager makes the session the default one and closes
        # it on return.
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options,
                log_device_placement=False)) as sess:
            # Load the model
            facenet.load_model(FACENET_MODEL_PATH)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            pnet, rnet, onet = align.detect_face.create_mtcnn(
                sess,
                os.path.dirname(os.path.realpath(__file__)) + "/align/")

            person_detected = collections.Counter()
            face_list = {}      # face id -> entry of a recognised face
            unknown_list = {}   # face id -> entry of an unrecognised face
            unknown_count = 0
            detected_face = {}  # face id -> cropped pixels

            for IMG_ADDR in IMAGE_PATH:
                frame = imageio.imread(IMG_ADDR)
                bounding_boxes, _ = align.detect_face.detect_face(
                    frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                faces_found = bounding_boxes.shape[0]
                print("Faces found :", faces_found)
                try:
                    if faces_found > 0:
                        det = bounding_boxes[:, 0:4]
                        bb = np.zeros((faces_found, 4), dtype=np.int32)
                        for i in range(faces_found):
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            cropped = frame[bb[i][1]:bb[i][3],
                                            bb[i][0]:bb[i][2], :]
                            scaled = cv2.resize(
                                cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(
                                -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings,
                                                 feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)

                            # Per-class probabilities plus the runner-up
                            # names (strictly below the previous rank).
                            prob_claa = {}
                            best_prob = 0
                            second_best_prob = 0
                            third_best_prob = 0
                            second_best_name = ""
                            third_best_name = ""
                            for iterr in range(len(predictions[0])):
                                prob_claa[str(class_names[iterr])] = \
                                    predictions[0][iterr]
                                if predictions[0][iterr] > best_prob:
                                    best_prob = predictions[0][iterr]
                            for key in prob_claa:
                                if second_best_prob < prob_claa[key] < best_prob:
                                    second_best_name = key
                                    second_best_prob = prob_claa[key]
                            for key in prob_claa:
                                if third_best_prob < prob_claa[key] < second_best_prob:
                                    third_best_name = key
                                    third_best_prob = prob_claa[key]

                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            best_name = class_names[best_class_indices[0]]

                            # Drawn on ``frame``; ``cropped`` is a slice of
                            # it, so the call is kept exactly as it was.
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                          (bb[i][2], bb[i][3]), (0, 255, 0), 2)

                            face_id = uuid.uuid1().int
                            detected_face[face_id] = cropped
                            if best_class_probabilities > 0.20:
                                name = class_names[best_class_indices[0]]
                                face_list[face_id] = {
                                    "name": name,
                                    "prob": best_class_probabilities[0],
                                    "best_name": best_name,
                                    "second_best_name": second_best_name,
                                    "third_best_name": third_best_name,
                                    "prob_list": prob_claa,
                                }
                            else:
                                name = "Unknown" + str(unknown_count)
                                unknown_list[face_id] = {
                                    "name": name,
                                    "guess": class_names[best_class_indices[0]],
                                    "prob": best_class_probabilities[0],
                                }
                                unknown_count += 1
                            person_detected[best_name] += 1
                except Exception as exc:
                    # Still best-effort per image, but no longer silent, and
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("face recognition failed for {}: {}".format(
                        IMG_ADDR, exc))

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

            duplicate_keys = find_duplicate(face_list)
            present_list = []
            absent_list = []
            for key in detected_face:
                if key in face_list:
                    imageio.imwrite(
                        "./static/{}--{}-{}.jpg".format(
                            SLOTID, face_list[key]["name"],
                            face_list[key]["prob"]),
                        detected_face[key],
                    )
                    present_list.append(face_list[key]["name"].split(" ")[0])
                else:
                    imageio.imwrite(
                        "./static/{}_{}-{}.jpg".format(
                            SLOTID, unknown_list[key]["name"],
                            unknown_list[key]["prob"]),
                        detected_face[key],
                    )
                    # This key lives in unknown_list; reading face_list here
                    # raised KeyError for every unknown face.
                    absent_list.append(
                        unknown_list[key]["name"].split(" ")[0])

            print(json.dumps(face_list, indent=4))
            # Used to sit after the return statement and was never reached.
            cv2.destroyAllWindows()
            return {"regno": present_list}
def main(filename, tmp_result):
    """Blur every recognised face that is not allow-listed, then re-attach audio.

    Prints per-cluster image statistics for the ``cluster_people`` folder,
    loads the pickled classifier plus the MTCNN and FaceNet models, and then
    walks through ``filename``. Only every ``video_speedup``-th frame is
    analysed and written. Each detected face is classified; when the best
    class is not in ``known_name`` the face region is blurred. The processed
    frames go to ``final_video_mosaic.avi`` and are finally re-encoded,
    together with the audio track of the input, into ``tmp_result1.mp4``.

    Args:
        filename: Path of the input video.
        tmp_result: Not used; the result is always written to
            ``tmp_result1.mp4``.
            NOTE(review): presumably meant to be the output path -- confirm
            with the caller before wiring it in.

    Raises:
        OSError: If the cluster folders or the classifier file are missing.
    """
    cluster_root = 'C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people'
    video_speedup = 2  # only every 2nd frame is analysed and written
    known_name = ['1human']  # classes whose faces are left untouched
    minsize = 20  # minimum face size passed to the detector
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # detector scale factor
    input_image_size = 160

    # --- Cluster statistics (console output only) -------------------------
    cluster_count = len(os.listdir(cluster_root))
    folder = ['{}/{}human'.format(cluster_root, idx)
              for idx in range(cluster_count)]
    image_counts = [len(os.listdir(path)) for path in folder]
    total_images = sum(image_counts)
    print(cluster_count)

    under_folder = []
    for idx, count in enumerate(image_counts):
        # Avoid ZeroDivisionError when every cluster folder is empty.
        share = int(count / total_images * 100) if total_images else 0
        print(share)
        if share < 30:
            under_folder.append(str(idx) + 'human')
    for name in under_folder:
        print(name)
    for idx, path in enumerate(folder):
        print(path)
        print(idx)

    # --- Models -----------------------------------------------------------
    fourcc = cv2.VideoWriter_fourcc(*'FMP4')
    project_root_folder = os.path.join(
        os.path.abspath(__file__), 'C:/Users/mmlab/PycharmProjects/UI_pyqt/')
    classifier_path = (project_root_folder
                       + 'trained_classifier/video_new_name_test4.pkl')
    # NOTE: pickle.load can execute arbitrary code; only load classifier
    # files that come from a trusted source.
    with open(classifier_path, 'rb') as f:
        (model, class_names) = pickle.load(f)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        # Using the session as a context manager makes it the default
        # session and closes it on exit.
        with tf.Session(config=session_config) as sess:
            # Bounding box
            pnet, rnet, onet = src.align.detect_face.create_mtcnn(
                sess, project_root_folder + "src/align")

            # Get the path of the facenet model and load it
            facenet_model_path = (project_root_folder
                                  + "20180402-114759/20180402-114759.pb")
            facenet.load_model(facenet_model_path)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            # --- Video input / output -------------------------------------
            video_capture = cv2.VideoCapture(filename)
            width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
            height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
            videoclip = VideoFileClip(filename)
            audioclip = videoclip.audio

            # Name of the intermediate (silent) output file
            output_video_name = project_root_folder + 'final_video_mosaic.avi'
            video_recording = cv2.VideoWriter(
                output_video_name, fourcc, 15, (int(width), int(height)))

            total_frames_passed = 0
            try:
                while True:
                    try:
                        ret, frame = video_capture.read()
                    except Exception:
                        break
                    if not ret:
                        break

                    # Skip frames if video is to be sped up
                    if video_speedup:
                        total_frames_passed += 1
                        if total_frames_passed % video_speedup != 0:
                            continue

                    bounding_boxes, _ = src.align.detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    frame_height, frame_width = frame.shape[:2]

                    # Integer pixel coordinates (x1, y1, x2, y2) per face.
                    boxes = bounding_boxes[:, 0:4].astype(np.int32)
                    for x1, y1, x2, y2 in boxes:
                        # Ignore faces touching or crossing the frame border.
                        if (x1 <= 0 or y1 <= 0
                                or x2 >= frame_width or y2 >= frame_height):
                            print('face is inner of range!')
                            continue

                        cropped = frame[y1:y2, x1:x2, :]
                        scaled = cv2.resize(
                            cropped, (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
                        scaled = facenet.prewhiten(scaled)
                        scaled_reshape = scaled.reshape(
                            -1, input_image_size, input_image_size, 3)
                        feed_dict = {
                            images_placeholder: scaled_reshape,
                            phase_train_placeholder: False,
                        }
                        emb_array = sess.run(embeddings, feed_dict=feed_dict)

                        predictions = model.predict_proba(emb_array)
                        best_index = np.argmax(predictions, axis=1)[0]
                        best_probability = predictions[0][best_index]

                        # Blur once when the face is confidently classified
                        # as somebody who is not on the allow-list.
                        if (best_probability > 0.09
                                and class_names[best_index] not in known_name):
                            region = frame[y1 + 5:y2 - 5, x1 + 2:x2 - 2]
                            if region.size:  # tiny boxes leave nothing to blur
                                frame[y1 + 5:y2 - 5, x1 + 2:x2 - 2] = cv2.blur(
                                    region, (50, 50))

                    video_recording.write(frame)
                print("mosaiced")
            finally:
                # Release the native handles even if a frame failed.
                video_recording.release()
                video_capture.release()
                cv2.destroyAllWindows()

            # Re-encode the processed frames together with the source audio.
            videoclip2 = VideoFileClip(output_video_name)
            videoclip2.audio = audioclip
            videoclip2.write_videofile("tmp_result1.mp4")
            print("done")
def main(filename, tmp_result):
    """Blur the faces of the people in ``known_name`` and re-attach the audio.

    Prints per-cluster image statistics for the ``cluster_people`` folder,
    loads the pickled classifier plus the MTCNN and FaceNet models, and then
    processes every ``args.video_speedup``-th frame of ``filename``. Each
    detected face is classified; faces whose best class equals an entry of
    ``known_name`` (hard-coded to ``['4human']``) are blurred, other
    confidently classified faces get a green rectangle. Processed frames are
    shown, written to ``final_video_mosaic.avi`` and finally re-encoded with
    the audio of the input into ``tmp_result1.mp4``.

    Args:
        filename: Path of the input video.
        tmp_result: Not used by this function.
            NOTE(review): presumably the intended output path -- confirm.
    """
    # Ad-hoc attribute bag; only ``video_speedup`` is read below.
    args = lambda: None
    args.video = True
    args.youtube_video_url = ''
    args.video_speedup = 2
    args.webcam = False
    a = 0  # total number of files over all cluster folders
    b = 0
    c = 0
    for i in range(0, len(os.listdir('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people'))):
        # print(len(os.listdir('C:/Users/mmlab/PycharmProjects/facenet-pytorch-master/facenet-pytorch-master/models/clusteringfolder/{}'.format(i))))
        a += len(os.listdir('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people/{}'.format(i) + 'human'))
    print("asdfasdfdsa")
    print(len(os.listdir('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people')))
    folder = []
    folder_name = []
    folder_in_file = []
    under_folder = []
    for i in range(0, len(os.listdir('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people'))):
        b = len(os.listdir('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people/{}'.format(i) + 'human'))
        # Share of this cluster in percent.
        # NOTE(review): raises ZeroDivisionError when ``a`` is 0.
        d = int(b / a * 100)
        print(d)
        folder.append('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people/{}'.format(i) + 'human')
        folder_name.append(i)
        if d < 30:
            under_folder.append(str(i) + 'human')
    for i in range(len(under_folder)):
        print(under_folder[i])
    for i in range(0, len(folder)):
        print(folder[i])
        print(folder_name[i])
        if int(folder_name[i]) > 0:
            file = os.path.join('C:/Users/mmlab/PycharmProjects/UI_pyqt/cluster_people/{}'.format(int(folder_name[i])) + 'human', str(folder_name[i]) + 'human1.png')
            folder_in_file.append(file)
            #print(folder_in_file[i])
    # NOTE(review): the loaded image ``cv`` is never used afterwards.
    for i in range(0, len(folder_in_file) - 1):
        cv = cv2.imread(folder_in_file[i], cv2.IMREAD_COLOR)
        #cv2.imwrite('model{}.png'.format(i), cv)
    # Parameters handed to the MTCNN face detector below.
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    image_size = 182
    input_image_size = 160
    # comment out these lines if you do not want video recording
    # USE FOR RECORDING VIDEO
    fourcc = cv2.VideoWriter_fourcc(*'FMP4')
    # Get the path of the classifier and load it
    project_root_folder = os.path.join(os.path.abspath(__file__), 'C:/Users/mmlab/PycharmProjects/UI_pyqt/')
    classifier_path = project_root_folder + 'trained_classifier/video_new_name_test4.pkl'
    print(classifier_path)
    with open(classifier_path, 'rb') as f:
        (model, class_names) = pickle.load(f)
        print("Loaded classifier file")
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        # NOTE(review): this session is never closed explicitly.
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # Bounding box
            pnet, rnet, onet = src.align.detect_face.create_mtcnn(sess, project_root_folder + "src/align")
            # Get the path of the facenet model and load it
            facenet_model_path = project_root_folder + "20180402-114759/20180402-114759.pb"
            facenet.load_model(facenet_model_path)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            # Start video capture
            people_detected = set()
            person_detected = collections.Counter()
            video_path = project_root_folder
            video_name = filename  # video
            full_original_video_path_name = filename
            print(filename)
            video_capture_path = full_original_video_path_name
            if not os.path.isfile(full_original_video_path_name):
                # Only a message is printed; the download call is commented out.
                print('Video not found at path ' + full_original_video_path_name + '. Commencing download from YouTube')
                # Note if the video ever gets removed this will cause issues
                #YouTube(args.youtube_video_url).streams.first().download(output_path =video_path, filename=video_name)
            video_capture = cv2.VideoCapture(full_original_video_path_name)
            width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
            height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
            videoclip = VideoFileClip(full_original_video_path_name)
            audioclip = videoclip.audio
            # Name of the output file
            video_recording = cv2.VideoWriter(project_root_folder + 'final_video_mosaic.avi', fourcc, 15, (int(width), int(height)))
            output_video_name = project_root_folder + 'final_video_mosaic.avi'
            total_frames_passed = 0
            while True:
                try:
                    ret, frame = video_capture.read()
                except Exception as e:
                    break
                if ret:
                    # Skip frames if video is to be sped up
                    if args.video_speedup:
                        total_frames_passed += 1
                        if total_frames_passed % args.video_speedup != 0:
                            continue
                    bounding_boxes, _ = src.align.detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
                    frame_track = []
                    faces_found = bounding_boxes.shape[0]
                    #known_name=under_folder
                    known_name = ['4human']
                    if faces_found > 0:
                        det = bounding_boxes[:, 0:4]
                        # Integer copy of the box corners (x1, y1, x2, y2).
                        bb = np.zeros((faces_found, 4), dtype=np.int32)
                        for i in range(faces_found):
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # inner exception
                            # Skip boxes that touch or cross the frame border.
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is inner of range!')
                                continue
                            cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                            scaled = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                            # cv2.imshow("Cropped and scaled", scaled)
                            # cv2.waitKey(1)
                            scaled = facenet.prewhiten(scaled)
                            # cv2.imshow("\"Whitened\"", scaled)
                            # cv2.waitKey(1)
                            scaled_reshape = scaled.reshape(-1, input_image_size, input_image_size, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            best_name = class_names[best_class_indices[0]]
                            if best_class_probabilities > 0.09:
                                for y in range(0, len(known_name)):
                                    if class_names[best_class_indices[0]] == known_name[y]:
                                        # Blur the face, leaving a small margin inside the box.
                                        frame[bb[i][1] + 5:bb[i][3] - 5, bb[i][0] + 2:bb[i][2] - 2] = cv2.blur(frame[bb[i][1] + 5:bb[i][3] - 5, bb[i][0] + 2:bb[i][2] - 2], (50, 50))
                                        # NOTE(review): ``c = +1`` assigns +1 on every pass instead of
                                        # incrementing, and ``c`` is never read -- looks like dead code.
                                        for j in range(100):
                                            c = +1
                                        person_detected[best_name] += 1
                                    # NOTE(review): confirm that this ``else`` is meant to pair with
                                    # the name comparison above (box around faces that are not blurred).
                                    else:
                                        cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                    # total_frames_passed += 1
                    # if total_frames_passed == 2:
                    # Report everybody counted more than four times so far.
                    for person, count in person_detected.items():
                        if count > 4:
                            print("Person Detected: {}, Count: {}".format(person, count))
                            people_detected.add(person)
                    # person_detected.clear()
                    # total_frames_passed = 0
                    #cv2.putText(frame, "People detected so far:", (20, 20), cv2.FONT_HERSHEY_PLAIN,
                    #1, (255, 0, 0), thickness=1, lineType=2)
                    '''
                    currentYIndex = 40
                    for idx, name in enumerate(people_detected):
                        cv2.putText(frame, name, (20, currentYIndex + 20 * idx), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), thickness=1, lineType=2)
                    '''
                    # NOTE(review): the waitKey call below is commented out, so this
                    # preview window may never refresh -- verify on the target machine.
                    cv2.imshow("Face Detection and Identification", frame)
                    video_recording.write(frame)
                    frame_track.append(frame)
                    #if cv2.waitKey(1) & 0xFF == ord('q'):
                    #    break
                else:
                    break
            print("mosaiced")
            video_recording.release()
            video_capture.release()
            cv2.destroyAllWindows()
            # Re-encode the processed frames together with the audio of the input.
            videoclip2 = VideoFileClip(output_video_name)
            videoclip2.audio = audioclip
            # Name of the output file
            videoclip2.write_videofile("tmp_result1.mp4")
            print("done")
def _crop_and_prewhiten(img, box, margin, image_size):
    """Cut the face in ``box`` out of ``img``, resize it and prewhiten it.

    ``box`` holds ``x1, y1, x2, y2`` in its first four entries. The crop is
    widened by ``margin / 2`` pixels on every side and clipped to the image.
    """
    img_size = np.asarray(img.shape)[0:2]
    det = np.squeeze(box[0:4])
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
    aligned = cv2.resize(cropped, (image_size, image_size))
    return facenet.prewhiten(aligned)


def _draw_progress_bar(filled, bar_width):
    """Redraw the one-line console bar with ``filled`` of ``bar_width`` cells done."""
    percent = (filled / bar_width) * 100
    print('\r %2d%%\t [%s%s]'
          % (percent, '#' * filled, '□' * (bar_width - filled)), end='')


def face_verification(img_pairs_list):
    """Decide for every image pair whether both pictures show the same person.

    For each pair the first face found by MTCNN in either image is cropped,
    embedded with FaceNet, and the Euclidean distance between the two
    embeddings is compared against a fixed threshold of 1.1.

    Args:
        img_pairs_list: Sequence of pairs; entries ``[0]`` and ``[1]`` of
            each pair are image paths.

    Returns:
        ``(result, dist)``, two lists aligned with ``img_pairs_list``.
        ``result[i]`` is 1 (same person), 0 (different) or -1 when an image
        could not be read or contains no detectable face; ``dist[i]`` is the
        embedding distance, or -1 in that same case. Callers are expected to
        exclude the -1 entries when computing accuracy.
    """
    model = r'facenet\src\align'
    model_facenet = './20170512-110547.pb'
    # MTCNN parameters
    minsize = 40
    # Face thresholds of the pnet / rnet / onet stages: candidates scoring
    # above the threshold are kept, the others are dropped.
    threshold = [0.4, 0.5, 0.6]
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160
    bar_num_total = 50  # width of the console progress bar

    result, dist = [], []

    # Build the MTCNN networks. Their session must stay open for as long as
    # pnet/rnet/onet are called, so it is closed in the ``finally`` below.
    with tf.Graph().as_default():
        mtcnn_sess = tf.compat.v1.Session()
        with mtcnn_sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(mtcnn_sess, model)

    try:
        with tf.Graph().as_default():
            with tf.compat.v1.Session() as sess:
                # Load the FaceNet model from file
                facenet.load_model(model_facenet)
                # Fetch the input / output tensors
                graph = tf.compat.v1.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name(
                    "phase_train:0")

                def embedding_distance(img_pair):
                    """Return the distance for one pair, or None if unusable."""
                    faces = []
                    for path in (img_pair[0], img_pair[1]):
                        img = cv2.imread(path)
                        # cv2.imread signals an unreadable file with None.
                        if img is None:
                            return None
                        boxes, _ = detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        if len(boxes) < 1:
                            return None
                        faces.append(_crop_and_prewhiten(
                            img, boxes[0], margin, image_size))
                    # Turn both faces into embedding vectors
                    feed_dict = {
                        images_placeholder: np.stack(faces),
                        phase_train_placeholder: False,
                    }
                    emb = sess.run(embeddings, feed_dict=feed_dict)
                    # Euclidean distance between the two embeddings
                    return np.sqrt(np.sum(np.square(
                        np.subtract(emb[0], emb[1]))))

                total_num = len(img_pairs_list)
                last_filled = -1
                for done, img_pair in enumerate(img_pairs_list, start=1):
                    ed = embedding_distance(img_pair)
                    if ed is None:
                        # Unusable pair: mark with -1 so it can be excluded
                        # when accuracy is computed later.
                        result.append(-1)
                        dist.append(-1)
                    else:
                        dist.append(ed)
                        # Same person when the faces are close enough
                        result.append(1 if ed <= 1.1 else 0)

                    # Redraw only when the bar actually advances; integer
                    # arithmetic keeps this exact for any list length.
                    filled = bar_num_total * done // total_num
                    if filled != last_filled:
                        _draw_progress_bar(filled, bar_num_total)
                        last_filled = filled
    finally:
        mtcnn_sess.close()

    return result, dist
def main(filename,tmp_result, known_name):
    """Swap the reference face onto selected people in a video.

    The reference face is read from ``test7.jpg``: its 68 dlib landmarks are
    Delaunay-triangulated once up front. Every ``args.video_speedup``-th
    frame of ``filename`` is then searched for faces with MTCNN and each face
    is classified with FaceNet plus the pickled classifier. When the best
    class equals an entry of ``known_name``, the reference face is warped
    triangle by triangle onto that face and blended in with
    ``cv2.seamlessClone``. Processed frames are shown, written to
    ``final_video_swap.avi`` and finally re-encoded with the audio of the
    input into ``tmp_result``.

    Args:
        filename: Path of the input video.
        tmp_result: Path of the final video file (with audio).
        known_name: Sequence of class names whose faces are replaced.
    """
    # Ad-hoc attribute bag; only ``video_speedup`` and ``webcam`` are read.
    args = lambda: None
    args.video = True
    args.youtube_video_url = ''
    args.video_speedup = 2
    args.webcam = False
    # Parameters handed to the MTCNN face detector below.
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    image_size = 182
    input_image_size = 160
    # Reference (source) face, scaled to 60 % of its size.
    # NOTE(review): cv2.imread returns None for a missing file, which would
    # fail on ``img.shape`` -- confirm "test7.jpg" is always present.
    img = cv2.imread("test7.jpg")
    img = cv2.resize(img, (int(img.shape[1] * 0.6), int(img.shape[0] * 0.6)))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    mask = np.zeros_like(img_gray)
    indexes_triangles = []
    # Use dlib's frontal face detector to find the faces in the input
    # picture and return them as ``faces``
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    # Face 1
    faces = detector(img_gray)
    for face in faces:
        # print(face)
        landmarks = predictor(img_gray, face)
        landmarks_points = []
        for n in range(0, 68):
            x = landmarks.part(n).x
            y = landmarks.part(n).y
            landmarks_points.append((x, y))
        points = np.array(landmarks_points, np.int32)
        convexhull = cv2.convexHull(points)
        # cv2.polylines(img, [convexhull], True, (255, 0, 0), 3)
        cv2.fillConvexPoly(mask, convexhull, 255)
        face_image_1 = cv2.bitwise_and(img, img, mask=mask)
        # Delaunay triangulation
        rect = cv2.boundingRect(convexhull)
        subdiv = cv2.Subdiv2D(rect)
        subdiv.insert(landmarks_points)
        triangles = subdiv.getTriangleList()
        # Convert the triangle list to an integer array
        triangles = np.array(triangles, dtype=np.int32)
        for t in triangles:
            pt1 = (t[0], t[1])  # store the triangle's corner coordinates
            pt2 = (t[2], t[3])
            pt3 = (t[4], t[5])
            # Store the index (0~68) of the landmark point that coincides
            # with each triangle corner
            index_pt1 = np.where((points == pt1).all(axis=1))
            index_pt1 = extract_index_nparray(index_pt1)
            index_pt2 = np.where((points == pt2).all(axis=1))
            index_pt2 = extract_index_nparray(index_pt2)
            index_pt3 = np.where((points == pt3).all(axis=1))
            index_pt3 = extract_index_nparray(index_pt3)
            if index_pt1 is not None and index_pt2 is not None and index_pt3 is not None:
                triangle = [index_pt1, index_pt2, index_pt3]  # landmark indices of this triangle
                indexes_triangles.append(triangle)
    # comment out these lines if you do not want video recording
    # USE FOR RECORDING VIDEO
    fourcc = cv2.VideoWriter_fourcc(*'FMP4')
    # Get the path of the classifier and load it
    project_root_folder = os.path.join(os.path.abspath(__file__), "C:/Users/mmlab/PycharmProjects/UI_pyqt/")
    classifier_path = project_root_folder + 'trained_classifier/video_new_name_test4.pkl'
    print (classifier_path)
    with open(classifier_path, 'rb') as f:
        (model, class_names) = pickle.load(f)
        print("Loaded classifier file")
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        # NOTE(review): this session is never closed explicitly.
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            # Bounding box
            pnet, rnet, onet = src.align.detect_face.create_mtcnn(sess, project_root_folder + "src/align")
            # Get the path of the facenet model and load it
            facenet_model_path = project_root_folder + "20180402-114759/20180402-114759.pb"
            facenet.load_model(facenet_model_path)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            # Start video capture
            people_detected = set()
            person_detected = collections.Counter()
            # NOTE(review): ``args.webcam`` is always False here; the webcam
            # branch would leave ``full_original_video_path_name`` undefined
            # for the VideoFileClip call below.
            if args.webcam is True:
                video_capture = cv2.VideoCapture(0)
            else:
                video_path = project_root_folder
                video_name = "vlog"
                full_original_video_path_name = filename
                video_capture_path = full_original_video_path_name
                if not os.path.isfile(full_original_video_path_name):
                    # Only a message is printed; the download call is commented out.
                    print('Video not found at path ' + full_original_video_path_name + '. Commencing download from YouTube')
                    # Note if the video ever gets removed this will cause issues
                    #YouTube(args.youtube_video_url).streams.first().download(output_path =video_path, filename=video_name)
                video_capture = cv2.VideoCapture(full_original_video_path_name)
            width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
            height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
            videoclip = VideoFileClip(full_original_video_path_name)
            audioclip = videoclip.audio
            video_recording = cv2.VideoWriter(project_root_folder + 'final_video_swap.avi', fourcc, 13,(int(width), int(height)))
            output_video_name = project_root_folder + 'final_video_swap.avi'
            total_frames_passed = 0
            while True:
                try:
                    ret, frame = video_capture.read()
                except Exception as e:
                    break
                if ret:
                    # Skip frames if video is to be sped up
                    if args.video_speedup:
                        total_frames_passed += 1
                        if total_frames_passed % args.video_speedup != 0:
                            continue
                    bounding_boxes, _ = src.align.detect_face.detect_face(frame, minsize, pnet, rnet, onet,threshold, factor)
                    if bounding_boxes is not None:
                        print('maps:' + str(bounding_boxes))
                    faces_found = bounding_boxes.shape[0]
                    #number = len(under_folder)
                    #for n in range(number):
                    #known_name[n] = under_folder[n]
                    #known_name = ['2human']
                    if faces_found > 0:
                        det = bounding_boxes[:, 0:4]
                        # Integer copy of the box corners (x1, y1, x2, y2).
                        bb = np.zeros((faces_found, 4), dtype=np.int32)
                        for i in range(faces_found):
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # Skip boxes that touch or cross the frame border.
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                                print('face is inner of range!')
                                continue
                            cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                            scaled = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                            # cv2.imshow("Cropped and scaled", scaled)
                            # cv2.waitKey(1)
                            scaled = facenet.prewhiten(scaled)
                            # cv2.imshow("\"Whitened\"", scaled)
                            # cv2.waitKey(1)
                            scaled_reshape = scaled.reshape(-1, input_image_size, input_image_size, 3)
                            feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}
                            emb_array = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                            best_name = class_names[best_class_indices[0]]
                            print("Name: {}, Probability: {}".format(best_name, best_class_probabilities))
                            if best_class_probabilities > 0.09:
                                #cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20
                                for j in range(len(known_name)):
                                    if class_names[best_class_indices[0]] == known_name[j]:
                                        # Work on the face box enlarged by 10 px (top/left)
                                        # and 20 px (bottom/right).
                                        # NOTE(review): a negative start index wraps around in
                                        # NumPy slicing when the box is within 10 px of the
                                        # top/left border -- verify.
                                        img2=frame[bb[i][1]-10 : bb[i][3]+20, bb[i][0]-10: bb[i][2]+20]
                                        try:
                                            img2_gray=cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
                                            img2_new_face = np.zeros_like(img2)
                                            faces2 = detector(img2_gray)
                                            print(len(faces2))
                                            # Only the first dlib face in the crop is used.
                                            # NOTE(review): when no face is found,
                                            # ``landmarks_points2`` / ``convexhull2`` keep their
                                            # values from an earlier frame or are undefined (the
                                            # NameError is then swallowed by the except below).
                                            if len(faces2)>0:
                                                face1=faces2[0]
                                                landmarks = predictor(img2_gray, face1)
                                                landmarks_points2 = []
                                                for n in range(0, 68):
                                                    x = landmarks.part(n).x
                                                    y = landmarks.part(n).y
                                                    landmarks_points2.append((x, y))
                                                # for face in faces2:
                                                #     landmarks = predictor(img2_gray, face)
                                                #     landmarks_points2 = []
                                                #     for n in range(0, 68):
                                                #         x = landmarks.part(n).x
                                                #         y = landmarks.part(n).y
                                                #         landmarks_points2.append((x, y))
                                                #         cv2.circle(img2, (x, y), 3, (0, 255, 0), -1)
                                                points2 = np.array(landmarks_points2, np.int32)
                                                convexhull2 = cv2.convexHull(points2)
                                            lines_space_mask = np.zeros_like(img_gray)
                                            lines_space_new_face = np.zeros_like(img2)
                                            # Triangulation of both faces
                                            for triangle_index in indexes_triangles:
                                                # Triangulation of the first face
                                                tr1_pt1 = landmarks_points[triangle_index[0]]
                                                tr1_pt2 = landmarks_points[triangle_index[1]]
                                                tr1_pt3 = landmarks_points[triangle_index[2]]
                                                triangle1 = np.array([tr1_pt1, tr1_pt2, tr1_pt3], np.int32)
                                                rect1 = cv2.boundingRect(triangle1)
                                                (x, y, w, h) = rect1
                                                cropped_triangle = img[y: y + h, x: x + w]
                                                cropped_tr1_mask = np.zeros((h, w), np.uint8)
                                                # Triangle corners relative to its bounding rectangle.
                                                points = np.array([[tr1_pt1[0] - x, tr1_pt1[1] - y],[tr1_pt2[0] - x, tr1_pt2[1] - y],[tr1_pt3[0] - x, tr1_pt3[1] - y]], np.int32)
                                                cv2.fillConvexPoly(cropped_tr1_mask, points, 255)
                                                # Triangulation of second face
                                                tr2_pt1 = landmarks_points2[triangle_index[0]]
                                                tr2_pt2 = landmarks_points2[triangle_index[1]]
                                                tr2_pt3 = landmarks_points2[triangle_index[2]]
                                                triangle2 = np.array([tr2_pt1, tr2_pt2, tr2_pt3], np.int32)
                                                rect2 = cv2.boundingRect(triangle2)
                                                (x, y, w, h) = rect2
                                                # Clamp negative rectangle values to zero.
                                                if x<0:
                                                    x=0
                                                    rect2=(x, y, w, h)
                                                    (x,y,w,h)=rect2
                                                if y<0:
                                                    y=0
                                                    rect2 = (x, y, w, h)
                                                    (x, y, w, h) = rect2
                                                if w<0:
                                                    w=0
                                                    rect2 = (x, y, w, h)
                                                    (x, y, w, h) = rect2
                                                if h<0:
                                                    h=0
                                                    rect2 = (x, y, w, h)
                                                    (x, y, w, h) = rect2
                                                print(rect2)
                                                cropped_tr2_mask = np.zeros((h, w), np.uint8)
                                                points2 = np.array([[tr2_pt1[0] - x, tr2_pt1[1] - y],[tr2_pt2[0] - x, tr2_pt2[1] - y],[tr2_pt3[0] - x, tr2_pt3[1] - y]], np.int32)
                                                cv2.fillConvexPoly(cropped_tr2_mask, points2, 255)
                                                # Warp triangles
                                                points = np.float32(points)
                                                points2 = np.float32(points2)
                                                M = cv2.getAffineTransform(points, points2)
                                                warped_triangle = cv2.warpAffine(cropped_triangle, M, (w, h))
                                                warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle,mask=cropped_tr2_mask)
                                                # Reconstructing destination face
                                                img2_new_face_rect_area = img2_new_face[y: y + h, x: x + w]
                                                img2_new_face_rect_area_gray = cv2.cvtColor(img2_new_face_rect_area,cv2.COLOR_BGR2GRAY)
                                                _, mask_triangles_designed = cv2.threshold(img2_new_face_rect_area_gray, 1,255, cv2.THRESH_BINARY_INV)
                                                _, mask_triangles_designed2 = cv2.threshold(warped_triangle,1, 255,cv2.THRESH_BINARY_INV)
                                                print(len(warped_triangle))
                                                print(len(mask_triangles_designed))
                                                # Mask out already painted pixels only when both
                                                # arrays have the same number of rows.
                                                if len(warped_triangle) == len(mask_triangles_designed):
                                                    warped_triangle = cv2.bitwise_and(warped_triangle, warped_triangle,mask=mask_triangles_designed)
                                                else:
                                                    warped_triangle = warped_triangle
                                                img2_new_face_rect_area = cv2.add(img2_new_face_rect_area, warped_triangle)
                                                img2_new_face[y: y + h, x: x + w] = img2_new_face_rect_area
                                            # Cut the old face out of the crop, add the new
                                            # face, and blend the result into the crop.
                                            img2_face_mask = np.zeros_like(img2_gray)
                                            img2_head_mask = cv2.fillConvexPoly(img2_face_mask, convexhull2, 255)
                                            img2_face_mask = cv2.bitwise_not(img2_head_mask)
                                            img2_head_noface = cv2.bitwise_and(img2, img2, mask=img2_face_mask)
                                            result = cv2.add(img2_head_noface, img2_new_face)
                                            (x, y, w, h) = cv2.boundingRect(convexhull2)
                                            center_face2 = (int((x + x + w) / 2), int((y + y + h) / 2))
                                            seamlessclone = cv2.seamlessClone(result, img2, img2_head_mask, center_face2, cv2.MIXED_CLONE)
                                            frame[bb[i][1]-10 : bb[i][3]+20, bb[i][0]-10: bb[i][2]+20]=seamlessclone
                                            cv2.imshow("result", result)
                                        # Best effort: a failed swap leaves the face
                                        # unchanged and only prints the error.
                                        except Exception as e:
                                            print(e)
                                            pass
                                # cv2.putText(frame, class_names[best_class_indices[0]], (text_x, text_y),cv2.FONT_HERSHEY_COMPLEX_SMALL,1, (0, 0, 255), thickness=1, lineType=2)
                                # person_detected[best_name] += 1
                    # total_frames_passed += 1
                    # if total_frames_passed == 2:
                    # NOTE(review): the increment of ``person_detected`` is commented
                    # out above, so this loop appears to have nothing to iterate over.
                    for person, count in person_detected.items():
                        if count > 4:
                            print("Person Detected: {}, Count: {}".format(person, count))
                            people_detected.add(person)
                    # person_detected.clear()
                    total_frames_passed = 0
                    # cv2.putText(frame, "People detected so far:", (20, 20), cv2.FONT_HERSHEY_PLAIN,
                    # 1, (255, 0, 0), thickness=1, lineType=2)
                    currentYIndex = 40
                    for idx, name in enumerate(people_detected):
                        cv2.putText(frame, name, (20, currentYIndex + 20 * idx), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), thickness=1, lineType=2)
                    cv2.imshow("Face Detection and Identification", frame)
                    video_recording.write(frame)
                    # Pressing "q" in the preview window stops processing.
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    break
            video_recording.release()
            video_capture.release()
            cv2.destroyAllWindows()
            # Re-encode the processed frames together with the audio of the input.
            videoclip2 = VideoFileClip(output_video_name)
            videoclip2.audio = audioclip
            videoclip2.write_videofile(tmp_result)
def calculate_embeddings(self, faces, data_from_pipeline=True, batch_size=100, image_size=160):
    """Compute FaceNet embeddings for ``faces`` with the loaded default graph.

    Args:
        faces: With ``data_from_pipeline=True`` a sequence of face images;
            otherwise a sequence of image paths handed to
            ``facenet.load_data``.
        data_from_pipeline: Selects between in-memory images (one forward
            pass for all faces) and paths (processed in batches).
        batch_size: Number of paths per forward pass; only used when
            ``data_from_pipeline`` is false.
        image_size: Edge length the images are resized / loaded to.

    Returns:
        The embeddings, one row per face. ``None`` when
        ``data_from_pipeline`` is true and ``faces`` is empty.
    """
    graph = tf.get_default_graph()
    images_placeholder = graph.get_tensor_by_name("input:0")
    embeddings = graph.get_tensor_by_name("embeddings:0")
    phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

    if data_from_pipeline:
        # ``len`` rather than truthiness: ``faces`` may be a NumPy array.
        if not len(faces):
            return None

        face_images = []
        for face in faces:
            img = cv2.resize(face, dsize=(image_size, image_size),
                             interpolation=cv2.INTER_CUBIC)
            img = facenet.prewhiten(img)
            img = facenet.crop(img, False, image_size)
            img = facenet.flip(img, False)
            face_images.append(img)

        # All faces go through the network in a single forward pass.
        feed_dict = {
            images_placeholder: np.array(face_images),
            phase_train_placeholder: False
        }
        return self._sess.run(embeddings, feed_dict=feed_dict)

    # Path input: load and embed the images batch by batch.
    embedding_size = embeddings.get_shape()[1]
    nrof_images = len(faces)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size))
    emb_array = np.zeros((nrof_images, embedding_size))
    for batch_index in range(nrof_batches_per_epoch):
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, nrof_images)
        paths_batch = faces[start_index:end_index]
        images = facenet.load_data(paths_batch, False, False, image_size)
        feed_dict = {
            images_placeholder: images,
            phase_train_placeholder: False
        }
        emb_array[start_index:end_index, :] = self._sess.run(
            embeddings, feed_dict=feed_dict)
    return emb_array