class MTCNN_Crop_Face(object):
    """Detect faces with a three-stage MTCNN cascade and return square boxes.

    The detector is built once in ``__init__``; ``cropface`` can then be
    called for every frame.
    """

    # Defaults reproduce the values that used to be hard-coded in __init__.
    DEFAULT_MODEL_PREFIXES = ('./data2/MTCNN_model/PNet_landmark/PNet',
                              './data2/MTCNN_model/RNet_landmark/RNet',
                              './data2/MTCNN_model/ONet_landmark/ONet')
    DEFAULT_EPOCHS = (18, 14, 16)
    DEFAULT_THRESHOLDS = (0.9, 0.7, 0.7)

    def __init__(self, model_prefixes=DEFAULT_MODEL_PREFIXES,
                 epochs=DEFAULT_EPOCHS, thresholds=DEFAULT_THRESHOLDS,
                 min_face_size=32):
        """Load the PNet/RNet/ONet checkpoints and build the detector.

        Args:
            model_prefixes: Three checkpoint path prefixes (PNet, RNet, ONet).
            epochs: Three epoch numbers; each checkpoint path is
                ``'<prefix>-<epoch>'``.
            thresholds: Three values forwarded as ``threshold`` to
                ``MtcnnDetector``.
            min_face_size: Forwarded as ``min_face_size`` to ``MtcnnDetector``.
        """
        stride = 2
        slide_window = False
        model_path = ['%s-%s' % (x, y) for x, y in zip(model_prefixes, epochs)]
        detectors = [
            FcnDetector(P_Net, model_path[0]),
            Detector(R_Net, 24, 1, model_path[1]),
            Detector(O_Net, 48, 1, model_path[2]),
        ]
        self.mtcnn_detector = MtcnnDetector(detectors=detectors,
                                            min_face_size=min_face_size,
                                            stride=stride,
                                            threshold=list(thresholds),
                                            slide_window=slide_window)

    @staticmethod
    def _square_box(bbox):
        """Grow the shorter side of ``bbox`` so the box becomes square.

        The box keeps its centre along the axis that is enlarged.  The result
        is NOT clipped to the image, so coordinates may be negative or exceed
        the image size.

        Args:
            bbox: Sequence whose first four items are x1, y1, x2, y2.

        Returns:
            ``[x1, y1, x2, y2]`` as a list of ints.
        """
        x1, y1, x2, y2 = (int(v) for v in bbox[:4])
        width = abs(x2 - x1)
        height = abs(y2 - y1)
        if height > width:
            # Taller than wide: widen around the horizontal centre.
            return [int(0.5 * (x2 + x1) - 0.5 * height), y1,
                    int(0.5 * (x2 + x1) + 0.5 * height), y2]
        # Wider than tall (or already square): extend around the vertical centre.
        return [x1, int(0.5 * (y2 + y1) - 0.5 * width),
                x2, int(0.5 * (y2 + y1) + 0.5 * width)]

    def cropface(self, frame):
        """Return one square ``[x1, y1, x2, y2]`` box per detected face.

        Args:
            frame: Image accepted by ``np.array`` and by the detector.

        Returns:
            A list of four-int lists; empty when no face is detected.
        """
        image = np.array(frame)
        all_boxes, _ = self.mtcnn_detector.detect(image)
        if all_boxes is None:
            return []
        return [self._square_box(bbox) for bbox in all_boxes]
slide_window=slide_window) path = os.path.join(os.pardir, 'data/test/lfpw_testImage') gt_imdb = [os.path.join(path, item) for item in os.listdir(path)] test_data = TestLoader(gt_imdb) # boxes, landmarks = mtcnn_detector.detect_face(test_data) out_dir = os.path.dirname(gt_imdb[0]) + '_out' if not os.path.exists(out_dir): os.makedirs(out_dir) for path in gt_imdb: print(path) image = cv2.imread(path) boxes, landmarks = mtcnn_detector.detect(image) # show rectangles for bbox in boxes: position = (int(bbox[0]), int(bbox[1])) cv2.putText(image, str(np.round(bbox[4], 2)), position, cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255)) cv2.rectangle(image, position, (int(bbox[2]), int(bbox[3])), (0, 0, 255)) # show landmarks for landmark in landmarks: for i, k in grouper(landmark, 2): cv2.circle(image, (i, k), 3, (0, 0, 255)) path = os.path.join(out_dir, os.path.basename(path))
min_face_size=min_face_size, stride=stride, threshold=thresh, slide_window=slide_window) camera = cv2.VideoCapture(1) name = 'test' count = 0 path = os.getcwd() + '/data-face/' + name if not os.path.exists(path): os.makedirs(path) while True: _, frame = camera.read() gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) boxes_c, _ = mtcnn_detector.detect(frame) for i in range(boxes_c.shape[0]): bbox = boxes_c[i, :4] score = boxes_c[i, 4] corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] cv2.rectangle(frame, (corpbbox[0], corpbbox[1]), (corpbbox[2], corpbbox[3]), (255, 0, 0), 1) cv2.putText(frame, '{:.3f}'.format(score), (corpbbox[0], corpbbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) try: scipy.misc.imsave( path + '/' + name + str(count) + '.png', gray[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]) except:
def build_camera(self, camera_id, path_name):
    """Run live face recognition on a camera stream until 'q' is pressed.

    Builds the three-stage MTCNN detector, opens the camera and, for each
    frame, detects faces, classifies every face crop with
    ``self.model.face_predict`` and draws the name, box and landmarks on the
    frame shown in the "Camera" window.

    Args:
        camera_id: Value passed to ``cv2.VideoCapture``.
        path_name: Passed to ``read_name_list``; the returned list is indexed
            by the predicted label to obtain the display name.
    """
    thresh = [0.9, 0.9, 0.8]
    min_face_size = 81
    stride = 2
    slide_window = False
    prefix = ['../execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model/PNet_landmark/PNet',
              '../execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model/RNet_landmark/RNet',
              '../execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model/ONet_landmark/ONet']
    epoch = [40, 36, 36]
    model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)]
    detectors = [
        FcnDetector(P_Net, model_path[0]),
        Detector(R_Net, 24, 1, model_path[1]),
        Detector(O_Net, 48, 1, model_path[2]),
    ]
    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)

    # read names from dataset
    name_list = read_name_list(path_name)
    cap = cv2.VideoCapture(camera_id)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 528)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 384)
    try:
        while True:
            t1 = cv2.getTickCount()
            success, frame = cap.read()
            if not success:
                print ('device not find')
                break

            thickness = (frame.shape[0] + frame.shape[1]) // 350
            # np.array copies, so `image` stays free of the annotations that
            # are drawn on `frame` below.
            image = np.array(frame)
            boxes_c, landmarks = mtcnn_detector.detect(image)
            t2 = cv2.getTickCount()
            t = (t2 - t1) / cv2.getTickFrequency()
            fps = 1.0 / t
            print('fps:', fps)

            for i in range(boxes_c.shape[0]):
                x1, y1, x2, y2 = (int(v) for v in boxes_c[i, :4])
                padding_h = 0.02 * (y2 - y1)
                padding_w = 0.01 * (x2 - x1)
                # Clamp to 0: a negative slice bound would index from the
                # opposite edge of the image instead of being cut off.
                top = max(int(y1 + padding_h), 0)
                bottom = max(int(y2 - padding_h), 0)
                left = max(int(x1 - padding_w), 0)
                right = max(int(x2 + padding_w), 0)
                # Crop from the clean copy so boxes/text drawn for earlier
                # faces never end up in a later face's crop.
                crop_img = image[top:bottom, left:right]
                if crop_img.size == 0:
                    continue

                label, prob = self.model.face_predict(crop_img)
                if prob > 0.7:
                    show_name = name_list[label]
                else:
                    show_name = 'Stranger'

                person_tag = "%s: %.2f" % (show_name, prob)
                text_start = (max(x1, 10), max(y1, 10))
                cv2.putText(frame, person_tag, text_start,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1)

                # rectangle for face area: nested 1-px rectangles give a
                # border `thickness` pixels wide
                for offset in range(thickness):
                    start = (x1 + offset, y1 + offset)
                    end = (x2 - offset, y2 - offset)
                    frame = cv2.rectangle(frame, start, end, (0, 255, 0), 1)

            # display the landmarks (flat x, y pairs per detected face)
            for i in range(landmarks.shape[0]):
                for j in range(len(landmarks[i]) // 2):
                    cv2.circle(frame,
                               (int(landmarks[i][2 * j]),
                                int(landmarks[i][2 * j + 1])),
                               2, (0, 0, 255))

            cv2.imshow("Camera", frame)
            k = cv2.waitKey(10)
            if k & 0xFF == ord('q'):
                break
    finally:
        # Always free the device and close the window, even on an exception.
        cap.release()
        cv2.destroyAllWindows()
#imdb_['label'] = 5 path = "lala" for item in os.listdir(path): gt_imdb.append(os.path.join(path,item)) test_data = TestLoader(gt_imdb) all_boxes,landmarks = mtcnn_detector.detect_face(test_data) ''' image_path = 'lala/single_face.jpg' img = cv2.imread(image_path) img = cv2.resize(img, (640, 480)) t1 = time.time() boxes_c,landmarks = mtcnn_detector.detect(img) t = time.time() - t1 print('total time', t) for i in range(boxes_c.shape[0]): bbox = boxes_c[i, :4] score = boxes_c[i, 4] corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] # if score > thresh: cv2.rectangle(img, (corpbbox[0], corpbbox[1]), (corpbbox[2], corpbbox[3]), (255, 0, 0), 1) cv2.putText(img, '{:.3f}'.format(score), (corpbbox[0], corpbbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) cv2.imshow('show', img) # exit press any key cv2.waitKey(0) '''
def RTrecognization(facenet_model_path, SVCpath, database_path):
    """Recognise faces from camera 0 in real time until 'q' is pressed.

    Each frame is run through the MTCNN detector; every detected face is
    cropped with a 16-pixel margin, resized to 160x160, embedded with the
    FaceNet graph and classified with the pickled classifier.  The label,
    detection time and FPS are drawn on the frame.

    Args:
        facenet_model_path: Path of the FaceNet model passed to
            ``facenet.load_model``.
        SVCpath: Path of the pickle holding ``(classifier, class_names)``.
        database_path: Path of the face database loaded with ``np.load``.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(facenet_model_path)
            # NOTE(security): pickle.load can execute arbitrary code; only
            # point SVCpath at classifier files you created yourself.
            with open(SVCpath, 'rb') as infile:
                (classifymodel, class_names) = pickle.load(infile)
            print('Loaded classifier model from file "%s"' % SVCpath)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            # Not read by the SVM path below; only a one-by-one distance
            # comparison against Database['emb'] / Database['lab'] would use
            # it.  Still loaded so a bad path fails at start-up as before.
            Database = np.load(database_path)

            thresh = [0.9, 0.6, 0.7]
            min_face_size = 24
            stride = 2
            slide_window = False
            prefix = [
                '../data/MTCNN_model/PNet_landmark/PNet',
                '../data/MTCNN_model/RNet_landmark/RNet',
                '../data/MTCNN_model/ONet_landmark/ONet'
            ]
            epoch = [18, 14, 16]
            model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)]
            detectors = [
                FcnDetector(P_Net, model_path[0]),
                Detector(R_Net, 24, 1, model_path[1]),
                Detector(O_Net, 48, 1, model_path[2]),
            ]
            mtcnn_detector = MtcnnDetector(detectors=detectors,
                                           min_face_size=min_face_size,
                                           stride=stride,
                                           threshold=thresh,
                                           slide_window=slide_window)

            video_capture = cv2.VideoCapture(0)
            video_capture.set(3, 800)
            video_capture.set(4, 800)
            try:
                while True:
                    t1 = cv2.getTickCount()
                    ret, frame = video_capture.read()
                    if not ret:
                        print('device not find')
                        break

                    image = np.array(frame)
                    img_size = np.array(image.shape)[0:2]
                    boxes_c, landmarks = mtcnn_detector.detect(image)
                    t2 = cv2.getTickCount()
                    t = (t2 - t1) / cv2.getTickFrequency()
                    fps = 1.0 / t

                    for i in range(boxes_c.shape[0]):
                        # detected face region: left, top, right, bottom
                        bbox = boxes_c[i, :4]
                        corpbbox = [int(bbox[0]), int(bbox[1]),
                                    int(bbox[2]), int(bbox[3])]
                        # 16-pixel margin, clipped to the image
                        x1 = np.maximum(corpbbox[0] - 16, 0)
                        y1 = np.maximum(corpbbox[1] - 16, 0)
                        x2 = np.minimum(corpbbox[2] + 16, img_size[1])
                        y2 = np.minimum(corpbbox[3] + 16, img_size[0])
                        crop_img = image[y1:y2, x1:x2]
                        if crop_img.size == 0:
                            # Box lies outside the frame; nothing to resize.
                            continue

                        # NOTE(review): if `misc` is scipy.misc, imresize was
                        # removed in SciPy 1.3 - confirm the pinned version.
                        scaled = misc.imresize(crop_img, (160, 160),
                                               interp='bilinear')
                        img = load_image(scaled, False, False, 160)
                        img = np.reshape(img, (-1, 160, 160, 3))
                        feed_dict = {
                            images_placeholder: img,
                            phase_train_placeholder: False
                        }
                        embvecor = sess.run(embeddings, feed_dict=feed_dict)

                        # classify the face embedding with the SVM
                        predictions = classifymodel.predict_proba(embvecor)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[
                            np.arange(len(best_class_indices)),
                            best_class_indices]
                        print(best_class_probabilities)
                        # One face per batch: use scalars so the label is a
                        # plain entry of class_names and the comparison does
                        # not rely on array truthiness.
                        if best_class_probabilities[0] < 0.4:
                            tmp_lable = "others"
                        else:
                            tmp_lable = class_names[int(best_class_indices[0])]

                        cv2.rectangle(frame, (corpbbox[0], corpbbox[1]),
                                      (corpbbox[2], corpbbox[3]),
                                      (255, 0, 0), 1)
                        cv2.putText(frame, '{0}'.format(tmp_lable),
                                    (corpbbox[0], corpbbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 0, 255), 2)

                    cv2.putText(
                        frame,
                        '{:.4f}'.format(t) + " " + '{:.3f}'.format(fps),
                        (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 255), 2)
                    for i in range(landmarks.shape[0]):
                        for j in range(len(landmarks[i]) // 2):
                            cv2.circle(frame,
                                       (int(landmarks[i][2 * j]),
                                        int(landmarks[i][2 * j + 1])),
                                       2, (0, 0, 255))
                    cv2.imshow("", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Always free the camera and close the window.
                video_capture.release()
                cv2.destroyAllWindows()
# 如果从自己laptop 摄像头读入,cv2.VideoCapture(videopath) ===> cv2.VideoCapture(0) # cv2.VideoCapture https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html video_capture = cv2.VideoCapture( videopath) # cv2.VideoCapture(0) for laptop camera video_capture.set(3, 340) video_capture.set(4, 480) corpbbox = None while True: # fps = video_capture.get(cv2.CAP_PROP_FPS) t1 = cv2.getTickCount() ret, frame = video_capture.read( ) # capture frame-by-frame. read() returns a bool(True/False). If frame is read correctly, it's True. if ret: image = np.array(frame) boxes_c, landmarks = mtcnn_detector.detect( image) # use mtcnn detector to read image t2 = cv2.getTickCount() t = (t2 - t1) / cv2.getTickFrequency() fps = 1.0 / t for i in range(boxes_c.shape[0]): bbox = boxes_c[i, :4] score = boxes_c[i, 4] corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] # if score > thresh: cv2.rectangle(frame, (corpbbox[0], corpbbox[1]), (corpbbox[2], corpbbox[3]), (255, 0, 0), 1) cv2.putText(frame, '{:.3f}'.format(score), (corpbbox[0], corpbbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) cv2.putText(frame, '{:.4f}'.format(t) + " " + '{:.3f}'.format(fps),
detectors[2] = ONet videopath = "./video_test.avi" mtcnn_detector = MtcnnDetector(detectors=detectors, min_face_size=min_face_size, stride=stride, threshold=thresh, slide_window=slide_window) video_capture = cv2.VideoCapture(videopath) video_capture.set(3, 340) video_capture.set(4, 480) corpbbox = None while True: # fps = video_capture.get(cv2.CAP_PROP_FPS) t1 = cv2.getTickCount() ret, frame = video_capture.read() if ret: image = np.array(frame) boxes_c,landmarks = mtcnn_detector.detect(image) print landmarks.shape t2 = cv2.getTickCount() t = (t2 - t1) / cv2.getTickFrequency() fps = 1.0 / t for i in range(boxes_c.shape[0]): bbox = boxes_c[i, :4] score = boxes_c[i, 4] corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] # if score > thresh: cv2.rectangle(frame, (corpbbox[0], corpbbox[1]), (corpbbox[2], corpbbox[3]), (255, 0, 0), 1) cv2.putText(frame, '{:.3f}'.format(score), (corpbbox[0], corpbbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) cv2.putText(frame, '{:.4f}'.format(t) + " " + '{:.3f}'.format(fps), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
def RTrecognization(facenet_model_path,SVCpath,database_path):
    """Recognise faces from camera 0 in real time until 'q' is pressed.

    Each frame is run through the MTCNN detector; every detected face is
    cropped with a 16-pixel margin, resized to 160x160, embedded with the
    FaceNet graph and classified with the pickled classifier.  The label,
    detection time and FPS are drawn on the frame.

    Args:
        facenet_model_path: Path of the FaceNet model passed to
            ``facenet.load_model``.
        SVCpath: Path of the pickle holding ``(classifier, class_names)``.
        database_path: Path of the face database loaded with ``np.load``.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(facenet_model_path)
            # NOTE(security): pickle.load can execute arbitrary code; only
            # point SVCpath at classifier files you created yourself.
            with open(SVCpath, 'rb') as infile:
                (classifymodel, class_names) = pickle.load(infile)
            print('Loaded classifier model from file "%s"' % SVCpath)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            # Not read by the SVM path below; only a one-by-one distance
            # comparison against Database['emb'] / Database['lab'] would use
            # it.  Still loaded so a bad path fails at start-up as before.
            Database = np.load(database_path)

            # thresh: one detection threshold per cascade stage
            # (PNet, RNet, ONet); min_face_size: smallest face to detect.
            thresh = [0.9, 0.6, 0.7]
            min_face_size = 24
            stride = 2
            slide_window = False
            prefix = ['../data/MTCNN_model/PNet_landmark/PNet',
                      '../data/MTCNN_model/RNet_landmark/RNet',
                      '../data/MTCNN_model/ONet_landmark/ONet']
            epoch = [18, 14, 16]
            model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)]
            detectors = [
                FcnDetector(P_Net, model_path[0]),
                Detector(R_Net, 24, 1, model_path[1]),
                Detector(O_Net, 48, 1, model_path[2]),
            ]
            mtcnn_detector = MtcnnDetector(detectors=detectors,
                                           min_face_size=min_face_size,
                                           stride=stride,
                                           threshold=thresh,
                                           slide_window=slide_window)

            video_capture = cv2.VideoCapture(0)
            video_capture.set(3, 800)
            video_capture.set(4, 800)
            try:
                while True:
                    t1 = cv2.getTickCount()
                    ret, frame = video_capture.read()
                    if not ret:
                        print('device not find')
                        break

                    image = np.array(frame)
                    img_size = np.array(image.shape)[0:2]
                    boxes_c, landmarks = mtcnn_detector.detect(image)
                    t2 = cv2.getTickCount()
                    t = (t2 - t1) / cv2.getTickFrequency()
                    fps = 1.0 / t

                    for i in range(boxes_c.shape[0]):
                        # detected face region: left, top, right, bottom
                        bbox = boxes_c[i, :4]
                        corpbbox = [int(bbox[0]), int(bbox[1]),
                                    int(bbox[2]), int(bbox[3])]
                        # 16-pixel margin, clipped to the image
                        x1 = np.maximum(corpbbox[0] - 16, 0)
                        y1 = np.maximum(corpbbox[1] - 16, 0)
                        x2 = np.minimum(corpbbox[2] + 16, img_size[1])
                        y2 = np.minimum(corpbbox[3] + 16, img_size[0])
                        crop_img = image[y1:y2, x1:x2]
                        if crop_img.size == 0:
                            # Box lies outside the frame; nothing to resize.
                            continue

                        # NOTE(review): if `misc` is scipy.misc, imresize was
                        # removed in SciPy 1.3 - confirm the pinned version.
                        scaled = misc.imresize(crop_img, (160, 160),
                                               interp='bilinear')
                        img = load_image(scaled, False, False, 160)
                        img = np.reshape(img, (-1, 160, 160, 3))
                        feed_dict = {
                            images_placeholder: img,
                            phase_train_placeholder: False
                        }
                        embvecor = sess.run(embeddings, feed_dict=feed_dict)

                        # Classify the face embedding with the SVM:
                        # predict_proba gives one probability per known
                        # person; argmax picks the most likely one.
                        predictions = classifymodel.predict_proba(embvecor)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[
                            np.arange(len(best_class_indices)),
                            best_class_indices]
                        print(best_class_probabilities)
                        # One face per batch: use scalars so the label is a
                        # plain entry of class_names and the comparison does
                        # not rely on array truthiness.
                        if best_class_probabilities[0] < 0.4:
                            tmp_lable = "unknown"
                        else:
                            tmp_lable = class_names[int(best_class_indices[0])]

                        cv2.rectangle(frame, (corpbbox[0], corpbbox[1]),
                                      (corpbbox[2], corpbbox[3]),
                                      (255, 0, 0), 1)
                        cv2.putText(frame, '{0}'.format(tmp_lable),
                                    (corpbbox[0], corpbbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 0, 255), 2)

                    cv2.putText(frame,
                                '{:.4f}'.format(t) + " " + '{:.3f}'.format(fps),
                                (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 0, 255), 2)
                    for i in range(landmarks.shape[0]):
                        for j in range(len(landmarks[i]) // 2):
                            cv2.circle(frame,
                                       (int(landmarks[i][2 * j]),
                                        int(landmarks[i][2 * j + 1])),
                                       2, (0, 0, 255))
                    cv2.imshow("", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Always free the camera and close the window.
                video_capture.release()
                cv2.destroyAllWindows()
if len(h264[n]) < 1000: count += 1 if count == 3: break temp.append(h264[n]) break if k > 2: with open('temp.h264', 'wb') as fopen: fopen.write(header + b''.join(temp[::-1])) h264.clear() cap = cv2.VideoCapture('temp.h264') while True: try: last_time = time.time() ret, img = cap.read() boxes_c, _ = mtcnn_detector.detect(img) for u in range(boxes_c.shape[0]): bbox = boxes_c[u, :4] # tl,tr,bl,br = [int(bbox[0]),int(bbox[1])],[int(bbox[2]),int(bbox[1])],[int(bbox[0]),int(bbox[3])],[int(bbox[2]),int(bbox[3])] # (tltrX, tltrY) = midpoint(tl, tr) # (blbrX, blbrY) = midpoint(bl, br) # (tlblX, tlblY) = midpoint(tl, bl) # (trbrX, trbrY) = midpoint(tr, br) # print(land()) # # virtual width # dA = dist.euclidean((tltrX, tltrY), (blbrX, blbrY)) # # virtual height # dB = dist.euclidean((tlblX, tlblY), (trbrX, trbrY)) # distance = distance_to_camera(initial_flight_height, focal_length, dA) # print(distance) visualization_utils.draw_bounding_box_on_image_array(
detectors[1] = RNet ONet = Detector(O_Net, 48, 1, model_path[2]) detectors[2] = ONet image_path = "/home/tamvm/Pictures/test1/test_face_detect_2.jpeg" mtcnn_detector = MtcnnDetector(detectors=detectors, min_face_size=min_face_size, scale_factor=scale_factor, stride=stride, threshold=thresh, slide_window=slide_window) # fps = video_capture.get(cv2.CAP_PROP_FPS) t1 = cv2.getTickCount() image = cv2.imread(image_path) image = cv2.resize(image, (input_img_size, input_img_size)) # frame = load_image_into_numpy_array(image) frame = image boxes_c, landmarks = mtcnn_detector.detect(frame) print(landmarks.shape) t2 = cv2.getTickCount() t = (t2 - t1) / cv2.getTickFrequency() fps = 1.0 / t for i in range(boxes_c.shape[0]): bbox = boxes_c[i, :4] score = boxes_c[i, 4] corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] # if score > thresh: cv2.rectangle(frame, (corpbbox[0], corpbbox[1]), (corpbbox[2], corpbbox[3]), (255, 0, 0), 1) cv2.putText(frame, '{:.3f}'.format(score), (corpbbox[0], corpbbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) cv2.putText(frame, '{:.4f}'.format(t) + " " + '{:.3f}'.format(fps), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
def build_camera(self, camera_id, path):
    """Run live face identification on a camera stream.

    Builds the three-stage MTCNN detector, then for each frame detects
    faces, embeds every face crop with ``self.get_feature`` and labels it
    with the stored name whose normalised embedding has the smallest squared
    Euclidean distance.  The loop stops when 'q' is pressed, after ``count``
    frames have been shown, or when a frame cannot be read.

    Args:
        camera_id: Value passed to ``cv2.VideoCapture``.
        path: Passed to ``self.read_feature``; the result is iterated as a
            mapping of name -> 256-value feature.
    """
    count = 500  # maximum number of frames to process
    thresh = [0.9, 0.9, 0.8]
    min_face_size = 100
    stride = 2
    slide_window = False
    prefix = [
        'E:/sign_system/execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model_V2/PNet_landmark/PNet',
        'E:/sign_system/execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model_V2/RNet_landmark/RNet',
        'E:/sign_system/execute_system/MTCNN_Tensorflow_fast/data/MTCNN_model_V2/ONet_landmark/ONet'
    ]
    epoch = [40, 36, 36]
    model_path = ['%s-%s' % (x, y) for x, y in zip(prefix, epoch)]
    detectors = [
        FcnDetector(P_Net, model_path[0]),
        Detector(R_Net, 24, 1, model_path[1]),
        Detector(O_Net, 48, 1, model_path[2]),
    ]
    mtcnn_detector = MtcnnDetector(detectors=detectors,
                                   min_face_size=min_face_size,
                                   stride=stride,
                                   threshold=thresh,
                                   slide_window=slide_window)
    cap = cv2.VideoCapture(camera_id)
    num = 0
    cur = self.read_feature(path)
    try:
        while True:
            success, frame = cap.read()
            if not success:
                # Checked before touching `frame`: a failed read gives no
                # usable image.
                print('device not find')
                break

            thickness = (frame.shape[0] + frame.shape[1]) // 350
            t1 = time.time()
            # np.array copies, so `image` stays free of the annotations that
            # are drawn on `frame` below.
            image = np.array(frame)
            boxes_c, landmarks = mtcnn_detector.detect(image)

            for i in range(boxes_c.shape[0]):
                x1, y1, x2, y2 = (int(v) for v in boxes_c[i, :4])
                padding_h = 0.02 * (y2 - y1)
                padding_w = 0.01 * (x2 - x1)
                # Clamp to 0: a negative slice bound would index from the
                # opposite edge of the image instead of being cut off.
                top = max(int(y1 + padding_h), 0)
                bottom = max(int(y2 - padding_h), 0)
                left = max(int(x1 - padding_w), 0)
                right = max(int(x2 + padding_w), 0)
                crop_img = image[top:bottom, left:right]
                if crop_img.size == 0:
                    # cv2.resize cannot handle an empty crop.
                    continue

                face = cv2.resize(crop_img, (96, 96),
                                  interpolation=cv2.INTER_CUBIC)
                face = face.astype('float32')
                face = face - 127.5
                face = face * 0.0078125
                f1_emb = self.get_feature(face)
                f1 = f1_emb.reshape(256)

                # compute the distance to every stored feature
                d1 = 100
                show_name = ''
                embed1 = sklearn.preprocessing.normalize(f1_emb)
                for n, v in cur.items():
                    v = np.array(v)
                    embed2 = sklearn.preprocessing.normalize(v.reshape(1, 256))
                    diff = np.subtract(embed1, embed2)
                    # Sum over one row gives a one-element array; take the
                    # scalar for comparison and formatting.
                    dist = float(np.sum(np.square(diff), 1)[0])
                    v_flat = v.reshape(256)
                    d = np.dot(v_flat, f1) / (np.linalg.norm(v_flat) *
                                              np.linalg.norm(f1))
                    print(
                        "name: %s total cosin distance %f and Euclidean distance %f"
                        % (n, d, dist))
                    if dist < d1:
                        d1 = dist
                        show_name = str(n)

                t2 = time.time()
                delta_t = t2 - t1
                text_start = (max(x1, 10), max(y1, 10))
                cv2.putText(frame, show_name, text_start,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1)
                cv2.putText(frame, "time cost:" + '%.04f' % delta_t, (10, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)

                # rectangle for face area: nested 1-px rectangles give a
                # border `thickness` pixels wide
                for offset in range(thickness):
                    start = (x1 + offset, y1 + offset)
                    end = (x2 - offset, y2 - offset)
                    frame = cv2.rectangle(frame, start, end, (0, 255, 0), 1)

            # display the landmarks (flat x, y pairs per detected face)
            for i in range(landmarks.shape[0]):
                for j in range(len(landmarks[i]) // 2):
                    cv2.circle(frame,
                               (int(landmarks[i][2 * j]),
                                int(landmarks[i][2 * j + 1])),
                               2, (0, 0, 255))

            num = num + 1
            cv2.imshow("Camera", frame)
            k = cv2.waitKey(10)
            # leave the loop on 'q' or once `count` frames were processed
            if (k & 0xFF == ord('q') or count == num):
                break
    finally:
        # Always free the device and close the window, even on an exception.
        cap.release()
        cv2.destroyAllWindows()