def videoVivoRec():
    CHUNK = 1024
    dir_voces = './voces/'
    nombres_hablados = []
    # angelica = wave.open(f'{dir_voces}/angelica.wav', 'rb')
    # voces = []
    modeldir = './modelo/modelo_preentrenado_caras.pb'
    classifier_filename = './clase/clasificador.pkl'
    npy = './npy'
    train_img = "./imagenes_entrenamiento"

    def speech(wf=''):
        # Play a WAV file through the default audio output, chunk by chunk.
        p = pyaudio.PyAudio()
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        data = wf.readframes(CHUNK)
        while len(data) > 0:
            stream.write(data)
            data = wf.readframes(CHUNK)
        stream.stop_stream()
        stream.close()
        p.terminate()

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
            minsize = 20                 # minimum face size
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160
            HumanNames = os.listdir(train_img)
            HumanNames.sort()
            print('Loading model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            video_capture = cv2.VideoCapture(0)  # switched to channel 1 (phone camera)
            c = 0
            print('Starting recognition!')
            prevTime = 0
            while True:
                ret, frame = video_capture.read()
                frame = cv2.resize(frame, (0, 0), fx=1, fy=1)  # resize frame (optional)
                curTime = time.time() + 1  # for fps calculation
                timeF = frame_interval
                if (c % timeF == 0):
                    find_results = []
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Number of faces detected: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # skip boxes that touch or leave the frame border
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                    or bb[i][3] >= len(frame):
                                print('Face is too close!')
                                continue
                            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            cropped[i] = facenet.flip(cropped[i], False)
                            scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                                        interp='bilinear'))
                            scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                                   interpolation=cv2.INTER_CUBIC)
                            scaled[i] = facenet.prewhiten(scaled[i])
                            scaled_reshape.append(
                                scaled[i].reshape(-1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape[i],
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            print(predictions)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)), best_class_indices]
                            print(best_class_indices, ' with accuracy ', best_class_probabilities)
                            if best_class_probabilities[0] > 0.53:
                                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                              (66, 153, 236), 1)  # box the face
                                # plot the result index below the box
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20
                                prob_x = bb[i][0] + 15
                                prob_y = bb[i][1] - 10
                                print('Result indices: ', best_class_indices[0])
                                print(HumanNames)
                                for H_i in HumanNames:
                                    if HumanNames[best_class_indices[0]] == H_i:
                                        result_names = HumanNames[best_class_indices[0]]
                                        dec = np.round(best_class_probabilities, 4)
                                        cv2.putText(frame, str(dec), (prob_x, prob_y),
                                                    cv2.FONT_ITALIC, 0.5, (30, 103, 202),
                                                    thickness=1, lineType=1)
                                        cv2.putText(frame, result_names, (text_x, text_y),
                                                    cv2.FONT_HERSHEY_DUPLEX, 0.7, (8, 6, 98),
                                                    thickness=1, lineType=0)
                                        wf = wave.open(f'{dir_voces}/{result_names}.wav', 'rb')
                                        # print('length: ', len(nombres_hablados))
                                        # speak each recognized name only once
                                        if result_names not in nombres_hablados:
                                            nombres_hablados.append(result_names)
                                            speech(wf)
                                            print('hello')
                    else:
                        print('Alignment failure')
                # c += 1
                marco_display = cv2.resize(frame, (1200, 650), interpolation=cv2.INTER_CUBIC)
                cv2.imshow('Detecting faces live..', marco_display)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            cv2.destroyAllWindows()
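# The crop -> resize -> prewhiten -> embed sequence above is repeated in every
# recognition loop in this file. A minimal, self-contained sketch of that
# preprocessing step (`preprocess_face` is an illustrative helper, not part of
# this codebase; the zero-mean/unit-variance scaling reimplements what
# facenet.prewhiten is expected to do):
import cv2
import numpy as np

def preprocess_face(frame, box, out_size=160):
    """Crop one face box from `frame` and normalize it for the embedding net."""
    x1, y1, x2, y2 = [int(v) for v in box]
    face = frame[y1:y2, x1:x2, :]
    face = cv2.resize(face, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    face = face.astype(np.float64)
    mean, std = face.mean(), face.std()
    std_adj = np.maximum(std, 1.0 / np.sqrt(face.size))  # guard against a near-zero std
    face = (face - mean) / std_adj
    return face.reshape(-1, out_size, out_size, 3)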
def video_svm():
    # input_video = "akshay_mov.mp4"
    modeldir = './modelo_transferlearning/20170511-185253.pb'
    classifier_filename = './resultados/classifier2.pkl'
    npy = './npy'
    train_img = './static/photos'
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
            minsize = 20                 # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160
            HumanNames = os.listdir(train_img)
            HumanNames.sort()
            print('Loading model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            video_capture = cv2.VideoCapture(0)
            c = 0
            print('Recognition starts')
            prevTime = 0
            while True:
                ret, frame = video_capture.read()
                # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
                curTime = time.time() + 1  # for fps calculation
                timeF = frame_interval
                if (c % timeF == 0):
                    find_results = []
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Number of faces detected: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # skip boxes outside the frame
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                    or bb[i][3] >= len(frame):
                                print('Face is very close!')
                                continue
                            if (i > len(cropped)):
                                print('Running')
                                break
                            else:
                                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                                cropped[i] = facenet.flip(cropped[i], False)
                                scaled.append(misc.imresize(cropped[i],
                                                            (image_size, image_size),
                                                            interp='bilinear'))
                                scaled[i] = cv2.resize(scaled[i],
                                                       (input_image_size, input_image_size),
                                                       interpolation=cv2.INTER_CUBIC)
                                scaled[i] = facenet.prewhiten(scaled[i])
                                scaled_reshape.append(scaled[i].reshape(
                                    -1, input_image_size, input_image_size, 3))
                                feed_dict = {images_placeholder: scaled_reshape[i],
                                             phase_train_placeholder: False}
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)), best_class_indices]
                                # print("predictions")
                                print(best_class_indices, ' with accuracy ',
                                      best_class_probabilities)
                                # print(best_class_probabilities)
                                # if best_class_probabilities[0]:
                                cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                              (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # box the face
                                # plot the result index under the box
                                text_x = bb[i][0]
                                text_y = bb[i][3] + 20
                                print('Result vector index: ', best_class_indices[0])
                                print(HumanNames)
                                for H_i in HumanNames:
                                    if HumanNames[best_class_indices[0]] == H_i:
                                        result_names = HumanNames[best_class_indices[0]]
                                        # text = "{:.2f}%".format(best_class_probabilities*100)
                                        text = '{}: {:.2f}%'.format(
                                            result_names, best_class_probabilities[0] * 100)
                                        cv2.putText(frame, text, (text_x, text_y),
                                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                                    (0, 0, 255), thickness=1, lineType=2)
                                # else:  # commented-out "Unknown" fallback for low confidence
                                #     cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                #                   (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                                #     text_x = bb[i][0]
                                #     text_y = bb[i][3] + 20
                                #     cv2.putText(frame, 'Desconocido', (text_x, text_y),
                                #                 cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                #                 (0, 0, 255), thickness=1, lineType=2)
                    else:
                        print('Alignment failure')
                # c += 1
                cv2.imshow('Press "q" to close', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            cv2.destroyAllWindows()
    return render_template('index.html')
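# video_svm() always draws the top class; the commented-out branch above hints
# at an "Unknown" fallback for low-confidence matches. A hedged sketch of that
# decision rule (the 0.5 cutoff is an assumption, not a value from this file):
import numpy as np

def label_for(prediction_row, class_names, cutoff=0.5):
    """Return the best class name if its probability clears `cutoff`, else 'Unknown'."""
    idx = int(np.argmax(prediction_row))
    return class_names[idx] if prediction_row[idx] > cutoff else 'Unknown'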
def main(args):
    videoLink = args.video_link
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            minsize = 20                 # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            frame_interval = args.frame_interval
            batch_size = 1000
            image_size = 182
            input_image_size = 160
            max_age = args.max_age
            print('Loading feature extraction model')
            modeldir = args.modeldir
            debug = args.debug
            print("Debug: ", debug)
            if debug == 'True':  # args.debug arrives as a string
                debug = True
            else:
                debug = False
            if debug:
                print("videoLink: ", args.video_link)
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename = args.classifier_filename
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            # video_capture = cv2.VideoCapture(0)  # webcam
            video_capture = cv2.VideoCapture(args.video_link)
            c = 0
            fid = 0
            faces = []
            target_distance = args.target_distance
            print('Start Recognition!')
            prevTime = 0
            while True:
                ret, frame = video_capture.read()
                frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
                curTime = time.time() + 1  # for fps calculation
                timeF = frame_interval
                new = True
                show = False
                for i in faces:
                    i.age_one()
                if (c % timeF == 0):
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # skip boxes outside the frame
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                    or bb[i][3] >= len(frame):
                                if debug:
                                    print('face is out of range!')
                                continue
                            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            try:
                                cropped[i] = facenet.flip(cropped[i], False)
                            except:
                                continue
                            if debug:
                                print('Processing Status: PROCESSING FRAME')
                            scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                                        interp='bilinear'))
                            scaled[i] = cv2.resize(scaled[i],
                                                   (input_image_size, input_image_size),
                                                   interpolation=cv2.INTER_CUBIC)
                            scaled[i] = facenet.prewhiten(scaled[i])
                            scaled_reshape.append(scaled[i].reshape(
                                -1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape[i],
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)), best_class_indices]
                            # plot the result index under the box
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            if debug:
                                print('frame_interval: ', frame_interval)
                            # track faces
                            result_names = class_names[best_class_indices[0]]
                            for k in faces:
                                # print(best_class_probabilities[0])
                                if abs(bb[i][0] - k.getX()) <= target_distance \
                                        and abs(bb[i][1] - k.getY()) <= target_distance \
                                        and k.getDone() is False:
                                    if debug:
                                        print(k.getAge(), 'X Diff: ', abs(bb[i][0] - k.getX()),
                                              'Y Diff: ', abs(bb[i][1] - k.getY()))
                                    new = False
                                    if best_class_probabilities[0] > 0.20:
                                        k.updateCoords(bb[i][0], bb[i][1])
                                        k.updateConfidence(best_class_probabilities[0])
                                        result_names = class_names[best_class_indices[0]]
                                        k.updateStaffID(result_names.split(' ')[0])
                                        k.updateName(result_names.split(' ')[1])
                                    if k.getAge() > 1:
                                        show = True
                                    color = k.getRGB()
                                    counter = Counter(k.getName())
                                    most_common = counter.most_common()
                                    if debug:
                                        print('Show: ', show)
                                        print(most_common)
                                    if show:
                                        if len(most_common) >= 2:
                                            f_n, f_v = most_common[0]
                                            s_n, s_v = most_common[1]
                                            if f_n != 'Unk':
                                                name_to_show = f_n
                                                # name_to_show = name_mode
                                            else:
                                                name_to_show = s_n
                                        if len(most_common) == 1:
                                            f_n, f_v = most_common[0]
                                            name_to_show = f_n
                                    # print(name_to_show)
                            if new:
                                f = Face.MyFace(fid, bb[i][0], bb[i][1], max_age)
                                f.updateConfidence(best_class_probabilities[0])
                                result_names = class_names[best_class_indices[0]]
                                f.updateStaffID(result_names.split(' ')[0])
                                name = result_names.split(' ')[1]
                                f.updateName(name)
                                color = f.getRGB()
                                faces.append(f)
                                fid += 1
                                name_to_show = ''
                            # note: name_to_show may still be unset here if an old,
                            # not-yet-shown track matched; original behavior kept
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                          (bb[i][2], bb[i][3]), color, 2)  # box the face
                            if name_to_show == 'Unk':
                                name_to_show = 'Unknown'
                            if debug:
                                print('Detected As: ', name_to_show)
                            cv2.putText(frame, name_to_show, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color,
                                        thickness=1, lineType=2)
                    else:
                        if debug:
                            print('Unable to align')
                else:
                    if debug:
                        print('Processing Status: NOT PROCESSING FRAME')
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = det[i][0]
                            bb[i][1] = det[i][1]
                            bb[i][2] = det[i][2]
                            bb[i][3] = det[i][3]
                            # skip boxes outside the frame
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                    or bb[i][3] >= len(frame):
                                if debug:
                                    print('face is out of range!')
                                continue
                            for k in faces:
                                # print(best_class_probabilities[0])
                                if abs(bb[i][0] - k.getX()) <= target_distance \
                                        and abs(bb[i][1] - k.getY()) <= target_distance \
                                        and k.getDone() is False:
                                    if debug:
                                        print(k.getAge(), 'X Diff: ', abs(bb[i][0] - k.getX()),
                                              'Y Diff: ', abs(bb[i][1] - k.getY()))
                                    if k.getAge() > 1:
                                        show = True
                                    color = k.getRGB()
                                    counter = Counter(k.getName())
                                    most_common = counter.most_common()
                                    text_x = bb[i][0]
                                    text_y = bb[i][3] + 20
                                    if debug:
                                        print('Show: ', show)
                                        print(most_common)
                                    if show:
                                        if len(most_common) >= 2:
                                            f_n, f_v = most_common[0]
                                            s_n, s_v = most_common[1]
                                            if f_n != 'Unk':
                                                name_to_show = f_n
                                                # name_to_show = name_mode
                                            else:
                                                name_to_show = s_n
                                        elif len(most_common) == 1:
                                            f_n, f_v = most_common[0]
                                            name_to_show = f_n
                                    else:
                                        name_to_show = 'Unknown'
                                    cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                                  (bb[i][2], bb[i][3]), color, 2)  # box the face
                                    if name_to_show == 'Unk':
                                        name_to_show = 'Unknown'
                                    if debug:
                                        print('Detected As: ', name_to_show)
                                    cv2.putText(frame, name_to_show, (text_x, text_y),
                                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color,
                                                thickness=1, lineType=2)
                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / (sec)
                fps_text = 'FPS: %2.3f' % fps  # renamed from `str` to avoid shadowing the builtin
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                            thickness=1, lineType=2)
                c += 1
                if frame.shape[0] < 1000:
                    frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            # #video writer
            # out.release()
            cv2.destroyAllWindows()
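# The loops in this file compute FPS as 1 / (curTime - prevTime), with a `+ 1`
# offset on curTime that avoids a division blow-up on the first frame at the
# cost of a meaningless first reading. A hedged alternative sketch
# (`FpsCounter` is illustrative, not part of this codebase):
import time

class FpsCounter:
    """Rolling frames-per-second estimate based on inter-frame wall time."""

    def __init__(self):
        self.prev = None

    def tick(self):
        now = time.time()
        fps = 0.0 if self.prev is None else 1.0 / max(now - self.prev, 1e-6)
        self.prev = now
        return fps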
def Recognize(idList):
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
            minsize = 30                 # minimum size of face
            threshold = [0.7, 0.8, 0.8]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            batch_size = 100  # 1000
            image_size = 182
            input_image_size = 160
            HumanNames = os.listdir(train_img)
            HumanNames.sort()
            print('Loading Model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile, encoding='latin1')

            video_capture = cv2.VideoCapture(video)
            print('Start Recognition')
            while True:
                ret, frame = video_capture.read()
                # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
                timer = time.time()
                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                bounding_boxes, _ = detect_face.detect_face(
                    frame, minsize, pnet, rnet, onet, threshold, factor)
                faceNum = bounding_boxes.shape[0]
                if faceNum > 0:
                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(frame.shape)[0:2]
                    cropped = []
                    scaled = []
                    scaled_reshape = []
                    for i in range(faceNum):
                        emb_array = np.zeros((1, embedding_size))
                        xmin = int(det[i][0])
                        ymin = int(det[i][1])
                        xmax = int(det[i][2])
                        ymax = int(det[i][3])
                        try:
                            # skip boxes outside the frame
                            if xmin <= 0 or ymin <= 0 or xmax >= len(frame[0]) \
                                    or ymax >= len(frame):
                                print('Face is very close!')
                                continue
                            cropped.append(frame[ymin:ymax, xmin:xmax, :])
                            cropped[i] = facenet.flip(cropped[i], False)
                            scaled.append(np.array(Image.fromarray(cropped[i]).resize(
                                (image_size, image_size))))
                            scaled[i] = cv2.resize(scaled[i],
                                                   (input_image_size, input_image_size),
                                                   interpolation=cv2.INTER_CUBIC)
                            scaled[i] = facenet.prewhiten(scaled[i])
                            scaled_reshape.append(scaled[i].reshape(
                                -1, input_image_size, input_image_size, 3))
                            feed_dict = {images_placeholder: scaled_reshape[i],
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)), best_class_indices]
                            if best_class_probabilities[0] > 0.87:
                                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                              (0, 255, 0), 2)  # box the face
                                for H_i in HumanNames:
                                    if HumanNames[best_class_indices[0]] == H_i:
                                        result_ids = HumanNames[best_class_indices[0]]
                                        result_names = "?"
                                        profile = GetProfile(result_ids)
                                        if profile is not None:
                                            result_names = profile[1]
                                        if int(result_ids) not in idList:
                                            idList.append(int(result_ids))
                                        print("Predictions : [ name: {} , accuracy: {:.3f} ]"
                                              .format(HumanNames[best_class_indices[0]],
                                                      best_class_probabilities[0]))
                                        cv2.rectangle(frame, (xmin, ymin - 20),
                                                      (xmax, ymin - 2), (0, 255, 255), -1)
                                        cv2.putText(frame, result_names, (xmin, ymin - 5),
                                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                                    (0, 0, 0), thickness=1, lineType=1)
                            else:
                                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                              (0, 255, 0), 2)
                                cv2.rectangle(frame, (xmin, ymin - 20), (xmax, ymin - 2),
                                              (0, 255, 255), -1)
                                cv2.putText(frame, "?", (xmin, ymin - 5),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                                            thickness=1, lineType=1)
                        except:
                            print("error")
                endtimer = time.time()
                fps = 1 / (endtimer - timer)
                cv2.rectangle(frame, (15, 30), (135, 60), (0, 255, 255), -1)
                cv2.putText(frame, "fps: {:.2f}".format(fps), (20, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                cv2.imshow('Face Recognition', frame)
                key = cv2.waitKey(1)
                if key == 113:  # "q"
                    break
            video_capture.release()
            cv2.destroyAllWindows()
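# Every loop in this file drops faces whose box touches the frame border
# ("Face is very close!"). If partially visible faces should still be
# recognized, clipping the box to the frame is an alternative; a sketch
# (`clip_box` is an illustrative helper, not part of this codebase):
def clip_box(xmin, ymin, xmax, ymax, width, height):
    """Clamp box corners to the frame; return None for degenerate boxes."""
    xmin, ymin = max(xmin, 0), max(ymin, 0)
    xmax, ymax = min(xmax, width - 1), min(ymax, height - 1)
    if xmax <= xmin or ymax <= ymin:
        return None
    return xmin, ymin, xmax, ymax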
def predict(img_path):
    global sess, pnet, rnet, onet, embedding_size, images_placeholder, embeddings, model, \
        HumanNames, phase_train_placeholder
    minsize = 20                 # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three-step threshold
    factor = 0.709               # scale factor
    margin = 44
    frame_interval = 3
    batch_size = 1000
    image_size = 182
    input_image_size = 160
    c = 0
    print('Start Recognition!')
    prevTime = 0
    # ret, frame = video_capture.read()
    frame = cv2.imread(img_path)
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
    curTime = time.time() + 1  # for fps calculation
    timeF = frame_interval
    if (c % timeF == 0):
        find_results = []
        if frame.ndim == 2:
            frame = facenet.to_rgb(frame)
        frame = frame[:, :, 0:3]
        bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                    threshold, factor)
        nrof_faces = bounding_boxes.shape[0]  # number of detections
        print('Face Detected: %d' % nrof_faces)
        if nrof_faces > 0:
            det = bounding_boxes[:, 0:4]
            img_size = np.asarray(frame.shape)[0:2]
            cropped = []
            scaled = []
            scaled_reshape = []
            bb = np.zeros((nrof_faces, 4), dtype=np.int32)
            for i in range(nrof_faces):
                emb_array = np.zeros((1, embedding_size))
                bb[i][0] = det[i][0]
                bb[i][1] = det[i][1]
                bb[i][2] = det[i][2]
                bb[i][3] = det[i][3]
                # skip boxes outside the frame
                if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                        or bb[i][3] >= len(frame):
                    print('face is too close')
                    continue
                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                cropped[i] = facenet.flip(cropped[i], False)
                scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                            interp='bilinear'))
                scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                       interpolation=cv2.INTER_CUBIC)
                scaled[i] = facenet.prewhiten(scaled[i])
                scaled_reshape.append(scaled[i].reshape(-1, input_image_size,
                                                        input_image_size, 3))
                feed_dict = {images_placeholder: scaled_reshape[i],
                             phase_train_placeholder: False}
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                print(HumanNames[best_class_indices[0]])
                # print(best_class_indices)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                              (0, 255, 0), 2)  # box the face
                # plot the result index under the box
                text_x = bb[i][0]
                text_y = bb[i][3] + 20
                print('Result Indices: ', best_class_indices[0])
                for H_i in HumanNames:
                    if HumanNames[best_class_indices[0]] == H_i:
                        result_names = HumanNames[best_class_indices[0]]
                        cv2.putText(frame, result_names, (text_x, text_y),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                    thickness=1, lineType=2)
        else:
            print('Unable to align')
    cv2.imshow('Image', frame)
    if cv2.waitKey(1000000) & 0xFF == ord('q'):
        # sys.exit("Thanks")
        cv2.destroyAllWindows()
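# Hypothetical usage of predict(): the globals it declares (sess, pnet, rnet,
# onet, model, HumanNames, ...) must be initialized by the surrounding module
# first, e.g. by graph-setup code like the other entry points in this file.
# The path below is illustrative only:
#
#     predict('./test_images/sample.jpg')
#
# Note that cv2.waitKey(1000000) keeps the result window open for roughly 16
# minutes or until a key is pressed; cv2.waitKey(0) would wait indefinitely.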
def _main():
    args = get_args()
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            # pnet, rnet, onet = detect_face.create_mtcnn(sess, './models/')
            minsize = 20                 # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160
            print('Loading feature extraction model')
            modeldir = './models/facenet/20190310-055158'
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename = './myclassifier/my_classifier.pkl'
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
                print('load classifier file-> %s' % classifier_filename_exp)

            video_capture = cv2.VideoCapture(0)
            c = 0
            print('Start Recognition!')
            prevTime = 0
            myYolo = YOLO(args)
            while True:
                ret, frame = video_capture.read()
                # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
                curTime = time.time()  # for fps calculation
                timeF = frame_interval
                if (c % timeF == 0):
                    find_results = []
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    # print(frame.shape[0])
                    # print(frame.shape[1])
                    image = Image.fromarray(frame)
                    img, bounding_boxes = myYolo.detect_image(image)
                    # Remove the bounding boxes with low confidence
                    nrof_faces = len(bounding_boxes)
                    ## Use MTCNN to get the bounding boxes
                    # bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                    #                                             onet, threshold, factor)
                    # nrof_faces = bounding_boxes.shape[0]
                    # print('Detected_FaceNum: %d' % nrof_faces)
                    if nrof_faces > 0:
                        # det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        # cropped = []
                        # scaled = []
                        # scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        for i in range(nrof_faces):
                            emb_array = np.zeros((1, embedding_size))
                            bb[i][0] = bounding_boxes[i][0]
                            bb[i][1] = bounding_boxes[i][1]
                            bb[i][2] = bounding_boxes[i][2]
                            bb[i][3] = bounding_boxes[i][3]
                            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                    or bb[i][3] >= len(frame):
                                print('face is out of range!')
                                continue
                            # cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                            # cropped[0] = facenet.flip(cropped[0], False)
                            # scaled.append(misc.imresize(cropped[0], (image_size, image_size),
                            #                             interp='bilinear'))
                            # scaled[0] = cv2.resize(scaled[0], (input_image_size, input_image_size),
                            #                        interpolation=cv2.INTER_CUBIC)
                            # scaled[0] = facenet.prewhiten(scaled[0])
                            # scaled_reshape.append(scaled[0].reshape(-1, input_image_size,
                            #                                         input_image_size, 3))
                            # feed_dict = {images_placeholder: scaled_reshape[0],
                            #              phase_train_placeholder: False}
                            cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
                            print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
                            cropped = facenet.flip(cropped, False)
                            scaled = misc.imresize(cropped, (image_size, image_size),
                                                   interp='bilinear')
                            scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(-1, input_image_size,
                                                            input_image_size, 3)
                            feed_dict = {images_placeholder: scaled_reshape,
                                         phase_train_placeholder: False}
                            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)), best_class_indices]
                            print(best_class_probabilities)
                            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                          (0, 255, 0), 2)
                            text_x = bb[i][0]
                            text_y = bb[i][3] + 20
                            # for H_i in HumanNames:
                            #     if HumanNames[best_class_indices[0]] == H_i:
                            result_names = class_names[best_class_indices[0]] \
                                if best_class_probabilities[0] > 0.45 else "Unknown"
                            # print(result_names)
                            cv2.putText(frame, result_names, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                        thickness=1, lineType=2)
                    else:
                        print('Unable to align')
                sec = curTime - prevTime
                prevTime = curTime
                fps = 1 / (sec)
                fps_text = 'FPS: %2.3f' % fps  # renamed from `str` to avoid shadowing the builtin
                text_fps_x = len(frame[0]) - 150
                text_fps_y = 20
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                            thickness=1, lineType=2)
                # c += 1
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            # #video writer
            # out.release()
            cv2.destroyAllWindows()
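# _main() swaps MTCNN for a YOLO detector but reads bounding_boxes[i][0..3]
# the same way, so the wrapper is assumed to return corner-format boxes
# (x1, y1, x2, y2). If a detector yields (x, y, w, h) instead, a conversion
# like this sketch would be needed first (an assumption, not this YOLO
# wrapper's documented format):
def xywh_to_corners(x, y, w, h):
    return x, y, x + w, y + h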
def one_by_one(rel_path, url=False):
    print('Start Recognition!')
    prevTime = 0
    # TODO: support multiple urls
    if url:
        img_list = [None]
    else:
        img_list = glob.glob(os.path.join(rel_path, '*'))
    results = list()
    # cnt = 0
    # ok_list = list()
    for img_path in img_list:  # for each image in the list
        res = None
        # print('===', url)
        if url:
            try:
                rsp = urlget(rel_path)
                # print(rsp)
                if rsp.status_code == 200:
                    frame = np.array(Image.open(BytesIO(rsp.content)))
                else:
                    print('status code: ', rsp.status_code)
                    exit(-1)
            except Exception as e:
                print(repr(e))
                exit(-1)
        else:
            frame = cv2.imread(img_path)
        # ret, frame = video_capture.read()
        # frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)  # resize frame (optional)
        if frame is None:
            print(f'failure in reading image {img_path}; do not use Chinese characters in file names!')
            continue
        curTime = time.time()  # for fps calculation
        timeF = frame_interval
        if (c % timeF == 0):  # detect faces in the current image
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                        threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                for i in range(nrof_faces):  # crop all the faces
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]
                    # skip boxes outside the frame
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                            or bb[i][3] >= len(frame):
                        print('face is out of range!')
                        continue
                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    # note: only element [0] is processed, so classification always
                    # runs on the first cropped face
                    cropped[0] = facenet.flip(cropped[0], False)
                    scaled.append(facenet.imresize(cropped[0], (image_size, image_size),
                                                   interp='bilinear'))
                    scaled[0] = cv2.resize(scaled[0], (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[0] = facenet.prewhiten(scaled[0])
                    scaled_reshape.append(scaled[0].reshape(-1, input_image_size,
                                                            input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[0],
                                 phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)), best_class_indices]
                    if i == 0:
                        res = best_class_indices[0]
                    # ok_list.append(cnt)
                    # cnt += 1
                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                  (0, 255, 0), 2)  # box the face
                    # plot the result index under the box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    # print('result: ', best_class_indices[0])
                    if show_flag:
                        for H_i in class_names:
                            if class_names[best_class_indices[0]] == H_i:
                                result_names = class_names[best_class_indices[0]]
                                cv2.putText(frame, result_names, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                            thickness=1, lineType=2)
            else:
                print('No face detected.')
                exit(-1)
            sec = curTime - prevTime
            prevTime = curTime
            fps = 1 / (sec)
            fps_text = 'FPS: %2.3f' % fps  # renamed from `str` to avoid shadowing the builtin
            text_fps_x = len(frame[0]) - 150
            text_fps_y = 20
            if show_flag:
                cv2.putText(frame, fps_text, (text_fps_x, text_fps_y),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0),
                            thickness=1, lineType=2)
                # c += 1
                cv2.imshow('Video', frame)
                if cv2.waitKey(0) & 0xFF == ord('q'):
                    break
            a, b, m, n = bb[0]
            if res is not None:
                results.append([res] + list(predictions[0]) + [m - a, n - b])
            else:
                # results.append([res] * 10)
                print(f'cannot detect any face for {img_path}, skip')
                continue
    # video_capture.release()
    # #video writer
    # out.release()
    try:
        cv2.destroyAllWindows()
    except:
        pass
    # pred = np.zeros_like(img_list)
    # print(len(ok_list), len(results))
    # pred[ok_list] = results
    # print(pred)
    if len(results) == 0:
        return None
    results = np.array(results)
    # print(results.shape)
    # print(results)
    # labels = [class_names[int(i)] if i is not None else None for i in results[:, 0]]
    # comb = np.concatenate([np.array(img_list).reshape((-1, 1)),
    #                        np.array(labels).reshape((-1, 1)),
    #                        results[:, 1:]], axis=1)  # list(zip(img_list, results))
    # pd.DataFrame(comb).to_csv(args.output_file + '.csv', index=False, header=header)
    comb = results[:, 1:]  # 1,9
    df = pd.DataFrame(comb)
    ret = df.apply(proc_line, axis=1)
    # return df.iloc[:, :-2].values, ret.values
    return ret.values
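# one_by_one() fetches the image over HTTP through `urlget` (imported
# elsewhere) and decodes it with PIL. A self-contained sketch of that step
# using `requests` and OpenCV, decoding straight to a BGR array
# (`fetch_frame` is an illustrative helper, not part of this codebase):
import cv2
import numpy as np
import requests

def fetch_frame(url, timeout=10):
    rsp = requests.get(url, timeout=timeout)
    rsp.raise_for_status()
    buf = np.frombuffer(rsp.content, dtype=np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)  # None if the body is not an image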
def identify_face_video(self):
    modeldir = './model/20170511-185253.pb'
    classifier_filename = './class/classifier.pkl'
    npy = './npy'
    train_img = "./train_img"
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
            minsize = 20                 # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            factor = 0.709               # scale factor
            margin = 44
            frame_interval = 3
            batch_size = 1000
            image_size = 182
            input_image_size = 160
            HumanNames = os.listdir(train_img)
            HumanNames.sort()
            print('Loading model')
            facenet.load_model(modeldir)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)

            video_capture = cv2.VideoCapture(1)
            c = 0
            print('Start Recognition')
            prevTime = 0
            while True:
                ret, frame = video_capture.read()
                frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
                curTime = time.time() + 1  # for fps calculation
                timeF = frame_interval
                if (c % timeF == 0):
                    find_results = []
                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                                onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Detected_FaceNum: %d' % nrof_faces)
                    if nrof_faces > 0:
                        det = bounding_boxes[:, 0:4]
                        img_size = np.asarray(frame.shape)[0:2]
                        cropped = []
                        scaled = []
                        scaled_reshape = []
                        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                        try:
                            for i in range(nrof_faces):
                                emb_array = np.zeros((1, embedding_size))
                                bb[i][0] = det[i][0]
                                bb[i][1] = det[i][1]
                                bb[i][2] = det[i][2]
                                bb[i][3] = det[i][3]
                                # skip boxes outside the frame
                                if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                                        or bb[i][3] >= len(frame):
                                    print('Face is very close!')
                                    continue
                                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                                cropped[i] = facenet.flip(cropped[i], False)
                                scaled.append(misc.imresize(cropped[i],
                                                            (image_size, image_size),
                                                            interp='bilinear'))
                                scaled[i] = cv2.resize(scaled[i],
                                                       (input_image_size, input_image_size),
                                                       interpolation=cv2.INTER_CUBIC)
                                scaled[i] = facenet.prewhiten(scaled[i])
                                scaled_reshape.append(scaled[i].reshape(
                                    -1, input_image_size, input_image_size, 3))
                                feed_dict = {images_placeholder: scaled_reshape[i],
                                             phase_train_placeholder: False}
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)), best_class_indices]
                                # print("predictions")
                                print(best_class_indices, ' with accuracy ',
                                      best_class_probabilities)
                                # print(best_class_probabilities)
                                if best_class_probabilities[0] > 0.85:
                                    cv2.rectangle(frame, (bb[i][0], bb[i][1]),
                                                  (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # box the face
                                    # plot the result index under the box
                                    text_x = bb[i][0]
                                    text_y = bb[i][3] + 20
                                    print('Result Indices: ', best_class_indices[0])
                                    print(HumanNames)
                                    global getName
                                    getName = best_class_indices[0]
                                    global name, fetch
                                    name = HumanNames[getName]
                                    for H_i in HumanNames:
                                        if HumanNames[best_class_indices[0]] == H_i:
                                            result_names = HumanNames[best_class_indices[0]]
                                            predict_name = result_names[:-17]
                                            fetch = result_names[-16:-1]
                                            cv2.putText(frame, predict_name, (text_x, text_y),
                                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                                        (0, 0, 255), thickness=1, lineType=2)
                        except IndexError:
                            print("Oops! IndexError: list index out of range for multi_faces")
                    else:
                        print('Alignment Failure')
                # c += 1
                cv2.imshow('Video', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            video_capture.release()
            cv2.destroyAllWindows()
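# identify_face_video() splits each training-folder name by fixed character
# offsets (result_names[:-17] as the display name, result_names[-16:-1] as
# `fetch`), so folder names are assumed to embed a fixed-width ID suffix. A
# more robust sketch, assuming a 'Name_ID' naming convention (the separator
# and helper are assumptions, not from this file):
def split_label(folder_name, sep='_'):
    """Split 'Name_ID' into (name, id); falls back to the whole string."""
    name, _, ident = folder_name.rpartition(sep)
    return (name or ident), ident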
def Face_Recognize(frame):
    # (duplicate `embeddings` removed from the original global list)
    global minsize, pnet, rnet, onet, threshold, factor, sess, embedding_size, \
        image_size, phase_train_placeholder, embeddings
    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]
    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    print('Detected Faces: %d' % nrof_faces)
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(frame.shape)[0:2]
        cropped = []
        scaled = []
        scaled_reshape = []
        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        for i in range(nrof_faces):
            emb_array = np.zeros((1, embedding_size))
            bb[i][0] = det[i][0]
            bb[i][1] = det[i][1]
            bb[i][2] = det[i][2]
            bb[i][3] = det[i][3]
            # skip boxes outside the frame
            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                    or bb[i][3] >= len(frame):
                print('face is out of range!')
                continue
            cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
            cropped[i] = facenet.flip(cropped[i], False)
            scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                        interp='bilinear'))
            scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                   interpolation=cv2.INTER_CUBIC)
            scaled[i] = facenet.prewhiten(scaled[i])
            scaled_reshape.append(scaled[i].reshape(-1, input_image_size,
                                                    input_image_size, 3))
            feed_dict = {images_placeholder: scaled_reshape[i],
                         phase_train_placeholder: False}
            emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
            predictions = model.predict_proba(emb_array)
            print("Distances:")
            print(predictions)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]
            cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                          (0, 255, 0), 2)  # box the face
            # plot the result index under the box
            text_x = bb[i][0]
            text_y = bb[i][3] + 20
            print('Names: ')
            print(names)
            for H_i in names:
                if names[best_class_indices[0]] == H_i:
                    result_names = names[best_class_indices[0]]
                    print("Person: " + result_names)
                    cv2.putText(frame, result_names, (text_x, text_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                thickness=1, lineType=2)
    else:
        print('Unable to align')
    cv2.imshow('Video', frame)
def predict():
    frame = YUVtoRGB(list(request.data))
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
    # rotate 270 degrees
    (h, w) = frame.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, 270, 1.0)
    frame = cv2.warpAffine(frame, M, (h, w))
    curTime = time.time()  # for fps calculation
    timeF = frame_interval
    if (c % timeF == 0):
        find_results = []
        if frame.ndim == 2:
            frame = facenet.to_rgb(frame)
        frame = frame[:, :, 0:3]
        bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                    threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        if nrof_faces > 0:
            print('Detected_FaceNum: %d' % nrof_faces)
            det = bounding_boxes[:, 0:4]
            img_size = np.asarray(frame.shape)[0:2]
            cropped = []
            scaled = []
            scaled_reshape = []
            bb = np.zeros((nrof_faces, 4), dtype=np.int32)
            for i in range(nrof_faces):
                emb_array = np.zeros((1, embedding_size))
                bb[i][0] = det[i][0]
                bb[i][1] = det[i][1]
                bb[i][2] = det[i][2]
                bb[i][3] = det[i][3]
                # skip boxes outside the frame
                if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                        or bb[i][3] >= len(frame):
                    print('face is out of range!')
                    continue
                cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                cropped[i] = facenet.flip(cropped[i], False)
                scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                            interp='bilinear'))
                scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                       interpolation=cv2.INTER_CUBIC)
                scaled[i] = facenet.prewhiten(scaled[i])
                scaled_reshape.append(scaled[i].reshape(-1, input_image_size,
                                                        input_image_size, 3))
                feed_dict = {images_placeholder: scaled_reshape[i],
                             phase_train_placeholder: False}
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                              (0, 255, 0), 2)  # box the face
                if best_class_probabilities[0] > 0.8:
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    for H_i in HumanNames:
                        if HumanNames[best_class_indices[0]] == H_i:
                            result_names = HumanNames[best_class_indices[0]] + " " + \
                                str(best_class_probabilities[0] * 100) + "%"
                            print(result_names)
                            return str(result_names)
    return ""
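# predict() rotates the frame with a warpAffine matrix around the original
# center while swapping the output size, which shifts content for non-square
# frames. For this fixed angle (270 degrees counter-clockwise, i.e. 90 degrees
# clockwise) a single cv2.rotate call sizes the output correctly:
#
#     frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)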
def Detect():
    # detect the image in img_path
    ans = "Unknown"
    print(sys.path)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)
        minsize = 20                 # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three-step threshold
        factor = 0.709               # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160
        HumanNames = os.listdir(train_img)
        HumanNames.sort()
        print('Loading feature extraction model')
        facenet.load_model(modeldir)
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]
        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)
        # video_capture = cv2.VideoCapture("akshay_mov.mp4")
        c = 0
        print('Start Recognition!')
        prevTime = 0
        # ret, frame = video_capture.read()
        frame = cv2.imread(img_path, 0)
        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
        curTime = time.time() + 1  # for fps calculation
        timeF = frame_interval
        if (c % timeF == 0):
            find_results = []
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet,
                                                        threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces, 4), dtype=np.int32)
                if nrof_faces > 1:
                    return ans
                for i in range(nrof_faces):
                    emb_array = np.zeros((1, embedding_size))
                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]
                    # skip boxes outside the frame
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) \
                            or bb[i][3] >= len(frame):
                        print('face is too close')
                        return ans  # (the original also had an unreachable `continue` here)
                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[i] = facenet.flip(cropped[i], False)
                    scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                                interp='bilinear'))
                    scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[i] = facenet.prewhiten(scaled[i])
                    scaled_reshape.append(scaled[i].reshape(-1, input_image_size,
                                                            input_image_size, 3))
                    feed_dict = {images_placeholder: scaled_reshape[i],
                                 phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    # print(best_class_indices)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)
                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                  (0, 255, 0), 2)  # box the face
                    # plot the result index under the box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    result_names = HumanNames[best_class_indices[0]]
                    print('Result Indices: ', result_names)
                    ans = result_names
                    cv2.putText(frame, result_names, (text_x, text_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                thickness=1, lineType=2)
            else:
                print('Unable to align')
    cv2.imwrite('result.jpg', frame)
    return ans
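# Detect() loads the image as grayscale (cv2.imread(img_path, 0)) and relies
# on facenet.to_rgb to stack it into three channels. A one-line sketch of what
# that conversion is expected to do:
#
#     frame = np.stack([gray, gray, gray], axis=-1)  # HxW -> HxWx3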
def runTest(self):
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, self.alignDirectory)
            minimunSizeOfFace = 20
            scaleFactor = 0.709
            threshold = [0.6, 0.7, 0.7]  # three-step threshold
            # margin = 44
            frame_interval = 2
            image_size = 182
            input_image_size = 160
            facesList = self.getFacesList()
            print('Face list', facesList)
            facenet.load_model(self.modelFilePath)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            try:
                model = self.getModel()
                video_capture = cv2.VideoCapture(0)  # './test.mp4'
                # video_capture.set(3, 4920)
                # video_capture.set(4, 3080)
                c = 0
                # video writer
                fourcc = cv2.VideoWriter_fourcc(*'DIVX')
                out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30,
                                      frameSize=(4920, 3080))
                print('Start Recognition!')
                prevTime = 0
                while True:
                    ret, frame = video_capture.read()
                    # if (frame != None):
                    frame = cv2.resize(frame, (0, 0), fx=2, fy=2)  # resize frame (optional)
                    curTime = time.time() + 1  # for fps calculation
                    timeF = frame_interval
                    if (c % timeF == 0):
                        find_results = []
                        if frame.ndim == 2:
                            frame = facenet.to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        boundingBoxesOfAllDetectedFacesFromCameraFrame, _ = \
                            align.detect_face.detect_face(frame, minimunSizeOfFace,
                                                          pnet, rnet, onet,
                                                          threshold, scaleFactor)
                        numberOfFacesDeteted = \
                            boundingBoxesOfAllDetectedFacesFromCameraFrame.shape[0]
                        print("----------------------")
                        self.printTextToImage(frame, "No. Faces " + str(numberOfFacesDeteted),
                                              20, 20, "black")
                        if numberOfFacesDeteted > 0:
                            boundingBoxesOfDetectedFacesWith4PositionsFromCameraFrame = \
                                boundingBoxesOfAllDetectedFacesFromCameraFrame[:, 0:4]
                            # img_size = np.asarray(frame.shape)[0:2]
                            cropped = []
                            scaled = []
                            scaled_reshape = []
                            boundingBoxesOfDetectedFace = np.zeros(
                                (numberOfFacesDeteted, 4), dtype=np.int32)
                            for indexOfFaceDetected in range(numberOfFacesDeteted):
                                emb_array = np.zeros((1, embedding_size))
                                boundingBoxesOfDetectedFace[indexOfFaceDetected] = \
                                    self.getBoundingBoxesOfDetectedFaceFromCameraFrame(
                                        boundingBoxesOfDetectedFacesWith4PositionsFromCameraFrame,
                                        indexOfFaceDetected)
                                # `box` is a view onto the current row, kept for readability
                                box = boundingBoxesOfDetectedFace[indexOfFaceDetected]
                                # skip boxes outside the frame
                                if box[0] <= 0 or box[1] <= 0 or box[2] >= len(frame[0]) \
                                        or box[3] >= len(frame):
                                    # print('face is out of range!')
                                    continue
                                cropped.append(frame[box[1]:box[3], box[0]:box[2], :])
                                cropped[indexOfFaceDetected] = facenet.flip(
                                    cropped[indexOfFaceDetected], False)
                                scaled.append(misc.imresize(cropped[indexOfFaceDetected],
                                                            (image_size, image_size),
                                                            interp='bilinear'))
                                scaled[indexOfFaceDetected] = cv2.resize(
                                    scaled[indexOfFaceDetected],
                                    (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                                scaled[indexOfFaceDetected] = facenet.prewhiten(
                                    scaled[indexOfFaceDetected])
                                scaled_reshape.append(scaled[indexOfFaceDetected].reshape(
                                    -1, input_image_size, input_image_size, 3))
                                feed_dict = {
                                    images_placeholder: scaled_reshape[indexOfFaceDetected],
                                    phase_train_placeholder: False
                                }
                                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)), best_class_indices]
                                faceName = self.getFaceNameFromFacesListByIndex(
                                    facesList, best_class_indices[0])
                                if ((int)(best_class_probabilities[0] * 100)) > 30:
                                    faceNameWithProbability = faceName + " " + \
                                        str((int)(best_class_probabilities[0] * 100)) + "%"
                                    self.printTextAndBox(frame, boundingBoxesOfDetectedFace,
                                                         indexOfFaceDetected,
                                                         faceNameWithProbability)
                                # print('best class indices: ', best_class_indices)
                                # print("best class probabilities ", best_class_probabilities[0])
                                print('Prediction: ', predictions)
                                print("face ", faceName,
                                      (int)(best_class_probabilities[0] * 100), "%",
                                      " index ", best_class_indices[0])
                        # else:
                        #     print('Unable to align, no faces')
                    sec = curTime - prevTime
                    prevTime = curTime
                    fps = 1 / (sec)
                    strFPS = 'FPS: %2.3f' % fps
                    self.printTextToImage(frame, strFPS, 20, 50, "black")
                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    # else:
                    #     print('No video')
                video_capture.release()
                # video writer
                out.release()
                cv2.destroyAllWindows()
            except Exception as e:
                print('Error on line {}'.format(sys.exc_info()[-1].tb_lineno),
                      type(e).__name__, e)
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# skip boxes outside the frame
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
    print('face is out of range!')
    continue
cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
cropped = facenet.flip(cropped, False)
scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                    interpolation=cv2.INTER_CUBIC)
scaled = facenet.prewhiten(scaled)
scaled_reshape = scaled.reshape(-1, input_image_size, input_image_size, 3)
feed_dict = {images_placeholder: scaled_reshape,
             phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
def show_frame():
    print(button_flag)
    _, cv2image = cap.read()
    bounding_boxes, cv2image = detector.run_mtcnn(cv2image)
    nrof_faces = bounding_boxes.shape[0]
    print('Detected_FaceNum: %d' % nrof_faces)
    if nrof_faces > 0:
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(cv2image.shape)[0:2]
        cropped = []
        scaled = []
        scaled_reshape = []
        bb = np.zeros((nrof_faces, 4), dtype=np.int32)
        for i in range(nrof_faces):
            emb_array = np.zeros((1, embedding_size))
            bb[i][0] = det[i][0]
            bb[i][1] = det[i][1]
            bb[i][2] = det[i][2]
            bb[i][3] = det[i][3]
            # added from here ------------>
            face = cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]]
            if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(cv2image[0]) \
                    or bb[i][3] >= len(cv2image):
                print('Face is very close! 0:', bb[i][0], ' 1:', bb[i][1],
                      ' 2:', bb[i][2], ' 3:', bb[i][3])
                continue
            cropped.append(cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
            cropped[i] = facenet.flip(cropped[i], False)
            scaled.append(misc.imresize(cropped[i], (image_size, image_size),
                                        interp='bilinear'))
            scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                                   interpolation=cv2.INTER_CUBIC)
            scaled[i] = facenet.prewhiten(scaled[i])
            scaled_reshape.append(scaled[i].reshape(-1, input_image_size,
                                                    input_image_size, 3))
            # hand off to the server
            URL = server + "video"
            tolist_img = scaled_reshape[i].tolist()
            json_feed = {'images_placeholder': tolist_img}
            response = requests.post(URL, json=json_feed)
            img_data = response.json()
            # check
            # img_data = facenet.check_features(feature_list, emb_array[0],
            #                                   {"name": "", "cos_sim": 0}, 0)
            print("name : ", img_data["name"], "\nsimilarity : ", img_data["cos_sim"])
            # The button part of the GUI is not hooked up yet, so for now this
            # logic was pulled out here and every unselected face region is
            # mosaic-blurred.
            # cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]] = cv2.blur(
            #     cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]], (23, 23))
            if img_data["cos_sim"] >= 0.5:
                if button_flag[button_name.index(img_data["name"])] % 2 == 0:
                    cv2.rectangle(cv2image, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]),
                                  (0, 255, 0), 2)  # box the face
                    # plot the result under the box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    cv2.putText(cv2image, img_data["name"], (text_x, text_y),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                thickness=1, lineType=2)
                else:
                    cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]] = cv2.blur(
                        cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]], (23, 23))
            else:
                cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]] = cv2.blur(
                    cv2image[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2]], (23, 23))
    cv2image = cv2.cvtColor(cv2image, cv2.COLOR_BGR2RGBA)
    # cv2image = cv2.flip(cv2image, 1)
    # face = cv2.flip(face, 1)
    # face = cv2.cvtColor(face, cv2.COLOR_BGR2RGBA)
    webcam_img = ImageTk.PhotoImage(image=Image.fromarray(cv2image))
    mv_label.imgtk = webcam_img
    mv_label.configure(image=webcam_img)
    mv_label.after(10, show_frame)
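# show_frame() mosaics unrecognized faces with cv2.blur. A pixelation variant
# (downscale, then upscale with nearest-neighbor) gives a stronger mosaic;
# a sketch, not from this codebase:
import cv2

def pixelate(region, blocks=12):
    h, w = region.shape[:2]
    small = cv2.resize(region, (blocks, blocks), interpolation=cv2.INTER_LINEAR)
    return cv2.resize(small, (w, h), interpolation=cv2.INTER_NEAREST)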
emb_array = np.zeros((1, embedding_size))
bb[i][0] = det[i][0]
bb[i][1] = det[i][1]
bb[i][2] = det[i][2]
bb[i][3] = det[i][3]
# inner exception
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
    print('Face is very close!')
    continue
cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
cropped[i] = facenet.flip(cropped[i], False)
scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear'))
scaled[i] = cv2.resize(scaled[i], (input_image_size, input_image_size),
                       interpolation=cv2.INTER_CUBIC)
scaled[i] = facenet.prewhiten(scaled[i])
scaled_reshape.append(scaled[i].reshape(-1, input_image_size, input_image_size, 3))
feed_dict = {images_placeholder: scaled_reshape[i],
             phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
def main(): with tf.Graph().as_default(): last_log=str(datetime.now().time()) mycursor = mydb.cursor() gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy') minsize = 20 # minimum size of face threshold = [0.6, 0.7, 0.7] # three steps's threshold factor = 0.709 # scale factor image_size = 182 input_image_size = 160 print('Loading Modal') facenet.load_model(modeldir) images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] HumanNames = os.listdir(pre_img) HumanNames.sort() classifier_filename_exp = os.path.expanduser(classifier_filename) with open(classifier_filename_exp, 'rb') as infile: (model, class_names) = pickle.load(infile) # video_capture = cv2.VideoCapture(0) print('Start Recognition') with no_ssl_verification(): while True: args = parse_args() # ret, frame = video_capture.read() try: resp = requests.get(args.url) frame = np.asarray(bytearray(resp.content), dtype=np.uint8) frame = cv2.imdecode(frame, cv2.IMREAD_COLOR) frame_raw = frame if frame_raw.ndim == 2: frame_raw = facenet.to_rgb(frame_raw) img_raw_size = np.asarray(frame_raw.shape)[0:2] frame_raw = frame_raw[:, :, 0:3] except: continue frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] print('Detected_FaceNum: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(frame.shape)[0:2] rcropped = [] cropped = [] scaled = [] scaled_reshape = [] bb = np.zeros((nrof_faces,4), dtype=np.int32) for i in range(nrof_faces): emb_array = np.zeros((1, embedding_size)) bb[i][0] = det[i][0] bb[i][1] = det[i][1] bb[i][2] = det[i][2] bb[i][3] = det[i][3] # inner exception if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame): print('Face is very close!') continue rbb = [ int((bb[i][0] / img_size[0]) * img_raw_size[0] - (args.margin / 2)), int((bb[i][1] / img_size[1]) * img_raw_size[1] - (args.margin / 2)), int((bb[i][2] / img_size[0]) * img_raw_size[0] + (args.margin / 2)), int((bb[i][3] / img_size[1]) * img_raw_size[1] + (args.margin / 2)) ] if rbb[0] < 0: rbb[0] = 0 if rbb[1] < 0: rbb[1] = 0 if rbb[2] > len(frame_raw[0]): rbb[2] = len(frame_raw[0]) if rbb[3] > len(frame_raw): rbb[3] = len(frame_raw) rcropped.append(frame_raw[rbb[1]:rbb[3], rbb[0]:rbb[2], :]) cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) try: rcropped[i] = cv2.cvtColor(rcropped[i], cv2.COLOR_RGB2BGR) cropped[i] = facenet.flip(cropped[i], False) scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size), interpolation=cv2.INTER_CUBIC) scaled[i] = facenet.prewhiten(scaled[i]) scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3)) feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False} emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) predictions = model.predict_proba(emb_array) print(predictions) 
                                best_class_indices = np.argmax(predictions, axis=1)
                                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                                print(best_class_indices, ' with accuracy ', best_class_probabilities)

                                # normalize the box to [0, 1] so the area/position gates
                                # are resolution-independent
                                scale_x = 1 / img_size[0]
                                scale_y = 1 / img_size[1]
                                tbb = [scale_x * bb[i][0], scale_y * bb[i][1],
                                       scale_x * bb[i][2], scale_y * bb[i][3]]
                                bb_w = tbb[2] - tbb[0]
                                bb_h = tbb[3] - tbb[1]
                                bb_area = bb_w * bb_h
                                posY = tbb[1] / 0.5
                                # area = bb_area * 100
                                # area = round(area, 2)
                                # text_x = bb[i][0]
                                # text_y = bb[i][1] - 10
                                # cv2.putText(frame, str(area) + " " + str(round(posY * 100, 2)), (text_x, text_y),
                                #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), thickness=1, lineType=2)
                                if bb_area > args.bb_area:
                                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                                if bb_area > args.bb_area and posY > args.yframe:
                                    if best_class_probabilities > args.class_probability:
                                        # plot result idx under box
                                        text_x = bb[i][0]
                                        text_y = bb[i][3] + 20
                                        print('Result Indices:', best_class_indices[0])
                                        print(HumanNames)
                                        start_time = datetime.strptime(last_log, '%H:%M:%S.%f')
                                        end_time = datetime.strptime(str(datetime.now().time()), '%H:%M:%S.%f')
                                        diff = end_time - start_time
                                        elapsed_time = int((diff.seconds * 1000) + (diff.microseconds / 1000))
                                        for H_i in HumanNames:
                                            if HumanNames[best_class_indices[0]] == H_i:
                                                result_names = HumanNames[best_class_indices[0]]
                                                print('Face recognized:', result_names)
                                                # rate-limit the database log to one row per 5 s
                                                if elapsed_time > 5000:
                                                    last_log = str(datetime.now().time())
                                                    currdatetime = time.strftime('%Y-%m-%d %H:%M:%S')
                                                    sql = "INSERT INTO " + args.log + " (id_num, date) VALUES (%s, %s)"
                                                    val = (result_names, currdatetime)
                                                    mycursor.execute(sql, val)
                                                    mydb.commit()
                                                cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
                                                cv2.putText(frame, result_names, (text_x, text_y),
                                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                                            thickness=1, lineType=2)
                                                timestr = time.strftime('%Y%m%d%H%M%S')
                                                misc.imsave(os.path.join(cluster_dir, timestr + '.png'), rcropped[-1])
                            except Exception:
                                pass  # skip faces that fail preprocessing
                    else:
                        print('Alignment Failure')
                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                # video_capture was never opened above, so there is nothing to release
                cv2.destroyAllWindows()
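# main() above leans on several module-level names defined elsewhere in the
# project (parse_args, mydb, no_ssl_verification, plus path constants such as
# modeldir, pre_img, classifier_filename and cluster_dir). A minimal sketch of
# plausible definitions follows; the flag names are taken from their use in
# main(), but all defaults, credentials and the patching approach are
# assumptions, not the project's actual code:

import argparse
import contextlib
import functools

import mysql.connector
import requests
import urllib3

def parse_args():
    parser = argparse.ArgumentParser(description='IP-camera face recognition logger')
    parser.add_argument('--url', help='snapshot URL of the IP camera')
    parser.add_argument('--margin', type=int, default=44, help='pixel margin added around detected boxes')
    parser.add_argument('--bb_area', type=float, default=0.01, help='minimum normalized box area to accept')
    parser.add_argument('--yframe', type=float, default=0.1, help='minimum normalized vertical position')
    parser.add_argument('--class_probability', type=float, default=0.53, help='classifier confidence threshold')
    parser.add_argument('--log', default='attendance_log', help='MySQL table receiving (id_num, date) rows')
    return parser.parse_args()

# hypothetical connection parameters
mydb = mysql.connector.connect(host='localhost', user='root',
                               password='secret', database='recognition')

@contextlib.contextmanager
def no_ssl_verification():
    # temporarily force verify=False on requests.get, the only call main() makes
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    original_get = requests.get
    requests.get = functools.partial(original_get, verify=False)
    try:
        yield
    finally:
        requests.get = original_get

# Design note: because args.log is concatenated into the INSERT statement
# (table names cannot be bound as parameters in MySQL), that value must come
# from a trusted source, never from user input.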
for i in range(nrof_faces):
    emb_array = np.zeros((1, embedding_size))
    bb[i][0] = det[i][0]
    bb[i][1] = det[i][1]
    bb[i][2] = det[i][2]
    bb[i][3] = det[i][3]
    # inner exception
    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
        print('face is out of range!')
        continue
    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
    # index with [-1] (the crop just appended); the original's [0] re-embedded
    # the first face for every subsequent detection
    cropped[-1] = facenet.flip(cropped[-1], False)
    scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
    scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
    scaled[-1] = facenet.prewhiten(scaled[-1])
    scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
    feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
    predictions = model.predict_proba(emb_array)
    best_class_indices = np.argmax(predictions, axis=1)
    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)  # boxing face
    # plot result idx under box
    text_x = bb[i][0]
    text_y = bb[i][3] + 20
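# Why index with [-1] rather than the loop variable: whenever the bounds check
# hits `continue`, nothing is appended, so the list index and `i` drift apart
# and indexing with `i` grabs the wrong face or raises IndexError. A tiny
# self-contained illustration of the drift (toy data, not project code):

boxes = ['ok', 'skip', 'ok']   # pretend the middle detection fails the bounds check
crops = []
for i, b in enumerate(boxes):
    if b == 'skip':
        continue               # nothing appended for this i
    crops.append(b.upper())
    assert crops[-1] == 'OK'   # [-1] is always the crop just appended
# crops[2] would raise IndexError: only two items exist for three detections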
for i in range(number_of_faces):
    emb_array = np.zeros((1, embedding_size))
    bb[i][0] = det[i][0]
    bb[i][1] = det[i][1]
    bb[i][2] = det[i][2]
    bb[i][3] = det[i][3]
    # inner exception
    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
        # print('face is out of range!')
        continue
    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
    # index with [-1] (the crop just appended); the original's [0] re-embedded
    # the first face for every subsequent detection
    cropped[-1] = facenet.flip(cropped[-1], False)
    scaled.append(misc.imresize(cropped[-1], (image_size, image_size), interp='bilinear'))
    scaled[-1] = cv2.resize(scaled[-1], (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
    scaled[-1] = facenet.prewhiten(scaled[-1])
    scaled_reshape.append(scaled[-1].reshape(-1, input_image_size, input_image_size, 3))
    feed_dict = {images_placeholder: scaled_reshape[-1], phase_train_placeholder: False}
    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
    predictions = model.predict_proba(emb_array)
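# Every snippet above unpickles a (model, class_names) pair from the classifier
# file. If that pickle was produced the way the reference facenet classifier.py
# does it, training looks roughly like the sketch below; the stand-in data and
# the 512-D size are assumptions (the live code reads embedding_size from the
# graph, and pretrained models emit 128 or 512 dimensions):

import pickle
import numpy as np
from sklearn.svm import SVC

# Stand-in data: in the real pipeline emb_array holds facenet embeddings and
# labels/class_names come from the per-person folders of training images.
rng = np.random.RandomState(0)
emb_array = rng.randn(20, 512)                   # 20 fake 512-D embeddings
labels = np.repeat([0, 1], 10)                   # two fake identities
class_names = ['alice', 'bob']

model = SVC(kernel='linear', probability=True)   # probability=True enables predict_proba
model.fit(emb_array, labels)
with open('./clase/clasificador.pkl', 'wb') as outfile:
    pickle.dump((model, class_names), outfile)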
if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
    print('face is out of range!')
    continue
# (this variant drops the cropped/scaled/scaled_reshape list bookkeeping of the
#  previous snippets in favor of plain per-face variables, which also avoids the
#  index drift after `continue`)
cropped = frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]
print("{0} {1} {2} {3}".format(bb[i][0], bb[i][1], bb[i][2], bb[i][3]))
cropped = facenet.flip(cropped, False)
scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                    interpolation=cv2.INTER_CUBIC)
scaled = facenet.prewhiten(scaled)
scaled_reshape = scaled.reshape(-1, input_image_size, input_image_size, 3)
feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}
emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
predictions = model.predict_proba(emb_array)
best_class_indices = np.argmax(predictions, axis=1)
best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)
text_x = bb[i][0]
text_y = bb[i][3] + 20
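# The crop -> flip -> resize -> prewhiten -> forward-pass sequence recurs in
# every variant above. A sketch of a helper that factors it out; the names
# follow the snippets, but this is a suggested refactor, not project code:

def embed_face(frame, box, sess, images_placeholder, phase_train_placeholder,
               embeddings, image_size=182, input_image_size=160):
    """Return the facenet embedding for one detected bounding box (x1, y1, x2, y2)."""
    x1, y1, x2, y2 = box
    crop = frame[y1:y2, x1:x2, :]
    crop = facenet.flip(crop, False)  # no-op, kept for parity with the snippets
    crop = misc.imresize(crop, (image_size, image_size), interp='bilinear')
    crop = cv2.resize(crop, (input_image_size, input_image_size),
                      interpolation=cv2.INTER_CUBIC)
    crop = facenet.prewhiten(crop)
    feed_dict = {images_placeholder: crop.reshape(-1, input_image_size, input_image_size, 3),
                 phase_train_placeholder: False}
    return sess.run(embeddings, feed_dict=feed_dict)  # shape (1, embedding_size)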
def batch_inp(rel_path):
    # Classify every image under `rel_path` and write (image_path, predicted_name)
    # pairs to test_results.csv. Relies on the module-level session, MTCNN nets,
    # placeholders, classifier and detection constants set up above, plus
    # `import glob` and `import pandas as pd` at the top of the file.
    print('Start Recognition!')
    prevTime = 0
    img_list = glob.glob(os.path.join(rel_path, '*'))
    ok_ind = list()        # indices of images that produced a usable face crop
    scaled_reshape = []    # one preprocessed crop per usable image, batched below
    for cnt, img_path in enumerate(img_list):  # for each image in the list
        frame = cv2.imread(img_path)
        # ret, frame = video_capture.read()
        # frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
        curTime = time.time()  # calc fps
        timeF = frame_interval
        # `c` is the module-level frame counter; it stays 0 here, so every image passes
        if (c % timeF == 0):
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                        onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]
                bb = [int(np.round(v)) for v in det[0]]  # keep only the first face
                # inner exception
                if bb[0] <= 0 or bb[1] <= 0 or bb[2] >= len(frame[0]) or bb[3] >= len(frame):
                    print('face is out of range!')
                    continue
                cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                cropped = facenet.flip(cropped, False)
                # the original called facenet.imresize, which the reference facenet
                # module does not provide; misc.imresize matches the other snippets
                scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                scaled = cv2.resize(scaled, (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                scaled = facenet.prewhiten(scaled)
                scaled_reshape.append(scaled.reshape(input_image_size, input_image_size, 3))
                ok_ind.append(cnt)
    # single batched forward pass over all usable crops
    feed_dict = {images_placeholder: scaled_reshape, phase_train_placeholder: False}
    emb_array = sess.run(embeddings, feed_dict=feed_dict)   # (n, n_emb)
    predictions = model.predict_proba(emb_array)
    best_class_indices = np.argmax(predictions, axis=1)     # (n,)
    # best_class_probabilities = np.max(predictions, axis=1)
    # np.zeros_like(img_list) would give a fixed-width string dtype that truncates
    # long names, so build an object array instead
    results = np.full(len(img_list), '', dtype=object)
    results[ok_ind] = [class_names[i] for i in best_class_indices]
    comb = list(zip(img_list, results))
    pd.DataFrame(comb).to_csv('test_results.csv')
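# Usage sketch for batch_inp, assuming the TF session, MTCNN nets, placeholders,
# classifier and detection constants from the snippets above are already set up
# at module scope (the folder name is hypothetical):

batch_inp('./imagenes_prueba')
# -> prints per-image detection counts and writes test_results.csv with one
#    (image_path, predicted_name) row per input; images where no usable face
#    was found keep an empty name.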