def score_acc_f1(X, y, face_algorithm='rec', title='', show=True):
    from sklearn.metrics import f1_score, accuracy_score
    import matplotlib.pyplot as plt
    import numpy as np
    from tqdm import tqdm

    distances = []  # squared L2 distance between pairs
    identical = []  # 1 if same identity, 0 otherwise

    module_verify = import_verify(face_algorithm)

    num = len(y)
    for i in tqdm(range(num - 1)):
        for j in range(i + 1, num):
            distances.append(module_verify.face_distance([X[i]], X[j]))
            identical.append(1 if y[i] == y[j] else 0)

    distances = np.array(distances)
    identical = np.array(identical)

    thresholds = np.arange(0.1, 2.0, 0.01)
    f1_scores = [f1_score(identical, distances < t) for t in thresholds]
    acc_scores = [accuracy_score(identical, distances < t) for t in thresholds]

    opt_idx = np.argmax(f1_scores)
    # Threshold at maximal F1 score
    opt_tau = thresholds[opt_idx]
    # Accuracy at maximal F1 score
    opt_acc = accuracy_score(identical, distances < opt_tau)

    if show:
        # Plot F1 score and accuracy as a function of the distance threshold
        plt.plot(thresholds, f1_scores, label='F1 score')
        plt.plot(thresholds, acc_scores, label='Accuracy')
        plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
        plt.title('Accuracy at threshold {:.4} = {:.4}'.format(opt_tau, opt_acc))
        plt.xlabel(title + ' Distance threshold')
        plt.legend()
        plt.show()

    return opt_tau, opt_acc
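
# Usage sketch (illustrative only): evaluate a distance threshold from the (X, y)
# feature dump that train() writes next to the KNN model. The file name below is
# hypothetical; any '<model>.xy' pickle produced by train() would work.
#
#   import pickle
#   with open('mygroup.clf.xy', 'rb') as f:
#       X, y = pickle.load(f)
#   opt_tau, opt_acc = score_acc_f1(X, y, face_algorithm='vgg', title='VGG')
#   print('best threshold: %.2f, accuracy: %.4f' % (opt_tau, opt_acc))
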
def get_features_thread_db(face_algorithm, model_name, image_data, group_id,
                           data_type='base64', request_id='', classifier='knn'):
    import keras.backend.tensorflow_backend as tb
    tb._SYMBOLIC_SCOPE.value = True

    module_verify = import_verify(face_algorithm)

    X_base64 = image_data
    faces_encodings, X_face_locations, faces = module_verify.get_features_b64(
        X_base64, angle=ALGORITHM[face_algorithm]['p_angle'])
    if len(X_face_locations) == 0:
        return [], []

    # Save the face to the temporary table; only done for the vgg algorithm
    if request_id != '' and face_algorithm == 'vgg':
        face_save_to_temp(group_id, request_id, image=faces[0])

    return faces_encodings, X_face_locations
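
# Usage sketch (illustrative only): extract face encodings from a base64-encoded
# image. File and group names are hypothetical; face_algorithm must be a key in
# ALGORITHM, and model_name is not consulted by this function.
#
#   import base64
#   with open('someone.jpg', 'rb') as f:
#       img_b64 = base64.b64encode(f.read()).decode()
#   encodings, locations = get_features_thread_db('vgg', '', img_b64, 'mygroup')
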
if __name__ == "__main__":

    if len(sys.argv) < 3:
        print("usage: python3 %s <train_data_dir> <group_id>" % sys.argv[0])
        sys.exit(2)

    train_dir = sys.argv[1]
    group_id = sys.argv[2]

    if dbport.group_info(group_id) == -1:
        # Create the group if it does not exist yet
        dbport.group_new(group_id)

    # Dynamically load the verify modules
    module_verify = [
        import_verify('vgg'),
        import_verify('evo'),
    ]

    # Loop through each person in the training set
    for class_dir in sorted(os.listdir(train_dir)):
        if SHUTDOWN:  # exit only after the current directory has been processed
            print('done.')
            break
        if not os.path.isdir(os.path.join(train_dir, class_dir)):
            continue
        if dbport.user_info(group_id, class_dir) != -1:
            print('existed: ', class_dir)
            continue
def predict_K(X_base64, group_id, model_path='', face_algorithm='vgg',
              data_type='base64', request_id=''):
    """
    Recognizes faces in given image using a trained Keras classifier
    """
    global CLF_CACHE

    # Load a trained Keras model (if one was passed in)
    clf_path = os.path.join(model_path, '%s.%s.h5' % (group_id, face_algorithm))

    # Check whether the classifier is already cached
    mtime = int(os.path.getmtime(clf_path))  # last modification time of the model file
    with cache_lock:
        if (clf_path in CLF_CACHE.keys()) and (CLF_CACHE[clf_path][1] == mtime):
            model, label_y = CLF_CACHE[clf_path][0]
        else:
            with graph.as_default():
                with session.as_default():
                    # Load the model dimensions and label encoder, then the weights
                    with open(clf_path + '.save', 'rb') as f:
                        input_dim, output_dim, label_y = pickle.load(f)
                    from train_classifier import get_model
                    model = get_model(input_dim, output_dim)
                    model.load_weights(clf_path)

            # Put the model into the cache
            CLF_CACHE[clf_path] = ((model, label_y), mtime)
            print('Feeding CLF cache: ', CLF_CACHE.keys())

    if data_type == 'base64':
        # Dynamically load the verify module
        module_verify = import_verify(face_algorithm)

        # Load the image data and find face locations
        # Find encodings for faces in the test image
        faces_encodings, X_face_locations, faces = module_verify.get_features_b64(
            X_base64, angle=ALGORITHM[face_algorithm]['p_angle'])
        if len(X_face_locations) == 0:
            return []

        # Save the face to the temporary table; only done for the vgg algorithm
        if request_id != '' and face_algorithm == 'vgg':
            dbport.face_save_to_temp(group_id, request_id, image=faces[0])
    else:  # data_type == 'encodings'
        faces_encodings = X_base64
        X_face_locations = [(0, 0, 0, 0)]  # data from the db has no bounding box and holds a single face

    with graph.as_default():
        with session.as_default():
            results = []
            for x in range(len(X_face_locations)):
                # Predict class probabilities for this face
                result = model.predict(np.array([faces_encodings[x]]))

                # Keep the 5 most probable classes
                max_list = result[0].argsort()[-5:][::-1]
                percent_list = [result[0][i] for i in max_list]
                class_list = label_y.inverse_transform(max_list)

                # Keep results with probability above KERAS_THRESHOLD_PERCENTAGE (10%);
                # the score returned is (1 - probability) so that, like a distance,
                # smaller means closer
                result_list = [
                    [i, X_face_locations[x], 1 - j, 1]
                    for i, j in zip(class_list, percent_list)
                    if j > KERAS_THRESHOLD_PERCENTAGE
                ]
                results.extend(result_list)

    return results
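
# Usage sketch (illustrative only): classify a base64-encoded image with the Keras
# classifier trained for a group. File and group names are hypothetical; the model
# files '<group>.<algorithm>.h5' and '<group>.<algorithm>.h5.save' must already
# exist under model_path.
#
#   import base64
#   with open('someone.jpg', 'rb') as f:
#       img_b64 = base64.b64encode(f.read()).decode()
#   for name, box, score, count in predict_K(img_b64, 'mygroup', model_path='models'):
#       print(name, box, 1 - score)   # score is (1 - probability), smaller is closer
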
def predict(X_base64, group_id, model_path='', distance_threshold=0.6,
            face_algorithm='vgg', data_type='base64', request_id=''):
    """
    Recognizes faces in given image using a trained KNN classifier

    :param X_base64: image data in base64 coding
    :param model_path: (optional) path to the trained model, defaults to the current directory
    """
    global CLF_CACHE

    # Load a trained KNN model (if one was passed in)
    clf_path = os.path.join(model_path, group_id + ALGORITHM[face_algorithm]['ext'])

    # Check whether the classifier is already cached
    mtime = int(os.path.getmtime(clf_path))  # last modification time of the model file
    with cache_lock:
        if (clf_path in CLF_CACHE.keys()) and (CLF_CACHE[clf_path][1] == mtime):
            knn_clf = CLF_CACHE[clf_path][0]
        else:
            with open(clf_path, 'rb') as f:
                knn_clf = pickle.load(f)
            # Put the classifier into the cache
            CLF_CACHE[clf_path] = (knn_clf, mtime)
            print('Feeding CLF cache: ', CLF_CACHE.keys())

    if data_type == 'base64':
        # Dynamically load the verify module
        module_verify = import_verify(face_algorithm)

        # Load the image data and find face locations
        # Find encodings for faces in the test image
        faces_encodings, X_face_locations, faces = module_verify.get_features_b64(
            X_base64, angle=ALGORITHM[face_algorithm]['p_angle'])
        if len(X_face_locations) == 0:
            return []

        # Save the face to the temporary table; only done for the vgg algorithm
        if request_id != '' and face_algorithm == 'vgg':
            dbport.face_save_to_temp(group_id, request_id, image=faces[0])
    else:  # data_type == 'encodings'
        faces_encodings = X_base64
        X_face_locations = [(0, 0, 0, 0)]  # data from the db has no bounding box and holds a single face

    # Use the KNN model to find the 5 nearest matches for each test face
    closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=5)

    # Return multiple results per face
    results = []
    for i in range(len(X_face_locations)):
        # If even the closest distance exceeds the threshold, no match was found
        if closest_distances[0][i][0] > distance_threshold:
            results.append([
                'unknown',
                X_face_locations[i],
                round(closest_distances[0][i][0], 6), 0
            ])
            continue

        # Collect every neighbor within the threshold
        labels = {}
        temp_result = []
        for j in range(len(closest_distances[0][i])):
            if closest_distances[0][i][j] <= distance_threshold:
                # labels are in classes_
                l = knn_clf.classes_[knn_clf._y[closest_distances[1][i][j]]]
                if l not in labels.keys():
                    temp_result.append([
                        l,
                        X_face_locations[i],
                        closest_distances[0][i][j] / distance_threshold
                    ])
                    labels[l] = 1
                else:
                    labels[l] += 1

        # Find the highest vote count among the labels
        max_count = max(labels.items(), key=operator.itemgetter(1))[1]

        # For the same face location, the label with the highest count is taken as
        # the result; multiple results are returned only when counts tie.
        # If the closest match does not have the highest count but its distance is
        # much smaller, include it in the results as well.
        temp_result2 = [
            i + [labels[i[0]]] for i in temp_result if labels[i[0]] == max_count
        ]
        if labels[temp_result[0][0]] != max_count and \
                temp_result[0][2] / temp_result2[0][2] < 0.5:
            temp_result2.insert(0, temp_result[0] + [labels[temp_result[0][0]]])

        results.extend(temp_result2)

    return results
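
# Usage sketch (illustrative only): recognize faces in a base64-encoded image with
# the cached KNN classifier of a group. Names are hypothetical; the classifier file
# '<group><ext>' for the chosen face_algorithm must exist under model_path.
#
#   import base64
#   with open('someone.jpg', 'rb') as f:
#       img_b64 = base64.b64encode(f.read()).decode()
#   for name, box, score, votes in predict(img_b64, 'mygroup', model_path='models',
#                                          distance_threshold=0.6, face_algorithm='vgg'):
#       print(name, box, score, votes)   # matched scores are distance/threshold, smaller is closer
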
def train(train_dir, model_save_path=None, n_neighbors=None,
          knn_algo='ball_tree', verbose=False, face_algorithm='rec'):
    """
    Trains a k-nearest neighbors classifier for face recognition.

    :param train_dir: directory that contains a sub-directory for each known person, with its name.

     (View in source code to see train_dir example tree structure)

     Structure:
        <train_dir>/
        ├── <person1>/
        │   ├── <somename1>.jpeg
        │   ├── <somename2>.jpeg
        │   ├── ...
        ├── <person2>/
        │   ├── <somename1>.jpeg
        │   └── <somename2>.jpeg
        └── ...

    :param model_save_path: (optional) path to save model on disk
    :param n_neighbors: (optional) number of neighbors to weigh in classification.
        Chosen automatically if not specified
    :param knn_algo: (optional) underlying data structure to support knn. Default is ball_tree
    :param verbose: verbosity of training
    :return: returns knn classifier that was trained on the given data.
    """
    X = []
    y = []

    # Dynamically load the verify module
    module_verify = import_verify(face_algorithm)

    # Loop through each person in the training set
    for class_dir in os.listdir(train_dir):
        if not os.path.isdir(os.path.join(train_dir, class_dir)):
            continue

        print('training: ', class_dir)

        # Loop through each training image for the current person
        for img_path in image_files_in_folder(os.path.join(train_dir, class_dir)):
            for angle in TRAINING_ANGLE:  # train with the image rotated to different angles
                face_encodings, _, _ = module_verify.get_features(img_path, angle=angle)

                if len(face_encodings) != 1:
                    # If there are no people (or too many people) in a training image, skip the image.
                    if verbose:
                        print("Image {} not suitable for training: {}".format(
                            img_path,
                            "Didn't find a face" if len(face_encodings) < 1 else "Found more than one face"))
                else:
                    # Add face encoding for current image to the training set
                    X.append(face_encodings[0])
                    y.append(class_dir)

    # Determine how many neighbors to use for weighting in the KNN classifier
    if n_neighbors is None:
        n_neighbors = int(round(math.sqrt(len(X))))
        if verbose:
            print("Chose n_neighbors automatically:", n_neighbors)

    # Create and train the KNN classifier
    start_time = datetime.now()
    knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm=knn_algo,
                                             weights='distance')
    knn_clf.fit(X, y)
    print('[Time taken: {!s}]'.format(datetime.now() - start_time))

    # Save the trained KNN classifier
    if model_save_path is not None:
        with open(model_save_path, 'wb') as f:
            pickle.dump(knn_clf, f)
        # Also save the raw (X, y) features
        with open(model_save_path + '.xy', 'wb') as f:
            pickle.dump((X, y), f)

    return knn_clf
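
# Usage sketch (illustrative only): train a KNN classifier from a directory of
# per-person image folders laid out as in the docstring above. Paths are hypothetical.
#
#   clf = train('data/train', model_save_path='mygroup.clf',
#               n_neighbors=3, verbose=True, face_algorithm='vgg')
#   # train() also writes 'mygroup.clf.xy' with the raw (X, y) features,
#   # which score_acc_f1() can use to pick a distance threshold.
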
def predict(X_img_path, knn_clf=None, model_path=None, distance_threshold=0.6,
            face_algorithm='rec'):
    """
    Recognizes faces in given image using a trained KNN classifier

    :param X_img_path: path to image to be recognized
    :param knn_clf: (optional) a knn classifier object. if not specified, model_path must be specified.
    :param model_path: (optional) path to a pickled knn classifier. if not specified, knn_clf must be supplied.
    :param distance_threshold: (optional) distance threshold for face classification. the larger it is,
        the more chance of mis-classifying an unknown person as a known one.
    :return: a list of names and face locations for the recognized faces in the image:
        [(name, bounding box), ...]. For faces of unrecognized persons, the name 'unknown' will be returned.
    """
    if not os.path.isfile(X_img_path) or os.path.splitext(X_img_path)[1][1:] not in ALLOWED_EXTENSIONS:
        raise Exception("Invalid image path: {}".format(X_img_path))

    if knn_clf is None and model_path is None:
        raise Exception("Must supply knn classifier either through knn_clf or model_path")

    # Load a trained KNN model (if one was passed in)
    if knn_clf is None:
        with open(model_path, 'rb') as f:
            knn_clf = pickle.load(f)

    # Dynamically load the verify module
    module_verify = import_verify(face_algorithm)

    # Load the image file and find face locations
    # Find encodings for faces in the test image
    # angle=None: do not correct the rotation angle; angle=0: correct the rotation angle
    faces_encodings, X_face_locations, _ = module_verify.get_features(
        X_img_path, angle=ALGORITHM[face_algorithm]['p_angle'])
    if len(X_face_locations) == 0:
        return []

    # Use the KNN model to find the 10 nearest matches for each test face
    closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=10)

    # Return multiple results per face
    results = []
    for i in range(len(X_face_locations)):
        # If even the closest distance exceeds the threshold, no match was found
        if closest_distances[0][i][0] > distance_threshold:
            results.append([
                'unknown',
                X_face_locations[i],
                round(closest_distances[0][i][0], 6), 0
            ])
            continue

        # Collect every neighbor within the threshold
        labels = {}
        temp_result = []
        for j in range(len(closest_distances[0][i])):
            if closest_distances[0][i][j] <= distance_threshold:
                # labels are in classes_
                l = knn_clf.classes_[knn_clf._y[closest_distances[1][i][j]]]
                if l not in labels.keys():
                    temp_result.append([
                        l,
                        X_face_locations[i],
                        closest_distances[0][i][j] / distance_threshold
                    ])
                    labels[l] = 1
                else:
                    labels[l] += 1

        # Find the highest vote count among the labels
        max_count = max(labels.items(), key=operator.itemgetter(1))[1]

        # For the same face location, the label with the highest count is taken as
        # the result; multiple results are returned only when counts tie.
        # If the closest match does not have the highest count but its distance is
        # much smaller, include it in the results as well.
        temp_result2 = [
            i + [labels[i[0]]] for i in temp_result if labels[i[0]] == max_count
        ]
        if labels[temp_result[0][0]] != max_count and \
                temp_result[0][2] / temp_result2[0][2] < 0.5:
            temp_result2.insert(0, temp_result[0] + [labels[temp_result[0][0]]])

        results.extend(temp_result2)

    return results
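
# Usage sketch (illustrative only): recognize faces in an image file with a
# previously trained KNN model. Paths are hypothetical.
#
#   for name, box, score, votes in predict('someone.jpg',
#                                          model_path='mygroup.clf',
#                                          distance_threshold=0.6,
#                                          face_algorithm='rec'):
#       print(name, box, score, votes)
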
def predict(X_base64, group_id, model_path='', distance_threshold=0.6,
            face_algorithm='vgg', data_type='base64'):
    """
    Recognizes faces in given image using a trained SVM classifier

    :param X_base64: image data in base64 coding
    :param model_path: (optional) path to the trained model, defaults to the current directory
    :param distance_threshold: (optional) distance threshold for face classification. the larger it is,
        the more chance of mis-classifying an unknown person as a known one.
    :return: a list of names and face locations for the recognized faces in the image:
        [(name, bounding box), ...]. For faces of unrecognized persons, the name 'unknown' will be returned.
    """
    global CLF_CACHE

    # Load a trained SVM model (if one was passed in)
    clf_path = os.path.join(model_path, group_id + ALGORITHM[face_algorithm]['ext'])

    # Check whether the classifier is already cached
    mtime = int(os.path.getmtime(clf_path))  # last modification time of the model file
    if (clf_path in CLF_CACHE.keys()) and (CLF_CACHE[clf_path][1] == mtime):
        svc_clf = CLF_CACHE[clf_path][0]
    else:
        with open(clf_path, 'rb') as f:
            svc_clf = pickle.load(f)
        # Put the classifier into the cache
        CLF_CACHE[clf_path] = (svc_clf, mtime)
        print('Feeding CLF cache: ', CLF_CACHE.keys())

    if data_type == 'base64':
        # Dynamically load the verify module
        module_verify = import_verify(face_algorithm)

        # Load the image data and find face locations
        # Find encodings for faces in the test image
        faces_encodings, X_face_locations = module_verify.get_features_b64(X_base64)
        if len(X_face_locations) == 0:
            return []
    else:  # data_type == 'encodings'
        faces_encodings = X_base64
        X_face_locations = [(0, 0, 0, 0)]  # data from the db has no bounding box and holds a single face

    # Use the SVM model to predict the best match for each test face
    name = svc_clf.predict(faces_encodings)

    # Return one result per face
    results = []
    for i in range(len(X_face_locations)):
        results.append([name[i], X_face_locations[i], 0, 1])

    return results
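
# Usage sketch (illustrative only): classify a base64-encoded image with the SVM
# classifier of a group. Names are hypothetical; unlike the KNN variant, this
# returns exactly one label per detected face and no distance score.
#
#   import base64
#   with open('someone.jpg', 'rb') as f:
#       img_b64 = base64.b64encode(f.read()).decode()
#   print(predict(img_b64, 'mygroup', model_path='models', face_algorithm='vgg'))
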