def chinese_whispers(encodings, threshold=0.5):
    """Cluster encodings with dlib's Chinese Whispers implementation.

    Chinese Whispers - an Efficient Graph Clustering Algorithm
    and its Application to Natural Language Processing Problems

    Every item of ``encodings`` is wrapped in a ``dlib.vector`` and the
    resulting list is passed, together with ``threshold``, to
    ``dlib.chinese_whispers_clustering``. Its return value is handed back
    unchanged.
    """
    dlib_vectors = list(map(dlib.vector, encodings))
    labels = dlib.chinese_whispers_clustering(dlib_vectors, threshold)
    return labels
def CLUSTER_TRACKS(DT, threshold):
    """Merge tracks that Chinese Whispers puts into the same cluster.

    Each track in ``DT`` is represented by the mean (over axis 0) of its
    ``'BBOX_FEAT'`` entry; those mean vectors are clustered with
    ``dlib.chinese_whispers_clustering(..., threshold)``.

    Args:
        DT: mapping of track id -> dict with the keys ``'BBOX'``,
            ``'Frame_ID'``, ``'BBOX_FEAT'``, ``'ANGLE'``, ``'IMG'``,
            ``'AVG_SIZE'``, ``'AREA'`` and ``'LEN'``.
        threshold: distance threshold forwarded to dlib.

    Returns:
        ``defaultdict(dict)`` keyed by cluster label. Sequence fields and
        ``'LEN'`` are accumulated over all member tracks; ``'AVG_SIZE'`` and
        ``'AREA'`` hold the values of the last member track.
    """
    # Local import: only needed to merge array-valued fields.
    import numpy as np

    def _combine(accumulated, new):
        # NOTE(review): 'BBOX_FEAT' supports .mean(0), so it is presumably a
        # numpy array with one row per detection; "+" would add element-wise
        # instead of appending rows, so arrays are concatenated along axis 0.
        # Confirm against the producer of DT.
        if isinstance(accumulated, np.ndarray):
            return np.concatenate((accumulated, new), axis=0)
        return accumulated + new

    merged_fields = ('BBOX', 'Frame_ID', 'BBOX_FEAT', 'ANGLE', 'IMG', 'LEN')

    track_keys = list(DT.keys())
    track_feats = [dlib.vector(DT[key]['BBOX_FEAT'].mean(0)) for key in track_keys]
    cluster_ids = dlib.chinese_whispers_clustering(track_feats, threshold)

    CL = defaultdict(dict)
    # cluster_ids[k] is the label of the k-th track, so tracks and labels are
    # walked in lockstep (the label itself must not be used as a track index).
    for key, label in zip(track_keys, cluster_ids):
        track = DT[key]
        cluster = CL[label]
        first_member = not cluster
        for field in merged_fields:
            if first_member:
                cluster[field] = track[field]
            else:
                cluster[field] = _combine(cluster[field], track[field])
        cluster['AVG_SIZE'] = track['AVG_SIZE']
        cluster['AREA'] = track['AREA']
    return CL
def create_clusters(self, descriptors, d_value=0.5, method="CW"):
    """Cluster ``descriptors`` with DBSCAN ("DB") or Chinese Whispers ("CW").

    Prints the chosen method, the number of clusters and the labels, and
    returns the per-descriptor labels. ``d_value`` is only used by the
    Chinese Whispers branch. Any other ``method`` prints a hint and calls
    ``exit()``.
    """
    print("\nUsing {} cluster method".format(method))

    if method == "CW":
        # Compute clusters using Chinese Whispers
        clusters = dlib.chinese_whispers_clustering(descriptors, d_value)
        num_classes = len(set(clusters))
    elif method == "DB":
        # Compute clusters using DBSCAN
        dbscan = DBSCAN(metric="euclidean", n_jobs=1)
        dbscan.fit(descriptors)
        distinct = np.unique(dbscan.labels_)
        # Labels of -1 (or lower) are not counted as clusters.
        num_classes = len(np.where(distinct > -1)[0])
        clusters = dbscan.labels_
    else:
        print("Please provide proper method as CW or DB")
        exit()

    print("Number of clusters: {}".format(num_classes))
    print("Clusters: {}".format(clusters))
    return clusters
def preprocess_faces(self, faces):
    """Return one representative face per Chinese Whispers cluster.

    ``faces`` is a sequence of dicts with an ``'encoding'`` entry. The
    encodings are clustered with a fixed threshold of 0.5; for every cluster
    the member whose encoding is closest (by
    ``face_recognition.face_distance``) to the cluster's average encoding is
    selected. The selected face dicts are returned as a list.
    """
    vectors = [dlib.vector(face['encoding']) for face in faces]
    labels = dlib.chinese_whispers_clustering(vectors, 0.5)

    representatives = []
    for group in list(set(labels)):
        # Positions (into ``faces``) of every member of this cluster.
        member_ids = [idx for idx, label in enumerate(labels) if label == group]
        member_encodings = [faces[idx]['encoding'] for idx in member_ids]

        # Centroid of the cluster and each member's distance to it.
        centroid = np.average(member_encodings, axis=0)
        distances = face_recognition.face_distance(member_encodings, centroid)

        closest = member_ids[np.argmin(distances)]
        representatives.append(faces[closest])
    return representatives
def cluster_faces(self):
    """Cluster ``self.descriptors`` and record labels and positions.

    Stores the Chinese Whispers labels (threshold 0.5) in ``self.labels``,
    logs the number of distinct labels through ``info`` and sets
    ``self.indices`` to ``[0, 1, ..., len(labels) - 1]``.
    """
    labels = dlib.chinese_whispers_clustering(self.descriptors, 0.5)
    self.labels = labels
    info("Number of clusters: {:,}".format(len(set(labels))))
    # One index per label, in order.
    self.indices = list(range(len(labels)))
def clustring(self, faces_info):
    """Cluster faces and copy each image into a per-cluster directory.

    The ``'face_encoding'`` of every entry in ``faces_info`` is appended to
    ``self.face_encodings`` (which therefore keeps growing across calls);
    all accumulated encodings are clustered with a threshold of 0.5. For
    every label a directory named after the label is created under the
    current working directory and the images of its members are written
    there under their original file names.

    Args:
        faces_info: iterable of dicts providing ``'face_encoding'``.
    """
    for data in faces_info:
        self.face_encodings.append(dlib.vector(data['face_encoding']))

    labels = np.array(dlib.chinese_whispers_clustering(self.face_encodings, 0.5))
    print("All cluster labels :", labels)
    unique_labels = np.unique(labels)
    print("Number of unique faces found : ", len(unique_labels))
    print("Saving faces..........")

    for label in unique_labels:
        output_dir = os.path.join(os.getcwd(), str(label))
        os.makedirs(output_dir, exist_ok=True)
        for i in np.where(labels == label)[0]:
            # NOTE(review): image paths come from self.faces_info, not from
            # the faces_info argument - kept as in the original; confirm the
            # two are meant to be index-aligned with self.face_encodings.
            image_path = self.faces_info[i]['img_path']
            image = cv2.imread(image_path)
            # basename keeps the full name, so "a.b.jpg" no longer loses its
            # real extension the way split('.')[0] / [1] did.
            cv2.imwrite(os.path.join(output_dir, os.path.basename(image_path)), image)
def __clusterize(self, files_faces, debug_out_folder=None):
    """Name every face ``unknown_<label>`` according to its cluster.

    The files are visited in a random order; the encodings of all their
    faces are clustered with ``self.__threshold_clusterize`` and the labels
    are passed through ``self.__reassign_by_count``. Each face dict then
    gets a ``'name'`` of the form ``'unknown_{:05d}'``. Processing stops
    early as soon as ``self.__step_stage()`` returns a truthy value. When
    ``debug_out_folder`` is given, debug images are saved for every
    processed file.
    """
    self.__start_stage(len(files_faces))

    order = list(range(len(files_faces)))
    random.shuffle(order)

    # Encodings are collected in the shuffled order; labels are consumed in
    # that same order below.
    encodings = []
    for file_idx in order:
        for face in files_faces[file_idx]['faces']:
            encodings.append(dlib.vector(face['encoding']))

    raw_labels = dlib.chinese_whispers_clustering(
        encodings, self.__threshold_clusterize)
    labels = self.__reassign_by_count(raw_labels)

    cursor = 0
    for file_idx in order:
        if self.__step_stage():
            break
        entry = files_faces[file_idx]
        for face in entry['faces']:
            face['name'] = 'unknown_{:05d}'.format(labels[cursor])
            cursor += 1

        if debug_out_folder:
            filename = entry['filename']
            media = tools.load_media(filename,
                                     self.__max_size,
                                     self.__max_video_frames,
                                     self.__video_frames_step)
            debug_out_file_name = self.__extract_filename(filename)
            self.__save_debug_images(
                entry['faces'], media,
                debug_out_folder, debug_out_file_name)

    self.__end_stage()
def cluster(self):
    """Group ``self.all_poses`` by the cluster label of the matching face.

    ``self.all_faces`` is clustered with a threshold of 0.5; the result is
    stored in ``self.pose_by_label`` as ``{label: [pose, ...]}``, where the
    i-th pose is filed under the i-th label.
    """
    face_labels = dlib.chinese_whispers_clustering(self.all_faces, 0.5)
    self.pose_by_label = {}
    for position, label in enumerate(face_labels):
        bucket = self.pose_by_label.setdefault(label, [])
        bucket.append(self.all_poses[position])
def __create_dlib_cluster(config, shape):
    """Cluster ``shape`` with the threshold taken from a JSON config.

    Args:
        config: JSON text; its ``'threshold'`` entry is used when present
            and truthy, otherwise 0.5 (so a threshold of 0 also falls back
            to 0.5).
        shape: value resolved through ``__get_value__`` before clustering.

    Returns:
        ``(number_of_distinct_labels, labels)``.
    """
    settings = json.loads(config)
    threshold = settings.get('threshold') or 0.5
    vectors = __get_value__(shape)
    labels = dlib.chinese_whispers_clustering(vectors, threshold)
    cluster_count = len(set(labels))
    return cluster_count, labels
def cluster_faces(src_dir):
    """Add a ``cluster`` column to ``<src_dir>/metadata.csv`` if missing.

    The JSON text in ``json_embedding`` is decoded into an ``embedding``
    column, the embeddings are clustered with Chinese Whispers (threshold
    0.5) and the frame is written back to the same CSV file.
    """
    metadata_path = os.path.join(src_dir, 'metadata.csv')

    # Load face metadata
    faces_df = pd.read_csv(metadata_path)

    # Nothing to do when a clustering already exists.
    # NOTE(review): the original indentation was lost; this assumes the CSV
    # was only rewritten when the clustering was freshly computed - confirm.
    if 'cluster' in faces_df.columns:
        return

    # Chinese whispers clustering
    faces_df['embedding'] = faces_df['json_embedding'].apply(json.loads)
    X = np.array(list(faces_df['embedding']))
    vectors = [dlib.vector(row) for row in X]
    faces_df['cluster'] = dlib.chinese_whispers_clustering(vectors, 0.5)

    # Persist clustering
    faces_df.to_csv(metadata_path, index=False)
def compute_similarities(data_dir, similarity_threshold=0.6, identity_threshold=0.4, criminal_fraction=0.1, **kwargs):
    """Recompute pairwise face similarities and cluster labels in the database.

    Steps, in order:
      1. Load all ``(face_id, json_descriptor)`` rows via ``db.get_all_descriptors()``.
      2. Compute the full Euclidean distance matrix between descriptors.
      3. Replace the stored similarities with every ordered pair ``(i, j)``,
         ``i != j``, whose distance is below ``similarity_threshold``.
      4. Cluster the descriptors with Chinese Whispers using
         ``identity_threshold`` and store the labels via ``db.update_labels``.
      5. If ``args.save_clusters`` is set, symlink each face image into a
         per-cluster directory below ``data_dir``.
      6. Remove the clusters returned by
         ``db.get_clusters_with_criminals(criminal_fraction)`` and commit.

    ``args`` and ``db`` are not defined in this function; they are read from
    the enclosing scope. ``kwargs`` is accepted but not used here.

    Returns:
        ``(num_faces, num_similarities, num_clusters)``; when fewer than two
        faces exist, ``(num_faces, 0, 0)`` is returned before anything is
        written.
    """
    # ``t`` is only referenced by the commented-out timing prints below.
    t = Timer()
    all_descriptors = db.get_all_descriptors()
    # Each row is (face_id, descriptor-as-JSON-text).
    descriptors = [json.loads(f[1]) for f in all_descriptors]
    face_ids = [f[0] for f in all_descriptors]
    num_faces = len(all_descriptors)
    #print("get_all_descriptors():", t)
    #print("Faces: %d" % len(all_descriptors), end='')
    if num_faces < 2:
        #print()
        return num_faces, 0, 0

    X = Y = np.array(descriptors)
    #print("convert to array:", t)
    # Squared norms; distances follow from |x|^2 + |y|^2 - 2 x.y, clamped at 0
    # before the square root so rounding errors cannot produce NaN.
    X2 = Y2 = np.sum(np.square(X), axis=-1)
    dists = np.sqrt(np.maximum(X2[:, np.newaxis] + Y2[np.newaxis] - 2 * np.dot(X, Y.T), 0))
    #print("calculate dists:", t)

    db.delete_similarities()
    #print("delete similarities:", t)
    num_similarities = 0
    # Both (i, j) and (j, i) are visited, so every similar pair is stored twice.
    for i, j in zip(*np.where(dists < float(similarity_threshold))):
        if i != j:
            db.insert_similarity([face_ids[i], face_ids[j], dists[i, j]])
            num_similarities += 1
    #print("save similarities:", t)

    # cluster faces and update labels
    descriptors_dlib = [dlib.vector(d) for d in descriptors]
    clusters = dlib.chinese_whispers_clustering(descriptors_dlib, float(identity_threshold))
    db.update_labels(zip(clusters, face_ids))
    num_clusters = len(set(clusters))

    if args.save_clusters:
        for cluster_num, face_id in zip(clusters, face_ids):
            facefile = os.path.realpath(os.path.join(data_dir, args.save_faces, "face_%05d.jpg" % face_id))
            clusterdir = os.path.join(data_dir, args.save_clusters, str(cluster_num))
            makedirs(clusterdir)
            # Create the link under a temporary name and rename it, so an
            # existing link with the final name is replaced by the rename.
            os.symlink(facefile, os.path.join(clusterdir, 'tmpfile'))
            os.rename(os.path.join(clusterdir, 'tmpfile'), os.path.join(clusterdir, "face_%05d.jpg" % face_id))

    # remove clusters with more than given amount of criminals
    criminal_clusters = db.get_clusters_with_criminals(criminal_fraction)
    for cluster in criminal_clusters:
        db.remove_cluster(cluster['cluster_num'])

    db.commit()
    #print("commit:", t)
    #print(", Similarities: %d, Time: %.2fs" % (num_similarities, t.total()))
    return num_faces, num_similarities, num_clusters
def cluster_embeddings(encodings_path=None):
    """Cluster pickled face encodings and copy the images into one folder.

    The pickle at ``encodings_path`` must contain a sequence of dicts with
    an ``"encoding"`` array and an ``"image_path"`` (a ``pathlib.Path``).
    The Chinese Whispers threshold is the mean distance to each point's
    third-nearest neighbour column returned by a 5-neighbour query. Every
    image is copied to ``<grandparent of first image>/clustered_faces`` as
    ``<label>_<original name>``.

    Args:
        encodings_path: path of the pickle file. The default ``None`` is not
            usable; ``Path(None)`` raises ``TypeError``.
    """
    # Load previously generated embeddings
    print("Loading encodings...")
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    with open(Path(encodings_path), "rb") as encodings_file:
        data = pickle.load(encodings_file)
    data = np.array(data)

    # dlib's Chinese Whispers clustering expects dlib vectors.
    encodings = [dlib.vector(d["encoding"].squeeze()) for d in data]

    # Calculate a threshold value for Chinese Whispers
    neigh = NearestNeighbors(n_neighbors=5)
    nbrs = neigh.fit(encodings)
    distances, _ = nbrs.kneighbors(encodings)
    # Column-wise sort kept from the original (it does not change which
    # values are in column 2, only their order).
    distances = np.sort(distances, axis=0)
    mean_distance = np.mean(distances[:, 2])

    # Clustering with Chinese Whispers algorithm
    labels = dlib.chinese_whispers_clustering(encodings, mean_distance)

    # Split images into clusters based on labels
    image_paths = [d["image_path"] for d in data]
    output_folder = image_paths[0].parent.parent.joinpath("clustered_faces")
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    for current_label, current_file in zip(labels, image_paths):
        new_path = output_folder.joinpath(
            str(current_label) + "_" + current_file.name)
        shutil.copy(current_file, new_path)
def same_person(photo1, photo2):
    """Tell whether two photo files show the same person.

    Both images are loaded and scanned for faces; if either has none,
    ``False`` is returned and the files are left untouched. Otherwise the
    descriptor of the last detected face of each photo is computed, a green
    rectangle is drawn around the first detected face of each photo, both
    files are overwritten with the annotated image, and the two descriptors
    are clustered with a threshold of 0.5.

    Returns:
        ``True`` when both descriptors land in one cluster, else ``False``.
    """
    detector = dlib.get_frontal_face_detector()
    sp = dlib.shape_predictor(f"{current_app.config['MODEL_DIR']}/sp.dat")
    facerec = dlib.face_recognition_model_v1(f"{current_app.config['MODEL_DIR']}/fr.dat")

    def last_face_descriptor(image, boxes):
        # Every face is processed; only the last descriptor is kept.
        descriptor = None
        for box in boxes:
            landmarks = sp(image, box)
            descriptor = facerec.compute_face_descriptor(image, landmarks)
        return descriptor

    def outline_first_face(image, boxes):
        first = boxes[0]
        cv2.rectangle(
            image,
            (first.left(), first.top()),
            (first.right(), first.bottom()),
            (0, 255, 0),
            3
        )

    p1 = dlib.load_rgb_image(photo1)
    p2 = dlib.load_rgb_image(photo2)
    face1 = detector(p1, 1)
    face2 = detector(p2, 1)
    if not face1 or not face2:
        return False

    desc1 = last_face_descriptor(p1, face1)
    desc2 = last_face_descriptor(p2, face2)

    outline_first_face(p1, face1)
    outline_first_face(p2, face2)

    # Swap the channel order before handing the images to cv2.imwrite.
    p1 = cv2.cvtColor(p1, cv2.COLOR_BGR2RGB)
    p2 = cv2.cvtColor(p2, cv2.COLOR_BGR2RGB)
    cv2.imwrite(photo1, p1)
    cv2.imwrite(photo2, p2)

    cluster = dlib.chinese_whispers_clustering([desc1, desc2], 0.5)
    return len(set(cluster)) == 1
def cluster():
    """Interactively cluster the descriptors of a time window.

    Prompts for a date, a start time and an end time, fetches the matching
    descriptors through ``ptf.retrive`` and clusters them with a threshold
    of 0.5. Prints the labels, both list lengths, the set of labels, the
    cluster count and the elapsed time (measured from before the prompts),
    and returns the cluster count.
    """
    started = time.time()

    date = input("enter a date in dd-mm-yyy format")
    from_time = input("enter start time in hh:mm format")
    to_time = input("enter end time in hh:mm format")

    records = ptf.retrive(date, from_time, to_time)
    descriptors = [dlib.vector(record) for record in records]

    # Cluster the faces.
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    finished = time.time()

    print(labels)
    print(len(descriptors))
    print(len(labels))
    distinct_labels = set(labels)
    print(distinct_labels)
    num_classes = len(distinct_labels)  # total number of clusters
    print("Number of clusters: {}".format(num_classes))
    print(finished - started)
    return num_classes
def __init__(self, data_path):
    """Load face-track embeddings, cluster them and build the annotation.

    The whitespace-separated file at ``data_path`` has no header; its
    columns are named ``time``, ``track`` and ``d0`` .. ``d127``. The rows
    are sorted by track and time, the 128 embedding columns are clustered
    with Chinese Whispers (threshold 0.5) and the label is stored in a new
    ``cluster`` column of ``self.data``.

    Sets:
        self.data: the sorted frame including the ``cluster`` column.
        self.labels: the distinct per-track labels.
        self.starting_point: ``Annotation(modality='face')`` mapping each
            non-empty ``(segment, track)`` to the track's label.
    """
    names = ['time', 'track'] + ['d{0}'.format(i) for i in range(128)]

    # sep=r'\s+' replaces the deprecated delim_whitespace=True.
    self.data = read_table(data_path, sep=r'\s+', header=None, names=names)
    self.data.sort_values(by=['track', 'time'], inplace=True)

    # One dlib descriptor per row of embeddings.
    descriptors = [dlib.vector(row) for row in self.data.iloc[:, 2:].values]

    # One label per row, e.g. [0 0 2 2 2].
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    self.data['cluster'] = pandas.Series(labels, index=self.data.index)

    # TODO: this can be improved by taking highest count of label in each track
    # Label of each track = label of the track's first row.
    track_label = self.data.groupby(by='track', as_index=False).first()[
        ['track', 'cluster']].values

    # Distinct labels over all tracks.
    self.labels = np.unique(track_label[:, 1])

    # Look labels up by track id instead of by row position, so track ids do
    # not have to be exactly 0..n-1.
    label_by_track = dict(zip(track_label[:, 0], track_label[:, 1]))

    self.starting_point = Annotation(modality='face')
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for track, segment in self.data.groupby('track').apply(_to_segment).items():
        if not segment:
            continue
        self.starting_point[segment, track] = label_by_track[track]
def cluster():
    """Turn the pending candidate faces into unique persons.

    The encodings of ``helpers.candidate_persons`` are clustered with a
    threshold of 0.5 and every candidate gets an integer ``"label"``. For
    each label ``0 .. num_classes - 1`` with at least
    ``helpers.MIN_FACES_PER_CLUSTER`` members, the mean encoding is appended
    to ``helpers.unique_persons`` together with a fresh ``uuid.uuid1()``.
    Finally ``helpers.candidate_persons`` is reset to an empty list.
    """
    candidates = helpers.candidate_persons
    encodings = [d["encoding"] for d in candidates]
    labels = dlib.chinese_whispers_clustering(encodings, 0.5)
    num_classes = len(set(labels))

    # Label every candidate and group the candidates by label in one pass.
    members_by_label = {}
    for person, label in zip(candidates, labels):
        person["label"] = int(label)
        members_by_label.setdefault(person["label"], []).append(person)

    for label in range(num_classes):
        face_encs = members_by_label.get(label, [])
        if len(face_encs) >= helpers.MIN_FACES_PER_CLUSTER:
            mean_enc = np.zeros(128)
            for fe in face_encs:
                mean_enc += fe["encoding"]
            mean_enc = mean_enc / len(face_encs)
            helpers.unique_persons.append({
                "uuid": uuid.uuid1(),
                "Mean": mean_enc
            })

    helpers.candidate_persons = []
def clustering(self,
               image_list=None,
               output_folder_path=configs_clustering_output_folder,
               shape_predictor_path=config_shape_predictor_path,
               recognition_model_path=config_recognition_model_path):
    """Cluster the faces found in ``image_list`` and save the biggest cluster.

    Face clustering can pick a specific person out of a group of people. It
    is assumed that the largest cluster contains the photos of the common
    person in the photo set; the faces of that cluster are saved as 150x150
    face chips named ``face_<n>`` in ``output_folder_path``.

    TODO: every cluster with more than 2 members could be saved to a folder.

    Grayscale images cannot be used; they fail with
    ``RuntimeError: Unsupported image type, must be RGB image.``

    Args:
        image_list: iterable of image paths; ``None`` is treated as empty.
        output_folder_path: directory for the face chips (created if needed).
        shape_predictor_path: path of the dlib shape predictor model.
        recognition_model_path: path of the dlib face recognition model.
    """
    if not os.path.isdir(output_folder_path):
        os.makedirs(output_folder_path)

    detector = dlib.get_frontal_face_detector()
    shape_predictor = dlib.shape_predictor(shape_predictor_path)
    recognition_model = dlib.face_recognition_model_v1(
        recognition_model_path)

    descriptors = []
    images = []
    # Find every face and compute its 128-dimensional descriptor.
    for image_path in image_list or []:
        print('正在处理图片: {}'.format(image_path))
        img = io.imread(image_path)
        dets = detector(img, 1)
        num_faces = len(dets)
        if num_faces == 0:
            print("没有找到人脸,文件路径{}".format(image_path))
            continue
        print('检测到的人脸数: {}'.format(num_faces))
        for d in dets:
            # Landmarks of the face inside rectangle d.
            shape = shape_predictor(img, d)
            # 128-dimensional vector describing the face.
            face_descriptor = recognition_model.compute_face_descriptor(
                img, shape)
            descriptors.append(face_descriptor)
            images.append((img, shape))

    # Cluster the faces.
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    num_classes = len(set(labels))
    print("聚类的数量: {}".format(num_classes))

    # Count the members of every label once instead of rescanning the labels
    # for each class.
    class_sizes = {}
    for label in labels:
        class_sizes[label] = class_sizes.get(label, 0) + 1

    # Biggest class; on ties the smallest label wins, as before.
    biggest_class = None
    biggest_class_length = 0
    for candidate in range(0, num_classes):
        class_length = class_sizes.get(candidate, 0)
        if class_length > biggest_class_length:
            biggest_class_length = class_length
            biggest_class = candidate
    print("最大聚类的索引号: {}".format(biggest_class))
    print("最大聚类中存储的人脸数: {}".format(biggest_class_length))

    # Positions of the faces that belong to the biggest class.
    indices = [i for i, label in enumerate(labels) if label == biggest_class]
    print("最大聚类中的图片索引:{}".format(str(indices)))

    # Save the extracted faces.
    print('正在保存最大s聚类到脸部文件夹{}'.format(output_folder_path))
    for i, index in enumerate(indices):
        img, shape = images[index]
        file_path = os.path.join(output_folder_path, 'face_' + str(i))
        # size and padding are optional; 150x150 with 0.25 padding is used.
        dlib.save_face_chip(img, shape, file_path, size=150, padding=0.25)
embDim = 128 emb_array = np.zeros((nbFaces,embDim)) faceIx = 0 for file_encs in encs: for enc_array in file_encs: dlib_vec = dlib.vector(enc_array) emb_array[faceIx,:] = enc_array faceIx = faceIx+1 encodings.append(dlib_vec) print("[INFO] Clustering faces with Chinese Whispers algorithm") labels_pred = dlib.chinese_whispers_clustering(encodings, 0.5) ncols=20 fig = plt.figure(figsize=(20, 4)) thumbnails_labels = sorted(zip(thumbnails,labels_pred), key = lambda t: t[1]) for idx,(npimg,label) in enumerate(thumbnails_labels): ax = fig.add_subplot(2, ncols, idx+1, xticks=[], yticks=[]) ax.imshow(npimg) ax.set_title(label) fig.savefig( 'output_images-thumbnails-classes.png', bbox_inches='tight',
def cluster(request, eventname):
    """Cluster the faces of an event's photos and sort them per registered user.

    Steps:
      1. Detect and describe every face in the ``profile-pics`` container
         and in the event container ``eventname``.
      2. Cluster all descriptors (threshold 0.5) and copy the source blob of
         every face into a new public container ``<eventname><cluster no>``.
      3. For every profile picture, find the first cluster container that
         contains it, copy that container's blobs into
         ``<eventname>-<profile picture name without extension>`` and delete
         the cluster container.
      4. Delete the event container and return ``HttpResponse("Successfull")``.

    Args:
        request: the incoming request (not used in the body).
        eventname: event name; also the name of the event's blob container.
    """
    storage_url = "https://picprocurestorageaccount.blob.core.windows.net/"

    md = AzureMediaStorage()
    block_blob_service = BlockBlobService(account_name=md.account_name,
                                          account_key=md.account_key)

    # Pre-trained models; expected next to this file.
    predictor_path = 'shape_predictor_5_face_landmarks.dat'
    face_rec_model_path = 'dlib_face_recognition_resnet_model_v1.dat'

    faces_folder_path = block_blob_service.list_blobs(container_name=eventname)
    check_folder = block_blob_service.list_blobs(container_name='profile-pics')
    output_folder = []

    # Kept from the original: looking the event up raises for an unknown
    # event name. The registered users themselves were never used.
    Events.objects.get(event_name=eventname)

    detector = dlib.get_frontal_face_detector()  # finds the faces
    sp = dlib.shape_predictor(predictor_path)  # finds face landmarks
    facerec = dlib.face_recognition_model_v1(face_rec_model_path)  # describes a face

    def _read_image(url):
        # Close the HTTP response as soon as the bytes have been read.
        with urllib.request.urlopen(url) as response:
            return numpy.array(Image.open(io.BytesIO(response.read())))

    descriptors = []
    images = []

    for blob in check_folder:
        print('Processing file:{}'.format(blob.name))
        img1 = _read_image(storage_url + "profile-pics/" + blob.name)
        # The 1 in the second argument upsamples the image once, which makes
        # everything bigger and allows more faces to be detected.
        dets = detector(img1, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for d in dets:
            # Landmarks of the face in box d, then its 128D descriptor.
            shape = sp(img1, d)
            face_descriptor = facerec.compute_face_descriptor(img1, shape)
            descriptors.append(face_descriptor)
            images.append(('profile-pics', blob.name, img1, shape))
    print('profile pics ended')

    for f in faces_folder_path:
        print("Processing file: {}".format(f.name))
        img = _read_image(storage_url + eventname + '/' + f.name)
        print('reading completed ' + f.name)
        dets = detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for d in dets:
            shape = sp(img, d)
            face_descriptor = facerec.compute_face_descriptor(img, shape)
            descriptors.append(face_descriptor)
            images.append((eventname, f.name, img, shape))
            print('image appended ' + f.name)

    # Cluster the faces.
    print("event load completed")
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    num_classes = len(set(labels))  # Total number of clusters
    print("Number of clusters: {}".format(num_classes))

    for i in range(0, num_classes):
        indices = [j for j, label in enumerate(labels) if label == i]
        print("Indices of images in the cluster {0} : {1}".format(
            str(i), str(indices)))
        print("Size of cluster {0} : {1}".format(str(i), str(len(indices))))

        block_blob_service.create_container(eventname + str(i),
                                            public_access='blob')
        # Copy the source blob of each face into the cluster's container.
        print("Saving faces to output folder...")
        md.azure_container = eventname + str(i)
        output_folder.append(md.azure_container)
        for index in indices:
            container, name, _, _ = images[index]
            url = storage_url + container + '/' + name
            block_blob_service.copy_blob(container_name=md.azure_container,
                                         blob_name=name,
                                         copy_source=url)

    for profile_blob in check_folder:
        for output in output_folder:
            try:
                # Raises when the profile picture is not in this container.
                block_blob_service.get_blob_metadata(container_name=output,
                                                     blob_name=profile_blob.name)
                container_name = eventname + '-' + profile_blob.name.split('.')[0]
                block_blob_service.create_container(
                    container_name=container_name, public_access='blob')
                for blob in block_blob_service.list_blobs(container_name=output):
                    url = storage_url + output + '/' + blob.name
                    block_blob_service.copy_blob(container_name=container_name,
                                                 blob_name=blob.name,
                                                 copy_source=url)
                block_blob_service.delete_container(output)
                # Safe although we iterate output_folder: we leave the loop
                # right after the removal.
                output_folder.remove(output)
                break
            except Exception:
                # Best effort, as before: try the next cluster container.
                # (Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit are no longer swallowed.)
                continue

    block_blob_service.delete_container(eventname)
    return HttpResponse("Successfull")
dets = detector(img, 1) print("Number of faces detected: {}".format(len(dets))) # Now process each face we found. for k, d in enumerate(dets): # Get the landmarks/parts for the face in box d. shape = sp(img, d) # Compute the 128D vector that describes the face in img identified by # shape. face_descriptor = facerec.compute_face_descriptor(img, shape) descriptors.append(face_descriptor) images.append((img, shape)) # Now let's cluster the faces. labels = dlib.chinese_whispers_clustering(descriptors, 0.5) num_classes = len(set(labels)) print("Number of clusters: {}".format(num_classes)) # Find biggest class biggest_class = None biggest_class_length = 0 for i in range(0, num_classes): class_length = len([label for label in labels if label == i]) if class_length > biggest_class_length: biggest_class_length = class_length biggest_class = i print("Biggest cluster id number: {}".format(biggest_class)) print("Number of faces in biggest cluster: {}".format(biggest_class_length))
def run(self):
    """Cluster the faces of all ``*.jpg`` files in ``self.input_dir``.

    Every detected face is described with ``self.face_recognizer``; the
    descriptors are clustered with Chinese Whispers (threshold 0.5) and a
    150x150 face chip of every face is written to
    ``<self.output_dir>/<label>/face_<n>.jpg``. Progress is printed and
    appended to ``self.log``.

    When ``self.input_dir`` does not exist a message is printed and nothing
    else happens. When ``self.output_dir`` is ``None`` it defaults to
    ``face_clustering_output`` next to the input directory.
    """
    if not os.path.exists(self.input_dir):
        print(
            u'Input directory does not exist.Please check your input directory.'
        )
        return

    if self.output_dir is None:
        father_path = os.path.abspath(
            os.path.dirname(self.input_dir) + os.path.sep + ".")
        self.output_dir = os.path.join(father_path, 'face_clustering_output')
        print(self.output_dir)

    # Parallel lists: descriptors[k] belongs to images[k].
    descriptors = []
    images = []

    self.log.append(u'----' * 30)  # separator

    # Walk over every picture in the input folder.
    for f in glob.glob(os.path.join(self.input_dir, "*.jpg")):
        print('Processing file:{}'.format(f))
        self.log.append(u'Processing file:{}'.format(f))
        # Read the picture and convert it to the RGB colour space.
        img = self.cv_imread(f)
        img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Detect faces.
        dets = self.detector(img2, 1)
        print("Number of faces detected: {}".format(len(dets)))
        self.log.append(u"Number of faces detected: {}".format(len(dets)))

        for face in dets:
            # Landmarks of the face, then its projection to 128D.
            shape = self.shape_predictor(img2, face)
            face_descriptor = self.face_recognizer.compute_face_descriptor(
                img2, shape)
            descriptors.append(face_descriptor)
            images.append((img2, shape))

    self.log.append(u'----' * 30)

    # Clustering
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    print("labels: {}".format(labels))
    self.log.append(u"labels: {}".format(labels))
    num_classes = len(set(labels))
    print("Number of clusters: {}".format(num_classes))
    self.log.append(u"Number of clusters: {}".format(num_classes))
    self.log.append(u'----' * 30)

    # label -> list of (image, shape)
    face_dict = {}
    for i in range(num_classes):
        face_dict[i] = []
    for label, face in zip(labels, images):
        face_dict[label].append(face)

    # Save the results, one directory per label.
    for key in face_dict.keys():
        file_dir = os.path.join(self.output_dir, str(key))
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)

        for index, (image, shape) in enumerate(face_dict[key]):
            file_path = os.path.join(file_dir, 'face_' + str(index) + '.jpg')
            # Was the Python 2 statement "print file_path", a syntax error
            # on Python 3; the call form works on both.
            print(file_path)
            # get_face_chip returns RGB; cv2.imwrite expects BGR.
            res = dlib.get_face_chip(image, shape, size=150, padding=0.25)
            res = cv2.cvtColor(res, cv2.COLOR_RGB2BGR)
            cv2.imwrite(file_path, res)
facerec = dlib.face_recognition_model_v1(face_rec_model_path) paths = glob.glob('faces/*.jpg') vectors = [] images = [] for path in paths: img = imread(path) dets = detector(img, 1) for i, d in enumerate(dets): shape = predictor(img, d) face_vector = facerec.compute_face_descriptor(img, shape) vectors.append(face_vector) images.append((img, shape)) # 聚类函数 labels = dlib.chinese_whispers_clustering(vectors, 0.5) num_classes = len(set(labels)) print('共聚为 %d 类' % num_classes) biggest_class = Counter(labels).most_common(1) print(biggest_class) output_dir = 'most_common' if not os.path.exists(output_dir): os.mkdir(output_dir) face_id = 1 for i in range(len(images)): if labels[i] == biggest_class[0][0]: img, shape = images[i] # 把人脸切出来 dlib.save_face_chip(img, shape,
def _request_face_features(name, frame):
    """Send one JPEG-encoded frame to the prediction service; return its response."""
    transport.open()
    try:
        req = ImageReq()
        req.name = name
        req.image_data = cv2.imencode(".jpg", frame)[1]
        return client.predict_image(req)
    finally:
        # Close the connection even when the request fails.
        transport.close()


def _face_metadata(video_file, frame_idx, face_feature):
    """Build the metadata record that is written to the list file for one face."""
    r = face_feature.region
    return [
        video_file,
        frame_idx,
        [int(r.x1), int(r.x2), int(r.y1), int(r.y2)],
        int(face_feature.age),
        face_feature.gender,
        int(face_feature.attractive),
        exps[np.argmax(face_feature.exps)],
    ]


def get_feature(video_path, save_path, save_list):
    """Extract faces from videos, cluster them and save the frequent ones.

    Roughly one frame per second (every ``fps``-th frame) of each file in
    ``video_path`` is sent to the prediction service. Faces whose box area
    is at least ``MinFaceSize**2`` are clustered with Chinese Whispers
    (``FaceClustingThreshold``). For clusters with more than
    ``MinFaceCount`` faces, each face crop is written to
    ``<save_path>/<label>/<video>_<frame>.jpg`` and one
    ``<label>\\t<metadata>`` line is written to ``save_list``.

    Args:
        video_path: directory containing the video files.
        save_path: root directory for the per-cluster face crops.
        save_list: path of the text file receiving the metadata lines; it is
            truncated as soon as the function starts.
    """
    features = []
    idxs = []
    faces = []
    videos = []
    face_attrs = []

    with open(save_list, 'w') as fout:
        for video_file in sorted(os.listdir(video_path)):
            video = cv2.VideoCapture(os.path.join(video_path, video_file))
            try:
                fps = int(video.get(cv2.CAP_PROP_FPS))
                num_frame = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                print("processing video_file: {}, num_frame: {}, fps: {}".format(
                    video_file, num_frame, fps))

                # An unreadable file reports fps == 0; range() rejects a zero
                # step, so fall back to a step of 1.
                for i in range(0, num_frame, max(fps, 1)):
                    video.set(cv2.CAP_PROP_POS_FRAMES, i)
                    ret, frame = video.read()
                    if not ret:
                        print("done")
                        break

                    rsp = _request_face_features(video_file + "#" + str(i), frame)
                    if rsp.status != "OK":
                        print("error status: {}".format(rsp.status))
                        continue

                    for face_feature in rsp.face_features:
                        r = face_feature.region
                        # Skip faces that are too small.
                        if (r.x2 - r.x1) * (r.y2 - r.y1) < MinFaceSize * MinFaceSize:
                            continue
                        features.append(dlib.vector(face_feature.feature))
                        idxs.append(i)
                        faces.append(copy.deepcopy(frame[r.y1:r.y2, r.x1:r.x2, :]))
                        videos.append(video_file)
                        face_attrs.append(_face_metadata(video_file, i, face_feature))
            finally:
                video.release()

        # TODO(crw): for large features size, this would be very slow
        # for my case, it cost near 6 hours to complete.
        labels = dlib.chinese_whispers_clustering(features, FaceClustingThreshold)

        counter = Counter(labels)
        frequent_ids = {id_ for id_, cnt in counter.items() if cnt > MinFaceCount}

        for i, label in enumerate(labels):
            if label not in frequent_ids:
                continue
            id_path = os.path.join(save_path, str(label))
            if not os.path.exists(id_path):
                os.makedirs(id_path)
            cv2.imwrite(
                os.path.join(id_path, videos[i] + "_" + str(idxs[i]) + ".jpg"),
                faces[i])
            fout.write(str(label) + "\t" + str(face_attrs[i]) + "\n")
def Get_face_clustered_labels(faces_folder_path):
    """Cluster the faces found in a folder of ``*.jpg`` images.

    Faces are detected with dlib's CNN detector, described with the face
    recognition model and clustered with Chinese Whispers (threshold 0.5).
    While running, the number of images is written to ``/tmp/ProgressN`` and
    the 1-based position of the current image to ``/tmp/ProgressI``; both
    files are removed before returning, also when an error occurs.

    Args:
        faces_folder_path: directory that is searched for ``*.jpg`` files.

    Returns:
        dict mapping each cluster label to the list of image paths in which
        a face of that cluster was found (one entry per detected face).
    """
    import os
    import dlib
    import glob
    from tqdm import tqdm

    # Pre-trained models; download and unpack them before running:
    # http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2
    predictor_path = 'FaceClust/shape_predictor_5_face_landmarks.dat'
    # http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2
    face_rec_model_path = 'FaceClust/dlib_face_recognition_resnet_model_v1.dat'

    detector = dlib.cnn_face_detection_model_v1(
        'FaceClust/mmod_human_face_detector.dat')  # finds the faces
    sp = dlib.shape_predictor(predictor_path)  # finds face landmarks
    facerec = dlib.face_recognition_model_v1(face_rec_model_path)  # describes a face

    progress_total = '/tmp/ProgressN'
    progress_current = '/tmp/ProgressI'

    descriptors = []
    FACE_PATHS = []
    image_files = glob.glob(os.path.join(faces_folder_path, "*.jpg"))

    try:
        with open(progress_total, 'w+') as total_file:
            total_file.write(str(len(image_files)))

        for position, image_file in enumerate(tqdm(image_files), start=1):
            with open(progress_current, 'w+') as current_file:
                current_file.write(str(position))

            img = dlib.load_rgb_image(image_file)
            # The 1 in the second argument upsamples the image once, which
            # makes everything bigger and allows more faces to be detected.
            dets = detector(img, 1)
            for d in dets:
                # The CNN detector returns detections; the box is in .rect.
                shape = sp(img, d.rect)
                # 128D vector describing the face identified by shape.
                face_descriptor = facerec.compute_face_descriptor(img, shape)
                descriptors.append(face_descriptor)
                FACE_PATHS.append(image_file)

        # Cluster the faces.
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        num_classes = len(set(labels))  # Total number of clusters
        print("Number of clusters: {}".format(num_classes))

        face_label_dicts = {}
        for path, label in zip(FACE_PATHS, labels):
            face_label_dicts.setdefault(label, []).append(path)
    finally:
        for progress_file in (progress_total, progress_current):
            if os.path.isfile(progress_file):
                os.remove(progress_file)

    return face_label_dicts
def __cluster(faces_vectors: list, threshold=0.5):
    """Return dlib's Chinese Whispers labels for ``faces_vectors``.

    ``faces_vectors`` and ``threshold`` are forwarded unchanged to
    ``dlib.chinese_whispers_clustering``.
    """
    labels = dlib.chinese_whispers_clustering(faces_vectors, threshold)
    return labels
def cluster_faces(descriptors, threshold=0.45):
    """Cluster ``descriptors`` with Chinese Whispers and return the labels.

    Both arguments are passed straight to
    ``dlib.chinese_whispers_clustering``; ``threshold`` defaults to 0.45.
    """
    cluster_labels = dlib.chinese_whispers_clustering(descriptors, threshold)
    return cluster_labels
def process():
    """Cluster the faces of the ``*.png`` files and store the groups.

    Faces in ``Constants.faces_folder_path`` are detected, described and
    clustered with ``Constants.TOLERANCE_WHISPER``. When at least one
    cluster exists, the ``Wishper`` rows of the first ``Videos`` row are
    deleted, and for every face a 300x300 chip is saved below
    ``Constants.output_folder_path + <label>`` and a new ``Wishper`` row is
    committed. Any exception is logged and swallowed.
    """
    mylogger = loger.getLoger("Whisper", Constants.boltpath + "logs")
    try:
        # Result unused; the call is kept as in the original.
        Blocker.current_milli_time()

        session = DatabaseSession.session
        video = session.query(Videos).first()

        detector = dlib.get_frontal_face_detector()
        # 128D face descriptor predictor
        sp = dlib.shape_predictor(Constants.predictor_path)
        facerec = dlib.face_recognition_model_v1(Constants.face_rec_model_path)

        descriptors = []
        images = []

        # Find all the faces and compute a 128D descriptor for each of them.
        pattern = os.path.join(Constants.faces_folder_path, "*.png")
        for image_file in glob.glob(pattern):
            mylogger.info("Processing file: {}".format(image_file))
            img = dlib.load_rgb_image(image_file)

            # The 1 in the second argument upsamples the image once, which
            # makes everything bigger and allows more faces to be detected.
            dets = detector(img, 1)
            mylogger.info("Number of faces detected: {}".format(len(dets)))

            for d in dets:
                shape = sp(img, d)
                descriptors.append(facerec.compute_face_descriptor(img, shape))
                images.append((img, shape))

        # Cluster the faces.
        labels = dlib.chinese_whispers_clustering(descriptors,
                                                  Constants.TOLERANCE_WHISPER)
        num_classes = len(set(labels))
        mylogger.info("Number of clusters: {}".format(num_classes))
        if not num_classes > 0:
            return

        # Drop the previous groups of this video before storing new ones.
        session.query(Wishper).filter(Wishper.video == video).delete()
        session.commit()

        # The running face counter always equals the position in ``labels``.
        for face_no, label in enumerate(labels):
            # Ensure output directory exists
            cluster_dir = Constants.output_folder_path + str(label)
            if not os.path.isdir(cluster_dir):
                os.makedirs(cluster_dir)

            # Save the extracted face
            mylogger.info("Saving faces cluster to output folder...")
            img, shape = images[face_no]
            file_path = os.path.join(cluster_dir, "face_" + str(face_no))
            # size and padding are optional; 300x300 with 0.25 padding here.
            dlib.save_face_chip(img, shape, file_path, size=300, padding=0.25)

            whisper = Wishper(alias=file_path,
                              original_image=file_path,
                              video=video,
                              group=str(label),
                              path=file_path + ".jpg")
            session.add(whisper)
            session.commit()
    except Exception as inst:
        mylogger.error("Python error.")
        mylogger.error(type(inst))
        mylogger.error(inst)
def cluster_faces_by_CW(data, threshold=0.5):
    """Cluster face records with dlib's Chinese Whispers algorithm.

    Args:
        data: iterable of mappings; the value stored under ``"encoding"`` in
            each one is converted to a ``dlib.vector``.
        threshold: threshold handed to
            ``dlib.chinese_whispers_clustering`` (defaults to 0.5).

    Returns:
        The labels produced by ``dlib.chinese_whispers_clustering``, in the
        same order as ``data``.
    """
    vectors = []
    for record in data:
        vectors.append(dlib.vector(record["encoding"]))
    return dlib.chinese_whispers_clustering(vectors, threshold)
def cluster_faces_in_class(args):
    """Split the faces of one class into sub-groups by clustering.

    The faces are loaded with ``utils.load_faces_from_csv``; the descriptors
    of the class ``args.cls`` are clustered and the members of the largest
    clusters are inserted under a new ``group_<n>`` name, then removed from
    their original class.  The result is written back with
    ``utils.export_persons_to_csv``.

    Args:
        args: namespace providing ``db``, ``imgs_root``, ``cls``,
            ``threshold``, ``min_members``, ``max_clusters``, ``export`` and
            ``outdir`` (``outdir`` is only read when ``export`` is truthy).

    Returns:
        None.  If ``args.cls`` is not present in the loaded data a message is
        printed and nothing is changed.
    """
    preds_per_person = utils.load_faces_from_csv(args.db, args.imgs_root)
    if preds_per_person.get(args.cls) is None:
        print('Class {} not found.'.format(args.cls))
        return

    # Element 2 of each prediction is what gets clustered.
    descriptors = [dlib.vector(p[2]) for p in preds_per_person[args.cls]]

    # cluster the faces
    print('clustering...')
    all_indices = []
    all_lengths = []

    # Hard-wired switch between the two back-ends; DBSCAN is kept for
    # experiments but is currently never executed.
    use_chinese_whispers = True
    if use_chinese_whispers:
        # chinese whispers
        labels = dlib.chinese_whispers_clustering(descriptors, args.threshold)
        num_classes = len(set(labels))
        print("Number of clusters: {}".format(num_classes))

        # Group the face indices by label in a single pass.
        members = {}
        for i, label in enumerate(labels):
            members.setdefault(label, []).append(i)

        # Keep only the clusters that are large enough.
        for j in range(0, num_classes):
            indices = members.get(j, [])
            class_length = len(indices)
            if class_length >= args.min_members:
                all_indices.append(indices)
                all_lengths.append(class_length)
    else:
        # DBSCAN
        from sklearn.cluster import DBSCAN
        clt = DBSCAN(eps=args.threshold,
                     metric="euclidean",
                     n_jobs=4,
                     min_samples=args.min_members)
        clt.fit(descriptors)
        labels = np.unique(clt.labels_)
        num_classes = len(np.where(labels > -1)[0])
        if num_classes > 1:  # to be checked!!
            print("Number of clusters: {}".format(num_classes))
            # NOTE(review): `labels` still contains every unique label,
            # including any negative one excluded from num_classes above --
            # confirm whether those should become a group.
            for j in labels:
                idxs = np.where(clt.labels_ == j)[0]
                all_indices.append(list(idxs))
                all_lengths.append(len(idxs))

    # Cluster positions ordered from the largest cluster to the smallest.
    sort_index = np.argsort(np.array(all_lengths))[::-1]

    # Move the clustered faces to individual groups
    print('Moving the clustered faces to the database.')
    to_delete = []
    for i in sort_index[:args.max_clusters]:
        # The group is named after the cluster's position in all_indices.
        cluster_name = "group_" + str(i)

        # export to folders
        if args.export:
            cluster_path = os.path.join(args.outdir, cluster_name)
            os.makedirs(cluster_path, exist_ok=True)

        to_delete += all_indices[i]
        for n, index in enumerate(all_indices[i]):
            utils.insert_element_preds_per_person(preds_per_person, args.cls,
                                                  index, cluster_name)
            if args.export:
                file_name = os.path.join(
                    cluster_path,
                    'face_' + cluster_name + '_' + str(n) + '.jpg')
                utils.save_face_crop(file_name,
                                     preds_per_person[args.cls][index][1],
                                     preds_per_person[args.cls][index][0][1])

    # Pop from the highest index down so the remaining indices stay valid.
    # The entries are popped instead of going through
    # utils.delete_element_preds_per_person: per the original author's note
    # they would otherwise exist twice, in 'deleted' and in the cluster group.
    for i in sorted(to_delete, reverse=True):
        preds_per_person[args.cls].pop(i)

    utils.export_persons_to_csv(preds_per_person, args.imgs_root, args.db)
for k, d in enumerate(dets): # Get the landmarks/parts for the face in box d. shape = sp(img, d) # Compute the 128D vector that describes the face in img identified by shape. face_descriptor = facerec.compute_face_descriptor(img, shape) descriptors.append(face_descriptor) images.append((img, shape)) except: continue cv2.imshow('frame', frame) if cv2.waitKey(20) & 0xFF == ord('q'): break cap.release() # Cluster the faces. labels = dlib.chinese_whispers_clustering(descriptors, 0.5) num_classes = len(set(labels)) # Total number of clusters print("Number of clusters: {}".format(num_classes)) for i in range(0, num_classes): indices = [] class_length = len([label for label in labels if label == i]) for j, label in enumerate(labels): if label == i: indices.append(j) print("Indices of images in the cluster {0} : {1}".format(str(i),str(indices))) print("Size of cluster {0} : {1}".format(str(i),str(class_length))) output_folder_path = output_folder +'./'+ str(i) # Output folder for each cluster os.path.normpath(output_folder_path) os.makedirs(output_folder_path)
# Collect the records stored in every pickle file under PICKLES_DIR.
files = glob.glob(path.join(PICKLES_DIR, '*.pickle'))
print(len(files))
for picklefile in files:
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(picklefile, "rb") as f:
        d = pickle.load(f)
    data.extend(d)
print(len(data))

# One dlib vector per record, built from its "encoding" entry.
encodings = [dlib.vector(entry["encoding"]) for entry in data]
print(len(encodings))

# cluster the embeddings
print("[INFO] clustering...")
labels = dlib.chinese_whispers_clustering(encodings, THRESHOLD)

# Store the records together with their labels; the file name carries the
# threshold that produced the clustering.
tup = (data, labels)
pickleFile = 'clusters-%.3f.pickle' % THRESHOLD
with open(pickleFile, "wb") as f:
    pickle.dump(tup, f)