Esempio n. 1
0
def chinese_whispers(encodings, threshold=0.5):
    """Cluster encodings with dlib's Chinese Whispers implementation.

    Every item of ``encodings`` is wrapped in a ``dlib.vector`` and the
    resulting list is handed, together with ``threshold``, to
    ``dlib.chinese_whispers_clustering``. Its result is returned unchanged.

    The algorithm is named after the paper "Chinese Whispers - an Efficient
    Graph Clustering Algorithm and its Application to Natural Language
    Processing Problems".
    """
    dlib_vectors = list(map(dlib.vector, encodings))
    return dlib.chinese_whispers_clustering(dlib_vectors, threshold)
Esempio n. 2
0
def CLUSTER_TRACKS(DT, threshold):
    """Merge tracks into clusters using Chinese Whispers on mean track features.

    Args:
        DT: mapping of track id -> track record. Each record is read for the
            keys 'BBOX', 'Frame_ID', 'BBOX_FEAT', 'ANGLE', 'IMG', 'AVG_SIZE',
            'AREA' and 'LEN'. ``record['BBOX_FEAT'].mean(0)`` must be
            convertible to a ``dlib.vector``.
        threshold: value forwarded to ``dlib.chinese_whispers_clustering``.

    Returns:
        A ``defaultdict(dict)`` keyed by cluster label. For every cluster the
        'BBOX', 'Frame_ID', 'BBOX_FEAT', 'ANGLE' and 'IMG' values of its
        tracks are combined with ``+``, 'LEN' is summed, and 'AVG_SIZE' /
        'AREA' hold the values of the last track merged into the cluster.
    """
    tracks = list(DT.values())

    # One descriptor per track: the mean of its per-box features along axis 0.
    track_feats = [dlib.vector(track['BBOX_FEAT'].mean(0)) for track in tracks]

    # cluster_ids[k] is the label assigned to the k-th descriptor, i.e. to
    # tracks[k]; the label itself is NOT a track index.
    cluster_ids = dlib.chinese_whispers_clustering(track_feats, threshold)

    CL = defaultdict(dict)
    merged_keys = ('BBOX', 'Frame_ID', 'BBOX_FEAT', 'ANGLE', 'IMG')

    for track, cluster_id in zip(tracks, cluster_ids):
        merged = CL[cluster_id]

        if not merged:
            # First track of this cluster: start from the track's own values.
            for key in merged_keys:
                merged[key] = track[key]
            merged['AVG_SIZE'] = track['AVG_SIZE']
            merged['AREA'] = track['AREA']
            merged['LEN'] = track['LEN']
            continue

        # NOTE(review): `+` concatenates lists but adds numpy arrays
        # element-wise; 'BBOX_FEAT' supports .mean(0), so confirm which of the
        # two is intended for array-valued fields.
        for key in merged_keys:
            merged[key] = merged[key] + track[key]
        merged['AVG_SIZE'] = track['AVG_SIZE']
        merged['AREA'] = track['AREA']
        # Accumulate onto the cluster's running total.
        merged['LEN'] = merged['LEN'] + track['LEN']

    return CL
Esempio n. 3
0
    def create_clusters(self, descriptors, d_value=0.5, method="CW"):
        """Cluster descriptors with Chinese Whispers ("CW") or DBSCAN ("DB").

        Args:
            descriptors: the items to cluster; passed as-is to the chosen
                clustering routine.
            d_value: threshold handed to ``dlib.chinese_whispers_clustering``;
                not used by the DBSCAN branch.
            method: "CW" or "DB". Any other value prints a hint and calls
                ``exit()``.

        Returns:
            The labels produced by the selected algorithm (``labels_`` of the
            fitted DBSCAN model, or the dlib clustering result).
        """
        print("\nUsing {} cluster method".format(method))

        if method == "CW":
            # Chinese Whispers: every distinct label is a cluster.
            clusters = dlib.chinese_whispers_clustering(descriptors, d_value)
            num_classes = len(set(clusters))
        elif method == "DB":
            # DBSCAN: labels below 0 are excluded from the cluster count.
            model = DBSCAN(metric="euclidean", n_jobs=1)
            model.fit(descriptors)
            clusters = model.labels_
            distinct_labels = np.unique(clusters)
            num_classes = int((distinct_labels > -1).sum())
        else:
            print("Please provide proper method as CW or DB")
            exit()

        print("Number of clusters: {}".format(num_classes))
        print("Clusters: {}".format(clusters))

        return clusters
Esempio n. 4
0
    def preprocess_faces(self, faces):
        """Return one representative face per Chinese Whispers cluster.

        The 'encoding' of every entry in ``faces`` is clustered with a
        threshold of 0.5. For each cluster the entry whose encoding has the
        smallest ``face_recognition.face_distance`` to the cluster's average
        encoding is selected.

        Args:
            faces: sequence of dict-like items, each with an 'encoding' key.

        Returns:
            List with one item of ``faces`` per cluster.
        """
        vectors = [dlib.vector(item['encoding']) for item in faces]
        labels = dlib.chinese_whispers_clustering(vectors, 0.5)

        representatives = []
        for group in set(labels):
            # Positions (into `faces`) of the members of this cluster
            member_positions = [
                pos for pos, lab in enumerate(labels) if lab == group
            ]
            member_encodings = [faces[pos]['encoding']
                                for pos in member_positions]

            # Centroid of the cluster's encodings
            centroid = np.average(member_encodings, axis=0)

            # Member closest to the centroid represents the cluster
            distances = face_recognition.face_distance(
                member_encodings, centroid)
            closest_position = member_positions[np.argmin(distances)]
            representatives.append(faces[closest_position])

        return representatives
Esempio n. 5
0
 def cluster_faces(self):
     """Cluster ``self.descriptors`` and record labels and positions.

     Stores the result of ``dlib.chinese_whispers_clustering`` (threshold
     0.5) in ``self.labels``, reports the number of distinct labels through
     ``info`` and sets ``self.indices`` to the positions 0..len(labels)-1.
     """
     self.labels = dlib.chinese_whispers_clustering(self.descriptors, 0.5)
     cluster_count = len(set(self.labels))
     info("Number of clusters: {:,}".format(cluster_count))
     self.indices = [position for position, _ in enumerate(self.labels)]
Esempio n. 6
0
    def clustring(self, faces_info):
        """Cluster face encodings and copy each image into a per-label folder.

        The 'face_encoding' of every item in ``faces_info`` is appended to
        ``self.face_encodings`` (which is not cleared first), the accumulated
        encodings are clustered with a threshold of 0.5, and every image is
        re-written below ``<cwd>/<label>/`` under its original file name.

        Args:
            faces_info: iterable of dict-like items with a 'face_encoding' key.
        """
        for data in faces_info:
            self.face_encodings.append(dlib.vector(data['face_encoding']))

        labels = np.array(
            dlib.chinese_whispers_clustering(self.face_encodings, 0.5))
        print("All cluster labels :", labels)

        unique_labels = np.unique(labels)
        print("Number of unique faces found  : ", len(unique_labels))
        print("Saving faces..........")
        for label in unique_labels:
            # One output folder per cluster, created once per label.
            output_dir = os.path.join(os.getcwd(), str(label))
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)

            for i in np.where(labels == label)[0]:
                # NOTE(review): image paths are read from self.faces_info, not
                # from the `faces_info` argument - confirm both hold the same
                # records in the same order.
                image_path = self.faces_info[i]['img_path']
                image = cv2.imread(image_path)

                # Keep the full original file name so that names containing
                # several dots (or none) are not mangled.
                file_name = os.path.basename(image_path)
                cv2.imwrite(os.path.join(output_dir, file_name), image)
Esempio n. 7
0
    def __clusterize(self, files_faces, debug_out_folder=None):
        """Cluster all face encodings and name every face after its cluster.

        Files are visited in a random order. The encodings of all their faces
        are clustered with ``self.__threshold_clusterize``, the labels are
        passed through ``self.__reassign_by_count`` and each face receives the
        name ``unknown_<label, 5 digits>``. Processing stops early as soon as
        ``self.__step_stage()`` returns a truthy value.

        Args:
            files_faces: sequence of dicts with a 'faces' list (each face has
                an 'encoding') and a 'filename'.
            debug_out_folder: when truthy, the media of every processed file
                is loaded and debug images are saved into this folder.
        """
        self.__start_stage(len(files_faces))

        visit_order = list(range(len(files_faces)))
        random.shuffle(visit_order)

        # Flatten encodings in visiting order; labels come back in that order.
        encs = [
            dlib.vector(face['encoding'])
            for pos in visit_order
            for face in files_faces[pos]['faces']
        ]

        labels = dlib.chinese_whispers_clustering(
            encs, self.__threshold_clusterize)
        labels = self.__reassign_by_count(labels)

        label_pos = 0
        for pos in visit_order:
            if self.__step_stage():
                break

            entry = files_faces[pos]
            for face in entry['faces']:
                face['name'] = 'unknown_{:05d}'.format(labels[label_pos])
                label_pos += 1

            if not debug_out_folder:
                continue

            filename = entry['filename']
            media = tools.load_media(filename,
                                     self.__max_size,
                                     self.__max_video_frames,
                                     self.__video_frames_step)
            debug_out_file_name = self.__extract_filename(filename)
            self.__save_debug_images(
                entry['faces'], media,
                debug_out_folder, debug_out_file_name)

        self.__end_stage()
 def cluster(self):
     """Group ``self.all_poses`` by the cluster label of the matching face.

     ``self.all_faces`` is clustered with a threshold of 0.5; the pose at
     position ``i`` is filed under the label given to face ``i``. The result
     is stored in ``self.pose_by_label`` (label -> list of poses).
     """
     face_labels = dlib.chinese_whispers_clustering(self.all_faces, 0.5)
     self.pose_by_label = {}
     for position, label in enumerate(face_labels):
         self.pose_by_label.setdefault(label, []).append(
             self.all_poses[position])
def __create_dlib_cluster(config, shape):
    """Run Chinese Whispers clustering configured by a JSON string.

    Args:
        config: JSON text; its 'threshold' entry is used when present and
            truthy, otherwise 0.5.
        shape: value resolved through ``__get_value__`` before clustering.

    Returns:
        Tuple ``(number of distinct labels, labels)``.
    """
    settings = json.loads(config)
    threshold = settings.get('threshold') or 0.5
    labels = dlib.chinese_whispers_clustering(__get_value__(shape), threshold)
    return len(set(labels)), labels
Esempio n. 10
0
def cluster_faces(src_dir):
    """Add a 'cluster' column to ``<src_dir>/metadata.csv`` if it is missing.

    When the CSV already has a 'cluster' column nothing is changed.
    Otherwise the 'json_embedding' column is decoded into an 'embedding'
    column, the embeddings are clustered with Chinese Whispers (threshold
    0.5) and the file is rewritten without the index.

    Args:
        src_dir: directory containing ``metadata.csv``.
    """
    metadata_path = os.path.join(src_dir, 'metadata.csv')
    faces_df = pd.read_csv(metadata_path)

    # Clustering was already persisted by an earlier run
    if 'cluster' in faces_df.columns:
        return

    faces_df['embedding'] = faces_df['json_embedding'].apply(json.loads)
    embedding_matrix = np.array(list(faces_df['embedding']))
    vectors = [dlib.vector(row) for row in embedding_matrix]
    faces_df['cluster'] = dlib.chinese_whispers_clustering(vectors, 0.5)

    # Persist clustering
    faces_df.to_csv(metadata_path, index=False)
Esempio n. 11
0
def compute_similarities(data_dir, similarity_threshold=0.6, identity_threshold=0.4, criminal_fraction=0.1, **kwargs):
    """Recompute pairwise face similarities and cluster labels in the database.

    Args:
        data_dir: base directory used for the optional cluster symlinks.
        similarity_threshold: pairs with a Euclidean distance below this
            value are stored as similarities.
        identity_threshold: threshold passed to Chinese Whispers clustering.
        criminal_fraction: forwarded to ``db.get_clusters_with_criminals``;
            every cluster it returns is removed.
        **kwargs: accepted and ignored.

    Returns:
        Tuple ``(num_faces, num_similarities, num_clusters)``; with fewer
        than two faces ``(num_faces, 0, 0)`` is returned before anything is
        written.
    """
    t = Timer()
    rows = db.get_all_descriptors()
    descriptors = [json.loads(row[1]) for row in rows]
    face_ids = [row[0] for row in rows]
    num_faces = len(rows)
    if num_faces < 2:
        return num_faces, 0, 0

    # Pairwise Euclidean distances from ||x||^2 + ||y||^2 - 2*x.y, clamped at
    # zero before the square root.
    points = np.array(descriptors)
    squared_norms = np.sum(np.square(points), axis=-1)
    squared_dists = (squared_norms[:, np.newaxis] + squared_norms[np.newaxis]
                     - 2 * np.dot(points, points.T))
    dists = np.sqrt(np.maximum(squared_dists, 0))

    # Replace the stored similarities with the freshly computed ones
    db.delete_similarities()
    num_similarities = 0
    close_rows, close_cols = np.where(dists < float(similarity_threshold))
    for i, j in zip(close_rows, close_cols):
        if i == j:
            continue
        db.insert_similarity([face_ids[i], face_ids[j], dists[i, j]])
        num_similarities += 1

    # Cluster faces and update labels
    descriptors_dlib = [dlib.vector(d) for d in descriptors]
    clusters = dlib.chinese_whispers_clustering(descriptors_dlib, float(identity_threshold))
    db.update_labels(zip(clusters, face_ids))
    num_clusters = len(set(clusters))

    if args.save_clusters:
        for cluster_num, face_id in zip(clusters, face_ids):
            face_name = "face_%05d.jpg" % face_id
            facefile = os.path.realpath(os.path.join(data_dir, args.save_faces, face_name))
            clusterdir = os.path.join(data_dir, args.save_clusters, str(cluster_num))
            makedirs(clusterdir)
            # Create the link under a temporary name, then rename it into place
            staging_link = os.path.join(clusterdir, 'tmpfile')
            os.symlink(facefile, staging_link)
            os.rename(staging_link, os.path.join(clusterdir, face_name))

    # Remove clusters with more than the given amount of criminals
    for cluster in db.get_clusters_with_criminals(criminal_fraction):
        db.remove_cluster(cluster['cluster_num'])

    db.commit()
    return num_faces, num_similarities, num_clusters
def cluster_embeddings(encodings_path=None):
    """Cluster pickled face encodings and copy the images, prefixed by label.

    The pickle at ``encodings_path`` must contain a sequence of dict-like
    records with an "encoding" (array-like supporting ``.squeeze()``) and an
    "image_path" (``pathlib.Path``-like). The Chinese Whispers threshold is
    derived from nearest-neighbour distances of the encodings. Every image is
    copied to ``<image dir>/../clustered_faces/<label>_<file name>``.

    Args:
        encodings_path: path of the pickle file with the encodings. The
            default of ``None`` is not a usable value.
    """
    # Load previously generated embeddings
    print("Loading encodings...")
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    # The context manager closes the file handle deterministically.
    with open(Path(encodings_path), "rb") as encodings_file:
        data = pickle.loads(encodings_file.read())
    data = np.array(data)

    # dlib's Chinese Whispers clustering expects dlib vectors
    encodings = [dlib.vector(d["encoding"].squeeze()) for d in data]

    # Derive the threshold from the neighbour distances: take column 2 of the
    # 5-nearest-neighbour distance matrix and average it.
    neigh = NearestNeighbors(n_neighbors=5)
    nbrs = neigh.fit(encodings)
    distances, _ = nbrs.kneighbors(encodings)
    distances = np.sort(distances, axis=0)
    mean_distance = np.mean(distances[:, 2])

    # Clustering with Chinese Whispers algorithm
    labels = dlib.chinese_whispers_clustering(encodings, mean_distance)

    # Copy every image next to the others, prefixed with its cluster label
    image_paths = [d["image_path"] for d in data]
    output_folder = image_paths[0].parent.parent.joinpath("clustered_faces")
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    for current_label, current_file in zip(labels, image_paths):
        new_path = output_folder.joinpath(
            str(current_label) + "_" + current_file.name)
        shutil.copy(current_file, new_path)
Esempio n. 13
0
def same_person(photo1, photo2):
    """Tell whether two photos fall into a single Chinese Whispers cluster.

    Both image files are loaded, faces are detected and a descriptor is
    computed for every detected face (the descriptor of the last detection in
    each photo is the one compared). A green rectangle is drawn around the
    first detection of each photo and both files are overwritten with the
    annotated images.

    Args:
        photo1: path of the first image file.
        photo2: path of the second image file.

    Returns:
        ``False`` when either photo has no detected face; otherwise ``True``
        exactly when clustering the two descriptors with a threshold of 0.5
        yields a single label.
    """
    detector = dlib.get_frontal_face_detector()
    model_dir = current_app.config['MODEL_DIR']
    sp = dlib.shape_predictor(f"{model_dir}/sp.dat")
    facerec = dlib.face_recognition_model_v1(f"{model_dir}/fr.dat")

    p1 = dlib.load_rgb_image(photo1)
    p2 = dlib.load_rgb_image(photo2)

    face1 = detector(p1, 1)
    face2 = detector(p2, 1)

    if not (face1 and face2):
        return False

    # After each loop the descriptor of the last detection remains bound
    for box in face1:
        desc1 = facerec.compute_face_descriptor(p1, sp(p1, box))

    for box in face2:
        desc2 = facerec.compute_face_descriptor(p2, sp(p2, box))

    # Outline the first detection of each photo
    for image, detections in ((p1, face1), (p2, face2)):
        first = detections[0]
        cv2.rectangle(
            image, (first.left(), first.top()), (first.right(), first.bottom()), (0, 255, 0), 3
        )

    # Swap channel order before writing the annotated images back with OpenCV
    p1 = cv2.cvtColor(p1, cv2.COLOR_BGR2RGB)
    p2 = cv2.cvtColor(p2, cv2.COLOR_BGR2RGB)

    cv2.imwrite(photo1, p1)
    cv2.imwrite(photo2, p2)

    cluster = dlib.chinese_whispers_clustering([desc1, desc2], 0.5)
    return len(set(cluster)) == 1
Esempio n. 14
0
def cluster():
    """Interactively cluster the descriptors recorded in a time window.

    Prompts for a date and a start/end time, fetches the matching descriptors
    through ``ptf.retrive``, clusters them with Chinese Whispers (threshold
    0.5) and prints the labels, the counts, the distinct labels, the number
    of clusters and the elapsed time.

    Returns:
        The number of distinct cluster labels.
    """
    # NOTE(review): the timer starts before the prompts, so the printed
    # duration includes the time spent waiting for user input.
    s = time.time()
    date = input("enter a date in dd-mm-yyyy format")
    from_time = input("enter start time in hh:mm format")
    to_time = input("enter end time in hh:mm format")
    data = ptf.retrive(date, from_time, to_time)
    descriptors = [dlib.vector(d) for d in data]

    # Cluster the faces.
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    e = time.time()

    print(labels)
    print(len(descriptors))
    print(len(labels))
    labset = set(labels)
    print(labset)
    num_classes = len(labset)  # total number of clusters
    print("Number of clusters: {}".format(num_classes))
    print(e - s)
    return num_classes
Esempio n. 15
0
    def __init__(self, data_path):
        """Load face-track embeddings, cluster them and build the annotation.

        The file at ``data_path`` is read as a whitespace-separated table
        without header whose columns are 'time', 'track' and 'd0'..'d127'.
        Rows are sorted by track and time, every row's embedding is clustered
        with Chinese Whispers (threshold 0.5) and the label is stored in a
        new 'cluster' column.

        Sets:
            self.data: the sorted table including the 'cluster' column.
            self.labels: the distinct labels taken from each track's first row.
            self.starting_point: ``Annotation(modality='face')`` filled with
                one entry per non-empty track segment.
        """
        names = ['time', 'track'] + ['d{0}'.format(i) for i in range(128)]

        # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
        # which newer pandas releases deprecate.
        self.data = read_table(data_path, sep=r'\s+',
                               header=None, names=names)

        self.data.sort_values(by=['track', 'time'], inplace=True)

        # Wrap each embedding row in a dlib vector for the clustering call
        embeddings = self.data.iloc[:, 2:].values
        descriptors = [dlib.vector(each_i) for each_i in embeddings]

        # One label per embedding row, e.g. [0 0 2 2 2]
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        self.data['cluster'] = pandas.Series(labels, index=self.data.index)

        # TODO: this can be improved by taking highest count of label in each track
        # Label of a track = label of its first row
        track_label = self.data.groupby(by='track', as_index=False).first()[
            ['track', 'cluster']].values

        # Distinct labels over all tracks
        self.labels = np.unique(track_label[:, 1])

        self.starting_point = Annotation(modality='face')

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for track, segment in self.data.groupby('track').apply(_to_segment).items():
            if not segment:
                continue
            # NOTE(review): the track id is used as a positional index into
            # track_label; this presumably relies on track ids being
            # 0..n-1 without gaps - confirm.
            self.starting_point[segment, track] = track_label[track][1]
Esempio n. 16
0
def cluster():
    """Cluster the candidate persons and promote big clusters to unique persons.

    Every entry of ``helpers.candidate_persons`` receives a "label" from
    Chinese Whispers clustering (threshold 0.5) of its "encoding". For each
    label with at least ``helpers.MIN_FACES_PER_CLUSTER`` members, a record
    holding a fresh UUID and the mean encoding is appended to
    ``helpers.unique_persons``. Finally the candidate list is reset.
    """
    candidates = helpers.candidate_persons
    encodings = [d["encoding"] for d in candidates]
    labels = dlib.chinese_whispers_clustering(encodings, 0.5)
    num_classes = len(set(labels))

    # Label every candidate and group the candidates by label in one pass
    members_by_label = {}
    for candidate, label in zip(candidates, labels):
        candidate["label"] = int(label)
        members_by_label.setdefault(candidate["label"], []).append(candidate)

    # NOTE(review): iterating range(num_classes) presumes the labels are the
    # consecutive integers 0..num_classes-1 - confirm against dlib's docs.
    for label in range(num_classes):
        face_encs = members_by_label.get(label, [])
        if len(face_encs) >= helpers.MIN_FACES_PER_CLUSTER:
            mean_enc = np.zeros(128)
            for fe in face_encs:
                mean_enc += fe["encoding"]
            mean_enc = mean_enc / len(face_encs)
            helpers.unique_persons.append({
                "uuid": uuid.uuid1(),
                "Mean": mean_enc
            })

    helpers.candidate_persons = []
Esempio n. 17
0
    def clustering(self,
                   image_list=None,
                   output_folder_path=configs_clustering_output_folder,
                   shape_predictor_path=config_shape_predictor_path,
                   recognition_model_path=config_recognition_model_path):
        """
        Face clustering: group the faces found in a set of images and save
        the face chips of the largest cluster.

        The largest cluster is assumed to contain the person who is common to
        the photo set. Its faces are saved as 150x150 chips into
        ``output_folder_path``.

        TODO: all clusters with more than 2 faces could be saved to folders.

        Grayscale images cannot be used here; they fail with:
        RuntimeError: Unsupported image type, must be RGB image.

        Args:
            image_list: iterable of image paths; ``None`` is treated as empty.
            output_folder_path: folder receiving the face chips; created when
                missing.
            shape_predictor_path: path of the dlib shape predictor model.
            recognition_model_path: path of the dlib face recognition model.
        """
        if image_list is None:
            image_list = []

        # Make sure the output folder exists
        if not os.path.isdir(output_folder_path):
            os.makedirs(output_folder_path)

        detector = dlib.get_frontal_face_detector()
        shape_predictor = dlib.shape_predictor(shape_predictor_path)
        recognition_model = dlib.face_recognition_model_v1(
            recognition_model_path)

        descriptors = []
        images = []

        # Find all faces and compute a 128-dimensional descriptor for each
        for i in image_list:
            print('正在处理图片: {}'.format(i))
            img = io.imread(i)
            dets = detector(img, 1)
            num_faces = len(dets)
            if num_faces == 0:
                print("没有找到人脸,文件路径{}".format(i))
                continue
            print('检测到的人脸数: {}'.format(num_faces))

            for d in dets:
                # Landmarks/parts of the face inside rectangle d
                shape = shape_predictor(img, d)
                # 128-dimensional vector describing the face
                face_descriptor = recognition_model.compute_face_descriptor(
                    img, shape)
                descriptors.append(face_descriptor)
                images.append((img, shape))

        # Cluster the faces
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        num_classes = len(set(labels))
        print("聚类的数量: {}".format(num_classes))

        # Count the members of every cluster in a single pass
        class_sizes = {}
        for label in labels:
            class_sizes[label] = class_sizes.get(label, 0) + 1

        # Find the cluster with the most faces (lowest index wins ties)
        biggest_class = None
        biggest_class_length = 0
        for i in range(0, num_classes):
            class_length = class_sizes.get(i, 0)
            if class_length > biggest_class_length:
                biggest_class_length = class_length
                biggest_class = i
        print("最大聚类的索引号: {}".format(biggest_class))
        print("最大聚类中存储的人脸数: {}".format(biggest_class_length))

        # Positions of the faces that belong to the biggest cluster
        indices = [i for i, label in enumerate(labels)
                   if label == biggest_class]

        print("最大聚类中的图片索引:{}".format(str(indices)))

        # Save the extracted faces
        print('正在保存最大s聚类到脸部文件夹{}'.format(output_folder_path))
        for i, index in enumerate(indices):
            img, shape = images[index]
            file_path = os.path.join(output_folder_path, 'face_' + str(i))
            # size and padding are set to 150x150 and 0.25
            dlib.save_face_chip(img, shape, file_path, size=150, padding=0.25)
Esempio n. 18
0
# Script fragment: copy every face encoding into one array, cluster the
# encodings with Chinese Whispers and draw the thumbnails ordered by label.
# `nbFaces`, `encs`, `encodings` and `thumbnails` are defined outside this
# fragment.

# Length of one encoding row in emb_array
embDim = 128

# One row per face, filled in the loop below
emb_array = np.zeros((nbFaces,embDim))

# Next free row of emb_array
faceIx = 0

# `encs` holds one collection of encodings per file
for file_encs in encs:
    
    for enc_array in file_encs:
        dlib_vec = dlib.vector(enc_array)
        emb_array[faceIx,:] = enc_array
        faceIx = faceIx+1
        # The dlib vector form is what the clustering call below consumes
        encodings.append(dlib_vec)

print("[INFO] Clustering faces with Chinese Whispers algorithm")
labels_pred = dlib.chinese_whispers_clustering(encodings, 0.5)

# Thumbnail grid: 2 rows of `ncols` columns
ncols=20
fig = plt.figure(figsize=(20, 4))

# Pair each thumbnail with its label and order the pairs by label
thumbnails_labels = sorted(zip(thumbnails,labels_pred), key = lambda t: t[1])

for idx,(npimg,label) in enumerate(thumbnails_labels):

    # Subplot positions are 1-based; axis ticks are hidden
    ax = fig.add_subplot(2, ncols, idx+1, xticks=[], yticks=[])
    ax.imshow(npimg)
    ax.set_title(label)

fig.savefig(
   'output_images-thumbnails-classes.png',
   bbox_inches='tight',
Esempio n. 19
0
def cluster(request, eventname):
    """Cluster the faces of an event's photos and regroup the blobs per person.

    Steps, as implemented below:
      1. Compute a dlib face descriptor for every face detected in the blobs
         of the ``profile-pics`` container and of the ``eventname`` container.
      2. Cluster all descriptors with Chinese Whispers (threshold 0.5).
      3. For every cluster ``i`` create the container ``eventname + str(i)``
         and copy into it each blob that contributed a face to the cluster.
      4. For every profile picture found in a cluster container, copy that
         container's blobs into ``eventname + '-' + <picture name up to the
         first dot>`` and delete the cluster container.
      5. Delete the ``eventname`` container.

    Args:
        request: the incoming request; not read by this function.
        eventname: event name, used as container name and for the ``Events``
            lookup.

    Returns:
        ``HttpResponse("Successfull")``.
    """
    md = AzureMediaStorage()
    block_blob_service = BlockBlobService(account_name=md.account_name,
                                          account_key=md.account_key)
    # The pre-trained models are expected next to this file.
    predictor_path = 'shape_predictor_5_face_landmarks.dat'
    face_rec_model_path = 'dlib_face_recognition_resnet_model_v1.dat'

    faces_folder_path = block_blob_service.list_blobs(container_name=eventname)
    output_folder = []
    check_folder = block_blob_service.list_blobs(container_name='profile-pics')
    user_list = Register.objects.all().filter(event_id=Events.objects.get(
        event_name=eventname))
    # NOTE(review): username_list is filled but not read anywhere below.
    username_list = []
    for user in user_list:
        img = user.user_id.profile_pic
        username_list.append(img)

    detector = dlib.get_frontal_face_detector()  # finds the faces
    sp = dlib.shape_predictor(
        predictor_path)  # finds the face landmarks
    facerec = dlib.face_recognition_model_v1(
        face_rec_model_path)  # computes the face descriptors

    descriptors = []
    # One (container, blob name, image, shape) tuple per descriptor
    images = []

    for img in check_folder:

        print('Processing file:{}'.format(img.name))
        url = "https://picprocurestorageaccount.blob.core.windows.net/profile-pics/" + img.name
        img1 = numpy.array(
            Image.open(io.BytesIO(urllib.request.urlopen(url).read())))

        # The second argument asks the detector to upsample the image once,
        # which allows it to detect more faces.
        dets = detector(img1, 1)
        print("Number of faces detected: {}".format(len(dets)))

        # Now process each face we found.
        for d in dets:
            # Get the landmarks/parts for the face in box d.
            shape = sp(img1, d)

            # Compute the 128D vector that describes the face identified by shape.
            face_descriptor = facerec.compute_face_descriptor(img1, shape)
            descriptors.append(face_descriptor)
            images.append(('profile-pics', img.name, img1, shape))
    print('profile pics ended')
    for f in faces_folder_path:
        print("Processing file: {}".format(f.name))
        url = "https://picprocurestorageaccount.blob.core.windows.net/" + eventname + '/' + f.name
        img = numpy.array(
            Image.open(io.BytesIO(urllib.request.urlopen(url).read())))
        print('reading completed ' + f.name)
        # Upsample once, as for the profile pictures.
        dets = detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))

        # Now process each face we found.
        for d in dets:
            # Get the landmarks/parts for the face in box d.
            shape = sp(img, d)
            # Compute the 128D vector that describes the face identified by shape.
            face_descriptor = facerec.compute_face_descriptor(img, shape)
            descriptors.append(face_descriptor)
            images.append((eventname, f.name, img, shape))
            print('image appended ' + f.name)

    # Cluster the faces.
    print("event load completed")
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    num_classes = len(set(labels))  # Total number of clusters
    print("Number of clusters: {}".format(num_classes))

    for i in range(0, num_classes):
        # Positions (into `images`) of the faces assigned to cluster i
        indices = [j for j, label in enumerate(labels) if label == i]
        class_length = len(indices)
        print("Indices of images in the cluster {0} : {1}".format(
            str(i), str(indices)))
        print("Size of cluster {0} : {1}".format(str(i), str(class_length)))
        block_blob_service.create_container(eventname + str(i),
                                            public_access='blob')

        # Copy each source blob to the respective cluster container
        print("Saving faces to output folder...")
        md.azure_container = eventname + str(i)
        output_folder.append(md.azure_container)

        for index in indices:
            container, name, img, shape = images[index]
            url = "https://picprocurestorageaccount.blob.core.windows.net/" + container + '/' + name
            block_blob_service.copy_blob(container_name=md.azure_container,
                                         blob_name=name,
                                         copy_source=url)

    # NOTE(review): check_folder is iterated a second time here; confirm that
    # the object returned by list_blobs can be iterated more than once.
    for imgs in check_folder:

        for output in output_folder:
            try:
                # Probe: this call fails when the profile picture is not part
                # of the cluster container, which moves on to the next one.
                block_blob_service.get_blob_metadata(container_name=output,
                                                     blob_name=imgs.name)
                container_name = eventname + '-' + imgs.name.split('.')[0]
                block_blob_service.create_container(
                    container_name=container_name, public_access='blob')
                for blob in block_blob_service.list_blobs(container_name=output):
                    url = "https://picprocurestorageaccount.blob.core.windows.net/" + output + '/' + blob.name
                    block_blob_service.copy_blob(container_name=container_name,
                                                 blob_name=blob.name,
                                                 copy_source=url)
                block_blob_service.delete_container(output)
                # Safe although the list is being iterated: the loop is left
                # immediately afterwards.
                output_folder.remove(output)
                break
            except Exception:
                # Best effort: any failure for this container is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                pass

    block_blob_service.delete_container(eventname)
    return HttpResponse("Successfull")
Esempio n. 20
0
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))

    # Now process each face we found.
    for k, d in enumerate(dets):
        # Get the landmarks/parts for the face in box d.
        shape = sp(img, d)

        # Compute the 128D vector that describes the face in img identified by
        # shape.  
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptors.append(face_descriptor)
        images.append((img, shape))

# Now let's cluster the faces.
# `descriptors` is filled by the detection loop above; 0.5 is the threshold
# handed to the clustering call.
labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
# Every distinct label counts as one cluster
num_classes = len(set(labels))
print("Number of clusters: {}".format(num_classes))

# Find biggest class: scan the label ids 0..num_classes-1 and keep the one
# with the most members. The strict '>' keeps the lowest id on ties, and
# biggest_class stays None when there are no labels.
biggest_class = None
biggest_class_length = 0
for i in range(0, num_classes):
    # Number of faces carrying label i
    class_length = len([label for label in labels if label == i])
    if class_length > biggest_class_length:
        biggest_class_length = class_length
        biggest_class = i

print("Biggest cluster id number: {}".format(biggest_class))
print("Number of faces in biggest cluster: {}".format(biggest_class_length))
Esempio n. 21
0
    def run(self):
        """Cluster the faces of all ``*.jpg`` files and save chips per cluster.

        Reads every ``*.jpg`` in ``self.input_dir``, computes a descriptor
        for each detected face, clusters the descriptors with Chinese
        Whispers (threshold 0.5) and writes a 150x150 face chip for every
        face to ``<self.output_dir>/<label>/face_<n>.jpg``. Progress messages
        are printed and appended to ``self.log``.

        Returns early, after printing a message, when ``self.input_dir``
        does not exist. When ``self.output_dir`` is ``None`` it is set to a
        'face_clustering_output' folder next to the input directory.
        """
        if not os.path.exists(self.input_dir):
            print(
                u'Input directory does not exist.Please check your input directory.'
            )
            return
        if self.output_dir is None:
            father_path = os.path.abspath(
                os.path.dirname(self.input_dir) + os.path.sep + ".")
            self.output_dir = os.path.join(father_path,
                                           'face_clustering_output')
            print(self.output_dir)

        # Parallel lists: descriptors[k] belongs to images[k]
        descriptors = []
        images = []

        self.log.append(u'----' * 30)  # separator line

        # Visit every jpg picture of the input folder
        for f in glob.glob(os.path.join(self.input_dir, "*.jpg")):
            print('Processing file:{}'.format(f))
            self.log.append(u'Processing file:{}'.format(f))
            # Read the picture
            img = self.cv_imread(f)
            # Convert to the RGB colour space
            img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Detect faces
            dets = self.detector(img2, 1)
            print("Number of faces detected: {}".format(len(dets)))
            self.log.append(u"Number of faces detected: {}".format(len(dets)))
            # Visit every detected face
            for face in dets:
                # Detect the facial landmarks
                shape = self.shape_predictor(img2, face)
                # Project to the 128D descriptor
                face_descriptor = self.face_recognizer.compute_face_descriptor(
                    img2, shape)

                # Remember descriptor and source for the saving step
                descriptors.append(face_descriptor)
                images.append((img2, shape))

        self.log.append(u'----' * 30)

        # Clustering
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        print("labels: {}".format(labels))
        self.log.append(u"labels: {}".format(labels))
        num_classes = len(set(labels))
        print("Number of clusters: {}".format(num_classes))
        self.log.append(u"Number of clusters: {}".format(num_classes))

        self.log.append(u'----' * 30)

        # Group the faces by label; keys are pre-created for 0..num_classes-1
        face_dict = {}
        for i in range(num_classes):
            face_dict[i] = []
        for label, face_image in zip(labels, images):
            face_dict[label].append(face_image)

        # Walk the groups and save the results
        for key in face_dict.keys():
            file_dir = os.path.join(self.output_dir, str(key))
            if not os.path.isdir(file_dir):
                os.makedirs(file_dir)

            for index, (image, shape) in enumerate(face_dict[key]):
                file_path = os.path.join(file_dir,
                                         'face_' + str(index) + '.jpg')
                # Function-call form works on Python 2 and 3 alike and matches
                # the other print calls of this method.
                print(file_path)
                res = dlib.get_face_chip(image, shape, size=150, padding=0.25)
                # Swap channels back for OpenCV before writing
                res = cv2.cvtColor(res, cv2.COLOR_RGB2BGR)
                cv2.imwrite(file_path, res)
# Script fragment: compute a descriptor for every face found in faces/*.jpg,
# cluster the descriptors and prepare the folder for the most common cluster.
# `face_rec_model_path`, `detector` and `predictor` are defined outside this
# fragment.
facerec = dlib.face_recognition_model_v1(face_rec_model_path)
paths = glob.glob('faces/*.jpg')

# Parallel lists: vectors[k] is the descriptor of the face stored in images[k]
vectors = []
images = []
for path in paths:
    img = imread(path)
    dets = detector(img, 1)
    for i, d in enumerate(dets):
        shape = predictor(img, d)
        face_vector = facerec.compute_face_descriptor(img, shape)
        vectors.append(face_vector)
        images.append((img, shape))

# Clustering call
labels = dlib.chinese_whispers_clustering(vectors, 0.5)
num_classes = len(set(labels))
print('共聚为 %d 类' % num_classes)
# most_common(1) yields a one-element list of (label, count)
biggest_class = Counter(labels).most_common(1)
print(biggest_class)

# Create the output folder on first use
output_dir = 'most_common'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
face_id = 1
for i in range(len(images)):
    if labels[i] == biggest_class[0][0]:
        img, shape = images[i]
        # 把人脸切出来
        dlib.save_face_chip(img,
                            shape,
Esempio n. 23
0
def _extract_frame_faces(video_file, frame_no, frame):
    """Send one frame to the face service and return its usable faces.

    Returns a list of ``(feature, crop, meta)`` tuples, one per face whose
    bounding-box area is at least ``MinFaceSize * MinFaceSize``.  ``meta`` is
    ``[video_file, frame_no, [x1, x2, y1, y2], age, gender, attractive, exp]``.
    An empty list is returned when the service does not answer ``"OK"``.
    """
    results = []
    transport.open()
    try:
        req = ImageReq()
        req.name = video_file + "#" + str(frame_no)
        req.image_data = cv2.imencode(".jpg", frame)[1]
        rsp = client.predict_image(req)
        if rsp.status != "OK":
            print("error status: {}".format(rsp.status))
            return results
        for face_feature in rsp.face_features:
            r = face_feature.region
            # Drop small faces before building any metadata for them.
            if (r.x2 - r.x1) * (r.y2 - r.y1) < MinFaceSize * MinFaceSize:
                continue
            exp = exps[np.argmax(face_feature.exps)]
            meta = [
                video_file,
                frame_no,
                [int(r.x1), int(r.x2), int(r.y1), int(r.y2)],
                int(face_feature.age),
                face_feature.gender,
                int(face_feature.attractive),
                exp,
            ]
            # Copy the crop so it does not keep the whole frame alive.
            crop = copy.deepcopy(frame[r.y1:r.y2, r.x1:r.x2, :])
            results.append(
                (dlib.dlib.vector(face_feature.feature), crop, meta))
    finally:
        # Close the connection even when the request fails.
        transport.close()
    return results


def get_feature(video_path, save_path, save_list):
    """Sample faces from every video in a folder and group them by identity.

    Roughly one frame per second of each video in ``video_path`` is sent to
    the face service.  The returned face features are clustered with
    ``dlib.chinese_whispers_clustering`` using ``FaceClustingThreshold``.
    For every cluster with more than ``MinFaceCount`` faces, each face crop
    is written to ``save_path/<label>/<video>_<frame>.jpg`` and a line
    ``<label>\\t<meta>`` is appended to ``save_list``.

    Args:
        video_path: directory containing the input videos.
        save_path: directory under which per-cluster folders are created.
        save_list: path of the text file receiving one line per saved face.
    """
    video_files = sorted(os.listdir(video_path))
    features = []
    idxs = []
    faces = []
    videos = []
    face_attrs = []
    # Open the listing first so a bad path fails before the expensive work.
    with open(save_list, 'w') as fout:
        for video_file in video_files:
            full_video_path = os.path.join(video_path, video_file)
            video = cv2.VideoCapture(full_video_path)
            try:
                fps = int(video.get(cv2.CAP_PROP_FPS))
                num_frame = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
                print("processing video_file: {}, num_frame: {}, fps: {}".format(video_file, num_frame, fps))
                # A reported fps of 0 would make range() raise on a zero step.
                step = max(fps, 1)
                for i in range(0, num_frame, step):
                    video.set(cv2.CAP_PROP_POS_FRAMES, i)
                    ret, frame = video.read()
                    if not ret:
                        print("done")
                        break
                    for feature, crop, meta in _extract_frame_faces(
                            video_file, i, frame):
                        features.append(feature)
                        idxs.append(i)
                        faces.append(crop)
                        videos.append(video_file)
                        face_attrs.append(meta)
            finally:
                video.release()
        # TODO(crw): for large features size, this would be very slow
        # for my case, it cost near 6 hours to complete.
        labels = dlib.chinese_whispers_clustering(features,
                                                  FaceClustingThreshold)
        # Keep only clusters with enough faces; a set gives O(1) lookups.
        ids = {id_ for id_, cnt in Counter(labels).items()
               if cnt > MinFaceCount}
        for i, label in enumerate(labels):
            if label not in ids:
                continue
            id_path = os.path.join(save_path, str(label))
            if not os.path.exists(id_path):
                os.makedirs(id_path)
            cv2.imwrite(
                os.path.join(id_path, videos[i] + "_" + str(idxs[i]) + ".jpg"),
                faces[i])
            fout.write(str(label) + "\t" + str(face_attrs[i]) + "\n")
Esempio n. 24
0
def Get_face_clustered_labels(faces_folder_path):
    """Cluster the faces found in a folder of JPEG images.

    Every ``*.jpg`` in ``faces_folder_path`` is run through the CNN face
    detector; each detected face gets a descriptor from the face recognition
    model, and the descriptors are clustered with
    ``dlib.chinese_whispers_clustering`` at a threshold of 0.5.

    While running, the total number of images is written to
    ``/tmp/ProgressN`` and the 1-based index of the image being processed to
    ``/tmp/ProgressI``.  Both files are removed before returning, including
    when processing fails.

    Args:
        faces_folder_path: directory containing the input ``*.jpg`` files.

    Returns:
        dict mapping each cluster label to the list of image paths in which a
        face of that cluster was found (a path appears once per face).
    """
    import os
    import dlib
    import glob
    from tqdm import tqdm

    # Download the pre-trained models, unzip them and save them next to this
    # file.
    # http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2
    predictor_path = 'FaceClust/shape_predictor_5_face_landmarks.dat'
    # http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2
    face_rec_model_path = 'FaceClust/dlib_face_recognition_resnet_model_v1.dat'

    # Detector that finds the faces
    detector = dlib.cnn_face_detection_model_v1(
        'FaceClust/mmod_human_face_detector.dat')
    # Shape predictor that finds the face landmarks
    sp = dlib.shape_predictor(predictor_path)
    # Face recognition model
    facerec = dlib.face_recognition_model_v1(face_rec_model_path)

    descriptors = []
    face_paths = []
    image_files = glob.glob(os.path.join(faces_folder_path, "*.jpg"))

    progress_total_file = '/tmp/ProgressN'
    progress_index_file = '/tmp/ProgressI'
    try:
        with open(progress_total_file, 'w+') as fh:
            fh.write(str(len(image_files)))

        for count, image_file in enumerate(tqdm(image_files), start=1):
            with open(progress_index_file, 'w+') as fh:
                fh.write(str(count))

            img = dlib.load_rgb_image(image_file)

            # The 1 in the second argument indicates that we should upsample
            # the image 1 time. This will make everything bigger and allow us
            # to detect more faces.
            dets = detector(img, 1)

            for d in dets:
                # Get the landmarks/parts for the face in box d.
                shape = sp(img, d.rect)
                # Compute the 128D vector that describes the face.
                descriptors.append(
                    facerec.compute_face_descriptor(img, shape))
                face_paths.append(image_file)

        # Cluster the faces.
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        print("Number of clusters: {}".format(len(set(labels))))

        face_label_dicts = {}
        for image_file, label in zip(face_paths, labels):
            face_label_dicts.setdefault(label, []).append(image_file)
    finally:
        # Never leave stale progress files behind, even on failure.
        for progress_file in (progress_total_file, progress_index_file):
            if os.path.isfile(progress_file):
                os.remove(progress_file)

    return face_label_dicts
Esempio n. 25
0
 def __cluster(faces_vectors: list, threshold=0.5):
     """Run dlib's Chinese Whispers clustering on ``faces_vectors``.

     Both arguments are passed straight to
     ``dlib.chinese_whispers_clustering`` and its result is returned.
     """
     labels = dlib.chinese_whispers_clustering(faces_vectors, threshold)
     return labels
Esempio n. 26
0
def cluster_faces(descriptors, threshold=0.45):
    """Cluster face descriptors with dlib's Chinese Whispers algorithm.

    Returns whatever ``dlib.chinese_whispers_clustering`` produces for the
    given descriptors and threshold.
    """
    cluster_labels = dlib.chinese_whispers_clustering(descriptors, threshold)
    return cluster_labels
def process():
    """Cluster the faces in ``Constants.faces_folder_path`` and store them.

    Every ``*.png`` in the faces folder is scanned for faces, a descriptor is
    computed per face, and the descriptors are clustered with
    ``dlib.chinese_whispers_clustering`` using ``Constants.TOLERANCE_WHISPER``.
    Existing ``Wishper`` rows for the first ``Videos`` record are deleted,
    then each face chip is saved under
    ``Constants.output_folder_path + <label>`` and recorded as a new
    ``Wishper`` row.

    Any exception is logged rather than raised.  The shared database session
    is rolled back afterwards so later callers do not inherit a failed
    transaction.
    """
    mylogger = loger.getLoger("Whisper", Constants.boltpath + "logs")
    try:
        # Generic models
        video = DatabaseSession.session.query(Videos).first()

        detector = dlib.get_frontal_face_detector()
        # Landmark predictor; its output feeds the descriptor model below.
        sp = dlib.shape_predictor(Constants.predictor_path)
        # Computes the 128D face descriptor.
        facerec = dlib.face_recognition_model_v1(Constants.face_rec_model_path)
        descriptors = []
        images = []

        # Now find all the faces and compute 128D face descriptors for each face.
        for f in glob.glob(os.path.join(Constants.faces_folder_path, "*.png")):
            mylogger.info("Processing file: {}".format(f))
            img = dlib.load_rgb_image(f)

            # Ask the detector to find the bounding boxes of each face. The 1 in the
            # second argument indicates that we should upsample the image 1 time. This
            # will make everything bigger and allow us to detect more faces.
            dets = detector(img, 1)
            mylogger.info("Number of faces detected: {}".format(len(dets)))

            # Now process each face we found.
            for d in dets:
                # Get the landmarks/parts for the face in box d.
                shape = sp(img, d)
                descriptors.append(
                    facerec.compute_face_descriptor(img, shape))
                images.append((img, shape))

        # Now let's cluster the faces.
        labels = dlib.chinese_whispers_clustering(descriptors,
                                                  Constants.TOLERANCE_WHISPER)
        num_classes = len(set(labels))
        mylogger.info("Number of clusters: {}".format(num_classes))
        if num_classes > 0:
            # Drop the rows from any earlier run for this video.
            DatabaseSession.session.query(Wishper).filter(
                Wishper.video == video).delete()
            DatabaseSession.session.commit()
            # `labels` and `images` are index-aligned: one entry per face.
            for i, (label, (img, shape)) in enumerate(zip(labels, images)):
                # Ensure output directory exists
                output_folder_path_real = Constants.output_folder_path + str(
                    label)
                if not os.path.isdir(output_folder_path_real):
                    os.makedirs(output_folder_path_real)

                # Save the extracted faces
                mylogger.info("Saving faces cluster to output folder...")
                file_path = os.path.join(output_folder_path_real,
                                         "face_" + str(i))
                # The size and padding arguments are optional size=300x300 and padding=0.25
                dlib.save_face_chip(img,
                                    shape,
                                    file_path,
                                    size=300,
                                    padding=0.25)
                # NOTE(review): `path` assumes save_face_chip writes
                # <file_path>.jpg -- confirm against the dlib version in use.
                whisper = Wishper(alias=file_path,
                                  original_image=file_path,
                                  video=video,
                                  group=str(label),
                                  path=file_path + ".jpg")

                DatabaseSession.session.add(whisper)
                DatabaseSession.session.commit()

    except Exception as inst:
        mylogger.error("Python error.")
        mylogger.error(type(inst))
        mylogger.error(inst)
        # Discard the failed transaction so the shared session stays usable.
        DatabaseSession.session.rollback()
Esempio n. 28
0
def cluster_faces_by_CW(data, threshold=0.5):
    """Cluster the ``"encoding"`` entries of ``data`` with Chinese Whispers.

    Each item's ``"encoding"`` value is wrapped in a ``dlib.vector``; the
    resulting list is clustered at ``threshold`` and dlib's labels are
    returned.
    """
    vectors = []
    for entry in data:
        vectors.append(dlib.vector(entry["encoding"]))
    return dlib.chinese_whispers_clustering(vectors, threshold)
Esempio n. 29
0
def cluster_faces_in_class(args):
    """Split the faces of one class into clusters and store them as groups.

    The faces of class ``args.cls`` are loaded from ``args.db``, clustered on
    their descriptors, and the largest ``args.max_clusters`` clusters that
    have at least ``args.min_members`` faces are inserted as ``group_<n>``
    entries.  Clustered faces are then removed from the original class and
    the result is written back to ``args.db``.  With ``args.export`` set, the
    face crops are also saved under ``args.outdir/group_<n>/``.

    Prints a message and returns early when ``args.cls`` is not present.
    """
    preds_per_person = utils.load_faces_from_csv(args.db, args.imgs_root)
    if preds_per_person.get(args.cls) is None:
        print('Class {} not found.'.format(args.cls))
        return

    descriptors = [dlib.vector(p[2]) for p in preds_per_person[args.cls]]

    # cluster the faces
    print('clustering...')
    all_indices = []
    all_lengths = []

    # Chinese Whispers is the active method; flip this to try DBSCAN instead.
    use_chinese_whispers = True
    if use_chinese_whispers:
        labels = dlib.chinese_whispers_clustering(descriptors, args.threshold)
        num_classes = len(set(labels))
        print("Number of clusters: {}".format(num_classes))
        # Group the face indices by label in a single pass.
        members = {}
        for i, label in enumerate(labels):
            members.setdefault(label, []).append(i)
        for j in range(num_classes):
            indices = members.get(j, [])
            if len(indices) >= args.min_members:
                all_indices.append(indices)
                all_lengths.append(len(indices))
    else:
        # DBSCAN
        from sklearn.cluster import DBSCAN
        clt = DBSCAN(eps=args.threshold,
                     metric="euclidean",
                     n_jobs=4,
                     min_samples=args.min_members)
        clt.fit(descriptors)
        labels = np.unique(clt.labels_)
        num_classes = len(np.where(labels > -1)[0])

        if num_classes > 1:  # to be checked!!
            print("Number of clusters: {}".format(num_classes))
            for j in labels:
                idxs = np.where(clt.labels_ == j)[0]
                all_indices.append(list(idxs))
                all_lengths.append(len(idxs))

    # Largest clusters first
    sort_index = np.argsort(np.array(all_lengths))[::-1]

    # Move the clustered faces to individual groups
    print('Moving the clustered faces to the database.')
    to_delete = []
    for i in sort_index[:args.max_clusters]:
        cluster_name = "group_" + str(i)

        # export to folders
        if args.export:
            cluster_path = os.path.join(args.outdir, cluster_name)
            if not os.path.isdir(cluster_path):
                os.makedirs(cluster_path)

        to_delete += all_indices[i]
        for n, index in enumerate(all_indices[i]):
            utils.insert_element_preds_per_person(preds_per_person, args.cls,
                                                  index, cluster_name)
            if args.export:
                file_name = os.path.join(
                    cluster_path,
                    'face_' + cluster_name + '_' + str(n) + '.jpg')
                utils.save_face_crop(file_name,
                                     preds_per_person[args.cls][index][1],
                                     preds_per_person[args.cls][index][0][1])

    # Pop from the highest index down so the remaining indices stay valid.
    # Without this the faces would exist twice: in the original class and in
    # the cluster group.
    for i in sorted(to_delete, reverse=True):
        preds_per_person[args.cls].pop(i)

    utils.export_persons_to_csv(preds_per_person, args.imgs_root, args.db)
Esempio n. 30
0
    for k, d in enumerate(dets):
        # Get the landmarks/parts for the face in box d.
        shape = sp(img, d)

        # Compute the 128D vector that describes the face in img identified by shape.  
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptors.append(face_descriptor)
        images.append((img, shape))
  except:
     continue
  cv2.imshow('frame', frame)
  if cv2.waitKey(20) & 0xFF == ord('q'):
        break
cap.release()
# Cluster the faces.
labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
num_classes = len(set(labels))  # Total number of clusters
print("Number of clusters: {}".format(num_classes))

for i in range(0, num_classes):
    # Positions (in `descriptors`) of the faces assigned to cluster i
    indices = [j for j, label in enumerate(labels) if label == i]
    class_length = len(indices)
    print("Indices of images in the cluster {0} : {1}".format(str(i),str(indices)))
    print("Size of cluster {0} : {1}".format(str(i),str(class_length)))
    # Output folder for each cluster. normpath returns the cleaned path
    # instead of modifying its argument, so keep the result.
    output_folder_path = os.path.normpath(output_folder + './' + str(i))
    # Tolerate folders left over from an earlier run.
    if not os.path.isdir(output_folder_path):
        os.makedirs(output_folder_path)
    
Esempio n. 31
0
# Merge every pickled encoding list in PICKLES_DIR, cluster the encodings and
# store (data, labels) in a pickle named after the threshold.
files = glob.glob(path.join(PICKLES_DIR, '*.pickle'))
print(len(files))

# NOTE(review): unpickling can execute arbitrary code; PICKLES_DIR must only
# contain files from a trusted source.
for picklefile in files:
    with open(picklefile, "rb") as f:
        d = pickle.load(f)
    data.extend(d)

print(len(data))
encodings = [dlib.vector(d["encoding"]) for d in data]
print(len(encodings))

# cluster the embeddings
print("[INFO] clustering...")
labels = dlib.chinese_whispers_clustering(encodings, THRESHOLD)

tup = (data, labels)

pickleFile = 'clusters-%.3f.pickle' % THRESHOLD

with open(pickleFile, "wb") as f:
    pickle.dump(tup, f)