Example #1
def test_dot():
    v1 = vector([1, 0])
    v2 = vector([0, 1])
    v3 = vector([-1, 0])
    assert dot(v1, v1) == 1
    assert dot(v1, v2) == 0
    assert dot(v1, v3) == -1
Example #2
 def trainListFile(self, listTrainFile, listmanualfiles):
     if len(listmanualfiles) != len(listTrainFile):
         print("Co loi")
         sys.exit()
     self.reset()
     
     
     queries = dlib.ranking_pairs()
     
     for index in range(0, len(listTrainFile)):
         self.reset()
         data = dlib.ranking_pair()
         
         inputNonRelevant = " ".join([line for line in open(listTrainFile[index], 'r').readlines()])
         tpAllSent = myTokenizer(inputNonRelevant)
         self.inputFromString(inputNonRelevant)
         inputRelevant = " ".join([line for line in open(listmanualfiles[index], 'r').readlines()])
         tpRelevant = myTokenizer(inputRelevant)
         tpNonRelevant = list(set(tpAllSent).difference(set(tpRelevant)))
         
         self.genAllVector()
         for sent in tpRelevant:
             data.relevant.append(dlib.vector(self.dicVector.get(sent.strip())))
         for sent in tpNonRelevant:
             data.nonrelevant.append(dlib.vector(self.dicVector.get(sent.strip())))
             
         queries.append(data)
     
     trainer = dlib.svm_rank_trainer()
     trainer.c = 10
     rank = trainer.train(queries)
     _weight = []
     for i in range(0, len(rank.weights)):
         _weight.append(rank.weights[i])
     return _weight
Example #3
def generate_test_vectors():
    vs = vectors()
    vs.append(vector([0, 1, 2]))
    vs.append(vector([3, 4, 5]))
    vs.append(vector([6, 7, 8]))
    assert len(vs) == 3
    return vs
Example #4
def train(tagged):
    """
    Trains an SVM classifier based on the training data passed.

    Mostly based on http://dlib.net/svm_binary_classifier.py.html.

    :param tagged: list of TaggedFace to train on
    :return: dlib.svm
    """
    x = dlib.vectors()  # will carry the facial encodings
    y = dlib.array()  # will carry the gender label
    print("Preparing dataset...")
    total = len(tagged)
    for i, t in enumerate(tagged):
        print(f"\rEncoding {t.path} ({i + 1}/{total})...", end="")
        faces = encode(t.img)
        x.append(dlib.vector(faces[0]))
        y.append(t.tag)
        img = t.img
        for _ in range(5):
            faces = encode(img)
            if not faces:
                break
            x.append(dlib.vector(faces[0]))
            y.append(t.tag)
            img = cv2.resize(img, None, fx=0.7, fy=0.7)

    print("Training SVM...")
    trainer = dlib.svm_c_trainer_radial_basis()
    #trainer.be_verbose()
    trainer.set_c(10)
    model = trainer.train(x, y)
    with open(PATH_SVMFILE, "wb") as filehandle:
        pickle.dump(model, filehandle)
    return model
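The function above pickles the model but never loads it back. As a rough sketch of the inverse step (the helper name classify_gender and its arguments are made up, not part of the original code), the saved decision function can be reloaded and called on a new 128-D encoding:

import pickle

import dlib


def classify_gender(model_path, face_encoding):
    """Hypothetical helper: load the SVM pickled by train() above and score
    one face encoding. Positive scores correspond to the +1 training label,
    negative scores to -1."""
    with open(model_path, "rb") as filehandle:
        model = pickle.load(filehandle)
    # The decision function returned by dlib's SVM trainers is callable on a
    # dlib.vector and returns a float.
    return model(dlib.vector(face_encoding))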
Example #5
def sentence_to_vectors(sentence):
    # Create an empty array of vectors
    vects = dlib.vectors()
    for word in sentence.split():
        # Our vectors are very simple 1-dimensional vectors.  The value of the single
        # feature is 1 if the first letter of the word is capitalized and 0 otherwise.
        if (word[0].isupper()):
            vects.append(dlib.vector([1]))
        else:
            vects.append(dlib.vector([0]))
    return vects
Example #7
def dlibVectorFormating(data, tolist=True, key_descr='descr'):
    for d in data:
        if tolist:
            d[key_descr] = [list(dd) for dd in d[key_descr]]
        else:
            d[key_descr] = [dlib.vector(dd) for dd in d[key_descr]]
    return data
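A small usage sketch for the converter above, with made-up records: tolist=True turns dlib.vector objects into plain lists (easy to serialize), tolist=False converts them back for dlib's clustering and SVM routines.

import dlib

data = [{"name": "a", "descr": [dlib.vector([0.1, 0.2]), dlib.vector([0.3, 0.4])]}]

as_lists = dlibVectorFormating(data, tolist=True)         # dlib.vector -> list
as_vectors = dlibVectorFormating(as_lists, tolist=False)  # list -> dlib.vector
print(type(as_vectors[0]["descr"][0]))                    # a dlib.vector again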
Example #8
def CLUSTER_TRACKS(DT, threshold):

    track_keys = list(DT.keys())
    track_feats = []

    # One mean appearance feature per track.
    for key in track_keys:
        track_feats.append(dlib.vector(DT[key]['BBOX_FEAT'].mean(0)))

    CL = defaultdict(dict)

    # cluster_ids[idx] is the cluster label assigned to the track at track_keys[idx].
    cluster_ids = dlib.chinese_whispers_clustering(track_feats, threshold)

    for idx, label in enumerate(cluster_ids):
        key = track_keys[idx]
        if CL[label]:
            # Merge this track into the existing cluster entry.
            CL[label]['BBOX'] = CL[label]['BBOX'] + DT[key]['BBOX']
            CL[label]['Frame_ID'] = CL[label]['Frame_ID'] + DT[key]['Frame_ID']
            CL[label]['BBOX_FEAT'] = CL[label]['BBOX_FEAT'] + DT[key]['BBOX_FEAT']
            CL[label]['ANGLE'] = CL[label]['ANGLE'] + DT[key]['ANGLE']
            CL[label]['IMG'] = CL[label]['IMG'] + DT[key]['IMG']
            CL[label]['AVG_SIZE'] = DT[key]['AVG_SIZE']
            CL[label]['AREA'] = DT[key]['AREA']
            CL[label]['LEN'] = CL[label]['LEN'] + DT[key]['LEN']
        else:
            # First track seen for this cluster.
            CL[label]['BBOX'] = DT[key]['BBOX']
            CL[label]['Frame_ID'] = DT[key]['Frame_ID']
            CL[label]['BBOX_FEAT'] = DT[key]['BBOX_FEAT']
            CL[label]['ANGLE'] = DT[key]['ANGLE']
            CL[label]['IMG'] = DT[key]['IMG']
            CL[label]['AVG_SIZE'] = DT[key]['AVG_SIZE']
            CL[label]['AREA'] = DT[key]['AREA']
            CL[label]['LEN'] = DT[key]['LEN']

    return CL
Example #9
    def preprocess_faces(self, faces):
        # Cluster the faces with chinese whispers
        encodings = [dlib.vector(face['encoding']) for face in faces]
        labels = dlib.chinese_whispers_clustering(encodings, 0.5)

        selected_faces = []

        # Select face most close to average group
        groups = list(set(labels))
        for group in groups:
            # Get indices for each group
            indices = [i for i in range(len(labels)) if labels[i] == group]
            group_encodings = [faces[i]['encoding'] for i in indices]

            # Get centroid for group encodings
            avg_group_encoding = np.average(group_encodings, axis=0)

            # Get the closest face to the centroid
            avg_distance = face_recognition.face_distance(
                group_encodings, avg_group_encoding)
            min_index = np.argmin(avg_distance)

            face_index = indices[min_index]
            selected_faces.append(faces[face_index])

        return selected_faces
Example #10
    def clustring(self, faces_info):

        for data in faces_info:
            encode = data['face_encoding']
            self.face_encodings.append(dlib.vector(encode))

        labels = dlib.chinese_whispers_clustering(self.face_encodings, 0.5)
        labels = np.array(labels)
        print("All cluster labels :", labels)

        unique_labels = np.unique(labels)
        print("Number of unique faces found  : ", len(unique_labels))
        print("Saving faces..........")
        for label in unique_labels:
            index = np.where(labels == label)[0]

            for i in index:
                image_path = self.faces_info[i]['img_path']
                image_name = image_path.split('/')[-1].split('.')[0]
                image_ext = image_path.split('/')[-1].split('.')[1]
                image = cv2.imread(image_path)

                output_dir = os.getcwd() + '/' + str(label)

                if not os.path.isdir(output_dir):
                    os.mkdir(str(label))

                cv2.imwrite(output_dir + '/' + image_name + '.' + image_ext,
                            image)
Example #11
 def predict_gender(self, encoding, thresh=0.4):
     result = self.classifier(dlib.vector(encoding))        
     if result > thresh:
         return "male"
     if result < -thresh:
         return "female"
     return "unknown"
Example #12
def chinese_whispers(encodings, threshold=0.5):
    """
    Chinese Whispers - an Efficient Graph Clustering Algorithm 
    and its Application to Natural Language Processing Problems
    """
    encodings = [dlib.vector(enc) for enc in encodings]
    return dlib.chinese_whispers_clustering(encodings, threshold)
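A minimal sketch of calling the wrapper above with made-up encodings: vectors whose distance is below the threshold are assigned the same cluster label.

encodings = [
    [0.0, 0.0, 0.0],
    [0.1, 0.0, 0.0],   # within 0.5 of the first -> same cluster
    [5.0, 5.0, 5.0],   # far away -> its own cluster
]
labels = chinese_whispers(encodings, threshold=0.5)
print(labels)  # e.g. [0, 0, 1]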
    def __clusterize(self, files_faces, debug_out_folder=None):
        self.__start_stage(len(files_faces))
        encs = []
        indexes = list(range(len(files_faces)))
        random.shuffle(indexes)
        for i in indexes:
            for j in range(len(files_faces[i]['faces'])):
                encs.append(dlib.vector(
                    files_faces[i]['faces'][j]['encoding']))

        labels = dlib.chinese_whispers_clustering(
            encs, self.__threshold_clusterize)

        labels = self.__reassign_by_count(labels)
        lnum = 0
        for i in indexes:
            if self.__step_stage():
                break
            for j in range(len(files_faces[i]['faces'])):
                files_faces[i]['faces'][j]['name'] = \
                    'unknown_{:05d}'.format(labels[lnum])
                lnum += 1

            if debug_out_folder:
                filename = files_faces[i]['filename']
                media = tools.load_media(filename,
                                         self.__max_size,
                                         self.__max_video_frames,
                                         self.__video_frames_step)
                debug_out_file_name = self.__extract_filename(filename)
                self.__save_debug_images(
                    files_faces[i]['faces'], media,
                    debug_out_folder, debug_out_file_name)
        self.__end_stage()
Example #14
    def make_psi(self, x, label):
        """Compute PSI(x,label)."""
        psi = dlib.vector()
        # Resize it to num_dimensions.  Note that the elements of the vector
        # are 0 initialized.
        psi.resize(self.num_dimensions)

        # first
        label_num = label[0]
        # psi[:label_num * 128] = label_num * 128 * [0]
        for index in range(128):
            psi[label_num * 128 + index] = x[0][index]
        # psi[label_num*128, (label_num+1)*128] = x[0].tolist()
        # psi[(label_num+1)*128:] = (26 - label_num) * 128 * [0]

        label_num = label[1]
        for index in range(128):
            psi[label_num * 128 + 128 * 27 + index] = x[1][index]

        # get changing label
        if label[0] != label[1]:
            psi[-1] = 1
        else:
            psi[-1] = 0

        return psi
def predict_gender(encoding):
    result = _classifier(dlib.vector(encoding))
    if result > 0.5:
        return "male"

    if result < -0.5:
        return "female"

    return "unknown"
Example #16
def predict_gender(encoding, threshold=0.5):
    result = _classifier(dlib.vector(encoding))
    if result > threshold:
        return "male"

    if result < -threshold:
        return "female"

    return "unknown"
Example #17
def calc_embded2(file_list):
    embd_list = []
    file_name = []
    for f in file_list:
        img = cv2.imread(f)
        ret = face_recognition.face_encodings(img)
        if len(ret) == 0:
            continue
        file_name.append(f)
        embd_list.append(dlib.vector(ret[0]))
    return file_name, embd_list
Example #18
def test_vector_set_size():
    v = vector(3)

    v.set_size(0)
    assert len(v) == 0
    assert v.shape == (0, 1)

    v.resize(10)
    assert len(v) == 10
    assert v.shape == (10, 1)
    for i in range(10):
        assert v[i] == 0
    def __recognize(self):
        """
        Recognize a face and return its descriptor
        :return:
        """

        try:
            face_roi: RoiData = self.__frames.get()

            if not self.__config.recognize_faces:
                return

            img: np.ndarray = face_roi.img

            b, g, r = cv2.split(img)
            img_rgb = cv2.merge((r, g, b))

            # win = dlib.image_window()
            # win.clear_overlay()
            # win.set_image(img_rgb)
            # win.add_overlay(face_roi.shape)
            # win.wait_until_closed()

            face_desc = self.__face_rec_model.compute_face_descriptor(img_rgb, face_roi.shape)

            faces = self.__db_worker.select_all_faces()

            wrong_face = True

            for face in faces:
                desc: str = face[3]
                values = [float(x) for x in desc.split('\n')]
                vector = dlib.vector(values)

                faces_dist = distance.euclidean(face_desc, vector)

                if faces_dist < 0.6:
                    wrong_face = False
                    break

            if wrong_face:
                if len(self.__faces) < 5:
                    self.__faces.append(img)

                if not self.__thread_started:
                    self.__thread_started = True
                    self.__thread = Thread(target=self.__send_notification)
                    self.__thread.name = "NotificationThread"
                    self.__thread.start()

        except Exception as ex:
            self.__recognition_error(f"{ex}")
Example #20
def estimate_gender(face):
    """
    Estimates the gender of the face encoding that is passed.

    :param face: dlibs 128-long face encoding
    :return: float, estimated gender. The gender model has been trained as
        value 1 for females, and -1 for males. So, a value of -0.5 means "mainly
        male" and can be considered as such. Values between -0.3 and 0.3 mean
        the model is not certain enough, and should be considered as "unknown"
        or "uncertain"
    """
    vector = dlib.vector(face)
    return gender_model(vector)
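Following the thresholds described in the docstring, a caller might map the raw score to a coarse label like this (sketch only; label_gender is not part of the original code):

def label_gender(score, uncertainty=0.3):
    """Map an estimate_gender() score to a label, treating values inside
    (-uncertainty, uncertainty) as too close to call."""
    if score >= uncertainty:
        return "female"   # the model was trained with +1 for females
    if score <= -uncertainty:
        return "male"     # and -1 for males
    return "unknown"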
Example #21
def compute_similarities(data_dir, similarity_threshold=0.6, identity_threshold=0.4, criminal_fraction=0.1, **kwargs):
    t = Timer()
    all_descriptors = db.get_all_descriptors()
    descriptors = [json.loads(f[1]) for f in all_descriptors]
    face_ids = [f[0] for f in all_descriptors]
    num_faces = len(all_descriptors)
    #print("get_all_descriptors():", t)
    #print("Faces: %d" % len(all_descriptors), end='')
    if num_faces < 2:
        #print()
        return num_faces, 0, 0

    X = Y = np.array(descriptors)
    #print("convert to array:", t)
    X2 = Y2 = np.sum(np.square(X), axis=-1)
    dists = np.sqrt(np.maximum(X2[:, np.newaxis] + Y2[np.newaxis] - 2 * np.dot(X, Y.T), 0))
    #print("calculate dists:", t)

    db.delete_similarities()
    #print("delete similarities:", t)
    num_similarities = 0
    for i, j in zip(*np.where(dists < float(similarity_threshold))):
        if i != j:
            db.insert_similarity([face_ids[i], face_ids[j], dists[i, j]])
            num_similarities += 1
    #print("save similarities:", t)

    # cluster faces and update labels
    descriptors_dlib = [dlib.vector(d) for d in descriptors]
    clusters = dlib.chinese_whispers_clustering(descriptors_dlib, float(identity_threshold))
    db.update_labels(zip(clusters, face_ids))
    num_clusters = len(set(clusters))

    if args.save_clusters:
        for cluster_num, face_id in zip(clusters, face_ids):
            facefile = os.path.realpath(os.path.join(data_dir, args.save_faces, "face_%05d.jpg" % face_id))
            clusterdir = os.path.join(data_dir, args.save_clusters, str(cluster_num))
            makedirs(clusterdir)
            os.symlink(facefile, os.path.join(clusterdir, 'tmpfile'))
            os.rename(os.path.join(clusterdir, 'tmpfile'), os.path.join(clusterdir, "face_%05d.jpg" % face_id))

    # remove clusters with more than given amount of criminals
    criminal_clusters = db.get_clusters_with_criminals(criminal_fraction)
    for cluster in criminal_clusters:
        db.remove_cluster(cluster['cluster_num'])

    db.commit()
    #print("commit:", t)
    #print(", Similarities: %d, Time: %.2fs" % (num_similarities, t.total()))
    return num_faces, num_similarities, num_clusters
Example #22
def match(candidate):

    bestThresh = 9999
    bestIndex = -1
    if (len(helpers.unique_persons) > 0):
        for index, person in enumerate(helpers.unique_persons):
            currThresh = helpers.euclidean_dist(candidate,
                                                dlib.vector(person["Mean"]))
            if (currThresh < helpers.MAX_MATCHING_THRESH):
                if (currThresh < bestThresh):
                    bestIndex = index
                    bestThresh = currThresh

    return bestIndex
def cluster_faces(src_dir):
    # Load face metadata
    faces_df = pd.read_csv(os.path.join(src_dir, 'metadata.csv'))

    # Check if clustering already exists
    if 'cluster' not in faces_df.columns:
        # Chinese whispers clustering
        faces_df['embedding'] = faces_df['json_embedding'].apply(json.loads)
        X = np.array([x for x in faces_df['embedding']])
        faces_df['cluster'] = dlib.chinese_whispers_clustering(
            [dlib.vector(x) for x in X], 0.5)

        # Persist clustering
        faces_df.to_csv(os.path.join(src_dir, 'metadata.csv'), index=False)
Example #24
def training_data():
    r = Random(0)
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    for i in range(30):
        for c in [-1, 1]:
            response.append(c)
            values = [r.random() + c * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            sp = sparse_vector()
            for i, v in enumerate(values):
                sp.append(pair(i, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
Example #25
def training_data():
    r = Random(0)
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    for i in range(30):
        for c in [-1, 1]:
            response.append(c)
            values = [r.random() + c * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            sp = sparse_vector()
            for i, v in enumerate(values):
                sp.append(pair(i, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
Example #26
def test_vector_slice():
    v = vector([1, 2, 3, 4, 5])
    v_slice = v[1:4]
    assert len(v_slice) == 3
    for idx, val in enumerate([2, 3, 4]):
        assert v_slice[idx] == val

    v_slice = v[-3:-1]
    assert len(v_slice) == 2
    for idx, val in enumerate([3, 4]):
        assert v_slice[idx] == val

    v_slice = v[1:-2]
    assert len(v_slice) == 2
    for idx, val in enumerate([2, 3]):
        assert v_slice[idx] == val
Example #27
 def make_psi(self, x, label):
     """Compute PSI(x,label)."""
     # All we are doing here is taking x, which is a 3 dimensional sample
     # vector in this example program, and putting it into one of 3 places in
     # a 9 dimensional PSI vector, which we then return.  So this function
     # returns PSI(x,label).  To see why we setup PSI like this, recall how
     # predict_label() works.  It takes in a 9 dimensional weight vector and
     # breaks the vector into 3 pieces.  Each piece then defines a different
     # classifier and we use them in a one-vs-all manner to predict the
     # label.  So now that we are in the structural SVM code we have to
     # define the PSI vector to correspond to this usage.  That is, we need
     # to setup PSI so that argmax_y dot(weights,PSI(x,y)) ==
     # predict_label(weights,x).  This is how we tell the structural SVM
     # solver what kind of problem we are trying to solve.
     #
     # It's worth emphasizing that the single biggest step in using a
     # structural SVM is deciding how you want to represent PSI(x,label).  It
     # is always a vector, but deciding what to put into it to solve your
     # problem is often not a trivial task. Part of the difficulty is that
     # you need an efficient method for finding the label that makes
     # dot(w,PSI(x,label)) the biggest.  Sometimes this is easy, but often
     # finding the max scoring label turns into a difficult combinatorial
     # optimization problem.  So you need to pick a PSI that doesn't make the
     # label maximization step intractable but also still well models your
     # problem.
     #
     # Create a dense vector object (note that you can also use unsorted
     # sparse vectors (i.e.  dlib.sparse_vector objects) to represent your
     # PSI vector.  This is useful if you have very high dimensional PSI
     # vectors that are mostly zeros.  In the context of this example, you
     # would simply return a dlib.sparse_vector at the end of make_psi() and
     # the rest of the example would still work properly. ).
     psi = dlib.vector()
     # Set it to have 9 dimensions.  Note that the elements of the vector
     # are 0 initialized.
     psi.resize(self.num_dimensions)
     dims = len(x)
     if label == 0:
         for i in range(0, dims):
             psi[i] = x[i]
     elif label == 1:
         for i in range(dims, 2 * dims):
             psi[i] = x[i - dims]
     else:  # the label must be 2
         for i in range(2 * dims, 3 * dims):
             psi[i] = x[i - 2 * dims]
     return psi
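For reference, the predict_label() routine that the comments above keep referring to looks roughly like this in dlib's svm_struct.py example (a sketch, not part of this snippet): the 9-dimensional weight vector is cut into three 3-dimensional pieces, each piece is scored against the sample, and the best-scoring piece gives the label.

def predict_label(weights, sample):
    # weights: 9-D vector defining three one-vs-all classifiers,
    # sample: 3-D sample vector.
    scores = []
    for label in range(3):
        w = weights[label * 3:(label + 1) * 3]
        scores.append(sum(wi * xi for wi, xi in zip(w, sample)))
    return scores.index(max(scores))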
Example #28
 def make_psi(self, x, label):
     """Compute PSI(x,label)."""
     # All we are doing here is taking x, which is a 3 dimensional sample
     # vector in this example program, and putting it into one of 3 places in
     # a 9 dimensional PSI vector, which we then return.  So this function
     # returns PSI(x,label).  To see why we setup PSI like this, recall how
     # predict_label() works.  It takes in a 9 dimensional weight vector and
     # breaks the vector into 3 pieces.  Each piece then defines a different
     # classifier and we use them in a one-vs-all manner to predict the
     # label.  So now that we are in the structural SVM code we have to
     # define the PSI vector to correspond to this usage.  That is, we need
     # to setup PSI so that argmax_y dot(weights,PSI(x,y)) ==
     # predict_label(weights,x).  This is how we tell the structural SVM
     # solver what kind of problem we are trying to solve.
     #
     # It's worth emphasizing that the single biggest step in using a
     # structural SVM is deciding how you want to represent PSI(x,label).  It
     # is always a vector, but deciding what to put into it to solve your
     # problem is often not a trivial task. Part of the difficulty is that
     # you need an efficient method for finding the label that makes
     # dot(w,PSI(x,label)) the biggest.  Sometimes this is easy, but often
     # finding the max scoring label turns into a difficult combinatorial
     # optimization problem.  So you need to pick a PSI that doesn't make the
     # label maximization step intractable but also still well models your
     # problem.
     #
     # Create a dense vector object (note that you can also use unsorted
     # sparse vectors (i.e.  dlib.sparse_vector objects) to represent your
     # PSI vector.  This is useful if you have very high dimensional PSI
     # vectors that are mostly zeros.  In the context of this example, you
     # would simply return a dlib.sparse_vector at the end of make_psi() and
     # the rest of the example would still work properly. ).
     psi = dlib.vector()
     # Set it to have 9 dimensions.  Note that the elements of the vector
     # are 0 initialized.
     psi.resize(self.num_dimensions)
     dims = len(x)
     if label == 0:
         for i in range(0, dims):
             psi[i] = x[i]
     elif label == 1:
         for i in range(dims, 2 * dims):
             psi[i] = x[i - dims]
     else:  # the label must be 2
         for i in range(2 * dims, 3 * dims):
             psi[i] = x[i - 2 * dims]
     return psi
def cluster_embeddings(encodings_path=None):
    # Load previously generated embeddings
    print("Loading encodings...")
    data = pickle.loads(open(Path(encodings_path), "rb").read())
    data = np.array(data)

    # Specifically grab the encodings from the data array
    # If using dlib's Chinese Whispers Clustering, convert to dlib vector format
    encodings = [dlib.vector(d["encoding"].squeeze()) for d in data]
    # If using KNN, keep in Numpy format
    # encodings = [d["encoding"] for d in data]
    # encodings = np.asarray(encodings).squeeze()

    # Calculate a threshold value for Chinese Whispers
    neigh = NearestNeighbors(n_neighbors=5)
    nbrs = neigh.fit(encodings)
    distances, indices = nbrs.kneighbors(encodings)
    distances = np.sort(distances, axis=0)
    distances = distances[:, 2]
    mean_distance = np.mean(distances)
    # plt.plot(distances)
    # plt.show()

    # Clustering with Chinese Whispers algorithm
    labels = dlib.chinese_whispers_clustering(encodings, mean_distance)

    # kmeans = KMeans(n_clusters=5, random_state=0).fit(encodings)
    # label_ids = np.unique(kmeans.labels_)
    # labels = kmeans.labels_

    # Determine the total number of unique faces, as well
    # as their occurrences
    label_ids, counts = np.unique(labels, return_counts=True)
    num_unique_faces = len(label_ids)

    # Split images into clusters based on labels
    image_paths = [d["image_path"] for d in data]
    output_folder = image_paths[0].parent.parent.joinpath("clustered_faces")
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    for i in range(len(image_paths)):
        current_label = labels[i]
        current_file = image_paths[i]
        new_path = output_folder.joinpath(
            str(current_label) + "_" + current_file.name)
        shutil.copy(current_file, new_path)
Example #30
def cluster():
    s = time.time()
    query = ''
    descriptors = []
    dvec = dlib.vectors()
    date = input("enter a date in dd-mm-yyy format")
    from_time = input("enter start time in hh:mm format")
    to_time = input("enter end time in hh:mm format")
    data = ptf.retrive(date, from_time, to_time)
    for d in data:
        descriptors.append(dlib.vector(d))
    # Cluster the faces.
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    e = time.time()
    print(labels)
    print(len(descriptors))
    print(len(labels))
    labset = set(labels)
    print(labset)
    num_classes = len(set(labels))  #total number of clusters
    print("Number of clusters: {}".format(num_classes))
    print(e - s)
    return num_classes
Example #31
    def __init__(self, data_path):
        names = ['time', 'track']

        for i in range(128):
            names += ['d{0}'.format(i)]
        #
        self.data = read_table(data_path, delim_whitespace=True,
                               header=None, names=names)

        self.data.sort_values(by=['track', 'time'], inplace=True)

        # create a descriptor list with dlibs descriptor vector
        descriptors = []
        embeddings = self.data.iloc[:, 2:].values
        for each_i in embeddings:
            face_descriptor = dlib.vector(each_i)
            descriptors.append(face_descriptor)

        # returns series of labels [0 0 2 2 2] for each row of embeddings
        labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
        # put the series into a column
        self.data['cluster'] = pandas.Series(labels, index=self.data.index)
        # TODO: this can be improved by taking highest count of label in each track
        # get the label for each track
        track_label = self.data.groupby(by='track', as_index=False).first()[
            ['track', 'cluster']].values

        # get unique labels
        self.labels = np.unique(track_label[:][:, [1]])

        self.starting_point = Annotation(modality='face')

        for track, segment in self.data.groupby('track').apply(_to_segment).items():
            if not segment:
                continue
            self.starting_point[segment, track] = track_label[track][1]
Example #32
    def train(self, directoryPlain, directoryManual):
        self.reset()
        listFile = []
        listPlainFile = listAllFileInFolder(directoryPlain)
        listManualFile = listAllFileInFolder(directoryManual)
        dicPlainFile = {}
        dicManualFile = {}
        for file in listPlainFile:
            fname = file.strip().split('/')[-1]
            listFile.append(fname)
            dicPlainFile[fname] = file
        
        for file in listManualFile:
            fname = file.strip().split('/')[-1]
            listFile.append(fname)
            dicManualFile[fname] = file
        
        listFile = list(set(listFile))
        
        queries = dlib.ranking_pairs()
        
        countt = 0
        outfile = open("completefile.txt", 'w')
        for file in listFile:
            outvecfile = open("/home/hien/Data/Work/Wordnet_naiscorp/test/valuevector/"+file.strip().split('/')[-1], 'w')
            countt = countt + 1
            outfile.write(file+'\n')
            print (file, countt)
            self.reset()
            data = dlib.ranking_pair()
            
            inputNonRelevant = " ".join([line for line in open(dicPlainFile.get(file), 'r').readlines()])
            tpAllSent = myTokenizer(inputNonRelevant)
            self.inputFromString(inputNonRelevant)
            inputRelevant = " ".join([line for line in open(dicManualFile.get(file), 'r').readlines()])
            tpRelevant = myTokenizer(inputRelevant)
            tpNonRelevant = list(set(tpAllSent).difference(set(tpRelevant)))
            
            self.genAllVector()
            for sent in tpAllSent:
                outvecfile.write(str(self.dicVector.get(sent.strip()))+"\t"+sent.strip()+'\n')
            outvecfile.close()
            for sent in tpRelevant:
#                 print (sent)
#                 print(self.dicVector.get(sent))
#                 print(type(self.dicVector.get(sent)))
                data.relevant.append(dlib.vector(self.dicVector.get(sent.strip())))
#                 outvecfile.write(str(self.dicVector.get(sent.strip()))+"\t"+sent.strip()+'\n')
#                 outvecfile.
            for sent in tpNonRelevant:
#                 print(self.dicVector.get(sent))
                data.nonrelevant.append(dlib.vector(self.dicVector.get(sent.strip())))
                
            queries.append(data)
        
        trainer = dlib.svm_rank_trainer()
        trainer.c = 10
        rank = trainer.train(queries)
        _weight = []
        for i in range(0, len(rank.weights)):
            _weight.append(rank.weights[i])
#         print(type(rank.weights))
#         print (rank.weights[0])
#         print (rank.weights)
#         print(_weight)
#         return rank.weights
        return _weight
import dlib
import pandas as pd

import settings

train_data = pd.read_csv('training_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_train = train_data["query_id"].tolist()
doc_id_train = train_data["doc_id"].tolist()
train_features = train_data[settings.feature_selected]
# train_true = list(train_data["label"])
train_true = train_data["label"].tolist()

# testing
test_data = pd.read_csv('test_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_test = test_data["query_id"].tolist()
doc_id_test = test_data["doc_id"].tolist()
test_features = test_data[settings.feature_selected]
test_true = test_data["label"]

data = dlib.ranking_pair()
for i in range(len(train_true)):
    if train_true[i] == 1:
        data.relevant.append(dlib.vector(train_features.iloc[i].tolist()))
    elif train_true[i] == 0:
        data.nonrelevant.append(dlib.vector(train_features.iloc[i].tolist()))

trainer = dlib.svm_rank_trainer()
trainer.c = 10
rank = trainer.train(data)
print("Ranking score for a relevant vector:     {}".format(
    rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(
    rank(data.nonrelevant[0])))
Example #34
def test_vector_getitem():
    v = vector([1, 2, 3])
    assert v[0] == 1
    assert v[-1] == 3
    assert v[1] == v[-2]
Example #35
#   run compile_dlib_python_module.bat.  This should work on any operating system
#   so long as you have CMake and boost-python installed.  On Ubuntu, this can be
#   done easily by running the command:  sudo apt-get install libboost-python-dev cmake

import dlib

# Now let's make some testing data.  To make it really simple, let's suppose that
# we are ranking 2D vectors and that vectors with positive values in the first
# dimension should rank higher than other vectors.  So what we do is make
# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking)
# vectors and store them into a ranking_pair object like so:

data = dlib.ranking_pair()
# Here we add two examples.  In real applications, you would want lots of
# examples of relevant and non-relevant vectors.
data.relevant.append(dlib.vector([1, 0]))
data.nonrelevant.append(dlib.vector([0, 1]))

# Now that we have some data, we can use a machine learning method to learn a
# function that will give high scores to the relevant vectors and low scores to
# the non-relevant vectors.
trainer = dlib.svm_rank_trainer()
# Note that the trainer object has some parameters that control how it behaves.
# For example, since this is the SVM-Rank algorithm it has a C parameter that
# controls the trade-off between trying to fit the training data exactly or
# selecting a "simpler" solution which might generalize better.
trainer.c = 10

# So let's do the training.
rank = trainer.train(data)
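
# A possible continuation (sketch): apply the trained ranking function and
# inspect its learned weights. Relevant-style vectors should now score higher
# than non-relevant ones.
print("Relevant score:     {}".format(rank(data.relevant[0])))
print("Non-relevant score: {}".format(rank(data.nonrelevant[0])))

# The weight vector itself is also exposed, as used by the train()/
# trainListFile() examples earlier on this page.
weights = [rank.weights[i] for i in range(len(rank.weights))]
print(weights)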
Example #36
def test_vector_serialization():
    v = vector([1, 2, 3])
    ser = pickle.dumps(v, 2)
    deser = pickle.loads(ser)
    assert str(v) == str(deser)
Example #37
#       sudo apt-get install libboost-python-dev cmake
#

import dlib
try:
    import cPickle as pickle
except ImportError:
    import pickle

x = dlib.vectors()
y = dlib.array()

# Make a training dataset.  Here we have just two training examples.  Normally
# you would use a much larger training dataset, but for the purpose of example
# this is plenty.  For binary classification, the y labels should all be either +1 or -1.
x.append(dlib.vector([1, 2, 3, -1, -2, -3]))
y.append(+1)

x.append(dlib.vector([-1, -2, -3, 1, 2, 3]))
y.append(-1)


# Now make a training object.  This object is responsible for turning a
# training dataset into a prediction model.  This one here is a SVM trainer
# that uses a linear kernel.  If you wanted to use a RBF kernel or histogram
# intersection kernel you could change it to one of these lines:
#  svm = dlib.svm_c_trainer_histogram_intersection()
#  svm = dlib.svm_c_trainer_radial_basis()
svm = dlib.svm_c_trainer_linear()
svm.be_verbose()
svm.set_c(10)
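
# A possible continuation (sketch, mirroring dlib's svm_binary_classifier.py
# example this snippet is based on): train the classifier, check a prediction,
# and pickle the resulting decision function.
classifier = svm.train(x, y)

# The classifier is a callable decision function: a positive output means the
# +1 class, a negative output means the -1 class.
print("Prediction for first sample:  {}".format(classifier(x[0])))
print("Prediction for second sample: {}".format(classifier(x[1])))

# Decision functions can be pickled, so the model can be saved and reloaded.
with open("saved_model.pickle", "wb") as handle:
    pickle.dump(classifier, handle, 2)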
Example #38
def test_vector_empty_init():
    v = vector()
    assert len(v) == 0
    assert v.shape == (0, 1)
    assert str(v) == ""
    assert repr(v) == "dlib.vector([])"
Example #39
def test_vector_init_with_negative_number():
    with raises(Exception):
        vector(-3)
Example #40
def test_vector_invalid_getitem():
    v = vector([1, 2, 3])
    with raises(IndexError):
        v[-4]
    with raises(IndexError):
        v[3]
Example #41
def test_vector_init_with_number():
    v = vector(3)
    assert len(v) == 3
    assert v.shape == (3, 1)
    assert str(v) == "0\n0\n0"
    assert repr(v) == "dlib.vector([0, 0, 0])"
Example #42
#   command:
#       sudo apt-get install cmake
#

import dlib


# Now let's make some testing data.  To make it really simple, let's suppose
# that we are ranking 2D vectors and that vectors with positive values in the
# first dimension should rank higher than other vectors.  So what we do is make
# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking)
# vectors and store them into a ranking_pair object like so:
data = dlib.ranking_pair()
# Here we add two examples.  In real applications, you would want lots of
# examples of relevant and non-relevant vectors.
data.relevant.append(dlib.vector([1, 0]))
data.nonrelevant.append(dlib.vector([0, 1]))

# Now that we have some data, we can use a machine learning method to learn a
# function that will give high scores to the relevant vectors and low scores to
# the non-relevant vectors.
trainer = dlib.svm_rank_trainer()
# Note that the trainer object has some parameters that control how it behaves.
# For example, since this is the SVM-Rank algorithm it has a C parameter that
# controls the trade-off between trying to fit the training data exactly or
# selecting a "simpler" solution which might generalize better. 
trainer.c = 10

# So let's do the training.
rank = trainer.train(data)
Example #43
#       sudo apt-get install cmake
#

import dlib
try:
    import cPickle as pickle
except ImportError:
    import pickle

x = dlib.vectors()
y = dlib.array()

# Make a training dataset.  Here we have just two training examples.  Normally
# you would use a much larger training dataset, but for the purpose of example
# this is plenty.  For binary classification, the y labels should all be either +1 or -1.
x.append(dlib.vector([1, 2, 3, -1, -2, -3]))
y.append(+1)

x.append(dlib.vector([-1, -2, -3, 1, 2, 3]))
y.append(-1)

# Now make a training object.  This object is responsible for turning a
# training dataset into a prediction model.  This one here is a SVM trainer
# that uses a linear kernel.  If you wanted to use a RBF kernel or histogram
# intersection kernel you could change it to one of these lines:
#  svm = dlib.svm_c_trainer_histogram_intersection()
#  svm = dlib.svm_c_trainer_radial_basis()
svm = dlib.svm_c_trainer_linear()
svm.be_verbose()
svm.set_c(10)
Example #44
def test_vectors_extend():
    vs = vectors()
    vs.extend([vector([1, 2, 3]), vector([4, 5, 6])])
    assert len(vs) == 2
descriptors = []
images = []

# Now find all the persons 1024D descriptors.

personDesc = open(descriptor_file_path, "r")

for line in personDesc:
    descriptorElements = line.split("|")
    print("Processing image: {}".format(descriptorElements[0]))

    # Parse the face descriptor for this image from the current line.
    descriptor = np.array(descriptorElements[1:])
    descriptor = descriptor.astype(np.float64)
    descriptors.append(dlib.vector(descriptor))
    images.append(descriptorElements[0])

#descriptors = dlib.vector(descriptors)

# Now let's cluster the faces.
labels = dlib.chinese_whispers_clustering(descriptors, 0.20)
num_classes = len(set(labels))
print("Number of clusters: {}".format(num_classes))

# Find biggest class
biggest_class = None
biggest_class_length = 0
for i in range(0, num_classes):
    class_length = len([label for label in labels if label == i])
    if class_length > biggest_class_length:
        biggest_class_length = class_length
        biggest_class = i
Example #46
def test_vector_init_with_list():
    v = vector([1, 2, 3])
    assert len(v) == 3
    assert v.shape == (3, 1)
    assert str(v) == "1\n2\n3"
    assert repr(v) == "dlib.vector([1, 2, 3])"