def find_clusters(self, representative=None, use_CI=False):
    """Match a raw representative encoding against previously saved clusters.

    Loads the per-cluster statistics from 'results.pkl' and standardizes
    `representative` using the scaler parameters saved in
    'standardization_data.pkl' (both written by cluster_data_points).

    Args:
        representative: 1-D encoding vector, un-standardized. If None, an
            empty list is returned.
        use_CI: if True, a cluster matches when every component of the
            representative falls inside the cluster's 95% z-confidence
            interval; otherwise a cluster matches when the fractional
            distance to its mean is within a sphere of 1.25 standard
            deviations.

    Returns:
        list of {'labelID': ..., 'paths': ...} dicts, one per matching
        cluster (HDBSCAN noise label -1 is never matched).
    """
    with open('results.pkl', 'rb') as file:
        data = pickle.load(file)

    metric = Similarity()
    possibilities = []

    if representative is None:
        return possibilities

    # Standardize the representative with the same mean/std that was used
    # when the clusters were built, so distances are comparable.
    with open('standardization_data.pkl', 'rb') as file:
        standardization_data = pickle.load(file)
    mean = standardization_data['s_mean']
    std_dev = np.sqrt(standardization_data['s_var'])
    representative = np.divide(np.subtract(representative, mean), std_dev)

    if use_CI is True:
        # Use confidence intervals to check whether the person corresponds
        # to a given cluster or not. A z-distribution is assumed here.
        for labelID in data.keys():
            if labelID == -1:
                # BUG FIX: was `pass`, which fell through and allowed the
                # HDBSCAN noise cluster to match; `continue` skips it.
                continue
            mean_encoding = data[labelID]['mean_encoding']
            error = data[labelID]['std_dev']
            n = data[labelID]['sample_size']
            # 95% CI half-width: 1.96 * sigma / sqrt(n), computed once.
            margin = np.multiply(error, 1.96) / np.power(n, 0.5)
            lower_bound = mean_encoding - margin
            upper_bound = mean_encoding + margin
            inside = (representative >= lower_bound) & (representative <= upper_bound)
            if np.all(inside):
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    else:
        for labelID in data.keys():
            if labelID == -1:
                continue
            centre_point = data[labelID]['mean_encoding']
            # We assume that our representative encoding lies within 1.25
            # standard deviations of the mean for it to match that cluster.
            error = data[labelID]['std_dev'] * 1.25
            sphere_point = np.add(centre_point, error)
            sphere_radius = metric.fractional_distance(centre_point, sphere_point)
            distance = metric.fractional_distance(centre_point, representative)
            if distance <= sphere_radius:
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    return possibilities
def cluster_data_points(data_points, cluster_size=5, distance_metric_func="Fractional"):
    """Cluster face encodings with HDBSCAN and summarize each cluster.

    Args:
        data_points: sequence of dicts, each with an 'encoding' (1-D
            vector) and a 'path' key.
        cluster_size: HDBSCAN `min_cluster_size`.
        distance_metric_func: "Fractional" selects the fractional distance
            metric; any other value selects the Euclidean distance.

    Returns:
        dict mapping labelID -> {'paths', 'mean_encoding', 'std_dev',
        'sample_size'}; labelID -1 is HDBSCAN's noise bucket.
    """
    encoding_matrix = np.vstack([point['encoding'] for point in data_points])

    # Standardize features before clustering.
    scaler = StandardScaler()
    scaler.fit(encoding_matrix)
    encoding_matrix = scaler.transform(encoding_matrix)

    similarity = Similarity()
    if distance_metric_func == "Fractional":
        chosen_metric = similarity.fractional_distance
    else:
        chosen_metric = similarity.euclidean_distance

    clusterer = HDBSCAN(min_cluster_size=cluster_size,
                        metric='pyfunc',
                        func=chosen_metric)
    clusterer.fit(encoding_matrix)
    logging.info("Fit complete.")

    results = {}
    for labelID in np.unique(clusterer.labels_):
        member_idxs = np.where(clusterer.labels_ == labelID)[0]
        member_paths = [data_points[i]['path'] for i in member_idxs]
        member_encodings = [data_points[i]['encoding'] for i in member_idxs]
        results[labelID] = {
            'paths': member_paths,
            'mean_encoding': np.mean(np.asarray(member_encodings), axis=0),
            'std_dev': np.std(member_encodings, axis=0),
            'sample_size': len(member_paths),
        }
    return results
def find_clusters(representative=None, use_CI=True, sigma=1.25):
    """Match a standardized representative encoding against clusters.

    NOTE(review): `data` is read from enclosing/module scope — it is not a
    parameter and is not loaded here. Confirm the caller defines it as the
    labelID -> statistics dict produced by the clustering step.

    Args:
        representative: 1-D encoding vector (already standardized). If
            None, an empty list is returned.
        use_CI: if True, match via a 95% z-confidence interval around each
            cluster mean; otherwise match via a fractional-distance sphere.
        sigma: number of standard deviations defining the matching sphere
            radius in the non-CI branch.

    Returns:
        list of {'labelID': ..., 'paths': ...} dicts for matching clusters.
    """
    metric = Similarity()
    possibilities = []

    if representative is None:
        return possibilities

    if use_CI is True:
        # Use confidence intervals to check whether the person corresponds
        # to a given cluster or not. A z-distribution is assumed here.
        for labelID in data.keys():
            if labelID == -1:
                # BUG FIX: was `pass`, which fell through and allowed the
                # HDBSCAN noise cluster to match; `continue` skips it.
                continue
            mean_encoding = data[labelID]['mean_encoding']
            error = data[labelID]['std_dev']
            n = data[labelID]['sample_size']
            # 95% CI half-width: 1.96 * sigma / sqrt(n), computed once.
            margin = np.multiply(error, 1.96) / np.power(n, 0.5)
            lower_bound = mean_encoding - margin
            upper_bound = mean_encoding + margin
            inside = (representative >= lower_bound) & (representative <= upper_bound)
            if np.all(inside):
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    else:
        for labelID in data.keys():
            if labelID == -1:
                continue
            centre_point = data[labelID]['mean_encoding']
            # The representative must lie within `sigma` standard
            # deviations of the cluster mean to match it.
            error = data[labelID]['std_dev'] * sigma
            sphere_point = np.add(centre_point, error)
            sphere_radius = metric.fractional_distance(centre_point, sphere_point)
            distance = metric.fractional_distance(centre_point, representative)
            if distance <= sphere_radius:
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    return possibilities
def cluster_data_points(self, data=None, processed=False):
    """Cluster L2-normalized face encodings with HDBSCAN (fractional metric).

    Args:
        data: list of dicts with 'encoding' and 'path' keys. NOTE(review):
            `data` must be non-empty even when `processed` is True, in
            which case it is immediately discarded and replaced by the
            pickled video data — this looks unintentional; confirm with
            callers.
        processed: if True, load encodings from 'video_data.pkl', switch
            the cluster size to `self.cs`, and persist the results to
            'video_results.pkl'.

    Returns:
        dict mapping labelID -> {'paths', 'mean_encoding', 'std_dev'},
        or None when `data` is missing/empty.
    """
    if data is None or len(data) < 1:
        return None

    if processed is True:
        with open('video_data.pkl', 'rb') as file:
            data = pickle.load(file)
        # Video data uses its own cluster-size setting.
        self.clusterSize = self.cs

    points = np.vstack([d['encoding'] for d in data])
    points = normalize(points, norm='l2', axis=1)

    dist_metric = Similarity()
    clusterer = HDBSCAN(min_cluster_size=self.clusterSize,
                        metric='pyfunc',
                        func=dist_metric.fractional_distance)
    clusterer.fit(points)

    results = {}
    for labelID in np.unique(clusterer.labels_):
        idxs = np.where(clusterer.labels_ == labelID)[0]
        paths = [data[i]['path'] for i in idxs]
        encodings = [data[i]['encoding'] for i in idxs]
        mean_encoding, std_dev = self._compute_statistics(encodings)
        results[labelID] = {
            'paths': paths,
            'mean_encoding': mean_encoding,
            'std_dev': std_dev,
        }

    if processed is True:
        # Persist clusters computed from pre-processed video data.
        with open('video_results.pkl', 'wb') as file:
            pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)
    # BUG FIX: removed the unreachable trailing `return None` — both
    # branches of the original `if processed` already returned `results`.
    return results
def cluster_data_points(self):
    """Cluster standardized face encodings and persist all artifacts.

    Loads encodings from 'data_points.pkl', standardizes them, writes the
    scaler parameters to 'standardization_data.pkl' (so later
    representatives can be transformed identically), clusters with HDBSCAN
    using the fractional distance, and writes the per-cluster summary to
    'results.pkl'.

    Returns:
        True on completion.
    """
    with open('data_points.pkl', 'rb') as file:
        data = pickle.load(file)

    points = np.vstack([entry['encoding'] for entry in data])

    # Standardize features and save the scaler's mean/variance for reuse.
    scaler = StandardScaler()
    scaler.fit(points)
    points = scaler.transform(points)
    with open('standardization_data.pkl', 'wb') as file:
        pickle.dump({'s_mean': scaler.mean_, 's_var': scaler.var_},
                    file, protocol=pickle.HIGHEST_PROTOCOL)

    dist_metric = Similarity()
    clusterer = HDBSCAN(min_cluster_size=self.clusterSize,
                        metric='pyfunc',
                        func=dist_metric.fractional_distance)
    clusterer.fit(points)

    results = {}
    for labelID in np.unique(clusterer.labels_):
        member_idxs = np.where(clusterer.labels_ == labelID)[0]
        paths = [data[i]['path'] for i in member_idxs]
        encodings = [data[i]['encoding'] for i in member_idxs]
        mean_encoding, std_dev = self._compute_statistics(encodings)
        results[labelID] = {
            'paths': paths,
            'mean_encoding': mean_encoding,
            'std_dev': std_dev,
            'sample_size': len(paths),
        }

    with open('results.pkl', 'wb') as file:
        pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)
    return True
    # NOTE(review): this `return` is the tail of a function whose start is
    # outside this view — kept verbatim; confirm its enclosing definition.
    return processed


def hist_equalize(image):
    """Apply CLAHE equalization to the L channel of a BGR image and
    return the re-converted BGR result."""
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(16, 16))
    l1 = clahe.apply(l)
    processed = cv2.merge((l1, a, b))
    processed = cv2.cvtColor(processed, cv2.COLOR_LAB2BGR)
    return processed


# Script section: open the default webcam and load a pre-trained FaceNet
# model, then embed a face cropped from a test image. The script body
# continues beyond this chunk.
cap = cv2.VideoCapture(0)
dist = Similarity()

with tf.Graph().as_default():
    with tf.Session() as session:
        facenet.load_model('20180402-114759.pb')
        # Graph tensors: image input, embedding output, and the
        # train/inference phase flag.
        img_holder = tf.get_default_graph().get_tensor_by_name('input:0')
        embeddings = tf.get_default_graph().get_tensor_by_name('embeddings:0')
        phase_train = tf.get_default_graph().get_tensor_by_name(
            'phase_train:0')

        test_image = cv2.imread('test2.jpg')
        #test_image = hist_equalize(test_image)
        # Detect the first face (HOG model) and crop/resize to 160x160 —
        # presumably the input size this FaceNet model expects; confirm.
        (y1, x2, y2, x1) = FR.face_locations(test_image, model='hog')[0]
        test_face = cv2.resize(test_image[y1:y2, x1:x2], (160, 160))
        #test_face = hist_equalize(test_face)