def find_clusters(self, representative=None, use_CI=False):
    """Match a raw representative encoding against previously saved clusters.

    Loads the per-cluster statistics from 'results.pkl' and standardizes
    `representative` using the scaler parameters saved in
    'standardization_data.pkl' (both written by cluster_data_points).

    Args:
        representative: 1-D encoding vector, un-standardized. If None, an
            empty list is returned.
        use_CI: if True, a cluster matches when every component of the
            representative falls inside the cluster's 95% z-confidence
            interval; otherwise a cluster matches when the fractional
            distance to its mean is within a sphere of 1.25 standard
            deviations.

    Returns:
        list of {'labelID': ..., 'paths': ...} dicts, one per matching
        cluster (HDBSCAN noise label -1 is never matched).
    """
    with open('results.pkl', 'rb') as file:
        data = pickle.load(file)

    metric = Similarity()
    possibilities = []

    if representative is None:
        return possibilities

    # Standardize the representative with the same mean/std that was used
    # when the clusters were built, so distances are comparable.
    with open('standardization_data.pkl', 'rb') as file:
        standardization_data = pickle.load(file)
    mean = standardization_data['s_mean']
    std_dev = np.sqrt(standardization_data['s_var'])
    representative = np.divide(np.subtract(representative, mean), std_dev)

    if use_CI is True:
        # Use confidence intervals to check whether the person corresponds
        # to a given cluster or not. A z-distribution is assumed here.
        for labelID in data.keys():
            if labelID == -1:
                # BUG FIX: was `pass`, which fell through and allowed the
                # HDBSCAN noise cluster to match; `continue` skips it.
                continue
            mean_encoding = data[labelID]['mean_encoding']
            error = data[labelID]['std_dev']
            n = data[labelID]['sample_size']
            # 95% CI half-width: 1.96 * sigma / sqrt(n), computed once.
            margin = np.multiply(error, 1.96) / np.power(n, 0.5)
            lower_bound = mean_encoding - margin
            upper_bound = mean_encoding + margin
            inside = (representative >= lower_bound) & (representative <= upper_bound)
            if np.all(inside):
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    else:
        for labelID in data.keys():
            if labelID == -1:
                continue
            centre_point = data[labelID]['mean_encoding']
            # We assume that our representative encoding lies within 1.25
            # standard deviations of the mean for it to match that cluster.
            error = data[labelID]['std_dev'] * 1.25
            sphere_point = np.add(centre_point, error)
            sphere_radius = metric.fractional_distance(centre_point, sphere_point)
            distance = metric.fractional_distance(centre_point, representative)
            if distance <= sphere_radius:
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    return possibilities
def cluster_data_points(data_points, cluster_size=5, distance_metric_func="Fractional"):
    """Cluster face encodings with HDBSCAN and summarize each cluster.

    Args:
        data_points: sequence of dicts, each with an 'encoding' (1-D
            vector) and a 'path' key.
        cluster_size: HDBSCAN `min_cluster_size`.
        distance_metric_func: "Fractional" selects the fractional distance
            metric; any other value selects the Euclidean distance.

    Returns:
        dict mapping labelID -> {'paths', 'mean_encoding', 'std_dev',
        'sample_size'}; labelID -1 is HDBSCAN's noise bucket.
    """
    encoding_matrix = np.vstack([point['encoding'] for point in data_points])

    # Standardize features before clustering.
    scaler = StandardScaler()
    scaler.fit(encoding_matrix)
    encoding_matrix = scaler.transform(encoding_matrix)

    similarity = Similarity()
    if distance_metric_func == "Fractional":
        chosen_metric = similarity.fractional_distance
    else:
        chosen_metric = similarity.euclidean_distance

    clusterer = HDBSCAN(min_cluster_size=cluster_size,
                        metric='pyfunc',
                        func=chosen_metric)
    clusterer.fit(encoding_matrix)
    logging.info("Fit complete.")

    results = {}
    for labelID in np.unique(clusterer.labels_):
        member_idxs = np.where(clusterer.labels_ == labelID)[0]
        member_paths = [data_points[i]['path'] for i in member_idxs]
        member_encodings = [data_points[i]['encoding'] for i in member_idxs]
        results[labelID] = {
            'paths': member_paths,
            'mean_encoding': np.mean(np.asarray(member_encodings), axis=0),
            'std_dev': np.std(member_encodings, axis=0),
            'sample_size': len(member_paths),
        }
    return results
def find_clusters(representative=None, use_CI=True, sigma=1.25):
    """Match a standardized representative encoding against clusters.

    NOTE(review): `data` is read from enclosing/module scope — it is not a
    parameter and is not loaded here. Confirm the caller defines it as the
    labelID -> statistics dict produced by the clustering step.

    Args:
        representative: 1-D encoding vector (already standardized). If
            None, an empty list is returned.
        use_CI: if True, match via a 95% z-confidence interval around each
            cluster mean; otherwise match via a fractional-distance sphere.
        sigma: number of standard deviations defining the matching sphere
            radius in the non-CI branch.

    Returns:
        list of {'labelID': ..., 'paths': ...} dicts for matching clusters.
    """
    metric = Similarity()
    possibilities = []

    if representative is None:
        return possibilities

    if use_CI is True:
        # Use confidence intervals to check whether the person corresponds
        # to a given cluster or not. A z-distribution is assumed here.
        for labelID in data.keys():
            if labelID == -1:
                # BUG FIX: was `pass`, which fell through and allowed the
                # HDBSCAN noise cluster to match; `continue` skips it.
                continue
            mean_encoding = data[labelID]['mean_encoding']
            error = data[labelID]['std_dev']
            n = data[labelID]['sample_size']
            # 95% CI half-width: 1.96 * sigma / sqrt(n), computed once.
            margin = np.multiply(error, 1.96) / np.power(n, 0.5)
            lower_bound = mean_encoding - margin
            upper_bound = mean_encoding + margin
            inside = (representative >= lower_bound) & (representative <= upper_bound)
            if np.all(inside):
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    else:
        for labelID in data.keys():
            if labelID == -1:
                continue
            centre_point = data[labelID]['mean_encoding']
            # The representative must lie within `sigma` standard
            # deviations of the cluster mean to match it.
            error = data[labelID]['std_dev'] * sigma
            sphere_point = np.add(centre_point, error)
            sphere_radius = metric.fractional_distance(centre_point, sphere_point)
            distance = metric.fractional_distance(centre_point, representative)
            if distance <= sphere_radius:
                possibilities.append({
                    'labelID': labelID,
                    'paths': data[labelID]['paths'],
                })
    return possibilities
def cluster_data_points(self, data=None, processed=False):
    """Cluster L2-normalized face encodings with HDBSCAN (fractional metric).

    Args:
        data: list of dicts with 'encoding' and 'path' keys. NOTE(review):
            `data` must be non-empty even when `processed` is True, in
            which case it is immediately discarded and replaced by the
            pickled video data — this looks unintentional; confirm with
            callers.
        processed: if True, load encodings from 'video_data.pkl', switch
            the cluster size to `self.cs`, and persist the results to
            'video_results.pkl'.

    Returns:
        dict mapping labelID -> {'paths', 'mean_encoding', 'std_dev'},
        or None when `data` is missing/empty.
    """
    if data is None or len(data) < 1:
        return None

    if processed is True:
        with open('video_data.pkl', 'rb') as file:
            data = pickle.load(file)
        # Video data uses its own cluster-size setting.
        self.clusterSize = self.cs

    points = np.vstack([d['encoding'] for d in data])
    points = normalize(points, norm='l2', axis=1)

    dist_metric = Similarity()
    clusterer = HDBSCAN(min_cluster_size=self.clusterSize,
                        metric='pyfunc',
                        func=dist_metric.fractional_distance)
    clusterer.fit(points)

    results = {}
    for labelID in np.unique(clusterer.labels_):
        idxs = np.where(clusterer.labels_ == labelID)[0]
        paths = [data[i]['path'] for i in idxs]
        encodings = [data[i]['encoding'] for i in idxs]
        mean_encoding, std_dev = self._compute_statistics(encodings)
        results[labelID] = {
            'paths': paths,
            'mean_encoding': mean_encoding,
            'std_dev': std_dev,
        }

    if processed is True:
        # Persist clusters computed from pre-processed video data.
        with open('video_results.pkl', 'wb') as file:
            pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)
    # BUG FIX: removed the unreachable trailing `return None` — both
    # branches of the original `if processed` already returned `results`.
    return results
def cluster_data_points(self):
    """Cluster standardized face encodings and persist all artifacts.

    Loads encodings from 'data_points.pkl', standardizes them, writes the
    scaler parameters to 'standardization_data.pkl' (so later
    representatives can be transformed identically), clusters with HDBSCAN
    using the fractional distance, and writes the per-cluster summary to
    'results.pkl'.

    Returns:
        True on completion.
    """
    with open('data_points.pkl', 'rb') as file:
        data = pickle.load(file)

    points = np.vstack([entry['encoding'] for entry in data])

    # Standardize features and save the scaler's mean/variance for reuse.
    scaler = StandardScaler()
    scaler.fit(points)
    points = scaler.transform(points)
    with open('standardization_data.pkl', 'wb') as file:
        pickle.dump({'s_mean': scaler.mean_, 's_var': scaler.var_},
                    file, protocol=pickle.HIGHEST_PROTOCOL)

    dist_metric = Similarity()
    clusterer = HDBSCAN(min_cluster_size=self.clusterSize,
                        metric='pyfunc',
                        func=dist_metric.fractional_distance)
    clusterer.fit(points)

    results = {}
    for labelID in np.unique(clusterer.labels_):
        member_idxs = np.where(clusterer.labels_ == labelID)[0]
        paths = [data[i]['path'] for i in member_idxs]
        encodings = [data[i]['encoding'] for i in member_idxs]
        mean_encoding, std_dev = self._compute_statistics(encodings)
        results[labelID] = {
            'paths': paths,
            'mean_encoding': mean_encoding,
            'std_dev': std_dev,
            'sample_size': len(paths),
        }

    with open('results.pkl', 'wb') as file:
        pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)
    return True
    # NOTE(review): this `return` is the tail of a function whose start is
    # outside this view — kept verbatim; confirm its enclosing definition.
    return processed


def hist_equalize(image):
    """Apply CLAHE equalization to the L channel of a BGR image and
    return the re-converted BGR result."""
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(16, 16))
    l1 = clahe.apply(l)
    processed = cv2.merge((l1, a, b))
    processed = cv2.cvtColor(processed, cv2.COLOR_LAB2BGR)
    return processed


# Script section: open the default webcam and load a pre-trained FaceNet
# model, then embed a face cropped from a test image. The script body
# continues beyond this chunk.
cap = cv2.VideoCapture(0)
dist = Similarity()

with tf.Graph().as_default():
    with tf.Session() as session:
        facenet.load_model('20180402-114759.pb')
        # Graph tensors: image input, embedding output, and the
        # train/inference phase flag.
        img_holder = tf.get_default_graph().get_tensor_by_name('input:0')
        embeddings = tf.get_default_graph().get_tensor_by_name('embeddings:0')
        phase_train = tf.get_default_graph().get_tensor_by_name(
            'phase_train:0')

        test_image = cv2.imread('test2.jpg')
        #test_image = hist_equalize(test_image)
        # Detect the first face (HOG model) and crop/resize to 160x160 —
        # presumably the input size this FaceNet model expects; confirm.
        (y1, x2, y2, x1) = FR.face_locations(test_image, model='hog')[0]
        test_face = cv2.resize(test_image[y1:y2, x1:x2], (160, 160))
        #test_face = hist_equalize(test_face)