def fit(self, data):
    """Run the spectral-clustering pipeline on ``data`` and store the labels.

    Each stage is delegated to a helper on ``self``: pairwise distances,
    k-nearest-neighbour weights (``k=10``), the Laplacian, and the embedding
    matrix ``Y``. ``K_Means`` is then fitted on ``Y`` with
    ``self.n_clusters`` clusters.

    Args:
        data: Input samples, passed unchanged to ``self.get_DistanceMatrix``.

    Returns:
        None. The assignments predicted for ``Y`` are stored in
        ``self.labels``.
    """
    pairwise = self.get_DistanceMatrix(data)
    knn_weights = self.Distance_to_Weigt_knn(pairwise, k=10)
    embedding = self.get_YMatrix(self.get_LaplacianMatrix(knn_weights))

    # Cluster the rows of the embedding rather than the raw samples.
    clusterer = K_Means(n_clusters=self.n_clusters)
    clusterer.fit(embedding)
    self.labels = clusterer.predict(embedding)
def fit(self, X, eigValueGap=False, bShowGap=False):
    """Cluster ``X`` spectrally and return one label per embedded row.

    Args:
        X: Input samples, passed to ``self.calculateDistanceMatrix``.
        eigValueGap: Forwarded unchanged to ``self.calculateYMatrix``.
        bShowGap: Forwarded unchanged to ``self.calculateYMatrix``.

    Returns:
        The labels produced by the selected k-means implementation for the
        embedding matrix.
    """
    # Hard-wired switch: truthy selects the K_Means class, falsy selects
    # KMeans (fit(...).labels_ API).
    use_own_kmeans = 1

    distances = self.calculateDistanceMatrix(X)
    knn_weights = self.distTransToWeightKNN(distances, k=10)
    # NOTE(review): 'rm' presumably selects a normalized Laplacian variant;
    # confirm against calculateLaplacianMatrix.
    laplacian = self.calculateLaplacianMatrix(knn_weights, normalized='rm')
    embedding = self.calculateYMatrix(
        laplacian, eigValueGap=eigValueGap, bShowGap=bShowGap
    )

    if not use_own_kmeans:
        return KMeans(n_clusters=self.n_clusters).fit(embedding).labels_

    clusterer = K_Means(self.n_clusters)
    clusterer.fit(embedding)
    return clusterer.predict(embedding)
def predict(self, data):
    """Cluster the nodes of ``self.simi_graph`` using its Laplacian spectrum.

    Builds the degree matrix and the unnormalized Laplacian ``D - W`` from
    ``self.simi_graph``, keeps the eigenvectors belonging to the
    ``self.k_`` smallest eigenvalues, and runs ``K_Means`` on them.

    Args:
        data: Not read by this method; all inputs come from
            ``self.simi_graph``.

    Returns:
        The cluster assignments returned by ``K_Means.predict`` for the
        selected eigenvectors.

    Side effects:
        Stores the degree matrix in ``self.D_mat`` and the Laplacian in
        ``self.lap_mat``, and prints the shape of the embedding.
    """
    # Degree of node i = sum of all edge weights leaving node i.
    degrees = np.sum(self.simi_graph, axis=1)
    self.D_mat = np.diag(degrees)
    self.lap_mat = self.D_mat - self.simi_graph

    # NOTE(review): np.linalg.eig may return complex values; if simi_graph is
    # guaranteed symmetric, np.linalg.eigh would be the safer choice - confirm.
    eigenvalues, eigenvectors = np.linalg.eig(self.lap_mat)
    ascending = eigenvalues.argsort()
    k_eigenvectors = eigenvectors[:, ascending[:self.k_]]
    print("k eigen shape", np.shape(k_eigenvectors))

    clusterer = K_Means(self.k_)
    clusterer.fit(k_eigenvectors)
    return clusterer.predict(k_eigenvectors)
class Spectral(object):
    """Spectral clustering on a k-nearest-neighbour graph.

    The number of clusters is not given by the caller; ``fit`` estimates it
    from the first large gap in the sorted Laplacian eigenvalues (see
    ``confirm_k``).

    Attributes set by ``fit``:
        kdtree: ``KDTree`` built on the training data.
        k_means_manager: ``K_Means`` instance fitted on the embedding.
        spectral_result: Array holding one cluster label per training sample.
    """

    def __init__(self):
        self.tolerance_ = 0.00001

    def distance(self, eular_dis):
        """Turn Euclidean distance(s) into similarity weight(s) ``exp(-d)``."""
        return np.exp(-eular_dis)

    def confirm_k(self, value, sort_idx):
        """Estimate the cluster count from the eigenvalue gaps.

        Args:
            value: 1-D array of eigenvalues.
            sort_idx: Indices that sort ``value`` in ascending order.

        Returns:
            The 1-based position of the first gap between consecutive sorted
            eigenvalues that exceeds the mean of the first five gaps. If no
            gap exceeds it, the number of gaps is returned; with fewer than
            two eigenvalues the result is 1.
        """
        gaps = np.diff(value[sort_idx])
        if gaps.size == 0:
            # A single eigenvalue has no gap to inspect: one cluster.
            return 1
        threshold = np.mean(gaps[:5])
        for kk, gap in enumerate(gaps, start=1):
            if gap > threshold:
                return kk
        return len(gaps)

    def fit(self, data):
        """Build the k-NN graph of ``data`` and cluster its spectral embedding.

        Args:
            data: 2-D array with one sample per row.

        Returns:
            None. Results are stored in ``self.kdtree``,
            ``self.k_means_manager`` and ``self.spectral_result``.
        """
        data_num = data.shape[0]
        self.kdtree = KDTree(data)

        # Never ask for more neighbours than there are points.
        # NOTE(review): assumes KDTree follows the scipy.spatial.KDTree query
        # API, which pads missing neighbours with an out-of-range index.
        neighbour_count = min(max(data_num // 20, 10), data_num)

        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        W = np.zeros((data_num, data_num), dtype=float)
        for ii in range(data_num):
            eular_dis, idx = self.kdtree.query(data[ii, :], k=neighbour_count)
            W[ii, idx] = self.distance(eular_dis)
            W[ii, ii] = 0  # no self-loops
        # Geometric mean with the transpose: an edge survives only when both
        # end points list each other as neighbours, and W becomes symmetric.
        W = np.sqrt(W * W.transpose())

        # Inverse degree with a floor of 1e-4 so isolated nodes do not
        # divide by zero.
        degree = np.sum(W, axis=1)
        inv_degree = 1.0 / np.where(degree > 0.0001, degree, 0.0001)

        # Random-walk Laplacian I - D^-1 W; multiplying by a diagonal matrix
        # is just a row scaling.
        Lrw = np.eye(data_num) - inv_degree[:, np.newaxis] * W

        value, vector = np.linalg.eig(Lrw)
        # Lrw is similar to a symmetric matrix, so its spectrum is real; any
        # imaginary part returned by the general solver is round-off noise.
        value = np.real(value)
        vector = np.real(vector)

        sort_idx = np.argsort(value)
        k_means_k = self.confirm_k(value, sort_idx)
        print('k is evaluated as {}'.format(k_means_k))
        print('idx:', sort_idx[0:k_means_k], 'lambda',
              value[sort_idx[0:k_means_k]])

        k_means_data = vector[:, sort_idx[0:k_means_k]]
        self.k_means_manager = K_Means(k_means_k)
        self.k_means_manager.fit(k_means_data)
        self.spectral_result = np.array(
            self.k_means_manager.predict(k_means_data))

    def predict(self, data):
        """Label each row of ``data`` like its nearest training sample.

        Args:
            data: 2-D array with one query point per row.

        Returns:
            A list with one entry of ``self.spectral_result`` per row.
        """
        return [
            self.spectral_result[self.kdtree.query(point, k=1)[1]]
            for point in data
        ]
class SpectralCluster(object):
    """Spectral clustering with a locally scaled Gaussian affinity.

    Rows of the data are embedded with the leading eigenvectors of the
    normalized affinity ``D^-1/2 W D^-1/2`` and clustered with ``K_Means``.
    """

    def __init__(self, n_clusters=2):
        # k_ is the number of clusters, and also the embedding dimension.
        self.k_ = n_clusters
        self.kmeans = K_Means(n_clusters=n_clusters)

    def squared_exponential(self, x, y, sig=0.8, sig2=1):
        """Return ``exp(-||x - y||^2 / (2 * sig * sig2))``."""
        norm = np.linalg.norm(x - y)
        return np.exp(-(norm * norm) / (2 * sig * sig2))

    def affinity(self, data):
        """Reference (loop-based) affinity matrix; see ``affinity_fast``.

        The scale of each point is the mean of its five smallest distances
        to the data set, its own zero distance included.
        """
        N = data.shape[0]
        sig = []
        for i in range(N):
            dists = sorted(
                np.linalg.norm(data[i, :] - data[j, :]) for j in range(N)
            )
            sig.append(np.mean(dists[:5]))

        ans = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                ans[i][j] = self.squared_exponential(
                    data[i], data[j], sig[i], sig[j]
                )
        return ans

    def affinity_fast(self, data):
        """Vectorized affinity matrix, numerically matching ``affinity``.

        Args:
            data: 2-D array with one sample per row.

        Returns:
            ``(N, N)`` array with entries
            ``exp(-d_ij^2 / (2 * sig_i * sig_j))``.
        """
        pairwise = distance.cdist(data, data)
        # Per-point scale: mean of the five smallest distances in each row
        # (sorted on a copy so `pairwise` keeps its original layout).
        sig = np.mean(np.sort(pairwise, axis=1)[:, :5], axis=1)
        return np.exp(-(pairwise ** 2) / (2 * np.outer(sig, sig)))

    def get_laplacian_features(self, data):
        """Embed ``data`` with the leading eigenvectors of the affinity.

        Args:
            data: 2-D array with one sample per row.

        Returns:
            Real ``(N, k_)`` array whose rows are scaled to unit Euclidean
            norm (fewer columns when ``N < k_``).
        """
        W = self.affinity_fast(data)
        inv_sqrt_degree = np.sum(W, axis=1) ** (-0.5)
        # Normalized affinity D^-1/2 W D^-1/2, built by row/column scaling.
        L = W * inv_sqrt_degree[:, np.newaxis] * inv_sqrt_degree[np.newaxis, :]

        # L is symmetric, so the dense symmetric solver gives a real and
        # deterministic decomposition. Repeated calls on the same data (fit,
        # then predict) therefore see identical features.
        eigenvalues, eigenvectors = np.linalg.eigh(L)
        # Keep the k_ eigenpairs of largest magnitude.
        leading = np.argsort(-np.abs(eigenvalues), kind="stable")[:self.k_]
        X = eigenvectors[:, leading]

        rows_norm = np.linalg.norm(X, axis=1, ord=2)
        # Leave all-zero rows untouched instead of dividing by zero.
        safe_norm = np.where(rows_norm > 0, rows_norm, 1.0)
        return X / safe_norm[:, np.newaxis]

    def fit(self, data):
        """Fit ``self.kmeans`` on the spectral embedding of ``data``."""
        V = self.get_laplacian_features(data)
        self.kmeans.fit(V)

    def predict(self, data):
        """Embed ``data`` and assign each row with the fitted k-means.

        NOTE(review): the embedding is recomputed from ``data`` alone, so the
        labels are presumably only meaningful for the data passed to ``fit``.
        """
        V = self.get_laplacian_features(data)
        return self.kmeans.predict(V)