Ejemplo n.º 1
0
    def fit(self, data):
        """Run the spectral pipeline on ``data`` and store ``self.labels``.

        The steps are delegated to helper methods of this object, in order:
        ``get_DistanceMatrix`` -> ``Distance_to_Weigt_knn`` (with ``k=10``)
        -> ``get_LaplacianMatrix`` -> ``get_YMatrix``.  The resulting matrix
        is clustered with ``K_Means`` using ``self.n_clusters`` clusters.

        Nothing is returned; the predicted labels are written to
        ``self.labels``.
        """
        pairwise = self.get_DistanceMatrix(data)
        weights = self.Distance_to_Weigt_knn(pairwise, k=10)
        embedding = self.get_YMatrix(self.get_LaplacianMatrix(weights))

        clusterer = K_Means(n_clusters=self.n_clusters)
        clusterer.fit(embedding)
        self.labels = clusterer.predict(embedding)
Ejemplo n.º 2
0
    def fit(self, X, eigValueGap=False, bShowGap=False):
        """Cluster ``X`` spectrally and return the labels.

        ``X`` is passed through this object's helpers in order:
        ``calculateDistanceMatrix`` -> ``distTransToWeightKNN`` (``k=10``)
        -> ``calculateLaplacianMatrix`` (``normalized='rm'``) ->
        ``calculateYMatrix``.  ``eigValueGap`` and ``bShowGap`` are forwarded
        unchanged to ``calculateYMatrix``.

        Returns the labels produced by clustering the resulting matrix into
        ``self.n_clusters`` groups.
        """
        distances = self.calculateDistanceMatrix(X)
        weights = self.distTransToWeightKNN(distances, k=10)
        laplacian = self.calculateLaplacianMatrix(weights, normalized='rm')
        embedding = self.calculateYMatrix(
            laplacian, eigValueGap=eigValueGap, bShowGap=bShowGap)

        # Hard-wired switch: truthy selects the project's own K_Means,
        # falsy would select the KMeans estimator and read its ``labels_``.
        use_own_kmeans = 1
        if not use_own_kmeans:
            return KMeans(n_clusters=self.n_clusters).fit(embedding).labels_

        clusterer = K_Means(self.n_clusters)
        clusterer.fit(embedding)
        return clusterer.predict(embedding)
Ejemplo n.º 3
0
 def predict(self,data):
     """Cluster the nodes of ``self.simi_graph`` and return their labels.

     Builds the unnormalised Laplacian ``D - W`` from ``self.simi_graph``
     (stored as ``self.D_mat`` and ``self.lap_mat``), takes the eigenvectors
     belonging to the ``self.k_`` smallest eigenvalues and runs ``K_Means``
     on their rows.  The ``data`` argument is not used by this method.
     """
     # Degree of node i: the sum of the weights of all edges leaving it.
     row_degrees = np.sum(self.simi_graph, axis=1)
     self.D_mat = np.diag(row_degrees)
     self.lap_mat = self.D_mat - self.simi_graph

     eig_vals, eig_vecs = np.linalg.eig(self.lap_mat)
     ascending = eig_vals.argsort()
     # Columns reordered by ascending eigenvalue, then the first k_ kept.
     k_eigenvectors = eig_vecs[:, ascending][:, :self.k_]
     print("k eigen shape",np.shape(k_eigenvectors))

     clusterer = K_Means(self.k_)
     clusterer.fit(k_eigenvectors)
     return clusterer.predict(k_eigenvectors)
Ejemplo n.º 4
0
class Spectral(object):
    """Spectral clustering on a kNN graph with an eigengap-chosen cluster count.

    ``fit`` builds a k-nearest-neighbour similarity graph with a KD-tree,
    forms the random-walk Laplacian ``I - D^-1 W``, picks the number of
    clusters from the eigenvalue gaps (``confirm_k``) and clusters the
    corresponding eigenvectors with ``K_Means``.  ``predict`` labels a point
    with the cluster of its nearest training point.
    """

    def __init__(self):
        self.tolerance_ = 0.00001

    def distance(self, eular_dis):
        """Turn Euclidean distance(s) into similarity weight(s) ``exp(-d)``."""
        return np.exp(-eular_dis)

    def confirm_k(self, value, sort_idx):
        """Estimate the number of clusters from the eigenvalue gaps.

        Args:
            value: 1-D array of eigenvalues.
            sort_idx: indices that sort ``value`` in ascending order.

        Returns:
            The first position ``kk`` (1-based count of eigenvalues before
            the gap) whose gap ``sorted[kk] - sorted[kk - 1]`` exceeds the
            mean of the first five gaps.  If no gap exceeds it, the last
            position ``len(value) - 1`` is returned; with fewer than two
            eigenvalues the result is 1.
        """
        sorted_value = value[sort_idx]
        if value.shape[0] < 2:
            # No gap exists to inspect; a single cluster is the only answer.
            return 1
        mean_diff = np.mean(np.diff(sorted_value)[0:5])
        kk = 1
        for kk in range(1, value.shape[0]):
            if sorted_value[kk] - sorted_value[kk - 1] > mean_diff:
                break
        return kk

    def _affinity_matrix(self, data):
        """Build the symmetric kNN similarity matrix W using ``self.kdtree``."""
        data_num = data.shape[0]
        # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
        W = np.zeros((data_num, data_num), dtype=float)
        # Never ask for more neighbours than there are points.
        # NOTE(review): assumes a scipy.spatial.KDTree-style ``query`` that
        # pads missing neighbours with index ``data_num`` — confirm against
        # the file's imports.
        neighbours = min(max(int(data_num / 20), 10), data_num)
        for ii in range(data_num):
            eular_dis, idx = self.kdtree.query(data[ii, :], k=neighbours)
            W[ii, idx] = self.distance(eular_dis)
            W[ii, ii] = 0  # no self-loops
        # Geometric mean of both directions: an edge survives only when
        # each point is among the other's neighbours.
        return np.sqrt(W * W.transpose())

    def fit(self, data):
        """Cluster the rows of ``data``.

        Stores the KD-tree in ``self.kdtree``, the fitted ``K_Means`` in
        ``self.k_means_manager`` and the per-point labels (as an array) in
        ``self.spectral_result``.  Returns nothing.
        """
        data_num = data.shape[0]
        self.kdtree = KDTree(data)
        W = self._affinity_matrix(data)

        # Inverse degrees, with the degree floored at 1e-4 so isolated
        # points do not cause a division by zero.
        inv_degree = 1.0 / np.maximum(np.sum(W, axis=1), 0.0001)

        # Random-walk Laplacian: I - D^-1 W (row scaling equals D^-1 @ W).
        Lrw = np.eye(data_num) - inv_degree[:, np.newaxis] * W
        value, vector = np.linalg.eig(Lrw)
        # D^-1 W is similar to a symmetric matrix, so its spectrum is real;
        # drop any imaginary round-off so sorting and K_Means see real data.
        value = np.real(value)
        vector = np.real(vector)
        sort_idx = np.argsort(value)

        k_means_k = self.confirm_k(value, sort_idx)
        print('k is evaluated as {}'.format(k_means_k))

        print('idx:', sort_idx[0:k_means_k], 'lambda',
              value[sort_idx[0:k_means_k]])
        k_means_data = vector[:, sort_idx[0:k_means_k]]

        self.k_means_manager = K_Means(k_means_k)
        self.k_means_manager.fit(k_means_data)
        self.spectral_result = np.array(
            self.k_means_manager.predict(k_means_data))

    def predict(self, data):
        """Return, for each row of ``data``, the label of its nearest fitted point.

        Requires ``fit`` to have been called (uses ``self.kdtree`` and
        ``self.spectral_result``).  The result is a list with one label
        per row.
        """
        return [
            self.spectral_result[self.kdtree.query(row, k=1)[1]]
            for row in data
        ]
Ejemplo n.º 5
0
class SpectralCluster(object):
    """Spectral clustering with a locally scaled Gaussian affinity.

    Each point gets its own bandwidth (mean distance to its five closest
    points, itself included); the affinity between two points is
    ``exp(-d^2 / (2 * sig_i * sig_j))``.  The leading eigenvectors of the
    normalised affinity ``D^-1/2 W D^-1/2`` are row-normalised and clustered
    with ``K_Means``.
    """

    # n_clusters is the number of groups.
    def __init__(self, n_clusters=2):
        self.k_ = n_clusters
        self.kmeans = K_Means(n_clusters=n_clusters)

    def squared_exponential(self, x, y, sig=0.8, sig2=1):
        """Return ``exp(-||x - y||^2 / (2 * sig * sig2))``."""
        norm = np.linalg.norm(x - y)
        return np.exp(-(norm * norm) / (2 * sig * sig2))

    def affinity(self, data):
        """Reference (pure-Python, O(N^2) calls) version of ``affinity_fast``.

        Returns the ``(N, N)`` affinity matrix for the rows of ``data``.
        """
        N = data.shape[0]
        sig = []
        for i in range(N):
            dists = sorted(
                np.linalg.norm(data[i, :] - data[j, :]) for j in range(N))
            # Bandwidth of point i: mean of its five smallest distances
            # (the zero distance to itself is one of them).
            sig.append(np.mean(dists[:5]))

        ans = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                ans[i][j] = self.squared_exponential(data[i], data[j], sig[i],
                                                     sig[j])
        return ans

    def affinity_fast(self, data):
        """Vectorised affinity matrix; same values as ``affinity``.

        Returns the ``(N, N)`` matrix ``exp(-d_ij^2 / (2 * sig_i * sig_j))``
        where ``d`` is the pairwise Euclidean distance and ``sig_i`` the
        mean of the five smallest distances in row ``i``.
        """
        dists = distance.cdist(data, data)
        # Sort a copy: the unsorted pairwise distances are still needed.
        sig = np.mean(np.sort(dists, axis=1)[:, :5], axis=1)
        return np.exp(-(dists * dists) / (2 * np.outer(sig, sig)))

    def get_laplacian_features(self, data):
        """Embed ``data`` as unit-length rows of the leading eigenvectors.

        Returns an ``(N, self.k_)`` array: the real parts of ``self.k_``
        eigenvectors of ``D^-1/2 W D^-1/2`` (as returned by
        ``scipy.sparse.linalg.eigs`` with its default selection), with each
        row scaled to unit Euclidean norm.
        """
        W = self.affinity_fast(data)
        d_inv_sqrt = np.sum(W, axis=1) ** (-0.5)
        # Normalised affinity D^-1/2 W D^-1/2 (not the Laplacian itself),
        # computed by row/column scaling instead of two dense matmuls.
        L = W * d_inv_sqrt[:, np.newaxis] * d_inv_sqrt[np.newaxis, :]

        w, v = scipy.sparse.linalg.eigs(L, self.k_)
        X = v.real
        rows_norm = np.linalg.norm(X, axis=1, ord=2)
        return X / rows_norm[:, np.newaxis]

    def fit(self, data):
        """Fit the internal ``K_Means`` on the spectral embedding of ``data``."""
        V = self.get_laplacian_features(data)
        self.kmeans.fit(V)

    def predict(self, data):
        """Re-embed ``data`` and return the ``K_Means`` prediction for it.

        The embedding is recomputed from ``data`` itself on every call.
        """
        V = self.get_laplacian_features(data)
        return self.kmeans.predict(V)