class SCC:
    """Spectral clustering on top of a pairwise matrix and a k-means back end.

    The constructor fills the pairwise matrix ``W`` with ``dist``, derives the
    degree matrix ``D`` and the graph Laplacian ``L``; ``train`` embeds the
    samples with eigenvectors of ``L`` and clusters that embedding with
    ``KMEANSC``.
    """

    def __init__(self, X, K, dist=EuclidDistance, ftype="Normalized"):
        """Build ``W``, ``D`` and ``L`` for the training data.

        X     -- M*N matrix holding M training cases, one case per row
        K     -- number of clusters to produce
        dist  -- callable ``dist(x, y)`` used to fill the pairwise matrix
        ftype -- "Normalized" rescales the Laplacian to
                 D^-1/2 * (D - W) * D^-1/2; any other value (e.g. "Ratio")
                 keeps the unnormalized Laplacian D - W
        """
        self.X = X
        self.K = K
        self.dist = dist
        self.labels = []
        self.centroids = []
        self.W = self.distmat(X, X)
        degree = self.W.sum(axis=0)
        self.D = np.diag(degree)
        self.L = self.D - self.W
        self.ftype = ftype
        if ftype == "Normalized":
            # Guard against division by zero for vertices with zero degree.
            # Only the degrees themselves are patched; the off-diagonal zeros
            # of D must stay zero.
            degree = np.where(degree == 0, 1.0, degree)
            self.D = np.diag(degree)
            # D is diagonal, so D^-1/2 * L * D^-1/2 is a row scaling followed
            # by a column scaling.  (Element-wise ``D ** -0.5 * L`` on arrays
            # is NOT a matrix product.)
            scale = 1.0 / np.sqrt(degree)
            self.L = scale[:, np.newaxis] * self.L * scale[np.newaxis, :]

    def train(self, maxiter=100, threshold=0.1):
        """Embed the samples spectrally and cluster them with KMEANSC.

        ``maxiter`` and ``threshold`` are forwarded unchanged to
        ``KMEANSC.train``.  The resulting labels are stored in
        ``self.labels``.
        """
        v, vectors = eig(self.L)
        # eig() gives no ordering guarantee; sort ascending by eigenvalue so
        # that the column slice below really skips the smallest eigenpair and
        # keeps the next K-1.
        order = v.real.argsort()
        self.T = vectors[:, order]
        # One sample per column, matching how KMEANSC is fed elsewhere in
        # this file.  NOTE(review): confirm against KMEANSC's definition.
        self.km = KMEANSC(self.T[:, 1:self.K].transpose(), self.K)
        self.km.train(maxiter, threshold)
        self.labels = self.km.labels

    def distmat(self, X, Y):
        """Return the matrix whose entry (i, j) is ``self.dist(X[i], Y[j])``."""
        dm = np.zeros((X.shape[0], Y.shape[0]))
        for i in range(X.shape[0]):
            for j in range(Y.shape[0]):
                dm[i][j] = self.dist(X[i], Y[j])
        return dm

    def result(self):
        """Return the labels produced by the last call to ``train``."""
        return self.labels
class SCC:
    """Spectral clustering: pairwise matrix -> Laplacian -> k-means.

    ``__init__`` computes the pairwise matrix ``W`` (via ``dist``), the degree
    matrix ``D`` and the Laplacian ``L``.  ``train`` takes eigenvectors of
    ``L`` as the new sample representation and runs ``KMEANSC`` on it.
    """

    def __init__(self, X, K, dist=EuclidDistance, ftype="Normalized"):
        """Prepare the Laplacian for the given training set.

        X     -- M*N matrix containing M training cases (one per row)
        K     -- number of clusters wanted
        dist  -- function ``dist(x, y)`` used to build the pairwise matrix
        ftype -- "Normalized" yields D^-1/2 * (D - W) * D^-1/2; every other
                 value (e.g. "Ratio") leaves the Laplacian as D - W
        """
        self.X = X
        self.K = K
        self.dist = dist
        self.labels = []
        self.centroids = []
        self.W = self.distmat(X, X)
        col_sums = self.W.sum(axis=0)
        self.D = np.diag(col_sums)
        self.L = self.D - self.W
        self.ftype = ftype
        if ftype == "Normalized":
            # Replace zero degrees by 1 so the inverse square root is finite.
            # This must touch the degrees only: patching ``D == 0`` on the
            # full matrix would also overwrite its off-diagonal zeros.
            safe = np.where(col_sums == 0, 1.0, col_sums)
            self.D = np.diag(safe)
            # For a diagonal D the product D^-1/2 * L * D^-1/2 equals
            # L[i, j] / sqrt(d_i * d_j); ``*`` and ``**`` on ndarrays are
            # element-wise and cannot express the matrix product directly.
            inv_root = 1.0 / np.sqrt(safe)
            self.L = self.L * np.outer(inv_root, inv_root)

    def train(self, maxiter=100, threshold=0.1):
        """Run the spectral embedding followed by k-means.

        ``maxiter`` and ``threshold`` are handed straight to
        ``KMEANSC.train``; the labels it produces end up in ``self.labels``.
        """
        v, vectors = eig(self.L)
        # The eigenvalues returned by eig() are not necessarily ordered, so
        # sort the eigenpairs ascending before slicing columns 1..K-1.
        ascending = v.real.argsort()
        self.T = vectors[:, ascending]
        # KMEANSC receives one sample per column, hence the transpose.
        self.km = KMEANSC(self.T[:, 1:self.K].transpose(), self.K)
        self.km.train(maxiter, threshold)
        self.labels = self.km.labels

    def distmat(self, X, Y):
        """Return the matrix with ``self.dist(X[i], Y[j])`` at position (i, j)."""
        dm = np.zeros((X.shape[0], Y.shape[0]))
        for i in range(X.shape[0]):
            for j in range(Y.shape[0]):
                dm[i][j] = self.dist(X[i], Y[j])
        return dm

    def result(self):
        """Return the cluster labels stored by ``train``."""
        return self.labels
def train(self, maxiter=100, threshold=0.1):
    """Cluster the samples using eigenvectors of ``self.L``.

    ``maxiter`` and ``threshold`` are forwarded unchanged to
    ``KMEANSC.train``.  Sets ``self.T`` (eigenvectors, ordered by ascending
    eigenvalue), ``self.km`` (the KMEANSC instance) and ``self.labels``.
    """
    v, vectors = eig(self.L)
    # eig() does not return eigenvalues in any guaranteed order; sort the
    # eigenpairs ascending so the slice below skips the smallest one and
    # keeps the following K-1 eigenvectors.
    order = v.real.argsort()
    self.T = vectors[:, order]
    # The embedding is transposed so that each column is one sample.
    self.km = KMEANSC(self.T[:, 1:self.K].transpose(), self.K)
    self.km.train(maxiter, threshold)
    self.labels = self.km.labels
[18.75,9.8], [18.9,10.35], [18.9,11.05], [18.8,12.15], [18.3,12.65], [17.8,13.4], [16.95,14.15], [16.1,14.8], [14.8,15.35], [13.55,15.35], [11.6,15], [10.4,14.25], [11.3,14.4], [12.2,15.15], [12.45,15.35], [13.05,15.4], [13.85,15.25]] ).transpose() a=KMEANSC(features,2) a.train(180) print a.result() for i in range(features.shape[1]): if a.labels[i]==0: plt.plot(features[0][i],features[1][i],'or') elif a.labels[i]==1: plt.plot(features[0][i],features[1][i],'ob') else: plt.plot(features[0][i],features[1][i],'oy') plt.show() #print a.result() #print a.bfWhiteCen()