def cluster(request): context={} lables=[1,2,3] knndata=[[0.833921748,0.560733679,0.381229963,1,1],[0.27499438, 0.346274739 ,0.353645499,2,2 ],[0.356473217, 0.430978784, 0.07547164,3,3]] context['knn']=knndata context['lables']=lables #下面是决策树的数据 #testdata=testdeciton() testdata=[['presbyopic', 'hyper', 'no', 'normal', 'soft', 'soft'], ['presbyopic', 'hyper', 'yes', 'reduced', 'no-lenses', 'no-lenses'], ['presbyopic', 'hyper', 'yes', 'normal', 'no-lenses', 'no-lenses']] context['deci']=testdata suxing=['age','prescript','astigmatic','terarfate'] newstock=New_stock.objects.filter(name='CEO') #以下是贝叶斯 lajifenlei={1:'spam',0:'ham'} testEntry=[['love my dalmation'],['stupid garbage']] print 'test'*10 byeslei=[] for i in testbyes(testEntry): byeslei.append(lajifenlei[i]) print byeslei context['byeslei']=byeslei context['byestest']=testEntry byes=[['love my dalmation','ham','ham'],['stupid garbage','spam','spam']] context['byes']=byes #以下是smo-svm svm=[[7.551510, -1.580030,1,1], [2.114999, -0.004466,-1,-1]] context['svm']=svm # 以下是kmeans聚类算法 kmeans=kMeans(1)#K值为4 #print kmeans[0,0] #返回的是一个矩阵 这样取第一行第一列的元素 kmeandata=[kmeans[0,0],kmeans[0,1]] context['kmeans']=kmeandata return render_to_response('cluster.html', context)
def main():
    """Cluster the coordinate list into four groups, then classify a newly
    added coordinate with kNN against those clusters."""
    cluster_count = 4
    data = kmeans.kMeans(kmeans.createCoordinateList(), cluster_count, 50)
    newCoordinate = kmeans.Coordinate(10, 68)
    # Mark this coordinate as the newly added one.
    newCoordinate.increFlag = 1
    knnMethod(data, newCoordinate, cluster_count)
def kMeansPlot(self, dataSet, k, statusBar, button):
    """Start an animated k-means plot.

    Creates the KMeans generator, draws the first frame immediately, and
    installs a QTimer that re-invokes update_figure on every tick to advance
    the animation.
    """
    self.kMeans = kMeans(dataSet, k)  # KMeans generator driving the animation
    self.timerStart = True
    self.timer = QTimer(self)
    # Each timeout advances the plot by one update_figure call.
    self.timer.timeout.connect(
        lambda: self.update_figure(dataSet, k, statusBar, button))
    # First frame is drawn manually; the timer takes over afterwards.
    self.update_figure(dataSet, k, statusBar, button)
def selectCenters(data, k, x):
    """Pick k RBF centers (and their spread) from the sample matrix x.

    data == 'cis': k rows of x chosen uniformly at random; std read from args.std.
    data == 'fa' : centers and std computed by the kMeans algorithm.

    Returns (centers, std).
    Raises ValueError for any other mode (previously this fell through and
    crashed with a NameError on the undefined locals `centers`/`std`).
    """
    if data == 'cis':
        # Random selection of centers from the data samples.
        centers = x[np.random.randint(x.shape[0], size=k), :]
        std = args.std
    elif data == 'fa':
        # Select centers using the kMeans algorithm.
        centers, std = kMeans(x, k)
    else:
        raise ValueError("unknown center-selection mode: %r" % (data,))
    return centers, std
def clustering(data, cvImg, nframe, error, K=1):
    """Cluster 2-D points into K groups, draw them on cvImg, and track an
    error count of frames where fewer than two clusters were populated.

    Returns (cvImg, error, centroid, labels).

    Bug fix: the centroid swap used `temp = centroid[0]`, which for numpy
    arrays is a VIEW of the row — assigning over centroid[0] also changed
    `temp`, so both rows ended up identical. Rows are now swapped with fancy
    indexing, which copies correctly.
    """
    flag1 = 0
    flag2 = 0
    l0 = 0
    l1 = 0
    centroid, labels = np.array([]), np.array([])
    if len(data) > 0:
        dataarray = np.asarray(data)
        centroid, labels = kmeans.kMeans(dataarray, K, maxIters=20,
                                         plot_progress=None)
        try:
            cv.Line(cvImg, (int(centroid[0][0]), int(centroid[0][1])),
                    (int(centroid[1][0]), int(centroid[1][1])), (255, 0, 0))
            # NOTE(review): this call passes the center as two ints rather
            # than a point tuple; it likely always raises and is swallowed
            # below — confirm against the cv API before relying on it.
            cv.Circle(cvImg, int(centroid[0][0]), int(centroid[0][1]), 5,
                      (0, 255, 0), -1)
        except:
            per = False  # drawing is best-effort (e.g. fewer than 2 centroids)
        i = 0
        # Count membership of each of the two clusters.
        for l in labels:
            if l == 0:
                l0 = l0 + 1
            if l == 1:
                l1 = l1 + 1
        if l1 > l0:
            # Keep the larger cluster first: swap centroid rows (fixed, see docstring).
            centroid[[0, 1]] = centroid[[1, 0]]
            for l in labels:
                if l == 0:
                    cv.Circle(cvImg, (data[i][0], data[i][1]), 5, (254, 0, 254), -1)
                    flag1 = 1
                if l == 1:
                    cv.Circle(cvImg, (data[i][0], data[i][1]), 5, (0, 255, 255), -1)
                    flag2 = 1
                i = i + 1
        else:
            for l in labels:
                if l == 0:
                    cv.Circle(cvImg, (data[i][0], data[i][1]), 5, (0, 255, 255), -1)
                    flag1 = 1
                if l == 1:
                    cv.Circle(cvImg, (data[i][0], data[i][1]), 5, (254, 0, 254), -1)
                    flag2 = 1
                i = i + 1
        try:
            cv.Circle(cvImg, (int(centroid[0][0]), int(centroid[0][1])), 5,
                      (0, 255, 0), -1)
        except:
            per = False
    if (flag1 + flag2 < 2):
        # Fewer than two populated clusters this frame counts as an error.
        error = error + 1
    pcterror = (error / nframe) * 100.0
    # print "current error of kmeans = ", pcterror, "%"
    return cvImg, error, centroid, labels
def main(): iris = datasets.load_iris() x = iris.data t = iris.target k = int(sys.argv[1]) kmeans = kMeans(k, 4) results = kmeans.clustering(iris.data) for i in range(3): print iris.target_names[i], results[iris.target == i]
def linearRegression(tr, te, m, Zee, k=False):
    """Fit a least-squares linear map from m compressed features to targets Z.

    tr, te : training / test data, one sample per row.
    m      : number of feature vectors (kMeans centers or PCA components).
    Zee    : target matrix, one column per sample.
    k      : if True derive features with kMeans, otherwise with PCA.

    Returns (SEkTrain, MRTrain, SEkTest, MRTest): mean squared error and
    misclassification ratio on the training and test sets.

    Generalized: the sample counts are now read from the data instead of
    being hard-coded to 1000 (identical results for 1000-sample inputs).
    """
    trainData = tr
    testData = te
    Z = Zee
    if (k):
        featureVectors = kMeans(trainData, m)
    else:
        # PCA matrix comes back transposed; to extract the 1st column: pcaMatT[:, 0]
        mean, featureVectors = pca.pca(trainData, m)
    trainData = np.matrix(trainData).transpose()
    testData = np.matrix(testData).transpose()
    # Pad a constant-1 row so the regression learns a bias term.
    featureVectors = np.pad(featureVectors, ((0, 1), (0, 0)), 'constant',
                            constant_values=1)
    # Compressed representations (features x samples).
    ctData = featureVectors * trainData
    ctestData = featureVectors * testData
    Phi = ctData.transpose()
    # Closed-form least squares: Wopt = ((Phi^T Phi)^-1 Phi^T Z^T)^T.
    Wopt = (inv(Phi.transpose() * Phi) * Phi.transpose() *
            Z.transpose()).transpose()
    nTrain = ctData.shape[1]
    nTest = ctestData.shape[1]
    SEkTrain = 0
    MRTrain = 0
    SEkTest = 0
    MRTest = 0
    # Mean squared error and misclassification ratio, training set.
    for i in range(nTrain):
        # [:-1, 0] removes the bias-padding component before taking the norm.
        SEkTrain += pow(norm((Wopt * ctData[:, i] - Z[:, i])[:-1, 0]), 2)
    SEkTrain /= float(nTrain)
    for i in range(nTrain):
        MRTrain += getMCBool(Wopt, ctData[:, i], Z[:, i])
    MRTrain /= float(nTrain)
    # Same two metrics on the test set.
    for i in range(nTest):
        SEkTest += pow(norm((Wopt * ctestData[:, i] - Z[:, i])[:-1, 0]), 2)
    SEkTest /= float(nTest)
    for i in range(nTest):
        MRTest += getMCBool(Wopt, ctestData[:, i], Z[:, i])
    MRTest /= float(nTest)
    return SEkTrain, MRTrain, SEkTest, MRTest
def test():
    """Smoke-test the kmeans helpers on testSet.txt, printing each result."""
    dataMat = mat(kmeans.loadData('testSet.txt'))
    # Column-wise min/max of the loaded data.
    for col in (0, 1):
        print("min[%d]:" % col, min(dataMat[:, col]))
        print("max[%d]:" % col, max(dataMat[:, col]))
    print("randCent:", kmeans.randCent(dataMat, 2))
    print("distEclud:", kmeans.distEclud(dataMat[0], dataMat[1]))
    myCentroids, clustAssing = kmeans.kMeans(dataMat, 4)
    print("myCentroids:", myCentroids)
    print("clustAssing:", clustAssing)
def evalFitness(dataMat, k, pop, preEval, dist):
    """Score every feature-set individual in the population.

    Fitness is 1 + mean Silhouette coefficient of a k-means clustering run on
    only the individual's selected features (the +1 keeps fitness positive).
    Results are memoized in preEval, keyed by the individual's DNA bit-string,
    so repeated feature combinations are never re-clustered.

    Returns (fitness array, updated preEval dict).
    """
    fitness = np.empty(pop.shape[0])
    for idx, individual in enumerate(pop):
        dna = ''.join('1' if bit else '0' for bit in individual)
        if dna in preEval:
            # Previously evaluated combination: reuse the memoized score.
            fitness[idx] = preEval[dna]
        else:
            means, labels = kMeans(dataMat[:, individual], k)
            fitness[idx] = Silhouette(dataMat, labels, dist).mean() + 1
            preEval[dna] = fitness[idx]
    return fitness, preEval
def _init(self, x): print ">> _init() entered." print "kmeans..." myCentroids, clustAssing = kmeans.kMeans(x, self.n_state) mean_kmeans = myCentroids self.emit_means = mean_kmeans print "shape of self.emit_means: ", shape(self.emit_means) for each in self.emit_means: print each print "calculating self.emit_covars" for i in range(self.n_state): for each in cov(x, x): print each self.emit_covars[i] = add(cov(x, x), mulByNum(eye(len(x[0])), 0.01)) print "shape of self.emit_covars: ", shape(self.emit_covars) for each in self.emit_covars: print each print "<< _init() finished."
def plotDiaRadius(self, paths, dia, thickestPath, nrOfClusters): print 'do the kmeans :-)' pts = [] for i in range(len(paths)): pts.append([paths[i], dia[i]]) cl = km.kMeans(pts) c = cl.kmeans(nrOfClusters, 0.01) cX = [] cY = [] for nc in range(nrOfClusters): cX.append([]) cY.append([]) for nc in range(nrOfClusters): for i in c[nc]: cX[nc].append(i[0]) cY[nc].append(i[1]) for nc in range(nrOfClusters): self.__io.saveArray( cX[nc], self.__io.getHomePath() + 'Plots/' + self.__io.getFileName() + '_PathsDiaAx_' + str(nrOfClusters) + '_' + str(nc)) self.__io.saveArray( cY[nc], self.__io.getHomePath() + 'Plots/' + self.__io.getFileName() + '_PathsDiaAy_' + str(nrOfClusters) + '_' + str(nc)) if nrOfClusters == 2: if cY[0][0] > cY[1][0]: return cX[0], cY[0], cX[1], cY[1] else: return cX[1], cY[1], cX[0], cY[0] if nrOfClusters == 3: if cY[0][0] > cY[1][0] and cY[1][0] > cY[2][0]: return cX[0], cY[0], cX[1], cY[1], cX[2], cY[2] if cY[0][0] > cY[1][0] and cY[1][0] < cY[2][0]: return cX[0], cY[0], cX[2], cY[2], cX[1], cY[1] if cY[0][0] < cY[1][0] and cY[1][0] < cY[2][0]: return cX[2], cY[2], cX[1], cY[1], cX[0], cY[0] else: return cX[2], cY[2], cX[0], cY[0], cX[1], cY[1]
def plotDiaRadius(self,paths,dia,thickestPath,nrOfClusters): print 'do the kmeans :-)' pts=[] for i in range(len(paths)): pts.append([paths[i],dia[i]]) cl=km.kMeans(pts) c=cl.kmeans(nrOfClusters, 0.01) cX=[] cY=[] for nc in range(nrOfClusters): cX.append([]) cY.append([]) for nc in range(nrOfClusters): for i in c[nc]: cX[nc].append(i[0]) cY[nc].append(i[1]) for nc in range(nrOfClusters): self.__io.saveArray(cX[nc],self.__io.getHomePath()+'Plots/'+self.__io.getFileName()+'_PathsDiaAx_'+str(nrOfClusters)+'_'+str(nc)) self.__io.saveArray(cY[nc],self.__io.getHomePath()+'Plots/'+self.__io.getFileName()+'_PathsDiaAy_'+str(nrOfClusters)+'_'+str(nc)) if nrOfClusters ==2: if cY[0][0]>cY[1][0]: return cX[0],cY[0],cX[1],cY[1] else: return cX[1],cY[1],cX[0],cY[0] if nrOfClusters ==3: if cY[0][0]>cY[1][0] and cY[1][0]>cY[2][0]: return cX[0],cY[0],cX[1],cY[1],cX[2],cY[2] if cY[0][0]>cY[1][0] and cY[1][0]<cY[2][0]: return cX[0],cY[0],cX[2],cY[2],cX[1],cY[1] if cY[0][0]<cY[1][0] and cY[1][0]<cY[2][0]: return cX[2],cY[2],cX[1],cY[1],cX[0],cY[0] else: return cX[2],cY[2],cX[0],cY[0],cX[1],cY[1]
def SelectBestFeature(dataMat, selected, Nk, dists):
    """Greedy forward step of feature selection.

    Loops through every currently unselected feature, clusters the data with
    that feature added to `selected` (k-means, Nk clusters), and keeps the
    feature giving the highest mean Silhouette coefficient computed from the
    pre-built distance matrix `dists`.

    Returns (feature index, best coefficient, cluster means, cluster labels).
    """
    # Indices of the features not yet in `selected`.
    candidates = np.where(~np.isin(np.arange(dataMat.shape[1]), selected))[0]
    bestCoeff = -1 - 1e-9  # just below the Silhouette minimum of -1
    for feature in candidates:
        trial = np.hstack([selected, feature])
        means, labels = kMeans(dataMat[:, trial], Nk)
        score = Silhouette(dataMat, labels, dists).mean()
        if score > bestCoeff:
            bestCoeff = score
            outs = (feature, score, means, labels)
    return outs
# Exercise kMeans on random 2-D data and count how many points fall in each
# of the 4 clusters.
from kmeans import kMeans
import numpy as np

test_data = np.random.rand(40, 2)
datMat = np.mat(test_data)
# Fix: cluster ONCE. The original called kMeans twice; the second,
# independent randomized run overwrote the assignments the counts below
# were meant to describe. Names a/b are kept for any downstream use.
myCentroids, clustAssing = kMeans(datMat, 4)
a, b = myCentroids, clustAssing
c = b.tolist()
d = [0, 0, 0, 0]
# NOTE(review): [[None]] * 4 repeats the SAME inner list four times
# (aliasing); unused here, but a bug if ever mutated per slot.
e = [[None]] * 4
for i in range(len(c)):
    class_id = int(c[i][0])  # first column of the assignment matrix = cluster id
    d[class_id] += 1
plt.ioff()
plt.show()
# Load the data set (generated 3-cluster data).
filename = "C:/Users/Jyotsna/Desktop/medium12.txt"
data = np.genfromtxt(filename)
'''m1, cov1 = [[80],[15.26],[-0.25],[40],[40]] m2, cov2 = [5, 13], [[2.5, -1.5], [-1.5, 1.5]] m3, cov3 = [3, 7], [[0.25, 0.5], [-0.1, 0.5]] data1 = np.random.multivariate_normal(m1, cov1, 250) data2 = np.random.multivariate_normal(m2, cov2, 180) data3 = np.random.multivariate_normal(m3, cov3, 100)'''
X = np.vstack((data))
np.random.shuffle(X)
centroids, C = kMeans(X, K=10)
print(C)
count = 0
number = 1
elsenumber = 0
#show(X, C, centroids, True)
#index = [X[0] for X, value in np.ndenumerate(centroids) if value==]
# Indices of the 298 largest values in C.
ind = np.argsort(C)[::-1][:298]
print(ind)
# Fixes: the original bound the output file to the builtin name `file` and
# never closed it; a `with` block now guarantees flush+close. Membership is
# also tested against a set instead of scanning the ndarray per iteration.
ind_set = set(int(i) for i in ind)
with open("C:/Users/Jyotsna/Desktop/Kmeansout1.txt", "w") as out_file:
    for i in range(0, 912):
        if i in ind_set:
            out_file.write("1\n")
        else:
            out_file.write("0\n")
# Visualize a 16-cluster k-means color clustering of bird_small.mat by
# plotting a random sample of pixels in 3-D RGB space.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
data = loadmat('data/bird_small.mat')
A = data['A']
A = A / 255.0  # normalize pixel values to [0, 1]
height, width, channels = A.shape
X = np.mat(A.reshape(height * width, channels))
m, n = X.shape
clusterNum = 16
cmap = getCmap(clusterNum)
centroids, clusterAssment = kmeans.kMeans(X, clusterNum)
# Randomly pick 1000 samples to plot.
sampleSize = 1000
sampleIndexs = np.random.choice(m, sampleSize)
clusters = clusterAssment[sampleIndexs]
samples = X[sampleIndexs]
# 3-D view, one marker per sampled pixel, colored by assigned cluster.
for idx in range(sampleSize):
    x, y, z = samples[idx, :].A[0]
    ax.scatter([x], [y], [z], color=cmap(clusters[idx, 0]), marker='o')
plt.show()
# 2-D view (below)
import numpy as np
import pandas as pd

from kmeans import kMeans, WCSS

if __name__ == "__main__":
    # Features are every column but the last; labels are the last column.
    df = pd.read_csv('data.csv')
    X = df.values[:, :-1]
    y = df.values[:, -1]
    num_clusters = 14
    clusters = kMeans(X, num_clusters)
    wcss = WCSS(clusters)
    score = round(wcss, 2)
    print("Within Cluster Sum of Squares score {} s".format(score))
# coding: utf-8
# kmeans/test_normal_kmeans.py
# Plot a k=4 k-means clustering of testSet2.txt: centers as red '+', samples
# styled per cluster.
import kmeans
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    dataMat = np.mat(kmeans.loadDataSet('data/testSet2.txt'))
    k = 4
    centroids, clusterAssment = kmeans.kMeans(dataMat, k)
    clusterCount = np.shape(centroids)[0]
    m = np.shape(dataMat)[0]
    # Per-cluster marker and color styles.
    patterns = ['o', 'D', '^', 's']
    colors = ['b', 'g', 'black', 'm']
    fig = plt.figure()
    title = 'kmeans with k=' + str(k)
    ax = fig.add_subplot(111, title=title)
    for k in range(clusterCount):
        # Plot the cluster center.
        ax.scatter(centroids[k, 0], centroids[k, 1], color='r', marker='+',
                   linewidth=20)
        # Plot the samples assigned to this center.
        # Fix: the original wrapped this in `for i in range(m)` without ever
        # using i, re-scattering the identical cluster m times; each cluster
        # is now drawn exactly once (same figure, far fewer draw calls).
        ptsInCluster = dataMat[np.nonzero(clusterAssment[:, 0].A == k)[0]]
        ax.scatter(ptsInCluster[:, 0].flatten().A[0],
                   ptsInCluster[:, 1].flatten().A[0],
                   marker=patterns[k], color=colors[k])
    plt.show()
# Plot training instances on two selected feature dimensions together with
# the 20 k-means prototypes fitted to them.
import matplotlib.pyplot as plt
from kmeans import kMeans
from main import *

insts = parseTrainingData()
xs = [inst.data[5] for inst in insts]
ys = [inst.data[7] for inst in insts]
# Rebuild the instances from just those two dimensions.
insts = [Instance([x, y], []) for x, y in zip(xs, ys)]
protos, clusters = kMeans(20, insts)
pxs = [p.data[0] for p in protos]
pys = [p.data[1] for p in protos]
# Data as blue circles, prototypes as red triangles.
plt.plot(xs, ys, 'bo', pxs, pys, 'r^')
plt.show()
from kmeans import kMeans from pyflann import * from numpy import * from numpy.random import * dataset = [[1.0, 1.0], [1.1, 1.1], [0.1, 0.1], [0.0, 0.0]] k = kMeans(dataset, 2, 10) k.train() print k.get_centers() print k.predict([[0.0, 0.0], [1.0, 1.0], [0.9, 0.9], [-0.1, -0.1]])
def train(self, insts, rate, convergenceThreshold, maxIters):
    '''Train this RBFNN.

    Positions self.numProtos prototypes with k-means over insts, drops empty
    clusters, computes a beta (width) coefficient per remaining cluster,
    builds the RBF layer, then fits the output-layer weights by batch
    gradient descent at learning rate `rate` until the flattened weights
    converge within convergenceThreshold or maxIters iterations elapse.
    (The original docstring described wtMean/wtStdDev parameters that do not
    exist in this signature; initial weights are whatever self.wtSumNodes
    already holds.)'''
    protos, clusters = kMeans(self.numProtos, insts)
    # Filter empty clusters, keeping prototypes and clusters in lockstep.
    newProtos = []
    newClusters = []
    toRemove = [False if len(c) == 0 else True for c in clusters]
    for idx, shouldKeep in enumerate(toRemove):
        if shouldKeep:
            newProtos.append(protos[idx])
            newClusters.append(clusters[idx])
    protos = newProtos
    clusters = newClusters
    # Calculate beta coefficients.
    betas = []
    for cluster in clusters:
        # NOTE(review): empty clusters were already filtered out above, so
        # this branch looks dead; it also appends 0 although the original
        # comment claimed the coefficient would be 1 — confirm intent.
        if len(cluster) == 0:
            betas.append(0)
        else:
            clusterMean = meanInst(cluster)
            dists = [
                euclideanDist(inst.data, clusterMean.data) for inst in cluster
            ]
            # Mean distance to the cluster mean serves as the RBF width sigma.
            sigma = sum(dists) / len(cluster)
            if sum(dists) == 0:
                # Degenerate cluster (all points identical): fall back to beta = 1.
                betas.append(1)
            else:
                betas.append(1.0 / (2 * math.pow(sigma, 2)))
    # Create the RBF nodes from the prototype & beta coefficient.
    self.rbfNodes = [
        RBFNode(proto, beta) for proto, beta in zip(protos, betas)
    ]
    # Perform gradient descent to learn weights for the output nodes.
    conv = ConvergenceTester(convergenceThreshold)
    for x in range(maxIters):
        # Prepend a constant 1 so index 0 acts as the bias input.
        rbfOutputs = [[1] + self.passRBFLayer(inst) for inst in insts]
        predictions = [self.fwdPass(inst) for inst in insts]
        for outputIndex, node in enumerate(self.wtSumNodes):
            for wtIdx in range(len(node.wts)):
                # Batch gradient step: average over all instances of
                # (prediction - label) * rbf activation for this weight.
                node.wts[wtIdx] -= (rate * (sum([( \
                    predictions[i][outputIndex] - \
                    inst.label[outputIndex]) * rbfOutputs[i][wtIdx] \
                    for i, inst in enumerate(insts)])/len(insts)))
        # Stop early once the flattened weight vector has converged.
        if conv.test(flatten([node.wts for node in self.wtSumNodes])):
            break