import kMeans

# Disabled geocoding experiments, kept for reference (Python 2 syntax):
#
# geoResults = kMeans.geoGrab('1 VA Center', 'Augusta, ME')
# print geoResults
# print geoResults['ResultSet']['Error']
# print geoResults['ResultSet']['Results'][0]['longitude']
# print geoResults['ResultSet']['Results'][0]['latitude']
# kMeans.massPlaceFind('portlandClubs.txt')

# Only live statement of the script.
# NOTE(review): 5 is presumably the number of clusters -- confirm against
# kMeans.clusterClubs.
kMeans.clusterClubs(5)
def test2():
    """Call ``kMeans.clusterClubs(5)``; the other experiments are disabled."""
    # Disabled experiments, kept for reference:
    # kMeans.geoGrab('1 VA Center', 'Augusta, ME1')
    # kMeans.testURLLib()
    kMeans.clusterClubs(5)
# Disabled plain k-means experiment, kept for reference (Python 2 syntax):
#
# myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
# print
# print
# print myCentroids
# print
# print
# print clustAssing

# Disabled experiments on other data sets (k-means, then biKmeans):
#
# datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
# datMat3 = mat(kMeans.loadDataSet('baidu_poi.txt'))
# myCentroids, clustAssing = kMeans.kMeans(datMat3, 2)
# print myCentroids
# print
# print clustAssing
# centList, myNewAssessments = kMeans.biKmeans(datMat3, 3)
# print
# print
# print centList
# print
# print
# print myNewAssessments

# Only live statement of this fragment.
# NOTE(review): 9 is presumably the number of clusters -- confirm against
# kMeans.clusterClubs.
kMeans.clusterClubs(9)
# Interactive-session transcript: bare expressions are evaluated only so a
# REPL echoes them. `datMat` and `np` must already exist when this starts
# (they are not defined in this fragment before first use).

# Value range of each of the first two columns.
min(datMat[:, 0])
max(datMat[:, 0])
min(datMat[:, 1])
max(datMat[:, 1])

# Check whether the initial centroids fall inside the value range.
kMeans.randCent(datMat, 2)
kMeans.distEclud(datMat[0], datMat[1])

# Try k-means on real data.
reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
# The result is not necessarily the global optimum.
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)

# Bisecting k-means.
reload(kMeans)
datMat3 = np.mat(kMeans.loadDataSet('testSet2.txt'))
# Still cannot guarantee the global optimum; only a local optimum.
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
centList
myNewAssments

# Use bisecting k-means to draw the clusters on a map.
reload(kMeans)
kMeans.clusterClubs(4)
# This Python file uses the following encoding: utf-8
import os
import sys

import kMeans
from numpy import *

# Reload the module before use (Python 2 builtin ``reload``).
reload(kMeans)

# Disabled plain k-means experiment, kept for reference:
#
# datMat=mat(kMeans.loadDataSet("C:\Users\YAN\Desktop\Kmeans/testSet.txt"))
# print (kMeans.randCent(datMat,2))
# print (kMeans.distEclud(datMat[0],datMat[1]))
# myCentroids,clustAssing=kMeans.kMeans(datMat,4)
# print ("the centroids are:",myCentroids)
# print ("the assignment is:",clustAssing)

# Disabled bisecting k-means experiment, kept for reference:
#
# #----------- bisecting K-means -------------#
# datMat3=mat(kMeans.loadDataSet("C:\Users\YAN\Desktop\Kmeans/testSet2.txt"))
# centList,myNewAssments=kMeans.biKmeans(datMat3,3)
# print [centList[0],centList[1],centList[2]]

# geoResults=kMeans.geoGrab('1 VA Center','Augusta, ME')

# Only live call of the script.
# NOTE(review): 5 is presumably the number of clusters -- confirm against
# kMeans.clusterClubs.
kMeans.clusterClubs(5)
print "datMat:", datMat print "min(datMat[:,0]):", min(datMat[:, 0]) print "min(datMat[:,1]):", min(datMat[:, 1]) print "max(datMat[:,0]):", max(datMat[:, 0]) print "max(datMat[:,1]):", max(datMat[:, 1]) print "randCent(datMat,2):", kMeans.randCent(datMat, 2) print "distEclud( datMat[ 0], datMat[ 1]):", kMeans.distEclud( datMat[0], datMat[1]) print "myCentroids:", myCentroids print "clustAssing:", clustAssing print ":", print ":", #10.3 二分k均值算法 datMat3 = mat(kMeans.loadDataSet(homedir + 'testSet2.txt')) centList, myNewAssments = kMeans.biKmeans(datMat3, 3) print "datMat3:", datMat3 print "centList:", centList print "myNewAssments:", myNewAssments #10.4.1 Yahoo!PlaceFinder API # geoResults=kMeans.geoGrab('1 VA Center', 'Augusta, ME') # print "geoResults:",geoResults # print "geoResults['ResultSet']['Error']:",geoResults['ResultSet']['Error'] # print "geoResults['ResultSet']['Results'][0]['longitude']:",geoResults['ResultSet']['Results'][0]['longitude'] # print "kMeans.massPlaceFind(homedir+'portlandClubs.txt'):",kMeans.massPlaceFind(homedir+'portlandClubs.txt') #10.4.2 对地理坐标进行聚类 print "kMeans.clusterClubs(5):", kMeans.clusterClubs(5)
import kMeans
from numpy import *

# Load the sample data set as a matrix and print it.
datMat = mat(kMeans.loadDataSet('testSet2.txt'))
print('datMat = ', datMat)

# Disabled centroid / bisecting k-means experiments, kept for reference:
# centroids = kMeans.randCent(datMat, 2)
# print('centroids = ',centroids)
# centroids, clusterAssment = kMeans.biKmeans(datMat, 3)
# kMeans.Draw(datMat, centroids)

# clusterClubs is called with no arguments here, so the module's own
# defaults apply; whatever it returns is kept in `distance`.
distance = kMeans.clusterClubs()