Ejemplo n.º 1
0
def do_biKmeans(datMat, result, n=3):
    """Cluster ``datMat`` with bisecting k-means and score it against ``result``.

    Args:
        datMat: Data convertible by ``np.mat``; passed to ``kMeans.biKmeans``.
        result: Reference labels handed to ``count_f`` together with the
            predicted labels.
        n: Number of clusters requested from ``biKmeans`` (default 3).

    Returns:
        ``(y_pred, [RI, J])`` where ``y_pred`` is the first column of the
        assignment matrix cast to integers, and ``RI`` / ``J`` are whatever
        ``ri`` and ``j`` compute from the output of ``count_f``.
        NOTE(review): ``count_f``, ``ri`` and ``j`` are defined elsewhere;
        presumably Rand index and Jaccard coefficient -- confirm.
    """
    from kMeans import biKmeans
    import numpy as np

    datMat = np.mat(datMat)
    centList, clustAssing = biKmeans(datMat, n)
    # Builtin ``int`` is what the removed alias ``pd.np.int`` pointed to;
    # both ``pandas.np`` and ``numpy.int`` are gone in current releases.
    y_pred = clustAssing.A[:, 0].astype(int)
    f = count_f(y_pred, result)
    RI = ri(f)
    J = j(f)
    return y_pred, [RI, J]
Ejemplo n.º 2
0
def test1():
    """Exercise the kMeans helpers on ``testSet.txt`` and plot a 4-cluster split.

    Prints the minimum of the first column, a ``randCent`` result for 2
    centroids, the ``distEclud`` distance between the first two rows and the
    centroids returned by ``biKmeans``; then hands data and centroids to
    ``kMeans.plot1``.
    """
    points = mat(kMeans.loadDataSet('testSet.txt'))
    first_column = points[:, 0]
    print(min(first_column))
    print(kMeans.randCent(points, 2))
    row_a, row_b = points[0], points[1]
    print(kMeans.distEclud(row_a, row_b))

    # Bisecting k-means with 4 clusters; only the centroids are used below.
    centers, _assignments = kMeans.biKmeans(points, 4)
    print(centers)

    kMeans.plot1(points, centers)
Ejemplo n.º 3
0
def main():
	dataSet = kMeans.loadfromcsv('./data/8.csv')
	dataMat = np.mat(dataSet)
	# normalize dataMat
	norMat = kMeans.normalize(dataMat)
	# centroids is the center of clusters
	# clusterAssment[cluster_index,deviation],in which deviation represents the dist
	# from current point to centroids. 
	# 使用Bisceting Kmeans算法对游客进行聚类,预期聚类簇数目为4
	centroids, clusterAssment = kMeans.biKmeans(norMat,4)
	cluster_label = clusterAssment[:,0]
	clusters = [[],[],[],[]]
	for i in range(0,len(cluster_label)):
		clusters[(int)(cluster_label[i])].append(np.asarray(norMat)[i])
	clusters = np.asarray(clusters)
	for i in range(0,len(clusters)):
		clusters[i] = np.asarray(clusters[i])
	clusters = np.asarray(clusters)

	#找出含有元素最多的簇,以及最少的簇
	minCount = 10000
	maxCount = 0
	max_cluster = 0
	min_cluster = 0
	for i in range(0,len(clusters)):
		if minCount > len(clusters[i]):
			minCount = len(clusters[i])
		if maxCount < len(clusters[i]):
			maxCount = len(clusters[i])
		if len(clusters[max_cluster]) < len(clusters[i]):
			max_cluster = i
		if len(clusters[min_cluster]) > len(clusters[i]):
			min_cluster = i
		print "%d cluster has %d elements " % (i, len(clusters[i])),
		print "the centroids is",
		print centroids[i]
	number_weight = float(len(clusters[max_cluster]))/(len(clusters[min_cluster]))
	print centroids[max_cluster]

	# 计算Dunn指标
	di = base.dunn(clusters)
	# 计算N-Dunn指标
	NDunnIndex = di*(maxCount/minCount)
	print di
	print NDunnIndex

	print "original dunn is %f" % di
	print "weighted dunn is %f" % (number_weight*di)
def biKmeans_func(data_set, k, cent_file="", clus_file=""):
	"""Run bisecting k-means and dump centroids and assignments to text files.

	Each row of the centroid matrix is written to cent_file and each row of
	the assignment matrix to clus_file, one row per line, every value
	followed by a single space.

	Args:
		data_set: Data passed unchanged to kMeans.biKmeans.
		k: Number of clusters requested.
		cent_file: Output path for the centroids.
		clus_file: Output path for the cluster assignments.

	Both paths must be writable; the empty-string defaults are not valid
	arguments for open() and will raise.
	"""
	print("kMeans : " + cent_file)
	cent, clus = kMeans.biKmeans(data_set, k)

	# The original wrote `f.close` without parentheses, so neither file was
	# ever explicitly closed; `with` guarantees flush and close.
	for path, matrix in ((cent_file, cent), (clus_file, clus)):
		with open(path, 'w') as out_file:
			for item in matrix.A:
				out_file.write("".join(str(column) + " " for column in item) + '\n')
Ejemplo n.º 5
0
import kMeans
from numpy import *

# Load the txt data set as a matrix.
datMat = mat(kMeans.loadDataSet('data2.txt'))

# Per the original author's note, the slice 2:5 of datMat holds the semi-major
# axis, eccentricity and orbital inclination.
# NOTE(review): the result of the bare expression below is discarded when run
# as a script; it looks like a leftover from an interactive session.
datMat[0, 2:5]

# Compute the distance between the first two rows with kMeans.distdeltaV.
delta_v = kMeans.distdeltaV(datMat[0, 2:5], datMat[1, 2:5])

# Randomly generate centroids (called with 4 here; the clustering calls below
# request 5).
centroids = kMeans.randCent(datMat[:, 2:5], 4)

# k-means clustering
myCentroids, clustAssing = kMeans.kMeans(datMat[:, 2:5], 5, kMeans.distdeltaV)

# Bisecting k-means clustering
centList, myNewAssments = kMeans.biKmeans(datMat[:, 2:5], 5, kMeans.distdeltaV)

# Plot the bisecting k-means assignments.
kMeans.showCluster_SRQ(datMat[:, 2:5], myNewAssments)
Ejemplo n.º 6
0
import kMeans
from numpy import*
import matplotlib
import matplotlib.pyplot as plt

k = 4
datmat = array(kMeans.loadDataSet('testSet.txt'))
centerList, clusterAssment = kMeans.biKmeans(datmat,k)
print 'The cendroids is:',centerList
fig = plt.figure()
fig.add_subplot(111)
colorList = ['b','c','g','k','r','y']
makerList = ['.','^','*','o','+']
for i in range(k):
    ax = plt.scatter(datmat[nonzero(clusterAssment[:,0].A == i)[0],0],datmat[nonzero(clusterAssment[:,0].A == i)[0],1],
                     c = colorList[i],marker=makerList[i])
    ax = plt.scatter(array(centerList[:,0]),array(centerList[:,1]),c = colorList[4],marker=makerList[3])
plt.title('Graph of k_Means ',)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
Ejemplo n.º 7
0
# -*- coding:utf-8 -*-

import kMeans
from numpy import  *

# Load the first sample set. It is only referenced by the disabled k-means
# example kept in the string literal below.
datMat = mat(kMeans.loadDataSet("testSet.txt"))

'''
myCentroids,clusterAssing = kMeans.kMeans(datMat, 4)
print("myCentroids is %s " % myCentroids)
print("clusterAssing is %s " % clusterAssing)
'''


# Second example: bisecting k-means with 3 clusters on testSet2.txt;
# only the centroids are printed.
dataMat2 = mat(kMeans.loadDataSet('testSet2.txt'))
centList,myNewAssment = kMeans.biKmeans(dataMat2, 3)
print(centList)

# Disabled geocoding call:
#geoResult = kMeans.geoGrab('1 VA Center', 'Augusta,ME')


Ejemplo n.º 8
0
import kMeans
from numpy import *
# Load the first sample set as a matrix.
dataMat = mat(kMeans.loadDataSet('testSet.txt'))
# print dataMat

# Ask randCent for 2 centroids (result kept but not used further here).
randMat = kMeans.randCent(dataMat, 2)
# print dataMat[:, 0]
# print randMat

# Plain k-means with 4 clusters; the return value is kept as-is in `res`.
res = kMeans.kMeans(dataMat, 4)
# print res

# Bisecting k-means with 3 clusters on the second sample set; the return
# value is discarded.
dataMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
kMeans.biKmeans(dataMat3, 3)

# centList, myNewAssments =
Ejemplo n.º 9
0
import kMeans
import numpy as np
import matplotlib.pyplot as plt
# Load the first sample set and run plain k-means with 4 clusters.
dataMat = np.mat(kMeans.loadDataSet('testSet.txt'))
#print(kMeans.randCent(dataMat,2))
myCentroids, clustAssing = kMeans.kMeans(dataMat,4)
#print(myCentroids,clustAssing)
datalist = dataMat.tolist()
#print([x[0] for x in datalist])
# Disabled scatter plot of the points and the k-means centroids.
'''plt.figure()
plt.scatter([x[0] for x in datalist],[x[1] for x in datalist])
plt.scatter([x[0] for x in myCentroids.tolist()],[x[1] for x in myCentroids.tolist()])
plt.title('kmeans')
plt.show()'''
# Wrap the loaded rows in np.mat before clustering, as done for testSet.txt
# above; the original passed the raw loadDataSet result straight to biKmeans.
dataMat = np.mat(kMeans.loadDataSet("testSet2.txt"))
centList, clusteAssment = kMeans.biKmeans(dataMat, 3)
print(centList)

import kMeans
import ProbIN
from numpy import *
import subprocess
import numpy as np

# Run bisecting k-means with 12 clusters on the motion training data; the
# return value is discarded.
datMat = mat(kMeans.loadDataSet('motionData_Training.txt'))
kMeans.biKmeans(datMat,12)

# Disabled variant: 7 clusters on the 1 Hz GPS training data.
# datMat2 = mat(kMeans.loadDataSet('GPS_1Hz_training.txt'))
# kMeans.biKmeans(datMat2,7)
Ejemplo n.º 11
0
# coding:utf-8

import kMeans

from  numpy import *

# Load the first sample set and show rows 1-4 (Python 2 print statements).
datMat=mat(kMeans.loadDataSet('testSet.txt'))
print datMat[1:5,:]


# Plain k-means with 4 clusters; print centroids, a separator line, then the
# assignment matrix.
myCentroids,clustAssing=kMeans.kMeans(datMat,4)
print myCentroids
print ' '
print clustAssing

# Bisecting k-means with 3 clusters on the second sample set.
datMat3=mat(kMeans.loadDataSet('testSet2.txt'))
centList,myNewAssments=kMeans.biKmeans(datMat3,3)
print centList,myNewAssments
Ejemplo n.º 12
0
def test4():
    """Split ``testSet2.txt`` into 3 clusters, plot them, then print the centroids."""
    points = np.mat(kMeans.loadDataSet('testSet2.txt'))
    clustering = kMeans.biKmeans(points, 3)
    centers, assignments = clustering
    kMeans.plotScatter(points, centers, assignments)
    print(centers)
Ejemplo n.º 13
0
#coding=utf-8
import kMeans
from numpy import *

# Disabled: plain k-means with 4 clusters on testSet.txt.
# datMat=mat(kMeans.loadDataSet('testSet.txt'))
# print(datMat)
# myCentroids,clustAssing=kMeans.kMeans(datMat,4)
# print(clustAssing)

# Bisecting k-means with 3 clusters on testSet2.txt; only the centroids are
# printed.
datMat = mat(kMeans.loadDataSet('testSet2.txt'))
myCentroids, clustAssing = kMeans.biKmeans(datMat, 3)
print(myCentroids)
# kMeans.clusterClubs(5)
import kMeans
from numpy import *

# Load the first sample set and print the per-column minimum and maximum
# (Python 2 print statements).
datMat = mat(kMeans.loadDataSet('testSet.txt'))
print min(datMat[:,0])
print min(datMat[:,1])
print max(datMat[:,0])
print max(datMat[:,1])
# Print a randCent result for 2 centroids and the distEclud distance between
# the first two rows.
print kMeans.randCent(datMat, 2)
print kMeans.distEclud(datMat[0], datMat[1])

# Plain k-means with 4 clusters (results not printed).
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
#print myCentroids, clustAssing

# Bisecting k-means with 3 clusters on the second sample set.
datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print centList
Ejemplo n.º 15
0
#!/usr/bin/env python
__coding__ = "utf-8"
__author__ = "Ng WaiMing"

from kMeans import kMeans
from kMeans import loadDataSet
from kMeans import randCent
from kMeans import distEclud
from kMeans import biKmeans
from numpy import *

# Script entry point: exercises the helpers imported from kMeans.
if __name__ == '__main__':
    dataMat = mat(loadDataSet('testSet.txt'))
    # Per-column minimum and maximum of the first sample set.
    print('min(dataMat[:, 0])', min(dataMat[:, 0]), '\n')
    print('min(dataMat[:, 1])', min(dataMat[:, 1]), '\n')
    print('max(dataMat[:, 0])', max(dataMat[:, 0]), '\n')
    print('max(dataMat[:, 1])', max(dataMat[:, 1]), '\n')
    # A randCent result for 2 centroids and the distance between rows 0 and 1.
    print(randCent(dataMat, 2), '\n')
    print(distEclud(dataMat[0], dataMat[1]))
    # Plain k-means with 4 clusters.
    centroids, clusterAssment = kMeans(dataMat, 4)
    print('centroids:\n', centroids, '\n')
    print('clusterAssment:\n', clusterAssment, '\n')
    # Bisecting k-means with 3 clusters on the second sample set.
    dataMat3 = mat(loadDataSet('testSet2.txt'))
    centList, myNewAssments = biKmeans(dataMat3, 3)
    print('centList: \n', centList, '\n')
    # Disabled clusterClubs demo. NOTE(review): `kMeans` is bound to the
    # imported function in this script, so this line would need the module.
    # fileName = '../../../../data/k-means/places.txt'
    # imgName = '../../../../data/k-means/Portland.png'
    # kMeans.clusterClubs(fileName=fileName, imgName=imgName, numClust=5)
Ejemplo n.º 16
0
#-*- coding:utf-8 -*-
import kMeans
from numpy import *
# Bisecting k-means with 3 clusters on testSet.txt (Python 2 print statement).
datamat = mat(kMeans.loadDataSet('testSet.txt'))
centList, myNewAssments = kMeans.biKmeans(datamat, 3)  # original note: "yields four centroids each time, converges after three iterations" -- NOTE(review): the call requests 3 clusters, so this note looks stale
print centList
#!/usr/bin/python2.7
# _*_ coding: utf-8 _*_

"""
@Author: MarkLiu
"""

import numpy as np
import kMeans
import matplotlib.pyplot as plt

# Load the sample set, wrap it in a matrix and run bisecting k-means with
# k clusters.
dataArr = kMeans.loadDataSet('datasets/testSet2.txt')
dataMat = np.matrix(dataArr)
k = 3
centroids, clusterAssment = kMeans.biKmeans(dataMat, k)
# centroids, clusterAssment = kMeans.kMeans(dataMat, k)

# Number of rows in the data matrix; used to iterate over the points when
# separating them by cluster.
m = np.shape(dataMat)[0]

# Per-cluster buckets for the x and y coordinates.
# NOTE(review): four bucket pairs are declared although k is 3.
xPoint_0 = []
yPoint_0 = []
xPoint_1 = []
yPoint_1 = []
xPoint_2 = []
yPoint_2 = []
xPoint_3 = []
yPoint_3 = []
for i in range(m):
    if int(clusterAssment[i, 0]) == 0:
Ejemplo n.º 18
0
def _plotCityPanel(position, title, norMat, centroids, clusterAssment, labelAxes):
	"""Draw one city's clustered points and centroids into subplot `position`."""
	plt.subplot(position)
	# First column of the assignment matrix, laid out as a single row.
	pointClusNum = clusterAssment[:,0].A.T
	n = np.shape(pointClusNum)[1]
	plt.title(title)
	# Marker per cluster label; points with any other label are not drawn.
	markers = {0.0: 'g^', 1.0: 'b*', 2.0: 'k<', 3.0: 'ms'}
	for i in range(n):
		marker = markers.get(pointClusNum.item(i))
		if marker is not None:
			plt.plot(norMat[i,0],norMat[i,1],marker)

	plt.plot(centroids[:,0],centroids[:,1],'ro')
	plt.axis([0,4.0,0,12])
	plt.xticks([0,1,2,3])
	if labelAxes:
		plt.xlabel(u'Distance Index')
		plt.ylabel(u'Activity Index')


def plotCluster():
	"""Cluster six city data sets and show them in a 3x2 grid of scatter plots.

	For every city the CSV is loaded with kMeans.loadfromcsv, normalized,
	and clustered into 4 groups with kMeans.biKmeans. A banner and
	printBasicInfo output are printed per city, then each city is drawn in
	its own subplot (321..326). Only the two bottom panels carry axis labels.
	"""
	# (subplot title, csv path, console banner, draw axis labels?)
	cities = [
		(u'上海', './data/8.csv', "==================上海聚类结果==========", False),
		(u'西安', './data/10195.csv', "==================西安聚类结果==========", False),
		(u'青岛', './data/10444.csv', "==================青岛聚类结果==========", False),
		(u'三亚', './data/10030.csv', "==================三亚聚类结果==========", False),
		(u'九寨沟', './data/10136.csv', "==================九寨沟聚类结果========", True),
		(u'泰山', './data/10284.csv', "==================泰山聚类结果==========", True),
	]

	# Keep the phases separate (load, normalize, cluster, print, plot) so all
	# clustering runs happen, in the original order, before any banner is
	# printed or any panel is drawn.
	dataSets = [kMeans.loadfromcsv(path) for (_, path, _, _) in cities]
	dataMats = [np.mat(dataSet) for dataSet in dataSets]
	norMats = [kMeans.normalize(dataMat) for dataMat in dataMats]
	results = [kMeans.biKmeans(norMat,4) for norMat in norMats]

	for (_, _, banner, _), norMat, (centroids, clusterAssment) in zip(cities, norMats, results):
		print(banner)
		printBasicInfo(centroids, clusterAssment, norMat)

	for index, (title, _, _, labelAxes) in enumerate(cities):
		centroids, clusterAssment = results[index]
		_plotCityPanel(321 + index, title, norMats[index], centroids, clusterAssment, labelAxes)

	plt.show()
Ejemplo n.º 19
0
# Segment an image by clustering its a/b colour channels (Lab colour space)
# with bisecting k-means.
st = time.time()
my_ima = imread('city.jpg')
fig0 = plt.figure()
ax0 = fig0.add_subplot(111)
imshow(my_ima)
lab = color.rgb2lab(my_ima)
# Keep channels 1 and 2 of the Lab image and flatten them to one row per pixel.
ab = double(lab[:, :, 1:3])
nrows = ab.shape[0]
ncols = ab.shape[1]
X = ab.reshape(nrows * ncols, 2)
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
ax1.scatter(X[:, 0], X[:, 1])

k = 10
centList, clusterAssment = kMeans.biKmeans(X, k)
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
centroids = array(centList)
datMat = array(X)
colorList = ['b', 'c', 'g', 'k', 'r', 'y', 'm', 'w']
makerList = ['.', '^', '*', '+', 'o']
for i in range(k):
    # Look the cluster members up once per cluster instead of once per axis.
    # NOTE(review): assumes kMeans.find_all_index is deterministic -- confirm.
    members = kMeans.find_all_index(clusterAssment[:, 0], i)
    ax2.scatter(datMat[members, 0],
                datMat[members, 1],
                c=colorList[i % 8],
                marker=makerList[3])
# The centroids do not depend on i: draw them once, after (and therefore on
# top of) all clusters, instead of re-drawing them on every iteration.
ax2.scatter(centroids[:, 0],
            centroids[:, 1],
            marker=makerList[4],
            c=colorList[4])
Ejemplo n.º 20
0
# kmeansTest.py

import kMeans

from numpy import *

# Disabled: load testSet.txt. The string literal below keeps the matching
# disabled min/max, randCent and distEclud checks; it is never executed.
# dataMat = mat(kMeans.loadDataSet('testSet.txt'))
'''
print('min(dataMat[:, 0]) = ', min(dataMat[:, 0]))

print('min(dataMat[:, 1]) = ', min(dataMat[:, 1]))

print('max(dataMat[:, 0]) = ', max(dataMat[:, 0]))

print('max(dataMat[:, 1]) = ', max(dataMat[:, 1]))

print('randCent of dataset : ', kMeans.randCent(dataMat, 2))
print('distance of eclud : ', kMeans.distEclud(dataMat[0], dataMat[1]))
'''

# Disabled: plain k-means with 4 clusters.
# myCentroids, clusterAssing = kMeans.kMeans(dataMat, 4)

# print('myCentroids : ', myCentroids)
# print('clusterAssing : ', clusterAssing)

# Bisecting k-means with 3 clusters on testSet2.txt; only the centroids are
# printed.
dataMat3 = mat(kMeans.loadDataSet('testSet2.txt'))

centList, newAssments = kMeans.biKmeans(dataMat3, 3)

print('centList = ', centList)
Ejemplo n.º 21
0
# NOTE(review): this reads like an interactive-session transcript. The bare
# expressions discard their results when run as a script, `datMat` is used
# here before the assignment further down, and `reload` is called as a
# builtin (Python 2).
min(datMat[:,0])
max(datMat[:,0])
min(datMat[:,1])
max(datMat[:,1])
kMeans.randCent(datMat,2)   # check whether the initial centroids fall inside the data range
kMeans.distEclud(datMat[0],datMat[1])

# Try k-means on real data.
reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
myCentroids,clustAssing = kMeans.kMeans(datMat,4)   # not necessarily the global optimum


# Bisecting k-means
reload(kMeans)
datMat3 = np.mat(kMeans.loadDataSet('testSet2.txt'))
centList,myNewAssments = kMeans.biKmeans(datMat3,3) # still no guarantee of a global optimum, only a local one
centList
myNewAssments

# Use bisecting k-means to draw the clusters on a figure.
reload(kMeans)
kMeans.clusterClubs(4)







             
Ejemplo n.º 22
0
def test3():
    # Run bisecting k-means with 4 clusters on testSet.txt; the return value
    # of biKmeans is discarded in the lines visible here.
    dataMat = np.mat(kMeans.loadDataSet('testSet.txt'))
    kMeans.biKmeans(dataMat, 4)