예제 #1
0
def printBasicInfo(centroids,clusterAssment,norMat):
	"""Print cluster sizes, centroids and the Dunn index of a clustering.

	For every cluster one line with its index, member count and centroid is
	printed, followed by the centroid of the largest cluster, the value
	returned by ``base.dunn`` and that value multiplied by the ratio
	(largest cluster size / smallest cluster size).

	Args:
		centroids: indexable collection with one entry per cluster; its
			length determines how many clusters are reported.
		clusterAssment: 2-D array or matrix with one row per sample whose
			first column holds the cluster index of that sample.
		norMat: 2-D array or matrix of samples, row-aligned with
			``clusterAssment``.

	Raises:
		IndexError: if a cluster index lies outside ``range(len(centroids))``.
		ZeroDivisionError: if the smallest cluster has no members.
	"""
	# Convert once instead of on every loop iteration.
	points = np.asarray(norMat)
	labels = np.asarray(clusterAssment)[:, 0].astype(int)

	n_clusters = len(centroids)
	if labels.size and (labels.min() < 0 or labels.max() >= n_clusters):
		raise IndexError(
			"cluster index out of range for %d centroids" % n_clusters)

	# Clusters usually differ in size; an explicit object array avoids the
	# ValueError that np.asarray raises for ragged nested sequences.
	clusters = np.empty(n_clusters, dtype=object)
	for idx in range(n_clusters):
		clusters[idx] = points[labels == idx]

	sizes = [len(members) for members in clusters]
	for idx in range(n_clusters):
		# Two spaces before "the" reproduce the spacing of the former
		# comma-terminated print statements.
		print("%d cluster has %d elements  the centroids is %s"
			% (idx, sizes[idx], centroids[idx]))

	# list.index returns the first match, so ties go to the lowest index.
	max_cluster = sizes.index(max(sizes))
	min_cluster = sizes.index(min(sizes))
	number_weight = float(sizes[max_cluster]) / sizes[min_cluster]
	print(centroids[max_cluster])

	# NOTE(review): base.dunn is assumed to only index/iterate the clusters
	# and their rows - confirm against its implementation.
	di = base.dunn(clusters)
	print("original dunn is %f" % di)
	print("weighted dunn is %f" % (number_weight * di))
예제 #2
0
def main():
	"""Cluster the samples in ./data/8.csv and print Dunn-index statistics.

	The data set is loaded and normalized through the ``kMeans`` helpers,
	split into four clusters with ``kMeans.biKmeans`` and then summarized:
	one line per cluster (index, size, centroid), the centroid of the
	largest cluster, the Dunn index from ``base.dunn`` and the N-Dunn index
	(Dunn index scaled by largest / smallest cluster size).

	Raises:
		IndexError: if a cluster index lies outside the requested range.
		ZeroDivisionError: if the smallest cluster has no members.
	"""
	# Expected number of clusters; used for the clustering call and for
	# grouping the samples so the two cannot drift apart.
	n_clusters = 4

	dataSet = kMeans.loadfromcsv('./data/8.csv')
	dataMat = np.mat(dataSet)
	# Normalize dataMat.
	norMat = kMeans.normalize(dataMat)
	# centroids holds the cluster centers.
	# clusterAssment rows are [cluster_index, deviation], where deviation
	# represents the distance from the current point to its centroid.
	# Cluster the visitors with the bisecting k-means algorithm.
	centroids, clusterAssment = kMeans.biKmeans(norMat, n_clusters)

	points = np.asarray(norMat)
	labels = np.asarray(clusterAssment)[:, 0].astype(int)
	if labels.size and (labels.min() < 0 or labels.max() >= n_clusters):
		raise IndexError(
			"cluster index out of range for %d clusters" % n_clusters)

	# Clusters usually differ in size; an explicit object array avoids the
	# ValueError that np.asarray raises for ragged nested sequences.
	clusters = np.empty(n_clusters, dtype=object)
	for idx in range(n_clusters):
		clusters[idx] = points[labels == idx]

	# Find the cluster with the most members and the one with the fewest.
	sizes = [len(members) for members in clusters]
	for idx in range(n_clusters):
		# Two spaces before "the" reproduce the spacing of the former
		# comma-terminated print statements.
		print("%d cluster has %d elements  the centroids is %s"
			% (idx, sizes[idx], centroids[idx]))

	# list.index returns the first match, so ties go to the lowest index.
	max_cluster = sizes.index(max(sizes))
	min_cluster = sizes.index(min(sizes))
	number_weight = float(sizes[max_cluster]) / sizes[min_cluster]
	print(centroids[max_cluster])

	# Compute the Dunn index.
	# NOTE(review): base.dunn is assumed to only index/iterate the clusters
	# and their rows - confirm against its implementation.
	di = base.dunn(clusters)
	# Compute the N-Dunn index. The size ratio must be a true (float)
	# division; integer division would truncate it under Python 2.
	NDunnIndex = di * number_weight
	print(di)
	print(NDunnIndex)

	print("original dunn is %f" % di)
	print("weighted dunn is %f" % (number_weight * di))