def printBasicInfo(centroids, clusterAssment, norMat):
    """Print cluster sizes, centroids and Dunn indices for a clustering.

    Args:
        centroids: indexable collection holding one centroid per cluster;
            ``len(centroids)`` is used as the number of clusters.
        clusterAssment: 2-D array/matrix whose first column holds the cluster
            index assigned to each sample.
        norMat: sample data, one row per sample, in the same order as the
            rows of ``clusterAssment``.

    Output (stdout): one line per cluster with its size and centroid, the
    centroid of the largest cluster, the value returned by ``base.dunn`` and
    that value multiplied by the largest/smallest cluster-size ratio.

    Raises:
        IndexError: if a label falls outside the index range of the
            cluster list.
        ZeroDivisionError: if the smallest cluster is empty.
    """
    # Single-argument print(...) is used throughout so the text written is the
    # same under the Python 2 print statement and the Python 3 function.
    n_clusters = len(centroids)
    labels = [int(v) for v in np.asarray(clusterAssment[:, 0]).ravel()]
    samples = np.asarray(norMat)  # convert once, not once per sample

    # Bucket every sample row under its assigned cluster index.
    members = [[] for _ in range(n_clusters)]
    for label, sample in zip(labels, samples):
        members[label].append(sample)

    # Build the container explicitly as a 1-D object array: np.asarray() on a
    # ragged nested list is rejected by recent NumPy releases, and would yield
    # a 3-D float array whenever all clusters happen to have the same size.
    # NOTE(review): assumes base.dunn only needs len() and integer indexing
    # on its argument, each item being one cluster's samples -- confirm.
    clusters = np.empty(n_clusters, dtype=object)
    for idx, rows in enumerate(members):
        clusters[idx] = np.asarray(rows)

    sizes = [len(rows) for rows in members]
    for idx, size in enumerate(sizes):
        # Two spaces after "elements" reproduce the separator that the
        # original trailing-comma print statements emitted.
        print("%d cluster has %d elements  the centroids is %s"
              % (idx, size, centroids[idx]))

    # list.index returns the first match, i.e. ties resolve to the lowest
    # cluster index.
    largest = sizes.index(max(sizes))
    smallest = sizes.index(min(sizes))
    number_weight = float(sizes[largest]) / sizes[smallest]
    print(centroids[largest])

    di = base.dunn(clusters)
    print("original dunn is %f" % di)
    print("weighted dunn is %f" % (number_weight * di))
def main():
    """Cluster the samples in ``./data/8.csv`` and print Dunn indices.

    Loads the CSV through ``kMeans.loadfromcsv``, normalizes it with
    ``kMeans.normalize``, clusters it into four groups with
    ``kMeans.biKmeans`` and prints: one line per cluster (size and centroid),
    the centroid of the largest cluster, the Dunn index from ``base.dunn``,
    and that index weighted by the largest/smallest cluster-size ratio.

    Raises:
        ZeroDivisionError: if the smallest cluster is empty.
    """
    # Single-argument print(...) is used throughout so the text written is the
    # same under the Python 2 print statement and the Python 3 function.
    n_clusters = 4  # expected number of clusters

    dataSet = kMeans.loadfromcsv('./data/8.csv')
    # np.asmatrix is the equivalent of the np.mat alias, which NumPy 2.0
    # removed.
    dataMat = np.asmatrix(dataSet)
    # Normalize dataMat.
    norMat = kMeans.normalize(dataMat)

    # Cluster the visitors with the bisecting K-means algorithm.
    # centroids holds the cluster centers. Per the original author's note,
    # each row of clusterAssment is (cluster_index, deviation), where
    # deviation is the distance from the point to its centroid.
    centroids, clusterAssment = kMeans.biKmeans(norMat, n_clusters)

    labels = [int(v) for v in np.asarray(clusterAssment[:, 0]).ravel()]
    samples = np.asarray(norMat)  # convert once, not once per sample

    # Bucket every sample row under its assigned cluster index.
    members = [[] for _ in range(n_clusters)]
    for label, sample in zip(labels, samples):
        members[label].append(sample)

    # Build the container explicitly as a 1-D object array of per-cluster
    # 2-D arrays: np.asarray() on a ragged nested list is rejected by recent
    # NumPy releases.
    clusters = np.empty(n_clusters, dtype=object)
    for idx, rows in enumerate(members):
        clusters[idx] = np.asarray(rows)

    # Find the clusters with the most and the fewest elements.
    sizes = [len(rows) for rows in members]
    for idx, size in enumerate(sizes):
        # Two spaces after "elements" reproduce the separator that the
        # original trailing-comma print statements emitted.
        print("%d cluster has %d elements  the centroids is %s"
              % (idx, size, centroids[idx]))

    # list.index returns the first match, i.e. ties resolve to the lowest
    # cluster index.
    largest = sizes.index(max(sizes))
    smallest = sizes.index(min(sizes))
    number_weight = float(sizes[largest]) / sizes[smallest]
    print(centroids[largest])

    # Compute the Dunn index.
    di = base.dunn(clusters)
    # Compute the N-Dunn index. The size ratio is a true (float) division;
    # the previous int/int form truncated it under Python 2.
    NDunnIndex = di * number_weight
    print(di)
    print(NDunnIndex)
    print("original dunn is %f" % di)
    print("weighted dunn is %f" % (number_weight * di))