# NOTE(review): this physical line looks like a multi-line Python snippet whose
# line breaks and indentation were lost. As written, only `sum_all += d[j]` is
# code; Python treats everything after the first '#' as one trailing comment,
# so none of the text that follows is executed.
# The collapsed text appears to hold (a) the end of a K-Means++ center
# initialisation routine that returns `cluster_centers` -- its `def` header is
# not part of this line -- and (b) an `if __name__ == "__main__":` driver that
# calls load_data, get_centroids, kmeans and save_result in that order.
# The `print "..."` statements in the text use Python 2 syntax.
# TODO(review): restore the original line structure before relying on this code.
sum_all += d[j] # 5. Scale sum_all by a random factor sum_all *= random() # 6. Choose the next cluster center by subtracting distances until sum_all <= 0 for j, di in enumerate(d): sum_all -= di if sum_all > 0: continue cluster_centers[i] = np.copy(points[j, ]) break return cluster_centers if __name__ == "__main__": k = 4# number of cluster centers file_path = "data.txt" # 1. Load the data print "---------- 1.load data ------------" data = load_data(file_path) # 2. K-Means++ cluster-center initialisation print "---------- 2.K-Means++ generate centers ------------" centroids = get_centroids(data, k) # 3. Run the clustering print "---------- 3.kmeans ------------" subCenter = kmeans(data, k, centroids) # 4. Save the cluster assignment of each sample print "---------- 4.save subCenter ------------" save_result("sub_pp", subCenter) # 5. Save the cluster centers print "---------- 5.save centroids ------------" save_result("center_pp", centroids)
# NOTE(review): this physical line looks like a multi-line Python snippet whose
# line breaks and indentation were lost. As written, only `sum_all += d[j]` is
# code; Python treats everything after the first '#' as one trailing comment,
# so none of the text that follows is executed.
# The collapsed text appears to hold (a) the end of a K-Means++ center
# initialisation routine that returns `cluster_centers` -- its `def` header is
# not part of this line -- and (b) an `if __name__ == "__main__":` driver that
# calls load_data, get_centroids, kmeans and save_result in that order.
# The `print(...)` calls in the text use Python 3 syntax.
# TODO(review): restore the original line structure before relying on this code.
sum_all += d[j] # 5. Scale sum_all by a random factor sum_all *= random() # 6. Choose the next cluster center by subtracting distances until sum_all <= 0 for j, di in enumerate(d): sum_all -= di if sum_all > 0: continue cluster_centers[i] = np.copy(points[j, ]) break return cluster_centers if __name__ == "__main__": k = 4 # number of cluster centers file_path = "data.txt" # 1. Load the data print("---------- 1.load data ------------") data = load_data(file_path) # 2. K-Means++ cluster-center initialisation print("---------- 2.K-Means++ generate centers ------------") centroids = get_centroids(data, k) # 3. Run the clustering print("---------- 3.kmeans ------------") subCenter = kmeans(data, k, centroids) # 4. Save the cluster assignment of each sample print("---------- 4.save subCenter ------------") save_result("sub_pp", subCenter) # 5. Save the cluster centers print("---------- 5.save centroids ------------") save_result("center_pp", centroids)