sum_all += d[j]
        # 5、取得sum_all之间的随机值
        sum_all *= random()
        # 6、获得距离最远的样本点作为聚类中心点
        for j, di in enumerate(d):
            sum_all -= di
            if sum_all > 0:
                continue
            cluster_centers[i] = np.copy(points[j, ])
            break
    return cluster_centers

if __name__ == "__main__":
    # Script entry point: runs the pipeline below in order.
    # print(...) with a single string argument produces the same output on
    # Python 2 and Python 3, whereas the bare `print "..."` statement is a
    # SyntaxError on Python 3.
    k = 4  # number of cluster centers
    file_path = "data.txt"
    # 1. Load the data
    print("---------- 1.load data ------------")
    data = load_data(file_path)
    # 2. K-Means++ cluster-center initialization
    print("---------- 2.K-Means++ generate centers ------------")
    centroids = get_centroids(data, k)
    # 3. Run the clustering computation
    print("---------- 3.kmeans ------------")
    subCenter = kmeans(data, k, centroids)
    # 4. Save the per-sample cluster assignment file
    print("---------- 4.save subCenter ------------")
    save_result("sub_pp", subCenter)
    # 5. Save the cluster centers
    print("---------- 5.save centroids ------------")
    save_result("center_pp", centroids)
            sum_all += d[j]
        # 5、取得sum_all之间的随机值
        sum_all *= random()
        # 6、获得距离最远的样本点作为聚类中心点
        for j, di in enumerate(d):
            sum_all -= di
            if sum_all > 0:
                continue
            cluster_centers[i] = np.copy(points[j, ])
            break
    return cluster_centers

if __name__ == "__main__":
    # Script entry point: load data, initialise centers with K-Means++,
    # run the clustering, then save both outputs.
    file_path = "data.txt"
    k = 4  # number of cluster centers

    # Step 1: load the data.
    print("---------- 1.load data ------------")
    data = load_data(file_path)

    # Step 2: K-Means++ cluster-center initialization.
    print("---------- 2.K-Means++ generate centers ------------")
    centroids = get_centroids(data, k)

    # Step 3: clustering computation.
    print("---------- 3.kmeans ------------")
    subCenter = kmeans(data, k, centroids)

    # Steps 4 and 5: save the per-sample assignments, then the cluster
    # centers, announcing each step before it runs.
    for banner, out_name, payload in (
        ("---------- 4.save subCenter ------------", "sub_pp", subCenter),
        ("---------- 5.save centroids ------------", "center_pp", centroids),
    ):
        print(banner)
        save_result(out_name, payload)