def __cluster_data(params):
    """Run one clustering job from a packed argument triple.

    `params` is an `(index, data_set, k)` tuple packed into a single
    argument (the usual shape for a pool-worker callable — confirm against
    the caller). Returns a pair `(label, clusters)` where `label` is the
    lowercase letter for `index` and `clusters` is the result of
    clustering the loaded data set into `k` groups.
    """
    index, data_set, k = params
    label = string.ascii_lowercase[index]
    return label, cluster_data(load_data(data_set), k)
# compute the covariance of the data sigma = np.cov(centered, rowvar=0) # get a vector of eigenvalues and matrix of eigenvectors lmbda, w = np.linalg.eig(np.mat(sigma)) # sort in descending order idx = lmbda.argsort()[::-1] # apply the new indexing and transpose the eigenvector matrix lmbda = lmbda[idx] w = w[idx].T # use top eigenvalues if have less than ten if len(lmbda) < top_b: return lmbda, w # if not, return the top ten eigenvalues else: return lmbda[:top_b], w[:top_b] if __name__ == '__main__': data = load_data('./data/p4-data.txt', to_float32=True) vals, w = pca(data) print("Eigenvalues in descending order:") for idx, val in enumerate(vals, 1): print("{:3d}: {:5.3f}".format(idx, np.real(val)))
# 5、取得sum_all之间的随机值 sum_all *= random() # 6、获得距离最远的样本点作为聚类中心点 for j, di in enumerate(d): sum_all -= di if sum_all > 0: continue cluster_centers[i] = np.copy(points[j, ]) break return cluster_centers if __name__ == "__main__": k = 4 # 聚类中心的个数 file_path = "./Data/data.txt" # 1、导入数据 print("---------- 1.load data ------------") data = load_data(file_path) # 2、KMeans++的聚类中心初始化方法 print("---------- 2.K-Means++ generate centers ------------") centroids = get_centroids(data, k) # 3、聚类计算 print("---------- 3.kmeans ------------") subCenter = kmeans(data, k, centroids) # 4、保存所属的类别文件 print("---------- 4.save subCenter ------------") save_result("./TrainingResult/kmeans++_sub", subCenter) # 5、保存聚类中心 print("---------- 5.save centroids ------------") save_result("./TrainingResult/kmeans++_center", centroids)