Example 1
import string

def __cluster_data(params):
    # unpack (index, dataset path, cluster count); cluster_data and load_data are project helpers defined elsewhere
    i, data_set, k = params
    return (string.ascii_lowercase[i], cluster_data(load_data(data_set), k))
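The tuple-packed params and the letter label suggest a worker function meant for a process pool; the driver below is only a sketch under that assumption, with placeholder file paths, and it presumes the cluster_data and load_data helpers exist as in the snippet above.

from multiprocessing import Pool


def cluster_all(data_sets, k=3):
    # build one (index, path, k) tuple per data set so each worker receives a single argument
    params = [(i, path, k) for i, path in enumerate(data_sets)]
    with Pool() as pool:
        # collect (label, clustering) pairs such as ('a', ...), ('b', ...)
        return dict(pool.map(__cluster_data, params))


results = cluster_all(["./data/set-a.txt", "./data/set-b.txt"])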
Example 2
import numpy as np


def pca(data, top_b=10):
    # assumed signature: top_b (number of components to keep) defaults to ten,
    # matching the "top ten" described in the comments below

    # center the data around its column means
    centered = data - data.mean(axis=0)

    # compute the covariance of the centered data
    sigma = np.cov(centered, rowvar=False)

    # get a vector of eigenvalues and a matrix whose columns are eigenvectors
    lmbda, w = np.linalg.eig(sigma)

    # indices that sort the eigenvalues in descending order
    idx = lmbda.argsort()[::-1]

    # reorder eigenvalues, reorder the eigenvector columns to match,
    # and transpose so each row of w is one eigenvector
    lmbda = lmbda[idx]
    w = w[:, idx].T

    # return everything if there are fewer than top_b eigenvalues
    if len(lmbda) < top_b:
        return lmbda, w

    # otherwise return only the top_b eigenvalues and eigenvectors
    else:
        return lmbda[:top_b], w[:top_b]


if __name__ == '__main__':
    data = load_data('./data/p4-data.txt', to_float32=True)
    vals, w = pca(data)

    print("Eigenvalues in descending order:")

    for idx, val in enumerate(vals, 1):
        print("{:3d}: {:5.3f}".format(idx, np.real(val)))
Example 3
        # 5. draw a random threshold in [0, sum_all) by scaling sum_all with a uniform random value
        sum_all *= random()
        # 6. roulette-wheel selection: the point at which the running sum is
        #    exhausted becomes the next cluster center
        for j, di in enumerate(d):
            sum_all -= di
            if sum_all > 0:
                continue
            cluster_centers[i] = np.copy(points[j, ])
            break
    return cluster_centers


if __name__ == "__main__":
    k = 4  # number of cluster centers
    file_path = "./Data/data.txt"
    # 1. load the data
    print("---------- 1.load data ------------")
    data = load_data(file_path)
    # 2. initialize the cluster centers with K-Means++
    print("---------- 2.K-Means++ generate centers ------------")
    centroids = get_centroids(data, k)
    # 3. run the k-means clustering
    print("---------- 3.kmeans ------------")
    subCenter = kmeans(data, k, centroids)
    # 4. save the cluster assignment of each sample
    print("---------- 4.save subCenter ------------")
    save_result("./TrainingResult/kmeans++_sub", subCenter)
    # 5. save the cluster centers
    print("---------- 5.save centroids ------------")
    save_result("./TrainingResult/kmeans++_center", centroids)