def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type):
    """Transform two channel data sets into a reduced domain.

    The target dimension is the column count of ``centroidUList[0]``.

    Args:
        channelData1: Indexable collection of channel matrices (one per page).
        covMatrixList1: Covariance matrices of ``channelData1``; only used
            when ``type == "C"``.
        channelData2: Second collection, transformed with the same bases.
        centroids: Cluster centroids passed to ``kmeans.getClusterAssment``;
            only used when ``type == "C"``.
        centroidUList: Per-cluster transform matrices.
        type: Transform selector:
            ``"C"``       - assign every page to a cluster and multiply both
                            data sets by that cluster's transform matrix;
            ``"general"`` - ``pca.pca_general`` on each data set;
            ``"none"``    - return the inputs unchanged (same objects);
            ``"wt"``      - ``wt.wt`` on each page.
            Any other value yields two empty lists.

    Returns:
        Tuple ``(newChannelData1, newChannelData2)``.
    """
    newChannelData1 = []
    newChannelData2 = []
    newDimension = np.shape(centroidUList[0])[1]

    if type == "C":
        # Stack the covariance matrices into one matrix for cluster lookup.
        allCovMatrix1 = tools.matrixListToMatrix(covMatrixList1)
        # Determine which cluster each page belongs to.
        clusterAssment = kmeans.getClusterAssment(allCovMatrix1, centroids)
        # Transform domain. The original looped over the undefined name
        # ``channelDataAll1`` (NameError); the page count comes from
        # ``channelData1``.
        for i in range(np.shape(channelData1)[0]):
            # The assignment may be stored as a complex value; keep the real part.
            clusterU = centroidUList[int(clusterAssment[i, 0].real)]
            newChannelData1.append(np.dot(channelData1[i], clusterU))
            newChannelData2.append(np.dot(channelData2[i], clusterU))
    elif type == "general":
        newChannelData1 = pca.pca_general(channelData1, newDimension)
        newChannelData2 = pca.pca_general(channelData2, newDimension)
    elif type == "none":
        newChannelData1 = channelData1
        newChannelData2 = channelData2
    elif type == "wt":
        # Transform domain.
        for i in range(np.shape(channelData1)[0]):
            newChannelData1.append(wt.wt(channelData1[i], newDimension))
            newChannelData2.append(wt.wt(channelData2[i], newDimension))

    return newChannelData1, newChannelData2
def elbowCore(channelDataAll, a, k, iRate, schedule):
    """Average the PCA retention rates of three strategies over column slices.

    Every page in ``channelDataAll`` is cut into ``2 ** a`` column slices of
    width ``columns >> a``. For each slice three rates are collected:
    clustering on the covariance matrices (``pca.pca``), clustering on the
    transform matrices (``pca.pca_U``) and no clustering (``pca.pca_S``).

    Args:
        channelDataAll: List of 2-D pages; the column count is taken from
            the first page.
        a: Exponent of the slice count.
        k: Number of clusters handed to the k-means helpers.
        iRate: Retention/dimension parameter forwarded to the PCA helpers.
        schedule: Mutable progress record; ``schedule[0]`` is read as the
            total number of parts and ``schedule[1]`` is incremented once
            per slice.

    Returns:
        Tuple ``(rate_S, rate_C, rate_U)`` holding the real parts of the
        mean rates for the non-clustered, covariance-clustered and
        transform-clustered variants, in that order.
    """
    columnCount = np.shape(channelDataAll[0])[1]  # number of columns
    sliceWidth = columnCount >> a
    covarianceRates = []
    transformRates = []
    plainRates = []

    for part in range(1 << a):
        # Report progress: this part starts.
        schedule[1] += 1
        partNumber = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分开始!')

        first = part * sliceWidth
        last = (part + 1) * sliceWidth
        channelData = [page[:, first:last] for page in channelDataAll]

        covMatrixList = tools.getCovMatrixList(channelData)
        allCovMatrix = tools.matrixListToMatrix(covMatrixList)

        # Cluster on the covariance matrices.
        centroids, clusterAssment = kmeans.KMeansOushi(allCovMatrix, k)
        centroidList = tools.matrixToMatrixList(centroids)

        # Original channel information, covariance eigenvalues, transform matrices.
        informations, SigmaList, UList = tools.getInformations(covMatrixList)

        # PCA with covariance-based clusters; keep the rate column.
        covarianceResult = pca.pca(channelData, informations, centroidList, clusterAssment, iRate)
        covarianceRates.append(np.mean(covarianceResult[3][0][:, 1]))

        # Cluster on the transform matrices.
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        centroids, clusterAssment = kmeans.KMeansOushi_U(allU, k, weights, iRate)
        centroidList = tools.matrixToMatrixList_U(centroids)

        # PCA with transform-based clusters; keep the rate column.
        transformResult = pca.pca_U(channelData, informations, centroidList, clusterAssment, iRate)
        transformRates.append(np.mean(transformResult[3][0][:, 1]))

        # No clustering, plain PCA.
        plainResult = pca.pca_S(SigmaList, iRate)
        plainRates.append(np.mean(plainResult[0][:, 1]))

        # Report progress: this part is done.
        percentDone = '%.2f%%' % (schedule[1] / schedule[0] * 100)
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分完成,'
              + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + percentDone + u'!')

    rate_C = np.mean(covarianceRates)
    rate_U = np.mean(transformRates)
    rate_S = np.mean(plainRates)
    return rate_S.real, rate_C.real, rate_U.real
def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type):
    """Project two channel data sets with cluster-specific or per-page bases.

    Args:
        channelData1: Indexable collection of channel matrices (one per page).
        covMatrixList1: Covariance matrices belonging to ``channelData1``.
        channelData2: Second collection, projected page by page alongside
            the first one.
        centroids: Cluster centroids used for the assignment step
            (``"C"`` and ``"U"`` modes).
        centroidUList: Per-cluster transform matrices; the column count of
            the first entry is the reduced dimension.
        type: ``"C"`` assigns pages by covariance matrix, ``"U"`` assigns
            pages by weighted transform matrix, ``"S"`` projects each page
            with the leading columns of its own transform matrix. Any other
            value yields two empty lists.

    Returns:
        Tuple ``(newChannelData1, newChannelData2)`` of projected page lists.
    """
    projected1 = []
    projected2 = []
    newDimension = np.shape(centroidUList[0])[1]
    pageCount = np.shape(channelData1)[0]

    if type == "C":
        # Stack the covariance matrices, then look up each page's cluster.
        stackedCov = tools.matrixListToMatrix(covMatrixList1)
        assignment = kmeans.getClusterAssment(stackedCov, centroids)
        # Transform domain.
        for page in range(pageCount):
            basis = centroidUList[int(assignment[page, 0].real)]
            projected1.append(np.dot(channelData1[page], basis))
            projected2.append(np.dot(channelData2[page], basis))
    elif type == "U":
        _, SigmaList, UList = tools.getInformations(covMatrixList1)
        stackedU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        # Look up each page's cluster from its weighted transform matrix.
        assignment = kmeans.getClusterAssment_U(stackedU, weights, centroids, newDimension)
        # Transform domain.
        for page in range(pageCount):
            basis = centroidUList[int(assignment[page, 0].real)]
            projected1.append(np.dot(channelData1[page], basis))
            projected2.append(np.dot(channelData2[page], basis))
    elif type == "S":
        covMatrixList2 = tools.getCovMatrixList(channelData2)
        ownBases1 = tools.getInformations(covMatrixList1)[2]
        ownBases2 = tools.getInformations(covMatrixList2)[2]
        # Transform domain: each data set uses its own leading columns.
        for page in range(pageCount):
            projected1.append(np.dot(channelData1[page], ownBases1[page][:, 0:newDimension]))
            projected2.append(np.dot(channelData2[page], ownBases2[page][:, 0:newDimension]))

    # NOTE: writing the projected data to .xlsx files is disabled in this version.
    return projected1, projected2
def readCentroids(path, iRate, type, a):
    """Load stored cluster centroids and derive their transform matrices.

    For every slice index ``g`` in ``1 .. 2 ** a`` the files directly under
    ``path`` whose full name contains
    ``path + 'getCentroids_outCentroidList_' + type + '_' + str(g) + '_'``
    are read with ``excelToMatrixList`` and merged.

    Args:
        path: Directory prefix that holds the centroid files.
        iRate: For ``type == 'C'``: a value ``<= 1`` is treated as the share
            of the singular-value sum to retain, a larger value as the
            number of components. For ``type == 'U'`` it is used as the
            number of leading columns.
        type: ``'C'`` (covariance centroids) or ``'U'`` (transform
            centroids). Other values produce empty results.
        a: Exponent of the slice count.

    Returns:
        Tuple ``(allCentroids, allCentroidUList)`` with one entry per slice.
    """
    allCentroids = []
    allCentroidUList = []

    for g in range(1, (1 << a) + 1):
        # Name prefix of the centroid files for this slice.
        centroidListPath = path + u'getCentroids_outCentroidList_' + type + u'_' + str(g) + u'_'

        # Merge all matching files.
        centroidList_g = []
        UList_g = []
        for root, _subdirs, names in os.walk(path, topdown=True):
            for name in names:
                fullName = os.path.join(root, name)
                if centroidListPath not in fullName:
                    continue
                centroidList_g.extend(excelToMatrixList(fullName))
            # Only the top-level directory is inspected.
            break

        # Derive the transform matrix of every centroid.
        if u'C' == type:
            for centroid in centroidList_g:
                U, Sigma, VT = np.linalg.svd(centroid)
                if iRate > 1:
                    keep = iRate
                else:
                    # Keep components until the accumulated share of the
                    # singular values reaches iRate.
                    total = np.sum(Sigma)
                    running = 0
                    keep = 1
                    for value in Sigma:
                        running += value
                        if iRate - (running / total) > 0:
                            keep += 1
                        else:
                            break
                UList_g.append(np.transpose(VT[0:keep, :]))
            allCentroids.append(tools.matrixListToMatrix(centroidList_g))
            allCentroidUList.append(UList_g)

        if u'U' == type:
            for centroid in centroidList_g:
                leading = centroid[:, 0:iRate]
                for col in range(np.shape(leading)[1]):
                    # Normalize each column to unit norm (noise power normalization).
                    leading[:, col] = leading[:, col] / np.linalg.norm((leading[:, col]))
                UList_g.append(leading)
            allCentroids.append(tools.matrixListToMatrix_U(centroidList_g))
            allCentroidUList.append(UList_g)

    return allCentroids, allCentroidUList
def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type,
                outputPath=u'/Users/jinruimeng/Downloads/keyan/'):
    """Transform two channel data sets and report their per-column correlation.

    Both transformed data sets are also written as ``.xlsx`` files via
    ``readAndWriteDataSet.write``.

    Args:
        channelData1: Indexable collection of channel matrices (one per page).
        covMatrixList1: Covariance matrices belonging to ``channelData1``.
        channelData2: Second collection, transformed alongside the first.
        centroids: Cluster centroids used for the assignment step
            (``"C"`` and ``"U"`` modes).
        centroidUList: Per-cluster transform matrices; the column count of
            the first entry is the reduced dimension.
        type: ``"C"`` (assign by covariance matrix), ``"U"`` (assign by
            weighted transform matrix), ``"general"`` (``pca.pca_general``)
            or ``"none"`` (data used unchanged).
        outputPath: Directory prefix for the exported files. Defaults to the
            location that used to be hard-coded, so existing callers keep
            their behaviour.

    Returns:
        List with one entry per reduced dimension: the absolute value of the
        correlation coefficient between matching columns of the two
        transformed data sets, averaged over all pages.
    """
    newChannelData1 = []
    newChannelData2 = []
    newDimension = np.shape(centroidUList[0])[1]
    p = np.shape(channelData1)[0]

    if type == "C":
        # Stack the covariance matrices into one matrix for cluster lookup.
        allCovMatrix1 = tools.matrixListToMatrix(covMatrixList1)
        # Determine which cluster each page belongs to.
        clusterAssment = kmeans.getClusterAssment(allCovMatrix1, centroids)
        # Transform domain.
        for i in range(p):
            clusterU = centroidUList[int(clusterAssment[i, 0].real)]
            newChannelData1.append(np.dot(channelData1[i], clusterU))
            newChannelData2.append(np.dot(channelData2[i], clusterU))
    elif type == "U":
        informations, SigmaList, UList = tools.getInformations(covMatrixList1)
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        # Determine which cluster each page belongs to.
        clusterAssment = kmeans.getClusterAssment_U(allU, weights, centroids, newDimension)
        # Transform domain.
        for i in range(p):
            clusterU = centroidUList[int(clusterAssment[i, 0].real)]
            newChannelData1.append(np.dot(channelData1[i], clusterU))
            newChannelData2.append(np.dot(channelData2[i], clusterU))
    elif type == "general":
        newChannelData1 = pca.pca_general(channelData1, newDimension)
        newChannelData2 = pca.pca_general(channelData2, newDimension)
    elif type == "none":
        newChannelData1 = channelData1
        newChannelData2 = channelData2

    # Sum the column-wise correlation coefficients over all pages.
    corrSums = [0.0] * newDimension
    for i in range(p):
        for j in range(newDimension):
            cowCor = np.corrcoef(newChannelData1[i][:, j], newChannelData2[i][:, j])
            corrSums[j] += cowCor[0, 1]
    # Average over the pages and keep the magnitude.
    allNewCorr = [abs(total / p) for total in corrSums]

    # Export the transformed data; the timestamp keeps file names unique.
    nowTime = time.strftime("%Y-%m-%d.%H.%M.%S", time.localtime(time.time()))
    pathSuffix = type + u'_' + nowTime
    newChannelData1Path = outputPath + "clusterAddNoise_newChannelData1_" + pathSuffix
    newChannelData2Path = outputPath + "clusterAddNoise_newChannelData2_" + pathSuffix
    readAndWriteDataSet.write(newChannelData1, newChannelData1Path, ".xlsx")
    readAndWriteDataSet.write(newChannelData2, newChannelData2Path, ".xlsx")
    return allNewCorr
def getCentroidsCore(path, suffix, channelData, covMatrixList, informations, SigmaList, UList, g, k, iRate, type="C"):
    """Cluster one data slice and write the resulting centroids to disk.

    Args:
        path: Directory prefix for the output file.
        suffix: File suffix forwarded to ``readAndWriteDataSet.write``.
        channelData: Not used by this function; kept for call compatibility.
        covMatrixList: Covariance matrices, clustered when ``type == 'C'``.
        informations: Not used by this function; kept for call compatibility.
        SigmaList: Matrices used as k-means weights when ``type == 'U'``.
        UList: Transform matrices, clustered when ``type == 'U'``.
        g: Slice index, embedded in the output file name.
        k: Number of clusters.
        iRate: Forwarded to ``kmeans.KMeansOushi_U`` when ``type == 'U'``.
        type: ``'C'`` clusters the covariance matrices, ``'U'`` clusters the
            transform matrices. For any other value an empty centroid list
            is written.

    Returns:
        None. The centroid list is written to
        ``path + "getCentroids_outCentroidList_" + type + "_" + str(g) + "_" + <timestamp>``.
    """
    # The timestamp keeps output file names unique across runs.
    nowTime = time.strftime("%Y-%m-%d.%H.%M.%S", time.localtime(time.time()))
    pathSuffix = type + "_" + str(g) + "_" + nowTime
    outCentroidListPath = path + "getCentroids_outCentroidList_" + pathSuffix

    centroidList = []
    if type == u'C':
        # Cluster on the covariance matrices; the assignment is not needed here.
        allCovMatrix = tools.matrixListToMatrix(covMatrixList)
        centroids, _ = kmeans.KMeansOushi(allCovMatrix, k)
        centroidList = tools.matrixToMatrixList(centroids)
    elif type == u'U':
        # Cluster on the transform matrices, weighted by SigmaList.
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        centroids, _ = kmeans.KMeansOushi_U(allU, k, weights, iRate)
        centroidList = tools.matrixToMatrixList_U(centroids)

    # Only the centroids are written; the cluster-assignment, covariance and
    # PCA-analysis outputs are disabled in this version.
    readAndWriteDataSet.write(centroidList, outCentroidListPath, suffix)
def cluster(schedule, path, suffix, channelData, g, iRate):
    """Assign one data slice to stored centroids, run PCA and write the results.

    Args:
        schedule: Mutable progress record; ``schedule[0]`` is read as the
            total number of parts and ``schedule[1]`` is incremented by one.
        path: Directory prefix used both for locating the centroid files and
            for the output files.
        suffix: File suffix forwarded to ``readAndWriteDataSet.write``.
        channelData: Channel data of this slice.
        g: Slice index, embedded in the input and output file names.
        iRate: Reduced dimension; must satisfy
            ``0 < iRate <= np.shape(channelData)[1]``.

    Returns:
        None. When ``iRate`` is out of range a message is printed and the
        function returns without touching ``schedule`` or writing anything.
    """
    originalDimension = np.shape(channelData)[1]
    if iRate > originalDimension:
        print(u'降维后维度不能大于样本原有的维度!')
        return
    if iRate <= 0:
        print(u'降维后维度不能小于1!')
        return

    # Report progress: this part starts.
    schedule[1] += 1
    partNumber = schedule[1]
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分开始!')

    slicePrefix = "C_" + str(g) + "_"
    centroidListPath = path + "getCentroids_outCentroidList_" + slicePrefix
    # The timestamp keeps output file names unique across runs.
    stamp = time.strftime("%Y-%m-%d.%H.%M.%S", time.localtime(time.time()))
    pathSuffix = slicePrefix + str(stamp)
    outClusterAssmentPath = path + "cluster_outClusterAssment_" + pathSuffix
    outNewChannelDataPath = path + "cluster_outNewChannelData_" + pathSuffix
    outNewCovMatrixsPath = path + "cluster_outNewCovMatrixList_" + pathSuffix
    ratesPath = path + "cluster_rates_" + pathSuffix
    UTsPath = path + "cluster_UTs_" + pathSuffix

    # Read the centroid information, merging all matching files.
    centroidList = []
    for root, _subdirs, names in os.walk(path, topdown=True):
        for name in names:
            fullName = os.path.join(root, name)
            if centroidListPath not in fullName:
                continue
            centroidList.extend(readAndWriteDataSet.excelToMatrixList(fullName))
        # Only the top-level directory is inspected.
        break
    centroids = tools.matrixListToMatrix(centroidList)

    # Compute the covariance matrices and stack them into one matrix.
    covMatrixList = tools.getCovMatrixList(channelData)
    allCovMatrix = tools.matrixListToMatrix(covMatrixList)

    # Determine which cluster each entry belongs to.
    clusterAssment = kmeans.getClusterAssment(allCovMatrix, centroids)
    clusterAssmentList = [clusterAssment]

    # Analyse the PCA result.
    newChannelData, newCovMatrixList, UTs, rates = pca.pca(
        channelData, covMatrixList, centroidList, clusterAssment, iRate)

    # Write the cluster assignment.
    readAndWriteDataSet.write(clusterAssmentList, outClusterAssmentPath, suffix)
    # The covariance matrices are too large, so they are not written for now.
    # Write the PCA results.
    readAndWriteDataSet.write(newChannelData, outNewChannelDataPath, suffix)
    readAndWriteDataSet.write(newCovMatrixList, outNewCovMatrixsPath, suffix)
    readAndWriteDataSet.write(UTs, UTsPath, suffix)
    readAndWriteDataSet.write(rates, ratesPath, suffix)

    # Report progress: this part is done.
    percentDone = '%.2f%%' % (schedule[1] / schedule[0] * 100)
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分完成,'
          + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + percentDone + u'!')