def pca_U(channelDataList, informations, centroidList, clusterAssment, newDimension=1):
    """Project every channel onto the leading columns of its cluster's centroid.

    Args:
        channelDataList: Sequence of channel matrices to reduce.
        informations: Reference values, read as ``informations[0][i]`` for
            the i-th channel.
        centroidList: Sequence of centroid matrices; the first
            ``newDimension`` columns of each one are used as the transform.
        clusterAssment: 2-D array; the real part of ``clusterAssment[i, 0]``
            is the centroid index of the i-th channel.
        newDimension: Number of leading centroid columns to keep.

    Returns:
        ``(newChannelDataList, newCovMatrixList, U2s, rateList)`` where
        ``rateList`` is a one-element list holding a complex array whose
        column 1 is the per-channel ratio of new to original information.
        Column 0 of that array is left at zero.
    """
    # Transform matrix of each cluster.
    U2s = [centroid[:, 0:newDimension] for centroid in centroidList]

    # Dimensionality reduction with the transform of the assigned cluster.
    newChannelDataList = []
    for idx, channel in enumerate(channelDataList):
        clusterIndex = int(clusterAssment[idx, 0].real)
        newChannelDataList.append(np.dot(channel, U2s[clusterIndex]))

    newCovMatrixList = tools.getCovMatrixList(newChannelDataList)
    reducedInformation = tools.getInformations(newCovMatrixList)[0]

    rates = np.zeros((len(channelDataList), 2), dtype=complex)
    for idx in range(len(channelDataList)):
        rates[idx, 1] = reducedInformation[0][idx] / informations[0][idx]

    # The rates are wrapped in a list so they can be written out like the
    # other results.
    return newChannelDataList, newCovMatrixList, U2s, [rates]
def elbowCore(channelDataAll, a, k, iRate, schedule):
    """Compare information retention of three PCA variants over all column blocks.

    The columns of every page are split into ``1 << a`` blocks.  For each
    block the retention rate is computed with covariance clustering
    (``pca.pca``), transform-matrix clustering (``pca.pca_U``) and without
    clustering (``pca.pca_S``).

    Args:
        channelDataAll: Sequence of 2-D pages sharing the same column count.
        a: Exponent of the number of column blocks.
        k: Number of clusters passed to the k-means helpers.
        iRate: Reduction parameter forwarded to the PCA helpers.
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once per block.

    Returns:
        Real parts of the mean rates, ordered ``(rate_S, rate_C, rate_U)``.
    """
    columnCount = np.shape(channelDataAll[0])[1]
    pageCount = len(channelDataAll)
    sub = columnCount >> a

    rates_C, rates_U, rates_S = [], [], []

    for g in range(1 << a):
        # Progress: block started.
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

        first, last = g * sub, (g + 1) * sub
        channelData = [channelDataAll[h][:, first:last] for h in range(pageCount)]

        covMatrixList = tools.getCovMatrixList(channelData)
        allCovMatrix = tools.matrixListToMatrix(covMatrixList)

        # Cluster the covariance matrices.
        centroids, clusterAssment = kmeans.KMeansOushi(allCovMatrix, k)
        centroidList = tools.matrixToMatrixList(centroids)

        # Original information, singular values and transform matrices.
        informations, SigmaList, UList = tools.getInformations(covMatrixList)

        # Retention with covariance clustering.
        covResult = pca.pca(channelData, informations, centroidList, clusterAssment, iRate)
        rates_C.append(np.mean(covResult[3][0][:, 1]))

        # Cluster the transform matrices.
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        centroids, clusterAssment = kmeans.KMeansOushi_U(allU, k, weights, iRate)
        centroidList = tools.matrixToMatrixList_U(centroids)

        # Retention with transform-matrix clustering.
        uResult = pca.pca_U(channelData, informations, centroidList, clusterAssment, iRate)
        rates_U.append(np.mean(uResult[3][0][:, 1]))

        # Plain PCA without clustering.
        plainResult = pca.pca_S(SigmaList, iRate)
        rates_S.append(np.mean(plainResult[0][:, 1]))

        # Progress: block finished.
        print(
            u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,'
            + u'已完成' + str(schedule[1]) + u'部分,'
            + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
        )

    rate_C = np.mean(rates_C)
    rate_U = np.mean(rates_U)
    rate_S = np.mean(rates_S)
    return rate_S.real, rate_C.real, rate_U.real
def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type):
    """Transform two channel data sets with cluster-based or per-channel bases.

    Args:
        channelData1: First sequence of channel matrices.
        covMatrixList1: Covariance matrices belonging to ``channelData1``.
        channelData2: Second sequence of channel matrices, transformed with
            the same cluster assignment as ``channelData1`` for "C" and "U".
        centroids: Cluster centres handed to the assignment helpers.
        centroidUList: Transform matrix per cluster; the column count of the
            first one is the reduced dimension.
        type: ``"C"`` assigns clusters from the covariance matrices,
            ``"U"`` assigns clusters from the transform matrices,
            ``"S"`` uses each channel's own transform matrix.  Any other
            value yields two empty lists.

    Returns:
        ``(newChannelData1, newChannelData2)`` lists of transformed channels.
    """
    transformed1, transformed2 = [], []
    keptColumns = np.shape(centroidUList[0])[1]
    channelCount = np.shape(channelData1)[0]

    if type == "C" or type == "U":
        # Decide which cluster every channel belongs to.
        if type == "C":
            allCovMatrix1 = tools.matrixListToMatrix(covMatrixList1)
            assignment = kmeans.getClusterAssment(allCovMatrix1, centroids)
        else:
            informations, SigmaList, UList = tools.getInformations(covMatrixList1)
            allU = tools.matrixListToMatrix_U(UList)
            weights = tools.matrixListToMatrix_U(SigmaList)
            assignment = kmeans.getClusterAssment_U(allU, weights, centroids, keptColumns)

        # Apply the transform of the assigned cluster to both data sets.
        for idx in range(channelCount):
            transformed1.append(
                np.dot(channelData1[idx], centroidUList[int(assignment[idx, 0].real)]))
            transformed2.append(
                np.dot(channelData2[idx], centroidUList[int(assignment[idx, 0].real)]))
    elif type == "S":
        covMatrixList2 = tools.getCovMatrixList(channelData2)
        UList1 = tools.getInformations(covMatrixList1)[2]
        UList2 = tools.getInformations(covMatrixList2)[2]

        # Each data set is transformed with its own leading columns.
        for idx in range(channelCount):
            transformed1.append(np.dot(channelData1[idx], UList1[idx][:, 0:keptColumns]))
            transformed2.append(np.dot(channelData2[idx], UList2[idx][:, 0:keptColumns]))

    # Note: writing the transformed channel data to .xlsx files via
    # readAndWriteDataSet.write used to happen here and is disabled.
    return transformed1, transformed2
def cluster(schedule, channelDataAll1, channelDataAll2, allCentroidsC, allCentroidUList, a, low, high, step):
    """Average key inconsistency rates per quantisation setting over all blocks.

    For each of the ``1 << a`` column blocks the channel pairs are quantised
    three ways: untransformed, transformed per channel (``clusterCore`` type
    "S"), and transformed with covariance-clustered bases (type "C").  The
    mean inconsistency rate of every setting in
    ``range(low, high + 1, step)`` is accumulated and finally divided by the
    number of blocks.

    Args:
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once per block.
        channelDataAll1: First sequence of 2-D pages.
        channelDataAll2: Second sequence of 2-D pages, paired by index.
        allCentroidsC: Per-block cluster centres.
        allCentroidUList: Per-block lists of transform matrices.
        a: Exponent of the number of column blocks.
        low: First quantisation setting.
        high: Last quantisation setting (inclusive).
        step: Increment between quantisation settings.

    Returns:
        ``(inconsistencyRates_old, inconsistencyRates_new_noCom,
        inconsistencyRates_new)``, each a list with one averaged value per
        quantisation setting, in the order of ``range(low, high + 1, step)``.
    """
    # NOTE(review): `sub` and `mean` are not defined in this function; they
    # are presumably module-level names (block width and numpy's mean) —
    # confirm against the enclosing module.
    blockCount = 1 << a
    settings = range(low, high + 1, step)

    # One accumulator per setting, addressed by position in `settings`.
    # Indexing by `i - low` is only correct for step == 1; for larger steps
    # it hits the wrong slot or runs past the end of the list.
    inconsistencyRates_old = [0.0] * len(settings)
    inconsistencyRates_new_noCom = [0.0] * len(settings)
    inconsistencyRates_new = [0.0] * len(settings)

    for g in range(1, blockCount + 1):
        # Progress: block started.
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

        channelData1 = []
        channelData2 = []
        for i in range(np.shape(channelDataAll1)[0]):
            channelData1.append(channelDataAll1[i][:, (g - 1) * sub:g * sub])
            channelData2.append(channelDataAll2[i][:, (g - 1) * sub:g * sub])

        # Covariance matrices of the first data set.
        covMatrixList1 = tools.getCovMatrixList(channelData1)

        newChannelData01, newChannelData02 = clusterCore(
            channelData1, covMatrixList1, channelData2,
            allCentroidsC[g - 1], allCentroidUList[g - 1], "S")
        newChannelData1, newChannelData2 = clusterCore(
            channelData1, covMatrixList1, channelData2,
            allCentroidsC[g - 1], allCentroidUList[g - 1], "C")

        # Quantise and measure the inconsistency rate for every setting.
        for slot, i in enumerate(settings):
            bit_inconsistencyRates_old = []
            bit_inconsistencyRates_new_noCom = []
            bit_inconsistencyRates_new = []
            for j in range(len(channelData1)):
                oldKey1, oldKey2 = quantification.quantificate(channelData1[j], channelData2[j], i)
                newKey01, newKey02 = quantification.quantificate(newChannelData01[j], newChannelData02[j], i)
                newKey1, newKey2 = quantification.quantificate(newChannelData1[j], newChannelData2[j], i)
                bit_inconsistencyRates_old.append(
                    quantification.getInconsistencyRate(oldKey1, oldKey2))
                bit_inconsistencyRates_new_noCom.append(
                    quantification.getInconsistencyRate(newKey01, newKey02))
                bit_inconsistencyRates_new.append(
                    quantification.getInconsistencyRate(newKey1, newKey2))

            inconsistencyRates_old[slot] += mean(bit_inconsistencyRates_old)
            inconsistencyRates_new_noCom[slot] += mean(bit_inconsistencyRates_new_noCom)
            inconsistencyRates_new[slot] += mean(bit_inconsistencyRates_new)

        # Progress: block finished.
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,' + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!')

    # Turn the per-setting sums into averages over the blocks.
    inconsistencyRates_old = [total / blockCount for total in inconsistencyRates_old]
    inconsistencyRates_new_noCom = [total / blockCount for total in inconsistencyRates_new_noCom]
    inconsistencyRates_new = [total / blockCount for total in inconsistencyRates_new]
    return inconsistencyRates_old, inconsistencyRates_new_noCom, inconsistencyRates_new
def cluster(a, schedule, channelDataAll1, channelDataAll2, allCentroidsC, allCentroidUList, allCentroidsU, allCentroidUList2):
    """Collect the transformed channel pairs of four methods for every block.

    For each of the ``1 << a`` column blocks ``clusterCore`` is called with
    the type tags "general", "C", "U" and "wt" (in that order) and its two
    results are appended to the matching output lists.

    Args:
        a: Exponent of the number of column blocks.
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once per block.
        channelDataAll1: First sequence of 2-D pages.
        channelDataAll2: Second sequence of 2-D pages, paired by index.
        allCentroidsC: Per-block centres used for "general" and "C".
        allCentroidUList: Per-block transform lists used for "general" and "C".
        allCentroidsU: Per-block centres used for "U" and "wt".
        allCentroidUList2: Per-block transform lists used for "U" and "wt".

    Returns:
        ``(newPca1, newPca2, newC1, newC2, newU1, newU2, newWt1, newWt2)``,
        each with one entry per block.
    """
    # NOTE(review): `sub` is not defined in this function; presumably a
    # module-level block width — confirm.
    newPca1, newPca2 = [], []
    newC1, newC2 = [], []
    newU1, newU2 = [], []
    newWt1, newWt2 = [], []

    # (centres, transform lists, clusterCore type tag, output 1, output 2)
    methods = (
        # PCA without interaction.
        (allCentroidsC, allCentroidUList, "general", newPca1, newPca2),
        # Clustered covariance matrices.
        (allCentroidsC, allCentroidUList, "C", newC1, newC2),
        # Clustered transform matrices.
        (allCentroidsU, allCentroidUList2, "U", newU1, newU2),
        # Labelled "DCT transform" in the original comment; the tag is "wt".
        (allCentroidsU, allCentroidUList2, "wt", newWt1, newWt2),
    )

    for g in range(1, (1 << a) + 1):
        # Progress: block started.
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

        first, last = (g - 1) * sub, g * sub
        pageCount = np.shape(channelDataAll1)[0]
        channelData1 = [channelDataAll1[i][:, first:last] for i in range(pageCount)]
        channelData2 = [channelDataAll2[i][:, first:last] for i in range(pageCount)]

        # Covariance matrices of the first data set.
        covMatrixList1 = tools.getCovMatrixList(channelData1)

        for centres, transforms, tag, out1, out2 in methods:
            result1, result2 = clusterCore(
                channelData1, covMatrixList1, channelData2,
                centres[g - 1], transforms[g - 1], tag)
            out1.append(result1)
            out2.append(result2)

        # Progress: block finished.
        print(
            u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,'
            + u'已完成' + str(schedule[1]) + u'部分,'
            + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
        )

    return newPca1, newPca2, newC1, newC2, newU1, newU2, newWt1, newWt2
def elbow2(channelDataAll, low, high, step, a, schedule):
    """Plot the mean retention rate of plain PCA against the kept dimension.

    The columns of every page are split into ``1 << a`` blocks.  For each
    block and each dimension in ``range(low, high + 1, step)`` the mean of
    column 1 of ``pca.pca_S(...)[0]`` is stored; the per-dimension average
    over all blocks is then plotted.

    Args:
        channelDataAll: Sequence of 2-D pages sharing the same column count.
        low: Smallest kept dimension; must be positive.
        high: Largest kept dimension; must be below the block width.
        step: Increment between kept dimensions.
        a: Exponent of the number of column blocks.
        schedule: Mutable progress record; ``schedule[0]`` is set to the
            number of blocks and ``schedule[1]`` is incremented per block.

    Returns:
        None.  Prints a message and returns early when the bounds are invalid.
    """
    # Validate the bounds.
    if low <= 0:
        print(u'下限太低:下限小于等于0!')
        return
    if high >= (shape(channelDataAll[0])[1] / (1 << a)):
        print(u'上限太高:降维后维度数大于原数据维度!')
        return

    # Number of evaluated dimensions and number of column blocks.
    dimensionCount = (int)((high - low) / step + 1)
    blockCount = 1 << a
    schedule[0] = blockCount

    # One row per block, one column per evaluated dimension.
    rates_S = np.zeros((blockCount, dimensionCount))

    columnCount = np.shape(channelDataAll[0])[1]
    pageCount = len(channelDataAll)
    sub = columnCount >> a

    for g in range(blockCount):
        first, last = g * sub, (g + 1) * sub
        channelData = [channelDataAll[h][:, first:last] for h in range(pageCount)]
        covMatrixList = tools.getCovMatrixList(channelData)

        # Original information, singular values and transform matrices.
        informations, SigmaList, UList = tools.getInformations(covMatrixList)

        for h in range(dimensionCount):
            tmpRates = pca.pca_S(SigmaList, h * step + low)[0][:, 1]
            rates_S[g, h] = np.mean(tmpRates).real

        # Progress: round finished.
        schedule[1] += 1
        print(
            u'共' + str(schedule[0]) + u'轮,' + u'已完成' + str(schedule[1]) + u'轮,'
            + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
        )

    # Average over the blocks for every evaluated dimension.
    SSE_S = [np.mean(rates_S[:, h]) for h in range(dimensionCount)]

    plt.xlabel(u'保留维度数k')
    X = range(low, high + 1, step)
    plt.ylabel(u'特征值保留')
    plt.plot(X, SSE_S, 'k-s')
    plt.show()
    print(u'主进程结束!')
def cluster(a, schedule, channelDataAll1, channelDataAll2, allCentroidsC, allCentroidUList, allCentroidsU, allCentroidUList2):
    """Collect the ``clusterCore`` result of four methods for every block.

    For each of the ``1 << a`` column blocks ``clusterCore`` is called with
    the type tags "none", "general", "C" and "U" (in that order) and each
    result is appended to the matching output list.

    Args:
        a: Exponent of the number of column blocks.
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once per block.
        channelDataAll1: First sequence of 2-D pages.
        channelDataAll2: Second sequence of 2-D pages, paired by index.
        allCentroidsC: Per-block centres used for "none", "general" and "C".
        allCentroidUList: Per-block transform lists used with ``allCentroidsC``.
        allCentroidsU: Per-block centres used for "U".
        allCentroidUList2: Per-block transform lists used for "U".

    Returns:
        ``(totalOldCorr, totalPcaCorr, totalNewCCorr, totalNewUCorr)``, each
        with one entry per block.
    """
    # NOTE(review): `sub` is not defined in this function; presumably a
    # module-level block width — confirm.
    totalOldCorr, totalPcaCorr, totalNewCCorr, totalNewUCorr = [], [], [], []

    # (centres, transform lists, clusterCore type tag, output list)
    methods = (
        (allCentroidsC, allCentroidUList, "none", totalOldCorr),
        (allCentroidsC, allCentroidUList, "general", totalPcaCorr),
        (allCentroidsC, allCentroidUList, "C", totalNewCCorr),
        (allCentroidsU, allCentroidUList2, "U", totalNewUCorr),
    )

    for g in range(1, (1 << a) + 1):
        # Progress: block started.
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

        first, last = (g - 1) * sub, g * sub
        pageCount = np.shape(channelDataAll1)[0]
        channelData1 = [channelDataAll1[i][:, first:last] for i in range(pageCount)]
        channelData2 = [channelDataAll2[i][:, first:last] for i in range(pageCount)]

        # Covariance matrices of the first data set.
        covMatrixList1 = tools.getCovMatrixList(channelData1)

        for centres, transforms, tag, collected in methods:
            collected.append(clusterCore(
                channelData1, covMatrixList1, channelData2,
                centres[g - 1], transforms[g - 1], tag))

        # Progress: block finished.
        print(
            u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,'
            + u'已完成' + str(schedule[1]) + u'部分,'
            + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
        )

    return totalOldCorr, totalPcaCorr, totalNewCCorr, totalNewUCorr
def pca_general(data, newDimension=1):
    """Reduce channel data to its leading principal components.

    A single 2-D channel is projected onto the first ``newDimension`` right
    singular vectors of its column covariance matrix.  Anything whose shape
    does not unpack into exactly two dimensions is treated as a list of
    channels and each one is projected with the transform matrix returned
    by ``tools.getInformations``.

    Args:
        data: One 2-D channel matrix (columns are variables) or a sequence
            of such matrices.
        newDimension: Number of components to keep.

    Returns:
        The projected matrix for a single channel, otherwise a list with one
        projected matrix per channel.

    Raises:
        Whatever ``np.cov``, ``np.linalg.svd`` or ``np.dot`` raise for a
        single channel; such errors are no longer rerouted to the list path.
    """
    # Only the shape probe decides between the two paths.  Wrapping the
    # whole computation in a bare ``except`` would also swallow numerical
    # failures and KeyboardInterrupt.
    try:
        _rowCount, _columnCount = np.shape(data)
    except ValueError:
        isSingleChannel = False
    else:
        isSingleChannel = True

    if isSingleChannel:
        # rowvar=False: every column is one variable.
        covMatrix = np.cov(data, rowvar=False)
        # The transform matrix comes from the SVD of the covariance matrix.
        U = np.transpose(np.linalg.svd(covMatrix)[2])
        return np.dot(data, U[:, 0:newDimension])

    # List input: transform every channel with its own matrix.
    print(u'pca_general')
    covList = tools.getCovMatrixList(data)
    UList = tools.getInformations(covList)[2]
    return [np.dot(channel, UList[idx][:, 0:newDimension])
            for idx, channel in enumerate(data)]
def pca(channelData, informations, centroidList, clusterAssment, rate=1):
    """Reduce every channel with a transform derived from its cluster centroid.

    Args:
        channelData: Sequence of channel matrices to reduce.
        informations: Reference values, read as ``informations[0][i]`` for
            the i-th channel.
        centroidList: Sequence of centroid matrices; each one is decomposed
            with ``np.linalg.svd``.
        clusterAssment: 2-D array; the real part of ``clusterAssment[i, 0]``
            is the centroid index of the i-th channel.
        rate: If ``rate <= 1``, singular values are counted until their
            cumulative share of the total reaches ``rate``; otherwise
            ``rate`` rows of ``VT`` are kept.

    Returns:
        ``(newChannelDataList, newCovMatrixList, U2s, rateList)`` where
        ``rateList`` is a one-element list holding a complex array: column 0
        is the column count of each reduced channel, column 1 the ratio of
        new to original information.
    """
    # Transform matrix of each cluster.
    U2s = []
    for centroid in centroidList:
        U, Sigma, VT = np.linalg.svd(centroid)
        total = np.sum(Sigma)
        if rate <= 1:
            lastRow = 0
            running = 0
            for value in Sigma:
                running += value
                # Stop once the cumulative share is no longer below `rate`.
                if not (rate - (running / total) > 0):
                    break
                lastRow += 1
        else:
            lastRow = rate - 1
        U2s.append(np.transpose(VT[0:lastRow + 1, :]))

    # Dimensionality reduction with the transform of the assigned cluster.
    rates = np.zeros((len(channelData), 2), dtype=complex)
    newChannelDataList = []
    for idx, channel in enumerate(channelData):
        reduced = np.dot(channel, U2s[int(clusterAssment[idx, 0].real)])
        newChannelDataList.append(reduced)
        rates[idx, 0] = np.shape(reduced)[1]

    newCovMatrixList = tools.getCovMatrixList(newChannelDataList)
    newInformations = tools.getInformations(newCovMatrixList)[0]
    for idx in range(len(channelData)):
        rates[idx, 1] = newInformations[0][idx] / informations[0][idx]

    return newChannelDataList, newCovMatrixList, U2s, [rates]
def cluster(a, schedule, channelDataAll1, channelDataAll2, allCentroidsC, allCentroidUList, allCentroidsU, allCentroidUList2):
    """Average the channel correlation before and after the clustered transforms.

    For each of the ``1 << a`` column blocks the mean of
    ``np.corrcoef`` over all channel pairs is recorded, together with the
    values returned by ``clusterCore`` for the type tags "C" and "U".

    Args:
        a: Exponent of the number of column blocks.
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once per block.
        channelDataAll1: First sequence of 2-D pages.
        channelDataAll2: Second sequence of 2-D pages, paired by index.
        allCentroidsC: Per-block centres used for "C".
        allCentroidUList: Per-block transform lists used for "C".
        allCentroidsU: Per-block centres used for "U".
        allCentroidUList2: Per-block transform lists used for "U".

    Returns:
        Absolute values of the overall means, ordered
        ``(old, covariance-clustered, transform-clustered)``.
    """
    # NOTE(review): `p` and `sub` are not defined in this function;
    # presumably module-level page count and block width — confirm.
    allOldCorr, allNewCCorr, allNewUCorr = [], [], []

    for g in range(1, (1 << a) + 1):
        # Progress: block started.
        schedule[1] += 1
        tmpSchedule = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

        first, last = (g - 1) * sub, g * sub
        channelData1 = [channelDataAll1[i][:, first:last] for i in range(p)]
        channelData2 = [channelDataAll2[i][:, first:last] for i in range(p)]

        channelDatas1 = tools.matrixListToMatrix_U(channelData1)
        channelDatas2 = tools.matrixListToMatrix_U(channelData2)

        # Covariance matrices of the first data set.
        covMatrixList1 = tools.getCovMatrixList(channelData1)

        # Correlation of the untransformed channel pairs.
        oldCorr = [
            np.corrcoef(channelDatas1[i, :], channelDatas2[i, :])
            for i in range(np.shape(channelData1)[0])
        ]
        allOldCorr.append(np.mean(oldCorr))

        allNewCCorr.append(clusterCore(
            channelData1, covMatrixList1, channelData2,
            allCentroidsC[g - 1], allCentroidUList[g - 1], "C"))
        allNewUCorr.append(clusterCore(
            channelData1, covMatrixList1, channelData2,
            allCentroidsU[g - 1], allCentroidUList2[g - 1], "U"))

        # Progress: block finished.
        print(
            u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,'
            + u'已完成' + str(schedule[1]) + u'部分,'
            + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
        )

    return abs(np.mean(allOldCorr)), abs(np.mean(allNewCCorr)), abs(np.mean(allNewUCorr))
def getCentroids(schedule, path, suffix, channelData, g, k, iRate, type=u'C'):
    """Validate the inputs and run ``getCentroidsCore`` for the requested method.

    Args:
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once the inputs are accepted.
        path: Forwarded to ``getCentroidsCore``.
        suffix: Forwarded to ``getCentroidsCore``.
        channelData: Channel data; its first two dimensions bound ``k`` and
            ``iRate`` respectively.
        g: Forwarded to ``getCentroidsCore``.
        k: Number of cluster centres; must be positive and not exceed the
            first dimension of ``channelData``.
        iRate: Reduced dimension; must be positive and not exceed the second
            dimension of ``channelData``.
        type: ``u'total'`` runs both "C" and "U"; any other value is passed
            through unchanged.

    Returns:
        None.  Prints a message and returns early when validation fails.
    """
    # Validate the inputs.
    dims = np.shape(channelData)
    if k > dims[0]:
        print(u'聚类中心数量不能大于样本数量!')
        return
    if iRate > dims[1]:
        print(u'降维后维度不能大于样本原有的维度!')
        return
    if k <= 0 or iRate <= 0:
        print(u'聚类中心数量和降维后维度不能小于1!')
        return

    # Progress: part started.
    schedule[1] += 1
    tmpSchedule = schedule[1]
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

    # Covariance matrices and the quantities derived from them.
    covMatrixList = tools.getCovMatrixList(channelData)
    informations, SigmaList, UList = tools.getInformations(covMatrixList)

    # u'total' means: cluster the covariance matrices, then the transform
    # matrices.
    requested = ("C", "U") if type == u'total' else (type,)
    for method in requested:
        getCentroidsCore(path, suffix, channelData, covMatrixList, informations,
                         SigmaList, UList, g, k, iRate, method)

    # Progress: part finished.
    print(
        u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,'
        + u'已完成' + str(schedule[1]) + u'部分,'
        + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!'
    )
def cluster(schedule, path, suffix, channelData, g, iRate):
    """Assign channels to stored centroids, run PCA and write the results.

    Centroids are read from files under ``path`` whose full name contains
    ``path + "getCentroids_outCentroidList_C_<g>_"``.  The cluster
    assignment, the reduced channel data, the new covariance matrices, the
    transform matrices and the rates are written with
    ``readAndWriteDataSet.write`` to time-stamped paths below ``path``.

    Args:
        schedule: Mutable progress record; ``schedule[0]`` is the total,
            ``schedule[1]`` is incremented once the inputs are accepted.
        path: Directory prefix used for reading centroids and writing output.
        suffix: Passed to ``readAndWriteDataSet.write`` for every output.
        channelData: Channel data; its second dimension bounds ``iRate``.
        g: Identifier inserted into the file names.
        iRate: Reduced dimension; must be positive and not exceed the second
            dimension of ``channelData``.

    Returns:
        None.  Prints a message and returns early when validation fails.
    """
    # Validate the requested dimension.
    if iRate > np.shape(channelData)[1]:
        print(u'降维后维度不能大于样本原有的维度!')
        return
    if iRate <= 0:
        print(u'降维后维度不能小于1!')
        return

    # Progress: part started.
    schedule[1] += 1
    tmpSchedule = schedule[1]
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分开始!')

    # Input prefix (without time stamp) and output paths (with time stamp).
    pathSuffix = "C" + "_" + str(g) + "_"
    centroidListPath = path + "getCentroids_outCentroidList_" + pathSuffix
    nowTime = time.strftime("%Y-%m-%d.%H.%M.%S", time.localtime(time.time()))
    pathSuffix = pathSuffix + str(nowTime)
    outOldCovMatrixListPath = path + "cluster_outOldCovMatrixList_" + pathSuffix
    outClusterAssmentPath = path + "cluster_outClusterAssment_" + pathSuffix
    outNewChannelDataPath = path + "cluster_outNewChannelData_" + pathSuffix
    outNewCovMatrixsPath = path + "cluster_outNewCovMatrixList_" + pathSuffix
    ratesPath = path + "cluster_rates_" + pathSuffix
    UTsPath = path + "cluster_UTs_" + pathSuffix

    # Read the centroid information, merging several files.
    centroidList = []
    for root, dirs, files in os.walk(path, topdown=True):
        for file in files:
            file = os.path.join(root, file)
            if centroidListPath in file:
                centroidListTmp = readAndWriteDataSet.excelToMatrixList(file)
                for centroid in centroidListTmp:
                    centroidList.append(centroid)
        # NOTE(review): the flattened source does not show which loop this
        # break belongs to.  It is placed on the os.walk loop so that only
        # the top directory is scanned and all matching files are merged,
        # which matches the "merge several files" comment — confirm.
        break
    centroids = tools.matrixListToMatrix(centroidList)

    # Covariance matrices of the channels, gathered into one matrix.
    covMatrixList = tools.getCovMatrixList(channelData)
    allCovMatrix = tools.matrixListToMatrix(covMatrixList)

    # Decide which cluster every channel belongs to.
    clusterAssment = kmeans.getClusterAssment(allCovMatrix, centroids)
    # Wrapped in a list before being handed to the writer.
    clusterAssmentList = []
    clusterAssmentList.append(clusterAssment)

    # Analyse the PCA effect.
    # NOTE(review): covMatrixList is passed in the second position of
    # pca.pca here — verify this matches the pca signature in use.
    newChannelData, newCovMatrixList, UTs, rates = pca.pca(
        channelData, covMatrixList, centroidList, clusterAssment, iRate)

    # Write the results.
    # Cluster assignment.
    readAndWriteDataSet.write(clusterAssmentList, outClusterAssmentPath, suffix)
    # The covariance matrices are too large, so they are not written for now.
    # readAndWriteDataSet.write(covMatrixList, outOldCovMatrixListPath, suffix)
    # PCA results.
    readAndWriteDataSet.write(newChannelData, outNewChannelDataPath, suffix)
    readAndWriteDataSet.write(newCovMatrixList, outNewCovMatrixsPath, suffix)
    readAndWriteDataSet.write(UTs, UTsPath, suffix)
    readAndWriteDataSet.write(rates, ratesPath, suffix)

    # Progress: part finished.
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(tmpSchedule) + u'部分完成,' + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!')