def runAff(fileName, title='AffinityPropagation'):
    """Cluster one data set with Affinity Propagation, print its score, draw it.

    The data set is read with ``loadData``, clustered with a default-configured
    ``AffinityPropagation`` estimator, scored against the reference labels with
    ``computeARI`` and then handed to ``drawLabel``.

    Args:
        fileName: Data set identifier, passed unchanged to ``loadData``.
        title: Forwarded to ``drawLabel`` as its ``t`` keyword argument.

    Returns:
        None. The score is printed and ``drawLabel`` is called as side effects.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    predicted = AffinityPropagation().fit(propertysMatrix).labels_

    # Named ``score`` rather than ``re`` so the stdlib ``re`` module is not
    # shadowed inside this function.
    score = computeARI(predicted, labelMatrix)

    # Single-argument parenthesised print: identical output on Python 2 and
    # valid syntax on Python 3 (the bare ``print x`` statement is not).
    print(score)

    drawLabel(propertysMatrix, predicted, t=title)
def runDBSCAN(fileName, eps, min_samples, title='DBSCAN'):
    """Cluster one data set with DBSCAN and return one entry of its score.

    The data set is read with ``loadData``, clustered with a Euclidean-metric
    ``DBSCAN`` estimator and scored against the reference labels with
    ``computeARI``. The score together with the two DBSCAN settings is printed
    so that parameter sweeps can be read from the console output.

    Args:
        fileName: Data set identifier, passed unchanged to ``loadData``.
        eps: Forwarded to ``DBSCAN`` as ``eps``.
        min_samples: Forwarded to ``DBSCAN`` as ``min_samples``.
        title: Currently unused; its only consumer, a ``drawLabel`` call, is
            commented out. Kept so existing callers keep working.

    Returns:
        The third-from-last element of the value returned by ``computeARI``.
        NOTE(review): which metric sits at that position depends on
        ``computeARI``, which is not visible here - confirm.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    estimator = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    predicted = estimator.fit(propertysMatrix).labels_

    # Named ``scores`` rather than ``re`` so the stdlib ``re`` module is not
    # shadowed inside this function.
    scores = computeARI(predicted, labelMatrix)

    # Print one tuple object (scores, eps, min_samples). Passing a single
    # parenthesised argument gives the same output on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print x`` statement.
    report = (scores, eps, min_samples)
    print(report)

    # drawLabel(propertysMatrix, predicted, t=title)
    return scores[-3]
def runCBLP(fileName, iterationTime=30, WA2SIAway='norm', topN=0, percent=1, onlyTopN=False):
    """Run the seed-based label-propagation pipeline on one data set.

    Steps, in order: load the data, build a pairwise distance matrix, convert
    it to similarities and then to a weight matrix, pick seed samples,
    initialise one label column per seed, propagate labels for at most
    ``iterationTime`` rounds, and finally derive one label index per sample.

    Args:
        fileName: Data set identifier, passed unchanged to ``loadData``.
        iterationTime: Upper bound on the number of propagation rounds.
        WA2SIAway: Forwarded to ``SIA2WA`` under the same keyword.
        topN: Forwarded to ``SIA2WA`` under the same keyword.
        percent: Forwarded to ``SIA2WA`` under the same keyword.
        onlyTopN: Forwarded to ``SIA2WA`` under the same keyword.

    NOTE(review): within the visible block the per-sample label vector is
    computed but neither returned nor scored - confirm whether the function
    continues elsewhere.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    sample_count = np.shape(propertysMatrix)[0]

    # Pairwise standardised Euclidean distances, expanded to a square matrix.
    distances = squareform(pdist(propertysMatrix, metric='seuclidean'))

    # Distances -> similarities (original note: 1/(x+1), then normalised).
    similarities = EDA2SIA(distances)

    # Filter the similarities to obtain the weight matrix.
    weights = SIA2WA(
        similarities,
        WA2SIAway=WA2SIAway,
        topN=topN,
        percent=percent,
        onlyTopN=onlyTopN,
    )
    # printDegVar(weights)

    # Obtain the seed set; the second argument is 10% of the sample count.
    seeds = climbAlgorithm(weights, int(0.1 * sample_count))
    seed_total = len(seeds)

    # Initialise the label matrix from the seeds: seed number ``column``
    # starts with a 1 in its own column, everything else is 0.
    label_matrix = np.zeros((sample_count, seed_total))
    for column, seed in enumerate(seeds):
        label_matrix[seed][column] = 1

    # Propagate labels until (approximate) convergence.
    started = clock()
    for step in range(iterationTime):
        # Update the label matrix.
        label_matrix, changed = updateLabelA(weights, label_matrix)

        # Once roughly converged (fixed at round index 5), remove the
        # communities that are too small.
        if step == 5:
            label_matrix = cleanSmallLabel(label_matrix, k=seed_total)

        # Early stop is only allowed after the clean-up round above.
        if changed < 0.0001 and step > 5:
            # print('iteration stop in ', step)
            break
    finished = clock()
    print('time cost:', finished - started)

    # Collapse to one-hot rows, then record for each sample the column index
    # holding its 1; samples without any 1 keep the placeholder -1.
    label_matrix = A2onehotA(label_matrix)
    mylabelMatrix = np.full(sample_count, -1.0)
    hits = np.where(label_matrix == 1)
    mylabelMatrix[hits[0]] = hits[1]
# Per-data-set DBSCAN settings, keyed by the data file's base name (path and
# extension stripped, see ``f`` below). Each value is read as
# [eps, min_samples].
DBSCAN_para = {
    'heart': [0.3, 2],
    'cancer': [0.5, 40],
    'wine': [0.6, 37],
    'vote': [0.9, 10],
    'letterqz': [0.4, 4],
    'letteraw': [0.7, 14],
    'seeds': [0.3, 21],
    'aggregation': [0.1, 36],
    'five_cluster': [0.1, 36],
}
result = []
data = []
# Fit the baseline clustering algorithms on every data set in ``fileList`` and
# score each one against the reference labels.
for filename in fileList.keys():
    X, labels_true = loadData(filename)
    # drawLabel(X, LabelA=labels_true, )  # original (reference) clustering
    # Best results of the other algorithms.
    # The number of distinct reference labels is used as the cluster count
    # for the algorithms that need one.
    k = len(set(labels_true))
    ap = AffinityPropagation().fit(X)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
    hierarchical = AgglomerativeClustering(n_clusters=k).fit(X)
    # Base name of the data file: drop the directory part and the extension.
    f = filename.split('/')[-1].split('.')[0]
    dbscan = DBSCAN(eps=DBSCAN_para[f][0], min_samples=DBSCAN_para[f][1], metric='euclidean').fit(X)
    re_dbscan = computeARI(dbscan.labels_, labels_true, X=X)
    re_ap = computeARI(ap.labels_, labels_true, X=X)
    re_kMeans = computeARI(kmeans.labels_, labels_true, X=X)