def runAff(fileName, title='AffinityPropagation'):
    """Cluster the data in *fileName* with AffinityPropagation, print the
    ARI score (as computed by computeARI) and plot the resulting labels.

    :param fileName: path of the dataset file understood by loadData
    :param title: plot title passed through to drawLabel
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    cluster = AffinityPropagation().fit(propertysMatrix)
    myLabel = cluster.labels_
    # Renamed from `re` so the local no longer shadows the regex module name.
    score = computeARI(myLabel, labelMatrix)
    # print() call form for consistency with the rest of the file (and Py3).
    print(score)
    drawLabel(propertysMatrix, myLabel, t=title)
def runDBSCAN(fileName, eps, min_samples, title='DBSCAN'):
    """Cluster the data in *fileName* with DBSCAN and report the ARI.

    Prints a tuple ``(computeARI(...), eps, min_samples)`` and returns
    ``computeARI(...)[ -3 ]`` — i.e. the third-from-last element of
    computeARI's result (presumably one specific score component;
    verify against computeARI's definition).

    :param fileName: path of the dataset file understood by loadData
    :param eps: DBSCAN neighbourhood radius
    :param min_samples: DBSCAN core-point threshold
    :param title: kept for interface compatibility (plotting is disabled)
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    cluster = DBSCAN(eps=eps, min_samples=min_samples,
                     metric='euclidean').fit(propertysMatrix)
    myLabel = cluster.labels_
    # Explicit parentheses: the original relied on the bare comma building
    # this 3-tuple implicitly.
    result = (computeARI(myLabel, labelMatrix), eps, min_samples)
    print(result)
    # drawLabel(propertysMatrix, myLabel, t=title)
    return result[0][-3]
# Benchmark CBLP (model='even') against four baseline clusterers on every
# dataset in fileList, comparing ARI scores.
data = []
for filename in fileList:  # iterate the dict directly instead of .keys()
    X, labels_true = loadData(filename)
    # drawLabel(X, LabelA=labels_true, )  # plot of the original clustering
    # Best results achieved by the other algorithms.
    k = len(set(labels_true))
    ap = AffinityPropagation().fit(X)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
    hierarchical = AgglomerativeClustering(n_clusters=k).fit(X)
    # Dataset key: bare file name without directory or extension.
    f = filename.split('/')[-1].split('.')[0]
    dbscan = DBSCAN(eps=DBSCAN_para[f][0], min_samples=DBSCAN_para[f][1],
                    metric='euclidean').fit(X)
    re_dbscan = computeARI(dbscan.labels_, labels_true, X=X)
    re_ap = computeARI(ap.labels_, labels_true, X=X)
    re_kMeans = computeARI(kmeans.labels_, labels_true, X=X)
    re_Hierarchical = computeARI(hierarchical.labels_, labels_true, X=X)
    # Element-wise maximum over the four baselines.
    re_max = np.max(np.vstack(
        (re_kMeans, re_Hierarchical, re_ap, re_dbscan)), axis=0)
    # ===============
    noisePercent = computeNoise(SIA2WA(file2SIA(filename), percent=0.05))
    if noisePercent < 0.05:
        percent = 2 * noisePercent + 0.1
        cblp = CBLP(model='even', percent=percent).fit(X)
        re_cblp = computeARI(cblp.labels_, labels_true, X=X)
        # Reuse re_cblp: the original recomputed computeARI with identical
        # arguments inside the print call.
        print('model:even,percent:', percent, re_cblp,
              (re_max < re_cblp) * 1)
def runSpectralClustering(fileName, k=2):
    """Cluster the data in *fileName* with SpectralClustering, print the
    ARI score and plot the resulting labels.

    :param fileName: path of the dataset file understood by loadData
    :param k: number of clusters
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    # Renamed from `re` so the local no longer shadows the regex module name.
    model = SpectralClustering(n_clusters=k).fit(propertysMatrix)
    myLabel = model.labels_
    # print() call form for consistency with the rest of the file (and Py3).
    print(computeARI(myLabel, labelMatrix))
    drawLabel(propertysMatrix, myLabel, t='SpectralClustering')
def runKMeans(fileName, k=2):
    """Cluster the data in *fileName* with KMeans (fixed random_state for
    reproducibility) and print the ARI score.

    :param fileName: path of the dataset file understood by loadData
    :param k: number of clusters
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(propertysMatrix)
    myLabel = kmeans.labels_
    # print() call form for consistency with the rest of the file (and Py3).
    print(computeARI(myLabel, labelMatrix))
# Collect the .txt datasets from the "even" directory (evenPath/evenFileList
# are defined earlier in the file).
for i in os.listdir(evenPath):
    if i.split('.')[-1] == 'txt':
        evenFileList.append(evenPath + i)

unevenPath = '/Users/liuqiang/Documents/标签传播过程论文/聚类数据集/uneven/'
unevenFileList = []
for i in os.listdir(unevenPath):
    if i.split('.')[-1] == 'txt':
        unevenFileList.append(unevenPath + i)
# NOTE(review): the list built just above is immediately discarded here and
# only cancer.txt is processed — confirm this override is intentional.
unevenFileList = ['/Users/liuqiang/Documents/标签传播过程论文/聚类数据集/uneven/cancer.txt', ]

# Benchmark CBLP (model='uneven') against KMeans and hierarchical clustering,
# sweeping the topN parameter.
for filename in unevenFileList:
    X, labels_true = loadData(filename)
    k = len(set(labels_true))
    kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
    hierarchical = AgglomerativeClustering(n_clusters=k).fit(X)
    re_kMeans = computeARI(kmeans.labels_, labels_true, X=X)
    re_Hierarchical = computeARI(hierarchical.labels_, labels_true, X=X)
    re = np.vstack((re_kMeans, re_Hierarchical))
    # Element-wise maximum of the two baselines.
    re_max = np.max(re, axis=0)
    # print() call form for consistency with the rest of the file (and Py3).
    print(re_max)
    # `topN` instead of reusing `i` from the directory loops above.
    for topN in range(4, 100, 5):
        cblp = CBLP(model='uneven', topN=topN).fit(X)
        # Compute the ARI once and reuse it: the original called computeARI
        # twice with identical arguments.
        re_cblp = computeARI(cblp.labels_, labels_true, X=X)
        print(topN, re_cblp)
        print(topN, (re_max < re_cblp) * 1)

# for filename in evenFileList:
#     X, labels_true = loadData(filename)
#     k = len(set(labels_true))
#     kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
#     hierarchical = AgglomerativeClustering(n_clusters=k).fit(X)
#     re_kMeans = computeARI(kmeans.labels_, labels_true, X=X)
def runAgglomerative(fileName, k=2):
    """Cluster the data in *fileName* with AgglomerativeClustering and
    print the ARI score.

    :param fileName: path of the dataset file understood by loadData
    :param k: number of clusters
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    # Renamed from `re` so the local no longer shadows the regex module name.
    model = AgglomerativeClustering(n_clusters=k).fit(propertysMatrix)
    myLabel = model.labels_
    # print() call form for consistency with the rest of the file (and Py3).
    print(computeARI(myLabel, labelMatrix))