Example #1
0
def runAff(fileName, title='AffinityPropagation'):
    """Cluster a dataset with AffinityPropagation, print its score and plot it.

    Args:
        fileName: Dataset location, passed unchanged to ``loadData``.
        title: Forwarded to ``drawLabel`` as its ``t`` keyword argument.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    cluster = AffinityPropagation().fit(propertysMatrix)
    myLabel = cluster.labels_
    # Named ``score`` rather than ``re`` so the stdlib ``re`` module name is
    # not shadowed inside this function.
    score = computeARI(myLabel, labelMatrix)
    # A parenthesised single-argument print produces the same output on
    # Python 2 and Python 3; the bare ``print x`` statement is a SyntaxError
    # on Python 3.
    print(score)
    drawLabel(propertysMatrix, myLabel, t=title)
Example #2
0
def runDBSCAN(fileName, eps, min_samples, title='DBSCAN'):
    """Cluster a dataset with DBSCAN and print the score with the parameters.

    Args:
        fileName: Dataset location, passed unchanged to ``loadData``.
        eps: Forwarded to ``DBSCAN`` as ``eps``.
        min_samples: Forwarded to ``DBSCAN`` as ``min_samples``.
        title: Not used by this function (plotting is disabled here); kept so
            existing callers that pass it keep working.

    Returns:
        The third-from-last element of the value returned by ``computeARI``.
        NOTE(review): which metric sits at that position is decided by
        ``computeARI``, which is not visible here -- confirm before relying
        on it.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    cluster = DBSCAN(eps=eps, min_samples=min_samples,
                     metric='euclidean').fit(propertysMatrix)
    scores = computeARI(cluster.labels_, labelMatrix)
    # Print one tuple object so the output is identical on Python 2 and 3;
    # the bare ``print x`` statement is a SyntaxError on Python 3.
    print((scores, eps, min_samples))
    return scores[-3]
Example #3
0
def runCBLP(fileName,
            iterationTime=30,
            WA2SIAway='norm',
            topN=0,
            percent=1,
            onlyTopN=False):
    """Run the seed-based label-propagation clustering pipeline on a dataset.

    Steps, as visible in the body: load the data, build a pairwise
    standardized-Euclidean distance matrix, convert it to a similarity matrix
    (``EDA2SIA``) and then to a weight matrix (``SIA2WA``), pick seeds
    (``climbAlgorithm``), initialise one label column per seed, iterate
    ``updateLabelA`` and finally turn the label matrix into a per-sample
    label vector.

    Args:
        fileName: Dataset location, passed unchanged to ``loadData``.
        iterationTime: Upper bound on the number of ``updateLabelA`` rounds.
        WA2SIAway: Forwarded to ``SIA2WA`` under the same keyword.
        topN: Forwarded to ``SIA2WA`` under the same keyword.
        percent: Forwarded to ``SIA2WA`` under the same keyword.
        onlyTopN: Forwarded to ``SIA2WA`` under the same keyword.

    NOTE(review): no ``return`` is visible in this block -- ``mylabelMatrix``
    is computed and then not used. The definition may be truncated here;
    confirm against the full file.
    """
    propertysMatrix, labelMatrix = loadData(fileName)
    # n is the size of the first axis of the loaded data matrix.
    n = np.shape(propertysMatrix)[0]
    # ==================== (above) load the dataset =========================
    EDA = squareform(pdist(propertysMatrix, metric='seuclidean'))
    # ============ (above) compute the standardized Euclidean distance ============
    SIA = EDA2SIA(EDA)

    # =================== (above) compute similarity 1/(x+1), then normalize (per the original comment) ===================
    WA = SIA2WA(SIA,
                WA2SIAway=WA2SIAway,
                topN=topN,
                percent=percent,
                onlyTopN=onlyTopN)
    # printDegVar(WA)
    # ====================== (above) filter the similarities to obtain the weight matrix ======================
    # Second argument is 10% of n, truncated to an int.
    # NOTE(review): presumably a cap on the number of seeds -- confirm in climbAlgorithm.
    seedList = climbAlgorithm(WA, int(0.1 * n))
    # =========================== (above) obtain the seed set ===============================
    # One column per seed; entry (seedList[i], i) is set to 1.
    LabelA = np.zeros((n, len(seedList)))
    for i in range(len(seedList)):
        LabelA[seedList[i]][i] = 1
    # ========================== (above) initialise the label-vector matrix from the seed set ================
    start = clock()
    for t in range(iterationTime):

        LabelA, changed = updateLabelA(WA, LabelA)
        # ============= (above) update the label matrix =======================================

        # ============= output results, plot (no code follows this marker here) =============
        # cleanSmallLabel is applied exactly once, on the sixth round (t == 5).
        if t == 5:
            LabelA = cleanSmallLabel(LabelA, k=len(seedList))
        # ========================== (above) once roughly converged, remove communities that are too small =====================
        # Early stop is only allowed after the clean-up round above has run.
        if changed < 0.0001 and t > 5:
            # print('iteration stop in ',t)
            break

    # ============================ (above) propagate labels until convergence ===========================
    end = clock()
    print('time cost:', end - start)
    LabelA = A2onehotA(LabelA)
    # Every sample starts at -1; each row that has a 1 in LabelA is then
    # assigned the column index of that 1.
    mylabelMatrix = np.zeros(n) - 1
    index = np.where(LabelA == 1)
    mylabelMatrix[index[0]] = index[1]
Example #4
0
    DBSCAN_para = {
        'heart': [0.3, 2],
        'cancer': [0.5, 40],
        'wine': [0.6, 37],
        'vote': [0.9, 10],
        'letterqz': [0.4, 4],
        'letteraw': [0.7, 14],
        'seeds': [0.3, 21],
        'aggregation': [0.1, 36],
        'five_cluster': [0.1, 36],
    }

    result = []
    data = []
    for filename in fileList.keys():
        X, labels_true = loadData(filename)
        # drawLabel(X, LabelA=labels_true, )
        # 原始聚类
        # 其他算法最好的结果
        k = len(set(labels_true))
        ap = AffinityPropagation().fit(X)
        kmeans = KMeans(n_clusters=k, random_state=0).fit(X)
        hierarchical = AgglomerativeClustering(n_clusters=k).fit(X)
        f = filename.split('/')[-1].split('.')[0]
        dbscan = DBSCAN(eps=DBSCAN_para[f][0],
                        min_samples=DBSCAN_para[f][1],
                        metric='euclidean').fit(X)

        re_dbscan = computeARI(dbscan.labels_, labels_true, X=X)
        re_ap = computeARI(ap.labels_, labels_true, X=X)
        re_kMeans = computeARI(kmeans.labels_, labels_true, X=X)