Exemple #1
0
def DENCLUE(D, h, kexi, eps):
    """Run a DENCLUE-style clustering over the rows of ``D`` and plot it.

    For every row ``D[i]`` the density attractor is obtained from
    ``FINDATTRACTOR``; attractors whose density ``fx`` reaches ``kexi`` are
    kept and grouped with DBSCAN.  Each cluster finally holds its attractors
    followed by the data points those attractors attracted.  Progress is
    printed and the resulting mapping is handed to ``showpicture``.

    Args:
        D: Data set indexable by row, exposing ``shape[0]`` as the row count.
        h: Forwarded to ``FINDATTRACTOR`` and ``fx`` (presumably the kernel
            bandwidth -- confirm against those helpers).
        kexi: Minimum value of ``fx`` an attractor needs in order to be kept.
        eps: Forwarded to ``FINDATTRACTOR`` (presumably its convergence
            tolerance -- confirm against that helper).

    Returns:
        None.
    """
    attractors = []        # one entry per retained point; duplicates are kept
    attracted_by = {}      # str(attractor as list) -> points it attracted

    for row in range(D.shape[0]):
        x = D[row]
        found = FINDATTRACTOR(x, D, h, eps)
        found_as_list = list(found)
        if fx(found, D, h) >= kexi:
            attractors.append(found_as_list)
            attracted_by.setdefault(str(found_as_list), []).append(x)

    print("A", attractors)

    eps1 = 0.1       # DBSCAN distance threshold
    min_sample = 2   # neighbourhood size required for a DBSCAN core sample
    if attractors:
        # A plain ndarray is used on purpose: ``np.mat`` no longer exists in
        # NumPy 2.0 and recent scikit-learn rejects ``np.matrix`` input.
        labels = DBSCAN(eps=eps1, min_samples=min_sample).fit_predict(
            np.asarray(attractors))
    else:
        # Nothing passed the density threshold; DBSCAN would raise on an
        # empty input, so report zero clusters instead.
        labels = []

    # Group attractors by DBSCAN label, dropping noise (label -1).
    clusterdict = {}
    for label, attractor in zip(labels, attractors):
        if label != -1:
            clusterdict.setdefault(str(label), []).append(attractor)

    print("number of clusters", len(clusterdict.keys()))
    for key in attracted_by:
        print("density attracotr", key)
        print("atracted point", attracted_by[key])

    # Append the attracted points to each cluster.  The additions are
    # collected separately so the list is not grown while being iterated,
    # and every distinct attractor contributes its points only once even
    # though it appears once per attracted point in ``attractors``.
    for members in clusterdict.values():
        merged_keys = set()
        additions = []
        for attractor in members:
            key = str(attractor)
            if key in attracted_by and key not in merged_keys:
                merged_keys.add(key)
                additions.extend(attracted_by[key])
        members.extend(additions)

    for label in clusterdict:
        print('clusterres', label, "   size", len(clusterdict[label]))
        print('set of point in the cluster', clusterdict[label])
    showpicture(clusterdict)
Exemple #2
0
def denclue(points, bandwidth, hyper, convergence):
    """Run a DENCLUE-style clustering over the rows of ``points`` and print it.

    For every row the density attractor is obtained from ``findAttractor``;
    attractors whose density estimate ``multi`` exceeds ``hyper`` are kept
    and grouped with DBSCAN.  Each cluster finally holds its attractors
    followed by the data points those attractors attracted, and the clusters
    are printed.

    Args:
        points: Data set indexable by row, exposing ``shape[0]`` as the row
            count.
        bandwidth: Forwarded to ``findAttractor`` and ``multi``.
        hyper: Density threshold; an attractor is kept only when
            ``multi(attractor, points, bandwidth) > hyper``.
        convergence: Forwarded to ``findAttractor``.

    Returns:
        None.
    """
    attractors = []      # one entry per retained point; duplicates are kept
    attracted_by = {}    # str(attractor as list) -> points it attracted

    for row in range(points.shape[0]):
        attractor = findAttractor(bandwidth, convergence, points[row], points)
        # Attractors sitting in a region that is not dense enough to form a
        # cluster are discarded.
        if multi(attractor, points, bandwidth) > hyper:
            as_list = list(attractor)
            attractors.append(as_list)
            # Key on the list form: the merge step below looks attractors up
            # via the lists stored in ``attractors``, and ``str`` of an
            # ndarray differs from ``str`` of the equivalent list.
            attracted_by.setdefault(str(as_list), []).append(points[row])

    neighbour_radius = 0.1   # DBSCAN distance threshold
    min_neighbours = 2       # neighbourhood size for a DBSCAN core sample
    if attractors:
        # A plain ndarray is used on purpose: ``np.mat`` no longer exists in
        # NumPy 2.0 and recent scikit-learn rejects ``np.matrix`` input.
        labels = DBSCAN(eps=neighbour_radius,
                        min_samples=min_neighbours).fit_predict(
            np.asarray(attractors))
    else:
        # Nothing passed the density threshold; DBSCAN would raise on an
        # empty input, so report zero clusters instead.
        labels = []

    # Group attractors by DBSCAN label, dropping noise (label -1).
    clusters = {}
    for label, attractor in zip(labels, attractors):
        if label != -1:
            clusters.setdefault(str(label), []).append(attractor)
    print("clusters number", len(clusters.keys()))

    # Append the attracted points to each cluster.  Distinct loop names keep
    # the cluster key intact, the additions are collected separately so the
    # list is not grown while being iterated, and every distinct attractor
    # contributes its points only once.
    for members in clusters.values():
        merged_keys = set()
        additions = []
        for attractor in members:
            key = str(attractor)
            if key in attracted_by and key not in merged_keys:
                merged_keys.add(key)
                additions.extend(attracted_by[key])
        members.extend(additions)

    for label in clusters:
        print("群集", label)
        print("群集", label, "中的点", clusters[label])