コード例 #1
0
ファイル: funs.py プロジェクト: ShenfeiPei/EDG
def _kng_weights(D, ind_M, start, knn, way, t):
    """Convert the distances to the ``knn`` nearest columns into edge weights.

    :param D: distance matrix whose rows were argsorted to produce ``ind_M``
    :param ind_M: per-row column indices of ``D`` in ascending distance order
    :param start: first column of ``ind_M`` that counts as a neighbour
    :param knn: the number of nearest neighbors
    :param way: "gaussian" or "t_free"
    :param t: bandwidth for "gaussian": "mean", "median", or a number
    :return: weights for the columns ``ind_M[:, start:start + knn]``
        (presumably one row per row of ``D``; depends on matrix_index_take)
    """
    if way == "gaussian":
        Val = matrix_index_take(D, ind_M[:, start:(start + knn)])
        if t == "mean":
            t = np.mean(Val)
        elif t == "median":
            t = np.median(Val)
        return np.exp(-Val / t)

    # "t_free": the weight of neighbour j is proportional to d_(k+1) - d_j,
    # so the (k+1)-th neighbour is fetched as well.
    Val = matrix_index_take(D, ind_M[:, start:(start + knn + 1)])
    Val = Val[:, knn].reshape((-1, 1)) - Val[:, :knn]
    # Distances are in ascending order, so a zero first entry means all k+1
    # distances tie and the whole row is zero. Use uniform weights instead of
    # dividing 0 by 0 in the normalisation below.
    ind0 = np.where(Val[:, 0] == 0)[0]
    if len(ind0) > 0:
        Val[ind0, :] = 1 / knn
    return Val / np.sum(Val, axis=1).reshape(-1, 1)


def kng(X, knn, way="gaussian", t="mean", Anchor=0, isSym=True):
    """
    Build a k-nearest-neighbor graph on X, or between X and an anchor set.

    :param X: data matrix of n by d
    :param knn: the number of nearest neighbors
    :param way: one of ["gaussian", "t_free"]
        "t_free" denote the method proposed in :
            "The constrained laplacian rank algorithm for graph-based clustering"
        "gaussian" denote the heat kernel
    :param t: only needed by gaussian, the bandwidth parameter:
        "mean" / "median" of the selected neighbor distances, or a number
    :param Anchor: Anchor set, m by d; any int (default 0) means "no anchors"
    :param isSym: if True, return (A + A.T) / 2; only applied to the
        n by n graph, ignored when an anchor set is given
    :return: A, a dense array of n by n if Anchor is an int (default),
        otherwise n by m
    :raises ValueError: if ``way`` is not "gaussian" or "t_free"
    """
    if way not in ("gaussian", "t_free"):
        raise ValueError(
            "way must be 'gaussian' or 't_free', got {!r}".format(way))

    N = X.shape[0]
    if isinstance(Anchor, int):
        # n x n graph
        D = EuDist2(X, X, squared=True)
        # Pin every point to rank 0 of its own row so that dropping column 0
        # always drops the point itself, even when X has duplicate rows.
        np.fill_diagonal(D, -1)
        ind_M = np.argsort(D, axis=1)
        np.fill_diagonal(D, 0)

        Val = _kng_weights(D, ind_M, 1, knn, way, t)
        A = np.zeros((N, N))
        matrix_index_assign(A, ind_M[:, 1:(knn + 1)], Val)
        if isSym:
            A = (A + A.T) / 2
    else:
        # n x m graph: every anchor is a legitimate neighbour, nothing to skip
        num_anchor = Anchor.shape[0]
        D = EuDist2(X, Anchor, squared=True)  # n x m
        ind_M = np.argsort(D, axis=1)

        Val = _kng_weights(D, ind_M, 0, knn, way, t)
        A = np.zeros((N, num_anchor))
        matrix_index_assign(A, ind_M[:, :knn], Val)

    return A
コード例 #2
0
ファイル: funs_graph.py プロジェクト: ShenfeiPei/FCDMF
def kng_anchor(X,
               Anchor: np.ndarray,
               knn=20,
               way="gaussian",
               t="mean",
               HSI=False,
               shape=None,
               alpha=0):
    """ Build the n by m similarity graph between X and an anchor set.

    see agci for more detail
    :param X: data matrix of n (a x b in HSI) by d
    :param Anchor: Anchor set, m by d
    :param knn: the number of nearest neighbors
    :param way: one of ["gaussian", "t_free"]
        "t_free" denote the method proposed in :
            "The constrained laplacian rank algorithm for graph-based clustering"
        "gaussian" denote the heat kernel
    :param t: only needed by gaussian, the bandwidth parameter
    :param HSI: compute similarity for HSI image
    :param shape: list, [a, b, c] image: a x b, c: channel; only read when
        HSI is True
    :param alpha: parameter for HSI: weight of the extra distance term
        computed from the 3 x 3 neighbourhood means
    :return: A, a matrix (graph) of n by m
    """
    if shape is None:
        shape = [1, 1, 1]
    N = X.shape[0]
    anchor_num = Anchor.shape[0]

    D = EuDist2(X, Anchor, squared=True)  # n x m
    if HSI:
        # 3 x 3 box filter; the kernel is uniform, so no rotation is needed
        conv = np.ones((3, 3)) / 9
        NData = X.reshape(shape)
        # Allocate a floating result: inheriting an integer dtype from the
        # image would truncate the neighbourhood means.
        MeanData = np.zeros(NData.shape,
                            dtype=np.result_type(NData.dtype, conv.dtype))
        for i in range(shape[-1]):
            MeanData[:, :, i] = signal.convolve2d(NData[:, :, i],
                                                  conv,
                                                  mode='same')
        MeanData = MeanData.reshape(shape[0] * shape[1], shape[2])

        D += EuDist2(MeanData, Anchor, squared=True) * alpha  # n x m
    NN_full = np.argsort(D, axis=1)
    NN = NN_full[:, :knn]  # the knn closest anchors of every sample
    NN_k = NN_full[:, knn]  # the (knn + 1)-th closest anchor

    Val = get_similarity_by_dist(D=D, NN=NN, NN_k=NN_k, knn=knn, way=way, t=t)

    A = np.zeros((N, anchor_num))
    Ifuns.matrix_index_assign(A, NN, Val)
    return A
コード例 #3
0
ファイル: funs.py プロジェクト: ShenfeiPei/EDG
def get_anchor(X, m, way="random"):
    """
    Select m anchor points for the data matrix X.

    :param X: data matrix of n by d
    :param m: the number of anchors
    :param way: one of
        "random"     : m distinct rows of X drawn with random.sample
        "kmeans"     : KMeans cluster centers, init='random'
        "kmeans2"    : for each "kmeans" center, the closest row of X
        "k-means++"  : KMeans cluster centers, init='k-means++'
        "k-means++2" : for each "k-means++" center, the closest row of X
    :return: A, the anchor set of m by d
    :raises ValueError: if ``way`` is not one of the values above
    """
    if way == "random":
        ids = random.sample(range(X.shape[0]), m)
        return X[ids, :]

    kmeans_init = {
        "kmeans": "random",
        "kmeans2": "random",
        "k-means++": "k-means++",
        "k-means++2": "k-means++",
    }
    if way not in kmeans_init:
        raise ValueError("unknown way: {!r}".format(way))

    A = KMeans(m, init=kmeans_init[way]).fit(X).cluster_centers_
    if way.endswith("2"):
        # Replace every center by the nearest actual sample, so that the
        # anchors are rows of X (and not row indices).
        D = EuDist2(A, X)
        ind = np.argmin(D, axis=1)
        A = X[ind, :]
    return A
コード例 #4
0
ファイル: funs.py プロジェクト: ShenfeiPei/EDG
def knn_f(X, knn, squared=True):
    """
    Return, for every row of X, its knn closest rows and their distances.

    :param X: data matrix, passed to EuDist2 as both arguments
    :param knn: the number of columns kept from the sorted neighbor list
    :param squared: forwarded to EuDist2
    :return: (NN, NND): NN holds the neighbor indices, NND the matching
        entries of the distance matrix as gathered by matrix_index_take
    """
    dist = EuDist2(X, X, squared=squared)

    # Rank each point first in its own row while sorting (assuming EuDist2
    # returns non-negative values), then put the true self-distance back.
    np.fill_diagonal(dist, -1)
    order = np.argsort(dist, axis=1)
    np.fill_diagonal(dist, 0)

    nearest = order[:, :knn]
    return nearest, matrix_index_take(dist, nearest)
コード例 #5
0
ファイル: funs_graph.py プロジェクト: ShenfeiPei/FCDMF
def kng(X, knn, way="gaussian", t="mean", self=0, isSym=True):
    """
    Build the n by n k-nearest-neighbor graph of X.

    :param X: data matrix of n by d
    :param knn: the number of nearest neighbors
    :param way: one of ["gaussian", "t_free"]
        "t_free" denote the method proposed in :
            "The constrained laplacian rank algorithm for graph-based clustering"
        "gaussian" denote the heat kernel
    :param t: only needed by gaussian, the bandwidth parameter
    :param self: including self: whether xi is among the knn of xi
        (1: yes, anything else: no)
    :param isSym: True or False, isSym = True by default
    :return: A, a matrix (graph) of n by n
    """
    n_samples, _ = X.shape

    dist = EuDist2(X, X, squared=True)

    # Sort with the diagonal at -1 so that column 0 of the ranking is always
    # the point itself, then restore the zero self-distance.
    np.fill_diagonal(dist, -1)
    ranking = np.argsort(dist, axis=1)
    np.fill_diagonal(dist, 0)

    # Column 0 is xi itself: keep it when self == 1, skip it otherwise.
    first = 0 if self == 1 else 1
    neighbors = ranking[:, first:(first + knn)]
    next_neighbor = ranking[:, first + knn]  # the one right after the knn-th

    weights = get_similarity_by_dist(D=dist, NN=neighbors, NN_k=next_neighbor,
                                     knn=knn, way=way, t=t)

    graph = np.zeros((n_samples, n_samples))
    Ifuns.matrix_index_assign(graph, neighbors, weights)

    return (graph + graph.T) / 2 if isSym else graph
コード例 #6
0
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances as EuDist2

import IDEAL_NPU.funs as Funs
from IDEAL_NPU.cluster import PCN

# Demo: cluster the data returned by Funs.load_Agg with PCN on a 33-NN graph
# and print precision and F1.
knn = 33

X, y_true, N, dim, c_true = Funs.load_Agg()

# Full pairwise squared distances and the per-row ascending ranking.
D_full = EuDist2(X, X, squared=True)
NN_full = np.argsort(D_full, axis=1)

# Drop column 0 of the ranking (presumably the point itself) and keep the
# next knn columns together with their distances.
NN = NN_full[:, 1:(knn + 1)]
NND = Funs.matrix_index_take(D_full, NN)

# Reorder every neighbor list by distance, breaking ties by neighbor index.
for i in range(N):
    row = NN[i]
    row[:] = row[np.lexsort((row, NND[i]))]

print("begin")
PCN_obj = PCN(NN, NND)
y_pred = PCN_obj.cluster()
t = PCN_obj.get_time()
print("end", t)

pre = Funs.precision(y_true=y_true, y_pred=y_pred)
rec = Funs.recall(y_true=y_true, y_pred=y_pred)
f1 = 2 * pre * rec / (pre + rec)  # harmonic mean of precision and recall

for score in (pre, f1):
    print("{}".format(score))
コード例 #7
0
ファイル: Step2.py プロジェクト: ShenfeiPei/efanna
    # NOTE(review): this is the body of a loop whose header is outside the
    # visible snippet; N, X, graph_full_name and e2_full_name come from there.

    # Nothing to do when the distance file for this graph already exists.
    if os.path.exists(e2_full_name):
        continue

    # The graph file is read as a flat int32 array and viewed as N rows.
    NN = np.fromfile(graph_full_name, dtype=np.int32)
    NN = NN.reshape(N, -1)

    # An index >= N cannot address a row of X: report the file as bad,
    # delete it through the shell and move on.
    if np.max(NN) >= N:
        print("Bad graph file, removed")
        os.system("rm {}".format(graph_full_name))
        continue

    # Overwrite column 0 with each row's own index.
    NN[:, 0] = np.array(range(N), dtype=np.int32)
    knn = NN.shape[1]
    NND = np.zeros((N, knn))

    t1 = time.time()
    # Squared norm of every row of X, computed once and handed to EuDist2
    # below (presumably so it does not recompute them — confirm against the
    # EuDist2 in use).
    x_norm = np.sum(X**2, axis=1)

    # Squared distances from point i to the points listed in row i of NN.
    for i in range(N):
        NND[i, :] = EuDist2(X[i, :].reshape(1, -1),
                            X[NN[i, :], :],
                            squared=True,
                            X_norm_squared=x_norm[i:(i + 1)].reshape(1, -1),
                            Y_norm_squared=x_norm[NN[i, :]])
        # NND[i, :] = my.EuDist2(X[i, :].reshape(1, -1), X[NN[i, :], :], squared=True)
    t2 = time.time() - t1
    print(t2)  # elapsed seconds for the distance computation
    # Write the graph back (column 0 was changed above) and store the
    # distances as float64 in the companion file.
    NN.astype(np.int32).tofile(graph_full_name)
    NND.astype(np.float64).tofile(e2_full_name)