Example #1
def point_and_circle_clustering(eig_max=15):
    """
    TO BE COMPLETED.

    Used in question 2.8
    """
    # Generate data and compute number of clusters
    X, Y = point_and_circle(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    #chosen_eig_indices = [1, 2, 3]    # indices of the ordered eigenvalues to pick

    if k == 0:  # compute epsilon
        dists = sd.cdist(X, X, 'euclidean')  # dists[i, j] = euclidean distance between x_i and x_j

        min_tree = min_span_tree(dists)  # boolean mask of the MST edges

        # collect the MST edge lengths; the longest one sets the epsilon threshold
        l = []
        n1, m1 = min_tree.shape
        for i in range(n1):
            for j in range(m1):
                if min_tree[i][j]:
                    l.append(dists[i][j])

        distance_threshold = sorted(l)[-1]  # longest MST edge

        eps = np.exp(-(distance_threshold)**2.0 / (2 * var))
        W = build_similarity_graph(X, var=var, eps=eps, k=k)

    else:
        W = build_similarity_graph(X, var=var, k=k)

    # build laplacians
    L_unn = build_laplacian(W, 'unn')
    L_norm = build_laplacian(W, 'sym')

    #eigenvalues,U = np.linalg.eig(L_unn)
    #indexes = np.argsort(eigenvalues)
    #eigenvalues = eigenvalues[indexes]
    #U = U[:,indexes]
    #chosen_eig_indices = choose_eigenvalues(eigenvalues, eig_max = eig_max)
    chosen_eig_indices = [0, 1]

    Y_unn = spectral_clustering(L_unn,
                                chosen_eig_indices,
                                num_classes=num_classes)
    Y_norm = spectral_clustering(L_norm,
                                 chosen_eig_indices,
                                 num_classes=num_classes)

    plot_clustering_result(X, Y, L_unn, Y_unn, Y_norm, 1)
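
These epsilon-graph examples lean on the helper min_span_tree, whose body is not shown; it is evidently expected to return a boolean mask of the minimum-spanning-tree edges of the distance matrix. Below is a minimal sketch of such a helper built on scipy.sparse.csgraph.minimum_spanning_tree; the name min_span_tree_sketch and the boolean-mask convention are assumptions, not the course's actual code.

import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree


def min_span_tree_sketch(dists):
    """Return a boolean (n x n) mask that is True on the edges of a minimum
    spanning tree of the dense distance matrix dists."""
    mst = minimum_spanning_tree(dists)  # sparse matrix; nonzeros are the MST edge weights
    return mst.toarray() > 0            # boolean mask; each MST edge appears once (not symmetrized)

Under this convention the double loop above and the shorter dists[min_tree].max() used in later examples both recover the longest MST edge, which the Gaussian kernel then maps to eps.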
Example #2
def find_the_bend():
    """
    TO BE COMPLETED

    Used in question 2.3
    :return:
    """

    # the number of samples to generate
    num_samples = 600

    # Generate blobs and compute number of clusters
    X, Y = blobs(num_samples, 4, 0.2)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1  # exponential_euclidean's sigma^2
    laplacian_normalization = 'unn'  # either 'unn' (unnormalized), 'sym' (symmetric) or 'rw' (random-walk) normalization

    # build laplacian
    if k == 0:
        dists = sd.cdist(X, X, metric="euclidean")
        min_tree = min_span_tree(dists)
        distance_threshold = dists[min_tree].max()
        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        print(eps)
        W = build_similarity_graph(X, var=var, k=k, eps=eps)
    else:
        W = build_similarity_graph(X, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)
    """
    compute first 15 eigenvalues and call choose_eigenvalues() to choose which ones to use. 
    """
    eigenvalues, vects = scipy.linalg.eig(L)
    eigenvalues = sorted(eigenvalues.real)
    #    for ind,val in enumerate(eigenvalues[:15]):
    #        plt.scatter(ind, val)
    #    plt.xlabel("index of the eigenvalue")
    #    plt.ylabel("value of the eigenvalue")
    #    chosen_eig_indices =  [0,1,2,3]  # indices of the ordered eigenvalues to pick
    """
    compute spectral clustering solution using a non-adaptive method first, and an adaptive one after (see handout) 
    Y_rec = (n x 1) cluster assignments [0,1,..., c-1]    
    """
    # run spectral clustering
    #    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)
    Y_rec_adaptive = spectral_clustering_adaptive(L, num_classes=num_classes)

    #    plot_the_bend(X, Y, L, Y_rec, eigenvalues)
    plot_the_bend(X, Y, L, Y_rec_adaptive, eigenvalues)
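
spectral_clustering_adaptive is called here (and in several later examples, sometimes with an eig_max argument) without its definition. The following is a minimal sketch under the assumption that "adaptive" means an eigengap rule on the ordered eigenvalues followed by k-means on the selected eigenvectors; the _sketch suffix and the defaults are illustrative, not the assignment's actual code.

import numpy as np
from sklearn.cluster import KMeans


def spectral_clustering_adaptive_sketch(L, num_classes=2, eig_max=15):
    """Embed the points with the eigenvectors selected by an eigengap rule,
    then cluster the embedding with k-means."""
    eigenvalues, U = np.linalg.eig(L)
    order = np.argsort(eigenvalues.real)
    eigenvalues = eigenvalues.real[order]
    U = U[:, order].real
    gaps = np.diff(eigenvalues[:eig_max])   # consecutive eigenvalue gaps
    n_keep = int(np.argmax(gaps)) + 1       # keep everything before the largest gap
    embedding = U[:, :n_keep]
    return KMeans(n_clusters=num_classes, n_init=10).fit_predict(embedding)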
Example #3
def two_blobs_clustering():
    """
    TO BE COMPLETED

    Clustering of two blobs. Used in questions 2.1 and 2.2
    """

    question = '2.2'

    # Get data and compute number of classes
    X, Y = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))
    n = X.shape[0]
    """
    Choose parameters
    """
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'rw'

    if question == '2.1':
        # as the graph has to be connected in this question, we construct an epsilon-graph using an MST
        dists = pairwise_distances(X)  # dists[i, j] = euclidean distance between x_i and x_j
        min_tree = min_span_tree(dists)
        distance_threshold = np.max(dists[min_tree])
        eps = np.exp(-distance_threshold**2 / (2 * var))

        # choice of eigenvectors to use
        chosen_eig_indices = [1]  # indices of the ordered eigenvalues to pick

        # build similarity graph and laplacian
        W = build_similarity_graph(X, var=var, eps=eps)
        L = build_laplacian(W, laplacian_normalization)

    elif question == '2.2':
        # choice of eigenvectors to use
        chosen_eig_indices = [0, 1]

        # choice of k for the k-nn graph
        k = 20

        # build similarity graph and laplacian
        W = build_similarity_graph(X, var=var, k=k)
        L = build_laplacian(W, laplacian_normalization)

    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    # Plot results
    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
def two_blobs_clustering():
    """
    TO BE COMPLETED

    Clustering of two blobs. Used in questions 2.1 and 2.2
    """

    # Get data and compute number of classes
    X, Y = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 3
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'
    chosen_eig_indices = [1, 2, 3]  # indices of the ordered eigenvalues to pick

    # build laplacian
    W = build_similarity_graph(X, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)

    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    # Plot results
    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
def two_moons_clustering():
    """
    TO BE COMPLETED.

    Used in question 2.7
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 3
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'
    chosen_eig_indices = [1, 2, 3]  # indices of the ordered eigenvalues to pick

    # build laplacian
    W = build_similarity_graph(X, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
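
Every example calls build_similarity_graph with either k (k-NN graph) or eps (epsilon graph) and a Gaussian bandwidth var, but the helper itself is never shown. Here is a minimal sketch of what it plausibly does, assuming Gaussian similarities and an OR-symmetrized k-NN rule; the real helper may differ in these conventions.

import numpy as np
import scipy.spatial.distance as sd


def build_similarity_graph_sketch(X, var=1.0, eps=0.0, k=0):
    """Gaussian similarity graph: epsilon-thresholded if k == 0, k-NN otherwise."""
    dists = sd.cdist(X, X, 'euclidean')
    W = np.exp(-dists ** 2 / (2 * var))
    np.fill_diagonal(W, 0.0)                 # no self-loops
    if k == 0:                               # epsilon graph: keep similarities above eps
        W[W < eps] = 0.0
    else:                                    # symmetric (OR-rule) k-NN graph
        neighbours = np.argsort(-W, axis=1)[:, :k]
        mask = np.zeros_like(W, dtype=bool)
        mask[np.arange(W.shape[0])[:, None], neighbours] = True
        mask = mask | mask.T
        W = np.where(mask, W, 0.0)
    return W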
Example #6
def point_and_circle_clustering():
    """
    TO BE COMPLETED.

    Used in question 2.8
    """
    # Generate data and compute number of clusters
    X, Y = point_and_circle(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 50
    var = 1.0  # exponential_euclidean's sigma^2

    chosen_eig_indices = [0, 1]  # indices of the ordered eigenvalues to pick

    # build laplacian
    W = build_similarity_graph(X, var=var, k=k)

    L_unn = build_laplacian(W, 'unn')
    L_norm = build_laplacian(W, 'rw')

    Y_unn = spectral_clustering(L_unn,
                                chosen_eig_indices,
                                num_classes=num_classes)
    Y_norm = spectral_clustering(L_norm,
                                 chosen_eig_indices,
                                 num_classes=num_classes)

    plot_clustering_result(X, Y, L_unn, Y_unn, Y_norm, 1)
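
spectral_clustering is likewise assumed. A minimal sketch consistent with how it is called here (Laplacian, list of eigenvalue indices, number of clusters): embed the points with the chosen eigenvectors and run k-means. The use of np.linalg.eig and of sklearn's KMeans is an assumption.

import numpy as np
from sklearn.cluster import KMeans


def spectral_clustering_sketch(L, chosen_eig_indices, num_classes=2):
    """Cluster the rows of the spectral embedding given by the chosen
    eigenvectors of L (ordered by increasing eigenvalue)."""
    eigenvalues, U = np.linalg.eig(L)
    order = np.argsort(eigenvalues.real)     # sort eigenpairs by eigenvalue
    U = U[:, order].real
    embedding = U[:, chosen_eig_indices]     # n x |chosen| spectral embedding
    return KMeans(n_clusters=num_classes, n_init=10).fit_predict(embedding)

In this example the same index list [0, 1] is passed for both the unnormalized and the random-walk Laplacian, so the comparison in plot_clustering_result isolates the effect of the normalization.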
Example #7
def point_and_circle_clustering():
    """
    TO BE COMPLETED.

    Used in question 2.8
    """
    # Generate data and compute number of clusters
    X, Y = point_and_circle(600, sigma=.2)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    eps = 0.4
    var = 1  # exponential_euclidean's sigma^2

    # build laplacian
    W = build_similarity_graph(X, var=var, eps=eps, k=k)
    L_unn = build_laplacian(W, 'unn')
    L_norm = build_laplacian(W, 'rw')

    Y_unn = spectral_clustering_adaptive(L_unn, num_classes=num_classes)
    Y_norm = spectral_clustering_adaptive(L_norm, num_classes=num_classes)

    plot_clustering_result(X, Y, L_unn, Y_unn, Y_norm, 1)
Example #8
def two_moons_clustering():
    """
    TO BE COMPLETED.

    Used in question 2.7
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    eps = 0.8
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'rw'

    # build laplacian
    W = build_similarity_graph(X, var=var, eps=eps, k=k)
    L = build_laplacian(W, laplacian_normalization)

    # spectral clustering
    Y_rec = spectral_clustering_adaptive(L, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
def image_segmentation(input_img='fruit_salad.bmp', eig_max=15):
    """
    TO BE COMPLETED

    Function to perform image segmentation.

    :param input_img: name of the image file in /data (e.g. 'four_elements.bmp')
    """
    filename = os.path.join('data', input_img)

    X = io.imread(filename)
    X = (X - np.min(X)) / (np.max(X) - np.min(X))
    #print(X.shape)

    im_side = np.size(X, 1)
    Xr = X.reshape(im_side**2, 3)
    #print(Xr.shape)
    """
    Y_rec should contain an index from 0 to c-1 where c is the     
     number of segments you want to split the image into          
    """
    """
    Choose parameters
    """

    var = 5
    k = 45
    laplacian_normalization = 'unn'

    W = build_similarity_graph(Xr, var=var, k=k)

    L = build_laplacian(W, laplacian_normalization)

    E, U = np.linalg.eig(L)
    indexes = np.argsort(E)
    E = E[indexes]
    U = U[:, indexes]
    chosen_eig_indices = choose_eigenvalues(E, eig_max)
    #chosen_eig_indices = [0,1,2,3]

    num_classes = len(chosen_eig_indices)

    #print(len(chosen_eig_indices))

    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    plt.figure()

    plt.subplot(1, 2, 1)
    plt.imshow(X)

    plt.subplot(1, 2, 2)
    Y_rec = Y_rec.reshape(im_side, im_side)
    plt.imshow(Y_rec)

    plt.show()
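
The reshape above assumes a square image (im_side is read from axis 1 and reused for both dimensions). A small shape-agnostic sketch, with hypothetical helper names, for images that are not square:

import numpy as np


def flatten_image(X):
    """Flatten an (H, W, C) image into (H*W, C) pixel rows, keeping (H, W) so
    the cluster labels can be reshaped back afterwards."""
    h, w, c = X.shape
    return X.reshape(h * w, c), (h, w)


def labels_to_image(Y_rec, shape):
    """Reshape the flat label vector back into an (H, W) label image."""
    return np.asarray(Y_rec).reshape(shape)

For the square test images this reduces to the im_side**2 reshape used above.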
Example #10
def two_moons_clustering(eig_max=15):
    """
    TO BE COMPLETED.

    Used in question 2.7
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'

    #    chosen_eig_indices = [0, 1, 2]    # indices of the ordered eigenvalues to pick

    if k == 0:  # compute epsilon
        dists = sd.cdist(X, X, 'euclidean')  # dists[i, j] = euclidean distance between x_i and x_j

        min_tree = min_span_tree(dists)  # boolean mask of the MST edges

        # collect the MST edge lengths; the longest one sets the epsilon threshold
        l = []
        n1, m1 = min_tree.shape
        for i in range(n1):
            for j in range(m1):
                if min_tree[i][j]:
                    l.append(dists[i][j])

        distance_threshold = sorted(l)[-1]  # longest MST edge

        eps = np.exp(-(distance_threshold)**2.0 / (2 * var))

    # build laplacian
    W = build_similarity_graph(X, var=var, eps=eps, k=k)
    L = build_laplacian(W, laplacian_normalization)

    # choose the eigenvalues
    eigenvalues, U = np.linalg.eig(L)
    indexes = np.argsort(eigenvalues)
    eigenvalues = eigenvalues[indexes]
    U = U[:, indexes]
    chosen_eig_indices = choose_eigenvalues(eigenvalues, eig_max=eig_max)

    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
Example #11
def image_segmentation(input_img='four_elements.bmp'):
    """
    TO BE COMPLETED

    Function to perform image segmentation.

    :param input_img: name of the image file in /data (e.g. 'four_elements.bmp')
    """
    filename = os.path.join('data', input_img)

    X = io.imread(filename)
    X = (X - np.min(X)) / (np.max(X) - np.min(X))

    im_side = np.size(X, 1)
    Xr = X.reshape(im_side**2, 3)
    print(Xr.shape)
    """
    Y_rec should contain an index from 0 to c-1 where c is the     
     number of segments you want to split the image into          
    """
    """
    Choose parameters
    """
    var = 10  # Tried multiple values before fixing it to this
    k = 60  # Tried multiple values before fixing it to this
    laplacian_normalization = 'sym'
    chosen_eig_indices = [1, 2]  # eigenvectors to use for the clustering; not used with adaptive spectral clustering
    num_classes = 5  # fixed by hand; we could also inspect the eigenvalue spectrum to infer this number

    # First we build the similarity graph as an epsilon-graph (this is the step that takes a few minutes)
    #    W = build_similarity_graph(Xr, var=var, k=0, eps= 0.5)
    W = build_similarity_graph(Xr, var=var, k=0, eps=0.7)
    # Build the Laplacian matrix
    L = build_laplacian(W, laplacian_normalization)
    # Perform the spectral clustering, choosing the eigenvectors either by hand
    # or automatically (adaptive version, based on first-order differences of the eigenvalues)

    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)
    #    Y_rec = spectral_clustering_adaptive(L, num_classes=num_classes)

    plt.figure()

    plt.subplot(1, 2, 1)
    plt.imshow(X)
    #    plt.imshow(X[:30,:30,:])
    plt.subplot(1, 2, 2)
    Y_rec = Y_rec.reshape(im_side, im_side)
    #    Y_rec = Y_rec.reshape(30,30)
    plt.imshow(Y_rec)

    plt.show()
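
The commented-out [:30, :30] lines hint at segmenting only a crop to keep the dense n x n similarity matrix affordable. A sketch of that idea as a helper; the name and default crop size are illustrative, not part of the original code.

def crop_for_segmentation(X, side=30):
    """Work on a small square crop so the dense (side**2 x side**2) similarity
    matrix stays tractable; returns the crop and its flattened pixel rows."""
    crop = X[:side, :side, :]            # top-left side x side patch of the image
    return crop, crop.reshape(side ** 2, 3)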
Example #12
def two_blobs_clustering():
    """
    TO BE COMPLETED

    Clustering of two blobs. Used in questions 2.1 and 2.2
    """

    # Get data and compute number of classes
    X, Y = blobs(50, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'
    chosen_eig_indices = [0, 1, 2]  # indices of the ordered eigenvalues to pick

    if k == 0:  # compute epsilon
        dists = sd.cdist(X, X, 'euclidean')  # dists[i, j] = euclidean distance between x_i and x_j

        min_tree = min_span_tree(dists)  # boolean mask of the MST edges

        # collect the MST edge lengths
        l = []
        n1, m1 = min_tree.shape
        for i in range(n1):
            for j in range(m1):
                if min_tree[i][j]:
                    l.append(dists[i][j])

        #distance_threshold = sorted(l)[-1]
        distance_threshold = sorted(l)[-2]  # second-longest MST edge

        eps = np.exp(-(distance_threshold)**2.0 / (2 * var))
    #####

    # build laplacian
    W = build_similarity_graph(X, var=var, eps=eps, k=k)
    plot_graph_matrix(X, Y, W)

    L = build_laplacian(W, laplacian_normalization)

    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    # Plot results
    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
Example #13
def two_moons_clustering():
    """
    TO BE COMPLETED.

    Used in question 2.7
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'
    chosen_eig_indices = [0, 1]  # indices of the ordered eigenvalues to pick

    # build laplacian
    if k == 0:
        dists = sd.cdist(X, X, metric="euclidean")
        min_tree = min_span_tree(dists)
        distance_threshold = dists[min_tree].max()
        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        print(eps)
        W = build_similarity_graph(X, var=var, k=k, eps=eps)
    else:
        W = build_similarity_graph(X, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)

    #    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)
    #
    #    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))

    Y_rec_adaptive = spectral_clustering_adaptive(L, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec_adaptive,
                           KMeans(num_classes).fit_predict(X))
Example #14
def two_blobs_clustering():
    """
    TO BE COMPLETED

    Clustering of two blobs. Used in questions 2.1 and 2.2
    """

    # Get data and compute number of classes
    X, Y = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    laplacian_normalization = 'unn'
    chosen_eig_indices = [0, 1]  # indices of the ordered eigenvalues to pick

    # build laplacian
    if k == 0:
        dists = sd.cdist(X, X, metric="euclidean")
        min_tree = min_span_tree(dists)
        distance_threshold = dists[min_tree].max()
        eps = np.exp(-distance_threshold**2.0 / (2 * var))

        W = build_similarity_graph(X, var=var, k=k, eps=eps)
    else:
        W = build_similarity_graph(X, var=var, k=k)

    L = build_laplacian(W, laplacian_normalization)

    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    # Plot results
    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
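
build_laplacian appears throughout with the 'unn', 'sym' and 'rw' options. Below is a minimal sketch of the three standard graph Laplacians it presumably implements; the signature mirrors how it is called here, but details such as the handling of isolated nodes are assumptions.

import numpy as np


def build_laplacian_sketch(W, laplacian_normalization='unn'):
    """Standard graph Laplacians: 'unn' -> D - W, 'sym' -> I - D^{-1/2} W D^{-1/2},
    'rw' -> I - D^{-1} W."""
    degrees = W.sum(axis=1)
    D = np.diag(degrees)
    if laplacian_normalization == 'unn':
        return D - W
    inv = 1.0 / np.maximum(degrees, 1e-12)    # guard against isolated nodes
    if laplacian_normalization == 'sym':
        D_inv_sqrt = np.diag(np.sqrt(inv))
        return np.eye(W.shape[0]) - D_inv_sqrt @ W @ D_inv_sqrt
    if laplacian_normalization == 'rw':
        return np.eye(W.shape[0]) - np.diag(inv) @ W
    raise ValueError("laplacian_normalization must be 'unn', 'sym' or 'rw'")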
Example #15
def image_segmentation(input_img='four_elements.bmp'):
    """
    TO BE COMPLETED

    Function to perform image segmentation.

    :param input_img: name of the image file in /data (e.g. 'four_elements.bmp')
    """
    filename = os.path.join('data', input_img)

    X = io.imread(filename)
    X = (X - np.min(X)) / (np.max(X) - np.min(X))

    im_side = np.size(X, 1)
    Xr = X.reshape(im_side ** 2, 3)
    """
    Y_rec should contain an index from 0 to c-1 where c is the     
     number of segments you want to split the image into          
    """

    """
    Choose parameters
    """
    var = 1.0
    k = 25
    laplacian_normalization = 'rw'
    chosen_eig_indices = [1, 2, 3, 4, 5]
    num_classes = 5

    W = build_similarity_graph(Xr, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    plt.figure()

    plt.subplot(1, 2, 1)
    plt.imshow(X)

    plt.subplot(1, 2, 2)
    Y_rec = Y_rec.reshape(im_side, im_side)
    plt.imshow(Y_rec)

    plt.show()
Example #16
def find_the_bend():
    """
    TO BE COMPLETED

    Used in question 2.3
    :return:
    """

    # the number of samples to generate
    num_samples = 600

    # Generate blobs and compute number of clusters
    # var_blobs = 0.03  # question 2.3.
    var_blobs = 0.2  # question 2.4.
    X, Y = blobs(num_samples, 4, var_blobs)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 20
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'rw'  # 'unn' (unnormalized), 'sym' (symmetric) or 'rw' (random-walk) normalization

    # build laplacian
    W = build_similarity_graph(X, var=var, k=k)
    L = build_laplacian(W, laplacian_normalization)
    """
    compute first 15 eigenvalues and call choose_eigenvalues() to choose which ones to use. 
    """
    eigenvalues, _ = scipy.linalg.eig(L)
    eigenvalues = eigenvalues[np.argsort(eigenvalues)].real
    eigenvalues = eigenvalues[:15]
    chosen_eig_indices = choose_eigenvalues(
        eigenvalues)  # indices of the ordered eigenvalues to pick
    """
    compute spectral clustering solution using a non-adaptive method first, and an adaptive one after (see handout) 
    Y_rec = (n x 1) cluster assignments [0,1,..., c-1]    
    """
    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)
    # Y_rec_adaptive = spectral_clustering_adaptive(L, num_classes=num_classes)

    plot_the_bend(X, Y, L, Y_rec, eigenvalues)
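
choose_eigenvalues is called on the first 15 ordered eigenvalues but never shown. A minimal sketch assuming the usual eigengap heuristic: keep every index up to the largest gap between consecutive eigenvalues. The _sketch suffix and the eig_max keyword mirror how the helper is called in these examples.

import numpy as np


def choose_eigenvalues_sketch(eigenvalues, eig_max=15):
    """Return the indices of the ordered eigenvalues up to (and including) the
    one just before the largest gap among the first eig_max eigenvalues."""
    vals = np.asarray(eigenvalues)[:eig_max]
    gaps = np.diff(vals)           # gaps[i] = vals[i+1] - vals[i]
    cut = int(np.argmax(gaps))     # position of the largest gap
    return list(range(cut + 1))

For well-separated blobs this returns the first num_classes indices, which is exactly the bend that plot_the_bend is meant to reveal.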
Example #17
def parameter_sensitivity():
    """
    TO BE COMPLETED.

    A function to test spectral clustering sensitivity to parameter choice.

    Used in question 2.9
    """
    # the number of samples to generate
    num_samples = 500
    """
    Choose parameters
    """
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'rw'
    # chosen_eig_indices = [0, 1]
    """
    Choose candidate parameters
    """
    # the number of neighbours for the graph or the epsilon threshold
    # parameter_candidate = np.arange(3, 33, 3)
    parameter_candidate = np.linspace(0.2, 1, 9)
    parameter_performance = []

    for param in parameter_candidate:
        # Generate data
        X, Y = two_moons(num_samples)
        num_classes = len(np.unique(Y))

        W = build_similarity_graph(X, eps=param)
        # W = build_similarity_graph(X, k=param)
        L = build_laplacian(W, laplacian_normalization)

        Y_rec = spectral_clustering_adaptive(L, num_classes)
        # Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes)

        parameter_performance += [skm.adjusted_rand_score(Y, Y_rec)]

    plt.figure()
    plt.plot(parameter_candidate, parameter_performance)
    plt.title('parameter sensitivity')
    plt.show()
Example #18
def find_the_bend(eig_max=15, blob_var=0.03):
    """
    TO BE COMPLETED

    Used in question 2.3
    :return:
    """
    eig_max -= 1  # to count starting from 0
    # the number of samples to generate
    num_samples = 600

    # Generate blobs and compute number of clusters
    X, Y = blobs(num_samples, 4, blob_var)
    num_classes = len(np.unique(Y))
    """
    Choose parameters
    """
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'sym'  # either 'unn' (unnormalized), 'sym' (symmetric) or 'rw' (random-walk) normalization

    if k == 0:  # compute epsilon
        dists = sd.cdist(X, X, 'euclidean')  # dists[i, j] = euclidean distance between x_i and x_j

        min_tree = min_span_tree(dists)  # boolean mask of the MST edges

        # collect the MST edge lengths
        l = []
        n1, m1 = min_tree.shape
        for i in range(n1):
            for j in range(m1):
                if min_tree[i][j]:
                    l.append(dists[i][j])

        #distance_threshold = sorted(l)[-1]
        distance_threshold = sorted(l)[-num_classes]  # num_classes-th longest MST edge

        eps = np.exp(-(distance_threshold)**2.0 / (2 * var))

    # build laplacian
    W = build_similarity_graph(X, var=var, eps=eps, k=k)
    L = build_laplacian(W, laplacian_normalization)
    """
    compute first 15 eigenvalues and call choose_eigenvalues() to choose which ones to use. 
    """
    eigenvalues, U = np.linalg.eig(L)
    indexes = np.argsort(eigenvalues)
    eigenvalues = eigenvalues[indexes]
    U = U[:, indexes]

    chosen_eig_indices = choose_eigenvalues(
        eigenvalues,
        eig_max=eig_max)  # indices of the ordered eigenvalues to pick

    plt.plot(range(len(eigenvalues)), eigenvalues, 'r+')  # eigenvalue vs. index, to spot the bend
    """
    compute spectral clustering solution using a non-adaptive method first, and an adaptive one after (see handout) 
    Y_rec = (n x 1) cluster assignments [0,1,..., c-1]    
    """
    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)
    Y_rec_adaptive = spectral_clustering_adaptive(L,
                                                  num_classes=num_classes,
                                                  eig_max=eig_max)

    plot_the_bend(X, Y, L, Y_rec_adaptive, eigenvalues)
Example #19
def parameter_sensitivity(eig_max=15):
    """
    TO BE COMPLETED.

    A function to test spectral clustering sensitivity to parameter choice.

    Used in question 2.9
    """
    # the number of samples to generate
    num_samples = 500
    """
    Choose parameters
    """
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'unn'
    #chosen_eig_indices = [0, 1, 2]
    """
    Choose candidate parameters
    """
    # number of neighbours k for the k-NN graph (k = 0 falls back to the MST-based epsilon graph below)
    parameter_candidate = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    parameter_performance = []

    for k in parameter_candidate:
        # Generate data
        X, Y = two_moons(num_samples, 1, 0.02)
        num_classes = len(np.unique(Y))

        if k == 0:  # compute epsilon
            dists = sd.cdist(X, X, 'euclidean')  # dists[i, j] = euclidean distance between x_i and x_j

            min_tree = min_span_tree(dists)  # boolean mask of the MST edges

            l = []
            n1, m1 = min_tree.shape
            for i in range(n1):
                for j in range(m1):
                    if min_tree[i][j]:
                        l.append(dists[i][j])
            distance_threshold = sorted(l)[-1]  # longest MST edge
            eps = np.exp(-(distance_threshold)**2.0 / (2 * var))
            W = build_similarity_graph(X, var=var, eps=eps, k=k)
        else:
            W = build_similarity_graph(X, k=k)
        L = build_laplacian(W, laplacian_normalization)

        eigenvalues, U = np.linalg.eig(L)
        indexes = np.argsort(eigenvalues)
        eigenvalues = eigenvalues[indexes]
        U = U[:, indexes]
        chosen_eig_indices = choose_eigenvalues(eigenvalues, eig_max=eig_max)

        Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes)

        parameter_performance += [skm.adjusted_rand_score(Y, Y_rec)]

    plt.figure()
    plt.plot(parameter_candidate, parameter_performance)
    plt.title('parameter sensitivity')
    plt.show()


#parameter_sensitivity()