# Spectral clustering of the graph whose adjacency matrix is A, done twice:
# [A] with the unnormalized Laplacian and [B] with the degree-normalized
# adjacency matrix. Reads A (a scipy sparse matrix) and k (the number of
# clusters) from the enclosing script; produces c_idx_un and c_idx.

# [A] Unnormalized case
# c_idx_un: Clusters identified using unnormalized Laplacian version

# Degree vector: column sums of A as a flat array of length A.shape[0]
# (the reshape only succeeds when A is square).
sumMat = np.array(A.sum(axis=0)).reshape((A.shape[0], ))
# Degree Matrix: sparse, with the degrees on the main diagonal
D = csc_matrix((sumMat, (np.arange(0, A.shape[0], 1), np.arange(0, A.shape[1], 1))))
L = D - A  # Compute Laplacian

# Compute k smallest eigenvalues and eigenvec. of L.
# toarray() always yields a 2-D array; the previous todense()/squeeze()
# combination collapsed a 1 x 1 Laplacian to a 0-d array that eig rejects.
# NOTE(review): eig may return complex output; if A is guaranteed to be
# symmetric, np.linalg.eigh would be the better fit -- confirm before switching.
S, V = np.linalg.eig(L.toarray())
sortidx = S.argsort()  # ascending eigenvalue order
V = V[:, sortidx]
c_idx_un = kmeans.kmeans_python(V[:, 0:k], k)  # Partition X by k-means

# [B] Normalized case
# c_idx: Clusters identified using normalized Laplacian version

# Degree Matrix (normalized): dense diagonal matrix holding 1 / sqrt(degree).
# NOTE(review): a zero entry in sumMat gives inf here and NaN in L below;
# presumably A has no isolated nodes -- verify against the caller.
D = np.diag(1 / np.sqrt(sumMat))
# Compute Laplacian: (A^T D)^T = D A because D is diagonal, then D A D.
# Bound method calls replace the unbound csr_matrix.tocsc / csc_matrix.dot
# forms, which depended on the exact sparse class of A.T.
L = A.T.tocsc().dot(D).T
L = L.dot(D)

# Compute k largest eigenvalues and eigenvec. of L
S, V = np.linalg.eig(L)
sortidx = S.argsort()[::-1]  # descending eigenvalue order
X = V[:, sortidx][:, 0:k]

# Normalize X row-wise: divide every row by its Euclidean norm.
norm2 = np.power(X, 2).sum(axis=1).reshape(-1, 1)
X = X / (np.sqrt(norm2))
c_idx = kmeans.kmeans_python(X, k)  # Partition X by k-means
# Normalized spectral clustering using a randomized eigendecomposition,
# followed by loading the node-label lookup table. Reads D, A and k from the
# enclosing script; produces c_idx and idx2names.

# Compute Laplacian: D A D
L = D.dot(A)
L = L.dot(D)

# print is called with a single parenthesized argument so that the same text
# is printed on Python 2 and the line also parses on Python 3.
print('performing randomized eigendecomposition ...\n')
tic = time.time()
# NOTE(review): presumably X holds the leading k eigenvectors (one row per
# node) and V the matching eigenvalues -- confirm against rand_eig.
X, V = rand_eig.rand_eig_function(L, k)
toc = time.time()
print('Elapsed time (Rand_Eig) is %f seconds \n' % float(toc - tic))

# Normalize X row-wise: divide every row by its Euclidean norm.
norm2 = np.power(X, 2).sum(axis=1).reshape(-1, 1)
X = X / (np.sqrt(norm2))

print('performing our vectorized kmeans ...\n')
tic = time.time()
c_idx = kmeans.kmeans_python(X, k)  # Partition X by k-means
toc = time.time()
print('Elapsed time (k_mean) is %f seconds \n' % float(toc - tic))
# Clustering algorithm End

## Get node labels (paper names)
# Each line of the file is "<index>\t<name>"; the index is kept as a string key.
idx2names = {}
# The with-block closes the file; the bare open() call used before never did.
with open('title_inverse_index.txt') as names_file:
    for line in names_file:
        (index, name) = line.split("\t")
        idx2names[index] = name.replace('\n', '')

## Output team names partitioned by clusters obtained from above
# NOTE(review): the output loop is cut off at the end of this fragment (no
# loop body is present), so its two header lines are kept verbatim as
# comments to keep the file syntactically valid.
# with open('clusters.txt', 'w') as fid:
#     for i in range(0, k):
##
# Demo on 2-D points: plot the raw data, plot a plain k-means partition of
# it, then build the squared pairwise-distance matrix. Reads the coordinate
# arrays x and y from the enclosing script.

# raw_input exists only on Python 2; fall back to input on Python 3 so the
# pauses below work on either interpreter.
try:
    _wait_for_key = raw_input
except NameError:
    _wait_for_key = input

# Original data figure
# Points side by side: one row per point, x and y joined along axis 1.
data = np.concatenate((x, y), axis=1)
plt.figure(1)
# plt.hold(True) was dropped: pyplot.hold was removed in Matplotlib 3.0, and
# successive plot calls already draw onto the same axes by default.
plt.plot(x, y, 'k.')
plt.title('original data')
plt.show(block=False)
_wait_for_key('press any key to continue ...')

# run kmeans on the original coordinates;
K = 2
idx = kmeans.kmeans_python(data, K)
plt.figure(2)
# Cluster 0 in red, cluster 1 in blue, both on the same axes.
plt.plot(x[idx == 0], y[idx == 0], 'r.')
plt.plot(x[idx == 1], y[idx == 1], 'b.')
plt.title('K-means')
plt.show(block=False)
_wait_for_key('press any key to continue ...')

# Distance between two points: squared Euclidean distance for every pair
distmat = np.power(squareform(pdist(data)), 2)
# Distance threshold to cluster points within distmat distance of each other
# NOTE(review): this only rewrites entries that are exactly equal to 1 as 2,
# which does not look like a threshold -- confirm the intended rule.
distmat[distmat == 1] = 2