# [A] Unnormalized case
# c_idx_un: cluster labels obtained from the unnormalized Laplacian L = D - A

# Degree matrix: the column sums of A placed on the diagonal of a sparse matrix.
n_rows, n_cols = A.shape
sumMat = np.array(A.sum(axis=0)).reshape((n_rows,))
diag_rows = np.arange(n_rows)
diag_cols = np.arange(n_cols)
D = csc_matrix((sumMat, (diag_rows, diag_cols)))
L = D - A  # Unnormalized Laplacian

# Full dense eigendecomposition: every eigenpair is computed, not only k of
# them.  The eigenvector columns are then reordered by ascending eigenvalue.
# NOTE(review): if A is symmetric, np.linalg.eigh would guarantee real-valued
# output -- confirm the symmetry of A before switching.
dense_laplacian = np.array(L.todense()).squeeze()
S, V = np.linalg.eig(dense_laplacian)
sortidx = np.argsort(S)
V = V[:, sortidx]

# Hand the first k sorted eigenvector columns to k-means.
c_idx_un = kmeans.kmeans_python(V[:, :k], k)

# [B] Normalized case
# c_idx: cluster labels obtained from the normalized matrix D * A * D, where D
# holds 1/sqrt(degree) on its diagonal.

# Degree scaling.  A zero degree would give 1/0 = inf here and inf * 0 = NaN in
# the products below, so those nodes get a scale factor of 0 instead.
with np.errstate(divide='ignore'):
    inv_sqrt_deg = 1 / np.sqrt(sumMat)
inv_sqrt_deg[sumMat == 0] = 0
D = np.diag(inv_sqrt_deg)

# D * A is formed as (A^T * D^T)^T so the product goes through the sparse
# matrix's own dot().  Instance methods are used (rather than calling through
# the csc_matrix / csr_matrix classes) so any sparse storage format of A works.
L = A.T.dot(D.T).T
L = L.dot(D)

# Full dense eigendecomposition; keep the k eigenvectors whose eigenvalues sort
# last (largest).
# NOTE(review): if A is symmetric, np.linalg.eigh would guarantee real-valued
# output -- confirm the symmetry of A before switching.
S, V = np.linalg.eig(L)
sortidx = S.argsort()[::-1]
X = V[:, sortidx][:, 0:k]

# Divide every row of X by the square root of its sum of squares (unit length
# for real-valued X).  An all-zero row is left as zeros instead of being
# divided by 0.
norm2 = np.power(X, 2).sum(axis=1)
norm2.shape = (norm2.shape[0], 1)
norm2[norm2 == 0] = 1
X = X / np.sqrt(norm2)
c_idx = kmeans.kmeans_python(X, k)  # Partition the rows of X by k-means
# Normalized matrix L = D * A * D, built from whatever D is currently bound.
# D is wrapped in csc_matrix so the first product uses SciPy's sparse-aware
# dot(): when D is a dense ndarray (as np.diag produces) and A is sparse,
# ndarray.dot does not multiply through the sparse operand.  Wrapping a D that
# is already sparse only converts its storage format.
L = csc_matrix(D).dot(A)
L = L.dot(D)

# Single-argument parenthesized print emits the same text on Python 2 and is
# also valid Python 3 syntax.
print('performing randomized eigendecomposition ...\n')
tic = time.time()
X, V = rand_eig.rand_eig_function(L, k)
toc = time.time()
print('Elapsed time (Rand_Eig) is %f seconds \n' % (toc - tic))


# Divide every row of X by the square root of its sum of squares.
norm2 = np.power(X, 2).sum(axis=1)
norm2.shape = (norm2.shape[0], 1)
X = X / np.sqrt(norm2)
print('performing our vectorized kmeans ...\n')
tic = time.time()
c_idx = kmeans.kmeans_python(X, k)  # Partition the rows of X by k-means
toc = time.time()
print('Elapsed time (k_mean) is %f seconds \n' % (toc - tic))

# Clustering algorithm End


## Get node labels (paper names)
# Build the lookup from node index to label.  Every line of the index file is
# expected to be "<index>\t<name>"; a line that does not split into exactly two
# tab-separated fields raises ValueError.  The index is kept as the string read
# from the file.  The file is opened in a with-block so the handle is closed
# as soon as the loop finishes.
idx2names = {}
with open('title_inverse_index.txt') as index_file:
    for line in index_file:
        (index, name) = line.split("\t")
        idx2names[index] = name.replace('\n', '')
    
## Output team names partitioned by clusters obtained from above
with open('clusters.txt', 'w') as fid:
    for i in range(0, k):
Example #3
0
##
# Figure 1: the raw points.
# x and y are joined along axis 1, so data has one row per point.
data = np.concatenate([x, y], axis=1)

# NOTE(review): plt.hold was deprecated in Matplotlib 2.0 and removed in 3.0 --
# confirm which Matplotlib version this script targets.
plt.figure(1)
plt.plot(x, y, 'k.')
plt.hold(True)
plt.title('original data')

plt.show(block=False)
raw_input('press any key to continue ...')

# Figure 2: k-means run directly on the original coordinates, two clusters.
K = 2
idx = kmeans.kmeans_python(data, K)

# Boolean masks selecting the points labelled 0 and 1 respectively.
in_first = idx == 0
in_second = idx == 1

plt.figure(2)
plt.plot(x[in_first], y[in_first], 'r.')
plt.hold(True)
plt.plot(x[in_second], y[in_second], 'b.')
plt.title('K-means')

plt.show(block=False)
raw_input('press any key to continue ...')

# Squared pairwise distances between all points, as a full square matrix.
distmat = squareform(pdist(data)) ** 2

# Overwrite every entry that is exactly 1 with 2.
# NOTE(review): the purpose of this substitution is not evident from the code
# shown here -- confirm the intended distance threshold.
unit_entries = distmat == 1
distmat[unit_entries] = 2