/
cluster.py
96 lines (81 loc) · 2.86 KB
/
cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
import scipy as sp
import math
import scipy.sparse as mlib
import scipy.sparse.linalg as lalib
import scipy.sparse.csgraph as csglib
# clustering algorithms
# make another file for sparsification algorithms?
# k is volbd
# graph is the matrix, in sparse form say?
# we could also parametrize over the bound...
# O(n^2 log n) is pretty vague!
# this is the global alg, but only examines a single vertex
# graph is a matrix in scipy land
# v is coordinate of input vertex (a number)
# epsilon, path_length_scaler, volbd_scaler are floating-point controls for alg tuning
def kwok_lau(graph, v, k, epsilon, path_length_scaler, volbd_scaler):
    """Kwok-Lau-style local clustering by sweep cuts over a lazy random walk.

    Runs a lazy random walk started at vertex ``v`` for roughly
    ``n^2 log2 n`` steps.  At each step ``t`` the vertices are ranked by
    ``p_t(u) / deg(u)`` and the prefix sets ``S_{t,j}`` are swept; the
    sweep set with the smallest conductance whose volume stays within the
    bound ``k * volbd_scaler`` is returned.

    Parameters:
        graph: sparse adjacency matrix (scipy), assumed symmetric with no
            isolated vertices (zero degrees would divide by zero).
        v: index of the seed vertex.
        k: target volume; scaled by ``volbd_scaler`` into the actual bound.
        epsilon, path_length_scaler: tuning knobs — currently unused
            (TODO: wire them into the iteration count / stopping rule).
        volbd_scaler: multiplier applied to ``k`` to get the volume bound.

    Returns:
        A 0/1 numpy indicator vector of the chosen set; the singleton
        ``{v}`` if no candidate beats conductance 1 within the volume bound.
        (The original code initialized the fallback to the scalar
        ``p[0][v]`` — a bare ``1`` — which was inconsistent with the vector
        every other branch returns; fixed here.)
    """
    num_vertices = graph.shape[0]
    volbd = k * volbd_scaler
    vertices = list(range(num_vertices))
    print("N: {0}\nVolume bound: {1}\n".format(num_vertices, volbd))
    # p[t] is the walk distribution after t lazy steps from v.
    p = [np.zeros(num_vertices, int)]
    p[0][v] = 1
    I = mlib.identity(num_vertices, int)
    # assuming symmetric here
    L, D_vector = csglib.laplacian(graph, return_diag=True)
    D = mlib.diags(D_vector, 0, format='csc')
    # Lazy walk matrix W = (I + D^{-1} A) / 2.  D is diagonal, so D^{-1}
    # is just the reciprocal degrees — no need for a sparse matrix inverse.
    lazy_walk = 0.5 * (I + mlib.diags(1.0 / D_vector) * graph)
    num_iterations = math.ceil(num_vertices ** 2 * math.log(num_vertices, 2))
    # Rank vertices at time t by probability mass normalized by degree.
    sortkey = lambda t: (lambda vertex: p[t][vertex] / D_vector[vertex])
    # Best set so far: the singleton {v} (p[0] is exactly its indicator).
    # Any real set has conductance <= 1, so start the best above that.
    outset = p[0]
    outcond = 2
    for t in range(1, num_iterations):
        # Advance the walk one step (the original precomputed every p[t]
        # up front and then recomputed the identical products here).
        p.append(p[-1] * lazy_walk)
        rank = sortkey(t)
        # Sweep prefixes; j stops short of num_vertices so the full vertex
        # set (degenerate cut) is never a candidate.
        for j in range(1, num_vertices):
            candidate = computeS(rank, vertices, j, num_vertices)
            currcond = conductance(candidate, L, D)
            if (currcond < outcond and volume(candidate, D) <= volbd):
                outset = candidate
                outcond = currcond
    return outset
#helper fcns
# conductance
# volume
# compute S_{t,j} somehow
# involves sorting
# computes conductance of S
# assume S is in np.array form
def conductance(chi_s, L, D):
    """Conductance of the set encoded by the 0/1 indicator vector chi_s.

    Computes (chi_s^T L chi_s) / (chi_s^T D chi_s): the cut weight via the
    Laplacian quadratic form, divided by the volume (sum of degrees in S).
    """
    cut_weight = (chi_s * L * chi_s).sum()
    set_volume = (chi_s * D * chi_s).sum()
    return cut_weight / set_volume
def volume(chi_s, D):
    """Volume of the set with indicator vector chi_s: chi_s^T D chi_s,
    i.e. the sum of the degrees of the vertices in the set."""
    weighted = chi_s * D
    return (weighted * chi_s).sum()
def computeS(keyfcn, vertex_list, size, nvertices):
    """Indicator vector of the `size` top-ranked vertices.

    Sorts vertex_list in descending order of keyfcn and returns a
    length-`nvertices` numpy 0/1 vector marking the first `size` vertices
    of that ranking.

    Fix vs. original: the original called vertex_list.sort(...) and so
    mutated the caller's list in place — a hidden side effect that made
    tie-breaking depend on the ordering left behind by earlier calls.
    Using sorted() ranks a copy, so the caller's list is untouched and
    ties are always broken by the caller's original order (sort is stable).
    """
    ranked = sorted(vertex_list, key=keyfcn, reverse=True)
    chi = np.zeros(nvertices)
    chi[ranked[:size]] = 1
    return chi
def set2vec(S, length):
    """Characteristic vector of vertex set S.

    Returns a length-`length` numpy array with a 1 at every index that
    appears in S and 0 everywhere else.
    """
    chi_s = np.zeros(length)
    chi_s[list(S)] = 1
    return chi_s