def SPONGE_sym(self, k=4, tau_p=1, tau_n=1, eigens=None, mi=None):
    """Clusters the graph using the symmetric normalised version of the SPONGE clustering algorithm.

    The algorithm tries to minimise the following ratio
    (Lbar_sym^+ + tau_n Id)/(Lbar_sym^- + tau_p Id).
    The parameters tau_p and tau_n can be typically set to one.

    Args:
        k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a
            corresponding list.
        tau_n (float): regularisation of the numerator
        tau_p (float): regularisation of the denominator

    Returns:
        array of int, or list of array of int: Output assignment to clusters.

    Other parameters:
        eigens (int): The number of eigenvectors to take. Defaults to k - 1, where k is the
            largest requested number of clusters.
        mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to
            number of nodes.

    """
    listk = isinstance(k, list)
    if listk:
        kk = k
        k = max(k)

    if eigens is None:
        eigens = k - 1
    if mi is None:
        mi = self.size

    eye = ss.eye(self.size, format="csc")

    # Denominator: identity minus the degree-scaled negative part.
    # (sqrtinvdiag is defined elsewhere; presumably it returns D^{-1/2}.)
    d = sqrtinvdiag(self.D_n)
    matrix2 = eye - d * self.n * d

    # Numerator: identity minus the degree-scaled positive part.
    d = sqrtinvdiag(self.D_p)
    matrix1 = eye - d * self.p * d

    matrix1 = matrix1 + tau_n * eye
    matrix2 = matrix2 + tau_p * eye

    # Random starting block for the generalised eigenproblem matrix1 v = w matrix2 v.
    v0 = np.random.normal(0, 1, (self.p.shape[0], eigens))
    (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False)

    # Divide each eigenvector by its eigenvalue so that the smallest
    # eigenvalues carry the most weight in the embedding.
    v = v / w

    if not listk:
        v = np.atleast_2d(v)
        return sl.KMeans(n_clusters=k).fit(v).labels_
    # One clustering per requested k, each on the first x - 1 embedding columns.
    return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk]
def spectral_cluster_bnc(self, k=2, normalisation='sym', eigens=None, mi=None):
    """Clusters the graph by using the Balance Normalised Cut or Balance Ratio Cut objective matrix.

    Args:
        k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a
            corresponding list.
        normalisation (string): How to normalise for cluster size:
            'none' - do not normalise.
            'sym' - symmetric normalisation.
            'rw' - random walk normalisation.

    Returns:
        array of int, or list of array of int: Output assignment to clusters.

    Raises:
        ValueError: If `normalisation` is not one of the values listed above.

    Other parameters:
        eigens (int): The number of eigenvectors to take. Defaults to k.
        mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to
            number of nodes.

    """
    # Fail early with a clear message instead of hitting an unbound `matrix` later.
    if normalisation not in ('none', 'sym', 'rw'):
        raise ValueError(
            "normalisation must be one of 'none', 'sym' or 'rw', got {!r}".format(normalisation))

    listk = isinstance(k, list)
    if listk:
        kk = k
        k = max(k)

    if eigens is None:
        eigens = k
    if mi is None:
        mi = self.size

    symmetric = True
    if normalisation == 'none':
        matrix = self.A + self.D_n
    elif normalisation == 'sym':
        d = sqrtinvdiag(self.Dbar)
        matrix = d * (self.A + self.D_n) * d
    else:  # 'rw'
        d = invdiag(self.Dbar)
        matrix = d * (self.A + self.D_n)
        symmetric = False

    if symmetric:
        (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA')
    else:
        # NOTE(review): eigs returns complex-valued arrays; confirm that the
        # KMeans call below accepts them.
        (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR')

    # Scale each eigenvector by its eigenvalue, since eigenvectors with larger
    # eigenvalues are more likely to be informative.
    v = v * w

    if not listk:
        v = np.atleast_2d(v)
        return sl.KMeans(n_clusters=k).fit(v).labels_
    # One clustering per requested k, each on the trailing embedding columns.
    return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk]
def __init__(self, data):
    """Store the two parts of a signed graph and precompute derived matrices.

    Args:
        data: Indexable container; data[0] is stored as the positive part
            and data[1] as the negative part. Both are presumably square
            scipy sparse adjacency matrices of equal shape (confirm against
            callers).

    """
    positive = data[0]
    negative = data[1]
    self.p = positive
    self.n = negative

    # Signed adjacency matrix.
    self.A = (positive - negative).tocsc()

    # Diagonal matrices built from the column sums of each part.
    pos_sums = positive.sum(axis=0).tolist()
    neg_sums = negative.sum(axis=0).tolist()
    self.D_p = ss.diags(pos_sums, [0]).tocsc()
    self.D_n = ss.diags(neg_sums, [0]).tocsc()
    self.Dbar = (self.D_p + self.D_n)

    # Signed adjacency scaled on both sides by sqrtinvdiag(Dbar).
    scale = sqrtinvdiag(self.Dbar)
    self.normA = scale * self.A * scale

    # Number of nodes.
    self.size = positive.shape[0]
def SDP_cluster(self, k, solver='BM_proj_grad', normalisation='sym_sep'):
    """Clustering based on a SDP relaxation of the clustering problem.

    A low dimensional embedding is obtained via the leading (largest-eigenvalue) eigenvectors of a
    positive-semidefinite matrix Z which maximises its Frobenius product with the adjacency matrix,
    and k-means is performed in this space.

    Args:
        k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a
            corresponding list.
        solver (str): Type of solver for the SDP formulation.
            'interior_point_method' - Interior point method.
            'BM_proj_grad' - Burer Monteiro method using projected gradient updates.
            'BM_aug_lag' - Burer Monteiro method using augmented Lagrangian updates.
        normalisation (string): How to normalise the adjacency matrix:
            'none' - do not normalise.
            'sym' - symmetric normalisation.
            'sym_sep' - separate symmetric normalisation of positive and negative parts.

    Returns:
        array of int, or list of array of int: Label assignments.

    Raises:
        ValueError: If `solver` or `normalisation` is not one of the values listed above.

    """
    # Validate both options up front so that a typo fails with a clear
    # message rather than an unbound `matrix` further down.
    if normalisation not in ('none', 'sym', 'sym_sep'):
        raise ValueError(
            "normalisation must be one of 'none', 'sym' or 'sym_sep', got {!r}".format(normalisation))
    if solver not in ('interior_point_method', 'BM_proj_grad', 'BM_aug_lag'):
        raise ValueError('please specify a valid solver')

    def top_eigenpairs(mat, count):
        # eigh returns eigenvalues in ascending order, so the last `count`
        # columns are the leading eigenpairs. Slicing a full decomposition
        # avoids the deprecated `eigvals=` keyword and works on any SciPy.
        vals, vecs = sp.linalg.eigh(mat)
        start = max(len(vals) - count, 0)
        return vals[start:], vecs[:, start:]

    listk = isinstance(k, list)
    if listk:
        kk = k
        k = max(k)

    if normalisation == 'none':
        matrix = self.A
    elif normalisation == 'sym':
        d = sqrtinvdiag(self.Dbar)
        matrix = d * self.A * d
    else:  # 'sym_sep'
        d = sqrtinvdiag(self.D_p)
        matrix = d * self.p * d
        d = sqrtinvdiag(self.D_n)
        matrix = matrix - (d * self.n * d)

    if solver == 'interior_point_method':
        import cvxpy as cvx

        # Define a cvx optimization variable
        Z = cvx.Variable((self.size, self.size), PSD=True)
        ones = np.ones(self.size)
        # Define constraints
        constraints = [cvx.diag(Z) == ones]
        # Define an objective function
        # NOTE(review): this objective uses the unnormalised self.A, so
        # `normalisation` has no effect for this solver - confirm intended.
        obj = cvx.Maximize(cvx.trace(self.A * Z))
        # Define an optimisation problem
        prob = cvx.Problem(obj, constraints)
        # Solve optimisation problem
        prob.solve(solver='CVXOPT')

        print("status:", prob.status)
        print("optimal value", prob.value)
        print(Z.value)

        # Diagonalise solution
        (w, v) = top_eigenpairs(Z.value, k)
        v = v * w

    elif solver == 'BM_proj_grad':
        # Low-rank factorisation Z = X X^T with rows of X kept at unit norm.
        r = math.floor(np.sqrt(2 * self.size) + 1)
        X = np.random.normal(0, 1, (self.size, r))
        step = 2
        traces = []
        i = 0
        while True:
            AX = matrix.dot(X)
            G = 2 * AX
            X = X + step * G
            # Objective value tracked for the stopping rule.
            trace = np.einsum('ij, ij -> ', X, AX)
            traces.append(trace)
            # Project each row back to unit norm.
            Norms = np.linalg.norm(X, axis=1)
            X = np.divide(X, Norms[:, None])
            # Relative change of the objective; the first pass never stops.
            if i > 0:
                delta_trace = abs(traces[-1] - traces[-2]) / abs(traces[-2])
            else:
                delta_trace = 100.
            if delta_trace <= 0.01:
                break
            i += 1

        Z = X.T.dot(X)
        (w, v) = top_eigenpairs(Z, k)
        v = X.dot(v)
        v = v * w

    else:  # 'BM_aug_lag'
        r = int(np.sqrt(2 * self.size))
        X = augmented_lagrangian(A=matrix, r=r, printing=False, init=None)
        Z = X.T.dot(X)
        (w, v) = top_eigenpairs(Z, k)
        v = X.dot(v)
        v = v * w

    if not listk:
        v = np.atleast_2d(v)
        return sl.KMeans(n_clusters=k).fit(v).labels_
    # One clustering per requested k, each on the trailing embedding columns.
    return [
        sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_
        for x in kk
    ]
def geproblem_laplacian(self, k=4, normalisation='multiplicative', eigens=None, mi=None, tau=1.):
    """Clusters the graph by solving a Laplacian-based generalised eigenvalue problem.

    Args:
        k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a
            corresponding list.
        normalisation (string): How to normalise for cluster size:
            'none' - do not normalise.
            'additive' - add degree matrices appropriately.
            'multiplicative' - multiply by degree matrices appropriately.

    Returns:
        array of int, or list of array of int: Output assignment to clusters.

    Raises:
        ValueError: If `normalisation` is not one of the values listed above.

    Other parameters:
        eigens (int): The number of eigenvectors to take. Defaults to k.
        mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to
            number of nodes.
        tau (float): Multiple of the identity added to both matrices of the generalised
            eigenproblem before solving.

    """
    # Fail early with a clear message instead of hitting unbound matrices later.
    if normalisation not in ('none', 'additive', 'multiplicative'):
        raise ValueError(
            "normalisation must be one of 'none', 'additive' or 'multiplicative', "
            "got {!r}".format(normalisation))

    listk = isinstance(k, list)
    if listk:
        kk = k
        k = max(k)

    if eigens is None:
        eigens = k
    if mi is None:
        mi = self.size

    eye = ss.eye(self.size, format="csc")

    # matrix1 is built from the positive part, matrix2 from the negative part.
    if normalisation == 'none':
        matrix1 = self.D_p - self.p
        matrix2 = self.D_n - self.n
    elif normalisation == 'additive':
        matrix1 = self.Dbar - self.p
        matrix2 = self.Dbar - self.n
    else:  # 'multiplicative'
        d = sqrtinvdiag(self.D_n)
        matrix2 = eye - d * self.n * d
        d = sqrtinvdiag(self.D_p)
        matrix1 = eye - d * self.p * d

    matrix1 = matrix1 + eye * tau
    matrix2 = matrix2 + eye * tau

    # Random starting block for the generalised eigenproblem matrix1 v = w matrix2 v.
    v0 = np.random.normal(0, 1, (self.p.shape[0], eigens))
    (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False)

    # Divide each eigenvector by its eigenvalue so that the smallest
    # eigenvalues carry the most weight in the embedding.
    v = v / w

    if not listk:
        v = np.atleast_2d(v)
        return sl.KMeans(n_clusters=k).fit(v).labels_
    # One clustering per requested k, each on the first x - 1 embedding columns.
    return [
        sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_
        for x in kk
    ]
def spectral_cluster_adjacency(self, k=2, normalisation='sym_sep', eigens=None, mi=None):
    """Clusters the graph using eigenvectors of the adjacency matrix.

    Args:
        k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a
            corresponding list.
        normalisation (string): How to normalise for cluster size:
            'none' - do not normalise.
            'sym' - symmetric normalisation.
            'rw' - random walk normalisation.
            'sym_sep' - separate symmetric normalisation of positive and negative parts.
            'rw_sep' - separate random walk normalisation of positive and negative parts.
            'neg' - leave the positive part as is; random-walk normalise the negative part and
                rescale it so that its total weight equals that of the positive part.

    Returns:
        array of int, or list of array of int: Output assignment to clusters.

    Raises:
        ValueError: If `normalisation` is not one of the values listed above.

    Other parameters:
        eigens (int): The number of eigenvectors to take. Defaults to k.
        mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to
            number of nodes.

    """
    # Fail early with a clear message instead of hitting an unbound `matrix` later.
    if normalisation not in ('none', 'sym', 'rw', 'sym_sep', 'rw_sep', 'neg'):
        raise ValueError(
            "normalisation must be one of 'none', 'sym', 'rw', 'sym_sep', 'rw_sep' or 'neg', "
            "got {!r}".format(normalisation))

    listk = isinstance(k, list)
    if listk:
        kk = k
        k = max(k)

    if eigens is None:
        eigens = k
    if mi is None:
        mi = self.size

    symmetric = True

    if normalisation == 'none':
        matrix = self.A
    elif normalisation == 'sym':
        d = sqrtinvdiag(self.Dbar)
        matrix = d * self.A * d
    elif normalisation == 'rw':
        d = invdiag(self.Dbar)
        matrix = d * self.A
        symmetric = False
    elif normalisation == 'sym_sep':
        d = sqrtinvdiag(self.D_p)
        matrix = d * self.p * d
        d = sqrtinvdiag(self.D_n)
        matrix = matrix - (d * self.n * d)
    elif normalisation == 'rw_sep':
        d = invdiag(self.D_p)
        matrix = d * self.p
        d = invdiag(self.D_n)
        matrix = matrix - (d * self.n)
        symmetric = False
    else:  # 'neg'
        # NOTE(review): this branch keeps symmetric=True although the negative
        # part is scaled on one side only - confirm eigsh is appropriate here.
        pos = self.p
        d = invdiag(self.D_n)
        neg = d * self.n
        scale = (pos.sum() / neg.sum())
        neg = neg * scale
        matrix = pos - neg

    if symmetric:
        (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA')
    else:
        # NOTE(review): eigs returns complex-valued arrays; confirm that the
        # KMeans call below accepts them.
        (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR')

    # Scale each eigenvector by its eigenvalue, since eigenvectors with larger
    # eigenvalues are more likely to be informative.
    v = v * w

    if not listk:
        v = np.atleast_2d(v)
        return sl.KMeans(n_clusters=k).fit(v).labels_
    # One clustering per requested k, each on the trailing embedding columns.
    return [
        sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_
        for x in kk
    ]
def spectral_cluster_adjacency_reg(self, k=2, normalisation='sym_sep', tau_p=None, tau_n=None, eigens=None, mi=None):
    """Clusters the graph using eigenvectors of the regularised adjacency matrix.

    The regularisation is applied implicitly: the (normalised) adjacency matrix is wrapped in a
    linear operator whose matrix-vector product adds a rank-one correction built from tau_p and
    tau_n, so the dense regularised matrix is never formed.

    Args:
        k (int): The number of clusters to identify.
        normalisation (string): How to normalise for cluster size:
            'none' - do not normalise.
            'sym' - symmetric normalisation.
            'sym_sep' - separate symmetric normalisation of positive and negative parts.
        tau_p (int): Regularisation coefficient for positive adjacency matrix.
        tau_n (int): Regularisation coefficient for negative adjacency matrix.
            If either tau_p or tau_n is None, both are replaced by
            0.25 * mean(stored entries of Dbar) / number of nodes.

    Returns:
        array of int: Output assignment to clusters.

    Raises:
        ValueError: If `normalisation` is not one of the values listed above.

    Other parameters:
        eigens (int): The number of eigenvectors to take. Defaults to k.
        mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to
            number of nodes.

    """
    # Previously an unknown value only printed a message and then crashed on
    # an unbound name; fail early with a proper exception instead.
    if normalisation not in ('none', 'sym', 'sym_sep'):
        raise ValueError(
            "normalisation must be one of 'none', 'sym' or 'sym_sep', got {!r}".format(normalisation))

    if eigens is None:
        eigens = k
    if mi is None:
        mi = self.size
    if tau_p is None or tau_n is None:
        # Both coefficients are reset together, even if only one was missing.
        tau_p = 0.25 * np.mean(self.Dbar.data) / self.size
        tau_n = 0.25 * np.mean(self.Dbar.data) / self.size

    if normalisation == 'none':
        matrix = self.A
        delta_tau = tau_p - tau_n

        def mv(v):
            # A v plus (tau_p - tau_n) times the sum of v, added to every entry.
            return matrix.dot(v) + delta_tau * v.sum()

    elif normalisation == 'sym':
        # Regularised degrees: tau is added to every stored entry of each
        # part, plus a correction proportional to (size - Dbar diagonal).
        p_tau = self.p.copy()
        n_tau = self.n.copy()
        p_tau.data += tau_p
        n_tau.data += tau_n

        Dbar_c = self.size - self.Dbar.diagonal()
        Dbar_tau_s = (p_tau + n_tau).sum(
            axis=0) + (Dbar_c * abs(tau_p - tau_n))[None, :]
        Dbar_tau = ss.diags(Dbar_tau_s.tolist(), [0])

        d = sqrtinvdiag(Dbar_tau)
        matrix = d * self.A * d
        dd = d.diagonal()
        tau_dd = (tau_p - tau_n) * dd

        def mv(v):
            # d A d v plus the rank-one term (tau_p - tau_n) * dd * (dd . v).
            return matrix.dot(v) + tau_dd * dd.dot(v)

    else:  # 'sym_sep'
        # Each part is normalised with its own degrees shifted by size * tau.
        diag_corr = ss.diags([self.size * tau_p] * self.size).tocsc()
        dp = sqrtinvdiag(self.D_p + diag_corr)
        matrix = dp * self.p * dp

        diag_corr = ss.diags([self.size * tau_n] * self.size).tocsc()
        dn = sqrtinvdiag(self.D_n + diag_corr)
        matrix = matrix - (dn * self.n * dn)

        dpd = dp.diagonal()
        dnd = dn.diagonal()
        tau_dp = tau_p * dpd
        tau_dn = tau_n * dnd

        def mv(v):
            # Normalised difference plus one rank-one term per part.
            return matrix.dot(
                v) + tau_dp * dpd.dot(v) - tau_dn * dnd.dot(v)

    matrix_o = ss.linalg.LinearOperator(matrix.shape, matvec=mv)

    # Every supported normalisation is solved with the symmetric solver.
    (w, v) = ss.linalg.eigsh(matrix_o, eigens, maxiter=mi, which='LA')

    # Scale each eigenvector by its eigenvalue, since eigenvectors with larger
    # eigenvalues are more likely to be informative.
    v = v * w

    v = np.atleast_2d(v)
    return sl.KMeans(n_clusters=k).fit(v).labels_