Example 1
	def SPONGE_sym(self, k=4, tau_p=1, tau_n=1, eigens=None, mi=None):
		"""Clusters the graph using the symmetric normalised version of the SPONGE clustering algorithm.

		The algorithm tries to minimise the ratio (Lbar_sym^+ + tau_n Id) / (Lbar_sym^- + tau_p Id).
		The parameters tau_p and tau_n can typically be set to one.

		Args:
			k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list.
			tau_n (float): regularisation of the numerator
			tau_p (float): regularisation of the denominator

		Returns:
			array of int, or list of array of int: Output assignment to clusters.

		Other parameters:
			eigens (int): The number of eigenvectors to take. Defaults to k-1.
			mi (int): The maximum number of iterations for which to run the eigenvalue solver. Defaults to the number of nodes.

		"""

		listk = False

		if isinstance(k, list):
			kk = k
			k = max(k)
			listk = True

		if eigens is None:
			eigens = k - 1
		if mi is None:
			mi = self.size

		eye = ss.eye(self.size, format="csc")

		# Symmetric normalised Laplacian of the negative part: I - D_n^{-1/2} N D_n^{-1/2}
		d = sqrtinvdiag(self.D_n)
		matrix = d * self.n * d
		matrix2 = eye - matrix

		# Symmetric normalised Laplacian of the positive part: I - D_p^{-1/2} P D_p^{-1/2}
		d = sqrtinvdiag(self.D_p)
		matrix = d * self.p * d
		matrix1 = eye - matrix

		# Regularise both Laplacians so the generalised eigenproblem is well conditioned
		matrix1 = matrix1 + tau_n * eye
		matrix2 = matrix2 + tau_p * eye

		# Smallest eigenpairs of the generalised problem: matrix1 x = w matrix2 x
		v0 = np.random.normal(0, 1, (self.p.shape[0], eigens))
		(w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False)

		# Scale each eigenvector by the inverse of its eigenvalue before k-means
		v = v / w
		if not listk:
			v = np.atleast_2d(v)
			x = sl.KMeans(n_clusters=k).fit(v)
			return x.labels_
		else:
			return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk]
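
The embedding above comes from a generalised eigenvalue problem: the smallest eigenpairs of (Lbar_sym^+ + tau_n Id) x = w (Lbar_sym^- + tau_p Id) x, computed with LOBPCG. A minimal, self-contained sketch of that solver call on a toy pair of positive-definite sparse matrices (the matrices here are stand-ins, not the library's Laplacians):

import numpy as np
import scipy.sparse as ss
import scipy.sparse.linalg as ssl

n, eigens = 50, 3

# Two sparse symmetric positive-definite matrices standing in for
# (Lbar_sym^+ + tau_n I) and (Lbar_sym^- + tau_p I).
A = ss.random(n, n, density=0.1, random_state=0)
A = A + A.T + 2 * n * ss.eye(n)  # diagonally dominant, hence positive definite
B = ss.random(n, n, density=0.1, random_state=1)
B = B + B.T + 2 * n * ss.eye(n)

v0 = np.random.normal(0, 1, (n, eigens))  # random starting block
# Smallest eigenpairs of A x = w B x, as in the lobpcg call above.
(w, v) = ssl.lobpcg(A.tocsc(), v0, B=B.tocsc(), maxiter=n, largest=False)
print(w)        # generalised eigenvalues
print(v.shape)  # (n, eigens) spectral embedding
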
Example 2
	def spectral_cluster_bnc(self, k=2, normalisation='sym', eigens=None, mi=None):

		"""Clusters the graph by using the Balance Normalised Cut or Balance Ratio Cut objective matrix.

		Args:
			k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list.
			normalisation (string): How to normalise for cluster size:
				'none' - do not normalise.
				'sym' - symmetric normalisation.
				'rw' - random walk normalisation.

		Returns:
			array of int, or list of array of int: Output assignment to clusters.

		Other parameters:
			eigens (int): The number of eigenvectors to take. Defaults to k.
			mi (int): The maximum number of iterations for which to run the eigenvalue solver. Defaults to the number of nodes.

		"""

		listk = False
		if isinstance(k, list):
			kk = k
			k = max(k)
			listk = True

		if eigens is None:
			eigens = k
		if mi is None:
			mi = self.size

		symmetric = True

		if normalisation == 'none':
			matrix = self.A + self.D_n  # unnormalised objective matrix

		elif normalisation == 'sym':
			d = sqrtinvdiag(self.Dbar)
			matrix = d * (self.A + self.D_n) * d  # symmetric normalisation

		elif normalisation == 'rw':
			d = invdiag(self.Dbar)
			matrix = d * (self.A + self.D_n)  # random walk normalisation (matrix no longer symmetric)
			symmetric = False

		if symmetric:
			(w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA')
		else:
			(w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR')

		v = v * w  # weight eigenvectors by their eigenvalues, since eigenvectors with larger eigenvalues are more likely to be informative

		if not listk:
			v = np.atleast_2d(v)
			x = sl.KMeans(n_clusters=k).fit(v)
			return x.labels_
		else:
			return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk]
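
The pipeline used here, and in the adjacency-based methods below, is the same throughout: take the leading eigenpairs of the objective matrix, weight each eigenvector by its eigenvalue, and run k-means on the rows. A minimal sketch of that pipeline on a toy two-block matrix (illustrative only, independent of the class above):

import numpy as np
import scipy.sparse as ss
import scipy.sparse.linalg as ssl
import sklearn.cluster as sl

n, k = 60, 2

# Toy symmetric "objective" matrix: two assortative blocks plus noise.
rng = np.random.default_rng(0)
blocks = np.kron(np.eye(2), np.ones((n // 2, n // 2)))
noise = rng.normal(scale=0.1, size=(n, n))
M = ss.csr_matrix(blocks + (noise + noise.T) / 2)

# Symmetric matrix: largest algebraic eigenvalues ('LA'), as in the
# 'none' and 'sym' branches; the 'rw' branch needs eigs(..., which='LR').
(w, v) = ssl.eigsh(M, k, maxiter=n, which='LA')

v = v * w  # weight eigenvectors by their eigenvalues
labels = sl.KMeans(n_clusters=k).fit(np.atleast_2d(v)).labels_
print(labels)  # should recover the two blocks
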
Example 3
 def __init__(self, data):
     # data = (p, n): sparse matrices of positive and negative edge weights.
     self.p = data[0]
     self.n = data[1]
     self.A = (self.p - self.n).tocsc()  # signed adjacency matrix
     self.D_p = ss.diags(self.p.sum(axis=0).tolist(), [0]).tocsc()  # positive degree matrix
     self.D_n = ss.diags(self.n.sum(axis=0).tolist(), [0]).tocsc()  # negative degree matrix
     self.Dbar = (self.D_p + self.D_n)  # total absolute degree matrix
     d = sqrtinvdiag(self.Dbar)
     self.normA = d * self.A * d  # symmetrically normalised signed adjacency
     self.size = self.p.shape[0]  # number of nodes
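
For context, a hedged usage sketch of the constructor above: `data` is a pair (p, n) of sparse matrices holding the positive and negative edge weights of a signed graph. The class name `Cluster` below is a placeholder (the real name is not shown in these excerpts); the constructor and method signatures are the ones listed here.

import numpy as np
import scipy.sparse as ss

# Toy signed graph on 6 nodes: two positive triangles {0, 1, 2} and {3, 4, 5},
# joined by negative edges (0, 3), (1, 4), (2, 5).
pos_edges = [(0, 1), (1, 2), (0, 2), (3, 4), (4, 5), (3, 5)]
neg_edges = [(0, 3), (1, 4), (2, 5)]

def sym_sparse(edges, size):
    rows = [i for i, j in edges] + [j for i, j in edges]
    cols = [j for i, j in edges] + [i for i, j in edges]
    return ss.csc_matrix((np.ones(len(rows)), (rows, cols)), shape=(size, size))

p = sym_sparse(pos_edges, 6)
n = sym_sparse(neg_edges, 6)

graph = Cluster((p, n))                       # 'Cluster' is a placeholder class name
print(graph.A.toarray())                      # signed adjacency p - n
print(graph.spectral_cluster_adjacency(k=2))  # expected to separate {0,1,2} from {3,4,5}
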
Example 4
    def SDP_cluster(self, k, solver='BM_proj_grad', normalisation='sym_sep'):
        """Clustering based on a SDP relaxation of the clustering problem.

		A low dimensional embedding is obtained from the leading eigenvectors of the positive-semidefinite matrix Z
		which maximises its Frobenius product with the adjacency matrix, and k-means is performed in this space.

		Args:
			k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list.
			solver (str): Type of solver for the SDP formulation.
				'interior_point_method' - Interior point method.
				'BM_proj_grad' - Burer Monteiro method using projected gradient updates.
				'BM_aug_lag' - Burer Monteiro method using augmented Lagrangian updates.
			normalisation (string): How to normalise for cluster size:
				'none' - do not normalise.
				'sym' - symmetric normalisation.
				'sym_sep' - separate symmetric normalisation of positive and negative parts.

		Returns:
			array of int, or list of array of int: Label assignments.

		"""

        listk = False
        if isinstance(k, list):
            kk = k
            k = max(k)
            listk = True

        if normalisation == 'none':
            matrix = self.A

        elif normalisation == 'sym':
            d = sqrtinvdiag(self.Dbar)
            matrix = d * self.A * d

        elif normalisation == 'sym_sep':
            d = sqrtinvdiag(self.D_p)
            matrix = d * self.p * d
            d = sqrtinvdiag(self.D_n)
            matrix = matrix - (d * self.n * d)

        if solver == 'interior_point_method':
            import cvxpy as cvx

            # Define a cvx optimization variable
            Z = cvx.Variable((self.size, self.size), PSD=True)
            ones = np.ones(self.size)
            # Define constraints
            constraints = [cvx.diag(Z) == ones]
            # Define an objective function
            obj = cvx.Maximize(cvx.trace(self.A * Z))
            # Define an optimisation problem
            prob = cvx.Problem(obj, constraints)
            # Solve optimisation problem

            prob.solve(solver='CVXOPT')

            print("status:", prob.status)
            print("optimal value", prob.value)
            # print("optimal var", Z.value)
            print(Z.value)

            # Diagonalise solution
            (w, v) = sp.linalg.eigh(Z.value,
                                    eigvals=(self.size - k, self.size - 1))
            v = v * w

        elif solver == 'BM_proj_grad':

            r = math.floor(np.sqrt(2 * self.size) + 1)  # Burer-Monteiro factorisation rank, of order sqrt(2 * size)
            X = np.random.normal(0, 1, (self.size, r))
            ones = np.ones((self.size, 1))
            step = 2
            traces = []
            i = 0
            while True:
                AX = matrix.dot(X)
                G = 2 * AX  # gradient of trace(X^T A X) with respect to X
                X = X + step * G
                trace = np.einsum('ij, ij -> ', X, AX)
                traces.append(trace)
                Norms = np.linalg.norm(X, axis=1)
                X = np.divide(X, Norms[:, None])  # project rows back onto the unit sphere
                delta_trace = abs(traces[-1] - traces[-2]) / abs(
                    traces[-2]) if i > 0 else 100.
                if delta_trace <= 0.01:  # stop on < 1% relative change in the objective
                    break
                i += 1
            Z = X.T.dot(X)
            (w, v) = sp.linalg.eigh(Z, eigvals=(r - k, r - 1))
            v = X.dot(v)
            v = v * w

        elif solver == 'BM_aug_lag':
            r = int(np.sqrt(2 * self.size))
            X = augmented_lagrangian(A=matrix, r=r, printing=False, init=None)
            Z = X.T.dot(X)
            (w, v) = sp.linalg.eigh(Z, eigvals=(r - k, r - 1))
            v = X.dot(v)
            v = v * w

        else:
            raise ValueError('please specify a valid solver')

        if not listk:
            v = np.atleast_2d(v)
            x = sl.KMeans(n_clusters=k).fit(v)
            return x.labels_
        else:
            return [
                sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:,
                                                            1 - x:])).labels_
                for x in kk
            ]
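
The 'BM_proj_grad' branch is a Burer-Monteiro scheme: instead of optimising over the full n x n PSD matrix Z, it optimises a low-rank factor X with unit-norm rows and ascends trace(X^T A X) by projected gradient steps. A standalone sketch of that loop on a small dense matrix (the step size, tolerance, and toy matrix are illustrative, not the library's defaults):

import numpy as np

def bm_projected_gradient(A, r, step=2.0, tol=0.01, seed=0):
    """Maximise trace(X^T A X) over X with unit-norm rows (Burer-Monteiro style)."""
    rng = np.random.default_rng(seed)
    X = rng.normal(0, 1, (A.shape[0], r))
    X /= np.linalg.norm(X, axis=1, keepdims=True)
    prev = None
    while True:
        X = X + step * 2 * A.dot(X)                    # gradient step: grad = 2 A X
        X /= np.linalg.norm(X, axis=1, keepdims=True)  # project rows onto the unit sphere
        trace = np.einsum('ij,ij->', X, A.dot(X))      # current objective value
        if prev is not None and abs(trace - prev) <= tol * abs(prev):
            return X
        prev = trace

# Toy matrix with +1 within two blocks of 5 nodes and -1 across them.
A = np.kron(np.array([[1., -1.], [-1., 1.]]), np.ones((5, 5)))
X = bm_projected_gradient(A, r=4)
Z = X @ X.T                  # low-rank surrogate for the SDP variable, diag(Z) = 1
print(np.round(Z[0, :], 1))  # close to +1 for nodes 0-4 and -1 for nodes 5-9
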
Example 5
    def geproblem_laplacian(self,
                            k=4,
                            normalisation='multiplicative',
                            eigens=None,
                            mi=None,
                            tau=1.):
        """Clusters the graph by solving a Laplacian-based generalised eigenvalue problem.

		Args:
			k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list.
			normalisation (string): How to normalise for cluster size:
				'none' - do not normalise.
				'additive' - add degree matrices appropriately.
				'multiplicative' - multiply by degree matrices appropriately.

		Returns:
			array of int, or list of array of int: Output assignment to clusters.

		Other parameters:
			eigens (int): The number of eigenvectors to take. Defaults to k.
			mi (int): The maximum number of iterations for which to run the eigenvalue solver. Defaults to the number of nodes.
			tau (float): Regularisation added to both Laplacians. Defaults to 1.

		"""
        listk = False
        if isinstance(k, list):
            kk = k
            k = max(k)
            listk = True

        if eigens is None:
            eigens = k
        if mi is None:
            mi = self.size

        eye = ss.eye(self.size, format="csc")

        if normalisation == 'none':
            matrix1 = self.D_p - self.p  # unnormalised Laplacian of the positive part
            matrix2 = self.D_n - self.n  # unnormalised Laplacian of the negative part

        elif normalisation == 'additive':
            matrix1 = self.Dbar - self.p
            matrix2 = self.Dbar - self.n

        elif normalisation == 'multiplicative':

            d = sqrtinvdiag(self.D_n)
            matrix = d * self.n * d
            matrix2 = eye - matrix  # symmetric normalised Laplacian of the negative part
            d = sqrtinvdiag(self.D_p)
            matrix = d * self.p * d
            matrix1 = eye - matrix  # symmetric normalised Laplacian of the positive part

        matrix1 = matrix1 + eye * tau
        matrix2 = matrix2 + eye * tau

        v0 = np.random.normal(0, 1, (self.p.shape[0], eigens))
        (w, v) = ss.linalg.lobpcg(matrix1,
                                  v0,
                                  B=matrix2,
                                  maxiter=mi,
                                  largest=False)

        v = v / w
        if not listk:
            v = np.atleast_2d(v)
            x = sl.KMeans(n_clusters=k).fit(v)
            return x.labels_
        else:
            return [
                sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:,
                                                            0:x - 1])).labels_
                for x in kk
            ]
Example 6
    def spectral_cluster_adjacency(self,
                                   k=2,
                                   normalisation='sym_sep',
                                   eigens=None,
                                   mi=None):
        """Clusters the graph using eigenvectors of the adjacency matrix.

		Args:
			k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list.
			normalisation (string): How to normalise for cluster size:
				'none' - do not normalise.
				'sym' - symmetric normalisation.
				'rw' - random walk normalisation.
				'sym_sep' - separate symmetric normalisation of positive and negative parts.
				'rw_sep' - separate random walk normalisation of positive and negative parts.
				'neg' - random walk normalisation of the negative part only, rescaled to match the total positive weight.

		Returns:
			array of int, or list of array of int: Output assignment to clusters.

		Other parameters:
			eigens (int): The number of eigenvectors to take. Defaults to k.
			mi (int): The maximum number of iterations for which to run the eigenvalue solver. Defaults to the number of nodes.

		"""
        listk = False
        if isinstance(k, list):
            kk = k
            k = max(k)
            listk = True

        if eigens is None:
            eigens = k
        if mi is None:
            mi = self.size

        symmetric = True

        if normalisation == 'none':
            matrix = self.A

        elif normalisation == 'sym':
            d = sqrtinvdiag(self.Dbar)
            matrix = d * self.A * d

        elif normalisation == 'rw':
            d = invdiag(self.Dbar)
            matrix = d * self.A
            symmetric = False

        elif normalisation == 'sym_sep':
            d = sqrtinvdiag(self.D_p)
            matrix = d * self.p * d
            d = sqrtinvdiag(self.D_n)
            matrix = matrix - (d * self.n * d)

        elif normalisation == 'rw_sep':
            d = invdiag(self.D_p)
            matrix = d * self.p
            d = invdiag(self.D_n)
            matrix = matrix - (d * self.n)
            symmetric = False

        elif normalisation == 'neg':
            pos = self.p
            d = invdiag(self.D_n)
            neg = d * self.n
            x = (pos.sum() / neg.sum())
            neg = neg * x
            matrix = pos - neg

        if symmetric:
            (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA')
        else:
            (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR')
        v = v * w  # weight eigenvectors by their eigenvalues, since eigenvectors with larger eigenvalues are more likely to be informative
        if not listk:
            v = np.atleast_2d(v)
            x = sl.KMeans(n_clusters=k).fit(v)
            return x.labels_
        else:
            return [
                sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:,
                                                            1 - x:])).labels_
                for x in kk
            ]
Example 7
    def spectral_cluster_adjacency_reg(self,
                                       k=2,
                                       normalisation='sym_sep',
                                       tau_p=None,
                                       tau_n=None,
                                       eigens=None,
                                       mi=None):
        """Clusters the graph using eigenvectors of the regularised adjacency matrix.

		Args:
			k (int): The number of clusters to identify.
			normalisation (string): How to normalise for cluster size (only the options below are implemented here):
				'none' - do not normalise.
				'sym' - symmetric normalisation.
				'sym_sep' - separate symmetric normalisation of positive and negative parts.
			tau_p (float): Regularisation coefficient for the positive adjacency matrix. Defaults to a quarter of the mean degree divided by the number of nodes.
			tau_n (float): Regularisation coefficient for the negative adjacency matrix. Same default as tau_p.

		Returns:
			array of int: Output assignment to clusters.

		Other parameters:
			eigens (int): The number of eigenvectors to take. Defaults to k.
			mi (int): The maximum number of iterations for which to run the eigenvalue solver. Defaults to the number of nodes.

		"""

        if eigens is None:
            eigens = k

        if mi is None:
            mi = self.size

        if tau_p is None:
            tau_p = 0.25 * np.mean(self.Dbar.data) / self.size
        if tau_n is None:
            tau_n = 0.25 * np.mean(self.Dbar.data) / self.size

        symmetric = True

        p_tau = self.p.copy()
        n_tau = self.n.copy()
        p_tau.data += tau_p
        n_tau.data += tau_n

        Dbar_c = self.size - self.Dbar.diagonal()

        Dbar_tau_s = (p_tau + n_tau).sum(
            axis=0) + (Dbar_c * abs(tau_p - tau_n))[None, :]

        Dbar_tau = ss.diags(Dbar_tau_s.tolist(), [0])

        if normalisation == 'none':
            matrix = self.A
            delta_tau = tau_p - tau_n

            def mv(v):
                return matrix.dot(v) + delta_tau * v.sum()

        elif normalisation == 'sym':
            d = sqrtinvdiag(Dbar_tau)
            matrix = d * self.A * d
            dd = d.diagonal()
            tau_dd = (tau_p - tau_n) * dd

            def mv(v):
                return matrix.dot(v) + tau_dd * dd.dot(v)

        elif normalisation == 'sym_sep':

            diag_corr = ss.diags([self.size * tau_p] * self.size).tocsc()
            dp = sqrtinvdiag(self.D_p + diag_corr)

            matrix = dp * self.p * dp

            diag_corr = ss.diags([self.size * tau_n] * self.size).tocsc()
            dn = sqrtinvdiag(self.D_n + diag_corr)

            matrix = matrix - (dn * self.n * dn)

            dpd = dp.diagonal()
            dnd = dn.diagonal()
            tau_dp = tau_p * dpd
            tau_dn = tau_n * dnd

            def mv(v):
                return matrix.dot(
                    v) + tau_dp * dpd.dot(v) - tau_dn * dnd.dot(v)

        else:
            raise ValueError('please specify a valid normalisation')

        matrix_o = ss.linalg.LinearOperator(matrix.shape, matvec=mv)

        if symmetric:
            (w, v) = ss.linalg.eigsh(matrix_o, eigens, maxiter=mi, which='LA')
        else:
            (w, v) = ss.linalg.eigs(matrix_o, eigens, maxiter=mi, which='LR')

        v = v * w  # weight eigenvectors by their eigenvalues, since eigenvectors with larger eigenvalues are more likely to be informative
        v = np.atleast_2d(v)
        x = sl.KMeans(n_clusters=k).fit(v)
        return x.labels_
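
The regularised operator is never formed explicitly above: the dense correction coming from tau_p and tau_n is applied inside a matvec closure and wrapped in a LinearOperator, so eigsh only ever multiplies by the sparse part plus a cheap rank-one term. A minimal sketch of that pattern (the matrix and the shift tau are illustrative, not the library's regularisation):

import numpy as np
import scipy.sparse as ss
import scipy.sparse.linalg as ssl

size, k, tau = 200, 3, 0.05

A = ss.random(size, size, density=0.02, random_state=0)
A = A + A.T  # sparse symmetric matrix

# Operator A + tau * J (J the all-ones matrix) without densifying:
# (A + tau * J) v = A v + tau * sum(v) * 1.
def mv(v):
    return A.dot(v) + tau * v.sum()

A_reg = ssl.LinearOperator((size, size), matvec=mv)

# Largest algebraic eigenpairs of the regularised operator, matching the
# 'none' branch of spectral_cluster_adjacency_reg above.
(w, v) = ssl.eigsh(A_reg, k, maxiter=size, which='LA')
print(w)
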