def __init__(
        self,
        resolution: float = 1,
        modularity: str = 'dugue',
        tol_optimization: float = 1e-3,
        tol_aggregation: float = 1e-3,
        n_aggregations: int = -1,
        shuffle_nodes: bool = False,
        sort_clusters: bool = True,
        return_membership: bool = True,
        return_aggregate: bool = True,
        random_state: Optional[Union[np.random.RandomState, int]] = None,
        verbose: bool = False):
    """Record the Louvain settings and initialise both parent classes.

    Parameters
    ----------
    resolution :
        Resolution parameter, stored as ``np.float32``.
    modularity :
        Name of the modularity variant, stored unchanged.
    tol_optimization :
        Tolerance of the optimization phase, stored as ``self.tol`` (``np.float32``).
    tol_aggregation :
        Tolerance of the aggregation phase, stored unchanged.
    n_aggregations :
        Number of aggregations, stored unchanged (default ``-1``).
    shuffle_nodes :
        Node shuffling flag, stored unchanged.
    sort_clusters, return_membership, return_aggregate :
        Forwarded as keyword arguments to the parent constructor.
    random_state :
        Seed or random generator, passed through ``check_random_state``.
    verbose :
        Forwarded to ``VerboseMixin.__init__``.
    """
    output_options = dict(
        sort_clusters=sort_clusters,
        return_membership=return_membership,
        return_aggregate=return_aggregate,
    )
    super(Louvain, self).__init__(**output_options)
    VerboseMixin.__init__(self, verbose)

    # Values cast to single precision.
    self.resolution = np.float32(resolution)
    self.tol = np.float32(tol_optimization)

    # Values kept exactly as given.
    self.modularity = modularity
    self.tol_aggregation = tol_aggregation
    self.n_aggregations = n_aggregations
    self.shuffle_nodes = shuffle_nodes

    self.random_state = check_random_state(random_state)
def __init__(
        self,
        which='LM',
        n_oversamples: int = 10,
        n_iter='auto',
        power_iteration_normalizer: Union[str, None] = 'auto',
        random_state=None,
        one_pass: bool = False):
    """Record the settings of the randomized eigenvalue solver.

    Parameters
    ----------
    which :
        Forwarded to the parent constructor as ``which``.
    n_oversamples :
        Number of oversamples, stored unchanged.
    n_iter :
        Number of iterations or ``'auto'``, stored unchanged.
    power_iteration_normalizer :
        Normalizer of the power iterations, stored unchanged.
    random_state :
        Seed or random generator, passed through ``check_random_state``.
    one_pass :
        One-pass flag, stored unchanged.
    """
    super(HalkoEig, self).__init__(which=which)

    # Options of the randomized scheme, kept as given.
    self.n_oversamples = n_oversamples
    self.n_iter = n_iter
    self.power_iteration_normalizer = power_iteration_normalizer
    self.one_pass = one_pass

    self.random_state = check_random_state(random_state)
def __init__(
        self,
        n_components: int = 2,
        scale: float = .1,
        resolution: float = 1,
        tol_optimization: float = 1e-3,
        tol_aggregation: float = 1e-3,
        n_aggregations: int = -1,
        shuffle_nodes: bool = False,
        random_state: Optional[Union[np.random.RandomState, int]] = None,
        verbose: bool = False):
    """Record the embedding settings and build the inner Louvain clusterer.

    Parameters
    ----------
    n_components :
        Number of components, stored unchanged.
    scale :
        Scale parameter, stored unchanged.
    resolution, tol_optimization, tol_aggregation, n_aggregations, shuffle_nodes :
        Forwarded to the :class:`Louvain` instance kept in ``self._clustering_method``.
    random_state :
        Forwarded to :class:`Louvain` and also passed through ``check_random_state``
        to set ``self.random_state``.
    verbose :
        Forwarded to :class:`Louvain` only.
    """
    super(LouvainNE, self).__init__()

    self.n_components = n_components
    self.scale = scale

    louvain_options = dict(
        resolution=resolution,
        tol_optimization=tol_optimization,
        tol_aggregation=tol_aggregation,
        n_aggregations=n_aggregations,
        shuffle_nodes=shuffle_nodes,
        random_state=random_state,
        verbose=verbose,
    )
    self._clustering_method = Louvain(**louvain_options)

    self.random_state = check_random_state(random_state)
    # Not known at construction time.
    self.bipartite = None
def __init__(
        self,
        resolution: float = 1,
        modularity: str = 'dugue',
        tol_optimization: float = 1e-3,
        tol_aggregation: float = 1e-3,
        n_aggregations: int = -1,
        shuffle_nodes: bool = False,
        random_state: Optional[Union[np.random.RandomState, int]] = None,
        isolated_nodes: str = 'remove'):
    """Record the settings of the Louvain-based embedding.

    Parameters
    ----------
    resolution :
        Resolution parameter, stored unchanged.
    modularity :
        Name of the modularity variant, stored lower-cased.
    tol_optimization :
        Tolerance of the optimization phase, stored unchanged.
    tol_aggregation :
        Tolerance of the aggregation phase, stored unchanged.
    n_aggregations :
        Number of aggregations, stored unchanged (default ``-1``).
    shuffle_nodes :
        Node shuffling flag, stored unchanged.
    random_state :
        Seed or random generator, passed through ``check_random_state``.
    isolated_nodes :
        Policy name for isolated nodes, stored unchanged.
    """
    super(LouvainEmbedding, self).__init__()

    # Case-insensitive: the name is normalised once, here.
    self.modularity = modularity.lower()

    self.resolution = resolution
    self.tol_optimization = tol_optimization
    self.tol_aggregation = tol_aggregation
    self.n_aggregations = n_aggregations
    self.shuffle_nodes = shuffle_nodes
    self.isolated_nodes = isolated_nodes

    self.random_state = check_random_state(random_state)
    # No labels yet at construction time.
    self.labels_ = None
def albert_barabasi(n: int = 100, degree: int = 3, undirected: bool = True, seed: Optional[int] = None) \
        -> sparse.csr_matrix:
    """Albert-Barabasi model.

    The graph starts from a clique on the first ``degree`` nodes. Each subsequent node is connected to
    ``degree`` distinct existing nodes, drawn with probability proportional to their current degree.

    Parameters
    ----------
    n : int
        Number of nodes.
    degree : int
        Degree of incoming nodes (less than **n**).
    undirected : bool
        If ``True``, return an undirected graph.
    seed :
        Seed of the random generator (optional).

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency matrix.

    Example
    -------
    >>> from sknetwork.data import albert_barabasi
    >>> adjacency = albert_barabasi(30, 3)
    >>> adjacency.shape
    (30, 30)

    References
    ----------
    Albert, R., Barabási, A.-L. (2002). `Statistical mechanics of complex networks
    <https://journals.aps.org/rmp/abstract/10.1103/RevModPhys.74.47>`_
    Reviews of Modern Physics.
    """
    random_state = check_random_state(seed)

    # Seed graph: a clique on the first ``degree`` nodes, each of degree ``degree - 1``.
    degrees = np.zeros(n, int)
    degrees[:degree] = degree - 1
    edges = [(i, j) for i in range(degree) for j in range(i)]

    # Running sum of all degrees, maintained incrementally instead of calling
    # ``degrees.sum()`` (linear in n) for every new node.
    total_degree = int(degrees.sum())

    for i in range(degree, n):
        if total_degree > 0:
            attachment = degrees[:i] / total_degree
        else:
            # With ``degree == 1`` the seed is a single isolated node: there is no degree
            # to be proportional to, so fall back to a uniform draw (``p=None``)
            # rather than dividing by zero.
            attachment = None
        neighbors = random_state.choice(a=i, p=attachment, size=degree, replace=False)
        degrees[neighbors] += 1
        degrees[i] = degree
        # ``degree`` new edges: one endpoint on node i, one on each distinct neighbor.
        total_degree += 2 * degree
        edges += [(i, j) for j in neighbors]

    return edgelist2adjacency(edges, undirected)
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'RandomProjection':
    """Compute the graph embedding.

    A random Gaussian matrix is orthonormalised by QR, then the embedding accumulates
    the terms ``(alpha * transition)^t`` applied to it, for ``t = 0, ..., n_iter``.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`RandomProjection`
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    n_nodes = adjacency.shape[0]

    rng = check_random_state(self.random_state)
    gaussian = rng.normal(size=(n_nodes, self.n_components))
    # Orthonormal columns, obtained from the Q factor of the QR decomposition.
    basis, _ = np.linalg.qr(gaussian)

    # Current term of the series, and its running sum.
    term = basis
    embedding = term.copy()

    transition = normalize(adjacency) if self.random_walk else adjacency

    for _ in range(self.n_iter):
        term = self.alpha * transition.dot(term)
        embedding += term

    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    return self
def randomized_eig(matrix, n_components: int, which='LM', n_oversamples: int = 10, n_iter='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto', random_state=None, one_pass: bool = False):
    """Truncated randomized eigenvalue decomposition.

    The input matrix is left untouched: for ``which='SM'`` the shifted matrix is built as a new object.

    Parameters
    ----------
    matrix: ndarray or sparse matrix
        Square matrix to decompose.
    n_components: int
        Number of eigenvalues and eigenvectors to extract.
    which: str
        Which eigenvalues to compute. ``'SM'`` works on ``shift * I - matrix``, where ``shift`` is an upper
        bound on the spectral radius, and maps the eigenvalues back afterwards.
        Any other entry works on ``matrix`` directly (``'LM'`` is the default).
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of ``matrix`` is ``n_components + n_oversamples``. Smaller number can improve
        speed but can negatively impact the quality of approximation of eigenvectors and eigenvalues.
    n_iter: int or 'auto' (default is 'auto')
        Number of power iterations. ``'auto'`` selects 7 if ``n_components < .1 * min(matrix.shape)``
        and 4 otherwise. See :meth:`randomized_range_finder`.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    random_state: int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`
    one_pass: bool (default=False)
        whether to use algorithm 5.6 instead of 5.3. 5.6 requires less access to the original matrix,
        while 5.3 is more accurate.

    Returns
    -------
    eigenvalues: np.ndarray
        The first ``n_components`` eigenvalues of the reduced problem, taken in decreasing order
        (before being mapped back when ``which='SM'``).
    eigenvectors: np.ndarray
        The corresponding eigenvectors, one per column.

    References
    ----------
    Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions
    Halko, et al., 2009
    http://arxiv.org/abs/arXiv:0909.4061
    """
    check_square(adjacency=matrix)
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    shift_value: float = 0.

    if which == 'SM':
        # Upper bound on the spectral radius: largest absolute row sum,
        # or 1.1 times the randomized top eigenvalue when ``abs`` is not supported.
        try:
            shift_value = (abs(matrix).dot(np.ones(matrix.shape[1]))).max()
        except TypeError:
            shift_value = 1.1 * randomized_eig(matrix, n_components=1)[0][0]

        # Out-of-place on purpose: ``matrix *= -1`` would negate the caller's matrix.
        matrix = matrix * -1
        if isinstance(matrix, SparseLR):
            matrix = matrix + shift_value * sparse.identity(matrix.shape[0], format='csr')
        else:
            matrix = matrix + shift_value * sparse.identity(matrix.shape[0])

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        # Adjust n_iter. 7 was found a good compromise for PCA.
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    range_matrix, random_matrix, random_proj = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state, True)

    if one_pass:
        # Reduced matrix recovered by least squares from the random projection only.
        approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix), random_proj.T.dot(range_matrix),
                                        None)[0].T
    else:
        # Reduced matrix obtained with one more product with the matrix.
        approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix)

    eigenvalues, eigenvectors = np.linalg.eig(approx_matrix)
    del approx_matrix

    # eigenvalues indices in decreasing order
    values_order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[values_order]
    # Lift the eigenvectors of the reduced problem back to the original space.
    eigenvectors = np.dot(range_matrix, eigenvectors)[:, values_order]

    if which == 'SM':
        # Undo the transformation ``shift * I - matrix``.
        eigenvalues = shift_value - eigenvalues

    return eigenvalues[:n_components], eigenvectors[:, :n_components]
def randomized_range_finder(matrix: np.ndarray, size: int, n_iter: int, power_iteration_normalizer='auto',
                            random_state=None, return_all: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Compute an orthonormal matrix :math:`Q`, whose range approximates the range of the input matrix.

    :math:`A \\approx QQ^*A`.

    Parameters
    ----------
    matrix :
        Input matrix
    size :
        Number of random vectors, i.e. number of columns of the sampled basis.
    n_iter :
        Number of power iterations. It can be used to deal with very noisy problems.
        Must be an integer here; the ``'auto'`` mode is resolved by the callers
        (:meth:`randomized_svd` and :meth:`randomized_eig`) before this function is called.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most
        accurate), ``None`` (the fastest but numerically unstable when ``n_iter`` is large, e.g. typically 5 or
        larger), or ``'LU'`` factorization (numerically stable but can lose slightly in accuracy).
        The ``'auto'`` mode applies no normalization if ``n_iter`` <= 2 and switches to ``'LU'`` otherwise.
        The string ``'none'`` is accepted as a synonym of ``None``.
    random_state: int, RandomState instance or ``None``, optional (default= ``None``)
        The seed of the pseudo random number generator used to draw the random vectors.
        Passed through ``check_random_state``.
    return_all : if True, returns (range_matrix, random_matrix, random_proj)
                else returns range_matrix.

    Returns
    -------
    range_matrix : np.ndarray
        Matrix with ``matrix.shape[0]`` rows and orthonormal columns (Q factor of an economic QR),
        the range of which approximates well the range of the input matrix.
    random_matrix : np.ndarray, optional
        Random matrix of shape (``matrix.shape[1]``, ``size``) used as starting point.
    projected_matrix : np.ndarray, optional
        Product between the input matrix and the random matrix.

    Notes
    -----
    Follows Algorithm 4.3 of
    `Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions
    <http://arxiv.org/pdf/0909.4061>`_
    Halko, et al., 2009 (arXiv:909)
    """
    random_state = check_random_state(random_state)

    # Generating normal random vectors with shape: (A.shape[1], size)
    random_matrix = random_state.normal(size=(matrix.shape[1], size))
    if matrix.dtype.kind == 'f':
        # Ensure f32 is preserved as f32
        random_matrix = random_matrix.astype(matrix.dtype, copy=False)
    range_matrix = random_matrix.copy()

    if power_iteration_normalizer is None:
        # ``None`` is the documented way to disable normalization. Without this mapping it
        # would match none of the branches below and the power iterations would be skipped.
        power_iteration_normalizer = 'none'
    elif power_iteration_normalizer == 'auto':
        # Deal with "auto" mode
        if n_iter <= 2:
            power_iteration_normalizer = 'none'
        else:
            power_iteration_normalizer = 'LU'

    # Perform power iterations with 'range_matrix' to further 'imprint' the top
    # singular vectors of matrix in 'range_matrix'
    for _ in range(n_iter):
        if power_iteration_normalizer == 'none':
            range_matrix = safe_sparse_dot(matrix, range_matrix)
            range_matrix = safe_sparse_dot(matrix.T, range_matrix)
        elif power_iteration_normalizer == 'LU':
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix, range_matrix), permute_l=True)
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix.T, range_matrix), permute_l=True)
        elif power_iteration_normalizer == 'QR':
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix.T, range_matrix), mode='economic')

    # Sample the range of 'matrix' by linear projection of 'range_matrix'
    # Extract an orthonormal basis
    range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')

    if return_all:
        return range_matrix, random_matrix, matrix.dot(random_matrix)
    else:
        return range_matrix
def randomized_svd(matrix, n_components: int, n_oversamples: int = 10, n_iter='auto', transpose='auto',
                   power_iteration_normalizer: Union[str, None] = 'auto', flip_sign: bool = True,
                   random_state=None):
    """Truncated randomized SVD

    Parameters
    ----------
    matrix : ndarray or sparse matrix
        Matrix to decompose
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default=10)
        Extra random vectors used to sample the range of ``matrix``, for better conditioning.
        ``n_components + n_oversamples`` random vectors are used in total. A smaller value is faster
        but can degrade the approximation of singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        Number of power iterations. ``'auto'`` selects 7 if ``n_components < .1 * min(matrix.shape)``
        and 4 otherwise. See :meth:`randomized_range_finder`
    power_iteration_normalizer : ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to ``matrix.T`` instead of ``matrix``. The result should
        approximately be the same. The 'auto' mode triggers the transposition when
        ``matrix.shape[1] > matrix.shape[0]``, since this implementation tends to be a little faster
        with a smaller second dimension.
    flip_sign : boolean, (default=True)
        Singular vectors are only defined up to a sign. If ``True``, the ambiguity is resolved
        through ``svd_flip``, so that the outcome is deterministic.
    random_state : int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`

    Returns
    -------
    left_singular_vectors: np.ndarray
    singular_values: np.ndarray
    right_singular_vectors: np.ndarray

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated singular value decomposition using
    randomization to speed up the computations. It is particularly fast on large matrices when only a small
    number of components is needed. For further speed up, ``n_iter`` can be set <=2
    (at the cost of loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
      (algorithm 5.1)
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis
      A. Szlam et al. 2014
    """
    random_state = check_random_state(random_state)
    n_row, n_col = matrix.shape
    n_random = n_components + n_oversamples

    if n_iter == 'auto':
        # No explicit number of iterations: more of them when few components are requested.
        if n_components < .1 * min(n_row, n_col):
            n_iter = 7
        else:
            n_iter = 4

    if transpose == 'auto':
        transpose = n_row < n_col
    if transpose:
        # Work on the transpose, whose second dimension is the smaller one.
        matrix = matrix.T

    basis: np.ndarray = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state)

    # Express the matrix in the (k + p)-dimensional space spanned by the basis,
    # then run an exact SVD on this thin matrix.
    reduced = safe_sparse_dot(basis.T, matrix)
    u_reduced, singular_values, v = linalg.svd(reduced, full_matrices=False)
    del reduced
    u = np.dot(basis, u_reduced)

    if flip_sign:
        if transpose:
            # The roles of u and v are swapped in the transposed problem,
            # so the sign decision is based on v.
            u, v = svd_flip(u, v, u_based_decision=False)
        else:
            u, v = svd_flip(u, v)

    singular_values = singular_values[:n_components]
    if not transpose:
        return u[:, :n_components], singular_values, v[:n_components, :]
    # Transposed problem: swap the two factors back to the input convention.
    return v[:n_components, :].T, singular_values, u[:, :n_components].T
def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
                   metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
    """Watts-Strogatz model.

    Nodes are placed on a ring and linked to their ``degree // 2`` next nodes on each side.
    Each edge is then rewired with probability ``prob`` to a node that is not currently a neighbor.

    Parameters
    ----------
    n :
        Number of nodes.
    degree :
        Initial degree of nodes.
    prob :
        Probability of edge modification.
    seed :
        Seed of the random generator (optional).
    metadata :
        If ``True``, return a `Bunch` object with metadata.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix or graph with metadata (positions).

    Example
    -------
    >>> from sknetwork.data import watts_strogatz
    >>> adjacency = watts_strogatz(30, 4, 0.02)
    >>> adjacency.shape
    (30, 30)

    References
    ----------
    Watts, D., Strogatz, S. (1998). Collective dynamics of small-world networks, Nature.
    """
    random_state = check_random_state(seed)

    # Ring lattice. The reshape keeps a valid (0, 2) array when there is no edge (degree < 2).
    edges = np.array([(i, (i + j + 1) % n) for i in range(n) for j in range(degree // 2)], dtype=int)
    edges = edges.reshape(-1, 2)
    row, col = edges[:, 0], edges[:, 1]
    adjacency = sparse.coo_matrix((np.ones_like(row, int), (row, col)), shape=(n, n))
    adjacency = sparse.lil_matrix(adjacency + adjacency.T)

    for i in range(n):
        # Snapshot of the neighbors: ``adjacency.rows[i]`` is the live row of the LIL matrix,
        # modified by the assignments below, and must not be iterated while it changes.
        neighbors = list(adjacency.rows[i])
        # Possible new endpoints: nodes that are neither i nor a current neighbor of i.
        candidates = sorted(set(range(n)) - set(neighbors) - {i})
        for j in neighbors:
            # The random draw comes first so that the random stream does not depend on candidates.
            if random_state.random() < prob and candidates:
                node = random_state.choice(candidates)
                adjacency[i, node] = 1
                adjacency[node, i] = 1
                adjacency[i, j] = 0
                adjacency[j, i] = 0
                # Keep the candidates in sync: node is now a neighbor of i, j no longer is.
                candidates.remove(node)
                candidates.append(j)

    adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)

    if metadata:
        graph = Bunch()
        graph.adjacency = adjacency
        graph.position = cyclic_position(n)
        return graph
    else:
        return adjacency
def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
                seed: Optional[int] = None, metadata: bool = False) \
        -> Union[sparse.csr_matrix, Bunch]:
    """Stochastic block model.

    Parameters
    ----------
    sizes :
        Block sizes.
    p_in :
        Probability of connection within blocks. Either a single number, used for all blocks,
        or one value per block.
    p_out :
        Probability of connection across blocks.
    seed :
        Seed of the random generator (optional).
    metadata :
        If ``True``, return a `Bunch` object with metadata.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix or graph with metadata (labels).

    Example
    -------
    >>> import numpy as np
    >>> from sknetwork.data import block_model
    >>> sizes = np.array([4, 5])
    >>> adjacency = block_model(sizes)
    >>> adjacency.shape
    (9, 9)

    References
    ----------
    Airoldi, E., Blei, D., Feinberg, S., Xing, E. (2007).
    `Mixed membership stochastic blockmodels. <https://arxiv.org/pdf/0705.4485.pdf>`_
    Journal of Machine Learning Research.
    """
    random_state = check_random_state(seed)
    sizes = np.array(sizes)

    if np.ndim(p_in) == 0:
        # Any scalar (Python or NumPy, integer or float) is broadcast to one value per block.
        p_in = p_in * np.ones_like(sizes)
    else:
        p_in = np.array(p_in)

    # each edge is considered twice
    p_in = p_in / 2

    # Upper block-triangular layout; the lower part is filled by the symmetrization below.
    matrix = []
    for i, a in enumerate(sizes):
        row = []
        for j, b in enumerate(sizes):
            if j < i:
                row.append(None)
            elif j > i:
                row.append(sparse.random(a, b, p_out, dtype=bool, random_state=random_state))
            else:
                row.append(sparse.random(a, a, p_in[i], dtype=bool, random_state=random_state))
        matrix.append(row)

    adjacency = sparse.bmat(matrix)
    # No self-loops.
    adjacency.setdiag(0)
    adjacency = directed2undirected(adjacency.tocsr(), weighted=False)

    if metadata:
        graph = Bunch()
        graph.adjacency = adjacency
        labels = np.repeat(np.arange(len(sizes)), sizes)
        graph.labels = labels
        return graph
    else:
        return adjacency