def __init__(self, which='LM', n_oversamples: int = 10, n_iter='auto',
             power_iteration_normalizer: Union[str, None] = 'auto', random_state=None,
             one_pass: bool = False):
    EigSolver.__init__(self, which=which)
    self.n_oversamples = n_oversamples
    self.n_iter = n_iter
    self.power_iteration_normalizer = power_iteration_normalizer
    self.random_state = check_random_state(random_state)
    self.one_pass = one_pass
def __init__(self, engine: str = 'default', algorithm: Union[str, Optimizer] = 'default',
             resolution: float = 1, tol: float = 1e-3, agg_tol: float = 1e-3,
             max_agg_iter: int = -1, shuffle_nodes: bool = False, sorted_cluster: bool = True,
             random_state: Optional[Union[np.random.RandomState, int]] = None,
             verbose: bool = False):
    super(Louvain, self).__init__()
    VerboseMixin.__init__(self, verbose)
    self.random_state = check_random_state(random_state)
    if algorithm == 'default':
        self.algorithm = GreedyModularity(resolution, tol, engine=check_engine(engine))
    elif isinstance(algorithm, Optimizer):
        self.algorithm = algorithm
    else:
        raise TypeError("Algorithm must be 'default' or a valid Optimizer.")
    if not isinstance(max_agg_iter, int):
        raise TypeError('The maximum number of aggregation iterations must be an integer.')
    self.agg_tol = agg_tol
    self.max_agg_iter = max_agg_iter
    self.shuffle_nodes = shuffle_nodes
    self.sorted_cluster = sorted_cluster
    self.iteration_count_ = None
    self.aggregate_graph_ = None
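A minimal usage sketch for Louvain on a toy graph. This assumes the scikit-network-style API around this constructor (a ``fit`` method that sets a ``labels_`` attribute), which is not shown in this section:

import numpy as np
from scipy import sparse

# Toy graph: two triangles {0, 1, 2} and {3, 4, 5} joined by the edge (2, 3).
edges = [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5)]
row, col = zip(*edges)
adjacency = sparse.csr_matrix((np.ones(len(edges)), (row, col)), shape=(6, 6))
adjacency = adjacency + adjacency.T  # symmetrize

louvain = Louvain(resolution=1, random_state=0)
louvain.fit(adjacency)  # assumed to set louvain.labels_
print(louvain.labels_)  # expected: one label per triangle, e.g. [0 0 0 1 1 1]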
def block_model(clusters: Union[np.ndarray, int], shape: Optional[Tuple[int, int]] = None,
                inner_prob: float = .2, outer_prob: float = .01,
                random_state: Optional[Union[np.random.RandomState, int]] = None) \
        -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
    """A block model graph.

    Parameters
    ----------
    clusters: Union[np.ndarray, int]
        Cluster specification (an array of couples where each entry denotes the shape
        of a cluster, or an int denoting the number of clusters). If an ``int`` is
        passed, ``shape`` must be given and the clusters are identical in shape.
    shape: Optional[Tuple[int, int]]
        The shape of the adjacency matrix to obtain (might be rectangular for a
        biadjacency matrix).
    inner_prob: float
        Intra-cluster connection probability.
    outer_prob: float
        Inter-cluster connection probability.
    random_state: Optional[Union[np.random.RandomState, int]]
        Random number generator or random seed. If ``None``, ``numpy.random`` is used.

    Returns
    -------
    adjacency: sparse.csr_matrix
        The adjacency (or biadjacency) matrix of the graph.
    ground_truth_features: np.ndarray
        The labels associated with the features.
    ground_truth_samples: np.ndarray
        The labels associated with the samples.
    """
    check_is_proba(inner_prob)
    check_is_proba(outer_prob)
    random_state = check_random_state(random_state)

    if isinstance(clusters, int):
        if not shape:
            raise ValueError('Please specify the shape of the matrix when giving a number of clusters.')
        if clusters <= 0:
            raise ValueError('The number of clusters must be positive.')
        n_clusters = clusters
        clusters_cumul = np.zeros((n_clusters + 1, 2), dtype=int)
        row_step, col_step = shape[0] // clusters, shape[1] // clusters
        if row_step == 0 or col_step == 0:
            raise ValueError('The number of clusters is too high given the shape of the matrix.')
        # Truncate to n_clusters + 1 boundaries in case the shape is not a multiple
        # of the number of clusters; the last cluster absorbs the remainder.
        clusters_cumul[:, 0] = np.arange(0, shape[0] + 1, row_step)[:n_clusters + 1]
        clusters_cumul[:, 1] = np.arange(0, shape[1] + 1, col_step)[:n_clusters + 1]
        clusters_cumul[-1, 0] = shape[0]
        clusters_cumul[-1, 1] = shape[1]
    elif isinstance(clusters, np.ndarray):
        n_clusters = clusters.shape[0]
        clusters_cumul = np.cumsum(clusters, axis=0)
        clusters_cumul = np.insert(clusters_cumul, 0, 0, axis=0)
        if shape:
            if clusters_cumul[-1, 0] != shape[0] or clusters_cumul[-1, 1] != shape[1]:
                raise ValueError('Cluster sizes do not match matrix size.')
    else:
        raise TypeError('Please specify an array of sizes or a number of clusters '
                        '(along with the shape of the desired matrix).')

    n_rows, n_cols = clusters_cumul[-1, 0], clusters_cumul[-1, 1]
    ground_truth_samples = np.zeros(n_rows, dtype=int)
    ground_truth_features = np.zeros(n_cols, dtype=int)
    mat = sparse.dok_matrix((n_rows, n_cols), dtype=bool)
    for label in range(n_clusters):
        ground_truth_samples[clusters_cumul[label, 0]:clusters_cumul[label + 1, 0]] = label
        ground_truth_features[clusters_cumul[label, 1]:clusters_cumul[label + 1, 1]] = label
        mask = np.full(n_cols, outer_prob)
        mask[clusters_cumul[label, 1]:clusters_cumul[label + 1, 1]] = inner_prob
        for row in range(clusters_cumul[label, 0], clusters_cumul[label + 1, 0]):
            mat[row, (random_state.rand(n_cols) < mask)] = True

    return sparse.csr_matrix(mat), ground_truth_features, ground_truth_samples
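A quick sanity check of ``block_model``, using only the signature above: two equal clusters on a 100 x 100 matrix.

import numpy as np

adjacency, features, samples = block_model(clusters=2, shape=(100, 100),
                                           inner_prob=.2, outer_prob=.01,
                                           random_state=42)
print(adjacency.shape)        # (100, 100)
print(np.bincount(samples))   # [50 50]: two clusters of 50 samples each
print(np.bincount(features))  # [50 50]: two clusters of 50 features each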
def randomized_range_finder(matrix: np.ndarray, size: int, n_iter: int,
                            power_iteration_normalizer='auto', random_state=None,
                            return_all: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Compute an orthonormal matrix :math:`Q`, whose range approximates the range
    of the input matrix: :math:`A \\approx QQ^*A`.

    Parameters
    ----------
    matrix :
        Input matrix.
    size :
        Size of the return array.
    n_iter :
        Number of power iterations. It can be used to deal with very noisy problems.
        When 'auto', it is set to 4, unless ``size`` is small (< .1 * min(matrix.shape)),
        in which case ``n_iter`` is set to 7. This improves precision with few components.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        Whether the power iterations are normalized with step-by-step QR factorization
        (the slowest but most accurate), ``None`` (the fastest but numerically unstable
        when ``n_iter`` is large, e.g. typically 5 or larger), or ``'LU'`` factorization
        (numerically stable but can lose slightly in accuracy). The ``'auto'`` mode
        applies no normalization if ``n_iter`` <= 2 and switches to ``'LU'`` otherwise.
    random_state: int, RandomState instance or ``None``, optional (default= ``None``)
        The seed of the pseudo random number generator to use when shuffling the data.
        If int, ``random_state`` is the seed used by the random number generator;
        if RandomState instance, ``random_state`` is the random number generator;
        if ``None``, the random number generator is the RandomState instance used by
        ``np.random``.
    return_all : bool
        If ``True``, returns (range_matrix, random_matrix, random_proj);
        otherwise returns range_matrix only.

    Returns
    -------
    range_matrix : np.ndarray
        Matrix of shape (matrix.shape[0], size), the range of which approximates well
        the range of the input matrix.
    random_matrix : np.ndarray, optional
        Projection matrix.
    projected_matrix : np.ndarray, optional
        Product between the data and the projection matrix.

    Notes
    -----
    Follows Algorithm 4.3 of
    `Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions <http://arxiv.org/pdf/0909.4061>`_
    Halko, et al., 2009 (arXiv:0909.4061).
    """
    random_state = check_random_state(random_state)

    # Generate normal random vectors with shape (matrix.shape[1], size).
    random_matrix = random_state.normal(size=(matrix.shape[1], size))
    if matrix.dtype.kind == 'f':
        # Ensure f32 is preserved as f32.
        random_matrix = random_matrix.astype(matrix.dtype, copy=False)
    range_matrix = random_matrix.copy()

    # Map None to 'none' so the documented value selects the unnormalized branch
    # below, then deal with the 'auto' mode.
    if power_iteration_normalizer is None:
        power_iteration_normalizer = 'none'
    if power_iteration_normalizer == 'auto':
        if n_iter <= 2:
            power_iteration_normalizer = 'none'
        else:
            power_iteration_normalizer = 'LU'

    # Perform power iterations with 'range_matrix' to further 'imprint' the top
    # singular vectors of the matrix in 'range_matrix'.
    for _ in range(n_iter):
        if power_iteration_normalizer == 'none':
            range_matrix = safe_sparse_dot(matrix, range_matrix)
            range_matrix = safe_sparse_dot(matrix.T, range_matrix)
        elif power_iteration_normalizer == 'LU':
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix, range_matrix), permute_l=True)
            range_matrix, _ = linalg.lu(safe_sparse_dot(matrix.T, range_matrix), permute_l=True)
        elif power_iteration_normalizer == 'QR':
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')
            range_matrix, _ = linalg.qr(safe_sparse_dot(matrix.T, range_matrix), mode='economic')

    # Sample the range of 'matrix' by linear projection of 'range_matrix'
    # and extract an orthonormal basis.
    range_matrix, _ = linalg.qr(safe_sparse_dot(matrix, range_matrix), mode='economic')

    if return_all:
        return range_matrix, random_matrix, matrix.dot(random_matrix)
    else:
        return range_matrix
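A short sketch exercising ``randomized_range_finder`` on a random dense matrix; the orthonormality of the returned basis is guaranteed by the final QR step.

import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(200, 100)
Q = randomized_range_finder(A, size=10, n_iter=4,
                            power_iteration_normalizer='LU', random_state=0)
print(Q.shape)                           # (200, 10)
print(np.allclose(Q.T @ Q, np.eye(10)))  # True: columns are orthonormal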
def randomized_eig(matrix, n_components: int, which='LM', n_oversamples: int = 10,
                   n_iter='auto', power_iteration_normalizer: Union[str, None] = 'auto',
                   random_state=None, one_pass: bool = False):
    """Randomized eigenvalue decomposition.

    Parameters
    ----------
    matrix: ndarray or sparse matrix
        Matrix to decompose.
    n_components: int
        Number of eigenvalues and eigenvectors to extract.
    which: str
        Which eigenvalues to compute: ``'LM'`` for largest magnitude and ``'SM'``
        for smallest magnitude. Any other entry is treated as ``'LM'``.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors used to
        find the range of ``matrix`` is ``n_components + n_oversamples``. A smaller
        number can improve speed but can negatively impact the quality of the
        approximation of the eigenvalues and eigenvectors.
    n_iter: int or 'auto' (default is 'auto')
        See :meth:`randomized_range_finder`.
    power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`.
    random_state: int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`.
    one_pass: bool (default=False)
        Whether to use Algorithm 5.6 instead of 5.3. Algorithm 5.6 requires fewer
        accesses to the original matrix, while 5.3 is more accurate.

    Returns
    -------
    eigenvalues: np.ndarray
    eigenvectors: np.ndarray

    References
    ----------
    Finding structure with randomness: Stochastic algorithms for constructing
    approximate matrix decompositions.
    Halko, et al., 2009. https://arxiv.org/abs/0909.4061
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = matrix.shape
    lambda_max = 0.

    if n_samples != n_features:
        raise ValueError('The input matrix is not square.')

    if which == 'SM':
        # Shift the spectrum: the smallest eigenvalues of 'matrix' become the
        # largest eigenvalues of lambda_max * I - matrix.
        lambda_max = 1.1 * randomized_eig(matrix, n_components=1)[0][0]
        matrix *= -1
        if isinstance(matrix, SparseLR):
            matrix += SparseLR(lambda_max * sparse.identity(matrix.shape[0]), [])
        else:
            matrix += lambda_max * sparse.identity(matrix.shape[0])

    if n_iter == 'auto':
        # If the number of iterations is not explicitly specified, adjust n_iter.
        # 7 was found a good compromise for PCA. See #5299.
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    range_matrix, random_matrix, random_proj = randomized_range_finder(
        matrix, n_random, n_iter, power_iteration_normalizer, random_state, True)

    if one_pass:
        approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix),
                                        random_proj.T.dot(range_matrix), rcond=None)[0].T
    else:
        approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix)

    eigenvalues, eigenvectors = np.linalg.eig(approx_matrix)
    del approx_matrix
    # Eigenvalue indices in decreasing order.
    values_order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[values_order]
    eigenvectors = np.dot(range_matrix, eigenvectors)[:, values_order]

    if which == 'SM':
        # Undo the spectrum shift.
        eigenvalues = lambda_max - eigenvalues

    return eigenvalues[:n_components], eigenvectors[:, :n_components]
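A hedged sketch comparing ``randomized_eig`` with a dense eigensolver on a small symmetric matrix; the randomized values are approximate, so only a close match is expected.

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 100)
sym = X + X.T  # symmetric, so the spectrum is real

eigenvalues, eigenvectors = randomized_eig(sym, n_components=3, random_state=0)
exact = np.sort(np.linalg.eigvalsh(sym))[::-1][:3]
print(np.real(eigenvalues))  # should closely match the exact values below
print(exact)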
def randomized_svd(matrix, n_components: int, n_oversamples: int = 10, n_iter='auto',
                   transpose='auto', power_iteration_normalizer: Union[str, None] = 'auto',
                   flip_sign: bool = True, random_state=None):
    """Truncated randomized SVD.

    Parameters
    ----------
    matrix : ndarray or sparse matrix
        Matrix to decompose.
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default=10)
        Additional number of random vectors to sample the range of ``matrix`` so as
        to ensure proper conditioning. The total number of random vectors used to
        find the range of ``matrix`` is ``n_components + n_oversamples``. A smaller
        number can improve speed but can negatively impact the quality of the
        approximation of the singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        See :meth:`randomized_range_finder`.
    power_iteration_normalizer : ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None``
        See :meth:`randomized_range_finder`.
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to ``matrix.T`` instead of ``matrix``.
        The result should be approximately the same. The 'auto' mode triggers the
        transposition if ``matrix.shape[1] > matrix.shape[0]``, since this
        implementation of randomized SVD tends to be a little faster in that case.
    flip_sign : bool (default=True)
        The output of a singular value decomposition is only unique up to the signs
        of the singular vectors. If ``flip_sign`` is ``True``, the sign ambiguity is
        resolved by making the largest loadings for each component in the left
        singular vectors positive.
    random_state : int, RandomState instance or None, optional (default=None)
        See :meth:`randomized_range_finder`.

    Returns
    -------
    left_singular_vectors: np.ndarray
    singular_values: np.ndarray
    right_singular_vectors: np.ndarray

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated singular value
    decomposition using randomization to speed up the computations. It is
    particularly fast on large matrices from which you wish to extract only a small
    number of components. To obtain further speedup, ``n_iter`` can be set <= 2
    (at the cost of a loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions.
      Halko, et al., 2009. https://arxiv.org/abs/0909.4061 (Algorithm 5.1)
    * A randomized algorithm for the decomposition of matrices.
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert.
    * An implementation of a randomized algorithm for principal component analysis.
      A. Szlam et al., 2014.
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = matrix.shape

    if n_iter == 'auto':
        # If the number of iterations is not explicitly specified, adjust n_iter.
        # 7 was found a good compromise for PCA. See #5299.
        n_iter = 7 if n_components < .1 * min(matrix.shape) else 4

    if transpose == 'auto':
        transpose = n_samples < n_features
    if transpose:
        # This implementation is a bit faster with smaller shape[1].
        matrix = matrix.T

    range_matrix: np.ndarray = randomized_range_finder(matrix, n_random, n_iter,
                                                       power_iteration_normalizer,
                                                       random_state)

    # Project the matrix to the (k + p)-dimensional space using the basis vectors.
    approx_matrix = safe_sparse_dot(range_matrix.T, matrix)

    # Compute the SVD on the thin matrix: (k + p) wide.
    uhat, singular_values, v = linalg.svd(approx_matrix, full_matrices=False)
    del approx_matrix
    u = np.dot(range_matrix, uhat)

    if flip_sign:
        if not transpose:
            u, v = svd_flip(u, v)
        else:
            # In the transposed case, 'v' corresponds to the left singular vectors
            # of the original matrix, so flip based on 'v' rather than 'u'.
            u, v = svd_flip(u, v, u_based_decision=False)

    if transpose:
        # Transpose back the results according to the input convention.
        return v[:n_components, :].T, singular_values[:n_components], u[:, :n_components].T
    else:
        return u[:, :n_components], singular_values[:n_components], v[:n_components, :]
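A hedged sketch of ``randomized_svd`` on a random sparse matrix, checked against ARPACK via ``scipy.sparse.linalg.svds``; agreement is approximate, not exact.

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import svds

rng = np.random.RandomState(0)
M = sparse.random(500, 200, density=.05, random_state=rng, format='csr')

u, s, v = randomized_svd(M, n_components=5, random_state=0)
print(u.shape, s.shape, v.shape)  # (500, 5) (5,) (5, 200)
exact = np.sort(svds(M, k=5, return_singular_vectors=False))[::-1]
print(s)      # should closely match the exact leading singular values below
print(exact)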
def test_error_random_state(self):
    with self.assertRaises(TypeError):
        # noinspection PyTypeChecker
        check_random_state('junk')
def test_random_state(self):
    random_state = np.random.RandomState(1)
    self.assertEqual(type(check_random_state(random_state)), np.random.RandomState)