def assign_laplacian_matrix(self, laplacian_matrix, laplacian_type = "unknown"):
    """Store a user-supplied Laplacian matrix on this object.

    The input is passed through ``check_array`` (sparse formats listed in
    ``sparse_formats`` are accepted) and must be square.

    Parameters
    ----------
    laplacian_matrix : array-like or sparse matrix
        Candidate Laplacian; stored as ``self.laplacian_matrix``.
    laplacian_type : optional, default "unknown"
        Label stored verbatim as ``self.laplacian_type``.

    Raises
    ------
    ValueError
        If the validated matrix is not square.
    """
    validated = check_array(laplacian_matrix, accept_sparse=sparse_formats)
    n_rows, n_cols = validated.shape
    if n_rows != n_cols:
        raise ValueError("Laplacian matrix is not square")
    self.laplacian_matrix = validated
    self.laplacian_type = laplacian_type
def assign_distance_matrix(self, distance_mat, neighborhood_radius = None):
    """Store a user-supplied distance matrix on this object.

    The input is passed through ``check_array`` (sparse formats listed in
    ``sparse_formats`` are accepted) and must be square.

    Parameters
    ----------
    distance_mat : array-like or sparse matrix
        Candidate distance matrix; stored as ``self.distance_matrix``.
    neighborhood_radius : optional
        When not None, overwrites ``self.neighborhood_radius``; otherwise
        the existing attribute is left untouched.

    Raises
    ------
    ValueError
        If the validated matrix is not square.
    """
    validated = check_array(distance_mat, accept_sparse=sparse_formats)
    n_rows, n_cols = validated.shape
    if n_rows != n_cols:
        raise ValueError("distance matrix is not square")
    self.distance_matrix = validated
    if neighborhood_radius is not None:
        self.neighborhood_radius = neighborhood_radius
def assign_affinity_matrix(self, affinity_matrix, affinity_radius = None):
    """Store a user-supplied affinity matrix on this object.

    The input is passed through ``check_array`` (sparse formats listed in
    ``sparse_formats`` are accepted) and must be square.

    Parameters
    ----------
    affinity_matrix : array-like or sparse matrix
        Candidate affinity matrix; stored as ``self.affinity_matrix``.
    affinity_radius : optional
        When not None, overwrites ``self.affinity_radius`` and clears the
        ``self.default_affinity`` flag.

    Raises
    ------
    ValueError
        If the validated matrix is not square.
    """
    validated = check_array(affinity_matrix, accept_sparse=sparse_formats)
    n_rows, n_cols = validated.shape
    if n_rows != n_cols:
        raise ValueError("affinity matrix is not square")
    self.affinity_matrix = validated
    if affinity_radius is not None:
        self.affinity_radius = affinity_radius
        # NOTE(review): the flag is cleared only when an explicit radius is
        # supplied -- confirm this matches the intended nesting.
        self.default_affinity = False
def assign_data_matrix(self, X):
    """Validate ``X`` with ``check_array`` and store it as ``self.X``.

    Sparse inputs in any of the formats listed in ``sparse_formats`` are
    accepted.
    """
    self.X = check_array(X, accept_sparse=sparse_formats)
def __init__(self, X, neighborhood_radius = None, affinity_radius = None,
             distance_method = 'auto', input_type = 'data',
             laplacian_type = None, path_to_flann = None):
    """Set up the geometry object from a data, distance or affinity matrix.

    Parameters
    ----------
    X : array-like or sparse matrix
        Interpreted according to ``input_type``. It is validated with
        ``check_array`` before anything else reads its shape.
    neighborhood_radius : float-convertible, optional
        Defaults to ``1.0 / X.shape[1]`` when None.
    affinity_radius : float-convertible, optional
        Defaults to ``neighborhood_radius`` when None, in which case
        ``self.default_affinity`` is set to True.
    distance_method : str
        Must be a member of ``distance_methods``.
    input_type : {'data', 'distance', 'affinity'}
        Selects which attribute (``X``, ``distance_matrix`` or
        ``affinity_matrix``) receives the validated input; the other
        matrix attributes are initialised to None.
    laplacian_type : optional
        Stored verbatim as ``self.laplacian_type``.
    path_to_flann : str, optional
        If given and ``distance_method == 'pyflann'``, prepended to
        ``sys.path`` before importing pyflann.

    Raises
    ------
    ValueError
        On an unknown ``distance_method`` or ``input_type``, a non-square
        distance/affinity input, a radius that cannot be converted to
        float, or a requested backend that cannot be imported.
    """
    self.distance_method = distance_method
    self.input_type = input_type
    self.path_to_flann = path_to_flann
    self.laplacian_type = laplacian_type

    if self.distance_method not in distance_methods:
        raise ValueError("invalid distance method.")
    if self.input_type not in ('data', 'distance', 'affinity'):
        raise ValueError('input_type must be one of: data, distance, affinity.')

    # Validate first so that X.shape below works for list input as well.
    X = check_array(X, accept_sparse = sparse_formats)
    if self.input_type != 'data':
        n_rows, n_cols = X.shape
        if n_rows != n_cols:
            raise ValueError("input_type is %s but input matrix is not square"
                             % self.input_type)

    if neighborhood_radius is None:
        # 1.0 forces true division; plain 1/n is integer division on Python 2.
        self.neighborhood_radius = 1.0 / X.shape[1]
    else:
        try:
            # Builtin float: the np.float alias was removed in NumPy 1.24.
            self.neighborhood_radius = float(neighborhood_radius)
        except (TypeError, ValueError):
            raise ValueError("neighborhood_radius must be convertable to float")

    if affinity_radius is None:
        self.affinity_radius = self.neighborhood_radius
        self.default_affinity = True
    else:
        try:
            self.affinity_radius = float(affinity_radius)
        except (TypeError, ValueError):
            raise ValueError("affinity_radius must be convertable to float")
        self.default_affinity = False

    # Exactly one of the three input slots receives the validated matrix.
    self.X = X if self.input_type == 'data' else None
    self.distance_matrix = X if self.input_type == 'distance' else None
    self.affinity_matrix = X if self.input_type == 'affinity' else None
    self.laplacian_matrix = None

    # Default to None so the attribute always exists, including the case
    # distance_method == 'cython' with a non-data input.
    self.cyindex = None
    if distance_method == 'cython' and input_type == 'data':
        try:
            from Mmani.geometry.cyflann.index import Index
        except ImportError:
            raise ValueError("distance_method set to cython but "
                             "cyflann_index cannot be imported.")
        self.cyindex = Index(X)

    self.flindex = None
    self.flparams = None
    if distance_method == 'pyflann':
        if self.path_to_flann is not None:
            # FLANN is installed in a specific location
            sys.path.insert(0, self.path_to_flann)
        try:
            import pyflann as pyf
        except ImportError:
            raise ValueError("distance_method is set to pyflann but pyflann is "
                             "not available.")
        self.flindex = pyf.FLANN()
        self.flparams = self.flindex.build_index(X, algorithm = 'kmeans',
                                                 target_precision = 0.9)
def _select_eigenpairs(lambdas, vectors, n_components, largest):
    """Sort eigenpairs by eigenvalue and keep the leading ``n_components``."""
    order = np.argsort(lambdas)
    if largest:
        order = order[::-1]
    order = order[:n_components]
    return lambdas[order], vectors[:, order]


def eigen_decomposition(G, n_components=8, eigen_solver=None,
                        random_state=None, eigen_tol=0.0,
                        drop_first=True, largest = True):
    """Compute a partial eigendecomposition of a square matrix.

    Parameters
    ----------
    G : 2d numpy/scipy array, potentially sparse
        The matrix to decompose.
    n_components : integer, optional
        The number of eigenpairs requested.
    eigen_solver : {None, 'auto', 'dense', 'arpack', 'lobpcg', 'amg'}
        None is treated as 'auto'.
        auto : 'arpack' when G has more than 200 rows, otherwise 'dense'.
        dense : full decomposition with eigh (symmetric G) or eig.
            Should be avoided for large problems.
        arpack : eigsh (symmetric G) or eigs. ARPACK can be unstable for
            some problems.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient
            method; requires a symmetric G. Falls back to 'dense' (with a
            warning) when G has fewer than 5 * n_components + 1 rows.
        amg : lobpcg preconditioned with pyamg's smoothed aggregation
            solver; requires pyamg and a symmetric G.
    random_state : int seed, RandomState instance, or None (default)
        Used to initialise the lobpcg start vectors ('lobpcg' and 'amg').
    eigen_tol : float, optional, default=0.0
        Stopping tolerance passed to the ARPACK solvers.
    drop_first : bool, optional, default=True
        When True, one extra eigenpair is computed. This function does not
        discard it, so the caller receives n_components + 1 pairs.
    largest : bool, optional, default=True
        True for the largest eigenvalues, False for the smallest.

    Returns
    -------
    lambdas, diffusion_map : eigenvalues, eigenvectors
        For 'dense', 'lobpcg' and 'amg' the pairs are ordered by eigenvalue
        (descending when ``largest``); for 'arpack' they are returned in
        whatever order the ARPACK wrapper produces.

    Raises
    ------
    ValueError
        On an unknown ``eigen_solver``, a non-bool ``largest``, a missing
        pyamg when 'amg' is requested, or a non-symmetric G with 'lobpcg'
        or 'amg'.
    """
    n_nodes = G.shape[0]
    if eigen_solver is None:
        eigen_solver = 'auto'
    elif eigen_solver not in eigen_solvers:
        # Both values must go in one tuple: '%' binds tighter than ','.
        raise ValueError("Unknown value for eigen_solver: '%s'. "
                         "Should be: '%s'" % (eigen_solver, eigen_solvers))
    if eigen_solver == 'auto':
        eigen_solver = 'arpack' if n_nodes > 200 else 'dense'

    # pyamg is optional; it is only required for the 'amg' solver.
    if eigen_solver == 'amg':
        try:
            from pyamg import smoothed_aggregation_solver
        except ImportError:
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    # Check input values
    if not isinstance(largest, bool):
        raise ValueError("largest should be True if you want largest eigenvalues otherwise False")
    random_state = check_random_state(random_state)
    if drop_first:
        n_components = n_components + 1

    # Check for symmetry
    is_symmetric = _is_symmetric(G)

    # Convert G to best type for eigendecomposition. tocsr() returns a new
    # matrix, so the result has to be rebound.
    if sparse.issparse(G) and G.getformat() != 'csr':
        G = G.tocsr()
    G = G.astype(float)

    if eigen_solver == 'lobpcg' and n_nodes < 5 * n_components + 1:
        warnings.warn("lobpcg has problems with small number of nodes. Using dense eigh")
        eigen_solver = 'dense'

    # Try Eigen Methods:
    if eigen_solver == 'arpack':
        if is_symmetric:
            which = 'LM' if largest else 'SM'
            lambdas, diffusion_map = eigsh(G, k=n_components, which=which,
                                           tol=eigen_tol)
        else:
            which = 'LR' if largest else 'SR'
            lambdas, diffusion_map = eigs(G, k=n_components, which=which,
                                          tol=eigen_tol)
        lambdas = np.real(lambdas)
        diffusion_map = np.real(diffusion_map)
    elif eigen_solver == 'amg':
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        if not sparse.issparse(G):
            warnings.warn("AMG works better for sparse matrices")
        # Use AMG to get a preconditioner and speed up the eigenvalue problem.
        ml = smoothed_aggregation_solver(check_array(G, accept_sparse = ['csr']))
        M = ml.aspreconditioner()
        n_find = min(n_nodes, 5 + 2*n_components)
        X = random_state.rand(n_nodes, n_find)
        X[:, 0] = (G.diagonal()).ravel()
        lambdas, diffusion_map = lobpcg(G, X, M=M, largest=largest)
        lambdas, diffusion_map = _select_eigenpairs(lambdas, diffusion_map,
                                                    n_components, largest)
    elif eigen_solver == "lobpcg":
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        n_find = min(n_nodes, 5 + 2*n_components)
        X = random_state.rand(n_nodes, n_find)
        lambdas, diffusion_map = lobpcg(G, X, largest=largest)
        lambdas, diffusion_map = _select_eigenpairs(lambdas, diffusion_map,
                                                    n_components, largest)
    elif eigen_solver == 'dense':
        if sparse.issparse(G):
            G = G.toarray()
        if is_symmetric:
            lambdas, diffusion_map = eigh(G)
            if largest:  # eigh always returns eigenvalues in ascending order
                lambdas = lambdas[::-1]  # reverse order the e-values
                diffusion_map = diffusion_map[:, ::-1]  # reverse order the vectors
            lambdas = lambdas[:n_components]
            diffusion_map = diffusion_map[:, :n_components]
        else:
            # eig gives no ordering guarantee, so sort explicitly rather
            # than assuming ascending output.
            lambdas, diffusion_map = eig(G)
            lambdas, diffusion_map = _select_eigenpairs(lambdas, diffusion_map,
                                                        n_components, largest)
    return (lambdas, diffusion_map)
def test_check_array():
    """Exercise check_array on dense, sparse and nested-list inputs.

    Covers rejection of sparse input by default, ensure_2d / allow_nd,
    force_all_finite for inf and nan, dtype / order / copy enforcement,
    and sparse format conversion via accept_sparse.
    """
    # accept_sparse == None
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)
    # ensure_2d
    assert_warns(DeprecationWarning, check_array, [0, 1, 2])
    X_array = check_array([0, 1, 2])
    assert_equal(X_array.ndim, 2)
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert_equal(X_array.ndim, 1)
    # don't allow ndim > 2
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise
    # force_all_finite
    # Builtin float/int/bool are used throughout: the np.float, np.int and
    # np.bool aliases were removed in NumPy 1.24.
    X_inf = np.arange(4).reshape(2, 2).astype(float)
    X_inf[0, 0] = np.inf
    assert_raises(ValueError, check_array, X_inf)
    check_array(X_inf, force_all_finite=False)  # no raise
    # nan check
    X_nan = np.arange(4).reshape(2, 2).astype(float)
    X_nan[0, 0] = np.nan
    assert_raises(ValueError, check_array, X_nan)
    check_array(X_nan, force_all_finite=False)  # no raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if order == 'C':
            assert_true(X_checked.flags['C_CONTIGUOUS'])
            assert_false(X_checked.flags['F_CONTIGUOUS'])
        elif order == 'F':
            assert_true(X_checked.flags['F_CONTIGUOUS'])
            assert_false(X_checked.flags['C_CONTIGUOUS'])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert_true(X is X_checked)

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = ["object dtype is not supported by sparse matrices",
                        "Can't check dok sparse matrix for nan or inf."]
            assert_true(message in messages)
        else:
            assert_equal(len(w), 0)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if X.format in accept_sparse:
            # no change if allowed
            assert_equal(X.format, X_checked.format)
        else:
            # got converted
            assert_equal(X_checked.format, accept_sparse[0])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X.format == X_checked.format):
                assert_true(X is X_checked)

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert_true(isinstance(X_dense, np.ndarray))
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
def test_check_array_dtype_stability():
    """Integer-valued nested lists must keep an integer dtype kind."""
    # test that lists with ints don't get converted to floats
    int_rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    for extra_kwargs in ({}, {'ensure_2d': False}):
        checked = check_array(int_rows, **extra_kwargs)
        assert_equal(checked.dtype.kind, "i")