Example no. 1
 def assign_laplacian_matrix(self, laplacian_matrix, laplacian_type = "unknown"):
     laplacian_matrix = check_array(laplacian_matrix, accept_sparse = sparse_formats)
     (a, b) = laplacian_matrix.shape
     if a != b:
         raise ValueError("Laplacian matrix is not square")
     else:
         self.laplacian_matrix = laplacian_matrix
         self.laplacian_type = laplacian_type
Example no. 2
 def assign_distance_matrix(self, distance_mat, neighborhood_radius = None):
     distance_mat = check_array(distance_mat, accept_sparse = sparse_formats)
     (a, b) = distance_mat.shape
     if a != b:
         raise ValueError("distance matrix is not square")
     else:
         self.distance_matrix = distance_mat
         if neighborhood_radius is not None:
             self.neighborhood_radius = neighborhood_radius
Example no. 3
 def assign_affinity_matrix(self, affinity_matrix, affinity_radius = None):
     affinity_matrix = check_array(affinity_matrix, accept_sparse = sparse_formats)
     (a, b) = affinity_matrix.shape
     if a != b:
         raise ValueError("affinity matrix is not square")
     else:
         self.affinity_matrix = affinity_matrix
         if affinity_radius is not None:
             self.affinity_radius = affinity_radius
             self.default_affinity = False
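
Examples 1-3 share one validation pattern: run the input through check_array with the module's sparse_formats, then reject anything that is not square. Below is a minimal self-contained sketch of that pattern; the helper name check_square and the explicit sparse_formats list are illustrative assumptions, not part of the original module.

import numpy as np
from scipy import sparse
from sklearn.utils import check_array

# Assumed stand-in for the module-level sparse_formats constant.
sparse_formats = ['csr', 'csc', 'coo', 'lil', 'bsr', 'dok', 'dia']

def check_square(matrix, name="matrix"):
    # Same validation as the assign_* methods: validate/convert the input,
    # then require a square shape.
    matrix = check_array(matrix, accept_sparse=sparse_formats)
    a, b = matrix.shape
    if a != b:
        raise ValueError("%s is not square" % name)
    return matrix

# A 3x3 sparse Laplacian passes; a 2x3 array would raise ValueError.
L = sparse.csr_matrix([[ 2., -1., -1.],
                       [-1.,  2., -1.],
                       [-1., -1.,  2.]])
check_square(L, "Laplacian matrix")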
Example no. 4
 def assign_data_matrix(self, X):
     X = check_array(X, accept_sparse = sparse_formats)
     self.X = X
Example no. 5
 def __init__(self, X, neighborhood_radius = None, affinity_radius = None,
              distance_method = 'auto', input_type = 'data', 
              laplacian_type = None, path_to_flann = None):
     self.distance_method = distance_method
     self.input_type = input_type
     self.path_to_flann = path_to_flann
     self.laplacian_type = laplacian_type
     
     if self.distance_method not in distance_methods:
         raise ValueError("invalid distance method.")
             
     if neighborhood_radius is None:
         self.neighborhood_radius = 1.0 / X.shape[1]
     else:
         try:
             self.neighborhood_radius = float(neighborhood_radius)
         except ValueError:
             raise ValueError("neighborhood_radius must be convertible to float")
     if affinity_radius is None:
         self.affinity_radius = self.neighborhood_radius
         self.default_affinity = True
     else:
         try:
             self.affinity_radius = float(affinity_radius)
             self.default_affinity = False
         except ValueError:
             raise ValueError("affinity_radius must be convertible to float")
     
     if self.input_type == 'distance':
         X = check_array(X, accept_sparse = sparse_formats)
         a, b = X.shape
         if a != b:
             raise ValueError("input_type is distance but input matrix is not square")
         self.X = None
         self.distance_matrix = X
         self.affinity_matrix = None
         self.laplacian_matrix = None
     elif self.input_type == 'affinity':
         X = check_array(X, accept_sparse = sparse_formats)
         a, b = X.shape
         if a != b:
             raise ValueError("input_type is affinity but input matrix is not square")
         self.X = None
         self.distance_matrix = None
         self.affinity_matrix = X
         self.laplacian_matrix = None
     elif self.input_type == 'data':
         X = check_array(X, accept_sparse = sparse_formats)
         self.X = X
         self.distance_matrix = None
         self.affinity_matrix = None
         self.laplacian_matrix = None
     else:
         raise ValueError('input_type must be one of: data, distance, affinity.')
         
     # cyindex is only built when the cython distance method is used on raw data
     self.cyindex = None
     if distance_method == 'cython' and input_type == 'data':
         try:
             from Mmani.geometry.cyflann.index import Index
             self.cyindex = Index(X)
         except ImportError:
             raise ValueError("distance_method set to cython but cyflann_index cannot be imported.")
     
     if distance_method == 'pyflann':
         if self.path_to_flann is not None: 
             # FLANN is installed in specific location
             sys.path.insert(0, self.path_to_flann)
         try:
             import pyflann as pyf
             self.flindex = pyf.FLANN()
             self.flparams = self.flindex.build_index(X, algorithm = 'kmeans', 
                                                      target_precision = 0.9)
         except ImportError:
             raise ValueError("distance_method is set to pyflann but pyflann is "
                             "not available.")
     else:
         self.flindex = None
         self.flparams = None
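
The radius handling in this constructor reduces to two defaults: neighborhood_radius falls back to 1/n_features, and affinity_radius falls back to neighborhood_radius while default_affinity records whether that fallback was used. The sketch below re-implements just that logic in isolation; resolve_radii is a hypothetical helper name, not part of the class.

import numpy as np
from sklearn.utils import check_array

def resolve_radii(X, neighborhood_radius=None, affinity_radius=None):
    # Mirror of the radius defaults in the __init__ above.
    if neighborhood_radius is None:
        neighborhood_radius = 1.0 / X.shape[1]
    else:
        neighborhood_radius = float(neighborhood_radius)
    default_affinity = affinity_radius is None
    if default_affinity:
        affinity_radius = neighborhood_radius
    else:
        affinity_radius = float(affinity_radius)
    return neighborhood_radius, affinity_radius, default_affinity

X = check_array(np.random.rand(10, 4))
print(resolve_radii(X))            # (0.25, 0.25, True)
print(resolve_radii(X, 0.5, 2.0))  # (0.5, 2.0, False)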
Example no. 6
def eigen_decomposition(G, n_components=8, eigen_solver=None,
                       random_state=None, eigen_tol=0.0, 
                       drop_first=True, largest = True):
    """
    G : 2d numpy/scipy array. Potentially sparse.
        The matrix to find the eigendecomposition of 
    n_components : integer, optional
        The number of eigenvectors to return 

    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        auto : algorithm will attempt to choose the best method for input data
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient Method.
            a preconditioned eigensolver for large symmetric positive definite 
            (SPD) generalized eigenproblems.
        amg : AMG requires pyamg to be installed. It can be faster on very large, 
            sparse problems, but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition when using arpack eigen_solver
    
    Returns
    -------
    lambdas, diffusion_map : eigenvalues, eigenvectors 
    """
    n_nodes = G.shape[0]
    if eigen_solver is None:
        eigen_solver = 'auto'
    elif eigen_solver not in eigen_solvers:
        raise ValueError("Unknown value for eigen_solver: '%s'. "
                         "Should be one of: %s"
                         % (eigen_solver, eigen_solvers))
    if eigen_solver == 'auto':
        if G.shape[0] > 200:
            eigen_solver = 'arpack'
        else:
            eigen_solver = 'dense'
    
    # Check eigen_solver method
    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")
    # Check input values
    if not isinstance(largest, bool):
        raise ValueError("largest should be True if you want largest eigenvalues otherwise False")
    random_state = check_random_state(random_state)
    if drop_first:
        n_components = n_components + 1     
    # Check for symmetry
    is_symmetric = _is_symmetric(G)
    # Convert G to best type for eigendecomposition 
    if sparse.issparse(G):
        if G.getformat() != 'csr':
            G = G.tocsr()
    G = G.astype(np.float64)
    
    if ((eigen_solver == 'lobpcg') and (n_nodes < 5 * n_components + 1)):
        warnings.warn("lobpcg has problems with small number of nodes. Using dense eigh")
        eigen_solver = 'dense'
        
    # Try Eigen Methods:
    if eigen_solver == 'arpack':
        if is_symmetric:
            if largest:
                which = 'LM'
            else:
                which = 'SM'
            lambdas, diffusion_map = eigsh(G, k=n_components, which=which, tol=eigen_tol)
        else:
            if largest:
                which = 'LR'
            else:
                which = 'SR'
            lambdas, diffusion_map = eigs(G, k=n_components, which=which, tol=eigen_tol)
        lambdas = np.real(lambdas)         
        diffusion_map = np.real(diffusion_map)
    elif eigen_solver == 'amg':
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        if not sparse.issparse(G):
            warnings.warn("AMG works better for sparse matrices")
        # Use AMG to get a preconditioner and speed up the eigenvalue problem.
        ml = smoothed_aggregation_solver(check_array(G, accept_sparse = ['csr']))
        M = ml.aspreconditioner()
        n_find = min(n_nodes, 5 + 2*n_components)
        X = random_state.rand(n_nodes, n_find)
        X[:, 0] = (G.diagonal()).ravel()
        lambdas, diffusion_map = lobpcg(G, X, M=M, largest=largest)   
        sort_order = np.argsort(lambdas)
        if largest:
            lambdas = lambdas[sort_order[::-1]]
            diffusion_map = diffusion_map[:, sort_order[::-1]]
        else:
            lambdas = lambdas[sort_order]
            diffusion_map = diffusion_map[:, sort_order]
        lambdas = lambdas[:n_components]
        diffusion_map = diffusion_map[:, :n_components]
    elif eigen_solver == "lobpcg":
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        n_find = min(n_nodes, 5 + 2*n_components)
        X = random_state.rand(n_nodes, n_find)
        lambdas, diffusion_map = lobpcg(G, X, largest=largest)
        sort_order = np.argsort(lambdas)
        if largest:
            lambdas = lambdas[sort_order[::-1]]
            diffusion_map = diffusion_map[:, sort_order[::-1]]
        else:
            lambdas = lambdas[sort_order]
            diffusion_map = diffusion_map[:, sort_order]
        lambdas = lambdas[:n_components]
        diffusion_map = diffusion_map[:, :n_components]
    elif eigen_solver == 'dense':
        if sparse.isspmatrix(G):
            G = G.todense()
        if is_symmetric:
            lambdas, diffusion_map = eigh(G)
        else:
            lambdas, diffusion_map = eig(G)
        if largest:  # eigh always returns eigenvalues in ascending order
            lambdas = lambdas[::-1] # reverse order the e-values
            diffusion_map = diffusion_map[:, ::-1] # reverse order the vectors
        lambdas = lambdas[:n_components]
        diffusion_map = diffusion_map[:, :n_components]
    return (lambdas, diffusion_map)
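
For a small symmetric positive semi-definite matrix, the 'arpack' and 'dense' paths described in the docstring should return the same leading eigenvalues. The sketch below compares the two SciPy routines the function dispatches to (eigsh vs. eigh); it is an illustration of the dispatch logic, not a call to eigen_decomposition itself.

import numpy as np
from scipy.linalg import eigh
from scipy.sparse.linalg import eigsh

rng = np.random.RandomState(42)
A = rng.rand(50, 20)
G = A @ A.T                     # symmetric positive semi-definite test matrix
n_components = 4

# arpack path: iterative solver, largest-magnitude eigenpairs
lam_arpack, _ = eigsh(G, k=n_components, which='LM')

# dense path: full decomposition, keep the top n_components
# (eigh returns eigenvalues in ascending order, hence the reversal)
lam_dense, _ = eigh(G)
lam_dense = lam_dense[::-1][:n_components]

print(np.allclose(np.sort(lam_arpack), np.sort(lam_dense)))  # True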
Example no. 7
def test_check_array():
    # accept_sparse == None
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)
    # ensure_2d
    assert_warns(DeprecationWarning, check_array, [0, 1, 2])
    X_array = check_array([0, 1, 2])
    assert_equal(X_array.ndim, 2)
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert_equal(X_array.ndim, 1)
    # don't allow ndim > 3
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise
    # force_all_finite
    X_inf = np.arange(4).reshape(2, 2).astype(float)
    X_inf[0, 0] = np.inf
    assert_raises(ValueError, check_array, X_inf)
    check_array(X_inf, force_all_finite=False)  # no raise
    # nan check
    X_nan = np.arange(4).reshape(2, 2).astype(float)
    X_nan[0, 0] = np.nan
    assert_raises(ValueError, check_array, X_nan)
    check_array(X_nan, force_all_finite=False)  # no raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if order == 'C':
            assert_true(X_checked.flags['C_CONTIGUOUS'])
            assert_false(X_checked.flags['F_CONTIGUOUS'])
        elif order == 'F':
            assert_true(X_checked.flags['F_CONTIGUOUS'])
            assert_false(X_checked.flags['C_CONTIGUOUS'])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert_true(X is X_checked)

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = ["object dtype is not supported by sparse matrices",
                        "Can't check dok sparse matrix for nan or inf."]
            assert_true(message in messages)
        else:
            assert_equal(len(w), 0)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if X.format in accept_sparse:
            # no change if allowed
            assert_equal(X.format, X_checked.format)
        else:
            # got converted
            assert_equal(X_checked.format, accept_sparse[0])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X.format == X_checked.format):
                assert_true(X is X_checked)

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert_true(isinstance(X_dense, np.ndarray))
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
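
One behaviour exercised by the sparse portion of this test is easy to miss: when the input's sparse format is not listed in accept_sparse, check_array converts it to the first accepted format, and otherwise passes it through unchanged. A short standalone illustration:

import numpy as np
import scipy.sparse as sp
from sklearn.utils import check_array

X_csc = sp.csc_matrix(np.arange(4).reshape(2, 2))

# 'csc' is not in the accepted list, so the matrix is converted to the
# first accepted format.
X_checked = check_array(X_csc, accept_sparse=['csr', 'coo'])
print(X_checked.format)   # 'csr'

# A format that is already accepted is passed through unchanged.
X_same = check_array(X_csc, accept_sparse=['csc'])
print(X_same.format)      # 'csc'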
Example no. 8
def test_check_array_dtype_stability():
    # test that lists with ints don't get converted to floats
    X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    assert_equal(check_array(X).dtype.kind, "i")
    assert_equal(check_array(X, ensure_2d=False).dtype.kind, "i")