def getrow(self, i): """Returns a copy of row i of the matrix, as a (1 x n) sparse matrix (row vector). """ # Spmatrix subclasses should override this method for efficiency. # Pre-multiply by a (1 x m) row vector 'a' containing all zeros # except for a_i = 1 from csr import csr_matrix m = self.shape[0] a = csr_matrix((1, m), dtype=self.dtype) a[0, i] = 1 return a * self
def tocsr(self): M, N = self.shape indptr = np.empty(M + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) csc_tocsr(M, N, self.indptr, self.indices, self.data, indptr, indices, data) from csr import csr_matrix A = csr_matrix((data, indices, indptr), shape=self.shape) A.has_sorted_indices = True return A
def tocsr(self): M, N = self.shape indptr = np.empty(M + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) csc_tocsr(M, N, \ self.indptr, self.indices, self.data, \ indptr, indices, data) from csr import csr_matrix A = csr_matrix((data, indices, indptr), shape=self.shape) A.has_sorted_indices = True return A
def estimate_blocksize(A,efficiency=0.7): """Attempt to determine the blocksize of a sparse matrix Returns a blocksize=(r,c) such that - A.nnz / A.tobsr( (r,c) ).nnz > efficiency """ if not (isspmatrix_csr(A) or isspmatrix_csc(A)): A = csr_matrix(A) if A.nnz == 0: return (1,1) if not 0 < efficiency < 1.0: raise ValueError,'efficiency must satisfy 0.0 < efficiency < 1.0' high_efficiency = (1.0 + efficiency) / 2.0 nnz = float(A.nnz) M,N = A.shape if M % 2 == 0 and N % 2 == 0: e22 = nnz / ( 4 * count_blocks(A,(2,2)) ) else: e22 = 0.0 if M % 3 == 0 and N % 3 == 0: e33 = nnz / ( 9 * count_blocks(A,(3,3)) ) else: e33 = 0.0 if e22 > high_efficiency and e33 > high_efficiency: e66 = nnz / ( 36 * count_blocks(A,(6,6)) ) if e66 > efficiency: return (6,6) else: return (3,3) else: if M % 4 == 0 and N % 4 == 0: e44 = nnz / ( 16 * count_blocks(A,(4,4)) ) else: e44 = 0.0 if e44 > efficiency: return (4,4) elif e33 > efficiency: return (3,3) elif e22 > efficiency: return (2,2) else: return (1,1)
def tocsr(self): """Return a copy of this matrix in Compressed Sparse Row format Duplicate entries will be summed together. Example ------- >>> from numpy import array >>> from scipy.sparse import coo_matrix >>> row = array([0,0,1,3,1,0,0]) >>> col = array([0,2,1,3,1,0,0]) >>> data = array([1,1,1,1,1,1,1]) >>> A = coo_matrix( (data,(row,col)), shape=(4,4)).tocsr() >>> A.todense() matrix([[3, 0, 1, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]) """ from csr import csr_matrix if self.nnz == 0: return csr_matrix(self.shape, dtype=self.dtype) else: M,N = self.shape indptr = np.empty(M + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) coo_tocsr(M, N, self.nnz, \ self.row, self.col, self.data, \ indptr, indices, data) A = csr_matrix((data, indices, indptr), shape=self.shape) A.sum_duplicates() return A
def tocsr(self): """Return a copy of this matrix in Compressed Sparse Row format Duplicate entries will be summed together. Examples -------- >>> from numpy import array >>> from scipy.sparse import coo_matrix >>> row = array([0,0,1,3,1,0,0]) >>> col = array([0,2,1,3,1,0,0]) >>> data = array([1,1,1,1,1,1,1]) >>> A = coo_matrix( (data,(row,col)), shape=(4,4)).tocsr() >>> A.todense() matrix([[3, 0, 1, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]]) """ from csr import csr_matrix if self.nnz == 0: return csr_matrix(self.shape, dtype=self.dtype) else: M, N = self.shape indptr = np.empty(M + 1, dtype=np.intc) indices = np.empty(self.nnz, dtype=np.intc) data = np.empty(self.nnz, dtype=upcast(self.dtype)) coo_tocsr(M, N, self.nnz, \ self.row, self.col, self.data, \ indptr, indices, data) A = csr_matrix((data, indices, indptr), shape=self.shape) A.sum_duplicates() return A
def estimate_blocksize(A, efficiency=0.7): """Attempt to determine the blocksize of a sparse matrix Returns a blocksize=(r,c) such that - A.nnz / A.tobsr( (r,c) ).nnz > efficiency """ if not (isspmatrix_csr(A) or isspmatrix_csc(A)): A = csr_matrix(A) if A.nnz == 0: return (1, 1) if not 0 < efficiency < 1.0: raise ValueError('efficiency must satisfy 0.0 < efficiency < 1.0') high_efficiency = (1.0 + efficiency) / 2.0 nnz = float(A.nnz) M, N = A.shape if M % 2 == 0 and N % 2 == 0: e22 = nnz / (4 * count_blocks(A, (2, 2))) else: e22 = 0.0 if M % 3 == 0 and N % 3 == 0: e33 = nnz / (9 * count_blocks(A, (3, 3))) else: e33 = 0.0 if e22 > high_efficiency and e33 > high_efficiency: e66 = nnz / (36 * count_blocks(A, (6, 6))) if e66 > efficiency: return (6, 6) else: return (3, 3) else: if M % 4 == 0 and N % 4 == 0: e44 = nnz / (16 * count_blocks(A, (4, 4))) else: e44 = 0.0 if e44 > efficiency: return (4, 4) elif e33 > efficiency: return (3, 3) elif e22 > efficiency: return (2, 2) else: return (1, 1)
def getrow(self, i): """Returns a copy of row i of the matrix, as a (1 x n) sparse matrix (row vector). """ # Spmatrix subclasses should override this method for efficiency. # Pre-multiply by a (1 x m) row vector 'a' containing all zeros # except for a_i = 1 from csr import csr_matrix m = self.shape[0] if i < 0: i += m if i < 0 or i >= m: raise IndexError("index out of bounds") row_selector = csr_matrix(([1], [[0], [i]]), shape=(1,m), dtype=self.dtype) return row_selector * self
def count_blocks(A,blocksize): """For a given blocksize=(r,c) count the number of occupied blocks in a sparse matrix A """ r,c = blocksize if r < 1 or c < 1: raise ValueError,'r and c must be positive' if isspmatrix_csr(A): M,N = A.shape return csr_count_blocks(M,N,r,c,A.indptr,A.indices) elif isspmatrix_csc(A): return count_blocks(A.T,(c,r)) else: return count_blocks(csr_matrix(A),blocksize)
def count_blocks(A, blocksize): """For a given blocksize=(r,c) count the number of occupied blocks in a sparse matrix A """ r, c = blocksize if r < 1 or c < 1: raise ValueError('r and c must be positive') if isspmatrix_csr(A): M, N = A.shape return csr_count_blocks(M, N, r, c, A.indptr, A.indices) elif isspmatrix_csc(A): return count_blocks(A.T, (c, r)) else: return count_blocks(csr_matrix(A), blocksize)
def __init__(self, arg1, shape=None, dtype=None, copy=False): spmatrix.__init__(self) self.dtype = getdtype(dtype, arg1, default=float) # First get the shape if isspmatrix(arg1): if isspmatrix_lil(arg1) and copy: A = arg1.copy() else: A = arg1.tolil() if dtype is not None: A = A.astype(dtype) self.shape = A.shape self.dtype = A.dtype self.rows = A.rows self.data = A.data elif isinstance(arg1,tuple): if isshape(arg1): if shape is not None: raise ValueError('invalid use of shape parameter') M, N = arg1 self.shape = (M,N) self.rows = np.empty((M,), dtype=object) self.data = np.empty((M,), dtype=object) for i in range(M): self.rows[i] = [] self.data[i] = [] else: raise TypeError('unrecognized lil_matrix constructor usage') else: #assume A is dense try: A = np.asmatrix(arg1) except TypeError: raise TypeError('unsupported matrix type') else: from csr import csr_matrix A = csr_matrix(A, dtype=dtype).tolil() self.shape = A.shape self.dtype = A.dtype self.rows = A.rows self.data = A.data
def __init__(self, arg1, shape=None, dtype=None, copy=False): spmatrix.__init__(self) self.dtype = getdtype(dtype, arg1, default=float) # First get the shape if isspmatrix(arg1): if isspmatrix_lil(arg1) and copy: A = arg1.copy() else: A = arg1.tolil() if dtype is not None: A = A.astype(dtype) self.shape = A.shape self.dtype = A.dtype self.rows = A.rows self.data = A.data elif isinstance(arg1, tuple): if isshape(arg1): if shape is not None: raise ValueError('invalid use of shape parameter') M, N = arg1 self.shape = (M, N) self.rows = np.empty((M, ), dtype=object) self.data = np.empty((M, ), dtype=object) for i in range(M): self.rows[i] = [] self.data[i] = [] else: raise TypeError('unrecognized lil_matrix constructor usage') else: #assume A is dense try: A = np.asmatrix(arg1) except TypeError: raise TypeError('unsupported matrix type') else: from csr import csr_matrix A = csr_matrix(A, dtype=dtype).tolil() self.shape = A.shape self.dtype = A.dtype self.rows = A.rows self.data = A.data
def eliminate_zeros(self): R,C = self.blocksize M,N = self.shape mask = (self.data != 0).reshape(-1,R*C).sum(axis=1) #nonzero blocks nonzero_blocks = mask.nonzero()[0] if len(nonzero_blocks) == 0: return #nothing to do self.data[:len(nonzero_blocks)] = self.data[nonzero_blocks] from csr import csr_matrix # modifies self.indptr and self.indices *in place* proxy = csr_matrix((mask,self.indices,self.indptr),shape=(M//R,N//C)) proxy.eliminate_zeros() self.prune()
def eliminate_zeros(self): R,C = self.blocksize M,N = self.shape mask = (self.data != 0).reshape(-1,R*C).sum(axis=1) #nonzero blocks nonzero_blocks = mask.nonzero()[0] if len(nonzero_blocks) == 0: return #nothing to do self.data[:len(nonzero_blocks)] = self.data[nonzero_blocks] from csr import csr_matrix # modifies self.indptr and self.indices *in place* proxy = csr_matrix((mask,self.indices,self.indptr),shape=(M/R,N/C)) proxy.eliminate_zeros() self.prune()
def tocsr(self): """ Return Compressed Sparse Row format arrays for this matrix. """ indptr = np.asarray([len(x) for x in self.rows], dtype=np.intc) indptr = np.concatenate( (np.array([0], dtype=np.intc), np.cumsum(indptr)) ) nnz = indptr[-1] indices = [] for x in self.rows: indices.extend(x) indices = np.asarray(indices, dtype=np.intc) data = [] for x in self.data: data.extend(x) data = np.asarray(data, dtype=self.dtype) from csr import csr_matrix return csr_matrix((data, indices, indptr), shape=self.shape)
def cs_graph_components(x): """ Determine connected compoments of a graph stored as a compressed sparse row or column matrix. For speed reasons, the symmetry of the matrix x is not checked. A nonzero at index `(i, j)` means that node `i` is connected to node `j` by an edge. The number of rows/columns of the matrix thus corresponds to the number of nodes in the graph. Parameters ----------- x: ndarray-like, 2 dimensions, or sparse matrix The adjacency matrix of the graph. Only the upper triangular part is used. Returns -------- n_comp: int The number of connected components. label: ndarray (ints, 1 dimension): The label array of each connected component (-2 is used to indicate empty rows in the matrix: 0 everywhere, including diagonal). This array has the length of the number of nodes, i.e. one label for each node of the graph. Nodes having the same label belong to the same connected component. Notes ------ The matrix is assumed to be symmetric and the upper triangular part of the matrix is used. The matrix is converted to a CSR matrix unless it is already a CSR. Example ------- >>> from scipy.sparse import cs_graph_components >>> import numpy as np >>> D = np.eye(4) >>> D[0,1] = D[1,0] = 1 >>> cs_graph_components(D) (3, array([0, 0, 1, 2])) >>> from scipy.sparse import dok_matrix >>> cs_graph_components(dok_matrix(D)) (3, array([0, 0, 1, 2])) """ try: shape = x.shape except AttributeError: raise ValueError(_msg0) if not ((len(x.shape) == 2) and (x.shape[0] == x.shape[1])): raise ValueError(_msg1 % x.shape) if isspmatrix(x): x = x.tocsr() else: x = csr_matrix(x) label = np.empty((shape[0], ), dtype=x.indptr.dtype) n_comp = _cs_graph_components(shape[0], x.indptr, x.indices, label) return n_comp, label
def transpose(self, copy=False): from csr import csr_matrix M, N = self.shape return csr_matrix((self.data, self.indices, self.indptr), (N, M), copy=copy)
def cs_graph_components(x): """ Determine connected compoments of a graph stored as a compressed sparse row or column matrix. For speed reasons, the symmetry of the matrix x is not checked. A nonzero at index `(i, j)` means that node `i` is connected to node `j` by an edge. The number of rows/columns of the matrix thus corresponds to the number of nodes in the graph. Parameters ----------- x: ndarray-like, 2 dimensions, or sparse matrix The adjacency matrix of the graph. Only the upper triangular part is used. Returns -------- n_comp: int The number of connected components. label: ndarray (ints, 1 dimension): The label array of each connected component (-2 is used to indicate empty rows in the matrix: 0 everywhere, including diagonal). This array has the length of the number of nodes, i.e. one label for each node of the graph. Nodes having the same label belong to the same connected component. Notes ------ The matrix is assumed to be symmetric and the upper triangular part of the matrix is used. The matrix is converted to a CSR matrix unless it is already a CSR. Example ------- >>> from scipy.sparse import cs_graph_components >>> import numpy as np >>> D = np.eye(4) >>> D[0,1] = D[1,0] = 1 >>> cs_graph_components(D) (3, array([0, 0, 1, 2])) >>> from scipy.sparse import dok_matrix >>> cs_graph_components(dok_matrix(D)) (3, array([0, 0, 1, 2])) """ try: shape = x.shape except AttributeError: raise ValueError(_msg0) if not ((len(x.shape) == 2) and (x.shape[0] == x.shape[1])): raise ValueError(_msg1 % x.shape) if isspmatrix(x): x = x.tocsr() else: x = csr_matrix(x) label = np.empty((shape[0],), dtype=x.indptr.dtype) n_comp = _cs_graph_components(shape[0], x.indptr, x.indices, label) return n_comp, label
def kron(A, B, format=None): """kronecker product of sparse matrices A and B Parameters ---------- A : sparse or dense matrix first matrix of the product B : sparse or dense matrix second matrix of the product format : string format of the result (e.g. "csr") Returns ------- kronecker product in a sparse matrix format Examples -------- >>> A = csr_matrix(array([[0,2],[5,0]])) >>> B = csr_matrix(array([[1,2],[3,4]])) >>> kron(A,B).todense() matrix([[ 0, 0, 2, 4], [ 0, 0, 6, 8], [ 5, 10, 0, 0], [15, 20, 0, 0]]) >>> kron(A,[[1,2],[3,4]]).todense() matrix([[ 0, 0, 2, 4], [ 0, 0, 6, 8], [ 5, 10, 0, 0], [15, 20, 0, 0]]) """ B = coo_matrix(B) if (format is None or format == "bsr") and 2*B.nnz >= B.shape[0] * B.shape[1]: #B is fairly dense, use BSR A = csr_matrix(A,copy=True) output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1]) if A.nnz == 0 or B.nnz == 0: # kronecker product is the zero matrix return coo_matrix( output_shape ) B = B.toarray() data = A.data.repeat(B.size).reshape(-1,B.shape[0],B.shape[1]) data = data * B return bsr_matrix((data,A.indices,A.indptr), shape=output_shape) else: #use COO A = coo_matrix(A) output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1]) if A.nnz == 0 or B.nnz == 0: # kronecker product is the zero matrix return coo_matrix( output_shape ) # expand entries of a into blocks row = A.row.repeat(B.nnz) col = A.col.repeat(B.nnz) data = A.data.repeat(B.nnz) row *= B.shape[0] col *= B.shape[1] # increment block indices row,col = row.reshape(-1,B.nnz),col.reshape(-1,B.nnz) row += B.row col += B.col row,col = row.reshape(-1),col.reshape(-1) # compute block entries data = data.reshape(-1,B.nnz) * B.data data = data.reshape(-1) return coo_matrix((data,(row,col)), shape=output_shape).asformat(format)
def kron(A, B, format=None): """kronecker product of sparse matrices A and B Parameters ---------- A : sparse or dense matrix first matrix of the product B : sparse or dense matrix second matrix of the product format : string format of the result (e.g. "csr") Returns ------- kronecker product in a sparse matrix format Examples -------- >>> A = csr_matrix(array([[0,2],[5,0]])) >>> B = csr_matrix(array([[1,2],[3,4]])) >>> kron(A,B).todense() matrix([[ 0, 0, 2, 4], [ 0, 0, 6, 8], [ 5, 10, 0, 0], [15, 20, 0, 0]]) >>> kron(A,[[1,2],[3,4]]).todense() matrix([[ 0, 0, 2, 4], [ 0, 0, 6, 8], [ 5, 10, 0, 0], [15, 20, 0, 0]]) """ B = coo_matrix(B) if (format is None or format == "bsr") and 2 * B.nnz >= B.shape[0] * B.shape[1]: #B is fairly dense, use BSR A = csr_matrix(A, copy=True) output_shape = (A.shape[0] * B.shape[0], A.shape[1] * B.shape[1]) if A.nnz == 0 or B.nnz == 0: # kronecker product is the zero matrix return coo_matrix(output_shape) B = B.toarray() data = A.data.repeat(B.size).reshape(-1, B.shape[0], B.shape[1]) data = data * B return bsr_matrix((data, A.indices, A.indptr), shape=output_shape) else: #use COO A = coo_matrix(A) output_shape = (A.shape[0] * B.shape[0], A.shape[1] * B.shape[1]) if A.nnz == 0 or B.nnz == 0: # kronecker product is the zero matrix return coo_matrix(output_shape) # expand entries of a into blocks row = A.row.repeat(B.nnz) col = A.col.repeat(B.nnz) data = A.data.repeat(B.nnz) row *= B.shape[0] col *= B.shape[1] # increment block indices row, col = row.reshape(-1, B.nnz), col.reshape(-1, B.nnz) row += B.row col += B.col row, col = row.reshape(-1), col.reshape(-1) # compute block entries data = data.reshape(-1, B.nnz) * B.data data = data.reshape(-1) return coo_matrix((data, (row, col)), shape=output_shape).asformat(format)