def _mul_sparse_matrix(self, other):
    """Return the product ``self * other`` as an instance of ``self.__class__``.

    ``other`` is converted to this matrix's class first. The value returned
    by the ``<format>_matmat_maxnnz`` routine is used as the allocation size
    for the output arrays, which ``<format>_matmat`` then fills.
    """
    n_rows, _ = self.shape
    _, n_cols = other.shape
    n_major = self._swap((n_rows, n_cols))[0]
    rhs = self.__class__(other)  # bring the right operand into this format
    index_arrays = (self.indptr, self.indices, rhs.indptr, rhs.indices)

    # Sizing pass: index dtype is chosen from the input arrays alone.
    sizing_dtype = get_index_dtype(index_arrays)
    count_nnz = getattr(_sparsetools, self.format + '_matmat_maxnnz')
    nnz = count_nnz(
        n_rows, n_cols,
        *[np.asarray(arr, dtype=sizing_dtype) for arr in index_arrays])

    # Compute pass: the index dtype must also be able to hold ``nnz``.
    out_dtype = get_index_dtype(index_arrays, maxval=nnz)
    out_indptr = np.empty(n_major + 1, dtype=out_dtype)
    out_indices = np.empty(nnz, dtype=out_dtype)
    out_data = np.empty(nnz, dtype=upcast(self.dtype, rhs.dtype))

    lhs_ptr, lhs_idx, rhs_ptr, rhs_idx = [
        np.asarray(arr, dtype=out_dtype) for arr in index_arrays]
    multiply = getattr(_sparsetools, self.format + '_matmat')
    multiply(n_rows, n_cols,
             lhs_ptr, lhs_idx, self.data,
             rhs_ptr, rhs_idx, rhs.data,
             out_indptr, out_indices, out_data)

    return self.__class__((out_data, out_indices, out_indptr),
                          shape=(n_rows, n_cols))
def _mul_sparse_matrix(self, other):
    """
    Do the sparse matrix mult returning fast_csr_matrix only
    when other is also fast_csr_matrix.

    When ``other`` is not a ``fast_csr_matrix`` it is converted with
    ``csr_matrix`` and the product is computed with the ``_sparsetools``
    kernels. Both kernel generations are supported: ``*_matmat_maxnnz`` /
    ``*_matmat`` when present, otherwise ``*_matmat_pass1`` /
    ``*_matmat_pass2``.
    """
    M, _ = self.shape
    _, N = other.shape
    major_axis = self._swap((M, N))[0]
    if isinstance(other, fast_csr_matrix):
        return zcsr_mult(self, other, sorted=1)

    other = csr_matrix(other)  # convert to this format
    index_arrays = (self.indptr, self.indices, other.indptr, other.indices)

    def cast_indices(dtype):
        # All four index arrays viewed/copied with one common dtype.
        return [np.asarray(arr, dtype=dtype) for arr in index_arrays]

    idx_dtype = get_index_dtype(index_arrays, maxval=M * N)

    # scipy 1.5 renamed the older csr_matmat_pass1 to the much more
    # descriptive csr_matmat_maxnnz, but also changed the call and logic
    # structure of constructing the indices.  Only the attribute lookup
    # decides which path is taken, so an AttributeError raised *inside* a
    # kernel call is never mistaken for "function not available".
    maxnnz_fn = getattr(_sparsetools, self.format + '_matmat_maxnnz', None)
    if maxnnz_fn is not None:
        a_ptr, a_idx, b_ptr, b_idx = cast_indices(idx_dtype)
        nnz = maxnnz_fn(M, N, a_ptr, a_idx, b_ptr, b_idx)
        idx_dtype = get_index_dtype(index_arrays, maxval=nnz)
        indptr = np.empty(major_axis + 1, dtype=idx_dtype)
    else:
        indptr = np.empty(major_axis + 1, dtype=idx_dtype)
        pass1 = getattr(_sparsetools, self.format + '_matmat_pass1')
        a_ptr, a_idx, b_ptr, b_idx = cast_indices(idx_dtype)
        pass1(M, N, a_ptr, a_idx, b_ptr, b_idx, indptr)
        nnz = indptr[-1]
        idx_dtype = get_index_dtype(index_arrays, maxval=nnz)
        # indptr was allocated with the earlier dtype; keep it in step with
        # the index arrays passed to the second pass.
        indptr = np.asarray(indptr, dtype=idx_dtype)

    indices = np.empty(nnz, dtype=idx_dtype)
    data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))

    matmat = getattr(_sparsetools, self.format + '_matmat', None)
    if matmat is None:
        matmat = getattr(_sparsetools, self.format + '_matmat_pass2')

    a_ptr, a_idx, b_ptr, b_idx = cast_indices(idx_dtype)
    matmat(M, N,
           a_ptr, a_idx, self.data,
           b_ptr, b_idx, other.data,
           indptr, indices, data)
    return csr_matrix((data, indices, indptr), shape=(M, N))
def tobsr(self, blocksize=None, copy=True):
    """Convert this matrix to ``bsr_matrix`` (block sparse row) format.

    Parameters
    ----------
    blocksize : tuple of two ints, optional
        Block dimensions ``(R, C)``. When omitted, the value returned by
        ``scipy.sparse.spfuncs.estimate_blocksize(self)`` is used.
    copy : bool, optional
        Forwarded to ``bsr_matrix`` when the block size is ``(1, 1)``, where
        the result is built directly from this matrix's arrays.

    Returns
    -------
    bsr_matrix

    Raises
    ------
    ValueError
        If a block dimension is smaller than 1 or does not evenly divide the
        corresponding matrix dimension.
    """
    if blocksize is None:
        from scipy.sparse.spfuncs import estimate_blocksize
        # Forward ``copy`` so the caller's choice is honoured when the
        # estimate resolves to the (1, 1) path.
        return self.tobsr(blocksize=estimate_blocksize(self), copy=copy)

    if blocksize == (1, 1):
        from scipy.sparse.bsr import bsr_matrix
        arg1 = (self.data.reshape(-1, 1, 1), self.indices, self.indptr)
        return bsr_matrix(arg1, shape=self.shape, copy=copy)

    R, C = blocksize
    M, N = self.shape
    if R < 1 or C < 1 or M % R != 0 or N % C != 0:
        # Wrap in a 1-tuple: ``'%s' % (R, C)`` would raise TypeError
        # instead of producing the intended message.
        raise ValueError('invalid blocksize %s' % (blocksize,))

    from scipy.sparse.bsr import bsr_matrix

    blks = csr_count_blocks(M, N, R, C, self.indptr, self.indices)
    idx_dtype = get_index_dtype((self.indptr, self.indices),
                                maxval=max(N // C, blks))
    indptr = np.empty(M // R + 1, dtype=idx_dtype)
    indices = np.empty(blks, dtype=idx_dtype)
    data = np.zeros((blks, R, C), dtype=self.dtype)

    csr_tobsr(M, N, R, C,
              self.indptr.astype(idx_dtype),
              self.indices.astype(idx_dtype),
              self.data,
              indptr, indices, data.ravel())

    return bsr_matrix((data, indices, indptr), shape=self.shape)
def _binopt(self, other, op):
    """Apply an element-wise binary ``_sparsetools`` kernel to two matrices.

    ``op`` is the infix of the kernel name, e.g. ``'_plus_'`` selects
    ``csr_plus_csr`` when ``self.format`` is ``'csr'``. ``other`` is
    converted to this matrix's class first. The result is an instance of
    ``self.__class__`` on which ``prune()`` has been called.
    """
    rhs = self.__class__(other)

    # e.g. csr_plus_csr, csr_minus_csr, etc.
    kernel = getattr(_sparsetools, self.format + op + self.format)

    capacity = self.nnz + rhs.nnz
    index_arrays = (self.indptr, self.indices, rhs.indptr, rhs.indices)
    index_type = get_index_dtype(index_arrays, maxval=capacity)

    out_indptr = np.empty(self.indptr.shape, dtype=index_type)
    out_indices = np.empty(capacity, dtype=index_type)

    # Comparison kernels write booleans; the others write the upcast dtype.
    comparison_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
    value_type = (np.bool_ if op in comparison_ops
                  else upcast(self.dtype, rhs.dtype))
    out_data = np.empty(capacity, dtype=value_type)

    lhs_ptr, lhs_idx, rhs_ptr, rhs_idx = [
        np.asarray(arr, dtype=index_type) for arr in index_arrays]
    kernel(self.shape[0], self.shape[1],
           lhs_ptr, lhs_idx, self.data,
           rhs_ptr, rhs_idx, rhs.data,
           out_indptr, out_indices, out_data)

    result = self.__class__((out_data, out_indices, out_indptr),
                            shape=self.shape)
    result.prune()
    return result
def tocsr(self, copy=False):
    """
    Convert this matrix to CSRSymMatrix format. Remains symmetric

    The ``copy`` argument is accepted but not consulted by this method.

    Returns
    -------
    CSRSymMatrix

    """
    from pyomo.contrib.pynumero.sparse.csr import CSRSymMatrix

    # Empty matrix: build the result from shape and dtype alone.
    if self.nnz == 0:
        return CSRSymMatrix(self.shape, dtype=self.dtype)

    n_rows, n_cols = self.shape
    index_type = get_index_dtype((self.row, self.col),
                                 maxval=max(self.nnz, n_cols))
    rows = self.row.astype(index_type, copy=False)
    cols = self.col.astype(index_type, copy=False)

    out_indptr = np.empty(n_rows + 1, dtype=index_type)
    out_indices = np.empty_like(cols, dtype=index_type)
    out_data = np.empty_like(self.data, dtype=upcast(self.dtype))

    coo_tocsr(n_rows, n_cols, self.nnz, rows, cols, self.data,
              out_indptr, out_indices, out_data)

    result = CSRSymMatrix((out_data, out_indices, out_indptr),
                          shape=self.shape)
    if not self.has_canonical_format:
        result.sum_duplicates()
    return result
def tocsc(self, copy=False):
    """
    Convert this matrix to Compressed Sparse Column format

    The ``copy`` argument is accepted but not consulted by this method.

    Returns
    -------
    CSCMatrix

    """
    from pyomo.contrib.pynumero.sparse.csc import CSCMatrix

    # Empty matrix: build the result from shape and dtype alone.
    if self.nnz == 0:
        return CSCMatrix(self.shape, dtype=self.dtype)

    n_rows, n_cols = self.shape
    index_type = get_index_dtype((self.col, self.row),
                                 maxval=max(self.nnz, n_rows))
    rows = self.row.astype(index_type, copy=False)
    cols = self.col.astype(index_type, copy=False)

    out_indptr = np.empty(n_cols + 1, dtype=index_type)
    out_indices = np.empty_like(rows, dtype=index_type)
    out_data = np.empty_like(self.data, dtype=upcast(self.dtype))

    # TODO: check why scipy does this and not coo_tocsc
    # The CSR routine is called with the dimensions and the row/col arrays
    # swapped.
    coo_tocsr(n_cols, n_rows, self.nnz, cols, rows, self.data,
              out_indptr, out_indices, out_data)

    result = CSCMatrix((out_data, out_indices, out_indptr),
                       shape=self.shape)
    if not self.has_canonical_format:
        result.sum_duplicates()
    return result
def _check(self):
    """Check the row/col/data arrays for consistency.

    Warns when an index array does not have a signed-integer dtype, wraps
    ``row``, ``col`` and ``data`` as dask arrays (chunked by
    ``self.chunks``) when they are not dask arrays already, and, if the
    matrix has stored entries, raises ``ValueError`` for indices that are
    negative or outside ``self.shape``.
    """
    stored = self.nnz

    # index arrays should have integer data types
    if self.row.dtype.kind != 'i':
        warn("row index array has non-integer dtype (%s)  "
             % self.row.dtype.name)
    if self.col.dtype.kind != 'i':
        warn("col index array has non-integer dtype (%s) "
             % self.col.dtype.name)

    # NOTE: the resulting dtype is not used further in this method.
    idx_dtype = get_index_dtype(maxval=max(self.shape))

    for attr in ('row', 'col', 'data'):
        current = getattr(self, attr)
        if not isinstance(current, da.core.Array):
            setattr(self, attr, da.from_array(current, chunks=self.chunks))

    if not stored > 0:
        return

    n_rows, n_cols = self.shape[0], self.shape[1]
    if self.row.max().compute() >= n_rows:
        raise ValueError('row index exceeds matrix dimensions')
    if self.col.max().compute() >= n_cols:
        raise ValueError('column index exceeds matrix dimensions')
    if self.row.min().compute() < 0:
        raise ValueError('negative row index found')
    if self.col.min().compute() < 0:
        raise ValueError('negative column index found')
def _mul_sparse_matrix(self, other):
    """
    Do the sparse matrix mult returning fast_csr_matrix only
    when other is also fast_csr_matrix.

    Any other operand is converted with ``csr_matrix`` and multiplied with
    the two-pass ``_sparsetools`` kernels (``*_matmat_pass1`` followed by
    ``*_matmat_pass2``); that path returns a ``csr_matrix``.
    """
    n_rows, _ = self.shape
    _, n_cols = other.shape
    n_major = self._swap((n_rows, n_cols))[0]

    if isinstance(other, fast_csr_matrix):
        product = zcsr_mult(self, other)
        product.sort_indices()
        return product

    rhs = csr_matrix(other)  # convert to this format
    index_arrays = (self.indptr, self.indices, rhs.indptr, rhs.indices)

    def with_dtype(dtype):
        # The four input index arrays, all with the given dtype.
        return [np.asarray(arr, dtype=dtype) for arr in index_arrays]

    # Pass 1 fills the index pointer of the product.
    first_dtype = get_index_dtype(index_arrays, maxval=n_rows * n_cols)
    out_indptr = np.empty(n_major + 1, dtype=first_dtype)
    lhs_ptr, lhs_idx, rhs_ptr, rhs_idx = with_dtype(first_dtype)
    count_pass = getattr(_sparsetools, self.format + '_matmat_pass1')
    count_pass(n_rows, n_cols, lhs_ptr, lhs_idx, rhs_ptr, rhs_idx,
               out_indptr)

    # Pass 2 fills indices and data, sized from the last pointer entry.
    nnz = out_indptr[-1]
    final_dtype = get_index_dtype(index_arrays, maxval=nnz)
    out_indptr = np.asarray(out_indptr, dtype=final_dtype)
    out_indices = np.empty(nnz, dtype=final_dtype)
    out_data = np.empty(nnz, dtype=upcast(self.dtype, rhs.dtype))

    lhs_ptr, lhs_idx, rhs_ptr, rhs_idx = with_dtype(final_dtype)
    fill_pass = getattr(_sparsetools, self.format + '_matmat_pass2')
    fill_pass(n_rows, n_cols,
              lhs_ptr, lhs_idx, self.data,
              rhs_ptr, rhs_idx, rhs.data,
              out_indptr, out_indices, out_data)

    return csr_matrix((out_data, out_indices, out_indptr),
                      shape=(n_rows, n_cols))
def _mul_sparse_matrix(self, other):
    """
    Do the sparse matrix mult returning fast_csr_matrix only
    when other is also fast_csr_matrix.

    A ``fast_csr_matrix`` operand is handed to ``zcsr_mult`` with
    ``sorted=1``. Anything else is converted with ``csr_matrix`` and
    multiplied via ``*_matmat_pass1`` / ``*_matmat_pass2``, giving a
    ``csr_matrix``.
    """
    rows, _ = self.shape
    _, cols = other.shape
    major = self._swap((rows, cols))[0]

    if isinstance(other, fast_csr_matrix):
        return zcsr_mult(self, other, sorted=1)

    converted = csr_matrix(other)  # convert to this format
    sources = (self.indptr, self.indices,
               converted.indptr, converted.indices)

    # Symbolic pass: compute the product's index pointer.
    dtype_a = get_index_dtype(sources, maxval=rows * cols)
    ptr_out = np.empty(major + 1, dtype=dtype_a)
    symbolic = getattr(_sparsetools, self.format + '_matmat_pass1')
    symbolic(rows, cols,
             *([np.asarray(src, dtype=dtype_a) for src in sources]
               + [ptr_out]))

    total = ptr_out[-1]
    dtype_b = get_index_dtype(sources, maxval=total)
    ptr_out = np.asarray(ptr_out, dtype=dtype_b)
    idx_out = np.empty(total, dtype=dtype_b)
    val_out = np.empty(total, dtype=upcast(self.dtype, converted.dtype))

    # Numeric pass: fill the indices and values.
    self_ptr, self_idx, conv_ptr, conv_idx = [
        np.asarray(src, dtype=dtype_b) for src in sources]
    numeric = getattr(_sparsetools, self.format + '_matmat_pass2')
    numeric(rows, cols,
            self_ptr, self_idx, self.data,
            conv_ptr, conv_idx, converted.data,
            ptr_out, idx_out, val_out)

    return csr_matrix((val_out, idx_out, ptr_out), shape=(rows, cols))
def hessian(self, x):
    """Return model's hessian

    Parameters
    ----------
    x : `np.ndarray`, shape=(n_coeffs,)
        Value at which the hessian is computed

    Returns
    -------
    A ``csr_matrix`` built from the values written by the underlying
    model together with the index arrays constructed here.

    Notes
    -----
    For `ModelHawkesFixedExpKernLeastSq` the value of the hessian does
    not depend on the value at which it is computed.
    """
    if not hasattr(self._model, "hessian"):
        raise NotImplementedError(
            'hessian is not implemented yet for this model')
    if not self._fitted:
        raise ValueError("call ``fit`` before using ``hessian``")

    # Integer type scipy uses for sparse indices
    index_type = sputils.get_index_dtype()

    dim = self.n_nodes
    per_row = dim + 1            # stored entries per row
    n_rows = dim * per_row       # one row per coefficient
    n_values = n_rows * per_row

    # looks like [0 3 6 9 12 15 18] in dimension 2
    row_indices = np.arange(n_rows + 1, dtype=index_type) * per_row

    # looks like [0 2 3 1 4 5 0 2 3 0 2 3 1 4 5 1 4 5] in dimension 2
    indices = np.zeros(n_values, dtype=index_type)
    for d in range(dim):
        # recurrent column pattern of node d: its mu followed by its alphas
        pattern = np.hstack((np.array(d), dim + d * dim + np.arange(dim)))

        # row of mu_d
        indices[d * per_row:(d + 1) * per_row] = pattern
        # rows of the alphas attached to node d
        first = (d + 1) * (dim * dim + dim)
        last = (d + 2) * (dim * dim + dim)
        indices[first:last] = np.tile(pattern, (dim,))

    data = np.zeros(n_values, dtype=float)

    # In these two models, hessian does not depend on x
    x_independent = (ModelHawkesFixedSumExpKernLeastSqList,
                     ModelHawkesFixedExpKernLeastSqList)
    if isinstance(self._model, x_independent):
        self._model.hessian(data)
    else:
        self._model.hessian(x, data)

    return csr_matrix((data, indices, row_indices))
def check_format(self, full_check=True):
    """Validate (and normalise) the compressed-format arrays of this matrix.

    ``indptr`` and ``indices`` are converted to a common index dtype,
    ``data`` is passed through ``to_native`` and ``prune()`` is called, so
    this method may rebind those attributes.

    Parameters
    ----------
    full_check : bool, optional
        If `True`, rigorous check, O(N) operations. Otherwise
        basic check, O(1) operations (default True).

    Raises
    ------
    ValueError
        If any of the structural checks fails.
    """
    # use _swap to determine proper bounds
    _, minor_name = self._swap(('row', 'column'))
    major_dim, minor_dim = self._swap(self.shape)

    # index arrays should have integer data types
    if self.indptr.dtype.kind != 'i':
        warn("indptr array has non-integer dtype (%s)"
             % self.indptr.dtype.name)
    if self.indices.dtype.kind != 'i':
        warn("indices array has non-integer dtype (%s)"
             % self.indices.dtype.name)

    common = get_index_dtype((self.indptr, self.indices))
    self.indptr = np.asarray(self.indptr, dtype=common)
    self.indices = np.asarray(self.indices, dtype=common)
    self.data = to_native(self.data)

    # check array shapes
    if not (self.data.ndim == 1 and self.indices.ndim == 1
            and self.indptr.ndim == 1):
        raise ValueError('data, indices, and indptr should be 1-D')

    # check index pointer
    expected_len = major_dim + 1
    if len(self.indptr) != expected_len:
        raise ValueError("index pointer size (%d) should be (%d)"
                         % (len(self.indptr), expected_len))
    if self.indptr[0] != 0:
        raise ValueError("index pointer should start with 0")

    # check index and data arrays
    if len(self.indices) != len(self.data):
        raise ValueError("indices and data should have the same size")
    if self.indptr[-1] > len(self.indices):
        raise ValueError("Last value of index pointer should be less than "
                         "the size of index and data arrays")

    self.prune()

    # check format validity (more expensive)
    if not (full_check and self.nnz > 0):
        return
    if self.indices.max() >= minor_dim:
        raise ValueError("%s index values must be < %d"
                         % (minor_name, minor_dim))
    if self.indices.min() < 0:
        raise ValueError("%s index values must be >= 0" % minor_name)
    if np.diff(self.indptr).min() < 0:
        raise ValueError("index pointer values must form a "
                         "non-decreasing sequence")
def tocoo(self, copy=True):
    """
    Converts this matrix to COOrdinate format.

    Parameters
    ----------
    copy: bool, optional
        This argument is in the signature solely for Scipy compatibility
        reasons. It does not do anything. The data is always copied.

    Returns
    -------
    scipy.sparse.coo_matrix

    """
    assert_block_structure(self)

    # Offsets of every block row / block column in the flattened matrix,
    # e.g. row_starts[2] = block_00.shape[0] + block_10.shape[0]
    row_starts = np.append(0, np.cumsum(self._brow_lengths))
    col_starts = np.append(0, np.cumsum(self._bcol_lengths))
    flat_shape = (row_starts[-1], col_starts[-1])

    # Output triplets; index arrays start out filled with -1.
    total = self.nnz
    index_type = get_index_dtype(maxval=max(flat_shape))
    values = np.empty(total, dtype=self.dtype)
    rows = np.full(total, -1, dtype=index_type)
    cols = np.full(total, -1, dtype=index_type)

    # Copy each non-empty block into the next free slice, shifted by the
    # offsets of its block row and block column.
    filled = 0
    for bi, bj in zip(*np.nonzero(self._block_mask)):
        block = self.get_block(bi, bj).tocoo()
        window = slice(filled, filled + block.nnz)
        values[window] = block.data
        rows[window] = block.row + row_starts[bi]
        cols[window] = block.col + col_starts[bj]
        filled += block.nnz

    return coo_matrix((values, (rows, cols)), shape=flat_shape)
def asindices(x):
    """Convert ``x`` to an index array with the smallest suitable dtype.

    Parameters
    ----------
    x : array_like
        Candidate index values.

    Returns
    -------
    numpy.ndarray
        ``x`` as an array whose dtype is the one selected by
        ``get_index_dtype((x,), check_contents=True)``.

    Raises
    ------
    IndexError
        If ``x`` cannot be converted.
    """
    try:
        x = np.asarray(x)

        # Check index contents, to avoid creating 64-bit arrays needlessly
        idx_dtype = get_index_dtype((x,), check_contents=True)
        if idx_dtype != x.dtype:
            x = x.astype(idx_dtype)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not turned into IndexError.
        raise IndexError('invalid index')
    return x
def tocoo(self):
    """Return a copy of this matrix in COOrdinate format.

    Stored values come from ``self.values()`` and their (row, column)
    positions from ``self.keys()``.
    """
    from scipy.sparse.coo import coo_matrix

    if self.nnz == 0:
        return coo_matrix(self.shape, dtype=self.dtype)

    largest_dim = max(self.shape[0], self.shape[1])
    index_type = get_index_dtype(maxval=largest_dim)

    values = np.asarray(_list(self.values()), dtype=self.dtype)
    # keys are (row, col) pairs; transpose to get (rows, cols)
    coords = np.asarray(_list(self.keys()), dtype=index_type).T

    return coo_matrix((values, coords), shape=self.shape, dtype=self.dtype)
def test_get_index_dtype(self):
    """Check ``sputils.get_index_dtype`` on uint32 inputs near the int32 limit."""
    int32_max = np.iinfo(np.int32).max
    beyond_int32 = int32_max + 1
    int32 = np.dtype('int32')
    int64 = np.dtype('int64')

    def chosen(*args, **kwargs):
        return np.dtype(sputils.get_index_dtype(*args, **kwargs))

    # Check that uint32's with no values too large doesn't return
    # int64
    a1 = np.ones(90, dtype='uint32')
    a2 = np.ones(90, dtype='uint32')
    assert_equal(chosen((a1, a2), check_contents=True), int32)

    # Check that if we can not convert but all values are less than or
    # equal to max that we can just convert to int32
    a1[-1] = int32_max
    assert_equal(chosen((a1, a2), check_contents=True), int32)

    # Check that if it can not convert directly and the contents are
    # too large that we return int64
    a1[-1] = beyond_int32
    assert_equal(chosen((a1, a2), check_contents=True), int64)

    # test that if can not convert and didn't specify to check_contents
    # we return int64
    a1 = np.ones(89, dtype='uint32')
    a2 = np.ones(89, dtype='uint32')
    assert_equal(chosen((a1, a2)), int64)

    # Check that even if we have arrays that can be converted directly
    # that if we specify a maxval directly it takes precedence
    a1 = np.ones(12, dtype='uint32')
    a2 = np.ones(12, dtype='uint32')
    assert_equal(
        chosen((a1, a2), maxval=beyond_int32, check_contents=True),
        int64)

    # Check that an array with a too max size and maxval set
    # still returns int64
    a1[-1] = beyond_int32
    assert_equal(chosen((a1, a2), maxval=beyond_int32), int64)
def tocsc(self, copy=False):
    """Return this matrix converted to ``csc_matrix`` via ``csr_tocsc``.

    The ``copy`` argument is accepted but not consulted by this method.
    The result is flagged as having sorted indices.
    """
    from scipy.sparse.csc import csc_matrix

    n_rows, n_cols = self.shape[0], self.shape[1]
    stored = self.nnz
    index_type = get_index_dtype((self.indptr, self.indices),
                                 maxval=max(stored, n_rows))

    col_ptr = np.empty(n_cols + 1, dtype=index_type)
    row_idx = np.empty(stored, dtype=index_type)
    values = np.empty(stored, dtype=upcast(self.dtype))

    csr_tocsc(n_rows, n_cols,
              self.indptr.astype(index_type),
              self.indices.astype(index_type),
              self.data,
              col_ptr, row_idx, values)

    result = csc_matrix((values, row_idx, col_ptr), shape=self.shape)
    result.has_sorted_indices = True
    return result
def tocsr(self):
    '''Overridden method returning the ``csr_matrix`` in scope in this module.

    Return a copy of this matrix in Compressed Sparse Row format.
    Duplicate entries will be summed together.

    Examples
    --------
    >>> from numpy import array
    >>> from scipy.sparse import coo_matrix
    >>> row  = array([0, 0, 1, 3, 1, 0, 0])
    >>> col  = array([0, 2, 1, 3, 1, 0, 0])
    >>> data = array([1, 1, 1, 1, 1, 1, 1])
    >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
    >>> A.toarray()
    array([[3, 0, 1, 0],
           [0, 2, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 1]])

    '''
    from scipy.sparse.sputils import get_index_dtype

    if self.nnz == 0:
        return csr_matrix(self.shape, dtype=self.dtype)

    n_rows, n_cols = self.shape
    index_type = get_index_dtype((self.row, self.col),
                                 maxval=max(self.nnz, n_cols))

    row_ptr = np.empty(n_rows + 1, dtype=index_type)
    col_idx = np.empty(self.nnz, dtype=index_type)
    values = np.empty(self.nnz, dtype=scipy.sparse.coo.upcast(self.dtype))

    scipy.sparse.coo.coo_tocsr(n_rows, n_cols, self.nnz,
                               self.row.astype(index_type),
                               self.col.astype(index_type),
                               self.data,
                               row_ptr, col_idx, values)

    result = csr_matrix((values, col_idx, row_ptr), shape=self.shape)
    result.sum_duplicates()
    return result
def tocsr(self, copy=False):
    """Return this matrix converted to ``CSRMatrix`` via ``csc_tocsr``.

    The ``copy`` argument is accepted but not consulted by this method; the
    result is always constructed with ``copy=False`` from freshly allocated
    arrays and flagged as having sorted indices.
    """
    from pyomo.contrib.pynumero.sparse.csr import CSRMatrix

    n_rows, n_cols = self.shape
    stored = self.nnz
    index_type = get_index_dtype((self.indptr, self.indices),
                                 maxval=max(stored, n_cols))

    row_ptr = np.empty(n_rows + 1, dtype=index_type)
    col_idx = np.empty(stored, dtype=index_type)
    values = np.empty(stored, dtype=upcast(self.dtype))

    csc_tocsr(n_rows, n_cols,
              self.indptr.astype(index_type),
              self.indices.astype(index_type),
              self.data,
              row_ptr, col_idx, values)

    result = CSRMatrix((values, col_idx, row_ptr),
                       shape=self.shape, copy=False)
    result.has_sorted_indices = True
    return result
def _binopt(self, other, op):
    """
    Do the binary operation fn to two sparse matrices using
    fast_csr_matrix only when other is also a fast_csr_matrix.

    The result is a ``fast_csr_matrix`` only when ``other`` is one and
    ``op`` is not a comparison; otherwise it is a ``csr_matrix``.
    """
    other_is_fast = isinstance(other, fast_csr_matrix)
    if not other_is_fast:
        other = csr_matrix(other)

    # e.g. csr_plus_csr, csr_minus_csr, etc.
    kernel = getattr(_sparsetools, self.format + op + self.format)

    capacity = self.nnz + other.nnz
    index_arrays = (self.indptr, self.indices, other.indptr, other.indices)
    index_type = get_index_dtype(index_arrays, maxval=capacity)

    out_indptr = np.empty(self.indptr.shape, dtype=index_type)
    out_indices = np.empty(capacity, dtype=index_type)

    comparison_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
    is_comparison = op in comparison_ops
    if is_comparison:
        out_data = np.empty(capacity, dtype=np.bool_)
    else:
        out_data = np.empty(capacity,
                            dtype=upcast(self.dtype, other.dtype))

    lhs_ptr, lhs_idx, rhs_ptr, rhs_idx = [
        np.asarray(arr, dtype=index_type) for arr in index_arrays]
    kernel(self.shape[0], self.shape[1],
           lhs_ptr, lhs_idx, self.data,
           rhs_ptr, rhs_idx, other.data,
           out_indptr, out_indices, out_data)

    # Keep only the entries the kernel actually produced.
    used = out_indptr[-1]
    out_indices = out_indices[:used]
    out_data = out_data[:used]
    if used < capacity // 2:
        # too much waste, trim arrays
        out_indices = out_indices.copy()
        out_data = out_data.copy()

    if other_is_fast and not is_comparison:
        result_cls = fast_csr_matrix
    else:
        result_cls = csr_matrix
    return result_cls((out_data, out_indices, out_indptr),
                      dtype=out_data.dtype, shape=self.shape)
def coo_tocsr(coo_mat):
    """Convert a COO matrix to a ``scipy.sparse.csr_matrix``.

    After conversion the indices are sorted, ``csr_max_duplicates`` is run
    on the result's arrays, the result is pruned and marked as having
    canonical format.
    """
    # NOTE(review): csr_max_duplicates presumably merges duplicate entries
    # by keeping their maximum -- confirm against its definition.
    n_rows, n_cols = coo_mat.shape
    stored = coo_mat.nnz
    index_type = get_index_dtype((coo_mat.row, coo_mat.col),
                                 maxval=max(stored, n_cols))

    row_ptr = np.empty(n_rows + 1, dtype=index_type)
    col_idx = np.empty(stored, dtype=index_type)
    values = np.empty(stored, dtype=upcast(coo_mat.dtype))

    scipy_coo_tocsr(n_rows, n_cols, stored,
                    coo_mat.row.astype(index_type),
                    coo_mat.col.astype(index_type),
                    coo_mat.data,
                    row_ptr, col_idx, values)

    result = scipy.sparse.csr_matrix((values, col_idx, row_ptr),
                                     shape=coo_mat.shape)
    result.sort_indices()
    csr_max_duplicates(n_rows, n_cols,
                       result.indptr, result.indices, result.data)
    result.prune()
    result.has_canonical_format = True
    return result
def _binopt(self, other, op):
    """
    Do the binary operation fn to two sparse matrices using
    fast_csr_matrix only when other is also a fast_csr_matrix.

    ``op`` is the infix of the ``_sparsetools`` kernel name (the kernel
    looked up is ``self.format + op + self.format``). Comparison operators
    always yield a plain ``csr_matrix`` with boolean data.
    """
    if not isinstance(other, fast_csr_matrix):
        other = csr_matrix(other)

    # e.g. csr_plus_csr, csr_minus_csr, etc.
    fn = getattr(_sparsetools, self.format + op + self.format)

    maxnnz = self.nnz + other.nnz
    sources = (self.indptr, self.indices, other.indptr, other.indices)
    idx_dtype = get_index_dtype(sources, maxval=maxnnz)

    bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
    value_dtype = (np.bool_ if op in bool_ops
                   else upcast(self.dtype, other.dtype))

    indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
    indices = np.empty(maxnnz, dtype=idx_dtype)
    data = np.empty(maxnnz, dtype=value_dtype)

    self_ptr, self_idx, other_ptr, other_idx = (
        np.asarray(src, dtype=idx_dtype) for src in sources)
    fn(self.shape[0], self.shape[1],
       self_ptr, self_idx, self.data,
       other_ptr, other_idx, other.data,
       indptr, indices, data)

    # The last pointer entry is the number of entries actually written.
    actual_nnz = indptr[-1]
    indices, data = indices[:actual_nnz], data[:actual_nnz]
    if actual_nnz < maxnnz // 2:
        # too much waste, trim arrays
        indices, data = indices.copy(), data.copy()

    arrays = (data, indices, indptr)
    if isinstance(other, fast_csr_matrix) and op not in bool_ops:
        return fast_csr_matrix(arrays, dtype=data.dtype, shape=self.shape)
    return csr_matrix(arrays, dtype=data.dtype, shape=self.shape)
def tocoo(self):
    """
    Converts this matrix to COOSymMatrix format. Remains symmetric

    Off-diagonal blocks flagged ``is_symmetric`` are expanded with
    ``tofullcoo()``; every other block is taken through ``tocoo()``.

    Returns
    -------
    COOSymMatrix

    """
    self._check_mask()

    row_starts = np.append(0, np.cumsum(self._brow_lengths))
    col_starts = np.append(0, np.cumsum(self._bcol_lengths))
    flat_shape = (row_starts[-1], col_starts[-1])

    def occupied():
        # (block row, block column) pairs flagged in the block mask
        return zip(*np.nonzero(self._block_mask))

    def expands(bi, bj):
        # True for symmetric blocks that sit off the block diagonal
        return self._blocks[bi, bj].is_symmetric and bi != bj

    # First sweep: total number of entries to allocate.
    total = 0
    for bi, bj in occupied():
        block = self._blocks[bi, bj]
        total += block.getallnnz() if expands(bi, bj) else block.nnz

    index_type = get_index_dtype(maxval=max(flat_shape))
    values = np.empty(total, dtype=self.dtype)
    rows = np.empty(total, dtype=index_type)
    cols = np.empty(total, dtype=index_type)

    # Second sweep: copy every block in, shifted by its offsets.
    filled = 0
    for bi, bj in occupied():
        if expands(bi, bj):
            piece = self._blocks[bi, bj].tofullcoo()
        else:
            piece = self._blocks[bi, bj].tocoo()
        window = slice(filled, filled + piece.nnz)
        values[window] = piece.data
        rows[window] = piece.row + row_starts[bi]
        cols[window] = piece.col + col_starts[bj]
        filled += piece.nnz

    return COOSymMatrix((values, (rows, cols)), shape=flat_shape)
def tocoo(self):
    """
    Converts this matrix to COOMatrix format.

    Blocks flagged ``is_symmetric`` are expanded with ``tofullcoo()``;
    every other block is taken through ``tocoo()``.

    Returns
    -------
    COOMatrix

    """
    # ToDo: copy argument to match scipy?
    self._check_mask()

    row_starts = np.append(0, np.cumsum(self._brow_lengths))
    col_starts = np.append(0, np.cumsum(self._bcol_lengths))
    flat_shape = (row_starts[-1], col_starts[-1])

    # Output triplets; index arrays start out filled with -1.
    total = self.getallnnz()
    index_type = get_index_dtype(maxval=max(flat_shape))
    values = np.empty(total, dtype=self.dtype)
    rows = np.full(total, -1, dtype=index_type)
    cols = np.full(total, -1, dtype=index_type)

    filled = 0
    for bi, bj in zip(*np.nonzero(self._block_mask)):
        if self._blocks[bi, bj].is_symmetric:
            piece = self[bi, bj].tofullcoo()
        else:
            piece = self[bi, bj].tocoo()
        window = slice(filled, filled + piece.nnz)
        values[window] = piece.data
        rows[window] = piece.row + row_starts[bi]
        cols[window] = piece.col + col_starts[bj]
        filled += piece.nnz

    return COOMatrix((values, (rows, cols)), shape=flat_shape)
def tocsr(self):
    """Return a copy of this matrix in Compressed Sparse Row format

    Duplicate entries will be summed together.

    Examples
    --------
    >>> from numpy import array
    >>> from scipy.sparse import coo_matrix
    >>> row  = array([0, 0, 1, 3, 1, 0, 0])
    >>> col  = array([0, 2, 1, 3, 1, 0, 0])
    >>> data = array([1, 1, 1, 1, 1, 1, 1])
    >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
    >>> A.toarray()
    array([[3, 0, 1, 0],
           [0, 2, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 1]])

    """
    from .csr import csr_matrix

    if self.nnz == 0:
        return csr_matrix(self.shape, dtype=self.dtype)

    n_rows, n_cols = self.shape
    index_type = get_index_dtype((self.row, self.col),
                                 maxval=max(self.nnz, n_cols))

    row_ptr = np.empty(n_rows + 1, dtype=index_type)
    col_idx = np.empty(self.nnz, dtype=index_type)
    values = np.empty(self.nnz, dtype=upcast(self.dtype))

    coo_tocsr(n_rows, n_cols, self.nnz,
              self.row.astype(index_type),
              self.col.astype(index_type),
              self.data,
              row_ptr, col_idx, values)

    result = csr_matrix((values, col_idx, row_ptr), shape=self.shape)
    result.sum_duplicates()
    return result
# Set-up for calling scipy's CSR element-wise multiply kernel directly:
# two random sparse operands plus the output buffers the kernel writes to.
op = '_elmul_'
scipy_csr_elmul_csr = getattr(_sparsetools, "csr" + op + "csr")

# Random integer operands; every value above 50 is zeroed out.
m = 200
n = 400
m1 = np.random.randint(0, 100, (m, n))
m1[m1 > 50] = 0
m2 = np.random.randint(0, 100, (m, n))
m2[m2 > 50] = 0
m1x = sparse.csr_matrix(m1)
m2x = sparse.csr_matrix(m2)

# Output buffers sized for the sum of both operands' stored entries.
maxnnz = m1x.nnz + m2x.nnz
idx_dtype = get_index_dtype((m1x.indptr, m1x.indices,
                             m2x.indptr, m2x.indices),
                            maxval=maxnnz)
indptr = np.empty(m1x.indptr.shape, dtype=idx_dtype)
indices = np.empty(maxnnz, dtype=idx_dtype)

# Test the actual operation name (this previously tested the literal
# 'enul', which can never match an operator).
bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
if op in bool_ops:
    data = np.empty(maxnnz, dtype=np.bool_)
else:
    data = np.empty(maxnnz, dtype=upcast(m1x.dtype, m2x.dtype))

# Kernel arguments, named after the A/B operand convention.
n_row, n_col = m1.shape
Ap = m1x.indptr
Aj = m1x.indices
Ax = m1x.data
Bp = m2x.indptr
Bj = m2x.indices
Bx = m2x.data
def __init__(self, arg1, block_size=None, n_samples=None, n_history=None,
             shape=None, dtype=None, copy=False):
    """Build a delta-CSR matrix from one of several kinds of input.

    Parameters
    ----------
    arg1 : sparse matrix, tuple, generator or array_like
        * sparse matrix of this format, or in CSR format;
        * shape tuple ``(M, N)`` for an empty matrix;
        * ``(data, indices, indptr)`` or
          ``(data, indices, indptr, deltas)`` raw arrays;
        * generator, handed to ``_construct_from_iterable``;
        * anything else is converted with ``np.asarray`` and fed to
          ``_construct_from_iterable`` row by row.
    block_size, n_samples, n_history
        Forwarded to ``_csr_to_delta_csr`` / ``_construct_from_iterable``.
    shape : tuple, optional
        Explicit shape; inferred from the raw arrays when omitted.
    dtype : optional
        Data type for the empty-matrix and generator cases.
    copy : bool, optional
        Accepted but not consulted by this constructor.

    Raises
    ------
    NotImplementedError
        For sparse inputs in other formats and for COO-style 2-tuples.
    ValueError
        If ``arg1`` is not a recognised constructor input.
    """
    _data_matrix.__init__(self)

    # case 1: instantiate from another sparse matrix
    if isspmatrix(arg1):
        if arg1.format == self.format:
            self._set_self(arg1)
        elif arg1.format == "csr":
            self._csr_to_delta_csr(arg1, block_size, n_samples, n_history)
        else:
            raise NotImplementedError(
                "Instantiation from sparse matrix not yet ready")

    # case 2: instantiate from some kind of raw data
    elif isinstance(arg1, tuple):
        if isshape(arg1):
            # input is size specification (M,N) for empty matrix
            # code mostly taken from scipy CSR implementation, other than an
            # additional line to instantiate deltas array
            self.shape = arg1
            M, N = self.shape
            idx_dtype = get_index_dtype(maxval=max(M, N))
            self.data = np.zeros(0, getdtype(dtype, default='float'))
            self.indices = np.zeros(0, idx_dtype)
            self.indptr = np.zeros(self._swap((M, N))[0] + 1,
                                   dtype=idx_dtype)
            self.deltas = np.zeros(0, dtype=idx_dtype)
        elif len(arg1) == 2:
            # COO data format
            raise NotImplementedError(
                "Instantiation from COO format not yet ready")
        elif len(arg1) == 3 or len(arg1) == 4:
            # contents of the tuple are the raw data structures
            (self.data, self.indices, self.indptr) = arg1[:3]

            # use given shape or automatically infer one
            if shape is not None:
                self.shape = shape
            else:
                # Infer from the arrays just stored on the instance (the
                # bare names ``indptr``/``indices`` were never defined).
                # Column indices are 0-based, hence the +1.
                M = len(self.indptr) - 1
                N = np.max(self.indices) + 1
                self.shape = (M, N)

            # a fourth array, for the deltas pointer, should always be
            # given in general use, but we also allow for the case where
            # it is omitted in order to maintain backwards compatibility
            # with superclass methods. In this case we just let each
            # deltas[i] = i; in other words treating this matrix as a
            # standard CSR matrix with no delta encoding
            if len(arg1) > 3:
                self.deltas = arg1[3]
            else:
                self.deltas = np.arange(self.shape[0])
        else:
            # Fail here rather than later in check_format on attributes
            # that were never set.
            raise ValueError(
                "unrecognized delta_csr_matrix constructor usage")

    # case 3: instantiate from generator object
    elif isinstance(arg1, types.GeneratorType):
        self._construct_from_iterable(arg1,
                                      getdtype(dtype, default='float'),
                                      np.int32, block_size, n_samples,
                                      shape)

    # case 4: instantiate from dense matrix / array
    else:
        try:
            arg1 = np.asarray(arg1)
        except Exception:
            raise ValueError(
                "unrecognized delta_csr_matrix constructor usage")

        # create a generator expression for iterating over rows of arg1
        row_gen = (arg1[i, :] for i in range(arg1.shape[0]))
        self._construct_from_iterable(
            row_gen, arg1.dtype,
            get_index_dtype(maxval=max(arg1.shape)),
            block_size, n_samples, shape=arg1.shape)

    self.check_format(full_check=False)
def __init__(self, arg1, shape=None, dtype=None, copy=False):
    """Build a COO matrix whose arrays may be dask arrays.

    ``arg1`` may be a shape tuple ``(M, N)`` (empty matrix), a
    ``(data, (row, col))`` tuple, another sparse matrix, or a dense
    array-like. For the ``(data, (row, col))`` form, inputs that are not
    already dask arrays are wrapped with ``da.from_array`` using
    ``self.chunks``. ``dtype``, when given, is applied to the data at the
    end, and ``_check()`` is called last.
    """
    _data_matrix.__init__(self)
    self.chunks = (10, 1)

    def as_dask(values):
        # Pass dask arrays through; wrap anything else.
        if isinstance(values, da.core.Array):
            return values
        return da.from_array(values, chunks=self.chunks)

    if isinstance(arg1, tuple) and isshape(arg1):
        # empty matrix of the given shape
        n_rows, n_cols = arg1
        self.shape = (n_rows, n_cols)
        index_type = get_index_dtype(maxval=max(n_rows, n_cols))
        self.row = np.array([], dtype=index_type)
        self.col = np.array([], dtype=index_type)
        self.data = np.array([], getdtype(dtype, default=float))
        self.has_canonical_format = True

    elif isinstance(arg1, tuple):
        # (data, (row, col)) triplets
        try:
            values, (rows, cols) = arg1
        except (TypeError, ValueError):
            raise TypeError('invalid input format')

        if shape is None:
            if len(rows) == 0 or len(cols) == 0:
                raise ValueError('cannot infer dimensions from zero '
                                 'sized index arrays')
            dims = (np.max(rows) + 1, np.max(cols) + 1)
        else:
            # Use 2 steps to ensure shape has length 2.
            n_rows, n_cols = shape
            dims = (n_rows, n_cols)
        self.shape = dims

        # NOTE: the resulting dtype is not used further in this branch.
        index_type = get_index_dtype(maxval=max(self.shape))

        self.row = as_dask(rows)
        self.col = as_dask(cols)
        self.data = as_dask(values)
        self.has_canonical_format = False

    elif isspmatrix(arg1):
        if isspmatrix_coo(arg1) and copy:
            self.row = arg1.row.copy()
            self.col = arg1.col.copy()
            self.data = arg1.data.copy()
            self.shape = arg1.shape
        else:
            converted = arg1.tocoo()
            self.row = converted.row
            self.col = converted.col
            self.data = converted.data
            self.shape = converted.shape
        self.has_canonical_format = False

    else:
        # dense argument
        dense = np.atleast_2d(np.asarray(arg1))
        if dense.ndim != 2:
            raise TypeError('expected dimension <= 2 array or matrix')
        self.shape = dense.shape
        self.row, self.col = dense.nonzero()
        self.data = dense[self.row, self.col]
        self.has_canonical_format = True

    if dtype is not None:
        self.data = self.data.astype(dtype)

    self._check()
def hessian(self, x):
    """Return model's hessian

    Parameters
    ----------
    x : `np.ndarray`, shape=(n_coeffs,)
        Value at which the hessian is computed

    Returns
    -------
    output : `csr_matrix`
        Sparse matrix built from the values filled in by the underlying
        model and the index pattern assembled here.

    Raises
    ------
    NotImplementedError
        If the underlying model has no ``hessian`` attribute.
    ValueError
        If ``fit`` has not been called yet.

    Notes
    -----
    For `ModelHawkesExpKernLeastSq` the value of the hessian does not
    depend on the value at which it is computed.
    """
    model = self._model
    if not hasattr(model, "hessian"):
        raise NotImplementedError('hessian is not implemented yet for '
                                  'this model')
    if not self._fitted:
        raise ValueError("call ``fit`` before using ``hessian``")

    # Integer type scipy uses for sparse indices
    index_dtype = sputils.get_index_dtype()

    n_nodes = self.n_nodes
    # number of alphas per dimension
    sum_exp_models = (ModelHawkesSumExpKernLeastSq,
                      ModelHawkesSumExpKernLogLik)
    if isinstance(model, sum_exp_models):
        alphas_per_node = n_nodes * len(self.decays)
    else:
        alphas_per_node = n_nodes

    # every row stores one baseline column plus the alpha columns
    row_width = 1 + alphas_per_node
    n_rows = n_nodes + n_nodes * alphas_per_node
    n_values = n_rows * row_width

    # row pointer, looks like [0 3 6 9 12 15 18] in dimension 2
    indptr = np.arange(n_rows + 1, dtype=index_dtype) * row_width

    # column indices, look like [0 2 3 1 4 5 0 2 3 0 2 3 1 4 5 1 4 5]
    # in dimension 2
    col_indices = np.zeros(n_values, dtype=index_dtype)
    alpha_rows_offset = n_nodes * row_width
    for node in range(n_nodes):
        # recurrent pattern of this node: its baseline, then its alphas
        first_alpha = n_nodes + node * alphas_per_node
        pattern = np.hstack(
            (np.array(node), first_alpha + np.arange(alphas_per_node)))

        # baseline row of this node
        col_indices[node * row_width:(node + 1) * row_width] = pattern

        # alpha rows of this node all repeat the same pattern
        begin = alpha_rows_offset + node * alphas_per_node * row_width
        end = begin + alphas_per_node * row_width
        col_indices[begin:end] = np.tile(pattern, (alphas_per_node,))

    values = np.zeros(n_values, dtype=float)
    # In these two models, hessian does not depend on x
    x_independent_models = (ModelHawkesSumExpKernLeastSq,
                            ModelHawkesExpKernLeastSq)
    if isinstance(model, x_independent_models):
        model.hessian(values)
    else:
        model.hessian(x, values)

    return csr_matrix((values, col_indices, indptr))
def __init__(self, arg1, shape=None, dtype=None, copy=False):
    """Build a compressed sparse matrix whose arrays may be dask arrays.

    Parameters
    ----------
    arg1 : sparse matrix, tuple or array_like
        One of:

        * a sparse matrix: converted to this format (copied only when it
          already has this format and ``copy`` is true);
        * a shape tuple ``(M, N)`` (as recognised by ``isshape``): an
          empty matrix is created with ``da.zeros``;
        * ``(data, ij)``: routed through ``coo_matrix``;
        * ``(data, indices, indptr)``: each array is wrapped with
          ``da.from_array``;
        * anything else: converted with ``np.asarray`` and routed through
          ``coo_matrix`` as a dense array.
    shape : tuple, optional
        Matrix dimensions.  When omitted and not already set, they are
        inferred from ``indptr`` and the largest entry of ``indices``.
    dtype : dtype, optional
        When given, ``self.data`` is converted to it at the end.
    copy : bool, optional
        Only consulted when ``arg1`` is a sparse matrix of this format.

    Raises
    ------
    ValueError
        If the tuple has an unsupported length, if the dense conversion
        fails, or if the dimensions cannot be inferred.
    """
    _data_matrix.__init__(self)

    if isspmatrix(arg1):
        if arg1.format == self.format and copy:
            arg1 = arg1.copy()
        else:
            arg1 = arg1.asformat(self.format)
        self._set_self(arg1)

    elif isinstance(arg1, tuple):
        if isshape(arg1):
            # It's a tuple of matrix dimensions (M, N)
            # create empty matrix
            self.shape = arg1   # spmatrix checks for errors here
            M, N = self.shape
            major_dim, minor_dim = self._swap((M, N))
            idx_dtype = get_index_dtype(maxval=minor_dim)
            self.data = da.zeros(0, getdtype(dtype, default=float))
            self.indices = da.zeros(0, idx_dtype)
            self.indptr = da.zeros(major_dim + 1, dtype=idx_dtype)
        elif len(arg1) == 2:
            # (data, ij) format
            from .coo import coo_matrix
            other = self.__class__(coo_matrix(arg1, shape=shape))
            self._set_self(other)
        elif len(arg1) == 3:
            # (data, indices, indptr) format
            (data, indices, indptr) = arg1
            chunks = (10,)
            self.indices = da.from_array(indices, chunks=chunks)
            self.indptr = da.from_array(indptr, chunks=chunks)
            self.data = da.from_array(data, chunks=chunks)
        else:
            raise ValueError("unrecognized %s_matrix constructor usage" %
                             self.format)

    else:
        # must be dense
        try:
            arg1 = np.asarray(arg1)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            raise ValueError("unrecognized %s_matrix constructor usage" %
                             self.format)
        from scipy.sparse.coo import coo_matrix
        self._set_self(self.__class__(coo_matrix(arg1, dtype=dtype)))

    # Read matrix dimensions given, if any
    if shape is not None:
        self.shape = shape   # spmatrix will check for errors
    elif self.shape is None:
        # shape not already set, try to infer dimensions
        try:
            major_dim = len(self.indptr) - 1
            minor_dim = self.indices.max() + 1
        except Exception:
            raise ValueError('unable to infer matrix dimensions')
        else:
            self.shape = self._swap((major_dim, minor_dim))

    if dtype is not None:
        # NOTE(review): np.asarray presumably materialises a dask-backed
        # ``data`` into a numpy array -- confirm this is intended.
        self.data = np.asarray(self.data, dtype=dtype)

    self.check_format(full_check=False)
def _insert_many(self, i, j, x):
    """Inserts new nonzero at each (i, j) with value x

    Here (i,j) index major and minor respectively.
    i, j and x must be non-empty, 1d arrays.
    Inserts each major group (e.g. all entries per row) at a time.
    Maintains has_sorted_indices property.

    The local names i, j and x are rebound to reordered arrays obtained
    with ``take``; ``self.indices``, ``self.data`` and ``self.indptr`` are
    replaced by newly built arrays.
    """
    # Group the new entries by major index; the stable sort keeps the
    # relative order of entries that share the same major index.
    order = np.argsort(i, kind='mergesort')  # stable for duplicates
    i = i.take(order, mode='clip')
    j = j.take(order, mode='clip')
    x = x.take(order, mode='clip')

    # Remember the flag now: it is reset below before re-sorting.
    do_sort = self.has_sorted_indices

    # Update index data type
    # (it must be able to hold the current last indptr value plus the
    # number of inserted values)
    idx_dtype = get_index_dtype((self.indices, self.indptr),
                                maxval=(self.indptr[-1] + x.size))
    self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
    self.indices = np.asarray(self.indices, dtype=idx_dtype)
    i = np.asarray(i, dtype=idx_dtype)
    j = np.asarray(j, dtype=idx_dtype)

    # Collate old and new in chunks by major index
    indices_parts = []
    data_parts = []
    # ui: distinct major indices; ui_indptr: where each group starts in
    # the sorted i / j / x, with len(j) appended as the final boundary.
    ui, ui_indptr = np.unique(i, return_index=True)
    ui_indptr = np.append(ui_indptr, len(j))
    # Number of new entries per group (corrected below for duplicates).
    new_nnzs = np.diff(ui_indptr)
    prev = 0
    for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
        # old entries
        start = self.indptr[prev]
        stop = self.indptr[ii]
        indices_parts.append(self.indices[start:stop])
        data_parts.append(self.data[start:stop])

        # handle duplicate j: keep last setting
        # (the group is reversed so that the first occurrence reported by
        # np.unique is the last one that was given)
        uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
        if len(uj) == je - js:
            # no duplicates in this group
            indices_parts.append(j[js:je])
            data_parts.append(x[js:je])
        else:
            indices_parts.append(j[js:je][::-1][uj_indptr])
            data_parts.append(x[js:je][::-1][uj_indptr])
            new_nnzs[c] = len(uj)

        prev = ii

    # remaining old entries
    # (ii still holds the last major index visited by the loop)
    start = self.indptr[ii]
    indices_parts.append(self.indices[start:])
    data_parts.append(self.data[start:])

    # update attributes
    self.indices = np.concatenate(indices_parts)
    self.data = np.concatenate(data_parts)
    # Rebuild indptr from per-major counts: old counts plus the new
    # entries, accumulated in place.
    nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
    nnzs[0] = idx_dtype(0)
    indptr_diff = np.diff(self.indptr)
    indptr_diff[ui] += new_nnzs
    nnzs[1:] = indptr_diff
    self.indptr = np.cumsum(nnzs, out=nnzs)

    if do_sort:
        # TODO: only sort where necessary
        self.has_sorted_indices = False
        self.sort_indices()

    self.check_format(full_check=False)
except ImportError as e: if is_building_tick: print(e) warnings.warn("numpy is not installed:\n" " - Include directory for numpy integration may not be " "correct\n " " - BLAS will not be used for this build\n") # By default, we assume that scipy uses 32 bit integers for indices in sparse # arrays sparse_indices_flag = "-DTICK_SPARSE_INDICES_INT32" try: import numpy as np from scipy.sparse import sputils sparsearray_type = sputils.get_index_dtype() if sparsearray_type == np.int64: sparse_indices_flag = "-DTICK_SPARSE_INDICES_INT64" except ImportError as e: if is_building_tick and numpy_available: print(e) warnings.warn("scipy is not installed, unable to determine " "sparse array integer type (assuming 32 bits)\n") if os.name == 'posix': if platform.system() == 'Darwin': os_version = platform.mac_ver()[0] # keep only major + minor os_version = '.'.join(os_version.split('.')[:2])
def __init__(self, arg1, shape=None, dtype=None, copy=False):
    """Build a compressed sparse matrix from one of several input forms.

    Parameters
    ----------
    arg1 : sparse matrix, tuple or array_like
        One of:

        * a sparse matrix: converted to this format (copied only when it
          already has this format and ``copy`` is true);
        * a shape tuple ``(M, N)`` (as recognised by ``isshape``): an
          empty matrix is created;
        * ``(data, ij)``: routed through ``coo_matrix``;
        * ``(data, indices, indptr)``: the three arrays are stored
          directly;
        * anything else: converted with ``np.asarray`` and routed through
          ``coo_matrix`` as a dense array.
    shape : tuple, optional
        Matrix dimensions.  When omitted and not already set, they are
        inferred from ``indptr`` and the largest entry of ``indices``.
    dtype : dtype, optional
        Data type of ``self.data``.
    copy : bool, optional
        Whether the input arrays of the ``(data, indices, indptr)`` form
        (or a same-format sparse input) are copied.  When false, the
        inputs are reused whenever no conversion is required.

    Raises
    ------
    ValueError
        If the tuple has an unsupported length, if the dense conversion
        fails, or if the dimensions cannot be inferred.
    """
    _data_matrix.__init__(self)

    def to_array(values, target_dtype):
        # ``np.array(..., copy=False)`` raises on NumPy >= 2 whenever a
        # copy cannot be avoided (list input, dtype conversion).
        # ``np.asarray`` copies only when needed on every NumPy version.
        if copy:
            return np.array(values, copy=True, dtype=target_dtype)
        return np.asarray(values, dtype=target_dtype)

    if isspmatrix(arg1):
        if arg1.format == self.format and copy:
            arg1 = arg1.copy()
        else:
            arg1 = arg1.asformat(self.format)
        self._set_self(arg1)

    elif isinstance(arg1, tuple):
        if isshape(arg1):
            # It's a tuple of matrix dimensions (M, N)
            # create empty matrix
            self._shape = check_shape(arg1)
            M, N = self.shape
            # Select index dtype large enough to pass array and
            # scalar parameters to sparsetools
            idx_dtype = get_index_dtype(maxval=max(M, N))
            self.data = np.zeros(0, getdtype(dtype, default=float))
            self.indices = np.zeros(0, idx_dtype)
            self.indptr = np.zeros(self._swap((M, N))[0] + 1,
                                   dtype=idx_dtype)
        elif len(arg1) == 2:
            # (data, ij) format
            from scipy.sparse.coo import coo_matrix
            other = self.__class__(coo_matrix(arg1, shape=shape))
            self._set_self(other)
        elif len(arg1) == 3:
            # (data, indices, indptr) format
            (data, indices, indptr) = arg1

            # Select index dtype large enough to pass array and
            # scalar parameters to sparsetools
            maxval = None
            if shape is not None:
                maxval = max(shape)
            idx_dtype = get_index_dtype((indices, indptr),
                                        maxval=maxval,
                                        check_contents=True)

            self.indices = to_array(indices, idx_dtype)
            self.indptr = to_array(indptr, idx_dtype)
            self.data = to_array(data, dtype)
        else:
            raise ValueError("unrecognized {}_matrix "
                             "constructor usage".format(self.format))

    else:
        # must be dense
        try:
            arg1 = np.asarray(arg1)
        except Exception:
            raise ValueError("unrecognized {}_matrix constructor usage"
                             "".format(self.format))
        from scipy.sparse.coo import coo_matrix
        self._set_self(self.__class__(coo_matrix(arg1, dtype=dtype)))

    # Read matrix dimensions given, if any
    if shape is not None:
        self._shape = check_shape(shape)
    elif self.shape is None:
        # shape not already set, try to infer dimensions
        try:
            major_dim = len(self.indptr) - 1
            minor_dim = self.indices.max() + 1
        except Exception:
            raise ValueError('unable to infer matrix dimensions')
        else:
            self._shape = check_shape(
                self._swap((major_dim, minor_dim)))

    if dtype is not None:
        self.data = self.data.astype(dtype, copy=False)

    self.check_format(full_check=False)