def sub(self, indices):
    """ Return a new sparse CSR matrix with the data only for the given indices

    Parameters
    ----------
    indices : array_like
        the indices of the rows *and* columns that are retained in
        the sparse pattern

    Returns
    -------
    object of the same class as ``self``
        a new sparse matrix holding only the retained rows/columns, with
        the column indices renumbered according to their position in
        `indices`
    """
    indices = ensure_array(indices)
    nrow, ncolumn = self.shape[0], self.shape[1]

    # Check if we have a square matrix or a rectangular one.
    # `ridx` are the rows to extract, `nc` the new number of columns and
    # `pvt` must be large enough to be indexed by every entry in `indices`.
    if nrow == ncolumn:
        ridx = indices.view()
        nc = len(indices)
        pvt = n_.emptyi([nrow])
    elif nrow < ncolumn:
        # Only indices that refer to existing rows can be extracted
        ridx = indices[indices < nrow]
        nc = len(indices)
        pvt = n_.emptyi([ncolumn])
    else:
        # Only indices that refer to existing columns count as columns
        ridx = indices.view()
        nc = np.count_nonzero(indices < ncolumn)
        pvt = n_.emptyi([nrow])

    # Pivoting table: old index -> new index, -1 for everything removed
    pvt.fill(-1)
    pvt[indices] = n_.arangei(len(indices))

    # Create the new sparse matrix with a minimal allocation (nnz=1);
    # every array is overwritten below.
    csr = self.__class__((len(ridx), nc, self.shape[2]),
                         dtype=self.dtype, nnz=1)
    # Get views
    ptr1 = csr.ptr.view()
    ncol1 = csr.ncol.view()

    # Copy the pointers and number of columns of the retained rows
    # directly into the new arrays (i.e. re-use space)
    take(self.ptr, ridx, out=ptr1[1:])
    take(self.ncol, ridx, out=ncol1)

    # Linear indices (in the old arrays) of all elements in the retained rows
    col_idx = array_arange(ptr1[1:], n=ncol1)

    # Translate the old column indices to the new ones (note this also
    # ensures that it will work on non-finalized sparse matrices)
    col1 = pvt[take(self.col, col_idx)]
    # Elements whose column survives; computed once and reused below
    keep = col1 >= 0

    # Temporary pointer into `col1` (which is stored contiguously per row)
    ptr1[0] = 0
    n_.cumsumi(ncol1, out=ptr1[1:])

    # Number of surviving elements per row, as the difference of the
    # running count of kept elements at the row boundaries.
    # Note ncol1 is a view of csr.ncol
    kept = np.concatenate(([0], np.cumsum(keep)))
    ncol1[:] = kept[ptr1[1:]] - kept[ptr1[:-1]]
    del kept

    # Because this is `sub`, we are probably dealing with a relatively
    # small number of indices compared to the original matrix, hence we
    # extract the surviving elements with `take`.
    idx_take = keep.nonzero()[0]
    del keep

    # Reduce col1 and also extract the data
    csr.col = take(col1, idx_take)
    del col1
    csr._D = take(self._D[col_idx, :], idx_take, 0)
    del col_idx, idx_take

    # Set the final pointers for the new sparse matrix
    csr.ptr[0] = 0
    n_.cumsumi(ncol1, out=csr.ptr[1:])
    csr._nnz = len(csr.col)

    return csr
def __init_shape(self, arg1, dim=1, dtype=None, nnzpr=20, nnz=None, **kwargs):
    """ Allocate the arrays of an empty sparse pattern of a given shape

    Parameters
    ----------
    arg1 : sequence of int
        either ``(M, N)`` or ``(M, N, K)``; a 2-sequence is extended with `dim`
    dim : int, optional
        size of the last dimension when `arg1` only has two entries
    dtype : numpy.dtype, optional
        data type of the stored elements, defaults to ``numpy.float64``
    nnzpr : int, optional
        number of elements initially reserved per row (at least 1), only
        used when `nnz` is not given
    nnz : int, optional
        total number of elements to reserve; when given the number of
        elements per row is derived from it
    **kwargs
        accepted but not used here

    Raises
    ------
    ValueError
        if `arg1` does not have 2 or 3 entries
    """
    # Accept any sequence (list, ndarray, ...), not only tuples; the
    # concatenation below is only well-defined for tuples
    arg1 = tuple(arg1)

    # The shape of the data...
    if len(arg1) == 2:
        # extend to extra dimension
        arg1 = arg1 + (dim, )
    elif len(arg1) != 3:
        raise ValueError(
            "unrecognized shape input, either a 2-tuple or 3-tuple is required"
        )

    # Set default dtype
    if dtype is None:
        dtype = np.float64

    # unpack size
    M, N, K = arg1

    # Store shape
    self._shape = (M, N, K)

    # Check default construction of sparse matrix
    nnzpr = max(nnzpr, 1)

    # Re-create options
    if nnz is None:
        # number of non-zero elements is NOT given
        nnz = M * nnzpr
    elif M > 0:
        # number of non-zero elements is given, derive the number of
        # elements per row from it.
        # (for M == 0 there are no rows to distribute over and the
        #  division would fail, so nnzpr is kept as is)
        nnzpr = nnz // M

    # Correct input in case very few elements are requested
    nnzpr = max(nnzpr, 1)
    nnz = max(nnz, nnzpr * M)

    # Store number of columns currently hold
    # in the sparsity pattern
    self.ncol = n_.zerosi([M])
    # Create pointer array, i.e. [0, nnzpr, 2 * nnzpr, ..., M * nnzpr]
    self.ptr = n_.cumsumi(n_.arrayi([nnzpr] * (M + 1))) - nnzpr
    # Create column array
    self.col = n_.emptyi(nnz)
    # Store current number of non-zero elements
    self._nnz = 0

    # Important that this is zero
    # For instance one may set one dimension at a time
    # thus automatically zeroing the other dimensions.
    self._D = np.zeros([nnz, K], dtype)

    # Denote that this sparsity pattern hasn't been finalized
    self._finalized = False
def tile(self, reps, axis, eta=False):
    """ Create a tiled sparse atom object, equivalent to `Geometry.tile`

    Every existing sparse element is copied into each of the `reps` blocks
    of the enlarged structure; the column indices are shifted block by block
    and wrapped into the supercells of the tiled geometry.

    Notes
    -----
    Both finalized and non-finalized sparse patterns are accepted. For a
    non-finalized pattern only the entries in use are gathered before tiling.

    Parameters
    ----------
    reps : int
        number of repetitions along cell-vector ``axis``
    axis : int
        0, 1, 2 according to the cell-direction
    eta : bool, optional
        print an ETA to stdout

    Returns
    -------
    object of the same class as ``self``
        the sparse object defined on the tiled geometry

    See Also
    --------
    Geometry.tile: the same ordering as the final geometry
    Geometry.repeat: a different ordering of the final geometry
    repeat: a different ordering of the final geometry
    """
    from sys import stdout
    from time import time

    def _hms(seconds):
        # Split a duration in seconds into (hours, minutes, seconds)
        minutes, secs = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return int(hours), int(minutes), secs

    # The new (still empty) sparse object on the tiled geometry
    tiled_geom = self.geom.tile(reps, axis)
    S = self.__class__(tiled_geom, self.dim, self.dtype, 1,
                       **self._cls_kwargs())

    # Local references to the current sparse pattern
    geom = self.geom
    na = self.na
    csr = self._csr
    ncol = csr.ncol
    if self.finalized:
        col, D = csr.col, csr._D
    else:
        # Only pick the elements that are actually in use
        used = array_arange(csr.ptr[:-1], n=ncol)
        col = np.take(csr.col, used)
        D = np.take(csr._D, used, 0)
        del used

    # Information for the new sparse matrix
    na_n = S.na
    geom_n = S.geom

    # For ETA
    t0 = time()
    name = self.__class__.__name__

    sc_index = geom_n.sc_index

    # The row pointer is complete once the per-row counts are tiled
    indptr = np.insert(n_.cumsumi(np.tile(ncol, reps)), 0, 0)

    # One row of `indices` per repetition
    indices = n_.emptyi([indptr[-1]])
    indices.shape = (reps, -1)

    # Supercell offsets of the existing couplings
    isc = geom.a2isc(col)

    # Coupled atom in the new geometry (without wrapping into the correct
    # supercell, that happens in the loop); starts one block "too early"
    # because the loop shifts it before use
    atom_j = col % na + na * isc[:, axis] - na

    for rep in range(reps):
        # Move to the atoms of this repetition
        atom_j += na

        # Correct the supercell information and store wrapped indices
        isc[:, axis] = atom_j // na_n
        indices[rep, :] = atom_j % na_n + sc_index(isc) * na_n

        if eta:
            # Estimated remaining time from the average time per repetition
            remaining = (time() - t0) / (rep + 1) * (reps - rep - 1)
            h, m, s = _hms(remaining)
            stdout.write(name + ".tile() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".
                         format(h, m, s))
            stdout.flush()

    # Clean-up
    del isc, atom_j

    # Back to a linear array of column indices
    indices.shape = (-1, )

    S._csr = SparseCSR((np.tile(D, (reps, 1)), indices, indptr),
                       shape=(geom_n.na, geom_n.na_s))

    if eta:
        # Total time spent on the computation
        h, m, s = _hms(time() - t0)
        stdout.write(name + ".tile() finished after {0:d}h {1:d}m {2:.1f}s\n".
                     format(h, m, s))
        stdout.flush()

    return S
def repeat(self, reps, axis, eta=False):
    """ Create a repeated sparse atom object, equivalent to `Geometry.repeat`

    The already existing sparse elements are extrapolated to the new
    supercell by repeating them in blocks like the coordinates.

    In the repeated object the rows ``ia * reps + rep`` (``rep`` in
    ``range(reps)``) all originate from row ``ia`` of this object.

    Parameters
    ----------
    reps : int
        number of repetitions along cell-vector ``axis``
    axis : int
        0, 1, 2 according to the cell-direction
    eta : bool, optional
        print an ETA to stdout

    Returns
    -------
    object of the same class as ``self``
        the sparse object defined on the repeated geometry

    See Also
    --------
    Geometry.repeat: the same ordering as the final geometry
    Geometry.tile: a different ordering of the final geometry
    tile: a different ordering of the final geometry
    """
    # Create the new sparse object
    g = self.geom.repeat(reps, axis)
    S = self.__class__(g, self.dim, self.dtype, 1, **self._cls_kwargs())

    # Now begin to populate it accordingly
    # Retrieve local pointers to the information
    # regarding the current sparse matrix
    geom = self.geom
    na = self.na
    ncol = self._csr.ncol
    if self.finalized:
        col = self._csr.col
        D = self._csr._D
    else:
        # Only pick the elements that are actually in use
        ptr = self._csr.ptr
        idx = array_arange(ptr[:-1], n=ncol)
        col = np.take(self._csr.col, idx)
        D = np.take(self._csr._D, idx, 0)
        del ptr, idx

    # Information for the new sparse matrix
    na_n = S.na
    geom_n = S.geom

    # For ETA
    from time import time
    from sys import stdout
    t0 = time()
    name = self.__class__.__name__

    sc_index = geom_n.sc_index

    # Each row is repeated `reps` times consecutively, this
    # makes indptr complete
    indptr = np.insert(n_.cumsumi(np.repeat(ncol, reps)), 0, 0)
    indices = n_.emptyi([indptr[-1]])

    # Now we should fill the data
    isc = geom.a2isc(col)
    # resulting atom in the new geometry (without wrapping
    # for correct supercell, that will happen below)
    JA = (col % na) * reps

    # Get the offset atoms (incremented before use in the loop)
    A = isc[:, axis] - 1

    for rep in range(reps):
        # Update the offset
        A += 1

        # Correct supercell information
        isc[:, axis] = A // reps

        # Positions of the elements of the rows `ia * reps + rep`
        idx = array_arange(indptr[rep:-1:reps], n=ncol)
        indices[idx] = JA + A % reps + sc_index(isc) * na_n

        if eta:
            # calculate hours, minutes, seconds
            m, s = divmod((time() - t0) / (rep + 1) * (reps - rep - 1), 60)
            h, m = divmod(m, 60)
            stdout.write(name + ".repeat() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".
                         format(int(h), int(m), s))
            stdout.flush()

    # Clean-up
    del isc, JA, A

    # The data has shape (nnz, dim) and is stored row after row.
    # Since every row is repeated `reps` times consecutively, the data
    # belonging to each row must be tiled individually: split along the
    # element axis (0) at the row boundaries and stack the tiled pieces
    # along that same axis, matching the layout of `indices`.
    D = np.vstack([
        np.tile(d, (reps, 1))
        for d in np.split(D, n_.cumsumi(ncol[:-1]), axis=0)
    ])

    S._csr = SparseCSR((D, indices, indptr),
                       shape=(geom_n.na, geom_n.na_s))

    if eta:
        # calculate hours, minutes, seconds spend on the computation
        m, s = divmod((time() - t0), 60)
        h, m = divmod(m, 60)
        stdout.write(name + ".repeat() finished after {0:d}h {1:d}m {2:.1f}s\n".
                     format(int(h), int(m), s))
        stdout.flush()

    return S
def repeat(self, reps, axis, eta=False):
    """ Create a repeated sparse orbital object, equivalent to `Geometry.repeat`

    The already existing sparse elements are extrapolated to the new
    supercell by repeating them in blocks like the coordinates.

    Atoms are repeated, hence the block of rows (orbitals) belonging to one
    atom is tiled `reps` times before the rows of the next atom follow.

    Parameters
    ----------
    reps : int
        number of repetitions along cell-vector ``axis``
    axis : int
        0, 1, 2 according to the cell-direction
    eta : bool, optional
        print the ETA to stdout

    Returns
    -------
    object of the same class as ``self``
        the sparse object defined on the repeated geometry

    See Also
    --------
    Geometry.repeat: the same ordering as the final geometry
    Geometry.tile: a different ordering of the final geometry
    tile: a different ordering of the final geometry
    """
    # Create the new sparse object
    g = self.geom.repeat(reps, axis)
    S = self.__class__(g, self.dim, self.dtype, 1, **self._cls_kwargs())

    # Now begin to populate it accordingly
    # Retrieve local pointers to the information
    # regarding the current sparse matrix
    geom = self.geom
    no = self.no
    ncol = self._csr.ncol
    if self.finalized:
        col = self._csr.col
        D = self._csr._D
    else:
        # Only pick the elements that are actually in use
        ptr = self._csr.ptr
        idx = array_arange(ptr[:-1], n=ncol)
        col = np.take(self._csr.col, idx)
        D = np.take(self._csr._D, idx, 0)
        del ptr, idx

    # Information for the new sparse matrix
    no_n = S.no
    geom_n = S.geom

    # For ETA
    from time import time
    from sys import stdout
    t0 = time()
    name = self.__class__.__name__

    sc_index = geom_n.sc_index

    # Last orbital (inclusive) of every atom but the final one; these
    # define the boundaries between the atomic blocks of rows
    last = geom.lasto[:geom.na - 1]

    # Number of elements per new row: the rows of one atom are tiled
    # `reps` times as a block (and NOT repeated orbital by orbital),
    # consistent with the rows addressed in the loop below
    ncol_n = np.concatenate([
        np.tile(c, reps) for c in np.split(ncol, last + 1)
    ])
    # Now indptr is complete
    indptr = np.insert(n_.cumsumi(ncol_n), 0, 0)
    del ncol_n
    indices = n_.emptyi([indptr[-1]])

    # Now we should fill the data
    isc = geom.o2isc(col)
    # resulting orbital in the new geometry (without wrapping
    # for correct supercell, that will happen below)
    JO = col % no

    # Get number of orbitals per atom (lasto - firsto + 1)
    ja = geom.o2a(JO)
    oJ = geom.firsto[ja]
    oA = geom.lasto[ja] + 1 - oJ
    # Shift the orbitals corresponding to the
    # repetitions of all previous atoms
    JO += oJ * (reps - 1)
    # Get the offset orbitals (incremented before use in the loop)
    O = isc[:, axis] - 1

    # We need to create an indexable atomic array
    # This is required for multi-orbital cases where
    # we should tile atomic orbitals, and repeat the atoms (only).
    # 'A' is now the first (non-repeated) atom in the new structure
    A = n_.arangei(geom.na) * reps
    AO = geom_n.lasto[A] - geom_n.firsto[A] + 1
    # subtract AO for first iteration in repetition loop
    OA = geom_n.firsto[A] - AO

    # Clean
    del ja, oJ, A

    # Create repetitions
    for rep in range(reps):
        # Update atomic offset
        OA += AO
        # Update the offset
        O += 1
        # Correct supercell information
        isc[:, axis] = O // reps

        # Positions of the elements in the rows of the `rep`'th copy
        # of every atom
        idx = array_arange(indptr[array_arange(OA, n=AO)], n=ncol)
        indices[idx] = JO + oA * (O % reps) + sc_index(isc) * no_n

        if eta:
            # calculate hours, minutes, seconds
            m, s = divmod((time() - t0) / (rep + 1) * (reps - rep - 1), 60)
            h, m = divmod(m, 60)
            stdout.write(name + ".repeat() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".
                         format(int(h), int(m), s))
            stdout.flush()

    # Clean-up
    del isc, JO, O, OA, AO

    # The data has shape (nnz, dim) and is stored row after row.
    # Split it along the element axis (0) at the atomic boundaries and
    # tile the data of every atom individually, then stack the pieces
    # along the same axis so it matches the layout of `indices`.
    D = np.vstack([
        np.tile(d, (reps, 1))
        for d in np.split(D, n_.cumsumi(ncol)[last], axis=0)
    ])

    S._csr = SparseCSR((D, indices, indptr),
                       shape=(geom_n.no, geom_n.no_s))

    if eta:
        # calculate hours, minutes, seconds spend on the computation
        m, s = divmod((time() - t0), 60)
        h, m = divmod(m, 60)
        stdout.write(name + ".repeat() finished after {0:d}h {1:d}m {2:.1f}s\n".
                     format(int(h), int(m), s))
        stdout.flush()

    return S