Ejemplo n.º 1
0
    def sub(self, indices):
        """ Extract the part of the matrix spanned by `indices` as a new sparse CSR matrix

        Only elements whose row *and* column are both listed in `indices` survive.
        The retained rows and columns are renumbered by their position in `indices`.

        Parameters
        ----------
        indices : array_like
           the rows *and* columns that are retained in the sparse pattern

        Returns
        -------
        object of the same class as `self` holding the reduced pattern and its data
        """
        indices = ensure_array(indices)
        n_row, n_column = self.shape[0], self.shape[1]

        # Lookup table old index -> new index, -1 marks an index that is dropped.
        # It is addressed by both row and column indices, hence it spans
        # the largest of the two dimensions.
        pivot = n_.emptyi([max(n_row, n_column)])
        pivot.fill(-1)
        pivot[indices] = n_.arangei(len(indices))

        # For a "wide" matrix some of the indices may not exist as rows,
        # for a "tall" matrix some of them may not exist as columns.
        if n_row < n_column:
            rows = indices[indices < n_row]
        else:
            rows = indices.view()
        if n_row > n_column:
            n_column_new = np.count_nonzero(indices < n_column)
        else:
            n_column_new = len(indices)

        # The initial allocation is irrelevant (nnz=1), every array of
        # the new matrix is overwritten below.
        sub_csr = self.__class__((len(rows), n_column_new, self.shape[2]),
                                 dtype=self.dtype,
                                 nnz=1)

        # Work directly on the arrays of the new matrix
        new_ptr = sub_csr.ptr.view()
        new_ncol = sub_csr.ncol.view()

        # Temporarily store the *old* offsets and lengths of the retained
        # rows in the new arrays (i.e. re-use their memory)
        take(self.ptr, rows, out=new_ptr[1:])
        take(self.ncol, rows, out=new_ncol)

        # Linear positions, in the old matrix, of every element in the retained rows
        old_pos = array_arange(new_ptr[1:], n=new_ncol)

        # Translate the old column indices (dropped columns become -1).
        # Only occupied elements are addressed, so this also works
        # on non-finalized sparse matrices.
        new_col = pivot[take(self.col, old_pos)]

        # Temporary pointer that delimits the rows inside ``new_col``
        new_ptr[0] = 0
        n_.cumsumi(new_ncol, out=new_ptr[1:])

        # Number of surviving elements per row
        # (``new_ncol`` is a view of ``sub_csr.ncol``)
        count = np.count_nonzero
        new_ncol[:] = ensure_array([
            count(new_col[start:stop] >= 0)
            for start, stop in zip(new_ptr[:-1], new_ptr[1:])
        ])

        # A sub-matrix is typically small compared to the full matrix,
        # hence the surviving elements are gathered with ``take``
        keep = np.nonzero(new_col >= 0)[0]

        # Reduce the column indices and extract the corresponding data
        sub_csr.col = take(new_col, keep)
        del new_col
        sub_csr._D = take(self._D[old_pos, :], keep, 0)
        del old_pos, keep

        # Final pointer, built from the reduced row lengths
        sub_csr.ptr[0] = 0
        n_.cumsumi(new_ncol, out=sub_csr.ptr[1:])
        sub_csr._nnz = len(sub_csr.col)

        return sub_csr
Ejemplo n.º 2
0
    def __init_shape(self,
                     arg1,
                     dim=1,
                     dtype=None,
                     nnzpr=20,
                     nnz=None,
                     **kwargs):
        """ Allocate an empty sparse pattern of a given shape

        Parameters
        ----------
        arg1 : sequence of int
           shape of the matrix, either ``(M, N)`` or ``(M, N, K)``. Any sequence
           is accepted (tuple, list, array). With two entries `dim` is used as
           the last dimension.
        dim : int, optional
           size of the last dimension, only used if `arg1` has two entries
        dtype : numpy.dtype, optional
           data type of the stored elements, defaults to ``numpy.float64``
        nnzpr : int, optional
           number of elements allocated per row (at least 1), only used
           if `nnz` is not given
        nnz : int, optional
           total number of elements to allocate. If given, the per-row allocation
           is ``nnz // M`` (at least 1) and at least `nnz` elements are allocated.
        **kwargs :
           accepted and ignored

        Raises
        ------
        ValueError
           if `arg1` does not contain 2 or 3 entries
        """
        # The shape of the data...
        # Convert to a tuple such that lists and arrays may be extended as well
        shape = tuple(arg1)
        if len(shape) == 2:
            # extend to extra dimension
            shape = shape + (dim, )
        elif len(shape) != 3:
            raise ValueError(
                "unrecognized shape input, either a 2-tuple or 3-tuple is required"
            )

        # Set default dtype
        if dtype is None:
            dtype = np.float64

        # unpack size
        M, N, K = shape

        # Store shape
        self._shape = (M, N, K)

        # Check default construction of sparse matrix
        nnzpr = max(nnzpr, 1)

        # Re-create options
        if nnz is None:
            # number of non-zero elements is NOT given
            nnz = M * nnzpr

        else:
            # number of non-zero elements is given, distribute them
            # evenly on the rows.
            # A matrix without rows (M == 0) must not divide by zero.
            nnzpr = nnz // max(M, 1)

        # Correct input in case very few elements are requested
        nnzpr = max(nnzpr, 1)
        nnz = max(nnz, nnzpr * M)

        # Store number of columns currently held
        # in the sparsity pattern
        self.ncol = n_.zerosi([M])
        # Create pointer array, row ``r`` starts at ``r * nnzpr``
        self.ptr = n_.cumsumi(n_.arrayi([nnzpr] * (M + 1))) - nnzpr
        # Create column array
        self.col = n_.emptyi(nnz)
        # Store current number of non-zero elements
        self._nnz = 0

        # Important that this is zero
        # For instance one may set one dimension at a time
        # thus automatically zeroing the other dimensions.
        self._D = np.zeros([nnz, K], dtype)

        # Denote that this sparsity pattern hasn't been finalized
        self._finalized = False
Ejemplo n.º 3
0
    def tile(self, reps, axis, eta=False):
        """ Create a tiled sparse atom object, equivalent to `Geometry.tile`

        The existing sparse elements are copied to each of the `reps` tiles,
        with the column indices shifted and wrapped into the supercell
        of the tiled geometry.

        Notes
        -----
        A non-finalized matrix is first reduced to its occupied elements, hence
        the returned object is always built from contiguous CSR arrays.

        Parameters
        ----------
        reps : int
            number of repetitions along cell-vector ``axis``
        axis : int
            0, 1, 2 according to the cell-direction
        eta : bool, optional
            print an ETA to stdout

        See Also
        --------
        Geometry.tile: the same ordering as the final geometry
        Geometry.repeat: a different ordering of the final geometry
        repeat: a different ordering of the final geometry
        """
        from sys import stdout
        from time import time

        # The new (still empty) sparse object living on the tiled geometry
        tiled_geom = self.geom.tile(reps, axis)
        tiled = self.__class__(tiled_geom, self.dim, self.dtype, 1,
                               **self._cls_kwargs())

        # Local references to the current sparse matrix
        geom = self.geom
        na = self.na
        csr = self._csr
        row_len = csr.ncol
        if self.finalized:
            col, D = csr.col, csr._D
        else:
            # Only consider the occupied elements of each row
            occupied = array_arange(csr.ptr[:-1], n=row_len)
            col = np.take(csr.col, occupied)
            D = np.take(csr._D, occupied, 0)
            del occupied

        # Information of the new sparse matrix
        na_n = tiled.na
        geom_n = tiled.geom
        sc_index = geom_n.sc_index

        # For the ETA
        t0 = time()
        name = self.__class__.__name__

        # The row pointer of the tiled matrix is complete right away,
        # the column indices are filled one tile (row of `indices`) at a time
        indptr = np.insert(n_.cumsumi(np.tile(row_len, reps)), 0, 0)
        indices = n_.emptyi([indptr[-1]])
        indices.shape = (reps, -1)

        # Supercell offsets of the existing couplings
        isc = geom.a2isc(col)
        # Coupled atom in the new geometry, one tile *before* the first one
        # (wrapping into the correct supercell happens in the loop)
        JA = col % na + na * isc[:, axis] - na

        for rep in range(reps):
            # Advance the coupled atoms to the current tile
            JA += na
            # Correct the supercell information
            isc[:, axis] = JA // na_n

            indices[rep, :] = JA % na_n + sc_index(isc) * na_n

            if eta:
                # Estimated remaining time as hours, minutes, seconds
                remaining = (time() - t0) / (rep + 1) * (reps - rep - 1)
                m, s = divmod(remaining, 60)
                h, m = divmod(m, 60)
                msg = ".tile() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".format(
                    int(h), int(m), s)
                stdout.write(name + msg)
                stdout.flush()

        # Clean-up
        del isc, JA

        # Back to a linear index array, the data is simply tiled
        indices.shape = (-1, )
        tiled._csr = SparseCSR((np.tile(D, (reps, 1)), indices, indptr),
                               shape=(geom_n.na, geom_n.na_s))

        if eta:
            # Time spent on the computation as hours, minutes, seconds
            m, s = divmod((time() - t0), 60)
            h, m = divmod(m, 60)
            msg = ".tile() finished after {0:d}h {1:d}m {2:.1f}s\n".format(
                int(h), int(m), s)
            stdout.write(name + msg)
            stdout.flush()

        return tiled
Ejemplo n.º 4
0
    def repeat(self, reps, axis, eta=False):
        """ Create a repeated sparse atom object, equivalent to `Geometry.repeat`

        The already existing sparse elements are extrapolated
        to the new supercell by repeating them in blocks like the coordinates.
        Every atom (row) of the current matrix becomes `reps` consecutive rows
        in the returned matrix.

        Parameters
        ----------
        reps : int
            number of repetitions along cell-vector ``axis``
        axis : int
            0, 1, 2 according to the cell-direction
        eta : bool, optional
            print an ETA to stdout

        Returns
        -------
        object of the same class as `self`, defined on the repeated geometry

        See Also
        --------
        Geometry.repeat: the same ordering as the final geometry
        Geometry.tile: a different ordering of the final geometry
        tile: a different ordering of the final geometry
        """
        # Create the new sparse object
        g = self.geom.repeat(reps, axis)
        S = self.__class__(g, self.dim, self.dtype, 1, **self._cls_kwargs())

        # Now begin to populate it accordingly
        # Retrieve local pointers to the information
        # regarding the current sparse matrix
        geom = self.geom
        na = self.na
        ncol = self._csr.ncol
        if self.finalized:
            col = self._csr.col
            D = self._csr._D
        else:
            # Reduce to the occupied elements such that `col` and `D`
            # are contiguous, row by row
            ptr = self._csr.ptr
            idx = array_arange(ptr[:-1], n=ncol)
            col = np.take(self._csr.col, idx)
            D = np.take(self._csr._D, idx, 0)
            del ptr, idx

        # Information for the new sparse matrix
        na_n = S.na
        geom_n = S.geom

        # For ETA
        from time import time
        from sys import stdout
        t0 = time()
        name = self.__class__.__name__

        # First loop on axis tiling and local
        # atoms in the geometry
        sc_index = geom_n.sc_index

        # Create new indptr, indices and D
        ncol = np.repeat(ncol, reps)
        # Now indptr is complete
        indptr = np.insert(n_.cumsumi(ncol), 0, 0)
        del ncol
        indices = n_.emptyi([indptr[-1]])

        # Now we should fill the data
        isc = geom.a2isc(col)
        # resulting atom in the new geometry (without wrapping
        # for correct supercell, that will happen below)
        JA = (col % na) * reps
        # Get the offset atoms
        A = isc[:, axis] - 1

        for rep in range(reps):

            # Update the offset
            A += 1
            # Correct supercell information
            isc[:, axis] = A // reps

            # Create the indices for the repetition, i.e. the
            # rows rep, rep + reps, rep + 2 * reps, ...
            idx = array_arange(indptr[rep:-1:reps], n=self._csr.ncol)
            indices[idx] = JA + A % reps + sc_index(isc) * na_n

            if eta:
                # calculate hours, minutes, seconds
                m, s = divmod((time() - t0) / (rep + 1) * (reps - rep - 1), 60)
                h, m = divmod(m, 60)
                stdout.write(name +
                             ".repeat() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".
                             format(int(h), int(m), s))
                stdout.flush()

        # Clean-up
        # (`idx` is not deleted explicitly, it does not exist for ``reps == 0``)
        del isc, JA, A

        # In the repeat we have to tile individual atomic couplings.
        # `D` holds one row per stored element, ordered atom by atom, so
        # it is split per atom along the element axis (axis 0) and each
        # block is tiled `reps` times to follow the row order of `indptr`.
        D = np.vstack([
            np.tile(d, (reps, 1))
            for d in np.split(D, n_.cumsumi(self._csr.ncol[:-1]), axis=0)
        ])
        S._csr = SparseCSR((D, indices, indptr),
                           shape=(geom_n.na, geom_n.na_s))

        if eta:
            # calculate hours, minutes, seconds spent on the computation
            m, s = divmod((time() - t0), 60)
            h, m = divmod(m, 60)
            stdout.write(name +
                         ".repeat() finished after {0:d}h {1:d}m {2:.1f}s\n".
                         format(int(h), int(m), s))
            stdout.flush()

        return S
Ejemplo n.º 5
0
    def repeat(self, reps, axis, eta=False):
        """ Create a repeated sparse orbital object, equivalent to `Geometry.repeat`

        The already existing sparse elements are extrapolated
        to the new supercell by repeating them in blocks like the coordinates.
        Atoms are repeated while the orbitals of one atom stay together, i.e.
        the block of rows belonging to an atom appears `reps` times consecutively
        in the returned matrix.

        Parameters
        ----------
        reps : int
            number of repetitions along cell-vector ``axis``
        axis : int
            0, 1, 2 according to the cell-direction
        eta : bool, optional
            print the ETA to stdout

        Returns
        -------
        object of the same class as `self`, defined on the repeated geometry

        See Also
        --------
        Geometry.repeat: the same ordering as the final geometry
        Geometry.tile: a different ordering of the final geometry
        tile: a different ordering of the final geometry
        """
        # Create the new sparse object
        g = self.geom.repeat(reps, axis)
        S = self.__class__(g, self.dim, self.dtype, 1, **self._cls_kwargs())

        # Now begin to populate it accordingly
        # Retrieve local pointers to the information
        # regarding the current sparse matrix
        geom = self.geom
        no = self.no
        ncol = self._csr.ncol
        if self.finalized:
            col = self._csr.col
            D = self._csr._D
        else:
            # Reduce to the occupied elements such that `col` and `D`
            # are contiguous, row by row
            ptr = self._csr.ptr
            idx = array_arange(ptr[:-1], n=ncol)
            col = np.take(self._csr.col, idx)
            D = np.take(self._csr._D, idx, 0)
            del ptr, idx

        # Information for the new sparse matrix
        no_n = S.no
        geom_n = S.geom

        # For ETA
        from time import time
        from sys import stdout
        t0 = time()
        name = self.__class__.__name__

        # First loop on axis tiling and local
        # orbitals in the geometry
        sc_index = geom_n.sc_index

        # Create new indptr, indices and D
        # `rows` maps every row of the new matrix to the row of the current
        # matrix it is a copy of: the orbital block of each atom, `reps` times.
        # (For single-orbital atoms this equals repeating every row `reps` times.)
        atoms = n_.arangei(geom.na)
        first = geom.firsto[atoms]
        norbs = geom.lasto[atoms] + 1 - first
        rows = array_arange(np.repeat(first, reps), n=np.repeat(norbs, reps))
        del atoms, first, norbs
        ncol_n = np.take(ncol, rows)
        # Now indptr is complete
        indptr = np.insert(n_.cumsumi(ncol_n), 0, 0)
        # `D` is contiguous, hence the offset of each current row is the
        # cumulative sum of `ncol`. Gather the data (one row per stored
        # element, axis 0) in the row order of the new matrix.
        offset = np.insert(n_.cumsumi(ncol), 0, 0)
        D = np.take(D, array_arange(np.take(offset, rows), n=ncol_n), 0)
        del rows, ncol_n, offset
        indices = n_.emptyi([indptr[-1]])

        # Now we should fill the data
        isc = geom.o2isc(col)
        # resulting orbital in the new geometry (without wrapping
        # for correct supercell, that will happen below)
        JO = col % no
        # Get number of orbitals per atom (lasto - firsto + 1)
        # This is faster than the direct call

        ja = geom.o2a(JO)
        oJ = geom.firsto[ja]
        oA = geom.lasto[ja] + 1 - oJ
        # Shift the orbitals corresponding to the
        # repetitions of all previous atoms
        JO += oJ * (reps - 1)
        # Get the offset orbitals
        O = isc[:, axis] - 1
        # We need to create an indexable atomic array
        # This is required for multi-orbital cases where
        # we should tile atomic orbitals, and repeat the atoms (only).
        # 'A' is now the first (non-repeated) atom in the new structure
        A = n_.arangei(geom.na) * reps
        AO = geom_n.lasto[A] - geom_n.firsto[A] + 1
        # subtract AO for first iteration in repetition loop
        OA = geom_n.firsto[A] - AO

        # Clean
        del ja, oJ, A

        # Get view of ncol (row lengths of the *current* matrix)
        ncol = self._csr.ncol.view()

        # Create repetitions
        for rep in range(reps):

            # Update atomic offset
            OA += AO
            # Update the offset
            O += 1
            # Correct supercell information
            isc[:, axis] = O // reps

            # Create the indices for the repetition, i.e. the rows of
            # the `rep`'th copy of the orbital block of every atom
            idx = array_arange(indptr[array_arange(OA, n=AO)], n=ncol)
            indices[idx] = JO + oA * (O % reps) + sc_index(isc) * no_n

            if eta:
                # calculate hours, minutes, seconds
                m, s = divmod((time() - t0) / (rep + 1) * (reps - rep - 1), 60)
                h, m = divmod(m, 60)
                stdout.write(name +
                             ".repeat() ETA = {0:5d}h {1:2d}m {2:5.2f}s\r".
                             format(int(h), int(m), s))
                stdout.flush()

        # Clean-up
        # (`idx` is not deleted explicitly, it does not exist for ``reps == 0``)
        del isc, JO, O, OA, AO

        S._csr = SparseCSR((D, indices, indptr),
                           shape=(geom_n.no, geom_n.no_s))

        if eta:
            # calculate hours, minutes, seconds spent on the computation
            m, s = divmod((time() - t0), 60)
            h, m = divmod(m, 60)
            stdout.write(name +
                         ".repeat() finished after {0:d}h {1:d}m {2:.1f}s\n".
                         format(int(h), int(m), s))
            stdout.flush()

        return S