def self2np(self, srow=0, nrow=None, scol=0, ncol=None, block_shape=None, rank=0):
    """Copy a section of the distributed matrix to a numpy array on one rank.

    The section ``self[srow:srow+nrow, scol:scol+ncol]`` is copied into a
    two dimensional, Fortran-ordered numpy array owned by process `rank`.
    The copy is performed piecewise with one ``p?gemr2d`` call per piece,
    each piece being at most `block_shape` in size.

    NOTE(review): every process issues the ``p?gemr2d`` calls, so this
    presumably has to be called by all ranks of ``self.context.mpi_comm``
    with identical arguments -- confirm against callers.

    Parameters
    ----------
    srow, scol : int, optional
        First row / column of the section. Negative values count from the
        end; the result is clipped to the bounds of the global matrix.
    nrow, ncol : int, optional
        Number of rows / columns to copy. ``None`` means "up to the end of
        the matrix". Values are clipped to ``[0, rows/columns available]``.
    block_shape : sequence of two ints, optional
        Maximum shape of the piece copied by a single redistribution call.
        Defaults to ``self.block_shape``.
    rank : int, optional
        Rank (in ``self.context.mpi_comm``) that receives the array.

    Returns
    -------
    a : np.ndarray or None
        The ``(m, n)`` array on process `rank`, ``None`` on all others.

    Raises
    ------
    ScalapyException
        If `block_shape` is rejected by ``_chk_2d_size``.
    """
    Nrow, Ncol = self.global_shape

    # Resolve negative start indices and clip them into [0, N].
    srow = srow if srow >= 0 else srow + Nrow
    srow = min(max(0, srow), Nrow)
    scol = scol if scol >= 0 else scol + Ncol
    scol = min(max(0, scol), Ncol)

    # Extent of the section, clipped to what is available past the start.
    m = Nrow - srow if nrow is None else nrow
    m = min(max(0, m), Nrow - srow)
    n = Ncol - scol if ncol is None else ncol
    n = min(max(0, n), Ncol - scol)

    is_root = self.context.mpi_comm.rank == rank
    a = np.empty((m, n), dtype=self.dtype, order='F') if is_root else None
    if m == 0 or n == 0:
        return a  # nothing to transfer

    block_shape = self.block_shape if block_shape is None else block_shape
    if not _chk_2d_size(block_shape):
        raise ScalapyException("Invalid block_shape")

    bm, bn = block_shape
    br = blockcyclic.num_blocks(m, bm)  # number of row pieces
    bc = blockcyclic.num_blocks(n, bn)  # number of column pieces
    rm = m - (br - 1) * bm  # rows in the last row piece
    rn = n - (bc - 1) * bn  # columns in the last column piece

    # Workaround kept from the original code: due to a bug in scalapy a
    # process context has to be initialised here before the copy.
    ProcessContext([1, self.context.mpi_comm.size], comm=self.context.mpi_comm)

    # Everything describing the numpy side is identical for every piece, so
    # it is built once instead of once per piece. In particular only one
    # single-process context is created rather than one per piece.
    if is_root:
        pc = ProcessContext([1, 1], comm=MPI.COMM_SELF)  # 1x1 grid holding `a`
        # NOTE(review): assumes the `desc` property returns a fresh array on
        # each access, so these edits do not alias the descriptor passed for
        # `self` below -- confirm.
        a_desc = self.desc
        a_desc[1] = pc.blacs_context
        a_desc[2], a_desc[3] = a.shape  # global shape
        a_desc[4], a_desc[5] = a.shape  # one block covering the whole array
        a_desc[8] = a.shape[0]  # leading dimension
        a_buf = a
    else:
        # Ranks that do not own `a` pass a descriptor with context -1 and a
        # dummy buffer.
        a_desc = np.zeros(9, dtype=np.int32)
        a_desc[1] = -1
        a_buf = np.zeros(1, dtype=self.dtype)

    from . import lowlevel as ll

    gemr2d = {'S': ll.psgemr2d,
              'D': ll.pdgemr2d,
              'C': ll.pcgemr2d,
              'Z': ll.pzgemr2d}[self.sc_dtype]

    for bri in range(br):
        M = bm if bri != br - 1 else rm
        row_off = bm * bri
        for bci in range(bc):
            N = bn if bci != bc - 1 else rn
            col_off = bn * bci
            # Row/column positions are 1-based, hence the "+ 1".
            gemr2d(M, N,
                   self._local_array, srow + 1 + row_off, scol + 1 + col_off, self.desc,
                   a_buf, 1 + row_off, 1 + col_off, a_desc,
                   self.context.blacs_context)

    return a
def _copy_from_np(self, a, asrow=0, anrow=None, ascol=0, ancol=None, srow=0, scol=0, block_shape=None, rank=0):
    """Copy a section of a numpy array on one rank into this matrix.

    Copies ``a[asrow:asrow+anrow, ascol:ascol+ancol]``, where `a` lives on
    process `rank`, to ``self[srow:srow+anrow, scol:scol+ancol]``. The copy
    is performed piecewise with one ``p?gemr2d`` call per piece, each piece
    being at most `block_shape` in size.

    All processes of ``self.context.mpi_comm`` must call this together: the
    copy parameters are derived from `a` on `rank` and broadcast to the
    other processes.

    Parameters
    ----------
    a : array_like
        One or two dimensional source array; only read on process `rank`.
        It is converted to ``self.dtype``; a one dimensional array is
        treated as a single row.
    asrow, ascol : int, optional
        First row / column of the section in `a`. Negative values count
        from the end; the result is clipped to the bounds of `a`.
    anrow, ancol : int, optional
        Number of rows / columns to copy. ``None`` means "up to the end of
        `a`". Values are clipped to what is available in both `a` and
        `self`.
    srow, scol : int, optional
        First row / column of the destination section in `self`, with the
        same negative-index and clipping rules.
    block_shape : sequence of two ints, optional
        Maximum shape of the piece copied by a single redistribution call.
        Defaults to ``self.block_shape``.
    rank : int, optional
        Rank (in ``self.context.mpi_comm``) that owns `a`.

    Returns
    -------
    self

    Raises
    ------
    ScalapyException
        If `a` is not one or two dimensional (raised on every process), or
        if `block_shape` is rejected by ``_chk_2d_size``.
    """
    Nrow, Ncol = self.global_shape

    # Resolve negative destination indices and clip them into [0, N].
    srow = srow if srow >= 0 else srow + Nrow
    srow = min(max(0, srow), Nrow)
    scol = scol if scol >= 0 else scol + Ncol
    scol = min(max(0, scol), Ncol)

    comm = self.context.mpi_comm
    is_root = comm.rank == rank

    # Only `rank` can inspect `a`. It works out the copy parameters (or
    # leaves `params` as None when `a` is unusable) and shares them in a
    # single broadcast, so that a bad input makes every process raise
    # instead of leaving the others blocked in the broadcast.
    params = None
    if is_root:
        a = np.asarray(a)
        if a.ndim == 1 or a.ndim == 2:
            a = a.astype(self.dtype)  # type conversion (always a copy)
            if a.ndim == 1:
                a = a.reshape(1, -1)  # a vector is treated as one row
            a = np.asfortranarray(a)
            am, an = a.shape

            asrow = asrow if asrow >= 0 else asrow + am
            asrow = min(max(0, asrow), am)
            ascol = ascol if ascol >= 0 else ascol + an
            ascol = min(max(0, ascol), an)

            m = am - asrow if anrow is None else anrow
            m = min(max(0, m), am - asrow, Nrow - srow)
            n = an - ascol if ancol is None else ancol
            n = min(max(0, n), an - ascol, Ncol - scol)
            params = (asrow, ascol, m, n)

    params = comm.bcast(params, root=rank)
    if params is None:
        raise ScalapyException('Unsupported high dimensional array.')
    # m / n: number of rows / columns to copy
    asrow, ascol, m, n = params

    if m == 0 or n == 0:
        return self  # nothing to transfer

    block_shape = self.block_shape if block_shape is None else block_shape
    if not _chk_2d_size(block_shape):
        raise ScalapyException("Invalid block_shape")

    bm, bn = block_shape
    br = blockcyclic.num_blocks(m, bm)  # number of row pieces
    bc = blockcyclic.num_blocks(n, bn)  # number of column pieces
    rm = m - (br - 1) * bm  # rows in the last row piece
    rn = n - (bc - 1) * bn  # columns in the last column piece

    # Workaround kept from the original code: due to a bug in scalapy a
    # process context has to be initialised here before the copy.
    ProcessContext([1, self.context.mpi_comm.size], comm=self.context.mpi_comm)

    # Everything describing the numpy side is identical for every piece, so
    # it is built once instead of once per piece. In particular only one
    # single-process context is created rather than one per piece.
    if is_root:
        pc = ProcessContext([1, 1], comm=MPI.COMM_SELF)  # 1x1 grid holding `a`
        # NOTE(review): assumes the `desc` property returns a fresh array on
        # each access, so these edits do not alias the descriptor passed for
        # `self` below -- confirm.
        a_desc = self.desc
        a_desc[1] = pc.blacs_context
        a_desc[2], a_desc[3] = a.shape  # global shape
        a_desc[4], a_desc[5] = a.shape  # one block covering the whole array
        a_desc[8] = a.shape[0]  # leading dimension
        a_buf = a
    else:
        # Ranks that do not own `a` pass a descriptor with context -1 and a
        # dummy buffer.
        a_desc = np.zeros(9, dtype=np.int32)
        a_desc[1] = -1
        a_buf = np.zeros(1, dtype=self.dtype)

    from . import lowlevel as ll

    gemr2d = {'S': ll.psgemr2d,
              'D': ll.pdgemr2d,
              'C': ll.pcgemr2d,
              'Z': ll.pzgemr2d}[self.sc_dtype]

    for bri in range(br):
        M = bm if bri != br - 1 else rm
        row_off = bm * bri
        for bci in range(bc):
            N = bn if bci != bc - 1 else rn
            col_off = bn * bci
            # Row/column positions are 1-based, hence the "+ 1".
            gemr2d(M, N,
                   a_buf, asrow + 1 + row_off, ascol + 1 + col_off, a_desc,
                   self._local_array, srow + 1 + row_off, scol + 1 + col_off, self.desc,
                   self.context.blacs_context)

    return self