def __init__(self, velocity, lengths, numCells):

    self.rk = MPI.COMM_WORLD.Get_rank()
    self.sz = MPI.COMM_WORLD.Get_size()

    # decomposition
    self.dc = pnumpy.CubeDecomp(self.sz, numCells)
    if not self.dc.getDecomp():
        print('*** No uniform decomposition could be found for {0} processes'.format(self.sz))
        print('*** Please adjust the number of cells {0}'.format(numCells))
        sys.exit(1)

    # begin/end indices of local sub-domain
    self.localSlices = self.dc.getSlab(self.rk)
    self.iBeg = numpy.array([s.start for s in self.localSlices])
    self.iEnd = numpy.array([s.stop for s in self.localSlices])
    self.nsLocal = numpy.array([s.stop - s.start for s in self.localSlices])
    print('[{0}] local number of cells: {1}'.format(self.rk, self.nsLocal))

    # global number of cells
    self.numCells = numCells

    self.ndims = len(velocity)
    self.deltas = numpy.zeros((self.ndims,), numpy.float64)
    self.upDirection = numpy.zeros((self.ndims,), numpy.int32)
    self.v = velocity
    self.lengths = lengths

    # number of local field values
    self.ntot = 1
    for j in range(self.ndims):
        # upwind direction: -1 for flow in the + direction, +1 otherwise
        self.upDirection[j] = -1
        if velocity[j] < 0.:
            self.upDirection[j] = +1
        self.deltas[j] = lengths[j] / numCells[j]
        self.ntot *= self.nsLocal[j]

    self.coeff = self.v * self.upDirection / self.deltas

    # initialize the ghosted distributed fields
    self.f = pnumpy.gdaZeros(self.nsLocal, numpy.float64, numGhosts=1)
    self.fOld = pnumpy.gdaZeros(self.nsLocal, numpy.float64, numGhosts=1)

    # initialize lower corner to one
    if self.rk == 0:
        self.f[0, 0, 0] = 1

    # get the neighboring ranks
    self.neighbSide = [[] for i in range(self.ndims)]
    direction = numpy.array([0] * self.ndims)
    self.neighbRk = numpy.array([0] * self.ndims)
    periodic = [True for i in range(self.ndims)]
    for i in range(self.ndims):
        direction[i] = self.upDirection[i]
        self.neighbRk[i] = self.dc.getNeighborProc(self.rk, direction, periodic=periodic)
        self.neighbSide[i] = tuple(-direction)
        direction[i] = 0
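# A minimal, standalone sketch of the upwind bookkeeping done in the
# constructor above: the sign convention for upDirection, the cell sizes
# deltas, and the advection coefficients coeff. The concrete
# velocity/lengths/numCells values below are illustrative only.
import numpy

velocity = numpy.array([1.0, -0.5, 2.0])
lengths = numpy.array([1.0, 1.0, 1.0])
numCells = numpy.array([16, 16, 16])

# -1 when the flow is in the + direction, +1 otherwise
# (same convention as the loop in the constructor)
upDirection = numpy.where(velocity < 0., +1, -1)

# cell sizes and the upwind coefficients v * upDirection / deltas
deltas = lengths / numCells
coeff = velocity * upDirection / deltas
print(coeff)  # [-16.  -8. -32.]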
def test2d_1_non_periodic(self):
    """
    2d array test, 1 ghost, non-periodic boundary conditions
    """
    # create the dist array, the sizes are local to each processor
    da = pnumpy.gdaZeros((2, 3), numpy.float32, numGhosts=1)

    # processor rank and number of processes
    rk = da.rk
    nprocs = da.sz

    # set the data
    da[:] = rk

    # get the neighbor MPI rank (None if there is no neighbor)
    otherRk = rk - 1
    if otherRk < 0:
        otherRk = None

    # collective operation. all procs must call "get"
    southData = da.getData(otherRk, winID=(1, 0))

    # check
    if otherRk is not None and otherRk >= 0:
        self.assertEqual(southData.min(), rk - 1)
        self.assertEqual(southData.max(), rk - 1)

    # clean up
    da.free()
def test1d_1(self):
    """
    1d, float64
    """
    dtyp = numpy.float64

    # create the ghosted dist array
    n = 10
    da = pnumpy.gdaZeros((n,), dtyp, numGhosts=1)

    # set data to process dependent value,
    # da.rk is the mpi proc ID
    # da.sz is the size of the MPI communicator
    da[:] = 100 * da.rk + numpy.array([i for i in range(n)], dtyp)

    # access remote data to the left
    leftRk = (da.rk - 1) % da.sz
    print('proc %d tries to access data from %d' % (da.rk, leftRk))
    leftData = da.getData(pe=leftRk, winID=(1,))
    print('leftData for rank %d = %s' % (da.rk, str(leftData)))

    # check
    if leftRk < da.rk:
        self.assertEqual(leftData[0], da[-1] - 100)
    else:
        self.assertEqual(leftData[0], da[-1] + 100 * (da.sz - 1))

    # free
    da.free()
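# Worked example of the assertions in the 1d test above, for an assumed run
# on 3 ranks (sz = 3). Each rank fills its local array with 100*rk + [0..9];
# the assertions imply that winID=(1,) exposes the high-side slab, so
# leftData[0] is the left neighbour's last element. Pure arithmetic, no MPI.
n, sz = 10, 3
for rk in range(sz):
    leftRk = (rk - 1) % sz
    da_last = 100 * rk + (n - 1)        # local da[-1]
    leftData0 = 100 * leftRk + (n - 1)  # left neighbour's last element
    if leftRk < rk:
        assert leftData0 == da_last - 100
    else:
        # wrap-around case on rank 0
        assert leftData0 == da_last + 100 * (sz - 1)
print('expected values consistent for all ranks')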
def apply(self, localArray):
    """
    Apply Laplacian stencil to data
    @param localArray local array
    @return new array on local proc
    """
    # input dist array
    inp = gdaZeros(localArray.shape, localArray.dtype, numGhosts=1)

    # output array
    out = numpy.zeros(localArray.shape, localArray.dtype)

    # no displacement term
    weight = self.stencil[self.zeros]
    out[...] += weight * localArray

    for disp in self.srcLocalDomains:

        weight = self.stencil[disp]

        # no communication required here
        srcDom = self.srcLocalDomains[disp]
        dstDom = self.dstLocalDomains[disp]
        out[dstDom] += weight * localArray[srcDom]

        #
        # now the part that requires communication
        #

        # set the ghost values
        srcSlab = self.srcSlab[disp]
        # copy
        inp[srcSlab] = localArray[srcSlab]

        # send over to local process
        dstSlab = self.dstSlab[disp]
        winId = self.winIds[disp]
        rk = self.neighRk[disp]

        # remote fetch
        out[dstSlab] += weight * inp.getData(rk, winId)

    # some implementations require this
    inp.free()

    return out
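# For reference, the stencil attributes the method above relies on can be
# thought of as a mapping from displacement tuples to weights, with
# self.zeros being the zero-displacement key. The sketch below builds such
# a mapping for the 2d five-point Laplacian used elsewhere in this section;
# the attribute names come from the method, the concrete values are
# illustrative.
ndims = 2
zeros = (0,) * ndims                 # key of the "no displacement" term
stencil = {zeros: 2.0 * ndims}       # centre weight, 4 in 2d
for idim in range(ndims):
    for sign in (-1, 1):
        disp = tuple(sign if j == idim else 0 for j in range(ndims))
        stencil[disp] = -1.0         # one unit weight per neighbour
print(stencil)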
def test2d_1_periodic(self):
    """
    2d array test, 1 ghost, periodic boundary conditions
    """
    # create the dist array, the sizes are local to each processor
    da = pnumpy.gdaZeros((2, 3), numpy.float32, numGhosts=1)

    # processor rank and number of processes
    rk = da.rk
    nprocs = da.sz

    # set the data
    da[:] = rk

    # access neighbor data, collective operation
    southData = da.getData((rk - 1) % nprocs, winID=(1, 0))

    # check
    self.assertEqual(southData.min(), (rk - 1) % nprocs)
    self.assertEqual(southData.max(), (rk - 1) % nprocs)

    # clean up
    da.free()
def test2d_laplacian_periodic(self):
    """
    2d array, apply Laplacian, periodic along the two axes
    """
    from pnumpy import CubeDecomp
    from pnumpy import MultiArrayIter
    from functools import reduce
    import operator

    # global sizes
    ndims = 2
    #ns = numpy.array([60] * ndims)
    ns = numpy.array([3 * 4] * ndims)

    # local rank and number of procs
    rk = MPI.COMM_WORLD.Get_rank()
    sz = MPI.COMM_WORLD.Get_size()

    # find a domain decomposition
    dc = CubeDecomp(sz, ns)

    # not all numbers of procs will give a uniform domain decomposition,
    # exit if none can be found
    if not dc.getDecomp():
        if rk == 0:
            print('no decomp could be found, adjust the number of procs')
        return

    # get the local start/stop indices along each axis as a list of
    # 1d slices
    localSlices = dc.getSlab(rk)
    iBeg = numpy.array([s.start for s in localSlices])
    iEnd = numpy.array([s.stop for s in localSlices])
    nsLocal = numpy.array([s.stop - s.start for s in localSlices])

    # create the dist arrays
    da = pnumpy.gdaZeros(nsLocal, numpy.float32, numGhosts=1)
    laplacian = pnumpy.gdaZeros(nsLocal, numpy.float32, numGhosts=1)

    # set the data
    for it in MultiArrayIter(nsLocal):
        localInds = it.getIndices()
        globalInds = iBeg + localInds
        # positions are cell centered, domain is [0, 1]^ndims
        position = (globalInds + 0.5) / numpy.array(ns, numpy.float32)
        # sin(2*pi*x) * sin(2*pi*y) ...
        da[tuple(localInds)] = reduce(operator.mul,
                                      [numpy.sin(2 * numpy.pi * position[i])
                                       for i in range(ndims)])

    # apply the Laplacian finite difference operator.
    # Start by performing all the operations that do
    # not require any communication.
    laplacian[:] = 2 * ndims * da

    # now subtract the neighbor values which are local to this process
    for idim in range(ndims):
        # indices shifted in the + direction along axis idim
        slabP = [slice(None, None, None) for j in range(idim)] + \
                [slice(1, None, None)] + \
                [slice(None, None, None) for j in range(idim + 1, ndims)]
        # indices shifted in the - direction along axis idim
        slabM = [slice(None, None, None) for j in range(idim)] + \
                [slice(0, -1, None)] + \
                [slice(None, None, None) for j in range(idim + 1, ndims)]

        laplacian[tuple(slabP)] -= da[tuple(slabM)]  # subtract left neighbor
        laplacian[tuple(slabM)] -= da[tuple(slabP)]  # subtract right neighbor

    # fetch the data located on other procs
    periodic = [True for idim in range(ndims)]
    for idim in range(ndims):
        # define the positive and negative directions
        directionP = tuple([0 for j in range(idim)] + [1] +
                           [0 for j in range(idim + 1, ndims)])
        directionM = tuple([0 for j in range(idim)] + [-1] +
                           [0 for j in range(idim + 1, ndims)])

        procP = dc.getNeighborProc(rk, directionP, periodic=periodic)
        procM = dc.getNeighborProc(rk, directionM, periodic=periodic)

        # this is where communication takes place... Note that when
        # accessing the data on the low-end side on rank procM we
        # access the slice on the positive side of procM (directionP).
        # And inversely for the high-end side data...
        dataM = da.getData(procM, winID=directionP)
        dataP = da.getData(procP, winID=directionM)

        # finish off the operator
        laplacian[da.getEllipsis(winID=directionM)] -= dataM
        laplacian[da.getEllipsis(winID=directionP)] -= dataP

    # compute a checksum and send the result to rank 0
    checksum = laplacian.reduce(lambda x, y: abs(x) + abs(y), 0.0, rootPe=0)
    if rk == 0:
        print('checksum = ', checksum)
        # float32 calculation has higher error
        assert abs(checksum - 32.0) < 1.e-4

    # free the windows
    da.free()
    laplacian.free()
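# The slabP/slabM index lists built in the loop above are just views shifted
# by one cell along axis idim. Evaluated standalone for ndims = 2 (no pnumpy
# needed) they are:
ndims = 2
for idim in range(ndims):
    slabP = tuple([slice(None)] * idim + [slice(1, None)] +
                  [slice(None)] * (ndims - idim - 1))
    slabM = tuple([slice(None)] * idim + [slice(0, -1)] +
                  [slice(None)] * (ndims - idim - 1))
    print(idim, slabP, slabM)
# idim = 0: rows 1.. versus rows 0..-1, all columns
# idim = 1: all rows, columns 1.. versus columns 0..-1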
slab = dc.getSlab(rk)
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local domain sizes
nx, ny = iEnd - iBeg, jEnd - jBeg

# the decomp must be regular
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# create and set the input distributed array
inputData = pnumpy.gdaZeros((nx, ny), numpy.float32, numGhosts=1)
setValues(nxG, nyG, iBeg, iEnd, jBeg, jEnd, inputData)

# store the number of times a cell has an invalid neighbor so
# we can correct the weights
numInvalidNeighbors = numpy.zeros((nx, ny), numpy.int32)

domain = Partition(2)

# the ghosted array only exposes west, east, south and north
# windows. Need to also export the corners
for disp in (-1, -1), (-1, 1), (1, -1), (1, 1):
    d0 = (disp[0], 0)
    d1 = (0, disp[1])
    n0 = (-disp[0], 0)
    n1 = (0, -disp[1])
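# A sketch of the weight correction the comment above alludes to, in plain
# numpy. It assumes, for illustration only, a 3x3 (9-point) averaging
# stencil on a non-periodic domain; only the name numInvalidNeighbors is
# taken from the snippet above, the stencil and the sizes are assumptions.
import numpy

nx, ny = 6, 4
numInvalidNeighbors = numpy.zeros((nx, ny), numpy.int32)

# cells on a face miss 3 of their 9 contributions, corner cells miss 5
# (3 + 3 - 1, one missing neighbour being shared by the two faces)
numInvalidNeighbors[0, :] += 3
numInvalidNeighbors[-1, :] += 3
numInvalidNeighbors[:, 0] += 3
numInvalidNeighbors[:, -1] += 3
numInvalidNeighbors[0, 0] -= 1
numInvalidNeighbors[0, -1] -= 1
numInvalidNeighbors[-1, 0] -= 1
numInvalidNeighbors[-1, -1] -= 1

# average over the valid contributions only
weights = 1.0 / (9 - numInvalidNeighbors)
print(weights[0, 0], weights[0, 1], weights[1, 1])  # 0.25, 0.1666..., 0.111...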
# list of slices
slab = dc.getSlab(rk)

# starting/ending indices for local domain
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local variables (integer division so the array shapes are ints)
xx = numpy.outer(xs[iBeg:iEnd], numpy.ones((ny // npy,), numpy.float64))
yy = numpy.outer(numpy.ones((nx // npx,), numpy.float64), ys[jBeg:jEnd])

# local field
zz = numpy.sin(numpy.pi * xx) * numpy.cos(2 * numpy.pi * yy)

# create and set distributed array
zda = pnumpy.gdaZeros(zz.shape, zz.dtype, numGhosts=1)
zda[:] = zz

# compute the star Laplacian in the interior, this does not require
# any communication
laplaceZ = 4 * zda[:]

# local neighbour contributions, no communication
laplaceZ[1:, :] -= zda[0:-1, :]
laplaceZ[0:-1, :] -= zda[1:, :]
laplaceZ[:, 1:] -= zda[:, 0:-1]
laplaceZ[:, 0:-1] -= zda[:, 1:]

# now compute and fill in the halo
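# The halo step announced above ("now compute and fill in the halo") can be
# completed with the same getData / getEllipsis pattern used in the
# periodic-Laplacian test earlier in this section. A sketch only: it assumes
# periodic boundaries and reuses the dc, rk, zda and laplaceZ names from the
# snippet above; non-periodic handling (getNeighborProc returning None) is
# omitted.
periodic = [True, True]
for direction in ((1, 0), (-1, 0), (0, 1), (0, -1)):
    # rank owning the neighbouring sub-domain on this side
    neighRk = dc.getNeighborProc(rk, direction, periodic=periodic)
    # the neighbour exposes the slab on its opposite side
    opposite = tuple(-d for d in direction)
    ghostData = zda.getData(neighRk, winID=opposite)
    # subtract the neighbour's boundary slab from the matching local slab
    laplaceZ[zda.getEllipsis(winID=direction)] -= ghostData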