def test2d(self):
    n = 8

    # global number of cells
    ns = (n, n)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0 for i in range(ndims)])
    xmaxs = numpy.array([1.0 for i in range(ndims)])

    # local cell centered coordinates
    axes = []
    hs = []
    nsLocal = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        nsLocal.append(iend - ibeg)
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        ax = xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5)
        hs.append(h)
        axes.append(ax)

    op = StencilOperator(dc, periodic=(False, False))
    for i in range(ndims):
        disp = [0] * ndims
        for pm in (-1, 1):
            disp[i] = pm
            op.addStencilBranch(tuple(disp), 1.0)
    op.addStencilBranch(tuple([0] * ndims), -2 * ndims)

    # set the input function
    xx = numpy.outer(axes[0], numpy.ones((nsLocal[1],)))
    yy = numpy.outer(numpy.ones((nsLocal[0],)), axes[1])
    inp = 0.5 * xx * yy**2
    #print('[{0}] inp = {1}'.format(self.rk, str(inp)))

    out = op.apply(inp) / hs[0]**2  # NEED TO ADJUST WHEN CELL SIZE IS DIFFERENT IN Y!
    #print('[{0}] out = {1}'.format(self.rk, str(out)))

    # check sum
    localChkSum = numpy.sum(out.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test2d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - (-198.0)), 1.e-10)
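
# For reference, a minimal serial sketch (plain numpy, no MPI; the names
# below are illustrative, not pnumpy API) of the 5-point stencil test2d
# assembles. The centered difference is exact for this polynomial input,
# so in the interior out / h**2 equals d2f/dx2 + d2f/dy2 = x up to rounding.
import numpy

n = 8
h = 1.0 / n
ax = h * (numpy.arange(n) + 0.5)     # cell-centered coordinates on [0, 1]
xx = numpy.outer(ax, numpy.ones(n))
yy = numpy.outer(numpy.ones(n), ax)
f = 0.5 * xx * yy**2                 # same input as test2d

out = -4.0 * f                       # center branch, weight -2 * ndims
out[1:, :] += f[:-1, :]              # (-1, 0) branch: west neighbor
out[:-1, :] += f[1:, :]              # (+1, 0) branch: east neighbor
out[:, 1:] += f[:, :-1]              # (0, -1) branch: south neighbor
out[:, :-1] += f[:, 1:]              # (0, +1) branch: north neighbor

assert numpy.allclose(out[1:-1, 1:-1] / h**2, xx[1:-1, 1:-1])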
def test1d(self):
    n = 8

    # global number of cells
    ns = (n,)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0 for i in range(ndims)])
    xmaxs = numpy.array([1.0 for i in range(ndims)])

    # local cell centered coordinates
    axes = []
    hs = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        ax = xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5)
        hs.append(h)
        axes.append(ax)

    op = StencilOperator(dc, periodic=(False,))
    op.addStencilBranch((1,), 1.0)
    op.addStencilBranch((-1,), 1.0)
    op.addStencilBranch((0,), -2.0)

    # set the input function
    inp = 0.5 * axes[0]**2
    #print('[{0}] inp = {1}'.format(self.rk, str(inp)))
    out = op.apply(inp) / hs[0]**2
    #print('[{0}] out = {1}'.format(self.rk, str(out)))

    # check sum
    localChkSum = numpy.sum(out.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test1d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - (-28.25)), 1.e-10)
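
# The matching serial sketch (plain numpy, no MPI; illustrative names) of
# the (1, -2, 1) stencil test1d builds. The centered second difference of
# 0.5 * x**2 is exactly 1, so the interior of out / h**2 is 1.0; only the
# two boundary cells, which each miss a neighbor, differ.
import numpy

n = 8
h = 1.0 / n
x = h * (numpy.arange(n) + 0.5)
f = 0.5 * x**2                       # same input as test1d

out = -2.0 * f                       # (0,) branch
out[1:] += f[:-1]                    # (-1,) branch: left neighbor
out[:-1] += f[1:]                    # (+1,) branch: right neighbor

assert numpy.allclose(out[1:-1] / h**2, 1.0)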
def test2d_laplacian_periodic(self):
    """
    2d array, apply the Laplacian, periodic along the two axes
    """
    from pnumpy import CubeDecomp
    from pnumpy import MultiArrayIter
    from functools import reduce
    import operator

    # global sizes
    ndims = 2
    #ns = numpy.array([60] * ndims)
    ns = numpy.array([3 * 4] * ndims)

    # local rank and number of procs
    rk = MPI.COMM_WORLD.Get_rank()
    sz = MPI.COMM_WORLD.Get_size()

    # find a domain decomposition
    dc = CubeDecomp(sz, ns)

    # not all numbers of procs will give a uniform domain decomposition,
    # exit if none can be found
    if not dc.getDecomp():
        if rk == 0:
            print('no decomp could be found, adjust the number of procs')
        return

    # get the local start/stop indices along each axis as a list of
    # 1d slices
    localSlices = dc.getSlab(rk)
    iBeg = numpy.array([s.start for s in localSlices])
    iEnd = numpy.array([s.stop for s in localSlices])
    nsLocal = numpy.array([s.stop - s.start for s in localSlices])

    # create the dist arrays
    da = pnumpy.gmdaZeros(nsLocal, numpy.float32, mask=None, numGhosts=1)
    laplacian = pnumpy.gmdaZeros(nsLocal, numpy.float32, numGhosts=1)

    # set the data
    for it in MultiArrayIter(nsLocal):
        localInds = it.getIndices()
        globalInds = iBeg + localInds
        # positions are cell centered, domain is [0, 1]^ndims
        position = (globalInds + 0.5) / numpy.array(ns, numpy.float32)
        # sin(2*pi*x) * sin(2*pi*y) ...
        da[tuple(localInds)] = reduce(operator.mul,
                                      [numpy.sin(2 * numpy.pi * position[i]) for i in range(ndims)])

    # apply the Laplacian finite difference operator.
    # Start by performing all the operations that do
    # not require any communication.
    laplacian[:] = 2 * ndims * da

    # now subtract the neighbor values which are local to this process
    for idim in range(ndims):
        # indices shifted in the + direction along axis idim
        slabP = tuple([slice(None, None, None) for j in range(idim)] +
                      [slice(1, None, None)] +
                      [slice(None, None, None) for j in range(idim + 1, ndims)])
        # indices shifted in the - direction along axis idim
        slabM = tuple([slice(None, None, None) for j in range(idim)] +
                      [slice(0, -1, None)] +
                      [slice(None, None, None) for j in range(idim + 1, ndims)])
        laplacian[slabP] -= da[slabM]  # subtract left neighbor
        laplacian[slabM] -= da[slabP]  # subtract right neighbor

    # fetch the data located on other procs
    periodic = [True for idim in range(ndims)]
    for idim in range(ndims):
        # define the positive and negative directions
        directionP = tuple([0 for j in range(idim)] + [1] + [0 for j in range(idim + 1, ndims)])
        directionM = tuple([0 for j in range(idim)] + [-1] + [0 for j in range(idim + 1, ndims)])
        procP = dc.getNeighborProc(rk, directionP, periodic=periodic)
        procM = dc.getNeighborProc(rk, directionM, periodic=periodic)

        # this is where communication takes place... Note that to get the
        # data on our low-end side we access the slice on the positive side
        # of rank procM (directionP), and inversely for the high-end side.
        dataM = da.getData(procM, winID=directionP)
        dataP = da.getData(procP, winID=directionM)

        # finish off the operator
        laplacian[da.getEllipsis(winID=directionM)] -= dataM
        laplacian[da.getEllipsis(winID=directionP)] -= dataP

    # compute a checksum and send the result to rank 0
    checksum = laplacian.reduce(lambda x, y: abs(x) + abs(y), 0.0, rootPe=0)
    if rk == 0:
        print('checksum = ', checksum)
        # float32 calculation has higher error
        assert abs(checksum - 32.0) < 1.e-4

    # free the windows
    da.free()
    laplacian.free()
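
# A serial illustration (plain numpy) of the shifted-slice trick used in
# test2d_laplacian_periodic: accumulating over slabP/slabM pairs applies the
# neighbor contributions without explicit loops. For linear data the interior
# comes out to zero; the two end points still await the remote (here:
# periodic) contributions fetched via getData above.
import numpy

f = numpy.arange(6, dtype=numpy.float64)
lap = 2.0 * f                # 2 * ndims * f with ndims = 1
lap[1:] -= f[:-1]            # lap[slabP] -= f[slabM]: subtract left neighbors
lap[:-1] -= f[1:]            # lap[slabM] -= f[slabP]: subtract right neighbors
print(lap)                   # [-1.  0.  0.  0.  0.  6.]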
def test3d(self):
    n = 8

    # global number of cells
    ns = (n, n, n)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0 for i in range(ndims)])
    xmaxs = numpy.array([1.0 for i in range(ndims)])

    # local cell centered coordinates
    axes = []
    hs = []
    nsLocal = []
    iBegs = []
    iEnds = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        iBegs.append(ibeg)
        iEnds.append(iend)
        nsLocal.append(iend - ibeg)
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        ax = xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5)
        hs.append(h)
        axes.append(ax)

    op = StencilOperator(dc, periodic=(False, False, True))
    for i in range(ndims):
        disp = [0] * ndims
        for pm in (-1, 1):
            disp[i] = pm
            op.addStencilBranch(tuple(disp), 1.0)
    op.addStencilBranch(tuple([0] * ndims), -2 * ndims)

    # set the input function
    inp = numpy.zeros((iEnds[0] - iBegs[0],
                       iEnds[1] - iBegs[1],
                       iEnds[2] - iBegs[2]), numpy.float64)
    for ig in range(iBegs[0], iEnds[0]):
        i = ig - iBegs[0]
        x = axes[0][i]
        for jg in range(iBegs[1], iEnds[1]):
            j = jg - iBegs[1]
            y = axes[1][j]
            for kg in range(iBegs[2], iEnds[2]):
                k = kg - iBegs[2]
                z = axes[2][k]
                inp[i, j, k] = 0.5 * x * y**2

    # check sum of input
    localChkSum = numpy.sum(inp.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test3d check sum of input = {}'.format(chksum))

    out = op.apply(inp)

    # check sum
    localChkSum = numpy.sum(out.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test3d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - (-24.75)), 1.e-10)
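
# A serial sketch (plain numpy, no MPI; illustrative names) of the 3d stencil
# in test3d. The input 0.5 * x * y**2 does not depend on z, so the periodic z
# branches cancel exactly (f[k-1] + f[k+1] - 2*f[k] == 0) and the interior
# reduces to the 2d result out / h**2 == x.
import numpy

n = 8
h = 1.0 / n
ax = h * (numpy.arange(n) + 0.5)
x = ax[:, None, None]
y = ax[None, :, None]
f = 0.5 * x * y**2 * numpy.ones((n, n, n))   # same input as test3d

out = -6.0 * f                                # center weight -2 * ndims
out[1:, :, :] += f[:-1, :, :]                 # x neighbors (non-periodic)
out[:-1, :, :] += f[1:, :, :]
out[:, 1:, :] += f[:, :-1, :]                 # y neighbors (non-periodic)
out[:, :-1, :] += f[:, 1:, :]
out += numpy.roll(f, 1, axis=2)               # z neighbors (periodic)
out += numpy.roll(f, -1, axis=2)

assert numpy.allclose(out[1:-1, 1:-1, :] / h**2, x[1:-1])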
# tail of the loop that fills the initial data array (its definition is
# elided above)
        data[i, j] = numpy.floor(
            1.9 * numpy.exp(-di**2 / nxGHalf**2 - dj**2 / nyGHalf**2))

# local rank and number of procs
rk = MPI.COMM_WORLD.Get_rank()
sz = MPI.COMM_WORLD.Get_size()

# global domain sizes
nxG, nyG = 128, 256

# domain decomposition
dc = CubeDecomp(sz, (nxG, nyG))

# starting/ending global indices for local domain
slab = dc.getSlab(rk)
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local domain sizes
nx, ny = iEnd - iBeg, jEnd - jBeg

# the decomp must be regular
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# create and set the input distributed array
inputData = pnumpy.gdaZeros((nx, ny), numpy.float32, numGhosts=1)
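
# A hedged, standalone illustration (run serially) of what the decomposition
# above produces: for a 128 x 256 grid over 4 ranks, CubeDecomp typically
# settles on a 2 x 2 process grid, i.e. 64 x 128 local domains. The exact
# layout is up to CubeDecomp -- this just prints whatever it chose.
from pnumpy import CubeDecomp

dc = CubeDecomp(4, (128, 256))
print(dc.getDecomp())              # e.g. (2, 2)
for rk in range(4):
    slab = dc.getSlab(rk)
    print(rk, slab[0], slab[1])    # each rank's slices into the global grid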
# domain decomposition
dc = CubeDecomp(sz, (nx, ny))

# the decomp must be regular
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# number of procs in each direction
npx, npy = dc.getDecomp()

# list of slices
slab = dc.getSlab(rk)

# starting/ending indices for local domain
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local variables (integer division: the decomposition is uniform)
xx = numpy.outer(xs[iBeg:iEnd], numpy.ones((ny // npy,), numpy.float64))
yy = numpy.outer(numpy.ones((nx // npx,), numpy.float64), ys[jBeg:jEnd])

# local field
zz = numpy.sin(numpy.pi * xx) * numpy.cos(2 * numpy.pi * yy)

# create and set distributed array
zda = pnumpy.gdaZeros(zz.shape, zz.dtype, numGhosts=1)
zda[:] = zz
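
# A hedged follow-up to the fragment above: fetching the neighbor slabs of
# zda across process boundaries. This assumes gdaZeros arrays expose the
# same getNeighborProc/getData/winID/free interface that the gmda arrays use
# in test2d_laplacian_periodic; treat it as a sketch, not verified API.
periodic = [False, False]
procM = dc.getNeighborProc(rk, (-1, 0), periodic=periodic)  # low-side neighbor
procP = dc.getNeighborProc(rk, (1, 0), periodic=periodic)   # high-side neighbor

# our low-side ghost values live on procM's high-side window, and vice versa
dataM = zda.getData(procM, winID=(1, 0))
dataP = zda.getData(procP, winID=(-1, 0))

# ... combine dataM/dataP with shifted local slices to close a stencil at
# the domain edges, then release the MPI windows:
zda.free()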
def test3d(self):
    n = 8

    # global number of cells
    ns = (n, n, n)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0 for i in range(ndims)])
    xmaxs = numpy.array([1.0 for i in range(ndims)])

    # local cell centered coordinates
    axes = []
    hs = []
    nsLocal = []
    iBegs = []
    iEnds = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        iBegs.append(ibeg)
        iEnds.append(iend)
        nsLocal.append(iend - ibeg)
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        ax = xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5)
        hs.append(h)
        axes.append(ax)

    lapl = Laplacian(dc, periodic=(False, False, True))

    # set the input function
    inp = numpy.zeros((iEnds[0] - iBegs[0],
                       iEnds[1] - iBegs[1],
                       iEnds[2] - iBegs[2]), numpy.float64)
    for ig in range(iBegs[0], iEnds[0]):
        i = ig - iBegs[0]
        x = axes[0][i]
        for jg in range(iBegs[1], iEnds[1]):
            j = jg - iBegs[1]
            y = axes[1][j]
            for kg in range(iBegs[2], iEnds[2]):
                k = kg - iBegs[2]
                z = axes[2][k]
                inp[i, j, k] = 0.5 * x * y**2

    # check sum of input
    localChkSum = numpy.sum(inp.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test3d check sum of input = {}'.format(chksum))

    out = lapl.apply(inp)

    # check sum
    localChkSum = numpy.sum(out.flat)
    chksum = numpy.sum(MPI.COMM_WORLD.gather(localChkSum, 0))
    if self.rk == 0:
        print('test3d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - (-24.75)), 1.e-10)
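
# Note: everything above is MPI-parallel and is meant to be launched through
# an MPI runner, e.g. (the file name is hypothetical):
#
#   mpiexec -n 4 python testLaplacian.py
#
# A single-process run should also work, since CubeDecomp can fall back to
# a trivial decomposition when the number of procs is 1.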