Example #1
File: upwindMPI.py, Project: s3540645/COMBS
  def __init__(self, velocity, lengths, numCells):

    self.rk = MPI.COMM_WORLD.Get_rank()
    self.sz = MPI.COMM_WORLD.Get_size()

    # decomposition
    self.dc = pnumpy.CubeDecomp(self.sz, numCells)
    if not self.dc.getDecomp():
      print('*** No uniform decomposition could be found for {0} processes'.format(self.sz))
      print('*** Please adjust the number of cells {0}'.format(numCells))
      sys.exit(1)

    # begin/end indices of local sub-domain
    self.localSlices = self.dc.getSlab(self.rk)
    self.iBeg = numpy.array([s.start for s in self.localSlices])
    self.iEnd = numpy.array([s.stop for s in self.localSlices])
    self.nsLocal = numpy.array([s.stop - s.start for s in self.localSlices])
    print('[{0}] local number of cells: {1}'.format(self.rk, self.nsLocal))

    # global number of cells
    self.numCells = numCells

    self.ndims = len(velocity)
    self.deltas = numpy.zeros( (self.ndims,), numpy.float64 )
    self.upDirection = numpy.zeros( (self.ndims,), int )  # numpy.int was removed in recent numpy releases
    self.v = velocity
    self.lengths = lengths

    # number of local field values
    self.ntot = 1
    for j in range(self.ndims):
      self.upDirection[j] = -1
      if velocity[j] < 0.: self.upDirection[j] = +1
      self.deltas[j] = lengths[j] / numCells[j]
      self.ntot *= self.nsLocal[j]

    self.coeff = self.v * self.upDirection / self.deltas

    # initializing the field
    self.f = pnumpy.gdaZeros( self.nsLocal, numpy.float64, numGhosts=1 )
    self.fOld = pnumpy.gdaZeros( self.nsLocal, numpy.float64, numGhosts=1 )

    # initialize lower corner to one
    if self.rk == 0:
      self.f[0, 0, 0] = 1

    # get the neighboring ranks
    self.neighbSide = [[] for i in range(self.ndims)]
    direction = numpy.array([0] * self.ndims)
    self.neighbRk = numpy.array([0] * self.ndims)
    periodic = [True for i in range(self.ndims)]
    for i in range(self.ndims):
      direction[i] = self.upDirection[i]
      self.neighbRk[i] = self.dc.getNeighborProc(self.rk, direction, periodic=periodic)
      self.neighbSide[i] = tuple(-direction)
      direction[i] = 0
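
A standalone illustration (not from the original file) of the upwind bookkeeping above: for a positive velocity component the upwind neighbour lies in the negative direction, so upDirection is -1 and coeff carries the signed v/delta factor used by the update.

import numpy

velocity = numpy.array([1.0, -2.0])
lengths = numpy.array([1.0, 1.0])
numCells = numpy.array([10, 20])

deltas = lengths / numCells                        # [0.1, 0.05]
upDirection = numpy.where(velocity < 0., 1, -1)    # [-1, +1]
coeff = velocity * upDirection / deltas            # [-10., -40.]
print(deltas, upDirection, coeff)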
Example #2
  def __init__(self, velocity, lengths, numCells):

    self.rk = MPI.COMM_WORLD.Get_rank()
    self.sz = MPI.COMM_WORLD.Get_size()

    # decomposition
    self.dc = pnumpy.CubeDecomp(self.sz, numCells)
    if not self.dc.getDecomp():
      print('*** No uniform decomposition could be found for {0} processes'.format(self.sz))
      print('*** Please adjust the number of cells {0}'.format(numCells))
      sys.exit(1)

    # begin/end indices of local sub-domain
    self.localSlices = self.dc.getSlab(self.rk)
    self.iBeg = numpy.array([s.start for s in self.localSlices])
    self.iEnd = numpy.array([s.stop for s in self.localSlices])
    self.nsLocal = numpy.array([s.stop - s.start for s in self.localSlices])
    print('[{0}] local number of cells: {1}'.format(self.rk, self.nsLocal))

    # global number of cells
    self.numCells = numCells

    self.ndims = 3
    self.deltas = numpy.zeros( (self.ndims,), numpy.float64 )
    self.upDirection = numpy.zeros( (self.ndims,), numpy.float64 )
    self.v = velocity
    self.lengths = lengths

    # number of local field values
    self.ntot = 1
    for j in range(self.ndims):
      self.upDirection[j] = -1
      if velocity[j] < 0.: self.upDirection[j] = +1
      self.deltas[j] = lengths[j] / numCells[j]
      self.ntot *= self.nsLocal[j]

    self.coeff = self.v * self.upDirection / self.deltas

    # initializing the field
    self.f = pnumpy.gdaZeros( self.nsLocal, numpy.float64, numGhosts=1 )
    self.fOld = pnumpy.gdaZeros( self.nsLocal, numpy.float64, numGhosts=1 )

    # initialize lower corner to one
    if self.rk == 0:
      self.f[0, 0, 0] = 1

    # get the neighboring ranks
    self.neighbSide = [[] for i in range(self.ndims)]
    direction = numpy.array([0] * self.ndims)
    self.neighbRk = numpy.array([0] * self.ndims)
    periodic = [True for i in range(self.ndims)]
    for i in range(self.ndims):
      direction[i] = self.upDirection[i]
      self.neighbRk[i] = self.dc.getNeighborProc(self.rk, direction, periodic=periodic)
      self.neighbSide[i] = tuple(-direction)
      direction[i] = 0
Example #3
    def test2d_1_non_periodic(self):
        """
        2d array test, 1 ghost, non-periodic boundary conditions
        """

        # create the dist array, the sizes are local to each processor
        da = pnumpy.gdaZeros((2, 3), numpy.float32, numGhosts=1)

        # processor rank and number of processes
        rk = da.rk
        nprocs = da.sz

        # set the data
        da[:] = rk

        # get the neighbor MPI rank (None if there is no neighbor)
        otherRk = rk - 1
        if otherRk < 0:
            otherRk = None

        # collective operation. all procs must call "get"
        southData = da.getData(otherRk, winID=(1, 0))

        # check
        if otherRk is not None and otherRk >= 0:
            self.assertEqual(southData.min(), rk - 1)
            self.assertEqual(southData.max(), rk - 1)

        # clean up
        da.free()
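
Note that getData is collective, so rank 0 still calls it even though its otherRk is None; only ranks with a real neighbour inspect the result. A serial illustration (no MPI needed; nprocs = 3 is an assumption) of which values each rank would see:

nprocs = 3
for rk in range(nprocs):
    otherRk = rk - 1
    if otherRk < 0:
        otherRk = None
    # da[:] = rk on every rank, so the fetched slab holds otherRk everywhere
    expected = None if otherRk is None else float(otherRk)
    print('rank %d fetches from %s -> ghost values %s' % (rk, otherRk, expected))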
Example #4
    def test1d_1(self):
        """
        1d, float64
        """

        dtyp = numpy.float64

        # create the ghosted dist array
        n = 10
        da = pnumpy.gdaZeros((n, ), dtyp, numGhosts=1)

        # set data to process dependent value,
        # da.rk is the mpi proc ID
        # da.sz is the size of the MPI communicator
        da[:] = 100 * da.rk + numpy.array([i for i in range(n)], dtyp)

        # access remote data to the left
        leftRk = (da.rk - 1) % da.sz

        print('proc %d tries to access data from %d' % (da.rk, leftRk))
        leftData = da.getData(pe=leftRk, winID=(1, ))

        print('leftData for rank %d = %s' % (da.rk, str(leftData)))
        # check
        if leftRk < da.rk:
            self.assertEqual(leftData[0], da[-1] - 100)
        else:
            self.assertEqual(leftData[0], da[-1] + 100 * (da.sz - 1))

        # free
        da.free()
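
The assertions imply that winID=(1,) exposes the neighbour's high-side slab, i.e. its last element. A serial check of the arithmetic (sz = 3 is an assumption; no MPI needed):

import numpy

n, sz = 10, 3
for rk in range(sz):
    left = (rk - 1) % sz
    local = 100 * rk + numpy.arange(n, dtype=numpy.float64)
    leftData0 = 100 * left + (n - 1)   # last element held by the left rank
    if left < rk:
        assert leftData0 == local[-1] - 100
    else:
        assert leftData0 == local[-1] + 100 * (sz - 1)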
Example #5
    def apply(self, localArray):
        """
        Apply Laplacian stencil to data
        @param localArray local array
        @return new array on local proc
        """

        # input dist array
        inp = gdaZeros(localArray.shape, localArray.dtype, numGhosts=1)
        # output array
        out = numpy.zeros(localArray.shape, localArray.dtype)

        # no displacement term
        weight = self.stencil[self.zeros]
        out[...] += weight * localArray

        for disp in self.srcLocalDomains:

            weight = self.stencil[disp]

            # no communication required here
            srcDom = self.srcLocalDomains[disp]
            dstDom = self.dstLocalDomains[disp]

            out[dstDom] += weight * localArray[srcDom]

            #
            # now the part that requires communication
            #

            # set the ghost values
            srcSlab = self.srcSlab[disp]
            # copy
            inp[srcSlab] = localArray[srcSlab]

            # send over to local process
            dstSlab = self.dstSlab[disp]
            winId = self.winIds[disp]
            rk = self.neighRk[disp]

            # remote fetch
            out[dstSlab] += weight * inp.getData(rk, winId)

        # some implementations require this
        inp.free()

        return out
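
For reference, here is a minimal single-process version of the same stencil application written with plain numpy and periodic wraparound (not part of pnumpy); it assumes, as the attribute accesses above suggest, that the stencil is a dict mapping displacement tuples to weights. It is handy for checking the distributed result, up to the sign convention chosen for the displacements.

import numpy

def apply_serial(a, stencil):
    """out[i] = sum_d stencil[d] * a[i + d], with periodic wraparound."""
    out = numpy.zeros_like(a)
    axes = tuple(range(a.ndim))
    for disp, weight in stencil.items():
        out += weight * numpy.roll(a, shift=tuple(-d for d in disp), axis=axes)
    return out

# hypothetical 1-d Laplacian weights, including the zero-displacement term
stencil = {(0,): -2.0, (-1,): 1.0, (1,): 1.0}
a = numpy.sin(2 * numpy.pi * numpy.arange(16) / 16.0)
print(apply_serial(a, stencil))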
Example #6
    def test2d_1_periodic(self):
        """
        2d array test, 1 ghost, periodic boundary conditions
        """

        # create the dist array, the sizes are local to each processor
        da = pnumpy.gdaZeros((2, 3), numpy.float32, numGhosts=1)

        # processor rank and number of processes
        rk = da.rk
        nprocs = da.sz

        # set the data
        da[:] = rk

        # access neighbor data, collective operation
        southData = da.getData((rk - 1) % nprocs, winID=(1, 0))

        # check
        self.assertEqual(southData.min(), (rk - 1) % nprocs)
        self.assertEqual(southData.max(), (rk - 1) % nprocs)

        # clean up
        da.free()
Example #7
    def test2d_laplacian_periodic(self):
        """
        2d array, apply Laplacian, periodic along the two axes
        """
        from pnumpy import CubeDecomp
        from pnumpy import MultiArrayIter
        from functools import reduce   # reduce is not a builtin in Python 3
        import operator
        from math import sin, pi

        # global sizes
        ndims = 2
        #ns = numpy.array([60] * ndims)
        ns = numpy.array([3 * 4] * ndims)

        # local rank and number of procs
        rk = MPI.COMM_WORLD.Get_rank()
        sz = MPI.COMM_WORLD.Get_size()

        # find a domain decomposition
        dc = CubeDecomp(sz, ns)

        # not all numbers of procs will give a uniform domain decomposition,
        # exit if none can be found
        if not dc.getDecomp():
            if rk == 0:
                print('no decomp could be found, adjust the number of procs')
            return

        # get the local start/stop indices along each axis as a list of
        # 1d slices
        localSlices = dc.getSlab(rk)
        iBeg = numpy.array([s.start for s in localSlices])
        iEnd = numpy.array([s.stop for s in localSlices])
        nsLocal = numpy.array([s.stop - s.start for s in localSlices])

        # create the dist arrays
        da = pnumpy.gdaZeros(nsLocal, numpy.float32, numGhosts=1)
        laplacian = pnumpy.gdaZeros(nsLocal, numpy.float32, numGhosts=1)

        # set the data
        for it in MultiArrayIter(nsLocal):
            localInds = it.getIndices()
            globalInds = iBeg + localInds
            # positions are cell centered, domain is [0, 1]^ndims
            position = (globalInds + 0.5) / numpy.array(ns, numpy.float32)
            # sin(2*pi*x) * sin(2*pi*y) ...
            da[tuple(localInds)] = reduce(
                operator.mul,
                [numpy.sin(2 * numpy.pi * position[i]) for i in range(ndims)])

        # apply the Laplacian finite difference operator.
        # Start by performing all the operations that do
        # not require any communication.
        laplacian[:] = 2 * ndims * da

        # now subtract the neighbor values which are local to this process
        for idim in range(ndims):
            # indices shifted in the + direction along axis idim
            slabP = [slice(None, None, None) for j in range(idim)] + \
                [slice(1, None, None)] + \
                [slice(None, None, None) for j in range(idim + 1, ndims)]
            # indices shifted in the - direction along axis idim
            slabM = [slice(None, None, None) for j in range(idim)] + \
                [slice(0, -1, None)] + \
                [slice(None, None, None) for j in range(idim + 1, ndims)]
            laplacian[tuple(slabP)] -= da[tuple(slabM)]  # subtract left neighbor
            laplacian[tuple(slabM)] -= da[tuple(slabP)]  # subtract right neighbor

        # fetch the data located on other procs
        periodic = [True for idim in range(ndims)]
        for idim in range(ndims):
            # define the positive and negative directions
            directionP = tuple([0 for j in range(idim)] + [1] +
                               [0 for j in range(idim + 1, ndims)])
            directionM = tuple([0 for j in range(idim)] + [-1] +
                               [0 for j in range(idim + 1, ndims)])
            procP = dc.getNeighborProc(rk, directionP, periodic=periodic)
            procM = dc.getNeighborProc(rk, directionM, periodic=periodic)

            # this is where communication takes place. Note that to get
            # the data on our low-end side we fetch the slab on the
            # positive side of rank procM (directionP), and conversely
            # for the high-end side data.
            dataM = da.getData(procM, winID=directionP)
            dataP = da.getData(procP, winID=directionM)

            # finish off the operator
            laplacian[da.getEllipsis(winID=directionM)] -= dataM
            laplacian[da.getEllipsis(winID=directionP)] -= dataP

        # compute a checksum and send the result to rank 0
        checksum = laplacian.reduce(lambda x, y: abs(x) + abs(y),
                                    0.0,
                                    rootPe=0)
        if rk == 0:
            print('checksum = ', checksum)
            # float32 calculation has higher error
            assert (abs(checksum - 32.0) < 1.e-4)

        # free the windows
        da.free()
        laplacian.free()
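
The 32.0 in the assertion can be verified without MPI: the cell-centred product of sines is an eigenfunction of the unscaled 5-point operator with eigenvalue 8*sin(pi*h)**2, so the checksum reduces to that eigenvalue times the sum of the absolute field values over the 12 x 12 grid. A quick serial sanity check:

import numpy

ns = 12                                   # cells per axis, as in the test
h = 1.0 / ns
x = (numpy.arange(ns) + 0.5) * h          # cell-centred positions
f = numpy.outer(numpy.sin(2 * numpy.pi * x), numpy.sin(2 * numpy.pi * x))
lam = 8.0 * numpy.sin(numpy.pi * h)**2    # eigenvalue of the unscaled operator
print(numpy.abs(lam * f).sum())           # ~32.0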
Example #8
slab = dc.getSlab(rk)
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local domain sizes
nx, ny = iEnd - iBeg, jEnd - jBeg

# the decomp must be regular
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    # every rank must shut down, not just rank 0
    MPI.Finalize()
    sys.exit(1)

# create and set the input distributed array
inputData = pnumpy.gdaZeros((nx, ny), numpy.float32, numGhosts=1)
setValues(nxG, nyG, iBeg, iEnd, jBeg, jEnd, inputData)

# store the number of times a cell has an invalid neighbor so
# we can correct the weights
numInvalidNeighbors = numpy.zeros((nx, ny), numpy.int32)

domain = Partition(2)

# the ghosted array only exposes west, east, south and north
# windows. Need to also export the corners
for disp in (-1, -1), (-1, 1), (1, -1), (1, 1):
    d0 = (disp[0], 0)
    d1 = (0, disp[1])
    n0 = (-disp[0], 0)
    n1 = (0, -disp[1])
Example #9
# list of slices
slab = dc.getSlab(rk)

# starting/ending indices for local domain
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local variables
xx = numpy.outer(xs[iBeg:iEnd], numpy.ones((ny // npy,), numpy.float64))
yy = numpy.outer(numpy.ones((nx // npx,), numpy.float64), ys[jBeg:jEnd])

# local field
zz = numpy.sin(numpy.pi*xx) * numpy.cos(2*numpy.pi*yy)

# create and set distributed array
zda = pnumpy.gdaZeros( zz.shape, zz.dtype, numGhosts=1 )
zda[:] = zz

# compute the star Laplacian in the interior, this does not require
# any communication

laplaceZ = 4 * zda[:]

# local neighbour contributions, no communication
laplaceZ[1:  , :] -= zda[0:-1,:]
laplaceZ[0:-1, :] -= zda[1:  ,:]
laplaceZ[:, 1:  ] -= zda[:,0:-1]
laplaceZ[:, 0:-1] -= zda[:,1:  ]


# now compute and fill in the halo
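
The halo step itself is not shown in this excerpt. A sketch of how it might be completed, following the pattern of the periodic-Laplacian test above (dc, rk and the periodic flags are assumed to be defined earlier in the script):

periodic = [True, True]
for directionP, directionM in (((1, 0), (-1, 0)), ((0, 1), (0, -1))):
    procP = dc.getNeighborProc(rk, directionP, periodic=periodic)
    procM = dc.getNeighborProc(rk, directionM, periodic=periodic)

    # fetch the neighbours' boundary slabs (collective calls)
    dataM = zda.getData(procM, winID=directionP)
    dataP = zda.getData(procP, winID=directionM)

    # subtract the remote contributions on the local boundary slabs
    laplaceZ[zda.getEllipsis(winID=directionM)] -= dataM
    laplaceZ[zda.getEllipsis(winID=directionP)] -= dataP

# free the RMA windows when done
zda.free()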