def test_reuse_local_buffer(comm): procmesh = pfft.ProcMesh(np=[1], comm=comm) partition1 = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT) partition2 = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh, flags=pfft.Flags.PFFT_ESTIMATE) buffer1 = pfft.LocalBuffer(partition1) buffer2 = pfft.LocalBuffer(partition2, base=buffer1) buffer3 = pfft.LocalBuffer(partition1) assert buffer1 is not buffer2 assert buffer1.address == buffer2.address assert buffer1 in buffer2 assert buffer2 in buffer1 assert buffer1 not in buffer3 assert buffer3 not in buffer1 assert buffer2 not in buffer3 assert buffer3 not in buffer2
def test_2d_on_2d_r2c(comm): if comm.size == 1: procmesh = pfft.ProcMesh(np=[1, 1], comm=comm) else: procmesh = pfft.ProcMesh(np=[2, 2], comm=comm) N = (8, 8) data = numpy.arange(numpy.prod(N), dtype='f8').reshape(N) correct = numpy.fft.rfftn(data.copy()) result = numpy.zeros_like(correct) partition = pfft.Partition( pfft.Type.PFFT_R2C, N, procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT # | pfft.Flags.PADDED_R2C # doesn't work yet ) buffer1 = pfft.LocalBuffer(partition) buffer2 = pfft.LocalBuffer(partition) plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) buffer1.view_input()[:] = data[partition.local_i_slice] plan.execute(buffer1, buffer2) result[partition.local_o_slice] = buffer2.view_output() result = comm.allreduce(result) assert_almost_equal(correct, result)
def test_correct_multi(comm): procmesh = pfft.ProcMesh(np=[ comm.size, ], comm=comm) N = (2, 3) data = numpy.arange(numpy.prod(N), dtype='complex128').reshape(N) correct = numpy.fft.fftn(data) result = numpy.zeros_like(data) partition = pfft.Partition(pfft.Type.PFFT_C2C, N, procmesh, flags=pfft.Flags.PFFT_ESTIMATE) buffer1 = pfft.LocalBuffer(partition) buffer2 = pfft.LocalBuffer(partition) plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) buffer1.view_input()[:] = data[partition.local_i_slice] plan.execute(buffer1, buffer2) result[partition.local_o_slice] = buffer2.view_output() result = comm.allreduce(result) assert_almost_equal(correct, result)
def test_correct_single(comm): procmesh = pfft.ProcMesh(np=[1], comm=comm) partition = pfft.Partition(pfft.Type.PFFT_C2C, [2, 2], procmesh, flags=pfft.Flags.PFFT_ESTIMATE) buffer1 = pfft.LocalBuffer(partition) buffer2 = pfft.LocalBuffer(partition) plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) buffer1.view_input()[:] = numpy.arange(4).reshape(2, 2) correct = numpy.fft.fftn(buffer1.view_input()) plan.execute(buffer1, buffer2) assert_array_equal(correct, buffer2.view_output())
def test_plan_backward(comm): procmesh = pfft.ProcMesh(np=[1], comm=comm) partition = pfft.Partition(pfft.Type.PFFT_R2C, [2, 2], procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT) buffer1 = pfft.LocalBuffer(partition) buffer2 = pfft.LocalBuffer(partition) plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) assert plan.flags & pfft.Flags.PFFT_TRANSPOSED_OUT assert plan.type == pfft.Type.PFFT_R2C plan = pfft.Plan(partition, pfft.Direction.PFFT_BACKWARD, buffer1, buffer2) assert plan.flags & pfft.Flags.PFFT_TRANSPOSED_IN assert plan.type == pfft.Type.PFFT_C2R
def test_leak(comm): for i in range(1024): procmesh = pfft.ProcMesh(np=[1, 1], comm=comm) partition = pfft.Partition(pfft.Type.PFFT_C2C, [128, 128, 128], procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT) buffer = pfft.LocalBuffer(partition) #FIXME: check with @mpip if this is correct. i = buffer.view_input()
def test_raw(comm): procmesh = pfft.ProcMesh(np=[1], comm=comm) partition = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT) buffer1 = pfft.LocalBuffer(partition) assert buffer1.view_raw().size == 2 * partition.alloc_local
def test_transpose_2d_decom(comm): procmesh = pfft.ProcMesh(np=[1, 1], comm=comm) N = (1, 2, 3, 4) partition = pfft.Partition(pfft.Type.PFFT_C2C, N, procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT) buffer = pfft.LocalBuffer(partition) i = buffer.view_input() assert_array_equal(i.strides, [384, 192, 64, 16]) o = buffer.view_output() assert_array_equal(o.strides, [64, 192, 64, 16])
def test_transpose_3d_decom(comm): procmesh = pfft.ProcMesh(np=[1, 1, 1], comm=comm) N = (1, 2, 3, 4, 5) partition = pfft.Partition(pfft.Type.PFFT_C2C, N, procmesh, flags=pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT) buffer = pfft.LocalBuffer(partition) #FIXME: check with @mpip if this is correct. i = buffer.view_input() assert_array_equal(i.strides, [1920, 960, 320, 80, 16]) o = buffer.view_output() assert_array_equal(o.strides, [80, 960, 320, 80, 16])
def __init__(self, pm, base=None): """ Used internally to add shortcuts of attributes from pm """ if base is None: base = pfft.LocalBuffer(pm.partition) self.base = base self.pm = pm self.partition = pm.partition self.BoxSize = pm.BoxSize self.Nmesh = pm.Nmesh self.ndim = len(pm.Nmesh) if isinstance(self, RealField): self.value = self.base.view_input() self.start = self.partition.local_i_start self.cshape = pm.Nmesh self.x = pm.x self.i = pm.i_ind elif isinstance(self, ComplexField): self.value = self.base.view_output() self.start = self.partition.local_o_start self.cshape = pm.Nmesh.copy() self.cshape[-1] = self.cshape[-1] // 2 + 1 self.x = pm.k self.i = pm.o_ind self.real = self.value.real self.imag = self.value.imag self.plain = self.value.view(dtype=(self.real.dtype, 2)) else: raise TypeError("Olny RealField and ComplexField. No more subclassing"); # copy over a few ndarray attributes self.flat = self.value.flat self.shape = self.value.shape self.size = self.value.size self.dtype = self.value.dtype # the slices in the full array self.slices = tuple([ slice(s, s + n) for s, n in zip(self.start, self.shape) ]) self.csize = pm.comm.allreduce(self.size)
def test_transposed(comm): procmesh = pfft.ProcMesh(np=[ 1, ], comm=comm) partition = pfft.Partition(pfft.Type.PFFT_C2C, [4, 8], procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT) buffer = pfft.LocalBuffer(partition) o = buffer.view_output() i = buffer.view_input() assert_array_equal(i.shape, (4, 8)) assert_array_equal(i.strides, (128, 16)) assert_array_equal(o.shape, (4, 8)) assert_array_equal(o.strides, (16, 64)) assert o.dtype == numpy.dtype('complex128') assert i.dtype == numpy.dtype('complex128')
def test_padded(comm): procmesh = pfft.ProcMesh(np=[ 1, ], comm=comm) partition = pfft.Partition( pfft.Type.PFFT_R2C, [4, 8], procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_PADDED_R2C) buffer = pfft.LocalBuffer(partition) i = buffer.view_input() o = buffer.view_output() assert_array_equal(i.shape, (4, 8)) assert_array_equal(i.strides, (80, 8)) assert_array_equal(o.shape, (4, 5)) assert_array_equal(o.strides, (16, 64)) assert i.dtype == numpy.dtype('float64') assert o.dtype == numpy.dtype('complex128')
def __init__(self, BoxSize, Nmesh, paintbrush='cic', comm=None, np=None, verbose=False, dtype='f8'): """ create a PM object. """ # this weird sequence to intialize comm is because # we want to be compatible with None comm == MPI.COMM_WORLD # while not relying on pfft's full mpi4py compatibility # (passing None through to pfft) if comm is None: self.comm = MPI.COMM_WORLD else: self.comm = comm if np is None: np = pfft.split_size_2d(self.comm.size) dtype = numpy.dtype(dtype) if dtype == numpy.dtype('f8'): forward = pfft.Type.PFFT_R2C backward = pfft.Type.PFFT_C2R elif dtype == numpy.dtype('f4'): forward = pfft.Type.PFFTF_R2C backward = pfft.Type.PFFTF_C2R else: raise ValueError("dtype must be f8 or f4") self.procmesh = pfft.ProcMesh(np, comm=comm) self.Nmesh = Nmesh self.BoxSize = numpy.empty(3, dtype='f8') self.BoxSize[:] = BoxSize self.partition = pfft.Partition(forward, [Nmesh, Nmesh, Nmesh], self.procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT) buffer = pfft.LocalBuffer(self.partition) self.real = buffer.view_input() self.real[:] = 0 self.complex = buffer.view_output() self.T = Timers(self.comm) with self.T['Plan']: self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD, self.real.base, self.complex.base, forward, pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT) self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD, self.complex.base, self.real.base, backward, pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_DESTROY_INPUT) self.domain = domain.GridND(self.partition.i_edges, comm=self.comm) self.verbose = verbose self.stack = [] k = [] x = [] w = [] r = [] for d in range(self.partition.ndim): t = numpy.ones(self.partition.ndim, dtype='intp') s = numpy.ones(self.partition.ndim, dtype='intp') t[d] = self.partition.local_ni[d] s[d] = self.partition.local_no[d] wi = numpy.arange(s[d], dtype='f4') + self.partition.local_o_start[d] ri = numpy.arange(t[d], dtype='f4') + self.partition.local_i_start[d] wi[wi >= self.Nmesh // 2] -= self.Nmesh ri[ri >= self.Nmesh // 2] -= self.Nmesh wi *= (2 * numpy.pi / self.Nmesh) ki = wi * self.Nmesh / self.BoxSize[d] xi = ri * self.BoxSize[d] / self.Nmesh w.append(wi.reshape(s)) r.append(ri.reshape(t)) k.append(ki.reshape(s)) x.append(xi.reshape(t)) self.w = w self.r = r self.k = k self.x = x # set the painter self.paintbrush = paintbrush.lower() if paintbrush == 'cic': self.painter = cic.paint elif paintbrush == 'tsc': self.painter = tsc.paint else: raise ValueError("valid `painter` values are: ['cic', 'tsc']")
procmesh = pfft.ProcMesh([4], comm=MPI.COMM_WORLD) partition = pfft.Partition( pfft.Type.PFFT_C2C, [8, 8], procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT) for irank in range(4): MPI.COMM_WORLD.barrier() if irank != procmesh.rank: continue print 'My rank is', procmesh.this print 'local_i_start', partition.local_i_start print 'local_o_start', partition.local_o_start print 'i_edges', partition.i_edges print 'o_edges', partition.o_edges buffer = pfft.LocalBuffer(partition) plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer) iplan = pfft.Plan( partition, pfft.Direction.PFFT_BACKWARD, buffer, flags=pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT, ) input = buffer.view_input() print input.base # now lets fill the input array in a funny way # a[i, j] = i * 10 + j # we will do a tranform roundtrip and print this out indices = numpy.array(numpy.indices(input.shape))
def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_method='estimate', resampler='cic'): """ create a PM object. """ if comm is None: comm = MPI.COMM_WORLD self.comm = comm if np is None: if len(Nmesh) >= 3: np = pfft.split_size_2d(self.comm.size) elif len(Nmesh) == 2: np = [self.comm.size] elif len(Nmesh) == 1: np = [] dtype = numpy.dtype(dtype) self.dtype = dtype if dtype == numpy.dtype('f8'): forward = pfft.Type.PFFT_R2C backward = pfft.Type.PFFT_C2R elif dtype == numpy.dtype('f4'): forward = pfft.Type.PFFTF_R2C backward = pfft.Type.PFFTF_C2R else: raise ValueError("dtype must be f8 or f4") self.procmesh = pfft.ProcMesh(np, comm=comm) self.Nmesh = numpy.array(Nmesh, dtype='i8') self.ndim = len(self.Nmesh) self.BoxSize = numpy.empty(len(Nmesh), dtype='f8') self.BoxSize[:] = BoxSize self.partition = pfft.Partition(forward, self.Nmesh, self.procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_PADDED_R2C) bufferin = pfft.LocalBuffer(self.partition) bufferout = pfft.LocalBuffer(self.partition) plan_method = { "estimate": pfft.Flags.PFFT_ESTIMATE, "measure": pfft.Flags.PFFT_MEASURE, "exhaustive": pfft.Flags.PFFT_EXHAUSTIVE, } [plan_method] self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD, bufferin, bufferout, forward, plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_R2C) self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD, bufferout, bufferin, backward, plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_C2R) self.ipforward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD, bufferin, bufferin, forward, plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_R2C) self.ipbackward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD, bufferout, bufferout, backward, plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_C2R) self.domain = domain.GridND(self.partition.i_edges, comm=self.comm) k = [] x = [] w = [] r = [] o_ind = [] i_ind = [] for d in range(self.partition.ndim): t = numpy.ones(self.partition.ndim, dtype='intp') s = numpy.ones(self.partition.ndim, dtype='intp') t[d] = self.partition.local_i_shape[d] s[d] = self.partition.local_o_shape[d] i_indi = numpy.arange(t[d], dtype='intp') + self.partition.local_i_start[d] o_indi = numpy.arange(s[d], dtype='intp') + self.partition.local_o_start[d] wi = numpy.arange(s[d], dtype='f4') + self.partition.local_o_start[d] ri = numpy.arange(t[d], dtype='f4') + self.partition.local_i_start[d] wi[wi >= self.Nmesh[d] // 2] -= self.Nmesh[d] ri[ri >= self.Nmesh[d] // 2] -= self.Nmesh[d] wi *= (2 * numpy.pi / self.Nmesh[d]) ki = wi * self.Nmesh[d] / self.BoxSize[d] xi = ri * self.BoxSize[d] / self.Nmesh[d] o_ind.append(o_indi.reshape(s)) i_ind.append(i_indi.reshape(t)) w.append(wi.reshape(s)) r.append(ri.reshape(t)) k.append(ki.reshape(s)) x.append(xi.reshape(t)) self.i_ind = i_ind self.o_ind = o_ind self.w = w self.r = r self.k = k self.x = x # Transform from simulation unit to local grid unit. self.affine = Affine(self.partition.ndim, translate=-self.partition.local_i_start, scale=1.0 * self.Nmesh / self.BoxSize, period = self.Nmesh) # Transform from global grid unit to local grid unit. self.affine_grid = Affine(self.partition.ndim, translate=-self.partition.local_i_start, scale=1.0, period = self.Nmesh) self.resampler = FindResampler(resampler)
def main(comm): Nmesh = [8, 8] if len(Nmesh) == 3: procmesh = pfft.ProcMesh(pfft.split_size_2d(comm.size), comm=comm) else: procmesh = pfft.ProcMesh((comm.size, ), comm=comm) partition = pfft.Partition( pfft.Type.R2C, Nmesh, procmesh, pfft.Flags.PADDED_R2C | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.DESTROY_INPUT) # generate the coordinate support. k = [None] * partition.ndim x = [None] * partition.ndim for d in range(partition.ndim): k[d] = numpy.arange(partition.no[d])[partition.local_o_slice[d]] k[d][k[d] >= partition.n[d] // 2] -= partition.n[d] # set to the right numpy broadcast shape k[d] = k[d].reshape( [-1 if i == d else 1 for i in range(partition.ndim)]) x[d] = numpy.arange(partition.ni[d])[partition.local_i_slice[d]] # set to the right numpy broadcast shape x[d] = x[d].reshape( [-1 if i == d else 1 for i in range(partition.ndim)]) # allocate memory buffer1 = pfft.LocalBuffer(partition) phi_disp = buffer1.view_input() buffer2 = pfft.LocalBuffer(partition) phi_spec = buffer2.view_output() # forward plan disp_to_spec_inplace = pfft.Plan( partition, pfft.Direction.PFFT_FORWARD, buffer2, buffer2, # the two lines below not needed after version 0.1.21 # type=pfft.Type.R2C, # flags=pfft.Flags.TRANSPOSED_OUT | pfft.Flags.DESTROY_INPUT | pfft.Flags.PADDED_R2C ) buffer3 = pfft.LocalBuffer(partition) grad_spec = buffer3.view_output() buffer4 = pfft.LocalBuffer(partition) grad_disp = buffer4.view_input() # backward plan spec_to_disp = pfft.Plan( partition, pfft.Direction.PFFT_BACKWARD, buffer3, buffer4, # the two lines below not needed after version 0.1.21 # type=pfft.Type.C2R, # flags=pfft.Flags.TRANSPOSED_IN | pfft.Flags.DESTROY_INPUT | pfft.Flags.PADDED_C2R ) # to do : fill in initial value dx = x[0] - Nmesh[0] * 0.5 + 0.5 dy = x[1] - Nmesh[1] * 0.5 + 0.5 phi_disp[...] = dx**2 + dx * dy + dy**2 cprint('phi =', gather(partition, phi_disp).round(2), comm=comm) # copy in to the buffer for inplace transform # this preserves value of phi_disp phi_spec.base.view_input()[...] = phi_disp disp_to_spec_inplace.execute(phi_spec.base, phi_spec.base) all_grad_disp = numpy.zeros([partition.ndim] + list(phi_disp.shape), dtype=grad_disp.dtype) # cprint('phi_k =', gather(partition, phi_spec, mode='output').round(2), comm=comm) for d in range(partition.ndim): grad_spec[...] = phi_spec[...] * (k[d] * 1j) spec_to_disp.execute(grad_spec.base, grad_disp.base) # copy the gradient along d th direction all_grad_disp[d] = grad_disp # now do your thing. for d in range(partition.ndim): cprint('dim =', gather(partition, all_grad_disp[d]).round(2), comm=comm)