Python ProcMesh 예제들, pfft.ProcMesh Python 예제들

예제 #1

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_2d_on_2d_r2c(comm):
    """Compare a real-to-complex 2-D transform on a 2-D mesh with numpy.

    A 1x1 process mesh is used when ``comm`` has a single rank and a 2x2
    mesh otherwise. The forward transform of an 8x8 ramp is collected from
    all ranks and checked against ``numpy.fft.rfftn``.
    """
    mesh_dims = [1, 1] if comm.size == 1 else [2, 2]
    procmesh = pfft.ProcMesh(np=mesh_dims, comm=comm)
    shape = (8, 8)

    data = numpy.arange(numpy.prod(shape), dtype='f8').reshape(shape)

    # Reference answer, plus an all-zero array each rank partially fills in.
    correct = numpy.fft.rfftn(data.copy())
    result = numpy.zeros_like(correct)

    # PADDED_R2C is intentionally not part of the flags: doesn't work yet.
    plan_flags = (pfft.Flags.PFFT_ESTIMATE
                  | pfft.Flags.PFFT_TRANSPOSED_OUT
                  | pfft.Flags.PFFT_DESTROY_INPUT)
    partition = pfft.Partition(pfft.Type.PFFT_R2C, shape, procmesh,
                               flags=plan_flags)

    src = pfft.LocalBuffer(partition)
    dst = pfft.LocalBuffer(partition)
    plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, src, dst)

    # Load this rank's part of the input, then transform src -> dst.
    src.view_input()[:] = data[partition.local_i_slice]
    plan.execute(src, dst)

    # Place the local output at its global position and combine all ranks.
    result[partition.local_o_slice] = dst.view_output()
    result = comm.allreduce(result)
    assert_almost_equal(correct, result)

예제 #2

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_world():
    """Check that ``comm=None`` is treated like ``MPI.COMM_WORLD``."""
    world = MPI.COMM_WORLD

    # Whether the world communicator or None is passed, the mesh must
    # report the world communicator.
    for requested in (world, None):
        procmesh = pfft.ProcMesh(np=[world.size], comm=requested)
        assert procmesh.comm == world

    # ProcMesh.split must agree between None and the world communicator.
    for ndim in (2, 1):
        assert_array_equal(pfft.ProcMesh.split(ndim, None),
                           pfft.ProcMesh.split(ndim, world))

예제 #3

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_correct_multi(comm):
    """Compare a complex 2x3 transform on a 1-D mesh with ``numpy.fft.fftn``."""
    procmesh = pfft.ProcMesh(np=[comm.size], comm=comm)
    shape = (2, 3)

    data = numpy.arange(numpy.prod(shape), dtype='complex128').reshape(shape)
    correct = numpy.fft.fftn(data)
    result = numpy.zeros_like(data)

    partition = pfft.Partition(pfft.Type.PFFT_C2C, shape, procmesh,
                               flags=pfft.Flags.PFFT_ESTIMATE)

    src = pfft.LocalBuffer(partition)
    dst = pfft.LocalBuffer(partition)
    plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, src, dst)

    # Load this rank's part of the input, then transform src -> dst.
    src.view_input()[:] = data[partition.local_i_slice]
    plan.execute(src, dst)

    # Place the local output at its global position and combine all ranks.
    result[partition.local_o_slice] = dst.view_output()
    result = comm.allreduce(result)
    assert_almost_equal(correct, result)

예제 #4

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_reuse_local_buffer(comm):
    """Check address sharing and containment for a buffer built with ``base=``."""
    procmesh = pfft.ProcMesh(np=[1], comm=comm)

    transposed_flags = (pfft.Flags.PFFT_ESTIMATE
                        | pfft.Flags.PFFT_TRANSPOSED_OUT)
    partition1 = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh,
                                flags=transposed_flags)
    partition2 = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh,
                                flags=pfft.Flags.PFFT_ESTIMATE)

    owner = pfft.LocalBuffer(partition1)
    alias = pfft.LocalBuffer(partition2, base=owner)
    separate = pfft.LocalBuffer(partition1)

    # The alias is a distinct Python object at the same address as its base.
    assert owner is not alias
    assert owner.address == alias.address

    # Buffers that share storage contain each other ...
    for inner, outer in ((owner, alias), (alias, owner)):
        assert inner in outer

    # ... while the independently created buffer is unrelated to both.
    unrelated_pairs = (
        (owner, separate),
        (separate, owner),
        (alias, separate),
        (separate, alias),
    )
    for inner, outer in unrelated_pairs:
        assert inner not in outer

예제 #5

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_nino(comm):
    """Check that ``ni`` and ``no`` of a transposed C2C partition are [4, 8]."""
    procmesh = pfft.ProcMesh(np=[comm.size], comm=comm)
    partition = pfft.Partition(pfft.Type.PFFT_C2C, [4, 8], procmesh,
                               pfft.Flags.PFFT_TRANSPOSED_OUT)

    for extent in (partition.ni, partition.no):
        assert_array_equal(extent, [4, 8])

예제 #6

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_raw(comm):
    """Check that the raw buffer view holds ``2 * alloc_local`` elements."""
    procmesh = pfft.ProcMesh(np=[1], comm=comm)

    r2c_flags = pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT
    partition = pfft.Partition(pfft.Type.PFFT_R2C, [8, 8], procmesh,
                               flags=r2c_flags)

    raw_view = pfft.LocalBuffer(partition).view_raw()
    assert raw_view.size == 2 * partition.alloc_local

예제 #7

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_leak(comm):
    """Build a mesh, a partition and a buffer 1024 times in a row.

    The test asserts nothing; it only creates and drops the objects
    repeatedly, presumably so that leaked buffers show up as memory
    growth or a crash.
    """
    # The counter is unused, so it is named `_` rather than being
    # overwritten by the view inside the loop body.
    for _ in range(1024):
        procmesh = pfft.ProcMesh(np=[1, 1], comm=comm)

        partition = pfft.Partition(pfft.Type.PFFT_C2C, [128, 128, 128],
                                   procmesh, pfft.Flags.PFFT_TRANSPOSED_OUT)

        buffer = pfft.LocalBuffer(partition)
        # FIXME: check with @mpip if this is correct.
        view = buffer.view_input()

예제 #8

0

파일 보기

파일: test_edges.py 프로젝트: tobson/pfft-python

def main():
    """Check the input edges of a 4x4 C2C partition on a 3-rank mesh."""
    comm = MPI.COMM_WORLD
    # this must run with comm.size == 3
    assert comm.size == 3

    procmesh = pfft.ProcMesh(np=[3])
    partition = pfft.Partition(pfft.Type.PFFT_C2C, [4, 4], procmesh,
                               pfft.Flags.PFFT_TRANSPOSED_OUT)

    # Expected input edges, indexed by axis.
    expected_i_edges = ([0, 2, 4, 4], [0, 4])
    for axis, expected in enumerate(expected_i_edges):
        assert_array_equal(partition.i_edges[axis], expected)

예제 #9

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_edges(comm):
    """Check input and output edges of a transposed 4x4 C2C partition.

    The expected values contain three intervals along the split axis, so
    this presumably expects ``comm`` to have three ranks.
    """
    procmesh = pfft.ProcMesh(np=[comm.size], comm=comm)
    partition = pfft.Partition(pfft.Type.PFFT_C2C, [4, 4], procmesh,
                               pfft.Flags.PFFT_TRANSPOSED_OUT)

    split_edges = [0, 2, 4, 4]
    whole_edges = [0, 4]

    # On input, axis 0 carries the split.
    assert_array_equal(partition.i_edges[0], split_edges)
    assert_array_equal(partition.i_edges[1], whole_edges)

    # On output the split is expected on axis 1 instead.
    assert_array_equal(partition.o_edges[1], split_edges)
    assert_array_equal(partition.o_edges[0], whole_edges)

예제 #10

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_edges_padded(comm):
    """Check the edges of a padded, transposed 16x8 R2C partition."""
    procmesh = pfft.ProcMesh(np=[comm.size], comm=comm)

    padded_flags = (pfft.Flags.PFFT_TRANSPOSED_OUT
                    | pfft.Flags.PFFT_PADDED_R2C)
    partition = pfft.Partition(pfft.Type.PFFT_R2C, [16, 8], procmesh,
                               padded_flags)

    # The input side spans the full 16x8 mesh.
    assert_array_equal(partition.i_edges[0], [0, 16])
    assert_array_equal(partition.i_edges[1], [0, 8])

    # The output side is expected to have 5 entries along the last axis.
    assert_array_equal(partition.o_edges[0], [0, 16])
    assert_array_equal(partition.o_edges[1], [0, 5])

예제 #11

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_transpose_2d_decom(comm):
    """Check view strides of a transposed 4-D C2C layout on a 1x1 mesh."""
    procmesh = pfft.ProcMesh(np=[1, 1], comm=comm)
    shape = (1, 2, 3, 4)

    c2c_flags = pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT
    partition = pfft.Partition(pfft.Type.PFFT_C2C, shape, procmesh,
                               flags=c2c_flags)

    buffer = pfft.LocalBuffer(partition)

    input_view = buffer.view_input()
    assert_array_equal(input_view.strides, [384, 192, 64, 16])

    output_view = buffer.view_output()
    assert_array_equal(output_view.strides, [64, 192, 64, 16])

예제 #12

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_correct_single(comm):
    """Compare a 2x2 complex transform on a one-process mesh with numpy."""
    procmesh = pfft.ProcMesh(np=[1], comm=comm)
    partition = pfft.Partition(pfft.Type.PFFT_C2C, [2, 2], procmesh,
                               flags=pfft.Flags.PFFT_ESTIMATE)

    src = pfft.LocalBuffer(partition)
    dst = pfft.LocalBuffer(partition)
    plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, src, dst)

    src.view_input()[:] = numpy.arange(4).reshape(2, 2)
    # The reference is taken from the input view before the plan runs.
    correct = numpy.fft.fftn(src.view_input())
    plan.execute(src, dst)

    assert_array_equal(correct, dst.view_output())

예제 #13

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_transpose_3d_decom(comm):
    """Check view strides of a transposed 5-D C2C layout on a 1x1x1 mesh."""
    procmesh = pfft.ProcMesh(np=[1, 1, 1], comm=comm)
    shape = (1, 2, 3, 4, 5)

    c2c_flags = pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT
    partition = pfft.Partition(pfft.Type.PFFT_C2C, shape, procmesh,
                               flags=c2c_flags)

    buffer = pfft.LocalBuffer(partition)

    # FIXME: check with @mpip if this is correct.
    input_view = buffer.view_input()
    assert_array_equal(input_view.strides, [1920, 960, 320, 80, 16])

    output_view = buffer.view_output()
    assert_array_equal(output_view.strides, [80, 960, 320, 80, 16])

예제 #14

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_plan_backward(comm):
    """Check the flags and type reported by forward and backward plans.

    Both plans come from the same transposed-out R2C partition. The
    backward plan is expected to report TRANSPOSED_IN and C2R.
    """
    procmesh = pfft.ProcMesh(np=[1], comm=comm)

    r2c_flags = pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT
    partition = pfft.Partition(pfft.Type.PFFT_R2C, [2, 2], procmesh,
                               flags=r2c_flags)

    first = pfft.LocalBuffer(partition)
    second = pfft.LocalBuffer(partition)

    forward_plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD,
                             first, second)
    assert forward_plan.flags & pfft.Flags.PFFT_TRANSPOSED_OUT
    assert forward_plan.type == pfft.Type.PFFT_R2C

    # Replace the forward plan before building the backward one, so that
    # only one plan is referenced at a time.
    forward_plan = pfft.Plan(partition, pfft.Direction.PFFT_BACKWARD,
                             first, second)
    backward_plan = forward_plan
    assert backward_plan.flags & pfft.Flags.PFFT_TRANSPOSED_IN
    assert backward_plan.type == pfft.Type.PFFT_C2R

예제 #15

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_transposed(comm):
    """Check shape, strides and dtype of views on a transposed 4x8 C2C buffer."""
    procmesh = pfft.ProcMesh(np=[1], comm=comm)
    partition = pfft.Partition(pfft.Type.PFFT_C2C, [4, 8], procmesh,
                               pfft.Flags.PFFT_TRANSPOSED_OUT)

    buffer = pfft.LocalBuffer(partition)
    output_view = buffer.view_output()
    input_view = buffer.view_input()

    # Both views have shape (4, 8), but the output strides are expected
    # to differ from the input strides.
    assert_array_equal(input_view.shape, (4, 8))
    assert_array_equal(input_view.strides, (128, 16))
    assert_array_equal(output_view.shape, (4, 8))
    assert_array_equal(output_view.strides, (16, 64))

    complex_dtype = numpy.dtype('complex128')
    assert output_view.dtype == complex_dtype
    assert input_view.dtype == complex_dtype

예제 #16

0

파일 보기

파일: test_pfft.py 프로젝트: AntoineSIMTEK/pfft-python

def test_padded(comm):
    """Check shape, strides and dtype of views on a padded 4x8 R2C buffer."""
    procmesh = pfft.ProcMesh(np=[1], comm=comm)

    padded_flags = (pfft.Flags.PFFT_TRANSPOSED_OUT
                    | pfft.Flags.PFFT_PADDED_R2C)
    partition = pfft.Partition(pfft.Type.PFFT_R2C, [4, 8], procmesh,
                               padded_flags)

    buffer = pfft.LocalBuffer(partition)
    input_view = buffer.view_input()
    output_view = buffer.view_output()

    # Real input: shape (4, 8), with a row stride of 80 bytes.
    assert_array_equal(input_view.shape, (4, 8))
    assert_array_equal(input_view.strides, (80, 8))
    # Complex output: shape (4, 5).
    assert_array_equal(output_view.shape, (4, 5))
    assert_array_equal(output_view.strides, (16, 64))

    assert input_view.dtype == numpy.dtype('float64')
    assert output_view.dtype == numpy.dtype('complex128')

예제 #17

0

파일 보기

파일: particlemesh.py 프로젝트: sashmit/pmesh

    def __init__(self, BoxSize, Nmesh, paintbrush='cic', comm=None, np=None, verbose=False, dtype='f8'):
        """Create a PM object.

        Parameters
        ----------
        BoxSize : scalar or sequence
            Assigned into a length-3 ``f8`` array stored as ``self.BoxSize``.
        Nmesh : int
            Mesh size; the partition is built as ``[Nmesh, Nmesh, Nmesh]``.
        paintbrush : str
            ``'cic'`` or ``'tsc'``, matched case-insensitively.
        comm : communicator or None
            ``None`` selects ``MPI.COMM_WORLD`` for ``self.comm``.
        np : sequence or None
            Process mesh dimensions; ``None`` uses
            ``pfft.split_size_2d(self.comm.size)``.
        verbose : bool
            Stored as ``self.verbose``.
        dtype : dtype-like
            ``'f8'`` or ``'f4'``; selects the double or single precision
            transform types.

        Raises
        ------
        ValueError
            If ``dtype`` is neither f8 nor f4, or ``paintbrush`` is not one
            of the supported names.
        """
        # this weird sequence to initialize comm is because
        # we want to be compatible with None comm == MPI.COMM_WORLD
        # while not relying on pfft's full mpi4py compatibility
        # (passing None through to pfft)
        if comm is None:
            self.comm = MPI.COMM_WORLD
        else:
            self.comm = comm
        if np is None:
            np = pfft.split_size_2d(self.comm.size)

        dtype = numpy.dtype(dtype)
        if dtype == numpy.dtype('f8'):
            forward = pfft.Type.PFFT_R2C
            backward = pfft.Type.PFFT_C2R
        elif dtype == numpy.dtype('f4'):
            forward = pfft.Type.PFFTF_R2C
            backward = pfft.Type.PFFTF_C2R
        else:
            raise ValueError("dtype must be f8 or f4")

        # The caller's `comm` (possibly None) is handed to pfft unchanged,
        # as described in the comment above.
        self.procmesh = pfft.ProcMesh(np, comm=comm)
        self.Nmesh = Nmesh
        self.BoxSize = numpy.empty(3, dtype='f8')
        self.BoxSize[:] = BoxSize
        self.partition = pfft.Partition(forward,
            [Nmesh, Nmesh, Nmesh],
            self.procmesh,
            pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT)

        # One buffer backs both the real-space and the complex-space view.
        buffer = pfft.LocalBuffer(self.partition)
        self.real = buffer.view_input()
        self.real[:] = 0

        self.complex = buffer.view_output()

        self.T = Timers(self.comm)
        with self.T['Plan']:
            self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
                    self.real.base, self.complex.base, forward,
                    pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT)
            self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
                    self.complex.base, self.real.base, backward,
                    pfft.Flags.PFFT_ESTIMATE | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_DESTROY_INPUT)

        self.domain = domain.GridND(self.partition.i_edges, comm=self.comm)
        self.verbose = verbose
        self.stack = []

        k = []
        x = []
        w = []
        r = []

        # Per-axis coordinate arrays. Each is reshaped so that only axis
        # `d` has a length other than 1.
        for d in range(self.partition.ndim):
            t = numpy.ones(self.partition.ndim, dtype='intp')
            s = numpy.ones(self.partition.ndim, dtype='intp')
            t[d] = self.partition.local_ni[d]
            s[d] = self.partition.local_no[d]
            wi = numpy.arange(s[d], dtype='f4') + self.partition.local_o_start[d]
            ri = numpy.arange(t[d], dtype='f4') + self.partition.local_i_start[d]

            # Wrap indices in the upper half of the mesh to negative values.
            wi[wi >= self.Nmesh // 2] -= self.Nmesh
            ri[ri >= self.Nmesh // 2] -= self.Nmesh

            wi *= (2 * numpy.pi / self.Nmesh)
            ki = wi * self.Nmesh / self.BoxSize[d]
            xi = ri * self.BoxSize[d] / self.Nmesh

            w.append(wi.reshape(s))
            r.append(ri.reshape(t))
            k.append(ki.reshape(s))
            x.append(xi.reshape(t))

        self.w = w
        self.r = r
        self.k = k
        self.x = x

        # set the painter
        # Dispatch on the lower-cased name so that e.g. 'CIC' is accepted.
        self.paintbrush = paintbrush.lower()
        if self.paintbrush == 'cic':
            self.painter = cic.paint
        elif self.paintbrush == 'tsc':
            self.painter = tsc.paint
        else:
            raise ValueError("valid `painter` values are: ['cic', 'tsc']")

예제 #18

0

파일 보기

from mpi4py import MPI
import numpy
import pfft

# Only rank 0 prints the banner.
if MPI.COMM_WORLD.rank == 0:
    # print(...) with one parenthesised argument is valid on both
    # Python 2 and Python 3 and emits the same text.
    print(
        """
This example performs a in-place transform, with a naive slab decomposition.

In place transform is achieved by providing a single buffer object to pfft.Plan.
Consequently, calls to plan.execute we also provide only a single buffer object.
""")

# A 1-D process mesh of exactly four ranks over the world communicator.
procmesh = pfft.ProcMesh([4], comm=MPI.COMM_WORLD)
partition = pfft.Partition(
    pfft.Type.PFFT_C2C, [8, 8], procmesh,
    pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_DESTROY_INPUT)

# Ranks report one after another: all ranks meet at the barrier, and only
# the rank whose turn it is prints.
for irank in range(4):
    MPI.COMM_WORLD.barrier()
    if irank != procmesh.rank:
        continue
    # Each value is wrapped in a 1-tuple so that a sequence value is
    # formatted as a whole and not unpacked as several format arguments.
    print('My rank is %s' % (procmesh.this,))
    print('local_i_start %s' % (partition.local_i_start,))
    print('local_o_start %s' % (partition.local_o_start,))
    print('i_edges %s' % (partition.i_edges,))
    print('o_edges %s' % (partition.o_edges,))

buffer = pfft.LocalBuffer(partition)

# Passing a single buffer requests an in-place plan (see the banner text).
plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer)
iplan = pfft.Plan(

예제 #19

0

파일 보기

    def __init__(self, Nmesh, BoxSize=1.0, comm=None, np=None, dtype='f8', plan_method='estimate', resampler='cic'):
        """Create a PM object.

        Parameters
        ----------
        Nmesh : sequence of int
            Mesh size along each axis; its length sets ``self.ndim``.
        BoxSize : scalar or sequence
            Assigned into an ``f8`` array with one entry per axis.
        comm : communicator or None
            ``None`` is replaced by ``MPI.COMM_WORLD``.
        np : sequence or None
            Process mesh dimensions. When ``None`` it is chosen from
            ``len(Nmesh)``: ``pfft.split_size_2d(comm.size)`` for three or
            more axes, ``[comm.size]`` for two, ``[]`` for one.
        dtype : dtype-like
            ``'f8'`` or ``'f4'``; selects the double or single precision
            transform types.
        plan_method : str
            One of ``'estimate'``, ``'measure'``, ``'exhaustive'``; any
            other key raises ``KeyError`` from the lookup table.
        resampler : object
            Passed to ``FindResampler``; the result is stored as
            ``self.resampler``.

        Raises
        ------
        ValueError
            If ``dtype`` is neither f8 nor f4.
        """
        if comm is None:
            comm = MPI.COMM_WORLD

        self.comm = comm

        # Default process mesh depends on the mesh dimensionality.
        if np is None:
            if len(Nmesh) >= 3:
                np = pfft.split_size_2d(self.comm.size)
            elif len(Nmesh) == 2:
                np = [self.comm.size]
            elif len(Nmesh) == 1:
                np = []

        dtype = numpy.dtype(dtype)
        self.dtype = dtype

        # Pick the forward/backward transform types matching the precision.
        if dtype == numpy.dtype('f8'):
            forward = pfft.Type.PFFT_R2C
            backward = pfft.Type.PFFT_C2R
        elif dtype == numpy.dtype('f4'):
            forward = pfft.Type.PFFTF_R2C
            backward = pfft.Type.PFFTF_C2R
        else:
            raise ValueError("dtype must be f8 or f4")

        self.procmesh = pfft.ProcMesh(np, comm=comm)
        self.Nmesh = numpy.array(Nmesh, dtype='i8')
        self.ndim = len(self.Nmesh)
        self.BoxSize = numpy.empty(len(Nmesh), dtype='f8')
        self.BoxSize[:] = BoxSize
        self.partition = pfft.Partition(forward,
            self.Nmesh,
            self.procmesh,
            pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_PADDED_R2C)

        # Two scratch buffers used only to create the plans below.
        bufferin = pfft.LocalBuffer(self.partition)
        bufferout = pfft.LocalBuffer(self.partition)

        # Translate the user-facing name into the pfft planner flag.
        plan_method = {
            "estimate": pfft.Flags.PFFT_ESTIMATE,
            "measure": pfft.Flags.PFFT_MEASURE,
            "exhaustive": pfft.Flags.PFFT_EXHAUSTIVE,
            } [plan_method]

        # Out-of-place plans: bufferin -> bufferout and back.
        self.forward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
                bufferin, bufferout, forward,
                plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_R2C)
        self.backward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
                bufferout, bufferin, backward,
                plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_C2R)

        # In-place plans: the same buffer is given as input and output.
        self.ipforward = pfft.Plan(self.partition, pfft.Direction.PFFT_FORWARD,
                bufferin, bufferin, forward,
                plan_method | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_R2C)
        self.ipbackward = pfft.Plan(self.partition, pfft.Direction.PFFT_BACKWARD,
                bufferout, bufferout, backward,
                plan_method | pfft.Flags.PFFT_TRANSPOSED_IN | pfft.Flags.PFFT_TUNE | pfft.Flags.PFFT_PADDED_C2R)

        self.domain = domain.GridND(self.partition.i_edges, comm=self.comm)

        k = []
        x = []
        w = []
        r = []
        o_ind = []
        i_ind = []

        # Per-axis index and coordinate arrays. Each is reshaped so that
        # only axis `d` has a length other than 1.
        for d in range(self.partition.ndim):
            t = numpy.ones(self.partition.ndim, dtype='intp')
            s = numpy.ones(self.partition.ndim, dtype='intp')
            t[d] = self.partition.local_i_shape[d]
            s[d] = self.partition.local_o_shape[d]

            # Integer indices: local range offset by the local start.
            i_indi = numpy.arange(t[d], dtype='intp') + self.partition.local_i_start[d]
            o_indi = numpy.arange(s[d], dtype='intp') + self.partition.local_o_start[d]

            # The same indices as f4 values, used for the coordinates below.
            wi = numpy.arange(s[d], dtype='f4') + self.partition.local_o_start[d]
            ri = numpy.arange(t[d], dtype='f4') + self.partition.local_i_start[d]

            # Wrap indices in the upper half of the axis to negative values.
            wi[wi >= self.Nmesh[d] // 2] -= self.Nmesh[d]
            ri[ri >= self.Nmesh[d] // 2] -= self.Nmesh[d]

            wi *= (2 * numpy.pi / self.Nmesh[d])
            ki = wi * self.Nmesh[d] / self.BoxSize[d]
            xi = ri * self.BoxSize[d] / self.Nmesh[d]

            o_ind.append(o_indi.reshape(s))
            i_ind.append(i_indi.reshape(t))
            w.append(wi.reshape(s))
            r.append(ri.reshape(t))
            k.append(ki.reshape(s))
            x.append(xi.reshape(t))

        self.i_ind = i_ind
        self.o_ind = o_ind
        self.w = w
        self.r = r
        self.k = k
        self.x = x

        # Transform from simulation unit to local grid unit.
        self.affine = Affine(self.partition.ndim,
                    translate=-self.partition.local_i_start,
                    scale=1.0 * self.Nmesh / self.BoxSize,
                    period = self.Nmesh)

        # Transform from global grid unit to local grid unit.
        self.affine_grid = Affine(self.partition.ndim,
                    translate=-self.partition.local_i_start,
                    scale=1.0,
                    period = self.Nmesh)

        self.resampler = FindResampler(resampler)

예제 #20

0

파일 보기

def main(comm):
    """Compute the per-axis gradient of a test field through forward and
    backward transforms and print it.

    The field ``dx**2 + dx*dy + dy**2`` is set up on an 8x8 mesh and
    transformed in place. For each axis the spectrum is multiplied by
    ``1j * k[d]`` and transformed back. ``cprint`` and ``gather`` are
    helpers defined outside this block; they are presumably a collective
    print and a gather of the distributed array -- confirm against their
    definitions.
    """
    Nmesh = [8, 8]

    # Nmesh is fixed to two axes above, so the 1-D mesh branch is the one
    # taken here.
    if len(Nmesh) == 3:
        procmesh = pfft.ProcMesh(pfft.split_size_2d(comm.size), comm=comm)
    else:
        procmesh = pfft.ProcMesh((comm.size, ), comm=comm)

    partition = pfft.Partition(
        pfft.Type.R2C, Nmesh, procmesh, pfft.Flags.PADDED_R2C
        | pfft.Flags.PFFT_TRANSPOSED_OUT | pfft.Flags.DESTROY_INPUT)

    # generate the coordinate support.

    k = [None] * partition.ndim
    x = [None] * partition.ndim
    for d in range(partition.ndim):
        # Local part of the output indices along axis d; entries at or
        # above n[d] // 2 are wrapped to negative values.
        k[d] = numpy.arange(partition.no[d])[partition.local_o_slice[d]]
        k[d][k[d] >= partition.n[d] // 2] -= partition.n[d]
        # set to the right numpy broadcast shape
        k[d] = k[d].reshape(
            [-1 if i == d else 1 for i in range(partition.ndim)])

        # Local part of the input indices along axis d.
        x[d] = numpy.arange(partition.ni[d])[partition.local_i_slice[d]]
        # set to the right numpy broadcast shape
        x[d] = x[d].reshape(
            [-1 if i == d else 1 for i in range(partition.ndim)])

    # allocate memory
    buffer1 = pfft.LocalBuffer(partition)
    phi_disp = buffer1.view_input()

    buffer2 = pfft.LocalBuffer(partition)
    phi_spec = buffer2.view_output()

    # forward plan
    # buffer2 is given as both input and output, i.e. an in-place plan.
    disp_to_spec_inplace = pfft.Plan(
        partition,
        pfft.Direction.PFFT_FORWARD,
        buffer2,
        buffer2,
        # the two lines below not needed after version 0.1.21
        # type=pfft.Type.R2C,
        # flags=pfft.Flags.TRANSPOSED_OUT | pfft.Flags.DESTROY_INPUT | pfft.Flags.PADDED_R2C
    )

    buffer3 = pfft.LocalBuffer(partition)
    grad_spec = buffer3.view_output()

    buffer4 = pfft.LocalBuffer(partition)
    grad_disp = buffer4.view_input()

    # backward plan
    # Out of place: buffer3 (spectrum) -> buffer4 (real-space result).
    spec_to_disp = pfft.Plan(
        partition,
        pfft.Direction.PFFT_BACKWARD,
        buffer3,
        buffer4,
        # the two lines below not needed after version 0.1.21
        # type=pfft.Type.C2R,
        # flags=pfft.Flags.TRANSPOSED_IN | pfft.Flags.DESTROY_INPUT | pfft.Flags.PADDED_C2R
    )

    # to do : fill in initial value
    # Coordinates shifted by half the mesh size plus 0.5 along each axis.
    dx = x[0] - Nmesh[0] * 0.5 + 0.5
    dy = x[1] - Nmesh[1] * 0.5 + 0.5
    phi_disp[...] = dx**2 + dx * dy + dy**2

    cprint('phi =', gather(partition, phi_disp).round(2), comm=comm)

    # copy in to the buffer for inplace transform
    # this preserves value of phi_disp
    phi_spec.base.view_input()[...] = phi_disp
    disp_to_spec_inplace.execute(phi_spec.base, phi_spec.base)

    # One real-space array per axis to collect the gradient components.
    all_grad_disp = numpy.zeros([partition.ndim] + list(phi_disp.shape),
                                dtype=grad_disp.dtype)

    #    cprint('phi_k =', gather(partition, phi_spec, mode='output').round(2), comm=comm)

    for d in range(partition.ndim):
        # Multiply the spectrum by 1j * k[d], then transform back.
        # NOTE(review): k holds plain integer indices and no normalisation
        # is applied here -- confirm the intended scaling of the result.
        grad_spec[...] = phi_spec[...] * (k[d] * 1j)
        spec_to_disp.execute(grad_spec.base, grad_disp.base)
        # copy the gradient along d th direction
        all_grad_disp[d] = grad_disp

    # now do your thing.

    for d in range(partition.ndim):
        cprint('dim =',
               gather(partition, all_grad_disp[d]).round(2),
               comm=comm)