Example #1
    def _make_wait(self, f, hse, key, msg=None):
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')

        sizes = [
            FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
            for i in range(len(f._dist_dimensions))
        ]
        scatter = Call('scatter_%s' % key, [bufs] + sizes + [f] + ofss)

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[waitsend, waitrecv, scatter])
        parameters = ([f] + ofss + [fromrank, msg])
        return Callable('wait_%s' % key, iet, 'void', parameters, ('static', ))
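
For orientation, here is a minimal, Devito-free sketch of the MPI_PROC_NULL guard wrapped around `scatter` above: the scatter is skipped whenever the neighbour rank is the null process, so halo values on the physical domain boundary are left untouched. The sentinel value and callables below are illustrative only.

MPI_PROC_NULL = -1  # illustrative sentinel; the real value is implementation-defined

def guarded_scatter(fromrank, scatter):
    # mirrors Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)
    if fromrank != MPI_PROC_NULL:
        scatter()

calls = []
guarded_scatter(3, lambda: calls.append('scatter'))              # interior neighbour
guarded_scatter(MPI_PROC_NULL, lambda: calls.append('scatter'))  # physical boundary
assert calls == ['scatter']
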
Example #2
    def _make_sendrecv(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        bufg = FieldFromPointer(msg._C_field_bufg, msg)
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
                 for i in range(len(f._dist_dimensions))]

        gather = Call('gather%s' % key, [bufg] + sizes + [f] + ofsg)
        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        recv = IrecvCall([bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                         fromrank, Integer(13), comm, rrecv])
        send = IsendCall([bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                         torank, Integer(13), comm, rsend])

        iet = List(body=[recv, gather, send])
        parameters = ([f] + ofsg + [fromrank, torank, comm, msg])
        return SendRecv(key, iet, parameters, bufg, bufs)
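
The `count` passed to the Irecv/Isend calls above is simply the product of the per-dimension buffer sizes; a self-contained sketch with made-up numbers:

from functools import reduce
from operator import mul

sizes = [4, 7, 3]              # illustrative per-dimension buffer sizes
count = reduce(mul, sizes, 1)  # same reduction as in the example
assert count == 84
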
Example #3
    def _make_halowait(self, f, hse, key, msg=None):
        cast = cast_mapper[(f.dtype, '*')]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)

        sizes = [FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
                 for i in range(len(f._dist_dimensions))]
        ofss = [FieldFromComposite('%s[%d]' % (msg._C_field_ofss, i), msgi)
                for i in range(len(f._dist_dimensions))]
        ofss = [fixed.get(d) or ofss.pop(0) for d in f.dimensions]

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Call('scatter%s' % key, [cast(bufs)] + sizes + [f] + ofss)
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([waitsend, waitrecv, scatter], dim, ncomms - 1)
        parameters = ([f] + list(fixed.values()) + [msg, ncomms])
        return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
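
A toy, Devito-free sketch of the offset-merging idiom `[fixed.get(d) or ofss.pop(0) for d in f.dimensions]` used above: dimensions pinned by `hse.loc_indices` keep their fixed symbol, while the remaining dimensions consume the per-message offsets in order. Plain strings stand in for symbols.

fixed = {'t': 'otime'}                   # loc_indices mapped to fixed offset symbols
dimensions = ['t', 'x', 'y']             # stands in for f.dimensions
ofss = ['msg_ofss_0', 'msg_ofss_1']      # per-message offsets, one per dist dimension
merged = [fixed.get(d) or ofss.pop(0) for d in dimensions]
assert merged == ['otime', 'msg_ofss_0', 'msg_ofss_1']
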
Example #4
    def _make_poke(self, hs, key, msgs):
        lflag = Symbol(name='lflag')
        gflag = Symbol(name='gflag')

        # Init flags
        body = [Expression(DummyEq(lflag, 0)), Expression(DummyEq(gflag, 1))]

        # For each msg, build an Iteration calling MPI_Test on all peers
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            update = AugmentedExpression(DummyEq(gflag, lflag), '&')

            body.append(
                Iteration([testsend, update, testrecv, update], dim,
                          msg.npeers - 1))

        body.append(Return(gflag))

        return make_efunc('pokempi%d' % key, List(body=body), retval='int')
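
A minimal model (plain Python, not IET nodes) of the flag logic assembled above: `gflag` starts at 1 and is AND-ed with every MPI_Test result, so the poke routine returns 1 only if all outstanding send/receive requests of all peers have completed.

def poke(test_results):
    lflag, gflag = 0, 1            # mirrors the two init Expressions above
    for lflag in test_results:     # one entry per MPI_Test call
        gflag &= lflag
    return gflag

assert poke([1, 1, 1, 1]) == 1     # everything completed
assert poke([1, 0, 1, 1]) == 0     # at least one request still pending
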
Example #5
    def _make_haloupdate(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufg = FieldFromComposite(msg._C_field_bufg, msgi)
        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)
        torank = FieldFromComposite(msg._C_field_to, msgi)

        sizes = [
            FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [
            FieldFromComposite('%s[%d]' % (msg._C_field_ofsg, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [fixed.get(d) or ofsg.pop(0) for d in f.dimensions]

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        # Make Irecv/Isend
        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([recv, gather, send], dim, ncomms - 1)
        parameters = ([f, comm, msg, ncomms]) + list(fixed.values())
        return Callable('haloupdate%d' % key, iet, 'void', parameters,
                        ('static', ))
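
The `ncomms - 1` upper bound follows from the convention noted in the comment: an Iteration emits an inclusive (`<=`) loop limit, so covering `ncomms` messages requires the bound `ncomms - 1`. A one-line illustration:

ncomms = 3
assert list(range(0, (ncomms - 1) + 1)) == [0, 1, 2]  # i <= ncomms - 1 visits ncomms values
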
Example #6
def sendrecv(f, fixed):
    """Construct an IET performing a halo exchange along arbitrary
    dimension and side."""
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions if d not in fixed]
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype, scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype, scope='heap')

    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype, scope='external')

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    parameters = [bufg] + list(bufg.shape) + [dat] + list(dat.shape) + ofsg
    gather = Call('gather_%s' % f.name, parameters)
    parameters = [bufs] + list(bufs.shape) + [dat] + list(dat.shape) + ofss
    scatter = Call('scatter_%s' % f.name, parameters)

    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    count = reduce(mul, bufs.shape, 1)
    recv = Call('MPI_Irecv', [bufs, count, Macro(numpy_to_mpitypes(f.dtype)),
                              fromrank, '13', comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(numpy_to_mpitypes(f.dtype)),
                              torank, '13', comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    iet = List(body=[ArrayCast(dat), iet_insert_C_decls(iet)])
    parameters = ([dat] + list(dat.shape) + list(bufs.shape) +
                  ofsg + ofss + [fromrank, torank, comm])
    return Callable('sendrecv_%s' % f.name, iet, 'void', parameters, ('static',))
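
A standalone sketch of the buffer-dimension selection at the top of `sendrecv`: the exchange buffers span every dimension of `f` except those appearing in `fixed`. Plain strings stand in for Dimension objects.

dimensions = ['t', 'x', 'y']     # stands in for the roots of f.dimensions
fixed = {'t'}                    # dimensions held fixed for this exchange
buf_dims = ['buf_%s' % d for d in dimensions if d not in fixed]
assert buf_dims == ['buf_x', 'buf_y']
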
Example #7
def _make_thread_init(threads, tfunc, isdata, sdata, sregistry):
    d = threads.index
    if threads.size == 1:
        callback = lambda body: body
    else:
        callback = lambda body: Iteration(body, d, threads.size - 1)

    # A unique identifier for each created pthread
    pthreadid = d + threads.base_id

    # Initialize `sdata`
    arguments = list(isdata.parameters)
    arguments[-3] = sdata.symbolic_base + d
    arguments[-2] = pthreadid
    arguments[-1] = sregistry.deviceid
    call0 = Call(isdata.name, arguments)

    # Create pthreads
    call1 = Call('pthread_create', (threads.symbolic_base + d,
                                    Macro('NULL'),
                                    Call(tfunc.name, [], is_indirect=True),
                                    sdata.symbolic_base + d))

    threadsinit = List(
        header=c.Comment("Fire up and initialize `%s`" % threads.name),
        body=callback([call0, call1])
    )

    return threadsinit
Example #8
def _make_thread_func(name, iet, root, threads, sregistry):
    # Create the SharedData, i.e. the data structure used by the main thread
    # to pass information down to the child thread(s)
    required, parameters, dynamic_parameters = diff_parameters(iet, root)
    parameters = sorted(parameters, key=lambda i: i.is_Function)  # Allow casting
    sdata = SharedData(name=sregistry.make_name(prefix='sdata'), npthreads=threads.size,
                       fields=required, dynamic_fields=dynamic_parameters)

    sbase = sdata.symbolic_base
    sid = sdata.symbolic_id

    # Create a Callable to initialize `sdata` with the known const values
    iname = 'init_%s' % sdata.dtype._type_.__name__
    ibody = [DummyExpr(FieldFromPointer(i._C_name, sbase), i._C_symbol)
             for i in parameters]
    ibody.extend([
        BlankLine,
        DummyExpr(FieldFromPointer(sdata._field_id, sbase), sid),
        DummyExpr(FieldFromPointer(sdata._field_flag, sbase), 1)
    ])
    iparameters = parameters + [sdata, sid]
    isdata = Callable(iname, ibody, 'void', iparameters, 'static')

    # Prepend the SharedData fields available upon thread activation
    preactions = [DummyExpr(i, FieldFromPointer(i.name, sbase))
                  for i in dynamic_parameters]

    # Append the flag reset
    postactions = [List(body=[
        BlankLine,
        DummyExpr(FieldFromPointer(sdata._field_flag, sbase), 1)
    ])]

    iet = List(body=preactions + [iet] + postactions)

    # The thread has work to do when it receives the signal that all locks have
    # been set to 0 by the main thread
    iet = Conditional(CondEq(FieldFromPointer(sdata._field_flag, sbase), 2), iet)

    # The thread keeps spinning until the alive flag is set to 0 by the main thread
    iet = While(CondNe(FieldFromPointer(sdata._field_flag, sbase), 0), iet)

    # pthread functions expect exactly one argument, a void*, and must return void*
    tretval = 'void*'
    tparameter = VoidPointer('_%s' % sdata.name)

    # Unpack `sdata`
    unpack = [PointerCast(sdata, tparameter), BlankLine]
    for i in parameters:
        if i.is_AbstractFunction:
            unpack.extend([Dereference(i, sdata), PointerCast(i)])
        else:
            unpack.append(DummyExpr(i, FieldFromPointer(i.name, sbase)))
    unpack.append(DummyExpr(sid, FieldFromPointer(sdata._field_id, sbase)))
    unpack.append(BlankLine)
    iet = List(body=unpack + [iet, BlankLine, Return(Macro('NULL'))])

    tfunc = ThreadFunction(name, iet, tretval, tparameter, 'static')

    return tfunc, isdata, sdata
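
A toy model (plain Python, not Devito/pthreads code) of the flag protocol wired up above: the child thread spins while the flag is nonzero, runs the payload when the main thread sets the flag to 2, resets the flag to 1 once done, and exits when the flag is set to 0. The helper below is illustrative only.

def thread_loop(flag_reads, run_payload):
    # flag_reads: successive values the thread observes in the sdata flag field
    for flag in flag_reads:
        if flag == 0:          # While(CondNe(flag, 0), ...) terminates
            return 'exited'
        if flag == 2:          # Conditional(CondEq(flag, 2), iet) fires
            run_payload()      # ...after which the postactions reset the flag to 1

log = []
assert thread_loop([1, 2, 1, 0], lambda: log.append('work')) == 'exited'
assert log == ['work']
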
Example #9
def test_null_init():
    grid = Grid(shape=(10, 10))

    u = Function(name='u', grid=grid)

    expr = DummyExpr(u.indexed, Macro('NULL'), init=True)

    assert str(expr) == "float * u = NULL;"
    assert expr.defines == (u.indexed, )
Example #10
    def _make_sendrecv(self, f, hse, key, **kwargs):
        comm = f.grid.distributor._obj_comm

        buf_dims = [
            Dimension(name='buf_%s' % d.root) for d in f.dimensions
            if d not in hse.loc_indices
        ]
        bufg = Array(name='bufg',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     padding=0,
                     scope='heap')
        bufs = Array(name='bufs',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     padding=0,
                     scope='heap')

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        gather = Call('gather_%s' % key,
                      [bufg] + list(bufg.shape) + [f] + ofsg)
        scatter = Call('scatter_%s' % key,
                       [bufs] + list(bufs.shape) + [f] + ofss)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)
        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        count = reduce(mul, bufs.shape, 1)
        rrecv = MPIRequestObject(name='rrecv')
        rsend = MPIRequestObject(name='rsend')
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
        parameters = ([f] + list(bufs.shape) + ofsg + ofss +
                      [fromrank, torank, comm])
        return Callable('sendrecv_%s' % key, iet, 'void', parameters,
                        ('static', ))
Example #11
    def _make_poke(self, hs, key, msgs):
        flag = Symbol(name='flag')
        initflag = LocalExpression(DummyEq(flag, 0))

        body = [initflag]
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])

            body.append(Iteration([testsend, testrecv], dim, msg.npeers - 1))

        return make_efunc('pokempi%d' % key, body)
Example #12
def _make_thread_finalize(threads, sdata):
    d = threads.index
    if threads.size == 1:
        callback = lambda body: body
    else:
        callback = lambda body: Iteration(body, d, threads.size - 1)

    threadswait = List(
        header=c.Comment("Wait for completion of `%s`" % threads.name),
        body=callback([
            While(CondEq(FieldFromComposite(sdata._field_flag, sdata[d]), 2)),
            DummyExpr(FieldFromComposite(sdata._field_flag, sdata[d]), 0),
            Call('pthread_join', (threads[d], Macro('NULL')))
        ]))

    return threadswait
Example #13
    def _(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)

        objcomm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                objcomm = i
                break

        deviceid = DeviceID()
        device_nvidia = Macro('acc_device_nvidia')
        if objcomm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = DefFunction('acc_get_num_devices', device_nvidia)
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            asdn_then = Call('acc_set_device_num', [deviceid, device_nvidia])
            asdn_else = Call('acc_set_device_num',
                             [rank % ngpus, device_nvidia])

            body = [
                Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1), asdn_then,
                    List(body=[rank_decl, rank_init, ngpus_init, asdn_else]))
            ]
        else:
            body = [
                Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    Call('acc_set_device_num', [deviceid, device_nvidia]))
            ]

        init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))
        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet, {'args': deviceid}
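
The fallback branch binds each MPI rank to a GPU via `rank % ngpus`; a trivial standalone illustration of that round-robin mapping (the numbers are made up):

ngpus = 4
for rank in range(8):                 # e.g. 8 MPI ranks sharing one node
    devicenum = rank % ngpus          # same arithmetic as the `asdn_else` branch
    print('rank %d -> device %d' % (rank, devicenum))
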
Example #14
    def new_ops_arg(self, indexed):
        """
        Create an :class:`Indexed` node using the OPS representation.

        Parameters
        ----------
        indexed : :class:`Indexed`
            Indexed object using the Devito representation.

        Returns
        -------
        :class:`Indexed`
            Indexed node using the OPS representation.
        """

        # Build the OPS arg identifier
        time_index = split_affine(indexed.indices[TimeFunction._time_position])
        ops_arg_id = '%s%s%s' % (indexed.name, time_index.var,
                                 time_index.shift)

        if ops_arg_id not in self.ops_args:
            # Create the indexed object
            ops_arg = Array(name=ops_arg_id,
                            dimensions=[Dimension(name=namespace['ops_acc'])],
                            dtype=indexed.dtype)

            self.ops_args[ops_arg_id] = ops_arg
        else:
            ops_arg = self.ops_args[ops_arg_id]

        # Get the space indices
        space_indices = [
            e for i, e in enumerate(indexed.indices)
            if i != TimeFunction._time_position
        ]

        # Define the Macro used in OPS arg index
        access_macro = Macro(
            'OPS_ACC%d(%s)' % (list(self.ops_args).index(ops_arg_id), ','.join(
                str(split_affine(i).shift) for i in space_indices)))

        # Create Indexed object representing the OPS arg access
        new_indexed = Indexed(ops_arg.indexed, access_macro)

        return new_indexed
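
A self-contained sketch of the two strings built in `new_ops_arg`, with plain Python values standing in for the Devito/OPS objects (names and shifts are illustrative):

name, time_var, time_shift = 'u', 't', 1
ops_arg_id = '%s%s%s' % (name, time_var, time_shift)
assert ops_arg_id == 'ut1'

arg_position = 0                      # index of ops_arg_id within self.ops_args
space_shifts = [0, -1]                # affine shifts of the space indices
access_macro = 'OPS_ACC%d(%s)' % (arg_position,
                                  ','.join(str(s) for s in space_shifts))
assert access_macro == 'OPS_ACC0(0,-1)'
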
Example #15
    def _initialize(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)
        comm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        device_nvidia = Macro('acc_device_nvidia')
        body = Call('acc_init', [device_nvidia])

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = DefFunction('acc_get_num_devices', device_nvidia)
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            devicenum = Symbol(name='devicenum')
            devicenum_init = LocalExpression(DummyEq(devicenum, rank % ngpus))

            set_device_num = Call('acc_set_device_num',
                                  [devicenum, device_nvidia])

            body = [
                rank_decl, rank_init, ngpus_init, devicenum_init,
                set_device_num, body
            ]

        init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))

        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet
Example #16
# OPS API
namespace['ops_init'] = 'ops_init'
namespace['ops_partition'] = 'ops_partition'
namespace['ops_timing_output'] = 'ops_timing_output'
namespace['ops_exit'] = 'ops_exit'
namespace['ops_par_loop'] = 'ops_par_loop'
namespace['ops_dat_fetch_data'] = lambda ops_dat, data: Call(
    name='ops_dat_fetch_data', arguments=[ops_dat, 0, data])

namespace['ops_decl_stencil'] = Function(name='ops_decl_stencil')
namespace['ops_decl_block'] = Function(name='ops_decl_block')
namespace['ops_decl_dat'] = Function(name='ops_decl_dat')
namespace['ops_arg_dat'] = Function(name='ops_arg_dat')
namespace['ops_arg_gbl'] = Function(name='ops_arg_gbl')

namespace['ops_read'] = Macro('OPS_READ')
namespace['ops_write'] = Macro('OPS_WRITE')

namespace['ops_stencil_type'] = 'ops_stencil'
namespace['ops_block_type'] = 'ops_block'
namespace['ops_dat_type'] = 'ops_dat'

# Naming conventions
namespace['ops_define_dimension'] = lambda i: '#define OPS_%sD' % i
namespace['ops_kernel'] = lambda i: 'OPS_Kernel_%s' % i
namespace['ops_stencil_name'] = lambda dims, name, pts: 's%dd_%s_%dpt' % (
    dims, name, pts)
namespace['ops_dat_dim'] = lambda i: '%s_dim' % i
namespace['ops_dat_base'] = lambda i: '%s_base' % i
namespace['ops_dat_d_p'] = lambda i: '%s_d_p' % i
namespace['ops_dat_d_m'] = lambda i: '%s_d_m' % i
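
The naming-convention lambdas can be exercised directly; a standalone sketch follows, with the lambdas redefined locally so the snippet runs on its own:

ops_define_dimension = lambda i: '#define OPS_%sD' % i
ops_kernel = lambda i: 'OPS_Kernel_%s' % i
ops_stencil_name = lambda dims, name, pts: 's%dd_%s_%dpt' % (dims, name, pts)

assert ops_define_dimension(3) == '#define OPS_3D'
assert ops_kernel(0) == 'OPS_Kernel_0'
assert ops_stencil_name(2, 'u', 5) == 's2d_u_5pt'
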
Example #17
class AccBB(PragmaLangBB):

    mapper = {
        # Misc
        'name':
        'OpenACC',
        'header':
        'openacc.h',
        # Platform mapping
        AMDGPUX:
        Macro('acc_device_radeon'),
        NVIDIAX:
        Macro('acc_device_nvidia'),
        # Runtime library
        'init':
        lambda args: Call('acc_init', args),
        'num-devices':
        lambda args: DefFunction('acc_get_num_devices', args),
        'set-device':
        lambda args: Call('acc_set_device_num', args),
        # Pragmas
        'atomic':
        c.Pragma('acc atomic update'),
        'map-enter-to':
        lambda i, j: c.Pragma('acc enter data copyin(%s%s)' % (i, j)),
        'map-enter-to-wait':
        lambda i, j, k: (c.Pragma('acc enter data copyin(%s%s) async(%s)' %
                                  (i, j, k)), c.Pragma('acc wait(%s)' % k)),
        'map-enter-alloc':
        lambda i, j: c.Pragma('acc enter data create(%s%s)' % (i, j)),
        'map-present':
        lambda i, j: c.Pragma('acc data present(%s%s)' % (i, j)),
        'map-wait':
        lambda i: c.Pragma('acc wait(%s)' % i),
        'map-update':
        lambda i, j: c.Pragma('acc exit data copyout(%s%s)' % (i, j)),
        'map-update-host':
        lambda i, j: c.Pragma('acc update self(%s%s)' % (i, j)),
        'map-update-host-async':
        lambda i, j, k: c.Pragma('acc update self(%s%s) async(%s)' %
                                 (i, j, k)),
        'map-update-device':
        lambda i, j: c.Pragma('acc update device(%s%s)' % (i, j)),
        'map-update-device-async':
        lambda i, j, k: c.Pragma('acc update device(%s%s) async(%s)' %
                                 (i, j, k)),
        'map-release':
        lambda i, j, k: c.Pragma('acc exit data delete(%s%s)%s' % (i, j, k)),
        'map-exit-delete':
        lambda i, j, k: c.Pragma('acc exit data delete(%s%s)%s' % (i, j, k)),
        'memcpy-to-device':
        lambda i, j, k: Call('acc_memcpy_to_device', [i, j, k]),
        'memcpy-to-device-wait':
        lambda i, j, k, l: List(body=[
            Call('acc_memcpy_to_device_async', [i, j, k, l]),
            Call('acc_wait', [l])
        ]),
        'device-get':
        Call('acc_get_device_num'),
        'device-alloc':
        lambda i, *a, retobj=None: Call(
            'acc_malloc', (i, ), retobj=retobj, cast=True),
        'device-free':
        lambda i, *a: Call('acc_free', (i, ))
    }
    mapper.update(CBB.mapper)

    Region = OmpRegion
    HostIteration = OmpIteration  # Host parallelism still goes via OpenMP
    DeviceIteration = DeviceAccIteration

    @classmethod
    def _map_to_wait(cls, f, imask=None, queueid=None):
        sections = cls._make_sections_from_imask(f, imask)
        return cls.mapper['map-enter-to-wait'](f.name, sections, queueid)

    @classmethod
    def _map_present(cls, f, imask=None):
        sections = cls._make_sections_from_imask(f, imask)
        return cls.mapper['map-present'](f.name, sections)

    @classmethod
    def _map_delete(cls, f, imask=None, devicerm=None):
        sections = cls._make_sections_from_imask(f, imask)
        if devicerm is not None:
            cond = ' if(%s)' % devicerm.name
        else:
            cond = ''
        return cls.mapper['map-exit-delete'](f.name, sections, cond)

    @classmethod
    def _map_update_host_async(cls, f, imask=None, queueid=None):
        sections = cls._make_sections_from_imask(f, imask)
        return cls.mapper['map-update-host-async'](f.name, sections, queueid)

    @classmethod
    def _map_update_device_async(cls, f, imask=None, queueid=None):
        sections = cls._make_sections_from_imask(f, imask)
        return cls.mapper['map-update-device-async'](f.name, sections, queueid)
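
The classmethods above just feed a function name and its array sections into the string-formatting lambdas of `mapper`; a Devito-free sketch of that pattern, using plain strings in place of cgen.Pragma objects (the section and queue strings are illustrative):

map_exit_delete = lambda i, j, k: 'acc exit data delete(%s%s)%s' % (i, j, k)
map_update_host_async = lambda i, j, k: 'acc update self(%s%s) async(%s)' % (i, j, k)

print(map_exit_delete('u', '[0:nx][0:ny]', ' if(devicerm)'))
print(map_update_host_async('u', '[0:nx][0:ny]', 'qid'))
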
Example #18
    def _make_sendrecv(self, f, fixed, extra=None):
        extra = extra or []
        comm = f.grid.distributor._obj_comm

        buf_dims = [
            Dimension(name='buf_%s' % d.root) for d in f.dimensions
            if d not in fixed
        ]
        bufg = Array(name='bufg',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     scope='heap')
        bufs = Array(name='bufs',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     scope='heap')

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        args = [bufg] + list(bufg.shape) + [f] + ofsg + extra
        gather = Call('gather%dd' % f.ndim, args)
        args = [bufs] + list(bufs.shape) + [f] + ofss + extra
        scatter = Call('scatter%dd' % f.ndim, args)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)
        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        srecv = MPIStatusObject(name='srecv')
        ssend = MPIStatusObject(name='ssend')
        rrecv = MPIRequestObject(name='rrecv')
        rsend = MPIRequestObject(name='rsend')

        count = reduce(mul, bufs.shape, 1)
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        waitrecv = Call('MPI_Wait', [rrecv, srecv])
        waitsend = Call('MPI_Wait', [rsend, ssend])

        iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
        iet = List(body=iet_insert_C_decls(iet))
        parameters = ([f] + list(bufs.shape) + ofsg + ofss +
                      [fromrank, torank, comm] + extra)
        return Callable('sendrecv%dd' % f.ndim, iet, 'void', parameters,
                        ('static', ))