def _make_wait(self, f, hse, key, msg=None):
    """
    Build the static ``wait_<key>`` Callable for `f`.

    The generated body waits on the send request and then on the receive
    request stored in `msg` (via ``MPI_Wait``), and finally calls
    ``scatter_<key>`` unless ``fromrank`` is ``MPI_PROC_NULL``.

    `hse` is accepted for signature uniformity but is not read here.
    """
    def from_msg(field):
        # Access `msg-><field>`
        return FieldFromPointer(field, msg)

    recv_buffer = from_msg(msg._C_field_bufs)

    # One scatter offset per dimension of `f`, plus the sender rank
    scatter_ofs = [Symbol(name='os%s' % d.root) for d in f.dimensions]
    sender = Symbol(name='fromrank')

    # One buffer extent per distributed dimension
    ndist = len(f._dist_dimensions)
    extents = [from_msg('%s[%d]' % (msg._C_field_sizes, i)) for i in range(ndist)]

    scatter_call = Call('scatter_%s' % key,
                        [recv_buffer] + extents + [f] + scatter_ofs)

    # Halo values along the domain boundary must stay untouched: there the
    # sender is MPI.PROC_NULL, hence the guard around the `scatter`
    not_null_sender = CondNe(sender, Macro('MPI_PROC_NULL'))
    guarded_scatter = Conditional(not_null_sender, scatter_call)

    # Wait on the send request first, then on the receive request
    waits = [
        Call('MPI_Wait', [Byref(from_msg(request)), Macro('MPI_STATUS_IGNORE')])
        for request in (msg._C_field_rsend, msg._C_field_rrecv)
    ]

    iet = List(body=waits + [guarded_scatter])
    parameters = [f] + scatter_ofs + [sender, msg]
    return Callable('wait_%s' % key, iet, 'void', parameters, ('static', ))
def _make_sendrecv(self, f, hse, key, msg=None):
    """
    Build the ``SendRecv`` routine for `f`.

    The generated body posts the receive, calls ``gather<key>`` (skipped
    when ``torank`` is ``MPI_PROC_NULL``), then posts the send. Both requests
    and both buffers are fields of `msg`.

    `hse` is accepted for signature uniformity but is not read here.
    """
    comm = f.grid.distributor._obj_comm

    def from_msg(field):
        # Access `msg-><field>`
        return FieldFromPointer(field, msg)

    send_buffer = from_msg(msg._C_field_bufg)
    recv_buffer = from_msg(msg._C_field_bufs)

    gather_ofs = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    sender = Symbol(name='fromrank')
    receiver = Symbol(name='torank')

    # One buffer extent per distributed dimension
    ndist = len(f._dist_dimensions)
    extents = [from_msg('%s[%d]' % (msg._C_field_sizes, i)) for i in range(ndist)]

    gather_call = Call('gather%s' % key,
                       [send_buffer] + extents + [f] + gather_ofs)
    # No point in gathering if the destination is MPI.PROC_NULL
    gather = Conditional(CondNe(receiver, Macro('MPI_PROC_NULL')), gather_call)

    # Number of exchanged elements: product of all extents
    count = reduce(mul, extents, 1)

    recv_request = Byref(from_msg(msg._C_field_rrecv))
    send_request = Byref(from_msg(msg._C_field_rsend))

    def p2p_args(buffer, peer, request):
        # Shared argument layout of the non-blocking send/receive calls
        return [buffer, count, Macro(dtype_to_mpitype(f.dtype)), peer,
                Integer(13), comm, request]

    recv = IrecvCall(p2p_args(recv_buffer, sender, recv_request))
    send = IsendCall(p2p_args(send_buffer, receiver, send_request))

    iet = List(body=[recv, gather, send])
    parameters = [f] + gather_ofs + [sender, receiver, comm, msg]
    return SendRecv(key, iet, parameters, send_buffer, recv_buffer)
def _make_haloupdate(self, f, hse, key, **kwargs):
    """
    Build the static ``haloupdate<key>`` Callable for `f`.

    For each non-fixed dimension of `f`, and for each side listed in
    ``hse.halos``, one sendrecv call is emitted through
    ``self._call_sendrecv``; `kwargs` are forwarded to it untouched.
    """
    distributor = f.grid.distributor
    nb = distributor._obj_neighborhood
    comm = distributor._obj_comm

    sendrecv = self._cache_dims[f.dimensions][0]

    # Dimensions in `hse.loc_indices` are accessed through a symbolic offset
    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    def view(d0, side, region):
        # Sizes and offsets of the data `region` of `f` along `d0`/`side`;
        # all dimensions other than `d0` use the NOPAD region
        sizes, ofs = [], []
        for d1 in f.dimensions:
            if d1 in fixed:
                ofs.append(fixed[d1])
                continue
            meta = f._C_get_field(region if d0 is d1 else NOPAD, d1, side)
            ofs.append(meta.offset)
            sizes.append(meta.size)
        return sizes, ofs

    # `(dim, side, region) -> (sizes, ofs)`, all symbolic. It tells which
    # values are to be sent (OWNED) or received (HALO)
    mapper = {
        (d0, side, region): view(d0, side, region)
        for d0, side, region in product(f.dimensions, (LEFT, RIGHT), (OWNED, HALO))
        if d0 not in fixed
    }

    def peer(tag, d):
        # Neighborhood field name: `tag` at the position of `d`, 'c' elsewhere
        name = ''.join(tag if i is d else 'c' for i in distributor.dimensions)
        return FieldFromPointer(name, nb)

    body = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = peer('r', d)
        lpeer = peer('l', d)

        # (send-to side, receive-from side, fromrank, torank)
        exchanges = (
            (LEFT, RIGHT, rpeer, lpeer),   # Sending to left, receiving from right
            (RIGHT, LEFT, lpeer, rpeer),   # Sending to right, receiving from left
        )
        for toside, fromside, fromrank, torank in exchanges:
            if (d, toside) not in hse.halos:
                continue
            sizes, ofsg = mapper[(d, toside, OWNED)]
            _, ofss = mapper[(d, fromside, HALO)]
            body.append(self._call_sendrecv(sendrecv.name, f, sizes, ofsg, ofss,
                                            fromrank, torank, comm, **kwargs))

    iet = List(body=body)
    parameters = [f, comm, nb] + list(fixed.values())
    return Callable('haloupdate%d' % key, iet, 'void', parameters, ('static', ))
def _make_haloupdate(self, f, hse, key, **kwargs):
    """
    Build the ``HaloUpdate`` routine for `f`.

    Only the entries of ``hse.halos`` whose ``dim`` is a tuple are
    considered. For each of them one sendrecv call is emitted through
    ``self._call_sendrecv``, receiving the position of the call in the
    body as the ``haloid`` keyword argument.
    """
    distributor = f.grid.distributor
    nb = distributor._obj_neighborhood
    comm = distributor._obj_comm

    sendrecv = self._cache_dims[f.dimensions][0]

    # Dimensions in `hse.loc_indices` are accessed through a symbolic offset
    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    def peer(sides):
        # Neighborhood field named after the first letter of each side
        return FieldFromPointer(''.join(s.name[0] for s in sides.values()), nb)

    def offsets(region, sides):
        # Per-dimension offsets; a fixed dimension uses its symbolic offset.
        # NOTE: `_C_get_field` is deliberately evaluated for all dimensions
        return [fixed.get(d, f._C_get_field(region, d, sides.get(d)).offset)
                for d in f.dimensions]

    # Only the halos required by the Diag scheme are retained
    # Note: `sorted` is only for deterministic code generation
    halos = sorted(h for h in hse.halos if isinstance(h.dim, tuple))

    body = []
    for haloid, (dims, tosides) in enumerate(halos):
        send_sides = OrderedDict(zip(dims, tosides))
        recv_sides = OrderedDict(zip(dims, [s.flip() for s in tosides]))

        # What gets sent: the OWNED region along the `tosides`
        sizes = [f._C_get_field(OWNED, d, s).size for d, s in send_sides.items()]
        torank = peer(send_sides)
        ofsg = offsets(OWNED, send_sides)

        # Where data gets received: the HALO region along the flipped sides
        fromrank = peer(recv_sides)
        ofss = offsets(HALO, recv_sides)

        # One call is appended per iteration, so `haloid` equals `len(body)`
        kwargs['haloid'] = haloid

        body.append(self._call_sendrecv(sendrecv.name, f, sizes, ofsg, ofss,
                                        fromrank, torank, comm, **kwargs))

    iet = List(body=body)
    parameters = [f, comm, nb] + list(fixed.values())
    return HaloUpdate(key, iet, parameters)
def _make_thread_func(name, iet, root, threads, sregistry):
    """
    Wrap `iet` into a thread function.

    Returns a 3-tuple ``(tfunc, isdata, sdata)``:

        * ``tfunc``: the ThreadFunction named `name`, which unpacks the
          SharedData and then runs `iet` inside a flag-driven loop;
        * ``isdata``: the Callable ``init_<sdata type name>`` initializing
          the SharedData fields;
        * ``sdata``: the SharedData itself.
    """
    # The SharedData is the data structure used by the main thread to pass
    # information down to the child thread(s)
    required, parameters, dynamic_parameters = diff_parameters(iet, root)
    parameters = sorted(parameters, key=lambda i: i.is_Function)  # Allow casting

    sdata = SharedData(name=sregistry.make_name(prefix='sdata'),
                       npthreads=threads.size,
                       fields=required,
                       dynamic_fields=dynamic_parameters)
    sbase = sdata.symbolic_base
    sid = sdata.symbolic_id

    def sfield(fname):
        # Access `sbase-><fname>`
        return FieldFromPointer(fname, sbase)

    # Callable initializing `sdata` with the known const values, then with
    # the thread id and a flag value of 1
    init_name = 'init_%s' % sdata.dtype._type_.__name__
    init_body = [DummyExpr(sfield(p._C_name), p._C_symbol) for p in parameters]
    init_body += [
        BlankLine,
        DummyExpr(sfield(sdata._field_id), sid),
        DummyExpr(sfield(sdata._field_flag), 1),
    ]
    init_parameters = parameters + [sdata, sid]
    isdata = Callable(init_name, init_body, 'void', init_parameters, 'static')

    # Before the actual work: load the SharedData fields available upon
    # thread activation
    load_dynamic = [DummyExpr(p, sfield(p.name)) for p in dynamic_parameters]

    # After the actual work: reset the flag to 1
    reset_flag = List(body=[BlankLine, DummyExpr(sfield(sdata._field_flag), 1)])

    work = List(body=load_dynamic + [iet] + [reset_flag])

    # The thread has work to do when it receives the signal that all locks
    # have been set to 0 by the main thread (flag == 2)
    work = Conditional(CondEq(sfield(sdata._field_flag), 2), work)

    # The thread keeps spinning until the alive flag is set to 0 by the
    # main thread
    loop = While(CondNe(sfield(sdata._field_flag), 0), work)

    # pthread functions expect exactly one argument, a void*, and must
    # return void*
    tretval = 'void*'
    tparameter = VoidPointer('_%s' % sdata.name)

    # Unpack `sdata`
    unpack = [PointerCast(sdata, tparameter), BlankLine]
    for p in parameters:
        if p.is_AbstractFunction:
            unpack += [Dereference(p, sdata), PointerCast(p)]
        else:
            unpack.append(DummyExpr(p, sfield(p.name)))
    unpack += [DummyExpr(sid, sfield(sdata._field_id)), BlankLine]

    tbody = List(body=unpack + [loop, BlankLine, Return(Macro('NULL'))])
    tfunc = ThreadFunction(name, tbody, tretval, tparameter, 'static')

    return tfunc, isdata, sdata
def update_halo(f, fixed):
    """
    Construct an IET performing a halo exchange for `f`.

    For each dimension of `f` not in `fixed`, two calls to
    ``sendrecv_<f.name>`` are generated (send-to-left, then send-to-right),
    each one guarded by its own mask Symbol. The result is a static
    Callable named ``halo_exchange_<f.name>``.
    """
    # Requirements
    assert f.is_Function
    assert f.grid is not None

    distributor = f.grid.distributor
    nb = distributor._C_neighbours.obj
    comm = distributor._C_comm

    # From here on, `fixed` maps each fixed dimension to a symbolic offset
    fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

    mapper = get_views(f, fixed)

    body = []
    masks = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = FieldFromPointer("%sright" % d, nb)
        lpeer = FieldFromPointer("%sleft" % d, nb)

        # (send-to side, receive-from side, fromrank, torank, mask suffix)
        exchanges = (
            (LEFT, RIGHT, rpeer, lpeer, 'l'),  # Sending to left, receiving from right
            (RIGHT, LEFT, lpeer, rpeer, 'r'),  # Sending to right, receiving from left
        )
        for toside, fromside, fromrank, torank, suffix in exchanges:
            send_sizes, send_offsets = mapper[(d, toside, OWNED)]
            recv_sizes, recv_offsets = mapper[(d, fromside, HALO)]
            # The sent and the received regions must have the same extent
            assert send_sizes == recv_sizes

            arguments = ([f] + list(f.symbolic_shape) + send_sizes +
                         send_offsets + recv_offsets + [fromrank, torank, comm])
            call = Call('sendrecv_%s' % f.name, arguments)

            # The exchange only takes place if the mask evaluates to true
            mask = Symbol(name='m%s%s' % (d, suffix))
            body.append(Conditional(mask, call))
            masks.append(mask)

    iet = List(body=body)
    parameters = ([f] + masks + [comm, nb] + list(fixed.values()) +
                  [d.symbolic_size for d in f.dimensions])
    return Callable('halo_exchange_%s' % f.name, iet, 'void', parameters,
                    ('static', ))
def _make_halowait(self, f, hse, key, msg=None):
    """
    Build the static ``halowait<key>`` Callable for `f`.

    Only the entries of ``hse.halos`` whose ``dim`` is a tuple are
    considered. For each of them, a call to the cached wait routine is
    emitted, taking a reference to the corresponding entry of `msg`.
    """
    nb = f.grid.distributor._obj_neighborhood

    wait = self._cache_dims[f.dimensions][2]

    # Dimensions in `hse.loc_indices` are accessed through a symbolic offset
    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    def make_wait_call(index, dims, tosides):
        # Data is received from the sides opposite to the `tosides`
        recv_sides = OrderedDict(zip(dims, [s.flip() for s in tosides]))

        peer_name = ''.join(s.name[0] for s in recv_sides.values())
        fromrank = FieldFromPointer(peer_name, nb)

        # NOTE: `_C_get_field` is deliberately evaluated for all dimensions
        ofss = [fixed.get(d, f._C_get_field(HALO, d, recv_sides.get(d)).offset)
                for d in f.dimensions]

        # The `index`-th message, passed by reference
        msgi = Byref(IndexedPointer(msg, index))

        return Call(wait.name, [f] + ofss + [fromrank, msgi])

    # Only the halos required by the Diag scheme are retained
    # Note: `sorted` is only for deterministic code generation
    halos = sorted(h for h in hse.halos if isinstance(h.dim, tuple))

    body = [make_wait_call(n, dims, tosides)
            for n, (dims, tosides) in enumerate(halos)]

    iet = List(body=body)
    parameters = [f] + list(fixed.values()) + [nb, msg]
    return Callable('halowait%d' % key, iet, 'void', parameters, ('static', ))
def test_shared_data():
    """
    Check that a SharedData, a FieldFromPointer built on top of it, and one
    of its indexed accesses all survive a pickle round trip.
    """
    def roundtrip(obj):
        return pickle.loads(pickle.dumps(obj))

    s = Scalar(name='s')
    a = Scalar(name='a')

    sdata = SharedData(name='sdata', npthreads=2, fields=[s], dynamic_fields=[a])
    new_sdata = roundtrip(sdata)

    for attr in ('name', 'size', 'fields', 'pfields', 'dynamic_fields'):
        assert getattr(sdata, attr) == getattr(new_sdata, attr)

    ffp = FieldFromPointer(sdata._field_flag, sdata.symbolic_base)
    new_ffp = roundtrip(ffp)

    assert ffp == new_ffp

    indexed = sdata[0]
    new_indexed = roundtrip(indexed)

    for attr in ('name', 'shape'):
        assert getattr(indexed, attr) == getattr(new_indexed, attr)
def __make_tfunc(self, name, iet, root, threads):
    """
    Wrap `iet` into a static Callable named `name`, to be run by a thread.

    Returns a 2-tuple ``(callable, sdata)``, where ``sdata`` is the
    SharedData carrying the parameters of `iet` that are neither
    parameters of `root` nor shared-memory Arrays.
    """
    # Split the parameters required by `iet` into the ones that are known
    # and the ones that must be passed through the SharedData
    required = derive_parameters(iet)
    shared_arrays = tuple(i for i in required if i.is_Array and i._mem_shared)
    known = root.parameters + shared_arrays
    parameters, dynamic_parameters = split(required, lambda i: i in known)

    # Create the SharedData
    sdata = SharedData(name=self.sregistry.make_name(prefix='sdata'),
                       nthreads_std=threads.size,
                       fields=dynamic_parameters)
    parameters.append(sdata)

    def sfield(fname):
        # Access `<fname>` through the SharedData symbolic base
        return FieldFromPointer(fname, sdata.symbolic_base)

    # Before the actual work: load the unwound SharedData fields, available
    # upon thread activation, followed by the thread id
    loads = [DummyExpr(p, sfield(p.name)) for p in dynamic_parameters]
    loads.append(DummyExpr(sdata.symbolic_id, sfield(sdata._field_id)))

    # After the actual work: reset the flag to 1
    reset_flag = List(body=[BlankLine, DummyExpr(sfield(sdata._field_flag), 1)])

    work = List(body=loads + [iet] + [reset_flag])

    # The thread has work to do when it receives the signal that all locks
    # have been set to 0 by the main thread (flag == 2)
    work = Conditional(CondEq(sfield(sdata._field_flag), 2), work)

    # The thread keeps spinning until the alive flag is set to 0 by the
    # main thread
    loop = While(CondNe(sfield(sdata._field_flag), 0), work)

    return Callable(name, loop, 'void', parameters, 'static'), sdata
def _C_get_field(self, region, dim, side=None):
    """
    Symbolic representation of a given data region.

    Returns a ``RegionMeta(offset, size)`` namedtuple describing `region`
    along `dim`. `side` is only read for the OWNED and HALO regions, where
    any value other than LEFT is treated like RIGHT.

    Raises ValueError if `region` is not one of DOMAIN, OWNED, HALO, NOPAD,
    FULL.
    """
    RegionMeta = namedtuple('RegionMeta', 'offset size')

    def entry(field, *where):
        # Access `<_C_name>-><field>[<index of dim, optionally at a side>]`
        index = self._C_make_index(dim, *where)
        return FieldFromPointer("%s[%d]" % (field, index), self._C_name)

    if region is DOMAIN:
        offset = entry(self._C_field_owned_ofs, LEFT)
        return RegionMeta(offset, dim.symbolic_size)

    if region is OWNED:
        # The extent of an OWNED region is the halo size of the opposite side
        here, opposite = (LEFT, RIGHT) if side is LEFT else (RIGHT, LEFT)
        offset = entry(self._C_field_owned_ofs, here)
        size = entry(self._C_field_halo_size, opposite)
        return RegionMeta(offset, size)

    if region is HALO:
        here = LEFT if side is LEFT else RIGHT
        offset = entry(self._C_field_halo_ofs, here)
        size = entry(self._C_field_halo_size, here)
        return RegionMeta(offset, size)

    if region is NOPAD:
        offset = entry(self._C_field_halo_ofs, LEFT)
        size = entry(self._C_field_nopad_size)
        return RegionMeta(offset, size)

    if region is FULL:
        return RegionMeta(0, entry(self._C_field_size))

    raise ValueError("Unknown region `%s`" % str(region))
def _make_haloupdate(self, f, fixed, mask, extra=None, uniquekey=None):
    """
    Build the static ``haloupdate<ndim>d<uniquekey>`` Callable for `f`.

    For each dimension of `f` not in `fixed`, and for each side whose
    entry in `mask` is true, one call to ``sendrecv<ndim>d`` is emitted.
    `extra` arguments are appended both to each call and to the Callable
    parameters. If `uniquekey` is None, it is derived from the `mask`
    values.
    """
    if not extra:
        extra = []

    distributor = f.grid.distributor
    nb = distributor._obj_neighborhood
    comm = distributor._obj_comm

    # From here on, `fixed` maps each fixed dimension to a symbolic offset
    fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

    def view(d0, side, region):
        # Sizes and offsets of the data `region` of `f` along `d0`/`side`;
        # all dimensions other than `d0` use the NOPAD region
        sizes, offsets = [], []
        for d1 in f.dimensions:
            if d1 in fixed:
                offsets.append(fixed[d1])
                continue
            meta = f._C_get_field(region if d0 is d1 else NOPAD, d1, side)
            offsets.append(meta.offset)
            sizes.append(meta.size)
        return sizes, offsets

    # `(dim, side, region) -> (sizes, offsets)`, all symbolic. It tells which
    # values are to be sent (OWNED) or received (HALO)
    mapper = {
        (d0, side, region): view(d0, side, region)
        for d0, side, region in product(f.dimensions, (LEFT, RIGHT), (OWNED, HALO))
        if d0 not in fixed
    }

    def peer(tag, d):
        # Neighborhood field name: `tag` at the position of `d`, 'c' elsewhere
        pname = ''.join(tag if i is d else 'c' for i in distributor.dimensions)
        return FieldFromPointer(pname, nb)

    body = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = peer('r', d)
        lpeer = peer('l', d)

        # (send-to side, receive-from side, fromrank, torank)
        exchanges = (
            (LEFT, RIGHT, rpeer, lpeer),   # Sending to left, receiving from right
            (RIGHT, LEFT, lpeer, rpeer),   # Sending to right, receiving from left
        )
        for toside, fromside, fromrank, torank in exchanges:
            if not mask[(d, toside)]:
                continue
            send_sizes, send_offsets = mapper[(d, toside, OWNED)]
            _, recv_offsets = mapper[(d, fromside, HALO)]
            args = ([f] + send_sizes + send_offsets + recv_offsets +
                    [fromrank, torank, comm] + extra)
            body.append(Call('sendrecv%dd' % f.ndim, args))

    if uniquekey is None:
        uniquekey = ''.join(str(int(i)) for i in mask.values())
    name = 'haloupdate%dd%s' % (f.ndim, uniquekey)

    iet = List(body=body)
    parameters = [f, comm, nb] + list(fixed.values()) + extra
    return Callable(name, iet, 'void', parameters, ('static', ))