def test_create_ops_dat_function(self):
    grid = Grid(shape=(4))

    u = Function(name='u', grid=grid, space_order=2)

    block = OpsBlock('block')

    name_to_ops_dat = {}

    result = create_ops_dat(u, name_to_ops_dat, block)

    assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
    assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

    assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
    assert result[0].expr.rhs.params == (Integer(4),)

    assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
    assert result[1].expr.rhs.params == (Zero(),)

    assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
    assert result[2].expr.rhs.params == (Integer(2),)

    assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
    assert result[3].expr.rhs.params == (Integer(-2),)

    assert result[4].expr.lhs == name_to_ops_dat['u']
    assert type(result[4].expr.rhs) == namespace['ops_decl_dat']
    assert result[4].expr.rhs.args == (
        block,
        1,
        Symbol(namespace['ops_dat_dim'](u.name)),
        Symbol(namespace['ops_dat_base'](u.name)),
        Symbol(namespace['ops_dat_d_m'](u.name)),
        Symbol(namespace['ops_dat_d_p'](u.name)),
        Byref(u.indexify((0,))),
        Literal('"%s"' % u._C_typedata),
        Literal('"u"')
    )
def _(iet):
    # TODO: we need to pick the rank from `comm_shm`, not `comm`,
    # so that we have nranks == ngpus (as long as the user has launched
    # the right number of MPI processes per node given the available
    # number of GPUs per node)
    objcomm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            objcomm = i
            break

    devicetype = as_list(self.lang[self.platform])

    try:
        lang_init = [self.lang['init'](devicetype)]
    except TypeError:
        # Not all target languages need to be explicitly initialized
        lang_init = []

    deviceid = DeviceID()
    if objcomm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = self.lang['num-devices'](devicetype)
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        osdd_then = self.lang['set-device']([deviceid] + devicetype)
        osdd_else = self.lang['set-device']([rank % ngpus] + devicetype)

        body = lang_init + [Conditional(
            CondNe(deviceid, -1),
            osdd_then,
            List(body=[rank_decl, rank_init, ngpus_init, osdd_else]),
        )]

        header = c.Comment('Begin of %s+MPI setup' % self.lang['name'])
        footer = c.Comment('End of %s+MPI setup' % self.lang['name'])
    else:
        body = lang_init + [Conditional(
            CondNe(deviceid, -1),
            self.lang['set-device']([deviceid] + devicetype)
        )]

        header = c.Comment('Begin of %s setup' % self.lang['name'])
        footer = c.Comment('End of %s setup' % self.lang['name'])

    init = List(header=header, body=body, footer=(footer, c.Line()))
    iet = iet._rebuild(body=(init,) + iet.body)

    return iet, {'args': deviceid}
def _initialize(iet):
    comm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            comm = i
            break

    if comm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = Function('omp_get_num_devices')()
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        set_device_num = Call('omp_set_default_device', [rank % ngpus])

        body = [rank_decl, rank_init, ngpus_init, set_device_num]

        init = List(header=c.Comment('Begin of OpenMP+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenMP+MPI setup'), c.Line()))

        iet = iet._rebuild(body=(init,) + iet.body)

    return iet
def __new__(cls, call, pointer, params=None, **kwargs):
    if isinstance(pointer, str):
        pointer = Symbol(pointer)
    if isinstance(call, str):
        call = Symbol(call)
    elif not isinstance(call, (CallFromPointer, DefFunction, sympy.Symbol)):
        # NOTE: we need `sympy.Symbol`, rather than just (devito) `Symbol`,
        # because otherwise it breaks upon certain reconstructions on SymPy-1.8,
        # due to the way `bound_symbols` and `canonical_variables` interact
        raise ValueError("`call` must be CallFromPointer, DefFunction, or Symbol")

    _params = []
    for p in as_tuple(params):
        if isinstance(p, str):
            _params.append(Symbol(p))
        elif isinstance(p, Expr):
            _params.append(p)
        else:
            try:
                _params.append(Number(p))
            except TypeError:
                raise ValueError("`params` must be Expr, numbers or str")
    params = Tuple(*_params)

    obj = sympy.Expr.__new__(cls, call, pointer, params)
    obj.call = call
    obj.pointer = pointer
    obj.params = params

    return obj
def _make_wait(self, f, hse, key, msg=None):
    bufs = FieldFromPointer(msg._C_field_bufs, msg)

    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')

    sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
             for i in range(len(f._dist_dimensions))]
    scatter = Call('scatter_%s' % key, [bufs] + sizes + [f] + ofss)

    # The `scatter` must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
    waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
    rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[waitsend, waitrecv, scatter])

    parameters = [f] + ofss + [fromrank, msg]

    return Callable('wait_%s' % key, iet, 'void', parameters, ('static',))
def _make_halowait(self, f, hse, key, msg=None):
    cast = cast_mapper[(f.dtype, '*')]

    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    dim = Dimension(name='i')

    msgi = IndexedPointer(msg, dim)

    bufs = FieldFromComposite(msg._C_field_bufs, msgi)

    fromrank = FieldFromComposite(msg._C_field_from, msgi)

    sizes = [FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
             for i in range(len(f._dist_dimensions))]
    ofss = [FieldFromComposite('%s[%d]' % (msg._C_field_ofss, i), msgi)
            for i in range(len(f._dist_dimensions))]
    ofss = [fixed.get(d) or ofss.pop(0) for d in f.dimensions]

    # The `scatter` must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Call('scatter%s' % key, [cast(bufs)] + sizes + [f] + ofss)
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
    waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
    rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    # The -1 below is because an Iteration, by default, generates <=
    ncomms = Symbol(name='ncomms')
    iet = Iteration([waitsend, waitrecv, scatter], dim, ncomms - 1)

    parameters = [f] + list(fixed.values()) + [msg, ncomms]

    return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
def _make_sendrecv(self, f, hse, key, msg=None):
    comm = f.grid.distributor._obj_comm

    bufg = FieldFromPointer(msg._C_field_bufg, msg)
    bufs = FieldFromPointer(msg._C_field_bufs, msg)

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
             for i in range(len(f._dist_dimensions))]

    gather = Call('gather%s' % key, [bufg] + sizes + [f] + ofsg)

    # The `gather` is unnecessary if sending to MPI.PROC_NULL
    gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

    count = reduce(mul, sizes, 1)
    rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
    rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
    recv = IrecvCall([bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                      fromrank, Integer(13), comm, rrecv])
    send = IsendCall([bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                      torank, Integer(13), comm, rsend])

    iet = List(body=[recv, gather, send])

    parameters = [f] + ofsg + [fromrank, torank, comm, msg]

    return SendRecv(key, iet, parameters, bufg, bufs)
def _make_poke(self, hs, key, msgs):
    lflag = Symbol(name='lflag')
    gflag = Symbol(name='gflag')

    # Init flags
    body = [Expression(DummyEq(lflag, 0)),
            Expression(DummyEq(gflag, 1))]

    # For each msg, build an Iteration calling MPI_Test on all peers
    for msg in msgs:
        dim = Dimension(name='i')
        msgi = IndexedPointer(msg, dim)

        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        testrecv = Call('MPI_Test',
                        [rrecv, Byref(lflag), Macro('MPI_STATUS_IGNORE')])

        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        testsend = Call('MPI_Test',
                        [rsend, Byref(lflag), Macro('MPI_STATUS_IGNORE')])

        update = AugmentedExpression(DummyEq(gflag, lflag), '&')

        body.append(Iteration([testsend, update, testrecv, update],
                              dim, msg.npeers - 1))

    body.append(Return(gflag))

    return make_efunc('pokempi%d' % key, List(body=body), retval='int')
def _make_sendrecv(self, f, hse, key, **kwargs):
    comm = f.grid.distributor._obj_comm

    buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions
                if d not in hse.loc_indices]
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype,
                 padding=0, scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype,
                 padding=0, scope='heap')

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    gather = Call('gather_%s' % key, [bufg] + list(bufg.shape) + [f] + ofsg)
    scatter = Call('scatter_%s' % key, [bufs] + list(bufs.shape) + [f] + ofss)

    # The `gather` is unnecessary if sending to MPI.PROC_NULL
    gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

    # The `scatter` must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    count = reduce(mul, bufs.shape, 1)
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')
    recv = Call('MPI_Irecv', [bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                              fromrank, Integer(13), comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                              torank, Integer(13), comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])

    parameters = ([f] + list(bufs.shape) + ofsg + ofss +
                  [fromrank, torank, comm])

    return Callable('sendrecv_%s' % key, iet, 'void', parameters, ('static',))
def update_halo(f, fixed):
    """
    Construct an IET performing a halo exchange for a :class:`TensorFunction`.
    """
    # Requirements
    assert f.is_Function
    assert f.grid is not None

    distributor = f.grid.distributor
    nb = distributor._C_neighbours.obj
    comm = distributor._C_comm

    fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

    mapper = get_views(f, fixed)

    body = []
    masks = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = FieldFromPointer("%sright" % d, nb)
        lpeer = FieldFromPointer("%sleft" % d, nb)

        # Sending to left, receiving from right
        lsizes, loffsets = mapper[(d, LEFT, OWNED)]
        rsizes, roffsets = mapper[(d, RIGHT, HALO)]
        assert lsizes == rsizes
        sizes = lsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes +
                      loffsets + roffsets + [rpeer, lpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sl' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

        # Sending to right, receiving from left
        rsizes, roffsets = mapper[(d, RIGHT, OWNED)]
        lsizes, loffsets = mapper[(d, LEFT, HALO)]
        assert rsizes == lsizes
        sizes = rsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes +
                      roffsets + loffsets + [lpeer, rpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sr' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

    iet = List(body=body)

    parameters = ([f] + masks + [comm, nb] + list(fixed.values()) +
                  [d.symbolic_size for d in f.dimensions])

    return Callable('halo_exchange_%s' % f.name, iet, 'void', parameters, ('static',))
def test_symbols_args_vs_kwargs(self):
    """
    Unlike Functions, Symbols don't require the use of a kwarg to specify
    the name. This test basically checks that `Symbol('s') is Symbol(name='s')`,
    i.e. that we don't make any silly mistakes when it gets to compute the
    cache key.
    """
    v_arg = Symbol('v')
    v_kwarg = Symbol(name='v')
    assert v_arg is v_kwarg

    d_arg = Dimension('d100')
    d_kwarg = Dimension(name='d100')
    assert d_arg is d_kwarg
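# The test above relies on symbols being cached by name, so positional and
# keyword construction must resolve to the same cache entry. A toy sketch of
# that idea (the class and cache below are illustrative stand-ins, not
# Devito's actual caching machinery):

_cache = {}


class CachedSymbol(object):
    """Toy symbol cached by name, for illustration only."""

    def __new__(cls, name=None):
        # Normalize args/kwargs into a single cache key
        key = (cls, name)
        if key not in _cache:
            obj = super().__new__(cls)
            obj.name = name
            _cache[key] = obj
        return _cache[key]


assert CachedSymbol('v') is CachedSymbol(name='v')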
def _make_haloupdate(self, f, hse, key, msg=None):
    comm = f.grid.distributor._obj_comm

    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    dim = Dimension(name='i')

    msgi = IndexedPointer(msg, dim)

    bufg = FieldFromComposite(msg._C_field_bufg, msgi)
    bufs = FieldFromComposite(msg._C_field_bufs, msgi)

    fromrank = FieldFromComposite(msg._C_field_from, msgi)
    torank = FieldFromComposite(msg._C_field_to, msgi)

    sizes = [FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
             for i in range(len(f._dist_dimensions))]
    ofsg = [FieldFromComposite('%s[%d]' % (msg._C_field_ofsg, i), msgi)
            for i in range(len(f._dist_dimensions))]
    ofsg = [fixed.get(d) or ofsg.pop(0) for d in f.dimensions]

    # The `gather` is unnecessary if sending to MPI.PROC_NULL
    gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
    gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

    # Make Irecv/Isend
    count = reduce(mul, sizes, 1)
    rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
    rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
    recv = Call('MPI_Irecv', [bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                              fromrank, Integer(13), comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                              torank, Integer(13), comm, rsend])

    # The -1 below is because an Iteration, by default, generates <=
    ncomms = Symbol(name='ncomms')
    iet = Iteration([recv, gather, send], dim, ncomms - 1)

    parameters = [f, comm, msg, ncomms] + list(fixed.values())

    return Callable('haloupdate%d' % key, iet, 'void', parameters, ('static',))
def sendrecv(f, fixed):
    """Construct an IET performing a halo exchange along arbitrary
    dimension and side."""
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions
                if d not in fixed]
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype, scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype, scope='heap')

    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype, scope='external')

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    parameters = [bufg] + list(bufg.shape) + [dat] + list(dat.shape) + ofsg
    gather = Call('gather_%s' % f.name, parameters)
    parameters = [bufs] + list(bufs.shape) + [dat] + list(dat.shape) + ofss
    scatter = Call('scatter_%s' % f.name, parameters)

    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    count = reduce(mul, bufs.shape, 1)
    recv = Call('MPI_Irecv', [bufs, count, Macro(numpy_to_mpitypes(f.dtype)),
                              fromrank, '13', comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(numpy_to_mpitypes(f.dtype)),
                              torank, '13', comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    iet = List(body=[ArrayCast(dat), iet_insert_C_decls(iet)])

    parameters = ([dat] + list(dat.shape) + list(bufs.shape) + ofsg + ofss +
                  [fromrank, torank, comm])

    return Callable('sendrecv_%s' % f.name, iet, 'void', parameters, ('static',))
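# The sendrecv Callable above generates the C-level version of a standard
# nonblocking exchange: gather owned values into a send buffer, post
# Irecv/Isend, wait on both requests, then scatter the receive buffer into the
# halo. A minimal mpi4py sketch of the same protocol follows; the neighbour
# ranks and buffer shape are made up for illustration, and exchanges involving
# MPI.PROC_NULL complete as no-ops, mirroring the guards on gather/scatter.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# Neighbours along one dimension; boundary ranks talk to MPI.PROC_NULL
torank = rank + 1 if rank + 1 < comm.Get_size() else MPI.PROC_NULL
fromrank = rank - 1 if rank - 1 >= 0 else MPI.PROC_NULL

bufg = np.full(4, rank, dtype=np.float32)   # "gathered" owned values
bufs = np.empty(4, dtype=np.float32)        # halo values to be "scattered"

rrecv = comm.Irecv([bufs, MPI.FLOAT], source=fromrank, tag=13)
rsend = comm.Isend([bufg, MPI.FLOAT], dest=torank, tag=13)
MPI.Request.Waitall([rsend, rrecv])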
def _do_generate(self, exprs, exclude, cbk_search, cbk_compose=None):
    """
    Carry out the bulk of the work of ``_generate``.
    """
    counter = generator()
    make = lambda: Symbol(name='dummy%d' % counter())

    if cbk_compose is None:
        cbk_compose = lambda *args: None

    mapper = Uxmapper()
    for e in exprs:
        for i in cbk_search(e):
            if not i.is_commutative:
                continue

            terms = cbk_compose(i)

            # Make sure we won't break any data dependencies
            if terms:
                free_symbols = set().union(*[i.free_symbols for i in terms])
            else:
                free_symbols = i.free_symbols
            if {a.function for a in free_symbols} & exclude:
                continue

            mapper.add(i, make, terms)

    return mapper
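# `generator()` above returns a callable producing a fresh integer per call,
# so every candidate subexpression gets a unique `dummy<n>` name. A sketch of
# that idiom using itertools.count (the original `generator` helper is assumed
# from context, not shown in this section):
from itertools import count


def generator():
    counter = count()
    return lambda: next(counter)


counter = generator()
assert [counter() for _ in range(3)] == [0, 1, 2]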
def _make_haloupdate(self, f, hse, key, **kwargs):
    distributor = f.grid.distributor
    nb = distributor._obj_neighborhood
    comm = distributor._obj_comm
    sendrecv = self._cache_dims[f.dimensions][0]

    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    # Only retain the halos required by the Diag scheme
    # Note: `sorted` is only for deterministic code generation
    halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

    body = []
    for dims, tosides in halos:
        mapper = OrderedDict(zip(dims, tosides))

        sizes = [f._C_get_field(OWNED, d, s).size for d, s in mapper.items()]

        torank = FieldFromPointer(''.join(i.name[0] for i in mapper.values()), nb)
        ofsg = [fixed.get(d, f._C_get_field(OWNED, d, mapper.get(d)).offset)
                for d in f.dimensions]

        mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))
        fromrank = FieldFromPointer(''.join(i.name[0] for i in mapper.values()), nb)
        ofss = [fixed.get(d, f._C_get_field(HALO, d, mapper.get(d)).offset)
                for d in f.dimensions]

        kwargs['haloid'] = len(body)

        body.append(self._call_sendrecv(sendrecv.name, f, sizes, ofsg, ofss,
                                        fromrank, torank, comm, **kwargs))

    iet = List(body=body)

    parameters = [f, comm, nb] + list(fixed.values())

    return HaloUpdate(key, iet, parameters)
def callback():
    # Derivatives must be evaluated before the introduction of indirect accesses
    try:
        _expr = expr.evaluate
    except AttributeError:
        # E.g., a generic SymPy expression or a number
        _expr = expr

    variables = list(retrieve_function_carriers(_expr))

    # Need to get the origin of the field in case it is staggered
    # TODO: handle the staggering of each variable separately
    field_offset = variables[0].origin

    # List of indirection indices for all adjacent grid points
    idx_subs, temps = self._interpolation_indices(variables, offset,
                                                  field_offset=field_offset)

    # Substitute coordinate base symbols into the interpolation coefficients
    args = [_expr.xreplace(v_sub) * b.xreplace(v_sub)
            for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]

    # Accumulate point-wise contributions into a temporary
    rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
    summands = [Eq(rhs, 0., implicit_dims=self.sfunction.dimensions)]
    summands.extend([Inc(rhs, i, implicit_dims=self.sfunction.dimensions)
                     for i in args])

    # Write/Incr `self`
    lhs = self.sfunction.subs(self_subs)
    last = [Inc(lhs, rhs)] if increment else [Eq(lhs, rhs)]

    return temps + summands + last
def _make_halowait(self, f, hse, key, msg=None):
    nb = f.grid.distributor._obj_neighborhood
    wait = self._cache_dims[f.dimensions][2]

    fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

    # Only retain the halos required by the Diag scheme
    # Note: `sorted` is only for deterministic code generation
    halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

    body = []
    for dims, tosides in halos:
        mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))

        fromrank = FieldFromPointer(''.join(i.name[0] for i in mapper.values()), nb)

        ofss = [fixed.get(d, f._C_get_field(HALO, d, mapper.get(d)).offset)
                for d in f.dimensions]

        msgi = Byref(IndexedPointer(msg, len(body)))

        body.append(Call(wait.name, [f] + ofss + [fromrank, msgi]))

    iet = List(body=body)

    parameters = [f] + list(fixed.values()) + [nb, msg]

    return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
def _make_haloupdate(self, f, hse, key, **kwargs): distributor = f.grid.distributor nb = distributor._obj_neighborhood comm = distributor._obj_comm sendrecv = self._cache_dims[f.dimensions][0] fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices} # Build a mapper `(dim, side, region) -> (size, ofs)` for `f`. `size` and # `ofs` are symbolic objects. This mapper tells what data values should be # sent (OWNED) or received (HALO) given dimension and side mapper = {} for d0, side, region in product(f.dimensions, (LEFT, RIGHT), (OWNED, HALO)): if d0 in fixed: continue sizes = [] ofs = [] for d1 in f.dimensions: if d1 in fixed: ofs.append(fixed[d1]) else: meta = f._C_get_field(region if d0 is d1 else NOPAD, d1, side) ofs.append(meta.offset) sizes.append(meta.size) mapper[(d0, side, region)] = (sizes, ofs) body = [] for d in f.dimensions: if d in fixed: continue name = ''.join('r' if i is d else 'c' for i in distributor.dimensions) rpeer = FieldFromPointer(name, nb) name = ''.join('l' if i is d else 'c' for i in distributor.dimensions) lpeer = FieldFromPointer(name, nb) if (d, LEFT) in hse.halos: # Sending to left, receiving from right lsizes, lofs = mapper[(d, LEFT, OWNED)] rsizes, rofs = mapper[(d, RIGHT, HALO)] args = [f, lsizes, lofs, rofs, rpeer, lpeer, comm] body.append(self._call_sendrecv(sendrecv.name, *args, **kwargs)) if (d, RIGHT) in hse.halos: # Sending to right, receiving from left rsizes, rofs = mapper[(d, RIGHT, OWNED)] lsizes, lofs = mapper[(d, LEFT, HALO)] args = [f, rsizes, rofs, lofs, lpeer, rpeer, comm] body.append(self._call_sendrecv(sendrecv.name, *args, **kwargs)) iet = List(body=body) parameters = [f, comm, nb] + list(fixed.values()) return Callable('haloupdate%d' % key, iet, 'void', parameters, ('static', ))
def _make_copy(self, f, hse, key, swap=False):
    buf_dims = []
    buf_indices = []
    for d in f.dimensions:
        if d not in hse.loc_indices:
            buf_dims.append(Dimension(name='buf_%s' % d.root))
            buf_indices.append(d.root)
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype, padding=0)

    f_offsets = []
    f_indices = []
    for d in f.dimensions:
        offset = Symbol(name='o%s' % d.root)
        f_offsets.append(offset)
        f_indices.append(offset + (d.root if d not in hse.loc_indices else 0))

    if swap is False:
        eq = DummyEq(buf[buf_indices], f[f_indices])
        name = 'gather_%s' % key
    else:
        eq = DummyEq(f[f_indices], buf[buf_indices])
        name = 'scatter_%s' % key

    iet = Expression(eq)
    for i, d in reversed(list(zip(buf_indices, buf_dims))):
        # The -1 below is because an Iteration, by default, generates <=
        iet = Iteration(iet, i, d.symbolic_size - 1,
                        properties=(PARALLEL, AFFINE))

    parameters = [buf] + list(buf.shape) + [f] + f_offsets

    return Callable(name, iet, 'void', parameters, ('static',))
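# The gather/scatter Callables built above copy between the function data and
# a contiguous message buffer, starting at per-dimension offsets. A NumPy
# sketch of the two loop nests being generated (shapes and offsets below are
# illustrative only, not derived from a real HaloSchemeEntry):
import numpy as np

f = np.arange(8 * 8, dtype=np.float32).reshape(8, 8)   # stand-in for f's data
ox, oy = 1, 0                                           # per-dimension offsets
bufx, bufy = 2, 8                                       # buffer (halo) shape

# gather: buf[i, j] = f[ox + i, oy + j]
buf = f[ox:ox + bufx, oy:oy + bufy].copy()

# scatter: f[ox + i, oy + j] = buf[i, j]
f[ox:ox + bufx, oy:oy + bufy] = buf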
def to_ops_stencil(param, accesses):
    dims = len(accesses[0])
    pts = len(accesses)
    stencil_name = namespace['ops_stencil_name'](dims, param.name, pts)

    stencil_array = Array(
        name=stencil_name,
        dimensions=(DefaultDimension(name='len', default_value=dims * pts),),
        dtype=np.int32,
    )

    ops_stencil = OpsStencil(stencil_name.upper())

    return ops_stencil, [
        Expression(ClusterizedEq(Eq(
            stencil_array,
            ListInitializer(list(itertools.chain(*accesses)))
        ))),
        Expression(ClusterizedEq(Eq(
            ops_stencil,
            namespace['ops_decl_stencil'](
                dims,
                pts,
                Symbol(stencil_array.name),
                Literal('"%s"' % stencil_name.upper())
            )
        )))
    ]
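# `to_ops_stencil` flattens the per-point access offsets into a single int32
# initializer via itertools.chain. A tiny standalone sketch of that step:
import itertools

accesses = [(0, 0), (0, 1), (1, 0)]          # 3-point stencil in 2 dimensions
dims, pts = len(accesses[0]), len(accesses)
flat = list(itertools.chain(*accesses))

assert (dims, pts) == (2, 3)
assert flat == [0, 0, 0, 1, 1, 0]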
def makeit_ssa(exprs):
    """Convert an iterable of Eqs into Static Single Assignment (SSA) form."""
    # Identify recurring LHSs
    seen = {}
    for i, e in enumerate(exprs):
        seen.setdefault(e.lhs, []).append(i)
    # Optimization: don't waste time reconstructing stuff if already in SSA form
    if all(len(i) == 1 for i in seen.values()):
        return exprs
    # SSA conversion
    c = 0
    mapper = {}
    processed = []
    for i, e in enumerate(exprs):
        where = seen[e.lhs]
        rhs = e.rhs.xreplace(mapper)
        if len(where) > 1:
            needssa = e.is_Scalar or where[-1] != i
            lhs = Symbol(name='ssa%d' % c, dtype=e.dtype) if needssa else e.lhs
            if e.is_Increment:
                # Turn AugmentedAssignment into Assignment
                processed.append(e.func(lhs, mapper[e.lhs] + rhs,
                                        is_Increment=False))
            else:
                processed.append(e.func(lhs, rhs))
            mapper[e.lhs] = lhs
            c += 1
        else:
            processed.append(e.func(e.lhs, rhs))
    return processed
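# `makeit_ssa` renames repeated left-hand sides so each symbol is assigned
# exactly once, substituting the latest version into later right-hand sides.
# A plain-SymPy sketch of the renaming, independent of Devito's Eq types; for
# simplicity it renames every assignment, whereas the function above keeps the
# original LHS for the last write:
from sympy import Eq, Symbol, symbols

a, b = symbols('a b')
exprs = [Eq(a, 1), Eq(a, a + b), Eq(b, a * 2)]

mapper, processed = {}, []
for i, e in enumerate(exprs):
    rhs = e.rhs.xreplace(mapper)
    lhs = Symbol('ssa%d' % i)          # fresh name for every assignment
    processed.append(Eq(lhs, rhs))
    mapper[e.lhs] = lhs

# processed == [Eq(ssa0, 1), Eq(ssa1, ssa0 + b), Eq(ssa2, 2*ssa1)]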
def _make_thread_activate(threads, sdata, sync_ops, sregistry):
    if threads.size == 1:
        d = threads.index
    else:
        d = Symbol(name=sregistry.make_name(prefix=threads.index.name))

    sync_locks = [s for s in sync_ops if s.is_SyncLock]
    condition = Or(*([CondNe(s.handle, 2) for s in sync_locks] +
                     [CondNe(FieldFromComposite(sdata._field_flag, sdata[d]), 1)]))

    if threads.size == 1:
        activation = [While(condition)]
    else:
        activation = [DummyExpr(d, 0),
                      While(condition, DummyExpr(d, (d + 1) % threads.size))]

    activation.extend([DummyExpr(FieldFromComposite(i.name, sdata[d]), i)
                       for i in sdata.dynamic_fields])
    activation.extend([DummyExpr(s.handle, 0) for s in sync_locks])
    activation.append(DummyExpr(FieldFromComposite(sdata._field_flag, sdata[d]), 2))
    activation = List(
        header=[c.Line(), c.Comment("Activate `%s`" % threads.name)],
        body=activation,
        footer=c.Line()
    )

    return activation
def _(iet):
    # TODO: we need to pick the rank from `comm_shm`, not `comm`,
    # so that we have nranks == ngpus (as long as the user has launched
    # the right number of MPI processes per node given the available
    # number of GPUs per node)
    objcomm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            objcomm = i
            break

    deviceid = DeviceID()
    device_nvidia = Macro('acc_device_nvidia')
    if objcomm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = DefFunction('acc_get_num_devices', device_nvidia)
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        asdn_then = Call('acc_set_device_num', [deviceid, device_nvidia])
        asdn_else = Call('acc_set_device_num', [rank % ngpus, device_nvidia])

        body = [Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    asdn_then,
                    List(body=[rank_decl, rank_init, ngpus_init, asdn_else])
                )]
    else:
        body = [Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    Call('acc_set_device_num', [deviceid, device_nvidia])
                )]

    init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                body=body,
                footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))

    iet = iet._rebuild(body=(init,) + iet.body)

    return iet, {'args': deviceid}
def cse(cluster, sregistry, *args):
    """
    Common sub-expressions elimination (CSE).
    """
    make = lambda: Symbol(name=sregistry.make_name(), dtype=cluster.dtype).indexify()
    processed = _cse(cluster.exprs, make)

    return cluster.rebuild(processed)
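# `_cse` is Devito's own pass; the underlying transformation is the same idea
# as SymPy's stock `sympy.cse`, sketched here on a plain expression. Note the
# `make` factory above produces indexified temporaries via the symbol
# registry, which the stock routine does not do.
import sympy
from sympy.abc import x, y

replacements, reduced = sympy.cse([(x + y)**2 + sympy.sqrt(x + y)],
                                  symbols=sympy.numbered_symbols('r'))
# replacements == [(r0, x + y)]
# reduced == [r0**2 + sqrt(r0)]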
def __new__(cls, indexed, pname):
    plabel = Symbol(name=pname, dtype=indexed.dtype)
    base = IndexedData(plabel, shape=indexed.shape, function=indexed.function)
    obj = super().__new__(cls, base, *indexed.indices)

    obj.indexed = indexed
    obj.pname = pname

    return obj
def test_to_ops_stencil(self, _accesses):
    param = Symbol('foo')
    accesses = eval(_accesses)

    stencil_name = 's2d_foo_%spt' % len(accesses)

    stencil, result = to_ops_stencil(param, accesses)

    assert stencil.name == stencil_name.upper()

    assert result[0].expr.lhs.name == stencil_name
    assert result[0].expr.rhs.params == tuple(itertools.chain(*accesses))

    assert result[1].expr.lhs == stencil
    assert type(result[1].expr.rhs) == namespace['ops_decl_stencil']
    assert result[1].expr.rhs.args == (
        2,
        len(accesses),
        Symbol(stencil_name),
        Literal('"%s"' % stencil_name.upper())
    )
def make_parallel(self, iet):
    """
    Transform ``iet`` by decorating its parallel :class:`Iteration`s with
    suitable ``#pragma omp ...`` for thread-level parallelism.
    """
    # Group sequences of loops that should go within the same parallel region
    was_tagged = False
    groups = OrderedDict()
    for tree in retrieve_iteration_tree(iet):
        # Determine the number of consecutive parallelizable Iterations
        candidates = filter_iterations(tree, key=self.key, stop='asap')
        if not candidates:
            was_tagged = False
            continue
        # Consecutive tagged Iterations go in the same group
        is_tagged = any(i.tag is not None for i in tree)
        key = len(groups) - (is_tagged & was_tagged)
        handle = groups.setdefault(key, OrderedDict())
        handle[candidates[0]] = candidates
        was_tagged = is_tagged

    mapper = OrderedDict()
    for group in groups.values():
        private = []
        for root, candidates in group.items():
            mapper.update(self._make_parallel_tree(root, candidates))

            # Track the thread-private and thread-shared variables
            private.extend([i for i in FindSymbols('symbolics').visit(root)
                            if i.is_Array and i._mem_stack])

        # Build the parallel region
        private = sorted(set([i.name for i in private]))
        private = ('private(%s)' % ','.join(private)) if private else ''
        rebuilt = [v for k, v in mapper.items() if k in group]
        par_region = Block(header=self.lang['par-region'](private), body=rebuilt)
        for k, v in list(mapper.items()):
            if isinstance(v, Iteration):
                mapper[k] = None if v.is_Remainder else par_region

    processed = Transformer(mapper).visit(iet)

    # Hack/workaround to the fact that the OpenMP pragmas are not true
    # IET nodes, so the `nthreads` variable won't be detected as a
    # Callable parameter unless inserted in a mock Expression
    if mapper:
        nt = NThreads()
        eq = LocalExpression(DummyEq(Symbol(name='nt', dtype=np.int32), nt))
        return List(body=[eq, processed]), {'input': [nt]}
    else:
        return List(body=processed), {}
def test_find_symbols_nested(mode, expected):
    grid = Grid(shape=(4, 4, 4))
    call = Call('foo', [
        Call('bar', [Symbol(name='x'), Call('baz', [Function(name='f', grid=grid)])])
    ])

    found = FindSymbols(mode).visit(call)

    assert [f.name for f in found] == eval(expected)
def _initialize(iet):
    # TODO: we need to pick the rank from `comm_shm`, not `comm`,
    # so that we have nranks == ngpus (as long as the user has launched
    # the right number of MPI processes per node given the available
    # number of GPUs per node)
    comm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            comm = i
            break

    device_nvidia = Macro('acc_device_nvidia')
    body = Call('acc_init', [device_nvidia])

    if comm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = DefFunction('acc_get_num_devices', device_nvidia)
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        devicenum = Symbol(name='devicenum')
        devicenum_init = LocalExpression(DummyEq(devicenum, rank % ngpus))

        set_device_num = Call('acc_set_device_num', [devicenum, device_nvidia])

        body = [rank_decl, rank_init, ngpus_init, devicenum_init,
                set_device_num, body]

    init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                body=body,
                footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))

    iet = iet._rebuild(body=(init,) + iet.body)

    return iet
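# Both the OpenACC and OpenMP initializers bind each MPI rank to a device via
# `rank % ngpus`. A sketch of that arithmetic with mpi4py; `NGPUS_PER_NODE` is
# an illustrative environment variable, not something Devito reads -- the
# generated code queries acc_get_num_devices / omp_get_num_devices instead.
import os
from mpi4py import MPI

rank = MPI.COMM_WORLD.Get_rank()
ngpus = int(os.environ.get('NGPUS_PER_NODE', '1'))
devicenum = rank % ngpus   # the device this rank would attach to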
def initialize(iet, **kwargs):
    """
    Initialize the OpenMP environment.
    """
    devicenum = Symbol(name='devicenum')

    @singledispatch
    def _initialize(iet):
        comm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = Function('omp_get_num_devices')()
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            devicenum_init = LocalExpression(DummyEq(devicenum, rank % ngpus))

            body = [rank_decl, rank_init, ngpus_init, devicenum_init]

            init = List(header=c.Comment('Begin of OpenMP+MPI setup'),
                        body=body,
                        footer=(c.Comment('End of OpenMP+MPI setup'), c.Line()))
        else:
            devicenum_init = LocalExpression(DummyEq(devicenum, 0))
            body = [devicenum_init]

            init = List(header=c.Comment('Begin of OpenMP setup'),
                        body=body,
                        footer=(c.Comment('End of OpenMP setup'), c.Line()))

        iet = iet._rebuild(body=(init,) + iet.body)

        return iet

    @_initialize.register(ElementalFunction)
    @_initialize.register(MPICallable)
    def _(iet):
        return iet

    iet = _initialize(iet)

    return iet, {'args': devicenum}
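# `initialize` relies on functools.singledispatch: the generic overload
# prepends the setup code, while the overloads registered for
# ElementalFunction and MPICallable return the IET untouched. A self-contained
# sketch of that dispatch pattern; the node classes below are stand-ins, not
# Devito's.
from functools import singledispatch


class Node:
    pass


class Elemental(Node):
    pass


@singledispatch
def transform(node):
    return 'instrumented'      # generic case: prepend the setup code


@transform.register(Elemental)
def _(node):
    return 'untouched'         # helper functions are left alone


assert transform(Node()) == 'instrumented'
assert transform(Elemental()) == 'untouched'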