def _generate_mpi(self, iet, **kwargs):
    """
    Prune redundant HaloSpots from ``iet`` and, when MPI is enabled, inject
    the halo exchange Calls.

    HaloSpots flagged ``is_Redundant`` are always removed. If
    ``configuration['mpi']`` is ``False`` the pruned IET is returned as is.
    Otherwise a HaloExchangeBuilder produces the Callables and the Calls;
    ``'mpi.h'`` is appended to ``self._includes``, every Callable is
    registered in ``self._func_table`` under its name, and each node keyed
    in the Calls mapping is replaced by a List made of the new Calls
    followed by the node's original body.

    Returns the transformed IET.
    """
    # Map each superfluous HaloSpot to None so that it gets dropped
    redundant = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.is_Redundant:
            redundant[hs] = None
    iet = Transformer(redundant, nested=True).visit(iet)

    # Without MPI there is nothing left to generate
    if configuration['mpi'] is False:
        return iet

    # Halo exchange Callables and Calls for the surviving HaloSpots
    remaining = FindNodes(HaloSpot).visit(iet)
    threaded = is_threaded(kwargs.get("dle"))
    builder = HaloExchangeBuilder(threaded)
    callables, calls = builder.make(remaining)

    # Record the new header and Callables in the Operator internal state
    self._includes.append('mpi.h')
    entries = OrderedDict()
    for efunc in callables:
        entries[efunc.name] = MetaCall(efunc, True)
    self._func_table.update(entries)

    # The new Calls go in front of the pre-existing body
    injected = {}
    for node, new_calls in calls.items():
        injected[node] = List(body=new_calls + list(node.body))

    return Transformer(injected, nested=True).visit(iet)
def mpiize(iet, **kwargs):
    """
    Replace the HaloSpots in ``iet`` with MPI halo exchange code, so that
    distributed-memory parallel code is emitted.

    The mandatory ``mode`` keyword (popped from ``kwargs``) selects the
    HaloExchangeBuilder used for OverlappableHaloSpots; any other HaloSpot
    is handled by a ``'basic'`` builder. Afterwards, the PARALLEL property
    is stripped from every Iteration that contains a Call, as well as from
    the Iterations enclosing it.

    Returns the transformed IET together with a dict carrying the required
    ``'includes'``, the generated ``'efuncs'`` and the new ``'args'``.
    """
    mode = kwargs.pop('mode')

    # The generators are shared by the two builders to produce unique
    # object names
    generators = {'msg': generator(), 'comm': generator(), 'comp': generator()}
    sync_heb = HaloExchangeBuilder('basic', **generators)
    user_heb = HaloExchangeBuilder(mode, **generators)

    halo_mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if isinstance(hs, OverlappableHaloSpot):
            halo_mapper[hs] = user_heb.make(hs)
        else:
            halo_mapper[hs] = sync_heb.make(hs)

    efuncs = sync_heb.efuncs + user_heb.efuncs
    objs = filter_sorted(sync_heb.objs + user_heb.objs)

    iet = Transformer(halo_mapper, nested=True).visit(iet)

    # Iterations within which halo exchanges are performed must lose the
    # PARALLEL tag
    untagged = {}
    for tree in retrieve_iteration_tree(iet):
        # Walk from the innermost Iteration outwards
        for node in reversed(tree):
            if node in untagged:
                # This subtree was processed through an earlier tree
                break
            if not FindNodes(Call).visit(node):
                continue
            # `node` and all of the Iterations enclosing it are rebuilt
            for outer in tree[:tree.index(node) + 1]:
                untagged[outer] = outer._rebuild(
                    properties=set(outer.properties) - {PARALLEL}
                )
            break
    iet = Transformer(untagged, nested=True).visit(iet)

    return iet, {'includes': ['mpi.h'], 'efuncs': efuncs, 'args': objs}
def test_iet_basic_sendrecv(self): grid = Grid(shape=(4, 4)) t = grid.stepping_dim f = TimeFunction(name='f', grid=grid) heb = HaloExchangeBuilder() sendrecv = heb._make_sendrecv(f, HaloSchemeEntry([t], [])) assert str(sendrecv.parameters) == """\ (f(t, x, y), buf_x_size, buf_y_size, ogtime, ogx, ogy, ostime, osx, osy,\ fromrank, torank, comm)""" assert str(sendrecv.body[0]) == """\
def test_iet_basic_haloupdate(self): grid = Grid(shape=(4, 4)) x, y = grid.dimensions t = grid.stepping_dim f = TimeFunction(name='f', grid=grid) heb = HaloExchangeBuilder() halos = [(x, LEFT), (x, RIGHT), (y, LEFT), (y, RIGHT)] haloupdate = heb._make_haloupdate(f, HaloSchemeEntry([t], halos)) assert str(haloupdate.parameters) == """\ (f(t, x, y), comm, nb, otime)""" assert str(haloupdate.body[0]) == """\
def _dist_parallelize(self, iet):
    """
    Emit distributed-memory parallel code by adding the MPI routines that
    perform the halo exchanges.

    A HaloExchangeBuilder, configured through ``self.params['mpi']``, is fed
    all HaloSpots found in ``iet``; the mapper it returns is then applied to
    the IET.

    Returns the transformed IET and a dict with the required ``'includes'``
    and the generated ``'call_trees'``.
    """
    builder = HaloExchangeBuilder(self.params['mpi'])

    # Send/recv Callables and Calls for all of the HaloSpots
    halo_spots = FindNodes(HaloSpot).visit(iet)
    call_trees, calls = builder.make(halo_spots)

    # Apply the substitutions that introduce the `haloupdate` Calls
    transformed = Transformer(calls, nested=True).visit(iet)

    metadata = {'includes': ['mpi.h'], 'call_trees': call_trees}
    return transformed, metadata
def _dist_parallelize(self, iet):
    """
    Emit distributed-memory parallel code by adding the MPI routines that
    perform the halo exchanges.

    HaloSpots flagged ``is_Overlappable`` are processed by a
    HaloExchangeBuilder configured through ``self.params['mpi']``; all other
    HaloSpots are processed by a ``'basic'`` builder. Each HaloSpot is passed
    to the builder along with its position in the visit order.

    Returns the transformed IET and a dict with the required ``'includes'``,
    the generated ``'efuncs'`` and the new ``'args'``.
    """
    sync_heb = HaloExchangeBuilder('basic')
    user_heb = HaloExchangeBuilder(self.params['mpi'])

    replacements = {}
    for index, hs in enumerate(FindNodes(HaloSpot).visit(iet)):
        if hs.is_Overlappable:
            replacements[hs] = user_heb.make(hs, index)
        else:
            replacements[hs] = sync_heb.make(hs, index)

    # Collect what the two builders have generated
    efuncs = sync_heb.efuncs + user_heb.efuncs
    objs = sync_heb.objs + user_heb.objs

    transformed = Transformer(replacements, nested=True).visit(iet)

    return transformed, {'includes': ['mpi.h'], 'efuncs': efuncs, 'args': objs}
def test_iet_copy(self):
    """
    Check the parameters and the loop nest of the Callable produced by
    ``HaloExchangeBuilder._make_copy`` for a TimeFunction on a 4x4 Grid.
    """
    grid = Grid(shape=(4, 4))
    time_dim = grid.stepping_dim
    func = TimeFunction(name='f', grid=grid)

    builder = HaloExchangeBuilder()
    entry = HaloSchemeEntry([time_dim], [])
    gather = builder._make_copy(func, entry)

    expected_parameters = (
        "(buf(buf_x, buf_y), buf_x_size, buf_y_size, f(t, x, y), otime, ox, oy)"
    )
    assert str(gather.parameters) == expected_parameters

    # NOTE(review): the line breaks and indentation of this snippet were
    # restored from a whitespace-collapsed listing -- confirm against the
    # generated code
    expected_loops = """\
for (int x = 0; x <= buf_x_size - 1; x += 1)
{
  for (int y = 0; y <= buf_y_size - 1; y += 1)
  {
    buf[x][y] = f[otime][x + ox][y + oy];
  }
}"""
    assert expected_loops in str(gather)