def _(iet):
    # TODO: we need to pick the rank from `comm_shm`, not `comm`,
    # so that we have nranks == ngpus (as long as the user has launched
    # the right number of MPI processes per node given the available
    # number of GPUs per node)
    objcomm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            objcomm = i
            break

    devicetype = as_list(self.lang[self.platform])

    try:
        lang_init = [self.lang['init'](devicetype)]
    except TypeError:
        # Not all target languages need to be explicitly initialized
        lang_init = []

    deviceid = DeviceID()
    if objcomm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = self.lang['num-devices'](devicetype)
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        osdd_then = self.lang['set-device']([deviceid] + devicetype)
        osdd_else = self.lang['set-device']([rank % ngpus] + devicetype)

        body = lang_init + [Conditional(
            CondNe(deviceid, -1),
            osdd_then,
            List(body=[rank_decl, rank_init, ngpus_init, osdd_else]),
        )]

        header = c.Comment('Begin of %s+MPI setup' % self.lang['name'])
        footer = c.Comment('End of %s+MPI setup' % self.lang['name'])
    else:
        body = lang_init + [Conditional(
            CondNe(deviceid, -1),
            self.lang['set-device']([deviceid] + devicetype)
        )]

        header = c.Comment('Begin of %s setup' % self.lang['name'])
        footer = c.Comment('End of %s setup' % self.lang['name'])

    init = List(header=header, body=body, footer=(footer, c.Line()))
    iet = iet._rebuild(body=(init,) + iet.body)

    return iet, {'args': deviceid}
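For orientation, here is a minimal sketch of the selection policy the generated code implements, written with mpi4py rather than as generated C. The function `select_device` and its parameters are hypothetical, purely for illustration: honour an explicit `deviceid` if one was supplied, otherwise fall back to round-robin binding by MPI rank.

# Hypothetical sketch (not part of the codebase) of the device-selection
# policy the pass above emits in C.
from mpi4py import MPI

def select_device(deviceid, ngpus):
    if deviceid != -1:
        # The user pinned a device explicitly via the `deviceid` argument
        return deviceid
    # Otherwise bind round-robin: rank modulo the number of devices
    rank = MPI.COMM_WORLD.Get_rank()
    return rank % ngpus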
def test_deviceid():
    grid = Grid(shape=(4, 4, 4))

    did = DeviceID(grid.distributor._obj_comm)

    pkl_did = pickle.dumps(did)
    new_did = pickle.loads(pkl_did)

    assert did.name == new_did.name
    assert did.dtype == new_did.dtype
    assert did.data == new_did.data
def test_deviceid():
    did = DeviceID()

    pkl_did = pickle.dumps(did)
    new_did = pickle.loads(pkl_did)

    # TODO: this will be extended when we support DeviceID
    # for multi-node multi-gpu execution, when DeviceID will have
    # to pick its default value from an MPI communicator attached
    # to the runtime arguments
    assert did.name == new_did.name
    assert did.dtype == new_did.dtype
def __init__(self):
    # {name -> generator()} -- to create unique names for symbols, functions, ...
    self.counters = {}

    # Special symbols
    self.nthreads = NThreads()
    self.nthreads_nested = NThreadsNested()
    self.nthreads_nonaffine = NThreadsNonaffine()
    self.threadid = ThreadID(self.nthreads)
    self.deviceid = DeviceID()

    # Several groups of pthreads, each of size `npthread`, may be created
    # during compilation
    self.npthreads = []
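A minimal sketch of the `{name -> generator()}` counters pattern noted in the comment above; the `make_name` helper is hypothetical, not the actual implementation, but shows how one generator per base name yields unique symbol names.

# Hypothetical sketch of the counters pattern: one generator per base
# name, so successive requests produce unique names.
from collections import defaultdict
from itertools import count

counters = defaultdict(count)

def make_name(base):
    return '%s%d' % (base, next(counters[base]))

# make_name('r') -> 'r0'; make_name('r') -> 'r1'; make_name('x') -> 'x0'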
def test_special_symbols(self):
    """
    This test checks the singletonization, through the caching infrastructure,
    of the special symbols that an Operator may generate (e.g., `nthreads`).
    """
    grid = Grid(shape=(4, 4, 4))
    f = TimeFunction(name='f', grid=grid)
    sf = SparseTimeFunction(name='sf', grid=grid, npoint=1, nt=10)

    eqns = [Eq(f.forward, f + 1.)] + sf.inject(field=f.forward, expr=sf)

    opt = ('advanced', {'par-nested': 0, 'openmp': True})

    op0 = Operator(eqns, opt=opt)
    op1 = Operator(eqns, opt=opt)

    nthreads0, nthreads_nested0, nthreads_nonaffine0 = \
        [i for i in op0.input if isinstance(i, NThreadsBase)]
    nthreads1, nthreads_nested1, nthreads_nonaffine1 = \
        [i for i in op1.input if isinstance(i, NThreadsBase)]

    assert nthreads0 is nthreads1
    assert nthreads_nested0 is nthreads_nested1
    assert nthreads_nonaffine0 is nthreads_nonaffine1

    tid0 = ThreadID(op0.nthreads)
    tid1 = ThreadID(op0.nthreads)
    assert tid0 is tid1

    did0 = DeviceID()
    did1 = DeviceID()
    assert did0 is did1

    npt0 = NPThreads(name='npt', size=3)
    npt1 = NPThreads(name='npt', size=3)
    npt2 = NPThreads(name='npt', size=4)
    assert npt0 is npt1
    assert npt0 is not npt2
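The singleton behaviour exercised above can be obtained with argument-keyed caching at construction time. The following is a hypothetical sketch, not the library's actual caching machinery: identical construction arguments return the same object, different arguments do not.

# Hypothetical sketch of argument-keyed caching: a metaclass intercepts
# construction and memoizes instances by (class, args, kwargs).
class Cached(type):
    _cache = {}

    def __call__(cls, *args, **kwargs):
        key = (cls, args, tuple(sorted(kwargs.items())))
        if key not in cls._cache:
            cls._cache[key] = super().__call__(*args, **kwargs)
        return cls._cache[key]

class NPThreadsLike(metaclass=Cached):
    def __init__(self, name, size):
        self.name, self.size = name, size

assert NPThreadsLike(name='npt', size=3) is NPThreadsLike(name='npt', size=3)
assert NPThreadsLike(name='npt', size=3) is not NPThreadsLike(name='npt', size=4)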
def _(iet):
    # TODO: we need to pick the rank from `comm_shm`, not `comm`,
    # so that we have nranks == ngpus (as long as the user has launched
    # the right number of MPI processes per node given the available
    # number of GPUs per node)
    objcomm = None
    for i in iet.parameters:
        if isinstance(i, MPICommObject):
            objcomm = i
            break

    deviceid = DeviceID()
    device_nvidia = Macro('acc_device_nvidia')
    if objcomm is not None:
        rank = Symbol(name='rank')
        rank_decl = LocalExpression(DummyEq(rank, 0))
        rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

        ngpus = Symbol(name='ngpus')
        call = DefFunction('acc_get_num_devices', device_nvidia)
        ngpus_init = LocalExpression(DummyEq(ngpus, call))

        asdn_then = Call('acc_set_device_num', [deviceid, device_nvidia])
        asdn_else = Call('acc_set_device_num', [rank % ngpus, device_nvidia])

        body = [Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    asdn_then,
                    List(body=[rank_decl, rank_init, ngpus_init, asdn_else])
                )]
    else:
        body = [Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    Call('acc_set_device_num', [deviceid, device_nvidia])
                )]

    init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                body=body,
                footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))
    iet = iet._rebuild(body=(init,) + iet.body)

    return iet, {'args': deviceid}