Example #1
from pygpu import collectives

def get_intranode_comm(_local_rank, _local_size, ctx):
    # _local_id: NCCL clique id for ctx; the full source broadcasts rank
    # 0's id over MPI so all ranks on the node share it (omitted here).
    _local_id = collectives.GpuCommCliqueId(context=ctx)
    gpucomm = collectives.GpuComm(_local_id, _local_size, _local_rank)
    return gpucomm



if __name__ == '__main__':
    
    comm = get_internode_comm()
    
    rank = comm.rank
    device = 'cuda' + str(rank)
    size = comm.size

    from test_exchanger import init_device, clean_device
    _,ctx,arr,shared_x,shared_xx = init_device(device=device)
    
    gpucomm = get_intranode_comm(rank, size, ctx)

    if rank == 0: print 'original array %s' % arr

    # prepare nccl32 exchanger

    from exchanger_strategy import Exch_nccl32

    exch = Exch_nccl32(intercomm=comm, intracomm=gpucomm, avg=False)

    exch.prepare(ctx, [shared_x])

    exch.exchange()
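    # Hypothetical continuation (not in the original snippet): check the
    # NCCL all-reduce result on rank 0, mirroring the verification step
    # used for the copper exchanger in Example #2 below.
    if rank == 0: print 'nccl32 summation: %s' % shared_x.get_value()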
Example #2
import sys
sys.path.append('../../lib/base/')
device = sys.argv[1]

from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.rank
size = comm.size
# device='gpu'+str(rank)

from test_exchanger import init_device, clean_device

drv, ctx, arr, shared_x, shared_xx = init_device(device=device)

if rank == 0: print 'original array %s' % arr

# prepare copper exchanger

from exchanger_strategy import Exch_copper
exch = Exch_copper(comm, avg=False)

exch.prepare(ctx, drv, [shared_x])
exch.exchange()

if rank == 0: print 'copper summation: %s' % shared_x.get_value()

# prepare ar exchanger

from exchanger_strategy import Exch_allreduce
exch = Exch_allreduce(comm, avg=False)