def main():
    """Run ``device_controller`` once per CUDA device, each in its own thread.

    A thread is started for every entry of ``cuda.list_devices()``, receiving
    the entry's position in that listing as its only argument.  The call
    blocks until every thread has finished, then prints a closing message.
    """
    workers = []
    for index, _device in enumerate(cuda.list_devices()):
        worker = threading.Thread(target=device_controller, args=(index,))
        worker.start()
        workers.append(worker)

    # Wait for every controller thread before reporting completion.
    for worker in workers:
        worker.join()

    print('ending gracefully')
def main():
    """Launch one controller thread per listed CUDA device and wait for all.

    Each thread runs ``device_controller`` with the index of its device in
    ``cuda.list_devices()``.  Once all threads have been joined, a final
    message is printed.
    """
    threads = []
    for device_index, _ in enumerate(cuda.list_devices()):
        thread = threading.Thread(
            target=device_controller,
            args=(device_index,),
        )
        thread.start()
        threads.append(thread)

    # Block until every device thread has returned.
    for thread in threads:
        thread.join()

    print('ending gracefully')
def __init__(self, gpuID=None, stream=None):
    """Select a GPU (optionally) and set up the stream and cuBLAS handle.

    Parameters
    ----------
    gpuID : int, optional
        Index into ``cuda.list_devices()``.  When given and in range,
        ``cuda.close()`` is called and the device is selected.  ``None``
        leaves device selection untouched.
    stream : numba.cuda.cudadrv.driver.Stream, optional
        Stream to use.  When ``None`` a new one is created with
        ``cuda.stream()``.

    Raises
    ------
    ValueError
        If ``gpuID`` is negative or not smaller than the device count.
    AssertionError
        If ``stream`` is given but is not a ``Stream`` instance.
    """
    if gpuID is not None:
        device_count = len(cuda.list_devices())
        id_in_range = gpuID < device_count and gpuID >= 0
        if not id_in_range:
            raise ValueError('GPU ID not found')
        # Close before selecting, as the original sequence does.
        cuda.close()
        cuda.select_device(gpuID)

    if stream is not None:
        assert isinstance(stream, numba.cuda.cudadrv.driver.Stream)
    else:
        stream = cuda.stream()
    self.stream = stream

    # The cuBLAS wrapper is bound to the same stream stored on the instance.
    self.blas = numbapro.cudalib.cublas.Blas(stream=self.stream)

    # Presumably 1-D and 2-D thread-block sizes for kernel launches --
    # confirm against the methods that read them.
    self.blockdim = 32
    self.blockdim2 = (32, 32)
import numpy as np
from math import ceil
import threading
from numbapro import cuda

# Runs at import time: report how many CUDA devices are listed.
print('System has %d CUDA devices' % len(cuda.list_devices()))

# Presumably the type signature used when compiling ``kernel`` (two 1-D
# int32 arrays, no return value); the compile call is not visible here.
signature = 'void(int32[:], int32[:])'


def kernel(dst, src):
    '''Write ``src[i] + 1`` into ``dst[i]`` for the index of this thread.

    Threads whose index is at or beyond ``dst.shape[0]`` return without
    writing anything.
    '''
    i = cuda.grid(1)
    if i >= dst.shape[0]:
        # Index falls outside ``dst``; nothing to do for this thread.
        return
    dst[i] = src[i] + 1


# Numba compiler is not threadsafe
compiler_lock = threading.Lock()


def device_controller(cid):
    '''Select CUDA device ``cid`` and print its identity.

    ``cid`` is passed straight to ``cuda.select_device``.  The printed line
    contains the device's string form, ``cid`` and its compute capability.
    '''
    cuda.select_device(cid)                    # bind device to thread
    device = cuda.get_current_device()         # get current device

    # print some information about the CUDA card
    prefix = '[%s]' % device
    print(prefix, 'device_controller', cid, '| CC', device.COMPUTE_CAPABILITY)
import numpy as np from math import ceil import threading from numbapro import cuda print('System has %d CUDA devices' % len(cuda.list_devices())) signature = 'void(int32[:], int32[:])' def kernel(dst, src): '''A simple kernel that adds 1 to every item ''' i = cuda.grid(1) if i >= dst.shape[0]: return dst[i] = src[i] + 1 # Numba compiler is not threadsafe compiler_lock = threading.Lock() def device_controller(cid): cuda.select_device(cid) # bind device to thread device = cuda.get_current_device() # get current device # print some information about the CUDA card prefix = '[%s]' % device print( prefix, 'device_controller', cid, '| CC', device.COMPUTE_CAPABILITY ) max_thread = device.MAX_THREADS_PER_BLOCK with compiler_lock: # lock the compiler