def main():
    '''Run one device_controller thread per CUDA device and wait for all.

    A thread is created and started for every entry returned by
    cuda.list_devices(); each thread receives the entry's position in
    that listing as its only argument.
    '''
    workers = []
    for index, _device in enumerate(cuda.list_devices()):
        worker = threading.Thread(target=device_controller, args=(index,))
        worker.start()
        workers.append(worker)

    # Block until every controller thread has finished.
    for worker in workers:
        worker.join()

    print('ending gracefully')
def main():
    '''Start a device_controller thread for each CUDA device, then join them.

    Threads are started one by one, in the order the devices are listed by
    cuda.list_devices(); the function returns only after all have finished.
    '''
    def launch(device_index):
        # Create and immediately start the controller thread for one device.
        thread = threading.Thread(target=device_controller,
                                  args=(device_index,))
        thread.start()
        return thread

    running = [launch(device_index)
               for device_index, _ in enumerate(cuda.list_devices())]

    for thread in running:
        thread.join()

    print('ending gracefully')
Example #3
0
 def __init__(self, gpuID=None, stream=None):
     '''Optionally select a GPU, then set up the stream, cuBLAS handle and
     block dimensions.

     gpuID  -- index of the device to select, or None to leave the current
               device selection untouched.
     stream -- an existing numba CUDA driver Stream to use, or None to
               create a new one with cuda.stream().

     Raises ValueError when gpuID is outside the range of listed devices.
     The stream type is checked with an assert, so a wrong type raises
     AssertionError (and is not checked at all under ``python -O``).
     '''
     if gpuID is not None:
         device_count = len(cuda.list_devices())
         if not (gpuID < device_count and gpuID >= 0):
             raise ValueError('GPU ID not found')
         # cuda.close() is called before switching to the requested device.
         cuda.close()
         cuda.select_device(gpuID)

     if stream is not None:
         assert isinstance(stream, numba.cuda.cudadrv.driver.Stream)
         self.stream = stream
     else:
         self.stream = cuda.stream()

     # The cuBLAS wrapper is created on the same stream stored above.
     self.blas = numbapro.cudalib.cublas.Blas(stream=self.stream)

     # NOTE(review): presumably threads-per-block for 1-D and 2-D kernel
     # launches; the launch sites are not visible here -- confirm.
     self.blockdim = 32
     self.blockdim2 = (32, 32)
import numpy as np
from math import ceil
import threading
from numbapro import cuda

# Report, as soon as the module is executed, how many devices
# cuda.list_devices() returns.
print('System has %d CUDA devices' % len(cuda.list_devices()))

# Type signature string in Numba notation: no return value, two int32
# array arguments.
# NOTE(review): the use site is not visible in this section -- presumably
# it is handed to the JIT when `kernel` is compiled; confirm.
signature = 'void(int32[:], int32[:])'


def kernel(dst, src):
    '''Write src[i] + 1 into dst[i] for the index i given by cuda.grid(1).

    Nothing is written when that index is at or beyond dst.shape[0], so
    launching more threads than there are items is harmless.
    '''
    idx = cuda.grid(1)
    if idx < dst.shape[0]:
        dst[idx] = src[idx] + 1


# Numba compiler is not threadsafe, so controller threads take this
# module-wide lock (``with compiler_lock:``) around compilation.
compiler_lock = threading.Lock()


def device_controller(cid):
    '''Select CUDA device `cid` for the calling thread and report on it.

    cid -- device index passed to cuda.select_device().

    Prints a line tagged with the device's string form, containing `cid`
    and the device's COMPUTE_CAPABILITY.
    '''
    # Bind the device to this thread, then fetch the handle for it.
    cuda.select_device(cid)
    dev = cuda.get_current_device()

    # Every message from this controller is tagged with its device.
    tag = '[%s]' % dev
    print(tag, 'device_controller', cid, '| CC', dev.COMPUTE_CAPABILITY)
import numpy as np
from math import ceil
import threading
from numbapro import cuda

# Printed when the module is executed: the number of entries returned by
# cuda.list_devices().
print('System has %d CUDA devices' % len(cuda.list_devices()))

# Numba-notation type signature string: void return, two int32 array
# parameters.
# NOTE(review): where this string is consumed is not visible in this
# section -- presumably when `kernel` is compiled; confirm.
signature = 'void(int32[:], int32[:])'

def kernel(dst, src):
    '''Store src[pos] + 1 in dst[pos], where pos comes from cuda.grid(1).

    An index at or past dst.shape[0] is ignored and nothing is written.
    '''
    pos = cuda.grid(1)
    in_bounds = pos < dst.shape[0]
    if in_bounds:
        dst[pos] = src[pos] + 1

# Numba compiler is not threadsafe; device_controller holds this lock
# (``with compiler_lock:``) while it uses the compiler.
compiler_lock = threading.Lock()

def device_controller(cid):
    cuda.select_device(cid)                    # bind device to thread
    device = cuda.get_current_device()         # get current device

    # print some information about the CUDA card
    prefix = '[%s]' % device
    print( prefix, 'device_controller', cid, '| CC', device.COMPUTE_CAPABILITY )
    
    max_thread = device.MAX_THREADS_PER_BLOCK

    with compiler_lock:                        # lock the compiler