def init(device=None):
    """Initializes CUDA global state.

    Chainer maintains CUDA context, CUBLAS context, random number generator
    and device memory pool for each GPU device and for each process (the
    main process or a process forked by :mod:`multiprocessing`) as global
    states. When called for the first time on the process, this function
    initializes these global states.

    .. warning::

       This function also initializes PyCUDA and scikits.cuda. Since these
       packages do not support forking after initialization, do not call
       this function before forking the process.

    This function also registers :func:`shutdown` to :mod:`atexit` slot.

    It also initializes random number generator. User can set fixed seed
    with ``CHAINER_SEED`` environment variable.

    Args:
        device (``int`` or :class:`~pycuda.driver.Device` or ``None``):
            Device ID to initialize on.

    """
    global _contexts, _cublas_handles, _generators, _pid, _pools

    if not available:
        global _import_error
        # `available`/`_import_error` are set at import time elsewhere in
        # this module; surface the original import failure to the user.
        raise RuntimeError(
            'CUDA environment is not correctly set up. ' +
            'The original import error said: ' + str(_import_error))

    # Per-process guard: _pid holds the PID of the process that last
    # initialized, so a forked child re-initializes its own state.
    pid = os.getpid()
    if _pid == pid:  # already initialized
        return

    drv.init()  # must precede any context/device creation
    if device is None:  # use default device
        context = cutools.make_default_context()
        device = Context.get_device()
    else:
        device = Device(device)
        context = device.make_context()
    # Reset all per-device caches; stale entries from a parent process
    # would reference contexts that are invalid after fork.
    _contexts = {device: context}
    _generators = {}
    _pools = {}
    _cublas_handles = {}
    cumisc.init(mem_alloc)  # scikits.cuda helpers share our allocator

    # NOTE(review): the env value is passed as-is (a string or None);
    # presumably `seed` handles the conversion — confirm in its definition.
    seed(os.environ.get('CHAINER_SEED'))

    _pid = pid  # mark as initialized (set last, after all state is ready)
    atexit.register(shutdown)
# Micro-benchmark setup: gather per-row probabilities on the GPU via
# pycuda.gpuarray.take.
import time
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
import scikits.cuda.misc as scm
import pycuda.autoinit
import utils
import numpy as np

# Problem size: K-way scores over a batch of B rows; N pre-drawn index rows.
K = 87930
B = 1000
N = 1000

scm.init()

# Random inputs. Host RNG call order kept as randn -> rand -> randint so the
# generated values match the original script exactly.
scores = gpuarray.to_gpu((5 * np.random.randn(B, K)).astype(np.float32))
probs = gpuarray.to_gpu(np.random.rand(B, K).astype(np.float32))
indices = np.random.randint(0, K, size=(N, B)).astype(np.uint32)

# Pre-allocated device/host work buffers (allocation uses no RNG, so
# ordering here is free).
deltas = gpuarray.empty_like(scores)
maxscores = gpuarray.empty((B,), dtype=np.float32)
maxscoreids = gpuarray.empty((B,), dtype=np.uint32)
sumdeltas = gpuarray.empty((B,), dtype=np.float32)
cpu_probs = np.empty((B, K), dtype=np.float32)
gpu_ind = gpuarray.empty((B,), dtype=np.uint32)
selected_probs = gpuarray.empty((B,), dtype=np.float32)

# Warm-up gathers over the first 10 index rows.
# NOTE(review): gpuarray.take indexes the *flattened* probs array, so
# indices in [0, K) only ever hit row 0 of the (B, K) matrix — confirm this
# is the intended access pattern for the benchmark.
for i, index_row in enumerate(indices[:10]):
    gpu_ind.set(index_row)
    gpuarray.take(probs, gpu_ind, out=selected_probs)
# GPU gather micro-benchmark (pycuda + scikits.cuda).
import time
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
import scikits.cuda.misc as scm
import pycuda.autoinit
import utils
import numpy as np

K = 87930  # number of classes / columns per row
B = 1000   # batch size (rows)
N = 1000   # number of pre-generated index rows

scm.init()

# Short dtype aliases — same objects as np.float32 / np.uint32.
_f32 = np.float32
_u32 = np.uint32

# Inputs: scaled Gaussian scores and uniform probabilities, both (B, K).
scores = gpuarray.to_gpu((5 * np.random.randn(B, K)).astype(_f32))
probs = gpuarray.to_gpu(np.random.rand(B, K).astype(_f32))

# Device/host scratch buffers.
maxscores = gpuarray.empty((B,), dtype=_f32)
maxscoreids = gpuarray.empty((B,), dtype=_u32)
deltas = gpuarray.empty_like(scores)
sumdeltas = gpuarray.empty((B,), dtype=_f32)
cpu_probs = np.empty((B, K), dtype=_f32)

# One row of indices per benchmark iteration, values in [0, K).
indices = np.random.randint(0, K, size=(N, B)).astype(_u32)
gpu_ind = gpuarray.empty((B,), dtype=_u32)
selected_probs = gpuarray.empty((B,), dtype=_f32)

# Warm-up: upload an index row, then gather from probs.
# NOTE(review): take() uses flat indexing into probs, so values < K address
# only the first row — verify this matches the intended benchmark workload.
for i in range(10):
    gpu_ind.set(indices[i])
    gpuarray.take(probs, gpu_ind, out=selected_probs)
def setUp(self):
    """Per-test fixture: seed NumPy's RNG and initialize scikits.cuda."""
    np.random.seed(0)  # fixed seed so random test data is reproducible
    # NOTE(review): `misc` is imported elsewhere in this file — presumably
    # scikits.cuda.misc; init() sets up its CUBLAS/CUDA helper state.
    misc.init()