Python select_deviceの例、numbapro.cuda.select_device Pythonの例

コード例 #1

0

ファイルを表示

ファイル: __init__.py プロジェクト: Abel-Ding/gpupy

 def __init__(self, gpuID=None, stream=None):
     """Optionally select a GPU, attach a CUDA stream and create a cuBLAS handle.

     Args:
         gpuID: Optional device index. When given it must satisfy
             ``0 <= gpuID < len(cuda.list_devices())``; ``cuda.close()`` is
             then called followed by ``cuda.select_device(gpuID)``.
         stream: Optional existing stream to reuse. When omitted a new one
             is obtained from ``cuda.stream()``.

     Raises:
         ValueError: If ``gpuID`` is given but is not a valid device index.
         AssertionError: If ``stream`` is given but is not a
             ``numba.cuda.cudadrv.driver.Stream``.
     """
     if gpuID is not None:
         id_is_valid = gpuID < len(cuda.list_devices()) and gpuID >= 0
         if not id_is_valid:
             raise ValueError('GPU ID not found')
         # close() is issued before switching to the requested device.
         cuda.close()
         cuda.select_device(gpuID)

     if stream is not None:
         assert isinstance(stream, numba.cuda.cudadrv.driver.Stream)
         self.stream = stream
     else:
         self.stream = cuda.stream()

     # The cuBLAS handle is bound to the same stream as this instance.
     self.blas = numbapro.cudalib.cublas.Blas(stream=self.stream)

     # Default 1-D and 2-D block sizes (presumably used for kernel launches
     # elsewhere in the class -- not visible from here).
     self.blockdim = 32
     self.blockdim2 = (32, 32)

コード例 #2

0

ファイルを表示

ファイル: block_increment.py プロジェクト: Jingoo88/Projet-3A-2015

def block_increment(start, n):
    """Launch ``cuda_div`` then ``block_kernel`` on device 0 and return the result.

    Two length-``n`` normal samples are generated on the device and passed
    to ``cuda_div`` together with a length-``n`` output buffer. That buffer,
    ``n * n`` uniform samples and an ``(n, n)`` output buffer are then passed
    to ``block_kernel``. The ``(n, n)`` output is copied back to the host.

    Args:
        start: Forwarded unchanged as the first argument of ``block_kernel``.
        n: Problem size; sets the sample lengths and the output shape.

    Returns:
        A float32 NumPy array of shape ``(n, n)`` holding the contents of
        the device output buffer after ``block_kernel`` has run.
    """
    cuda.select_device(0)
    stream = cuda.stream()

    # Launch configuration shared by both kernels.
    threads_per_block = 256
    blocks_per_grid = n // 256 + 1

    # Host-side buffer that receives the final result.
    result_host = np.zeros((n, n), dtype=np.float32)

    # Inputs and output of the first kernel.
    normal_first = curand.normal(0, 1, n, dtype=np.float32, device=True)
    normal_second = curand.normal(0, 1, n, dtype=np.float32, device=True)
    div_template = np.zeros(n, dtype=np.float32)
    div_out_dev = cuda.device_array_like(div_template)
    cuda_div[blocks_per_grid, threads_per_block, stream](
        normal_first, normal_second, div_out_dev, n)
    # div_out_dev is kept on the device for the next kernel, so device memory
    # is not inspected at this point.
    # NOTE (original author's guess, unverified): the two normal-sample
    # buffers are presumably released on the device once Python
    # garbage-collects them.

    # Inputs and output of the second kernel.
    uniform_dev = curand.uniform((n * n), dtype=np.float32, device=True)
    result_dev = cuda.device_array_like(result_host, stream)
    block_kernel[blocks_per_grid, threads_per_block, stream](
        start, n, div_out_dev, uniform_dev, result_dev)

    # The copy is queued on the stream; synchronize before handing the
    # host buffer back to the caller.
    result_dev.copy_to_host(result_host, stream)
    stream.synchronize()

    return result_host

コード例 #3

0

ファイルを表示

ファイル: sli_test_multigpu_mt.py プロジェクト: lundybernard/cuda_practice

def device_controller(cid):
    """Run a self-checking CUDA job on device ``cid``.

    The function selects the device, compiles ``kernel`` with ``signature``
    while holding ``compiler_lock``, runs the compiled kernel in place on an
    int32 array of 12345 elements and verifies the data copied back.

    Args:
        cid: Device index passed to ``cuda.select_device``; also used to
            scale the test data by ``cid + 1``.

    Raises:
        ValueError: If the data copied back from the device does not equal
            the original data plus one in every position.
    """
    cuda.select_device(cid)  # bind device to thread
    device = cuda.get_current_device()

    # Every log line is tagged with the device it came from.
    prefix = '[%s]' % device
    print(prefix, 'device_controller', cid, '| CC', device.COMPUTE_CAPABILITY)

    max_thread = device.MAX_THREADS_PER_BLOCK

    # Compilation happens under the shared lock; the jitted kernel is loaded
    # into the current context.
    with compiler_lock:
        cuda_kernel = cuda.jit(signature)(kernel)

    # Test data and an untouched copy to compare against afterwards.
    n_elems = 12345
    host_data = np.arange(n_elems, dtype=np.int32) * (cid + 1)
    snapshot = host_data.copy()

    # Launch shape: a single block when everything fits in one, otherwise
    # enough full-size blocks to cover all elements.
    if n_elems < max_thread:
        ngrid, nthread = 1, n_elems
    else:
        ngrid, nthread = int(ceil(float(n_elems) / max_thread)), max_thread

    print(prefix, 'grid x thread = %d x %d' % (ngrid, nthread))

    # Host -> device, compute in place, device -> host.
    dev_data = cuda.to_device(host_data)
    cuda_kernel[ngrid, nthread](dev_data, dev_data)
    dev_data.copy_to_host(host_data)

    # The job counts as successful only if every element grew by exactly one.
    if not np.all(host_data == snapshot + 1):
        raise ValueError

コード例 #4

0

ファイルを表示

ファイル: sli_test_multigpu_mt.py プロジェクト: lundybernard/cuda_practice

def device_controller(cid):
    """Bind the caller to CUDA device ``cid`` and run an in-place kernel check.

    ``kernel`` is compiled with ``signature`` inside ``compiler_lock``, then
    launched over a 12345-element int32 array. The result is copied back and
    compared with the original values plus one.

    Args:
        cid: Index of the device to select. The input array is
            ``arange(12345) * (cid + 1)``.

    Raises:
        ValueError: If any element of the returned data differs from the
            corresponding original element plus one.
    """
    # Bind the device, then fetch the handle of the device now current.
    cuda.select_device(cid)
    device = cuda.get_current_device()

    # Print some information about the CUDA card.
    prefix = '[%s]' % device
    print(prefix, 'device_controller', cid, '| CC', device.COMPUTE_CAPABILITY)

    max_thread = device.MAX_THREADS_PER_BLOCK

    with compiler_lock:
        # The compiler is locked while this function's kernel is prepared;
        # the jitted CUDA kernel is loaded into the current context.
        jit_decorator = cuda.jit(signature)
        cuda_kernel = jit_decorator(kernel)

    # Prepare the input and remember its initial contents.
    total = 12345
    buffer = np.arange(total, dtype=np.int32) * (cid + 1)
    initial = buffer.copy()

    # Determine the number of blocks and threads per block.
    fits_in_one_block = not (total >= max_thread)
    if fits_in_one_block:
        ngrid = 1
        nthread = total
    else:
        ngrid = int(ceil(float(total) / max_thread))
        nthread = max_thread

    print(prefix, 'grid x thread = %d x %d' % (ngrid, nthread))

    # Real CUDA work: upload, compute in place, download.
    device_buffer = cuda.to_device(buffer)
    cuda_kernel[ngrid, nthread](device_buffer, device_buffer)
    device_buffer.copy_to_host(buffer)

    # Check the result element by element.
    matches = buffer == initial + 1
    if not np.all(matches):
        raise ValueError

コード例 #5

0

ファイルを表示

ファイル: cauchy_sample_generator.py プロジェクト: Jingoo88/Projet-3A-2015

__version__ = '0.1'
__maintainer__ = ['gilles.drigout', 'thomas.clavier']
__status__ = 'Development'


# Uses device-generated random normal simulations to generate Cauchy simulations.
# The method may work better if the normal simulations are reused ==> to check.

from numbapro import cuda
from numbapro import vectorize
from numbapro.cudalib import curand
import numpy as np
import matplotlib.pyplot as plt


cuda.select_device(0)

class Cauchy:
	"""Container for samples that are filled in by a cuRAND generator."""

	def __init__(self, size):
		"""Allocate an uninitialised float64 array of shape ``size``.

		Args:
			size: Shape forwarded to ``np.empty`` (an int or a tuple of ints).
		"""
		self.container = np.empty(size, dtype=np.float64)

	def __get_cuda_randoms(self):
	    """Fill ``self.container`` through a XORWOW cuRAND generator.

	    ``prng.normal`` receives the container plus the values 0 and 1
	    (presumably mean and standard deviation -- confirm against the
	    numbapro cuRAND API).
	    """
	    generator = curand.PRNG(rndtype=curand.PRNG.XORWOW)
	    generator.normal(self.container, 0, 1)

	    # TODO (original note, "to be completed"):
	    # self.container = rand.reshape((x, y))

コード例 #6

0

ファイルを表示

ファイル: cudaTrade.py プロジェクト: boersmamarcel/CAStocks

# Rolling volatility. For each index i the window holds the ki + 1 prices
# ending at i (ki is k, capped at i). The value is the summed absolute
# deviation from the window mean, divided by ki times that mean.
# Values are gathered in a list and appended in a single call, because
# np.append copies the entire array on every call and doing it once per
# iteration makes the loop quadratic.
volatility_values = []
for i in range(1, sp500_open.size):  # compute volatility
    ki = k if k < i else i
    window = sp500_open[i - ki:i + 1]
    price_avg = np.mean(window)
    value = np.sum(np.absolute(window - price_avg)) / (1.0 * ki * price_avg)
    volatility_values.append(value)
if volatility_values:
    # Guarded so that an empty loop leaves sp500_volatility untouched.
    sp500_volatility = np.append(sp500_volatility, volatility_values)

# Lagged products for lags 1..499: the sum over t of x[t] * x[t - lag],
# computed for both the price changes and the volatility series.
price_products = []
volatility_products = []
for lag in range(1, 500):  # array of correlation with certain lags
    price_products.append(
        np.sum(np.multiply(sp500_price_change[lag:], sp500_price_change[:-lag])))
    volatility_products.append(
        np.sum(np.multiply(sp500_volatility[lag:], sp500_volatility[:-lag])))
# Appending to an empty float array keeps the same dtype promotion as
# building the arrays one element at a time.
sp500_price_clustering = np.append(np.array([]), price_products)
sp500_volatility_clustering = np.append(np.array([]), volatility_products)

# Normalize to the first entry, which is the lag-1 value.
sp500_price_clustering = sp500_price_clustering / sp500_price_clustering[0]
sp500_volatility_clustering = sp500_volatility_clustering / sp500_volatility_clustering[0]


cuda.select_device(0)  # select the video card

# Grid dimensions: the grid below is reshaped to h rows by w columns.
w = 120
h = 30

# Probability that a cell is drawn as 1 in the initial grid.
initProb = 0.05

# Generate random traders: draw w*h values from {0, 1}, arrange them as an
# (h, w) grid and store the grid as int32.
initial_draw = np.random.choice(
    [0, 1], size=w * h, replace=True, p=[1 - initProb, initProb])
A = initial_draw.reshape(h, w).astype(np.int32)
# B is an uninitialised array with the same shape and dtype as A.
B = np.empty_like(A)

def calcCluster(grid):
    grid_abs = np.absolute(grid) # reduce field to active/inactive traders
    grid_abs = grid_abs == 1 # get field of True/False values
        
    # lw: matrix with cluster numbers, num: total number of clusters, area: matrix of cluster size

コード例 #7

0

ファイルを表示

ファイル: testilp.py プロジェクト: Aahung/numbapro-examples

from timeit import default_timer as timer
import math
import numpy as np
import pylab
from numbapro import cuda, cudadrv
# For machine with multiple devices
cuda.select_device(0)

@cuda.jit('float32(float32, float32)', device=True)
def core(a, b):
    """Device function: return the float32 sum of ``a`` and ``b``."""
    total = a + b
    return total

@cuda.jit('void(float32[:], float32[:], float32[:])')
def vec_add(a, b, c):
    """Kernel: each thread writes ``core(a[i], b[i])`` to ``c[i]``.

    ``i`` is the thread's 1-D global index from ``cuda.grid(1)``. There is
    no bounds check, so the launch must not cover more threads than the
    arrays have elements.
    """
    tid = cuda.grid(1)
    lhs = a[tid]
    rhs = b[tid]
    c[tid] = core(lhs, rhs)

@cuda.jit('void(float32[:], float32[:], float32[:])')
def vec_add_ilp_x2(a, b, c):
    # read
    i = cuda.grid(1)
    ai = a[i]
    bi = b[i]

    bw = cuda.blockDim.x
    gw = cuda.gridDim.x
    stride = gw * bw

    j = i + stride
    aj = a[j]
    bj = b[j]

コード例 #8

0

ファイルを表示

ファイル: spca.py プロジェクト: mahjoubihamza/numbapro-spca

from __future__ import print_function, division
import sys
import os
import numpy as np
import timeit
import itertools
import math
from numbapro import cuda, int32, float32, float64, void
from timeit import default_timer as timer

from numbapro.cudalib import curand
# from numbapro.cudalib.sorting.radixlib import RadixSort
from numbapro.cudalib.sorting.segsort import segmented_sort

# Runtime configuration, overridable through environment variables:
#   CUDA_DEVICE - index of the GPU to select (default 0)
#   NN          - integer (default 1000); also used in the default file name
#   FILE        - input file name (default "input<NN>.npy")
cuda.select_device(int(os.getenv("CUDA_DEVICE", 0)))
NN = int(os.getenv("NN", 1000))
FILE = os.getenv("FILE", "input{}.npy".format(NN))

# Python 2/3 compatibility: where ``xrange`` exists, ``zip`` is rebound to
# ``itertools.izip``; otherwise ``xrange`` becomes an alias of ``range``.
try:
    xrange
except NameError:
    xrange = range
else:
    zip = itertools.izip

cached_input_file = FILE  # "input.npy"

# Numba type and the matching NumPy dtype.
float_type = float32
float_dtype = np.float32


def generate_input():

コード例 #9

0

ファイルを表示

ファイル: spca.py プロジェクト: ContinuumIO/numbapro-spca

from __future__ import print_function, division
import sys
import os
import numpy as np
import timeit
import itertools
import math
from numbapro import cuda, int32, float32, float64, void
from timeit import default_timer as timer

from numbapro.cudalib import curand
# from numbapro.cudalib.sorting.radixlib import RadixSort
from numbapro.cudalib.sorting.segsort import segmented_sort

# Settings read from the environment, each with a fallback:
#   CUDA_DEVICE -> GPU index handed to cuda.select_device (fallback 0)
#   NN          -> integer (fallback 1000), embedded in the default file name
#   FILE        -> input file name (fallback "input<NN>.npy")
device_index = int(os.getenv("CUDA_DEVICE", 0))
cuda.select_device(device_index)
NN = int(os.getenv("NN", 1000))
FILE = os.getenv("FILE", "input{}.npy".format(NN))

# Python 2/3 shim: if ``xrange`` is defined, use the lazy ``itertools.izip``
# as ``zip``; if it is not, alias ``xrange`` to ``range``.
try:
    xrange
except NameError:
    xrange = range
else:
    zip = itertools.izip

cached_input_file = FILE  # "input.npy"

# Numba scalar type paired with the equivalent NumPy dtype.
float_type = float32
float_dtype = np.float32


def generate_input():