Example #1
    def test_slice_as_arg(self):
        global cufoo
        cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo)
        cucopy = cuda.jit("void(int32[:,:], int32[:,:])")(copy)

        inp = np.arange(100, dtype=np.int32).reshape(10, 10)
        out = np.zeros_like(inp)

        cucopy[1, 10](inp, out)
Example #2
    def test_kernel(self):

        def foo(arr, val):
            i = cuda.grid(1)
            if i < arr.size:
                arr[i] = float32(i) / val

        fastver = cuda.jit("void(float32[:], float32)", fastmath=True)(foo)
        precver = cuda.jit("void(float32[:], float32)")(foo)

        self.assertIn('div.full.ftz.f32', fastver.ptx)
        self.assertNotIn('div.full.ftz.f32', precver.ptx)
Example #3
    def test_exception(self):
        unsafe_foo = cuda.jit(foo)
        safe_foo = cuda.jit(debug=True)(foo)

        if not config.ENABLE_CUDASIM:
            # Simulator throws exceptions regardless of debug
            # setting
            unsafe_foo[1, 2](numpy.array([0, 1]))

        with self.assertRaises(IndexError) as cm:
            safe_foo[1, 2](numpy.array([0, 1]))
        self.assertIn("tuple index out of range", str(cm.exception))
Example #4
    def test_const_record_align(self):
        A = np.zeros(2, dtype=np.float64)
        B = np.zeros(2, dtype=np.float64)
        C = np.zeros(2, dtype=np.float64)
        D = np.zeros(2, dtype=np.float64)
        E = np.zeros(2, dtype=np.float64)
        jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.v4.u8',
                jcuconst.ptx,
                'load the first three bytes as a vector')

            self.assertIn(
                'ld.const.u32',
                jcuconst.ptx,
                'load the uint32 natively')

            self.assertIn(
                'ld.const.u8',
                jcuconst.ptx,
                'load the last byte by itself')

        jcuconst[2, 1](A, B, C, D, E)
        np.testing.assert_allclose(A, CONST_RECORD_ALIGN['a'])
        np.testing.assert_allclose(B, CONST_RECORD_ALIGN['b'])
        np.testing.assert_allclose(C, CONST_RECORD_ALIGN['x'])
        np.testing.assert_allclose(D, CONST_RECORD_ALIGN['y'])
        np.testing.assert_allclose(E, CONST_RECORD_ALIGN['z'])
Example #5
 def test_fill_threadidx(self):
     compiled = cuda.jit("void(int32[:])")(fill_threadidx)
     N = 10
     ary = np.ones(N, dtype=np.int32)
     exp = np.arange(N, dtype=np.int32)
     compiled[1, N](ary)
     self.assertTrue(np.all(ary == exp))
Example #6
 def test_simple_grid1d(self):
     compiled = cuda.jit("void(int32[::1])")(simple_grid1d)
     ntid, nctaid = 3, 7
     nelem = ntid * nctaid
     ary = np.empty(nelem, dtype=np.int32)
     compiled[nctaid, ntid](ary)
     self.assertTrue(np.all(ary == np.arange(nelem)))
Example #7
    def test_device(self):
        # fastmath option is ignored for device function
        @cuda.jit("float32(float32, float32)", device=True)
        def foo(a, b):
            return a / b

        def bar(arr, val):
            i = cuda.grid(1)
            if i < arr.size:
                arr[i] = foo(i, val)

        fastver = cuda.jit("void(float32[:], float32)", fastmath=True)(bar)
        precver = cuda.jit("void(float32[:], float32)")(bar)

        self.assertIn('div.full.ftz.f32', fastver.ptx)
        self.assertNotIn('div.full.ftz.f32', precver.ptx)
Example #8
 def test_useless_sync(self):
     compiled = cuda.jit("void(int32[::1])")(useless_sync)
     nelem = 10
     ary = np.empty(nelem, dtype=np.int32)
     exp = np.arange(nelem, dtype=np.int32)
     compiled[1, nelem](ary)
     self.assertTrue(np.all(ary == exp))
Example #9
 def test_const_array(self):
     jcuconst = cuda.jit('void(float64[:])')(cuconst)
     print(jcuconst.ptx)
     self.assertTrue('.const' in jcuconst.ptx)
     A = numpy.empty_like(CONST1D)
     jcuconst[2, 5](A)
     self.assertTrue(numpy.all(A == CONST1D))
Example #10
 def test_local_array(self):
     jculocal = cuda.jit('void(int32[:], int32[:])')(culocal)
     self.assertTrue('.local' in jculocal.ptx)
     A = numpy.arange(100, dtype='int32')
     B = numpy.zeros_like(A)
     jculocal(A, B)
     self.assertTrue(numpy.all(A == B))
Example #11
    def test_atomic_add_double_global_3(self):
        ary = np.random.randint(0, 32, size=32).astype(np.float64).reshape(4, 8)
        orig = ary.copy()
        cuda_func = cuda.jit('void(float64[:,:])')(atomic_add_double_global_3)
        cuda_func[1, (4, 8)](ary)

        np.testing.assert_equal(ary, orig + 1)
Example #12
 def check_atomic_max(self, dtype, lo, hi):
     vals = np.random.randint(lo, hi, size=(32, 32)).astype(dtype)
     res = np.zeros(1, dtype=vals.dtype)
     cuda_func = cuda.jit(atomic_max)
     cuda_func[32, 32](res, vals)
     gold = np.max(vals)
     np.testing.assert_equal(res, gold)
Example #13
 def test_boolean(self):
     func = cuda.jit('void(float64[:], bool_)')(boolean_func)
     A = np.array([0], dtype='float64')
     func(A, True)
     self.assertTrue(A[0] == 123)
     func(A, False)
     self.assertTrue(A[0] == 321)
Example #14
 def test_printfloat(self):
     jprintfloat = cuda.jit('void()', debug=False)(printfloat)
     with captured_cuda_stdout() as stdout:
         jprintfloat()
     # CUDA and the simulator format floats differently
     self.assertIn(stdout.getvalue(), ["0 23 34.750000 321\n",
                                       "0 23 34.75 321\n"])
Example #15
    def test_atomic_add3(self):
        ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8)
        orig = ary.copy()
        cuda_atomic_add3 = cuda.jit('void(uint32[:,:])')(atomic_add3)
        cuda_atomic_add3[1, (4, 8)](ary)

        self.assertTrue(np.all(ary == orig + 1))
Example #16
    def test_const_record(self):
        A = np.zeros(2, dtype=float)
        B = np.zeros(2, dtype=int)
        jcuconst = cuda.jit(cuconstRec).specialize(A, B)

        if not ENABLE_CUDASIM:
            if not any(c in jcuconst.ptx for c in [
                # a vector load: the compiler fuses the load
                # of the x and y fields into a single instruction!
                'ld.const.v2.u64',

                # for some reason Win64 / Py3 / CUDA 9.1 decides
                # to do two u32 loads, and shifts and ors the
                # values to get the float `x` field, then uses
                # another ld.const.u32 to load the int `y` as
                # a 32-bit value!
                'ld.const.u32',
            ]):
                raise AssertionError(
                    "the compiler should realise it doesn't " \
                    "need to interpret the bytes as float!")

        jcuconst[2, 1](A, B)
        np.testing.assert_allclose(A, CONST_RECORD['x'])
        np.testing.assert_allclose(B, CONST_RECORD['y'])
Example #17
 def test_syncthreads_count(self):
     compiled = cuda.jit("void(int32[:], int32[:])")(use_syncthreads_count)
     ary_in = np.ones(72, dtype=np.int32)
     ary_out = np.zeros(72, dtype=np.int32)
     ary_in[31] = 0
     ary_in[42] = 0
     compiled[1, 72](ary_in, ary_out)
     self.assertTrue(np.all(ary_out == 70))
Example #18
 def test_cuhello(self):
     jcuhello = cuda.jit('void()', debug=False)(cuhello)
     with captured_cuda_stdout() as stdout:
         jcuhello[2, 3]()
     # The output of GPU threads is intermingled, just sanity check it
     out = stdout.getvalue()
     expected = ''.join('%d 999\n' % i for i in range(6))
     self.assertEqual(sorted(out), sorted(expected))
Example #19
 def test_print_array(self):
     """
     Eyeballing required
     """
     jcuprintary = cuda.jit('void(float32[:])')(cuprintary)
     A = np.arange(10, dtype=np.float32)
     jcuprintary[2, 5](A)
     cuda.synchronize()
Example #20
    def test_atomic_max_nan_location(self):
        vals = np.random.randint(0, 128, size=(1,1)).astype(np.float64)
        gold = vals.copy().reshape(1)
        res = np.zeros(1, np.float64) + np.nan
        cuda_func = cuda.jit('void(float64[:], float64[:,:])')(atomic_max)
        cuda_func[1, 1](res, vals)

        np.testing.assert_equal(res, gold)
Example #21
    def test_atomic_max_double_shared(self):
        vals = np.random.randint(0, 32, size=32).astype(np.float64)
        res = np.zeros(1, np.float64)
        cuda_func = cuda.jit('void(float64[:], float64[:])')(atomic_max_double_shared)
        cuda_func[1, 32](res, vals)

        gold = np.max(vals)
        np.testing.assert_equal(res, gold)
Example #22
    def test_atomic_max_nan_val(self):
        res = np.random.randint(0, 128, size=1).astype(np.float64)
        gold = res.copy()
        vals = np.zeros((1, 1), np.float64) + np.nan
        cuda_func = cuda.jit('void(float64[:], float64[:,:])')(atomic_max)
        cuda_func[1, 1](res, vals)

        np.testing.assert_equal(res, gold)
Example #23
    def check_atomic_min(self, dtype, lo, hi):
        vals = np.random.randint(lo, hi, size=(32, 32)).astype(dtype)
        res = np.array([65535], dtype=vals.dtype)
        cuda_func = cuda.jit(atomic_min)
        cuda_func[32, 32](res, vals)

        gold = np.min(vals)
        np.testing.assert_equal(res, gold)
Example #24
 def unary_template(self, func, npfunc, npdtype, npmtype, start, stop):
     nelem = 50
     A = np.linspace(start, stop, nelem).astype(npdtype)
     B = np.empty_like(A)
     arytype = npmtype[::1]
     cfunc = cuda.jit((arytype, arytype))(func)
     cfunc[1, nelem](A, B)
     self.assertTrue(np.allclose(npfunc(A), B))
Example #25
 def test_string(self):
     cufunc = cuda.jit("void()", debug=False)(printstring)
     with captured_cuda_stdout() as stdout:
         cufunc[1, 3]()
     out = stdout.getvalue()
     lines = sorted(out.splitlines(True))
     expected = ["%d hop! 999\n" % i for i in range(3)]
     self.assertEqual(lines, expected)
Example #26
 def test_local_array_complex(self):
     sig = 'void(complex128[:], complex128[:])'
     jculocalcomplex = cuda.jit(sig)(culocalcomplex)
     self.assertTrue('.local' in jculocalcomplex.ptx)
     A = (numpy.arange(100, dtype='complex128') - 1) / 2j
     B = numpy.zeros_like(A)
     jculocalcomplex(A, B)
     self.assertTrue(numpy.all(A == B))
Example #27
 def test_threadfence_codegen(self):
     # Does not test runtime behavior, just the code generation.
     compiled = cuda.jit("void(int32[:])")(use_threadfence)
     ary = np.zeros(10, dtype=np.int32)
     compiled[1, 1](ary)
     self.assertEqual(123 + 321, ary[0])
     if not ENABLE_CUDASIM:
         self.assertIn("membar.gl;", compiled.ptx)
Example #28
    def test_indirect_add2f(self):
        compiled = cuda.jit("void(float32[:])")(indirect_add2f)

        nelem = 10
        ary = np.arange(nelem, dtype=np.float32)
        exp = ary + ary
        compiled[1, nelem](ary)

        self.assertTrue(np.all(ary == exp), (ary, exp))
Example #29
    def test_atomic_max_double_normalizedindex(self):
        vals = np.random.randint(0, 65535, size=(32, 32)).astype(np.float64)
        res = np.zeros(1, np.float64)
        cuda_func = cuda.jit('void(float64[:], float64[:,:])')(
            atomic_max_double_normalizedindex)
        cuda_func[32, 32](res, vals)

        gold = np.max(vals)
        np.testing.assert_equal(res, gold)
Example #30
 def compile(self, sig, locals={}, **targetoptions):
     assert self._compiled is None
     assert not locals
     options = self.targetoptions.copy()
     options.update(targetoptions)
     kernel = jit(sig, **options)(self.py_func)
     self._compiled = kernel
     if hasattr(kernel, "_npm_context_"):
         self._npm_context_ = kernel._npm_context_
Example #31
 def test_ffs_i4_1s(self):
     compiled = cuda.jit("void(int32[:], int32)")(simple_ffs)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary, 0xFFFFFFFF)
     self.assertEqual(ary[0], 0)
Example #32
 def _(kernel):
     kernel = cuda.jit(kernel)
     kernel[self.block_dim, self.grid_dim](*args)
Example #33
 def test_ffs_i4_0s(self):
     compiled = cuda.jit("void(int32[:], int32)")(simple_ffs)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary, 0x0)
     self.assertEqual(ary[0], 32, "CUDA semantics")
Example #34
import numpy as np
import math
from timeit import default_timer as timer
from numba import cuda
from numba import *


def mult(a, b):
    return a * b


mult_gpu = cuda.jit(restype=float32, argtypes=[float32, float32],
                    device=True)(mult)


@cuda.jit(argtypes=[float32[:, :], float32[:, :], float32[:, :]])
def mult_kernel(a, b, c):
    Ni = c.shape[0]
    Nj = c.shape[1]
    Nk = a.shape[1]

    startX, startY = cuda.grid(2)
    gridX = cuda.gridDim.x * cuda.blockDim.x
    gridY = cuda.gridDim.y * cuda.blockDim.y

    for i in range(startX, Ni, gridX):
        for j in range(startY, Nj, gridY):
            c[i, j] = 0
            for k in range(Nk):
                c[i, j] = c[i, j] + mult_gpu(a[i, k], b[j, k])
Example #35
 def test_ffs_i8(self):
     compiled = cuda.jit("void(int32[:], int64)")(simple_ffs)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary, 0x000000000010000)
     self.assertEqual(ary[0], 16)
Example #36
 def test_unconfigured_untyped_cudakernel(self):
     kernfunc = cuda.jit(noop)
     self._test_unconfigured(kernfunc)
Example #37
 def c_contigous():
     compiled = cuda.jit("void(int32[:,:,::1])")(fill3d_threadidx)
     ary = np.zeros((X, Y, Z), dtype=np.int32)
     compiled[1, (X, Y, Z)](ary)
     return ary
Example #38
import math

import numpy as np
import numba
from numba import cuda
from cuda_friendly_vincenty import vincenty

wrap = cuda.jit('float32(float32, float32, float32, float32)', device=True)
vincenty = wrap(vincenty)


@cuda.jit('int32(int32)', device=True)
def node_to_level(node):
    return math.floor(math.log(np.float32(node + 1)) / math.log(np.float32(2)))


@cuda.jit('int32(int32, int32)', device=True)
def node_range_start(node, n):
    level = node_to_level(node)
    step = n / (2**level)
    pos = node - 2**level + 1
    return math.floor(pos * step)


@cuda.jit('int32(int32, int32)', device=True)
def node_range_end(node, n):
    level = node_to_level(node)
    step = n / (2**level)
    pos = node - 2**level + 1
    return math.floor((pos + 1) * step)
Example #39
 def test_fma_f8(self):
     compiled = cuda.jit("void(f8[:], f8, f8, f8)")(simple_fma)
     ary = np.zeros(1, dtype=np.float64)
     compiled(ary, 2., 3., 4.)
     np.testing.assert_allclose(ary[0], 2 * 3 + 4)
Example #40
 def _compile_kernel(self, fnobj, sig):
     return cuda.jit(sig)(fnobj)
Example #41
import MRT_LB_local as d2q5
from numba import cuda, float64, float32
import numpy as np
import matplotlib.pyplot as plt

getg = cuda.jit('void (f8[:,:,:], f8[::1], i8, i8)', device=True)(d2q5.getg)
getfl = cuda.jit('void (f8[:,:], f8[::1], i8, i8)', device=True)(d2q5.getfl)
#getn = cuda.jit('void (f8[:,:,:], f8[::1], i8, i8)', device=True)(d2q5.getn)
calc_T = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.calc_T)
calc_copiafl = cuda.jit('void (f8[::1],f8[::1])', device=True)(d2q5.calc_copiafl)
calc_Hk = cuda.jit('void (f8[::1], f8[::1], f8[::1])', device=True)(d2q5.calc_Hk)
calc_fl = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.calc_fl)
calc_g2n = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.calc_g2n)

calc_alfe = cuda.jit('void (f8[::1], f8[::1], f8[:])', device=True)(d2q5.calc_alfe)
calc_taut = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.calc_taut)
calc_relax = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.calc_relax)
calc_Ssurce = cuda.jit('void (f8[::1], f8[::1], f8[::1])', device=True)(d2q5.calc_Ssurce)
n_eq_loc = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.n_eq_loc)
calc_colision = cuda.jit('void (f8[::1], f8[::1], f8[::1], f8[::1])', device=True)(d2q5.calc_colision)
n2g_loc = cuda.jit('void (f8[::1], f8[::1])', device=True)(d2q5.n2g_loc)

setfl = cuda.jit('void (f8[:,:], f8[::1], i8, i8)', device=True)(d2q5.setfl)
#setT = cuda.jit('void (f8[:,:], f8[:1], i4, i4)', device=True)(d2q5.setT)
setg = cuda.jit('void (f8[:,:,:], f8[::1], i8, i8)', device=True)(d2q5.setg)
set_prueba = cuda.jit('void (f8[:,:], f8[::1], i8, i8)', device=True)(d2q5.set_prueba)


@cuda.jit('void(f8[:,:,:],f8[:,:,:])')
def propagacion(d_g, copia_g):
    nx, ny, ns = d_g.shape
Example #42
 def get_cfunc(self, pyfunc, argspec):
     return cuda.jit()(pyfunc)
Example #43
 def test_clz_i4(self):
     compiled = cuda.jit("void(int32[:], int32)")(simple_clz)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary, 0x00100000)
     self.assertEqual(ary[0], 11)
Example #44
 def test_global_constant_tuple(self):
     udt = cuda.jit((float32[:, :], ))(udt_global_constant_tuple)
     udt[1, 1](self.getarg2())
Example #45
 def _compile_core(self, sig):
     cudevfn = cuda.jit(sig, device=True, inline=True)(self.pyfunc)
     return cudevfn, cudevfn.overloads[sig.args].signature.return_type
Example #46
 def test_const_align(self):
     jcuconstAlign = cuda.jit('void(float64[:])')(cuconstAlign)
     A = np.full(3, fill_value=np.nan, dtype=float)
     jcuconstAlign[1, 3](A)
     self.assertTrue(np.all(A == (CONST3BYTES + CONST1D[:3])))
Example #47
 def test_brev_u8(self):
     compiled = cuda.jit("void(uint64[:], uint64)")(simple_brev)
     ary = np.zeros(1, dtype=np.uint64)
     compiled(ary, 0x000030F0000030F0)
     self.assertEqual(ary[0], 0x0F0C00000F0C0000)
Example #48
steps = 1000


def mandel(x, y, max_iters, zoom):

    c = complex(x, y) / zoom + complex(-0.743643887037158704752191506114774,
                                       0.131825904205311970493132056385139)
    z = 0.0j
    for i in range(max_iters):
        z = z * z + c
        if (z.real * z.real + z.imag * z.imag) >= 4:
            return i
    return max_iters


mandel_gpu = cuda.jit(device=True)(mandel)


@cuda.jit
def mandel_kernel(min_x, max_x, min_y, max_y, image, iters, steps):
    height = image.shape[0]
    width = image.shape[1]

    pixel_size_x = (max_x - min_x) / width
    pixel_size_y = (max_y - min_y) / height

    startX, startY = cuda.grid(2)
    gridX = cuda.gridDim.x * cuda.blockDim.x
    gridY = cuda.gridDim.y * cuda.blockDim.y

    #start = complex(0,0)
Example #49
 def test_popc_u8(self):
     compiled = cuda.jit("void(int32[:], uint64)")(simple_popc)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary, 0xF00000000000)
     self.assertEqual(ary[0], 4)
Example #50
 def test_unconfigured_typed_cudakernel(self):
     kernfunc = cuda.jit("void(int32)")(noop)
     self._test_unconfigured(kernfunc)
Example #51
 def test_const_empty(self):
     jcuconstEmpty = cuda.jit('void(float64[:])')(cuconstEmpty)
     A = np.full(1, fill_value=-1, dtype=int)
     jcuconstEmpty[1, 1](A)
     self.assertTrue(np.all(A == 0))
Example #52
"""Adaptor for composed CUDA math functions
    
This is necessary because creating a device function will not recursively compile nested functions.

See: https://stackoverflow.com/questions/52807489/programmatic-nested-numba-cuda-function-calls
"""
from . import math
from numba import cuda 

r = cuda.jit(math.r, device=True)
r2 = cuda.jit(lambda gn0, gn1: r(gn0, gn1) ** 2, device=True)
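Because `r` and `r2` live at module scope, a kernel defined in the same module can call `r2` directly and Numba compiles the whole chain of device functions on first launch. Below is a minimal usage sketch, assuming `math.r` takes two float scalars and returns a float; the kernel name `r2_kernel`, the array arguments, and the launch configuration are hypothetical additions for illustration, not part of the original adaptor module:

# Hypothetical usage sketch for the adaptors above.
import numpy as np

@cuda.jit
def r2_kernel(gn0, gn1, out):
    # Apply the composed device function r2 elementwise.
    i = cuda.grid(1)
    if i < out.size:
        out[i] = r2(gn0[i], gn1[i])

# Illustrative launch: one block of 32 threads over 32-element arrays.
gn0 = np.arange(32, dtype=np.float64)
gn1 = np.ones(32, dtype=np.float64)
out = np.zeros(32, dtype=np.float64)
r2_kernel[1, 32](gn0, gn1, out)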
Example #53
 def test_simple_gridsize1d(self):
     compiled = cuda.jit("void(int32[::1])")(simple_gridsize1d)
     ntid, nctaid = 3, 7
     ary = np.zeros(1, dtype=np.int32)
     compiled[nctaid, ntid](ary)
     self.assertEqual(ary[0], nctaid * ntid)
Example #54
 def f_contigous():
     compiled = cuda.jit("void(int32[::1,:,:])")(fill3d_threadidx)
     ary = np.asfortranarray(np.zeros((X, Y, Z), dtype=np.int32))
     compiled[1, (X, Y, Z)](ary)
     return ary
Example #55
# Importing external libs
import numpy as np
from numba import cuda, uint8, float32

# Importing app lib
import corrcoef as my

# Note the hardcoded value of 4.
# Four is the number of items per pixel,
# which represent a pixel in the form (r, g, b, alpha).
PIXEL_DATA_LENGTH = 4

# Pre-compile the standard corrcoef function for use as a CUDA device call
cuda_corrcoef = cuda.jit(restype=float32, argtypes=[float32[:], float32[:], float32], device=True)(my.corrcoef) # pylint: disable=bad-whitespace

@cuda.jit(argtypes=[float32[:,:,:], float32[:,:,:], float32[:,:]]) # pylint: disable=bad-whitespace
def correlate_frames_kernel(cuda_img01_frames, cuda_img02_frames, cuda_coefficients):
  '''To calculate the correlation between each frame'''
  coord_x = cuda.blockIdx.x
  coord_y = cuda.blockIdx.y
  coef = 0
  cuda_coefficients[coord_x][coord_y] = cuda_corrcoef(
    cuda_img01_frames[coord_x][coord_y],
    cuda_img02_frames[coord_x][coord_y],
    coef,
  )

@cuda.jit(argtypes=[uint8[:,:,:], float32[:,:,:], uint8]) # pylint: disable=bad-whitespace
def flatten_frame_kernel(cuda_img, cuda_flatten_frame, frame_size):
Example #56
 def test_global_constants(self):
     udt = cuda.jit((float32[:], ))(udt_global_constants)
     udt[1, 1](self.getarg())
Example #57
    dVARdt[ii, jj, :] = 0.

    exchange_BC(VAR)
    exchange_BC(VAR1)
    exchange_BC(VAR2)

    stream = cuda.stream()

    VARd = cuda.to_device(VAR, stream)
    dVARdtd = cuda.to_device(dVARdt, stream)
    VAR1d = cuda.to_device(VAR1, stream)
    VAR2d = cuda.to_device(VAR2, stream)


    kernel_trivial = cuda.jit(cuda_kernel_decorator(kernel_trivial))\
                                (kernel_trivial)
    kernel_improve = cuda.jit(cuda_kernel_decorator(kernel_improve))\
                                (kernel_improve)
    kernel_shared = cuda.jit(cuda_kernel_decorator(kernel_shared))\
                                (kernel_shared)

    dVARdt[ii, jj, :] = 0.
    dVARdtd = cuda.to_device(dVARdt, stream)

    print('trivial')
    tpb = (1, 1, nz)
    bpg = (math.ceil((nx + 2 * nb) / tpb[0]), math.ceil(
        (ny + 2 * nb) / tpb[1]), math.ceil((nz) / tpb[2]))
    print(tpb)
    print(bpg)
    kernel_trivial[bpg, tpb](dVARdtd, VARd)
Example #58
 def test_simple_warpsize(self):
     compiled = cuda.jit("void(int32[:])")(simple_warpsize)
     ary = np.zeros(1, dtype=np.int32)
     compiled(ary)
     self.assertEqual(ary[0], 32, "CUDA semantics")
Example #59
 def test_const_record_empty(self):
     jcuconstRecEmpty = cuda.jit('void(int64[:])')(cuconstRecEmpty)
     A = np.full(1, fill_value=-1, dtype=np.int64)
     jcuconstRecEmpty[1, 1](A)
     self.assertTrue(np.all(A == 0))
Example #60
 def _get_globals(self, sig):
     corefn = cuda.jit(sig, device=True)(self.pyfunc)
     glbls = self.py_func.__globals__.copy()
     glbls.update({'__cuda__': cuda,
                   '__core__': corefn})
     return glbls