def test_ctx_managed_stream(self):
        """Chain add-one kernel launches on a stream used as a context manager.

        The source buffer starts as ``0 .. nitems - 1``.  Every loop iteration
        launches the kernel (result = input + 1) and then calls
        ``copy_to_device`` on the input array with the result array, so the
        next launch sees the incremented values.  Once the ``with`` block has
        been left, the host result is expected to equal the initial values
        plus the number of launches.
        """
        logger.info("context info: %s", roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            # Work-items whose global id lies past the end of ``dst`` do nothing.
            gid = roc.get_global_id(0)
            if gid < dst.size:
                dst[gid] = src[gid] + 1

        # Launch configuration and workload size.
        block_size = 256
        grid_size = 10**5
        n_launches = 500
        total_items = block_size * grid_size

        initial = np.arange(total_items, dtype=np.int32)

        logger.info("make coarse_arr")
        src_host = roc.coarsegrain_array(shape=initial.shape,
                                         dtype=initial.dtype)
        src_host[:] = initial

        logger.info("make coarse_res_arr")
        result_host = roc.coarsegrain_array(shape=initial.shape,
                                            dtype=initial.dtype)
        result_host[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        # Everything queued on ``stream`` happens inside this block; the log
        # message after it indicates synchronization occurs on __exit__.
        with stream.auto_synchronize():
            logger.info("make gpu_res_arr")
            result_dev = roc.device_array_like(src_host)

            logger.info("make gpu_arr")
            src_dev = roc.to_device(src_host, stream=stream)

            for launch_no in range(n_launches):
                logger.info("launch kernel: %d", launch_no)
                add1_kernel[grid_size, block_size, stream](result_dev, src_dev)
                # Feed the fresh result back in as the next launch's input.
                src_dev.copy_to_device(result_dev, stream=stream)

            logger.info("get kernel result")
            result_dev.copy_to_host(result_host, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        expected = src_host + n_launches
        np.testing.assert_equal(result_host, expected)
Example #2
0
    def test_ctx_managed_stream(self):
        """Exercise a stream whose work is issued inside ``auto_synchronize()``.

        An add-one kernel is launched ``ntimes`` in a row on the same stream.
        After each launch ``copy_to_device`` is called on the input array
        with the output array, so increments accumulate.  The final output is
        copied to the host and compared against the starting values plus the
        launch count.
        """
        logger.info('context info: %s', roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            # Skip work-items whose global id falls outside ``dst``.
            gid = roc.get_global_id(0)
            if gid < dst.size:
                dst[gid] = src[gid] + 1

        # Launch configuration and workload size.
        block_size = 256
        grid_size = 10**5
        n_launches = 500
        total_items = block_size * grid_size

        initial = np.arange(total_items, dtype=np.int32)

        logger.info('make coarse_arr')
        src_host = roc.coarsegrain_array(shape=initial.shape,
                                         dtype=initial.dtype)
        src_host[:] = initial

        logger.info('make coarse_res_arr')
        result_host = roc.coarsegrain_array(shape=initial.shape,
                                            dtype=initial.dtype)
        result_host[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        # All transfers and launches below are queued on ``stream``; the log
        # message after the block indicates synchronization occurs on __exit__.
        with stream.auto_synchronize():
            logger.info('make gpu_res_arr')
            result_dev = roc.device_array_like(src_host)

            logger.info('make gpu_arr')
            src_dev = roc.to_device(src_host, stream=stream)

            for launch_no in range(n_launches):
                logger.info('launch kernel: %d', launch_no)
                add1_kernel[grid_size, block_size, stream](result_dev, src_dev)
                # The output of this launch becomes the input of the next one.
                src_dev.copy_to_device(result_dev, stream=stream)

            logger.info('get kernel result')
            result_dev.copy_to_host(result_host, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        expected = src_host + n_launches
        np.testing.assert_equal(result_host, expected)
Example #3
0
import numpy as np

from numba import roc
from numba.core.errors import TypingError
import operator as oper
import unittest

# Wavefront size reported by the agent of the current ROC context.  It is
# looked up once when this module is executed and serves as the modulus for
# the lane indices computed by the shuffle helpers below.
_WAVESIZE = roc.get_context().agent.wavefront_size


@roc.jit(device=True)
def shuffle_up(val, width):
    """Permute ``val`` using the lane index ``(local id + width) % _WAVESIZE``.

    A wave barrier is issued before the permute.  The value returned is
    whatever ``roc.ds_permute`` yields for the computed index.
    NOTE(review): the direction of the data movement depends on the
    semantics of ``ds_permute`` -- confirm against its documentation.
    """
    lane = roc.get_local_id(0)
    roc.wavebarrier()
    # Wrap the shifted lane index so it stays within one wavefront.
    return roc.ds_permute((lane + width) % _WAVESIZE, val)


@roc.jit(device=True)
def shuffle_down(val, width):
    """Permute ``val`` using the lane index ``(local id - width) % _WAVESIZE``.

    A wave barrier is issued before the permute.  The value returned is
    whatever ``roc.ds_permute`` yields for the computed index.
    NOTE(review): the direction of the data movement depends on the
    semantics of ``ds_permute`` -- confirm against its documentation.
    """
    lane = roc.get_local_id(0)
    roc.wavebarrier()
    # Wrap the shifted lane index so it stays within one wavefront.
    return roc.ds_permute((lane - width) % _WAVESIZE, val)


@roc.jit(device=True)
def broadcast(val, from_lane):
    tid = roc.get_local_id(0)
Example #4
0
from __future__ import print_function, absolute_import, division

import numpy as np

from numba import unittest_support as unittest
from numba import roc
from numba.errors import TypingError
import operator as oper

# Wavefront size reported by the agent of the current ROC context.  It is
# looked up once when this module is executed and serves as the modulus for
# the lane indices computed by the shuffle helpers below.
_WAVESIZE = roc.get_context().agent.wavefront_size

@roc.jit(device=True)
def shuffle_up(val, width):
    """Call ``ds_permute`` on ``val`` with index ``(local id + width) % _WAVESIZE``.

    The wave barrier is issued first, then the permute result is returned
    unchanged.
    NOTE(review): which lane ends up holding which value depends on the
    semantics of ``ds_permute`` -- confirm against its documentation.
    """
    lane = roc.get_local_id(0)
    roc.wavebarrier()
    # Modulo keeps the shifted index inside a single wavefront.
    target = (lane + width) % _WAVESIZE
    return roc.ds_permute(target, val)

@roc.jit(device=True)
def shuffle_down(val, width):
    """Call ``ds_permute`` on ``val`` with index ``(local id - width) % _WAVESIZE``.

    The wave barrier is issued first, then the permute result is returned
    unchanged.
    NOTE(review): which lane ends up holding which value depends on the
    semantics of ``ds_permute`` -- confirm against its documentation.
    """
    lane = roc.get_local_id(0)
    roc.wavebarrier()
    # Modulo keeps the shifted index inside a single wavefront.
    target = (lane - width) % _WAVESIZE
    return roc.ds_permute(target, val)

@roc.jit(device=True)
def broadcast(val, from_lane):
    tid = roc.get_local_id(0)
    roc.wavebarrier()