Example #1
0
 def test_async_copy_to_device_and_back(self):
     """Round-trip known data host -> device -> host on one stream.

     Checks two things:

     * each asynchronous copy issued on the stream adds one entry to
       ``stream._signals`` and ``stream.synchronize()`` leaves none;
     * the data read back equals the data that was sent.
     """
     arr = np.arange(1024)
     hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     gotarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     # Give the source buffer known contents and poison the destination.
     # Without this the test compares two uninitialised buffers and can
     # pass even if neither copy moves any data.
     hostarr[:] = arr
     gotarr[:] = -1
     stream = roc.stream()
     ct = len(stream._signals)
     devarr = roc.to_device(hostarr, stream=stream)
     self.assertEqual(ct + 1, len(stream._signals))
     devarr.copy_to_host(gotarr, stream=stream)
     self.assertEqual(ct + 2, len(stream._signals))
     stream.synchronize()
     self.assertEqual(0, len(stream._signals))
     np.testing.assert_equal(hostarr, gotarr)
     # Also compare against the original data, not just the source buffer.
     np.testing.assert_equal(arr, gotarr)
Example #2
0
 def test_async_copy_to_device_and_back(self):
     """Send known data to the device and back asynchronously.

     The test asserts that every stream-bound copy registers one more
     entry in ``stream._signals``, that ``stream.synchronize()`` leaves
     that list empty, and that the buffer read back matches what was
     sent.
     """
     arr = np.arange(1024)
     hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     gotarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
     # Initialise both buffers explicitly: the source gets the reference
     # data and the destination gets a sentinel that never occurs in
     # ``arr``. Otherwise the final comparison is between two
     # uninitialised buffers and proves nothing.
     hostarr[:] = arr
     gotarr[:] = -1
     stream = roc.stream()
     ct = len(stream._signals)
     devarr = roc.to_device(hostarr, stream=stream)
     self.assertEqual(ct + 1, len(stream._signals))
     devarr.copy_to_host(gotarr, stream=stream)
     self.assertEqual(ct + 2, len(stream._signals))
     stream.synchronize()
     self.assertEqual(0, len(stream._signals))
     np.testing.assert_equal(hostarr, gotarr)
     # The sentinel must have been overwritten by the reference data.
     np.testing.assert_equal(arr, gotarr)
    def test_ctx_managed_stream(self):
        """Run a chain of kernel launches inside ``auto_synchronize``.

        An "add one" kernel is launched ``ntimes`` times on one stream,
        feeding each result back in as the next input.  After the
        ``with`` block the host result must equal the input plus
        ``ntimes``.
        """
        logger.info("context info: %s", roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            # One work-item per element; guard against the grid tail.
            idx = roc.get_global_id(0)
            if idx < dst.size:
                dst[idx] = src[idx] + 1

        # Launch geometry and iteration count.
        block_size = 256
        grid_size = 10**5
        ntimes = 500
        total_items = block_size * grid_size

        source = np.arange(total_items, dtype=np.int32)

        logger.info("make coarse_arr")
        coarse_arr = roc.coarsegrain_array(shape=source.shape,
                                           dtype=source.dtype)
        coarse_arr[:] = source

        logger.info("make coarse_res_arr")
        coarse_res_arr = roc.coarsegrain_array(shape=source.shape,
                                               dtype=source.dtype)
        coarse_res_arr[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        with stream.auto_synchronize():
            logger.info("make gpu_res_arr")
            gpu_res_arr = roc.device_array_like(coarse_arr)

            logger.info("make gpu_arr")
            gpu_arr = roc.to_device(coarse_arr, stream=stream)

            for launch_no in range(ntimes):
                logger.info("launch kernel: %d", launch_no)
                configured = add1_kernel[grid_size, block_size, stream]
                configured(gpu_res_arr, gpu_arr)
                # Feed this round's output back in as the next input.
                gpu_arr.copy_to_device(gpu_res_arr, stream=stream)

            logger.info("get kernel result")
            gpu_res_arr.copy_to_host(coarse_res_arr, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        expected = coarse_arr + ntimes
        np.testing.assert_equal(coarse_res_arr, expected)
Example #4
0
    def test_ctx_managed_stream(self):
        """Exercise a stream used through ``auto_synchronize()``.

        Inside the context manager an increment kernel runs ``ntimes``
        times, each time copying its output back over its input.  No
        explicit synchronize call is made; the final comparison expects
        every element to have grown by ``ntimes``.
        """
        logger.info('context info: %s', roc.get_context().agent)

        @roc.jit("int32[:], int32[:]")
        def add1_kernel(dst, src):
            # Bounds check covers work-items past the end of the array.
            gid = roc.get_global_id(0)
            if gid < dst.size:
                dst[gid] = src[gid] + 1

        threads_per_block = 256
        num_blocks = 10**5
        num_elements = threads_per_block * num_blocks
        ntimes = 500

        host_input = np.arange(num_elements, dtype=np.int32)
        shape, dtype = host_input.shape, host_input.dtype

        logger.info('make coarse_arr')
        coarse_arr = roc.coarsegrain_array(shape=shape, dtype=dtype)
        coarse_arr[:] = host_input

        logger.info('make coarse_res_arr')
        coarse_res_arr = roc.coarsegrain_array(shape=shape, dtype=dtype)
        coarse_res_arr[:] = 0

        logger.info("make stream")
        stream = roc.stream()

        with stream.auto_synchronize():
            logger.info('make gpu_res_arr')
            gpu_res_arr = roc.device_array_like(coarse_arr)

            logger.info('make gpu_arr')
            gpu_arr = roc.to_device(coarse_arr, stream=stream)

            for iteration in range(ntimes):
                logger.info('launch kernel: %d', iteration)
                add1_kernel[num_blocks, threads_per_block, stream](
                    gpu_res_arr, gpu_arr
                )
                # The result becomes the input of the next launch.
                gpu_arr.copy_to_device(gpu_res_arr, stream=stream)

            logger.info('get kernel result')
            gpu_res_arr.copy_to_host(coarse_res_arr, stream=stream)

        logger.info("synchronize on ctx __exit__")

        logger.info("compare result")
        np.testing.assert_equal(coarse_res_arr, coarse_arr + ntimes)
Example #5
0
    def test_async_copy_to_device(self):
        """Overwrite a device array asynchronously, then read it back.

        The stream-bound ``copy_to_device`` must add one entry to
        ``stream._signals``.  The following ``copy_to_host()`` call,
        made without a stream, must leave that list empty and return the
        newly written data.
        """
        arr = np.arange(1024)

        # Device array initially holds ``arr``.
        devarr = roc.to_device(arr)

        # Host-side source with different contents, so that the
        # overwrite is observable.
        hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
        hostarr[:] = arr + 100

        stream = roc.stream()
        signals_before = len(stream._signals)

        devarr.copy_to_device(hostarr, stream=stream)
        signals_after_copy = len(stream._signals)
        self.assertEqual(signals_before + 1, signals_after_copy,
                         "no new async signal")

        # No stream is passed here; the test expects this call to
        # synchronize implicitly.
        got = devarr.copy_to_host()
        signals_after_read = len(stream._signals)
        self.assertEqual(0, signals_after_read,
                         "missing implicit synchronization")

        np.testing.assert_equal(hostarr, got)
Example #6
0
    def test_async_copy_to_device(self):
        """Check signal bookkeeping around an async device overwrite.

        Steps: put ``arr`` on the device, overwrite it from a
        coarse-grained host array on a stream, then copy back to the
        host without a stream.  The assertions cover the length of
        ``stream._signals`` after each step and the final data.
        """
        arr = np.arange(1024)
        devarr = roc.to_device(arr)

        # Source buffer whose contents differ from what is on the device.
        hostarr = roc.coarsegrain_array(shape=arr.shape, dtype=arr.dtype)
        hostarr[:] = arr + 100

        stream = roc.stream()
        pending = stream._signals
        baseline = len(pending)

        devarr.copy_to_device(hostarr, stream=stream)
        # Re-read the attribute rather than reusing ``pending`` so the
        # length is taken from whatever list the stream holds now.
        self.assertEqual(
            baseline + 1, len(stream._signals), "no new async signal"
        )

        # Stream-less read-back; expected to synchronize implicitly.
        got = devarr.copy_to_host()
        self.assertEqual(
            0, len(stream._signals), "missing implicit synchronization"
        )
        np.testing.assert_equal(hostarr, got)