def setUp(self):
    """Enable the CUB reduction accelerator, skipping on HIP without hipcc.

    Saves the previous reduction-accelerator list so tearDown can restore it.
    """
    # On HIP builds the CUB path needs hipcc to compile; skip if it is absent.
    if cupy.cuda.runtime.is_hip and _environment.get_hipcc_path() is None:
        self.skipTest('hipcc is not found')
    # Keep a handle on the eligibility check so tests can query it directly.
    self.can_use = cupy._core._cub_reduction._can_use_cub_block_reduction
    self.old_accelerators = _accelerator.get_reduction_accelerators()
    _accelerator.set_reduction_accelerators(['cub'])
def setUp(self):
    """Select CUB either as a routine ('device') or reduction ('block') accelerator."""
    # Snapshot the current configuration for restoration in tearDown.
    self.old_routine_accelerators = _acc.get_routine_accelerators()
    self.old_reduction_accelerators = _acc.get_reduction_accelerators()
    # backend -> (routine accelerators, reduction accelerators); any other
    # backend value leaves the global configuration untouched.
    backend_config = {
        'device': (['cub'], []),
        'block': ([], ['cub']),
    }
    if self.backend in backend_config:
        routines, reductions = backend_config[self.backend]
        _acc.set_routine_accelerators(routines)
        _acc.set_reduction_accelerators(reductions)
def setUp(self):
    """Reset the optimize-context cache and point reductions at self.backend."""
    cupy._core._optimize_config._clear_all_contexts_cache()
    # Snapshot both accelerator lists before mutating them.
    self.old_reductions = _accelerator.get_reduction_accelerators()
    self.old_routines = _accelerator.get_routine_accelerators()
    _accelerator.set_reduction_accelerators(self.backend)
    # Clear routine accelerators so the reduction path under test is not
    # shadowed by the cub module.
    _accelerator.set_routine_accelerators([])
    self.x = testing.shaped_arange((3, 4), cupy, dtype=cupy.float32)
def setUp(self):
    """Configure CUB accelerators per backend; skip unsupported axis cases."""
    self.order, self.axis = self.order_and_axis
    # Snapshot the current configuration for restoration in tearDown.
    self.old_routine_accelerators = _acc.get_routine_accelerators()
    self.old_reduction_accelerators = _acc.get_reduction_accelerators()
    if self.backend == 'device':
        # The device-wide CUB routine path does not handle an explicit axis.
        if self.axis is not None:
            raise unittest.SkipTest('does not support')
        routines, reductions = ['cub'], []
    elif self.backend == 'block':
        routines, reductions = [], ['cub']
    else:
        # Unknown backend: leave the global configuration untouched.
        return
    _acc.set_routine_accelerators(routines)
    _acc.set_reduction_accelerators(reductions)
def setUp(self):
    """Yield-style fixture: apply backend accelerators, restore afterwards."""
    self.order, self.axis = self.order_and_axis
    # Snapshot the configuration so it can be restored after the yield.
    saved_routines = _acc.get_routine_accelerators()
    saved_reductions = _acc.get_reduction_accelerators()
    if self.backend == 'device':
        # The device-wide CUB routine path does not handle an explicit axis.
        if self.axis is not None:
            pytest.skip('does not support')
        _acc.set_routine_accelerators(['cub'])
        _acc.set_reduction_accelerators([])
    elif self.backend == 'block':
        _acc.set_routine_accelerators([])
        _acc.set_reduction_accelerators(['cub'])
    yield
    # Teardown: put the original accelerator configuration back.
    _acc.set_routine_accelerators(saved_routines)
    _acc.set_reduction_accelerators(saved_reductions)
def tearDown(self):
    """Check the cache dir stayed clean, then leave the temporary contexts."""
    cub_reduction_enabled = (
        _accelerator.ACCELERATOR_CUB
        in _accelerator.get_reduction_accelerators())
    if self.in_memory and not cub_reduction_enabled:
        # In-memory mode should not write any file to the cache dir, but
        # the CUB reduction kernel uses nvcc, with which I/O cannot be
        # avoided — hence the CUB exclusion above.  At most the one known
        # source file may be present.
        files = os.listdir(self.cache_dir)
        expected = 1 if 'test_load_cubin.cu' in files else 0
        assert len(files) == expected
    self.in_memory_context.__exit__(*sys.exc_info())
    self.temporary_cache_dir_context.__exit__(*sys.exc_info())
def test_optimize_cache(self):
    """Optimization runs once per distinct (kernel, arg-properties, key) and
    is then served from cache; _optimize is mocked to count invocations."""
    if (_accelerator.ACCELERATOR_CUB in
            _accelerator.get_reduction_accelerators()):
        pytest.skip('optimize cannot be mocked for CUB reduction')
    # Patch the internal optimizer by its fully qualified name so every
    # call through cupyx.optimizing.optimize() hits the mock.
    target = cupyx.optimizing._optimize._optimize
    target_full_name = '{}.{}'.format(target.__module__, target.__name__)
    with mock.patch(target_full_name) as optimize_impl:
        # Two kernels with identical code but different names: the cache
        # key must distinguish them (checked at the end).
        my_sum = cupy.ReductionKernel(
            'T x', 'T out', 'x', 'a + b', 'out = a', '0', 'my_sum')
        my_sum_ = cupy.ReductionKernel(
            'T x', 'T out', 'x', 'a + b', 'out = a', '0', 'my_sum_')
        x = testing.shaped_arange((3, 4), cupy)
        x_ = testing.shaped_arange((3, 4), cupy)   # same shape/strides as x
        y = testing.shaped_arange((4, 4), cupy)    # same strides, new shape
        z = testing.shaped_arange((3, 4), cupy)[::-1]  # same shape, new strides
        # Sanity-check the fixture relationships the assertions below rely on.
        assert x.strides == y.strides
        assert x.shape == z.shape
        with cupyx.optimizing.optimize():
            my_sum(x, axis=1)
            assert optimize_impl.call_count == 1
            # Identical call: cache hit, count unchanged.
            my_sum(x, axis=1)
            assert optimize_impl.call_count == 1
            # Different axis: cache miss.
            my_sum(x, axis=0)
            assert optimize_impl.call_count == 2
            # Distinct array with identical properties: cache hit.
            my_sum(x_, axis=1)
            assert optimize_impl.call_count == 2
            # Different shape: cache miss.
            my_sum(y, axis=1)
            assert optimize_impl.call_count == 3
            # Different strides (reversed view): cache miss.
            my_sum(z, axis=1)
            assert optimize_impl.call_count == 4
            # Different kernel name: cache miss.
            my_sum_(x, axis=1)
            assert optimize_impl.call_count == 5
        with cupyx.optimizing.optimize(key='new_key'):
            # A new user key gets its own cache entry.
            my_sum(x, axis=1)
            assert optimize_impl.call_count == 6
        with cupyx.optimizing.optimize(key=None):
            # key=None matches the default-key context used above: cache hit.
            my_sum(x, axis=1)
            assert optimize_impl.call_count == 6
            # Full reduction (no axis) is a new argument set: cache miss.
            my_sum(x)
            assert optimize_impl.call_count == 7
def test_optimize_cache_multi_gpus(self):
    """The optimize cache must not share entries across GPU devices."""
    if (_accelerator.ACCELERATOR_CUB in
            _accelerator.get_reduction_accelerators()):
        pytest.skip('optimize cannot be mocked for CUB reduction')
    # Patch the internal optimizer by its fully qualified name so we can
    # count how many times optimization actually runs.
    target = cupyx.optimizing._optimize._optimize
    target_full_name = '{}.{}'.format(target.__module__, target.__name__)
    with mock.patch(target_full_name) as optimize_impl:
        my_sum = cupy.ReductionKernel(
            'T x', 'T out', 'x', 'a + b', 'out = a', '0', 'my_sum')
        with cupyx.optimizing.optimize():
            # The same kernel with identical arguments, issued on two
            # different devices, must trigger optimization once per device.
            for device_id, expected_count in ((0, 1), (1, 2)):
                with cupy.cuda.Device(device_id):
                    arr = testing.shaped_arange((3, 4), cupy)
                    my_sum(arr, axis=1)
                    assert optimize_impl.call_count == expected_count
def setUp(self):
    """Disable every accelerator, remembering prior settings for tearDown."""
    # Snapshot both accelerator lists before clearing them.
    self.old_accelerators = _acc.get_routine_accelerators()
    self.old_reduction_accelerators = _acc.get_reduction_accelerators()
    _acc.set_routine_accelerators([])
    # Also clear reduction accelerators to avoid a fallback to CUB via the
    # general reduction kernel.
    _acc.set_reduction_accelerators([])