def test_nonempty_list_create_no_jit(self):
    """Create a non-empty ``List`` with the JIT disabled and codegen forbidden.

    Regression test for Issue #6001:
    https://github.com/numba/numba/issues/6001
    """
    with override_config('DISABLE_JIT', True), forbid_codegen():
        created = List([1, 2, 3])
        # The result must be a plain Python list holding the same items.
        self.assertEqual(type(created), list)
        self.assertEqual(created, [1, 2, 3])
def run_compile(self, fnlist, parallelism='threading'):
    """Run ``fnlist`` through the implementation chosen by ``parallelism``.

    A temporary directory named after the test class is created, stored on
    ``self._cache_dir`` and installed as ``CACHE_DIR`` for the duration of
    the run.

    ``parallelism`` is one of ``'threading'``, ``'multiprocessing_fork'``,
    ``'multiprocessing_forkserver'``, ``'multiprocessing_spawn'``,
    ``'multiprocessing_default'`` or ``'random'``.  ``'random'`` runs every
    available implementation once, in shuffled order.

    Raises ``ValueError`` for any other value.
    """
    self._cache_dir = temp_directory(self.__class__.__name__)
    with override_config('CACHE_DIR', self._cache_dir):
        if parallelism == 'random':
            impls = [thread_impl, spawn_proc_impl]
            # The fork based implementations are only used when os.fork
            # is flagged as available.
            if _HAVE_OS_FORK:
                impls.extend([fork_proc_impl, forkserver_proc_impl])
            random.shuffle(impls)
            for run in impls:
                run(fnlist)
        elif parallelism == 'threading':
            thread_impl(fnlist)
        elif parallelism == 'multiprocessing_fork':
            fork_proc_impl(fnlist)
        elif parallelism == 'multiprocessing_forkserver':
            forkserver_proc_impl(fnlist)
        elif parallelism == 'multiprocessing_spawn':
            spawn_proc_impl(fnlist)
        elif parallelism == 'multiprocessing_default':
            default_proc_impl(fnlist)
        else:
            raise ValueError(
                'Unknown parallelism supplied %s' % parallelism)
def test_usage(self):
    """Exercise the basic API of the recorded LLVM pass timings."""
    @njit
    def foo(n):
        c = 0
        for i in range(n):
            c += i
        return c

    # Compile (and run) with pass timing collection switched on.
    with override_config('LLVM_PASS_TIMINGS', True):
        foo(10)

    metadata = foo.get_metadata(foo.signatures[0])
    timings = metadata['llvm_pass_timings']

    # The collection has the expected type.
    self.assertIsInstance(timings, lpt.PassTimingsCollection)
    # __str__ mentions the full optimization module passes.
    self.assertIn("Module passes (full optimization)", str(timings))
    # There is at least one record.
    self.assertGreater(len(timings), 0)
    # __getitem__ yields NamedTimings records.
    final_record = timings[-1]
    self.assertIsInstance(final_record, lpt.NamedTimings)
    # Each NamedTimings carries a name and the processed timings.
    self.assertIsInstance(final_record.name, str)
    self.assertIsInstance(final_record.timings, lpt.ProcessedPassTimings)
def get_ir(extend_lifetimes):
    """Compile a small function and return the IR stored by ``PreserveIR``.

    ``extend_lifetimes`` is installed as the ``EXTEND_VARIABLE_LIFETIMES``
    config value while the function is compiled.  The returned object is
    the ``'preserved_ir'`` entry of the compile result's metadata.
    """
    class IRPreservingCompiler(CompilerBase):
        # Default nopython pipeline with PreserveIR added right after
        # IRLegalization.
        def define_pipelines(self):
            pipeline = DefaultPassBuilder.define_nopython_pipeline(
                self.state)
            pipeline.add_pass_after(PreserveIR, IRLegalization)
            pipeline.finalize()
            return [pipeline]

    @njit(pipeline_class=IRPreservingCompiler)
    def foo():
        a = 10
        b = 20
        c = a + b
        # a and b are now unused, standard behaviour is ir.Del for them here
        d = c / c
        return d

    with override_config('EXTEND_VARIABLE_LIFETIMES', extend_lifetimes):
        foo()
        first_sig = foo.signatures[0]
        compile_result = foo.overloads[first_sig]
        return compile_result.metadata['preserved_ir']
def test_analyze(self):
    """Check the analysis helpers of the LLVM pass timings collection.

    Covers ``get_total_time()``, ``summary()`` and the ordering returned
    by ``list_longest_first()``.
    """
    @njit
    def foo(n):
        c = 0
        for i in range(n):
            for j in range(i):
                c += j
        return c

    # Compile (and run) with pass timing collection switched on.
    with override_config('LLVM_PASS_TIMINGS', True):
        foo(10)

    md = foo.get_metadata(foo.signatures[0])
    timings_collection = md['llvm_pass_timings']

    # Check: get_total_time()
    self.assertIsInstance(timings_collection.get_total_time(), float)
    # Check: summary()
    self.assertIsInstance(timings_collection.summary(), str)
    # Check: list_longest_first() ordering
    longest_first = timings_collection.list_longest_first()
    self.assertEqual(len(longest_first), len(timings_collection))
    last = longest_first[0].timings.get_total_time()
    for rec in longest_first[1:]:
        cur = rec.timings.get_total_time()
        self.assertGreaterEqual(last, cur)
        # Advance the reference so each record is compared with its
        # immediate predecessor rather than always with the first one.
        last = cur
def omitted_child_test_wrapper(result_queue, cache_dir, second_call):
    """Compile a cached function with an omitted argument and report back.

    With ``CACHE_DIR`` set to ``cache_dir``, a cached jitted function with a
    default argument is called without arguments and its cache counters are
    checked: one hit and no miss when ``second_call`` is true, one miss and
    no hit otherwise.

    A ``(success, output)`` tuple is put on ``result_queue``.  ``output`` is
    the function's return value on success, or the formatted traceback of
    whatever was raised on failure.
    """
    with override_config("CACHE_DIR", cache_dir):
        @njit(cache=True)
        def test(num=1000):
            return num

        try:
            output = test()
            sig = test.signatures[0]
            if second_call:
                # A second call is expected to be served from the cache.
                assert test._cache_hits[sig] == 1, \
                    "Cache did not hit as expected"
                assert test._cache_misses[sig] == 0, \
                    "Cache has an unexpected miss"
            else:
                # The first call is expected to miss the cache.
                assert test._cache_misses[sig] == 1, \
                    "Cache did not miss as expected"
                assert test._cache_hits[sig] == 0, \
                    "Cache has an unexpected hit"
            success = True
        # Deliberately catch everything so that the failure can be sent
        # through the queue.
        except:  # noqa: E722
            output = traceback.format_exc()
            success = False
        result_queue.put((success, output))
def test_jit_debug_simulator(self):
    """``cuda.jit`` must accept ``debug=True`` with ``ENABLE_CUDASIM`` set.

    See Issue #6615.
    """
    with override_config('ENABLE_CUDASIM', 1):
        # Only the decoration is exercised; the kernel is never launched.
        @cuda.jit(debug=True)
        def f(x):
            pass
def test_py_func_with_kwargs(self):
    """With the JIT disabled, ``jit(nopython=True)`` exposes ``py_func``."""
    with override_config('DISABLE_JIT', True):
        def method(x):
            return x

        decorate = jit(nopython=True)
        jitted = decorate(method)

    # py_func must refer back to the undecorated function.
    self.assertEqual(jitted.py_func, method)
def test_decorated_function_with_kwargs(self):
    """With the JIT disabled, ``@jit(nopython=True)`` yields a wrapper.

    The wrapper must be a ``_DisableJitWrapper``, carry a ``py_func`` and
    still be callable.
    """
    with override_config('DISABLE_JIT', True):
        @jit(nopython=True)
        def method(x):
            return x

        self.assertIsInstance(method, _DisableJitWrapper)
        self.assertIsNotNone(method.py_func)
        self.assertEqual(10, method(10))
def test_decorated_function_with_kwargs(self):
    """With the JIT disabled, ``@jit(nopython=True)`` yields a wrapper.

    The wrapper must be a ``DisableJitWrapper``, carry a ``py_func`` and
    still be callable.
    """
    with override_config('DISABLE_JIT', True):
        @jit(nopython=True)
        def method(x):
            return x

        self.assertIsInstance(method, DisableJitWrapper)
        self.assertIsNotNone(method.py_func)
        self.assertEqual(10, method(10))
def test_decorated_function_with_kwargs(self):
    """With the JIT disabled, ``jit(nopython=True)(f)`` compares equal to ``f``."""
    with override_config('DISABLE_JIT', True):
        def method(x):
            return x

        decorate = jit(nopython=True)
        jitted = decorate(method)

    self.assertEqual(jitted, method)
    # Both the original and the decorated function remain callable.
    self.assertEqual(10, method(10))
    self.assertEqual(10, jitted(10))
def test_jitclass(self):
    """Use a jitclass and a jitted consumer with the JIT disabled.

    Everything runs under ``forbid_codegen()``.
    """
    with override_config('DISABLE_JIT', True), forbid_codegen():
        make_jitclass = jitclass(simple_class_spec)
        SimpleJITClass = make_jitclass(SimpleClass)

        instance = SimpleJITClass()
        self.assertPreciseEqual(instance.h, 5)

        cfunc = jit(nopython=True)(simple_class_user)
        self.assertPreciseEqual(cfunc(instance), 5)
def test_efficient_launch_configuration(self):
    """A ``[256, 256]`` launch must not record any warning."""
    @cuda.jit
    def kernel():
        pass

    with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1), \
            warnings.catch_warnings(record=True) as caught:
        kernel[256, 256]()

    self.assertEqual(len(caught), 0)
def test_llvm_inliner_flag_conflict(self):
    """Compile with conflicting inline requests under DEBUGINFO_DEFAULT.

    ``bar`` asks for ``forceinline=True`` while DEBUGINFO_DEFAULT marks
    functions as 'noinline', which is a conflict.  ``baz`` is 'noinline'
    as a result of DEBUGINFO_DEFAULT.  The test checks that compilation
    works and that the calls left in ``foo`` reflect both decisions.
    """
    @njit(forceinline=True)
    def bar(x):
        return math.sin(x)

    @njit(forceinline=False)
    def baz(x):
        return math.cos(x)

    @njit
    def foo(x):
        a = bar(x)
        b = baz(x)
        return a, b

    # It must compile and agree with the pure Python result.
    with override_config('DEBUGINFO_DEFAULT', 1):
        result = foo(np.pi)
    self.assertPreciseEqual(result, foo.py_func(np.pi))

    # Locate foo's own definition in the generated LLVM IR.
    foo_sig = foo.signatures[0]
    module = llvm.parse_assembly(foo.inspect_llvm(foo_sig))
    name = foo.overloads[foo_sig].fndesc.mangled_name
    funcs = [fn for fn in module.functions if fn.name == name]
    self.assertEqual(len(funcs), 1)
    func = funcs[0]

    # Collect the text of every call instruction in foo.
    call_stmts = [
        str(stmt).strip()
        for blk in func.blocks
        for stmt in blk.instructions
        if stmt.opcode == 'call'
    ]

    # Two specific calls are expected:
    # 1. the llvm.sin.f64 intrinsic, left behind by the inlined bar
    # 2. a call to baz itself, because baz was not inlined
    baz_name = baz.overloads[baz.signatures[0]].fndesc.mangled_name
    found_sin = any(re.match('.*llvm.sin.f64.*', text)
                    for text in call_stmts)
    found_baz = any(re.match(f'.*{baz_name}.*', text)
                    for text in call_stmts)
    self.assertTrue(found_sin)
    self.assertTrue(found_baz)
def test_decorated_function(self):
    """With the JIT disabled, ``jit(f)`` compares equal to ``f``."""
    with override_config("DISABLE_JIT", True):
        def method(x):
            return x

        jitted = jit(method)

    self.assertEqual(jitted, method)
    # Both the original and the decorated function remain callable.
    self.assertEqual(10, method(10))
    self.assertEqual(10, jitted(10))
def run_caching_overload_method(q, cache_dir):
    """
    Helper for TestOverloadMethodCaching.test_caching_overload_method.

    Takes an argument from ``q``, runs the cached jitted use case on it with
    ``CACHE_DIR`` set to ``cache_dir``, puts the result back on ``q`` and
    then checks the cache statistics of the compiled function.
    """
    with override_config('CACHE_DIR', cache_dir):
        value = q.get()
        compile_cached = jit(nopython=True, cache=True)
        cfunc = compile_cached(cache_overload_method_usecase)
        q.put(cfunc(value))
        # Check cache stat
        _assert_cache_stats(cfunc, 1, 0)
def test_inner_function(self):
    """The inner function must not show up in the dumped assembly."""
    with override_config('DUMP_ASSEMBLY', True), \
            captured_stdout() as out:
        cfunc = jit((types.int32,), nopython=True)(outer_simple)
    self.assertPreciseEqual(cfunc(1), 4)

    # The inner function being absent from the output also guarantees
    # that it was inlined into the outer function.
    asm = out.getvalue()
    module_name = __name__
    self.assert_has_pattern('%s.outer_simple' % module_name, asm)
    self.assert_not_has_pattern('%s.inner' % module_name, asm)
def test_dump_ir_generator(self):
    """Check the ``DUMP_IR`` output produced for a simple generator."""
    with override_config('DUMP_IR', True):
        out = self.compile_simple_gen()

    self.check_debug_output(out, ['ir'])
    generator_header = '--GENERATOR INFO: %s' % self.func_name
    self.assertIn(generator_header, out)
    # State variables and per-yield-point liveness expected in the dump.
    expected_gen_info = textwrap.dedent("""
        generator state variables: ['x', 'y']
        yield point #1: live variables = ['y'], weak live variables = ['x']
        yield point #2: live variables = [], weak live variables = ['y']
        """)
    self.assertIn(expected_gen_info, out)
def test_inefficient_launch_configuration(self):
    """A ``[1, 1]`` launch must record a low occupancy performance warning."""
    @cuda.jit
    def kernel():
        pass

    with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1), \
            warnings.catch_warnings(record=True) as caught:
        kernel[1, 1]()

    first = caught[0]
    message = str(first.message)
    self.assertEqual(first.category, NumbaPerformanceWarning)
    self.assertIn('Grid size', message)
    self.assertIn('2 * SM count', message)
def test_bound_function_error_string(self):
    """Check the typing error text for a failing bound function call.

    See PR #5952.
    """
    def foo(x):
        x.max(-1)  # axis not supported

    with override_config('DEVELOPER_MODE', 1), \
            self.assertRaises(errors.TypingError) as raises:
        njit("void(int64[:,:])")(foo)

    message = str(raises.exception)
    self.assertIn("AssertionError()", message)
    self.assertIn("BoundFunction(array.max for array(int64, 2d, A))",
                  message)
def test_nowarn_on_device_array(self):
    """Passing a device array to a kernel must not record any warning."""
    @cuda.jit
    def foo(r, x):
        r[0] = x + 1

    N = 10
    device_ary = cuda.device_array(N, dtype=np.float32)

    with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1), \
            warnings.catch_warnings(record=True) as caught:
        foo[1, N](device_ary, N)

    self.assertEqual(len(caught), 0)
def test_multiple_inner_functions(self):
    """As ``test_inner_function``, with several inner functions.

    The outer function uses multiple inner functions and calls the same
    inner function (inner()) more than once.  This checks that linking in
    the same library/module twice doesn't produce linker errors.
    """
    with override_config('DUMP_ASSEMBLY', True), \
            captured_stdout() as out:
        cfunc = jit((types.int32,), nopython=True)(outer_multiple)
    self.assertPreciseEqual(cfunc(1), 6)

    asm = out.getvalue()
    module_name = __name__
    self.assert_has_pattern('%s.outer_multiple' % module_name, asm)
    # Neither of the inner functions may appear in the output.
    self.assert_not_has_pattern('%s.more' % module_name, asm)
    self.assert_not_has_pattern('%s.inner' % module_name, asm)
def test_devicearray_strict_strides(self):
    """With strict strides checking the array is not reported contiguous.

    From the reproducer in Issue #6824.
    """
    with override_config('NPY_RELAXED_STRIDES_CHECKING', 0):
        # The strides of the first axis (800) differ from strides * size
        # of the previous axis (10 * 8 = 80), so the array is not
        # contiguous.
        shape, strides = (1, 10), (800, 8)
        arr = devicearray.DeviceNDArray(shape, strides, np.float64)

        # Because strides checking is strict, neither contiguity flag
        # may be set.
        flags = arr.flags
        self.assertFalse(flags['C_CONTIGUOUS'])
        self.assertFalse(flags['F_CONTIGUOUS'])
def test_c_f_contiguity_matches_numpy(self):
    """Device array contiguity flags must agree with the host array's.

    From the reproducer in Issue #4943.
    """
    shapes = ((1, 4), (4, 1))
    orders = ('C', 'F')

    with override_config('NPY_RELAXED_STRIDES_CHECKING', 1):
        for shape in shapes:
            for order in orders:
                host = np.ndarray(shape, order=order)
                device = cuda.to_device(host)
                self.assertEqual(host.flags['C_CONTIGUOUS'],
                                 device.flags['C_CONTIGUOUS'])
                self.assertEqual(host.flags['F_CONTIGUOUS'],
                                 device.flags['F_CONTIGUOUS'])
def test_environment_override(self):
    """``DEBUGINFO_DEFAULT`` applies unless ``debug`` is given explicitly."""
    int32_sig = (types.int32,)

    with override_config('DEBUGINFO_DEFAULT', 1):
        # No debug kwarg: the configured default is used.
        @jit(nopython=True)
        def foo(x):
            return x

        self._check(foo, sig=int32_sig, expect=True)

        # An explicit debug=False overrides the configured default.
        @jit(nopython=True, debug=False)
        def bar(x):
            return x

        self._check(bar, sig=int32_sig, expect=False)
def run_compile(self, fnlist):
    """Call randomly chosen entries of ``fnlist`` from four threads.

    A temporary directory named after the test class is created, stored on
    ``self._cache_dir`` and installed as ``CACHE_DIR`` while the threads
    run.  Each thread makes ten calls.
    """
    self._cache_dir = temp_directory(self.__class__.__name__)
    with override_config('CACHE_DIR', self._cache_dir):
        def chooser():
            for _ in range(10):
                random.choice(fnlist)()

        workers = [threading.Thread(target=chooser) for _ in range(4)]
        for worker in workers:
            worker.start()
        # Wait for every thread before leaving the config override.
        for worker in workers:
            worker.join()
def test_devicearray_relaxed_strides(self):
    """With relaxed strides checking the array is reported contiguous.

    From the reproducer in Issue #6824.
    """
    with override_config('NPY_RELAXED_STRIDES_CHECKING', 1):
        # The strides of the first axis (800) differ from strides * size
        # of the previous axis (10 * 8 = 80), but the array is still
        # contiguous because the first axis has size 1.
        shape, strides = (1, 10), (800, 8)
        arr = devicearray.DeviceNDArray(shape, strides, np.float64)

        # Because strides checking is relaxed, both contiguity flags
        # must be set.
        flags = arr.flags
        self.assertTrue(flags['C_CONTIGUOUS'])
        self.assertTrue(flags['F_CONTIGUOUS'])
def test_environment_override(self):
    """``CUDA_DEBUGINFO_DEFAULT`` applies unless ``debug`` is given."""
    array_sig = (types.int32[:], )

    with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
        # No debug kwarg: the configured default is used.
        @cuda.jit
        def foo(x):
            x[0] = 1

        self._check(foo, sig=array_sig, expect=True)

        # An explicit debug=False overrides the configured default.
        @cuda.jit(debug=False)
        def bar(x):
            x[0] = 1

        self._check(bar, sig=array_sig, expect=False)
def test_warn_on_host_array(self):
    """Passing a host array to a kernel must record a performance warning."""
    @cuda.jit
    def foo(r, x):
        r[0] = x + 1

    N = 10
    host_ary = np.zeros(N, dtype=np.float32)

    with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1), \
            warnings.catch_warnings(record=True) as caught:
        foo[1, N](host_ary, N)

    first = caught[0]
    message = str(first.message)
    self.assertEqual(first.category, NumbaPerformanceWarning)
    self.assertIn('Host array used in CUDA kernel will incur', message)
    self.assertIn('copy overhead', message)
def test_environment_override(self):
    """``CUDA_DEBUGINFO_DEFAULT`` applies unless ``debug`` is given."""
    array_sig = (types.int32[:],)

    with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
        # No debug kwarg: the configured default is used.
        @cuda.jit
        def foo(x):
            x[0] = 1

        self._check(foo, sig=array_sig, expect=True)

        # An explicit debug=False overrides the configured default.
        @cuda.jit(debug=False)
        def bar(x):
            x[0] = 1

        self._check(bar, sig=array_sig, expect=False)
def test_consume_sync_disabled(self):
    """With sync disabled, consuming a foreign array must not synchronize."""
    # Foreign array backed by a device array created on its own stream.
    stream = cuda.stream()
    f_arr = ForeignArray(cuda.device_array(10, stream=stream))

    # The sync setting is forced to False only for this block, whatever
    # value the test suite is otherwise run with.
    with override_config('CUDA_ARRAY_INTERFACE_SYNC', False), \
            patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                         return_value=None) as mock_sync:
        cuda.as_cuda_array(f_arr)

    # The patched Stream.synchronize must never have been called.
    mock_sync.assert_not_called()
def test_omitted_arg(self):
    """Check the debug info emitted for an omitted argument.

    See issue 7726.  The DISubroutineType of the compiled function is
    located, its type list is resolved, and the last entry is checked to
    be an anonymous structure whose element list is empty.
    """
    @njit(debug=True)
    def foo(missing=None):
        pass

    # check that it will actually compile (verifies DI emission is ok)
    with override_config('DEBUGINFO_DEFAULT', 1):
        foo()

    metadata = self._get_metadata(foo, sig=(types.Omitted(None), ))
    metadata_definition_map = self._get_metadata_map(metadata)

    # Find DISubroutineType; exactly one is expected
    tmp_disubr = [md for md in metadata if "DISubroutineType" in md]
    self.assertEqual(len(tmp_disubr), 1)
    disubr = tmp_disubr.pop()

    disubr_matched = re.match(r'.*!DISubroutineType\(types: ([!0-9]+)\)$',
                              disubr)
    self.assertIsNotNone(disubr_matched)
    disubr_groups = disubr_matched.groups()
    self.assertEqual(len(disubr_groups), 1)
    disubr_meta = disubr_groups[0]

    # Find the types in the DISubroutineType arg list
    disubr_types = metadata_definition_map[disubr_meta]
    disubr_types_matched = re.match(r'!{(.*)}', disubr_types)
    # Guard the match that is dereferenced on the next line, so a failure
    # is reported as an assertion rather than as an AttributeError.
    self.assertIsNotNone(disubr_types_matched)
    disubr_types_groups = disubr_types_matched.groups()
    self.assertEqual(len(disubr_types_groups), 1)

    # fetch out the last argument type, it should be an anonymous struct
    md_fn_arg = [x.strip() for x in disubr_types_groups[0].split(',')][-1]
    arg_ty = metadata_definition_map[md_fn_arg]
    expected_arg_ty = (r'^.*!DICompositeType\(tag: DW_TAG_structure_type, '
                       r'name: "Anonymous struct \({}\)", elements: '
                       r'(![0-9]+), identifier: "{}"\)')
    self.assertRegex(arg_ty, expected_arg_ty)
    md_base_ty = re.match(expected_arg_ty, arg_ty).groups()[0]
    base_ty = metadata_definition_map[md_base_ty]
    # expect ir.LiteralStructType([])
    self.assertEqual(base_ty, ('!{}'))
def test_vectorize(self):
    """``vectorize(fastmath=True)`` must emit 'fast' flagged instructions.

    The version built without fastmath must not, and both versions must
    agree numerically.
    """
    def foo(x):
        return x + math.sin(x)

    fastfoo = vectorize(fastmath=True)(foo)
    slowfoo = vectorize(foo)
    data = np.random.random(8).astype(np.float32)

    # Capture the optimized LLVM dumped while each version is called.
    with override_config('DUMP_OPTIMIZED', True):
        with captured_stdout() as slow_cap:
            expect = slowfoo(data)
        slowllvm = slow_cap.getvalue()
        with captured_stdout() as fast_cap:
            got = fastfoo(data)
        fastllvm = fast_cap.getvalue()

    np.testing.assert_almost_equal(expect, got)
    for fast_marker in ('fadd fast', 'call fast'):
        self.assertIn(fast_marker, fastllvm)
    for fast_marker in ('fadd fast', 'call fast'):
        self.assertNotIn(fast_marker, slowllvm)
def test_guvectorize(self):
    """``guvectorize(fastmath=True)`` must emit 'fast' flagged instructions.

    The version built without fastmath must not, and both versions must
    agree numerically.
    """
    def foo(x, out):
        out[0] = x + math.sin(x)

    data = np.random.random(8).astype(np.float32)

    # Capture the optimized LLVM dumped while each version is built.
    with override_config('DUMP_OPTIMIZED', True):
        type_sigs = ['(float32, float32[:])']
        layout = '()->()'
        with captured_stdout() as fast_cap:
            fastfoo = guvectorize(type_sigs, layout, fastmath=True)(foo)
        fastllvm = fast_cap.getvalue()
        with captured_stdout() as slow_cap:
            slowfoo = guvectorize(type_sigs, layout)(foo)
        slowllvm = slow_cap.getvalue()

    expect = slowfoo(data)
    got = fastfoo(data)
    np.testing.assert_almost_equal(expect, got)
    for fast_marker in ('fadd fast', 'call fast'):
        self.assertIn(fast_marker, fastllvm)
    for fast_marker in ('fadd fast', 'call fast'):
        self.assertNotIn(fast_marker, slowllvm)
def test_dump_assembly(self):
    """With ``DUMP_ASSEMBLY`` set, the 'assembly' debug output is checked."""
    with override_config('DUMP_ASSEMBLY', True):
        captured = self.compile_simple_cuda()
    self.check_debug_output(captured, ['assembly'])
def test_dump_llvm(self):
    """With ``DUMP_LLVM`` set, the 'llvm' debug output is checked."""
    with override_config('DUMP_LLVM', True):
        captured = self.compile_simple_cuda()
    self.check_debug_output(captured, ['llvm'])
def test_dump_cfg(self):
    """With ``DUMP_CFG`` set, the 'cfg' debug output is checked."""
    with override_config('DUMP_CFG', True):
        captured = self.compile_simple_cuda()
    self.check_debug_output(captured, ['cfg'])
def test_dump_ir(self):
    """With ``DUMP_IR`` set, the 'ir' debug output is checked."""
    with override_config('DUMP_IR', True):
        captured = self.compile_simple_cuda()
    self.check_debug_output(captured, ['ir'])
def test_dump_bytecode(self):
    """With ``DUMP_BYTECODE`` set, the 'bytecode' debug output is checked."""
    with override_config('DUMP_BYTECODE', True):
        captured = self.compile_simple_cuda()
    self.check_debug_output(captured, ['bytecode'])