def test_raise_in_try(self):
    """Check a jitted ``try``/bare-``except`` around a conditional raise.

    When the flag is true the handler prints "C" and returns 321;
    otherwise the body prints "B" and the function returns 123.
    """
    @njit
    def udt(x):
        try:
            print("A")
            if x:
                raise MyError("my_error")
            print("B")
        except:  # noqa: E722
            print("C")
            return 321
        return 123

    # (argument, expected printed tokens, expected return value)
    scenarios = (
        (True, ["A", "C"], 321),    # case 1
        (False, ["A", "B"], 123),   # case 2
    )
    for flag, printed, returned in scenarios:
        with captured_stdout() as stdout:
            res = udt(flag)
        self.assertEqual(stdout.getvalue().split(), printed)
        self.assertEqual(res, returned)
def test_catch_exception(self):
    """Check that ``except Exception`` in jitted code catches a
    ``ZeroDivisionError`` and that execution continues after the handler.
    """
    @njit
    def udt(x):
        try:
            print("A")
            if x:
                raise ZeroDivisionError("321")
            print("B")
        except Exception:
            print("C")
        print("D")

    # (argument, expected printed tokens)
    scenarios = (
        (True, ["A", "C", "D"]),    # case 1
        (False, ["A", "B", "D"]),   # case 2
    )
    for flag, printed in scenarios:
        with captured_stdout() as stdout:
            udt(flag)
        self.assertEqual(stdout.getvalue().split(), printed)
def test_return_in_catch(self):
    """Check that a ``return`` inside an ``except`` handler of jitted code
    skips the statements that follow the ``try`` block.
    """
    @njit
    def udt(x):
        try:
            print("A")
            if x:
                raise ZeroDivisionError
            print("B")
            r = 123
        except Exception:
            print("C")
            r = 321
            return r
        print("D")
        return r

    # (argument, expected printed tokens, expected return value)
    scenarios = (
        (True, ["A", "C"], 321),         # case 1
        (False, ["A", "B", "D"], 123),   # case 2
    )
    for flag, printed, returned in scenarios:
        with captured_stdout() as stdout:
            res = udt(flag)
        self.assertEqual(stdout.getvalue().split(), printed)
        self.assertEqual(res, returned)
def check_compare(self, cfunc, pyfunc, *args, **kwargs):
    """Call *pyfunc* then *cfunc* with the same arguments and assert that
    both write identical text to stdout.
    """
    def printed_by(fn):
        # Run ``fn`` with stdout captured and hand back what it wrote.
        with captured_stdout() as stream:
            fn(*args, **kwargs)
        return stream.getvalue()

    expect = printed_by(pyfunc)
    got = printed_by(cfunc)
    failure_note = "args={} kwargs={}".format(args, kwargs)
    self.assertEqual(expect, got, msg=failure_note)
def check_same_semantic(self, func):
    """Ensure same semantic with non-jitted code

    Runs the ``njit``-compiled version first, then the plain function, and
    compares what each one printed.
    """
    compiled = njit(func)
    transcripts = []
    for target in (compiled, func):
        with captured_stdout() as stream:
            target()
        transcripts.append(stream.getvalue())
    got, expect = transcripts
    self.assertEqual(got, expect)
def test_print_vararg(self):
    # Test *args support for print().  This is desired since
    # print() can use a dedicated IR node.
    usecases = (
        (print_vararg, "1 (2, 3) 4 5j\n"),
        (print_string_vararg, "1 hop! (2, 3) 4 5j\n"),
    )
    for pyfunc, expected_text in usecases:
        cfunc = jit(nopython=True)(pyfunc)
        with captured_stdout():
            cfunc(1, (2, 3), (4, 5j))
            # Read while the capture is still installed as sys.stdout.
            printed = sys.stdout.getvalue()
        self.assertEqual(printed, expected_text)
def test_print_values(self):
    """
    Test printing a single argument value.
    """
    pyfunc = print_value

    def check_values(typ, values):
        # Compile for ``typ`` and compare each printed value with str().
        cr = compile_isolated(pyfunc, (typ,))
        cfunc = cr.entry_point
        for val in values:
            with captured_stdout():
                cfunc(val)
                printed = sys.stdout.getvalue()
            self.assertEqual(printed, str(val) + '\n')

    # Various scalars
    scalar_cases = (
        (types.int32, (1, -234)),
        (types.int64, (1, -234,
                       123456789876543210, -123456789876543210)),
        (types.uint64, (1, 234,
                        123456789876543210, 2**63 + 123)),
        (types.boolean, (True, False)),
        (types.float64, (1.5, 100.0**10.0, float('nan'))),
        (types.complex64, (1+1j,)),
        (types.NPTimedelta('ms'), (np.timedelta64(100, 'ms'),)),
    )
    for typ, values in scalar_cases:
        check_values(typ, values)

    cr = compile_isolated(pyfunc, (types.float32,))
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(1.1)
        # Float32 will lose precision
        got = sys.stdout.getvalue()
    expect = '1.10000002384'
    self.assertTrue(got.startswith(expect))
    self.assertTrue(got.endswith('\n'))

    # NRT-enabled type
    with self.assertNoNRTLeak():
        x = [1, 3, 5, 7]
        with self.assertRefCount(x):
            check_values(types.List(types.int32), (x,))

    # Array will have to use object mode
    arraytype = types.Array(types.int32, 1, 'C')
    cr = compile_isolated(pyfunc, (arraytype,), flags=enable_pyobj_flags)
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(np.arange(10, dtype=np.int32))
        printed = sys.stdout.getvalue()
    self.assertEqual(printed, '[0 1 2 3 4 5 6 7 8 9]\n')
def _test_usecase2to5(self, pyfunc, dtype):
    """Compile *pyfunc* for a record array of *dtype* and assert that the
    compiled and pure-Python versions print the same text.
    """
    array = self._setup_usecase2to5(dtype)
    record_type = numpy_support.from_dtype(dtype)
    cres = compile_isolated(pyfunc, (record_type[:], types.intp))
    cfunc = cres.entry_point

    def printed_by(fn):
        # Read the text while the capture is still installed as sys.stdout.
        with captured_stdout():
            fn(array, len(array))
            return sys.stdout.getvalue()

    expect = printed_by(pyfunc)
    got = printed_by(cfunc)
    self.assertEqual(expect, got)
def test_cuda_detect(self):
    """Run ``cuda.detect()`` and check the text it prints."""
    # exercise the code path
    with captured_stdout() as out:
        cuda.detect()
    report = out.getvalue()
    for fragment in ('Found', 'CUDA devices'):
        self.assertIn(fragment, report)
def test_inspect_types_pretty(self):
    """Check ``inspect_types(pretty=True)`` output and its rejection of
    an explicit ``file`` argument.
    """
    @jit
    def foo(a, b):
        return a + b

    foo(1, 2)

    # Exercise the method, dump the output
    with captured_stdout():
        ann = foo.inspect_types(pretty=True)

    # ensure HTML <span> is found in the annotation output
    for _, entry in ann.ann.items():
        # Build the full list (no short-circuit) so every line is inspected.
        hits = ['span' in line[2] for line in entry['pygments_lines']]
        self.assertTrue(any(hits))

    # check that file+pretty kwarg combo raises
    with self.assertRaises(ValueError) as raises:
        foo.inspect_types(file=StringIO(), pretty=True)

    message = str(raises.exception)
    self.assertIn("`file` must be None if `pretty=True`", message)
def test_ex_inferred_list_jit(self):
    """Run the typed-list example that builds and uses a ``List`` inside a
    jitted function, with stdout captured so the prints stay silent.
    """
    # NOTE(review): the "magictoken" comment looks like a marker used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code that follows it.
    with captured_stdout():
        # magictoken.ex_inferred_list_jit.begin
        from numba import njit
        from numba.typed import List

        @njit
        def foo():
            # Instantiate a typed-list
            l = List()
            # Append a value to it, this will set the type to int32/int64
            # (depending on platform)
            l.append(42)
            # The usual list operations, getitem, pop and length are
            # supported
            print(l[0])   # 42
            l[0] = 23
            print(l[0])   # 23
            print(len(l))   # 1
            l.pop()
            print(len(l))   # 0
            return l

        foo()
def test_ex_inferred_list(self):
    """Run the typed-list example that creates a ``List`` outside of jitted
    code and passes it to a jitted function, with stdout captured.
    """
    # NOTE(review): the "magictoken" comment looks like a marker used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code that follows it.
    with captured_stdout():
        # magictoken.ex_inferred_list.begin
        from numba import njit
        from numba.typed import List

        @njit
        def foo(mylist):
            for i in range(10, 20):
                mylist.append(i)
            return mylist

        # Instantiate a typed-list, outside of a jit context
        l = List()
        # Append a value to it, this will set the type to int32/int64
        # (depending on platform)
        l.append(42)
        # The usual list operations, getitem, pop and length are supported
        print(l[0])   # 42
        l[0] = 23
        print(l[0])   # 23
        print(len(l))   # 1
        l.pop()
        print(len(l))   # 0

        # And you can use the typed-list as an argument for a jit compiled
        # function
        l = foo(l)
        print(len(l))   # 10

        # You can also directly construct a typed-list from an existing
        # Python list
        py_list = [2, 3, 5]
        numba_list = List(py_list)
        print(len(numba_list))   # 3
def assert_auto_offloading(parfor_offloaded=1, parfor_offloaded_failure=0):
    """
    Generator-based context manager that counts parfor offload messages
    printed while ``config.DEBUG`` is forced to 1.

    If ``parfor_offloaded`` is not provided this context_manager
    will check for 1 occurrence of success message. Developers
    can always specify how many parfor offload success message
    is expected.
    If ``parfor_offloaded_failure`` is not provided the default
    behavior is to expect 0 failure message, in other words, we
    expect all parfors present in the code to be successfully
    offloaded to GPU.

    Raises ``AssertionError`` when either count differs from the
    expectation. The previous ``config.DEBUG`` value is restored even if
    the managed body raises.
    """
    old_debug = config.DEBUG
    config.DEBUG = 1
    try:
        with captured_stdout() as stdout:
            yield
    finally:
        # Restore unconditionally so a failing body cannot leave debug
        # output switched on for code that runs afterwards.
        config.DEBUG = old_debug

    captured = stdout.getvalue()

    got_parfor_offloaded = captured.count("Parfor offloaded to")
    assert parfor_offloaded == got_parfor_offloaded, (
        "Expected %d parfor(s) to be auto offloaded, "
        "instead got %d parfor(s) auto offloaded"
        % (parfor_offloaded, got_parfor_offloaded))

    got_parfor_offloaded_failure = captured.count(
        "Failed to offload parfor to")
    assert parfor_offloaded_failure == got_parfor_offloaded_failure, (
        "Expected %d parfor(s) to be not auto offloaded, "
        "instead got %d parfor(s) not auto offloaded"
        % (parfor_offloaded_failure, got_parfor_offloaded_failure))
def test_print_empty(self):
    """The compiled ``print_empty`` use case must print a lone newline."""
    cr = compile_isolated(print_empty, ())
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc()
        # Read while the capture is still installed as sys.stdout.
        printed = sys.stdout.getvalue()
    self.assertEqual(printed, "\n")
def test_with_dppy_context_cpu(self):
    """Run a jitted function inside an ``opencl:cpu`` offload context and
    check both its result and the debug message reporting the offload.

    ``config.DEBUG`` is switched on only for the offloaded call and is
    restored to its previous value afterwards, even if that call raises.
    """
    @njit
    def nested_func(a, b):
        np.sin(a, b)

    @njit
    def func(b):
        a = np.ones((64), dtype=np.float64)
        nested_func(a, b)

    expected = np.ones((64), dtype=np.float64)
    got_cpu = np.ones((64), dtype=np.float64)

    old_debug = config.DEBUG
    config.DEBUG = 1
    try:
        with captured_stdout() as got_cpu_message:
            device = dpctl.SyclDevice("opencl:cpu")
            with dppy.offload_to_sycl_device(device):
                func(got_cpu)
    finally:
        # Put back whatever was configured before instead of forcing 0.
        config.DEBUG = old_debug

    # Reference run outside of any offload context.
    func(expected)

    np.testing.assert_array_equal(expected, got_cpu)
    # assertIn shows the captured text on failure, unlike assertTrue.
    self.assertIn(
        "Parfor offloaded to opencl:cpu", got_cpu_message.getvalue())
def test_ex_typed_dict_njit(self):
    """Run the typed-dict example that builds a ``Dict`` of float arrays
    inside a jitted function, then check the two stored arrays.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.ex_typed_dict_njit.begin
        import numpy as np
        from numba import njit
        from numba.core import types
        from numba.typed import Dict

        # Make array type.  Type-expression is not supported in jit
        # functions.
        float_array = types.float64[:]

        @njit
        def foo():
            # Make dictionary
            d = Dict.empty(
                key_type=types.unicode_type,
                value_type=float_array,
            )
            # Fill the dictionary
            d["posx"] = np.arange(3).astype(np.float64)
            d["posy"] = np.arange(3, 6).astype(np.float64)
            return d

        d = foo()
        # Print the dictionary
        print(d)  # Out: {posx: [0. 1. 2.], posy: [3. 4. 5.]}
        # magictoken.ex_typed_dict_njit.end

    # Verify the contents independently of the printed representation.
    np.testing.assert_array_equal(d['posx'], [0, 1, 2])
    np.testing.assert_array_equal(d['posy'], [3, 4, 5])
def test_for_loop(self):
    """Check ``try``/``except`` inside a ``for`` loop and nested re-raising
    handlers inside the loop's ``else`` clause in jitted code.
    """
    @njit
    def foo(n):
        for i in range(n):
            try:
                if i > 5:
                    raise ValueError
            except:  # noqa: E722
                print("CAUGHT")
        else:
            # This is the ``else`` of the ``for`` loop: it runs once after
            # the loop finishes, with ``i`` holding its last value.
            try:
                try:
                    try:
                        if i > 5:
                            raise ValueError
                    except:  # noqa: E722
                        print("CAUGHT1")
                        raise ValueError
                except:  # noqa: E722
                    print("CAUGHT2")
                    raise ValueError
            except:  # noqa: E722
                print("CAUGHT3")

    with captured_stdout() as stdout:
        foo(10)

    expected_tokens = ["CAUGHT"] * 4
    expected_tokens += ["CAUGHT%s" % i for i in range(1, 4)]
    self.assertEqual(stdout.getvalue().split(), expected_tokens)
def test_cuda_detect(self):
    """Call ``cuda.detect()`` and verify its printed summary."""
    # exercise the code path
    with captured_stdout() as out:
        cuda.detect()
    summary = out.getvalue()
    expected_fragments = ("Found", "CUDA devices")
    for fragment in expected_fragments:
        self.assertIn(fragment, summary)
def test_ex_initial_value_dict_compile_time_consts(self):
    """Run the example that reads a dict's ``initial_value`` in an
    ``@overload`` and check the dict the jitted function returns.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.test_ex_initial_value_dict_compile_time_consts.begin
        from numba import njit, literally
        from numba.extending import overload

        # overload this function
        def specialize(x):
            pass

        @overload(specialize)
        def ol_specialize(x):
            iv = x.initial_value
            if iv is None:
                return lambda x: literally(x)  # Force literal dispatch
            assert iv == {'a': 1, 'b': 2, 'c': 3}  # INITIAL VALUE
            return lambda x: literally(x)

        @njit
        def foo():
            d = {'a': 1, 'b': 2, 'c': 3}
            d['c'] = 20  # no impact on .initial_value
            d['d'] = 30  # no impact on .initial_value
            return specialize(d)

        result = foo()
        print(result)  # {a: 1, b: 2, c: 20, d: 30} # NOT INITIAL VALUE!
        # magictoken.test_ex_initial_value_dict_compile_time_consts.end

    # Build the expected typed dict key by key and compare with the result.
    expected = typed.Dict()
    for k, v in {'a': 1, 'b': 2, 'c': 20, 'd': 30}.items():
        expected[k] = v
    self.assertEqual(result, expected)
def test_unbalanced_example(self):
    """Run the ``prange`` example whose per-iteration work grows with the
    index and compare the parallel result with the pure-Python function.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.ex_unbalanced.begin
        from numba import (
            njit,
            prange,
        )
        import numpy as np

        @njit(parallel=True)
        def func1():
            n = 100
            vals = np.empty(n)
            # The work in each iteration of the following prange
            # loop is proportional to its index.
            for i in prange(n):
                cur = i + 1
                for j in range(i):
                    if cur % 2 == 0:
                        cur //= 2
                    else:
                        cur = cur * 3 + 1
                vals[i] = cur
            return vals

        result = func1()
        # magictoken.ex_unbalanced.end
        # ``py_func`` is the undecorated function, used as the reference.
        self.assertPreciseEqual(result, func1.py_func())
def check_values(typ, values):
    """Compile ``pyfunc`` for *typ* and check that each value prints as its
    ``str()`` followed by a newline.

    ``pyfunc`` and ``self`` are taken from the enclosing scope.
    """
    cr = compile_isolated(pyfunc, (typ,))
    cfunc = cr.entry_point
    for val in values:
        with captured_stdout():
            cfunc(val)
            # Read while the capture is still installed as sys.stdout.
            printed = sys.stdout.getvalue()
        self.assertEqual(printed, str(val) + "\n")
def test_ex_initial_value_list_compile_time_consts(self):
    """Run the example that reads a list's ``initial_value`` in an
    ``@overload`` and check the list the jitted function returns.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.test_ex_initial_value_list_compile_time_consts.begin
        from numba import njit, literally
        from numba.extending import overload

        # overload this function
        def specialize(x):
            pass

        @overload(specialize)
        def ol_specialize(x):
            iv = x.initial_value
            if iv is None:
                return lambda x: literally(x)  # Force literal dispatch
            assert iv == [1, 2, 3]  # INITIAL VALUE
            return lambda x: x

        @njit
        def foo():
            l = [1, 2, 3]
            l[2] = 20  # no impact on .initial_value
            l.append(30)  # no impact on .initial_value
            return specialize(l)

        result = foo()
        print(result)  # [1, 2, 20, 30] # NOT INITIAL VALUE!
        # magictoken.test_ex_initial_value_list_compile_time_consts.end

    # The returned list reflects the mutations made inside ``foo``.
    expected = [1, 2, 20, 30]
    self.assertEqual(result, expected)
def test_print_strings(self):
    """The compiled ``print_string`` use case prints the expected line
    when called with 1.
    """
    cr = compile_isolated(print_string, (types.int32,))
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(1)
        # Read while the capture is still installed as sys.stdout.
        printed = sys.stdout.getvalue()
    self.assertEqual(printed, "1 hop! 3.5\n")
def test_ex_inferred_dict_njit(self):
    """Run the example where dict key/value types are inferred inside a
    jitted function, then check the arrays stored in both dicts.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.ex_inferred_dict_njit.begin
        from numba import njit
        import numpy as np

        @njit
        def foo():
            d = dict()
            k = {1: np.arange(1), 2: np.arange(2)}
            # The following tells the compiler what the key type and the
            # value
            # type are for `d`.
            d[3] = np.arange(3)
            d[5] = np.arange(5)
            return d, k

        d, k = foo()
        print(d)    # {3: [0 1 2], 5: [0 1 2 3 4]}
        print(k)    # {1: [0], 2: [0 1]}
        # magictoken.ex_inferred_dict_njit.end

    # Verify the contents independently of the printed representation.
    np.testing.assert_array_equal(d[3], [0, 1, 2])
    np.testing.assert_array_equal(d[5], [0, 1, 2, 3, 4])
    np.testing.assert_array_equal(k[1], [0])
    np.testing.assert_array_equal(k[2], [0, 1])
def test_print_multiple_values(self):
    """The compiled ``print_values`` use case prints its three int32
    arguments separated by spaces.
    """
    signature = (types.int32,) * 3
    cr = compile_isolated(print_values, signature)
    cfunc = cr.entry_point
    with captured_stdout():
        cfunc(1, 2, 3)
        # Read while the capture is still installed as sys.stdout.
        printed = sys.stdout.getvalue()
    self.assertEqual(printed, "1 2 3\n")
def test_chunksize_with(self):
    """Run the ``parallel_chunksize`` context-manager example and compare
    each parallel result with the corresponding pure-Python function.
    """
    # NOTE(review): the "magictoken" comments look like markers used to
    # extract this snippet elsewhere (presumably documentation) -- confirm
    # before changing the code between them.
    with captured_stdout():
        # magictoken.ex_chunksize_with.begin
        from numba import njit, prange, parallel_chunksize

        @njit(parallel=True)
        def func1(n):
            acc = 0
            for i in prange(n):
                acc += i
            return acc

        @njit(parallel=True)
        def func2(n):
            acc = 0
            with parallel_chunksize(8):
                for i in prange(n):
                    acc += i
            return acc

        with parallel_chunksize(4):
            result1 = func1(12)
            result2 = func2(12)
            result3 = func1(12)
        # magictoken.ex_chunksize_with.end

    # ``py_func`` is the undecorated function, used as the reference.
    self.assertPreciseEqual(result1, func1.py_func(12))
    self.assertPreciseEqual(result2, func2.py_func(12))
    self.assertPreciseEqual(result3, func1.py_func(12))
def test_print(self):
    """
    Test re-implementing print() for a custom type with @overload.
    """
    compiled = jit(nopython=True)(print_usecase)
    with captured_stdout():
        compiled(MyDummy())
        # Read while the capture is still installed as sys.stdout.
        printed = sys.stdout.getvalue()
    self.assertEqual(printed, "hello!\n")
def assert_dpnp_implementaion():
    """Generator that yields once with stdout captured inside
    ``dpnp_debug()``, then asserts the captured text mentions
    "dpnp implementation".
    """
    from numba.tests.support import captured_stdout

    with captured_stdout() as stdout:
        with dpnp_debug():
            yield

    captured = stdout.getvalue()
    assert "dpnp implementation" in captured, "dpnp implementation is not used"
def assert_equal_return_and_stdout(self, pyfunc, *args):
    """Assert that *pyfunc* and its ``njit`` version return the same value
    and print the same text. Each call gets its own deep copy of *args*.
    """
    py_args = copy.deepcopy(args)
    c_args = copy.deepcopy(args)
    cfunc = njit(pyfunc)

    with captured_stdout() as py_stream:
        expect_res = pyfunc(*py_args)
        expect_out = py_stream.getvalue()

    # avoid compiling during stdout-capturing for easier print-debugging
    arg_types = tuple(typeof(arg) for arg in c_args)
    cfunc.compile(arg_types)

    with captured_stdout() as jit_stream:
        got_res = cfunc(*c_args)
        got_out = jit_stream.getvalue()

    self.assertEqual(expect_out, got_out)
    self.assertPreciseEqual(expect_res, got_res)
def test_inspect_types_with_signature(self):
    """``inspect_types()`` with no argument must print the same text as
    inspecting the first and second signatures one after the other.
    """
    @jit
    def foo(a):
        return a + 1

    # Compile two specializations.
    foo(1)
    foo(1.0)

    # Inspect all signatures
    with captured_stdout() as total:
        foo.inspect_types()

    # Inspect first signature, then second signature
    per_signature = []
    for index in (0, 1):
        with captured_stdout() as part:
            foo.inspect_types(signature=foo.signatures[index])
        per_signature.append(part.getvalue())

    self.assertEqual(total.getvalue(),
                     per_signature[0] + per_signature[1])
def test_array_debug_opt_stats(self):
    """
    Test that NUMBA_DEBUG_ARRAY_OPT_STATS produces valid output
    """
    argtypes = (types.int64,)

    # deliberately trigger a compilation loop to increment the
    # Parfor class state, this is to ensure the test works based
    # on indices computed based on this state and not hard coded
    # indices.
    cres = compile_isolated(supported_parfor, argtypes,
                            flags=force_parallel_flags)

    with override_env_config("NUMBA_DEBUG_ARRAY_OPT_STATS", "1"):
        with captured_stdout() as out:
            cres = compile_isolated(supported_parfor, argtypes,
                                    flags=force_parallel_flags)

        # grab the various parts out the output
        output = out.getvalue().split("\n")

        def lines_with(marker):
            return [text for text in output if marker in text]

        parallel_loop_output = lines_with("is produced from pattern")
        fuse_output = lines_with("is fused into")
        after_fusion_output = lines_with("After fusion, function")

        # Parfor's have a shared state index, grab the current value
        # as it will be used as an offset for all loop messages
        index_match = re.compile(r"#([0-9]+)").search(
            parallel_loop_output[0])
        parfor_state = int(index_match.group(1))
        bounds = range(parfor_state,
                       parfor_state + len(parallel_loop_output))

        # Check the Parallel for-loop <index> is produced from <pattern>
        # works first
        pattern = ("('ones function', 'NumPy mapping')",
                   ("prange", "user", ""))
        fmt = "Parallel for-loop #{} is produced from pattern '{}' at"
        for i, trials, lpattern in zip(bounds, parallel_loop_output,
                                       pattern):
            self.assertIn(fmt.format(i, lpattern), trials)

        # Check the fusion statements are correct
        fmt = "Parallel for-loop #{} is fused into for-loop #{}."
        to_match = fmt.format(parfor_state + 1, parfor_state + 0)
        for trials in fuse_output:
            self.assertIn(to_match, trials)

        # Check the post fusion statements are correct
        fmt = "After fusion, function {} has {} parallel for-loop(s) #{}."
        to_match = fmt.format(supported_parfor.__name__, 1, {parfor_state})
        for trials in after_fusion_output:
            self.assertIn(to_match, trials)
def compile_simple_cuda(self):
    """Compile and launch ``simple_cuda`` with stdout captured, check its
    result, and return the captured text.
    """
    with captured_stdout() as out:
        signature = (float64[:], float64[:])
        cfunc = cuda.jit(signature)(simple_cuda)
        # Call compiled function (to ensure PTX is generated)
        # and sanity-check results.
        A = np.linspace(0, 1, 10).astype(np.float64)
        B = np.zeros_like(A)
        cfunc[1, 10](A, B)
        matches = np.allclose(A + 1.5, B)
        self.assertTrue(matches)
    return out.getvalue()
def test_vectorize(self):
    """``vectorize(fastmath=True)`` must emit fast-math flags in the
    dumped optimized LLVM, while the default ``vectorize`` must not.
    """
    def foo(x):
        return x + math.sin(x)

    fastfoo = vectorize(fastmath=True)(foo)
    slowfoo = vectorize(foo)

    x = np.random.random(8).astype(np.float32)

    # capture the optimized llvm to check for fast flag
    with override_config('DUMP_OPTIMIZED', True):
        with captured_stdout() as slow_cap:
            expect = slowfoo(x)
            slowllvm = slow_cap.getvalue()
        with captured_stdout() as fast_cap:
            got = fastfoo(x)
            fastllvm = fast_cap.getvalue()

    np.testing.assert_almost_equal(expect, got)

    fast_markers = ('fadd fast', 'call fast')
    for marker in fast_markers:
        self.assertIn(marker, fastllvm)
    for marker in fast_markers:
        self.assertNotIn(marker, slowllvm)
def test_guvectorize(self):
    """``guvectorize(..., fastmath=True)`` must emit fast-math flags in
    the dumped optimized LLVM, while the default build must not.
    """
    def foo(x, out):
        out[0] = x + math.sin(x)

    x = np.random.random(8).astype(np.float32)

    with override_config('DUMP_OPTIMIZED', True):
        types = ['(float32, float32[:])']
        sig = '()->()'

        # The LLVM dump is captured while the gufunc is being built.
        with captured_stdout() as fast_cap:
            fastfoo = guvectorize(types, sig, fastmath=True)(foo)
        fastllvm = fast_cap.getvalue()

        with captured_stdout() as slow_cap:
            slowfoo = guvectorize(types, sig)(foo)
        slowllvm = slow_cap.getvalue()

    expect = slowfoo(x)
    got = fastfoo(x)
    np.testing.assert_almost_equal(expect, got)

    fast_markers = ('fadd fast', 'call fast')
    for marker in fast_markers:
        self.assertIn(marker, fastllvm)
    for marker in fast_markers:
        self.assertNotIn(marker, slowllvm)
def test_array_comp_shuffle_sideeffect(self):
    """Check that ``np.random.shuffle`` really reorders an array built
    from a list comprehension inside jitted code.

    The shuffled array is printed by the jitted function and compared
    with the printed form of the unshuffled array.
    """
    nelem = 100

    @jit(nopython=True)
    def foo():
        numbers = np.array([i for i in range(nelem)])
        np.random.shuffle(numbers)
        print(numbers)

    with captured_stdout() as gotbuf:
        foo()
    got = gotbuf.getvalue().strip()

    with captured_stdout() as expectbuf:
        print(np.array([i for i in range(nelem)]))
    expect = expectbuf.getvalue().strip()

    # For a large enough array, the chances of shuffle to not move any
    # element is tiny enough.
    self.assertNotEqual(got, expect)
    # assertRegexpMatches is a deprecated alias that was removed in
    # Python 3.12; assertRegex is the supported spelling.
    self.assertRegex(got, r'\[(\s*\d+)+\]')
def captured_cuda_stdout():
    """
    Return a minimal stream-like object capturing the text output of
    either CUDA or the simulator.

    This is a generator that yields exactly once: a ``PythonTextCapture``
    when ``config.ENABLE_CUDASIM`` is set, otherwise a ``CUDATextCapture``
    wrapping the redirected system stdout file descriptor.
    """
    # Flush text already buffered on Python's stdout so it is not picked
    # up by the capture that starts below.
    sys.stdout.flush()

    if config.ENABLE_CUDASIM:
        # The simulator calls print() on Python stdout
        with captured_stdout() as stream:
            yield PythonTextCapture(stream)
    else:
        # The CUDA runtime writes onto the system stdout
        from numba import cuda
        fd = sys.__stdout__.fileno()
        with redirect_fd(fd) as stream:
            yield CUDATextCapture(stream)
            # Synchronize while the redirection is still in place.
            cuda.synchronize()
def captured_cuda_stdout():
    """
    Return a minimal stream-like object capturing the text output of
    either CUDA or the simulator.

    This is a generator that yields exactly once.
    """
    # Prevent accidentally capturing previously output text
    sys.stdout.flush()

    if config.ENABLE_CUDASIM:
        # The simulator calls print() on Python stdout
        with captured_stdout() as py_stream:
            yield PythonTextCapture(py_stream)
        return

    # The CUDA runtime writes onto the system stdout
    from numba import cuda
    with redirect_c_stdout() as c_stream:
        yield CUDATextCapture(c_stream)
        cuda.synchronize()
def test_gil_reacquire_deadlock(self):
    """
    Testing similar issue to #1998 due to GIL reacquiring for Gufunc
    """
    # make a ctypes callback that requires the GIL
    proto = ctypes.CFUNCTYPE(None, ctypes.c_int32)
    characters = 'abcdefghij'

    def bar(x):
        print(characters[x])

    cbar = proto(bar)

    # our unit under test
    @guvectorize(['(int32, int32[:])'], "()->()",
                 target='parallel', nopython=True)
    def foo(x, out):
        print(x % 10)  # this reacquires the GIL
        cbar(x % 10)   # this reacquires the GIL
        out[0] = x * 2

    # Numpy ufunc has a heuristic to determine whether to release the GIL
    # during execution.  Small input size (10) seems to not release the GIL.
    # Large input size (1000) seems to release the GIL.
    for nelem in [1, 10, 100, 1000]:
        # inputs
        a = np.arange(nelem, dtype=np.int32)
        acopy = a.copy()

        # run and capture stdout
        with captured_stdout() as buf:
            got = foo(a)
        stdout = buf.getvalue()
        buf.close()

        # process outputs from print
        got_output = sorted(line.strip() for line in stdout.splitlines())

        # build expected output: one digit and one letter per element
        digits = [str(x % 10) for x in range(nelem)]
        letters = [characters[x % 10] for x in range(nelem)]
        expected_output = sorted(digits + letters)

        # verify
        self.assertEqual(got_output, expected_output)
        np.testing.assert_equal(got, 2 * acopy)