def mk_pipeline(cls, args, return_type=None, flags=None, locals=None,
                library=None, typing_context=None, target_context=None):
    """Instantiate ``cls`` as a compiler pipeline with defaulted settings.

    Parameters
    ----------
    args
        Argument types forwarded to ``cls``.
    return_type
        Return type forwarded to ``cls`` (``None`` by default).
    flags
        Compiler flags.  A falsy value is replaced with a fresh ``Flags()``.
        ``nrt`` is then switched on, which also modifies a flags object
        supplied by the caller.
    locals
        Mapping forwarded to ``cls``.  ``None`` (the default) yields a new
        empty dict for every call.
    library
        Forwarded to ``cls`` unchanged.
    typing_context, target_context
        Contexts forwarded to ``cls``; when ``None`` they are taken from
        ``registry.cpu_target``.

    Returns
    -------
    Whatever ``cls(typing_context, target_context, library, args,
    return_type, flags, locals)`` returns.
    """
    if not flags:
        flags = Flags()
    flags.nrt = True
    # A ``{}`` default would be one dict shared by every call (and by every
    # pipeline built from it); create a fresh one per call instead.
    if locals is None:
        locals = {}
    if typing_context is None:
        typing_context = registry.cpu_target.typing_context
    if target_context is None:
        target_context = registry.cpu_target.target_context
    return cls(typing_context, target_context, library, args, return_type,
               flags, locals)
def mk_pipeline(cls, args, return_type=None, flags=None, locals=None,
                library=None, typing_context=None, target_context=None):
    """Instantiate ``cls`` as a compiler pipeline with defaulted settings.

    Parameters
    ----------
    args
        Argument types forwarded to ``cls``.
    return_type
        Return type forwarded to ``cls`` (``None`` by default).
    flags
        Compiler flags.  A falsy value is replaced with a fresh ``Flags()``.
        ``nrt`` is then switched on, which also modifies a flags object
        supplied by the caller.
    locals
        Mapping forwarded to ``cls``.  ``None`` (the default) yields a new
        empty dict for every call.
    library
        Forwarded to ``cls`` unchanged.
    typing_context
        Typing context; a new ``typing.Context()`` is created when ``None``.
    target_context
        Target context; when ``None`` a ``cpu.CPUContext`` is built on top
        of the typing context in use.

    Returns
    -------
    Whatever ``cls(typing_context, target_context, library, args,
    return_type, flags, locals)`` returns.
    """
    if not flags:
        flags = Flags()
    flags.nrt = True
    # A ``{}`` default would be one dict shared by every call (and by every
    # pipeline built from it); create a fresh one per call instead.
    if locals is None:
        locals = {}
    if typing_context is None:
        typing_context = typing.Context()
    if target_context is None:
        target_context = cpu.CPUContext(typing_context)
    return cls(typing_context, target_context, library, args, return_type,
               flags, locals)
def test_scalar(self):
    """Compare the compiled scalar Black-Scholes kernel with the Python one.

    ``cnd`` is compiled on its own and its entry point is registered as a
    user function in the typing context; ``blackscholes_scalar_jitted`` is
    then compiled against that context.  Both implementations run on the
    same random inputs and the call-price outputs must agree.
    """
    global cnd_jitted

    flags = Flags()

    # Compile the inner function
    inner = compile_isolated(cnd, (types.float64, ))
    cnd_jitted = inner.entry_point

    # Manually type the compiled function for calling into
    tyctx = inner.typing_context
    ctx = inner.target_context
    template = typing.make_concrete_template(
        "cnd_jitted", cnd_jitted, [inner.signature])
    tyctx.insert_user_function(cnd_jitted, template)

    # Compile the outer function
    f64_array = types.Array(types.float64, 1, "C")
    argtys = (f64_array, ) * 5 + (types.float64, types.float64)
    outer = compile_extra(
        tyctx,
        ctx,
        blackscholes_scalar_jitted,
        args=argtys,
        return_type=None,
        flags=flags,
        locals={},
    )
    jitted_bs = outer.entry_point

    OPT_N = 400
    iterations = 10

    # Output buffers: reference ("gold") results and compiled results.
    call_gold, put_gold, call_numba, put_numba = (
        np.zeros(OPT_N) for _ in range(4))

    # Inputs are drawn in this order so the random stream is consumed the
    # same way on every run.
    stock_price = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0)
    option_strike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0)
    option_years = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0)

    inputs = (stock_price, option_strike, option_years, RISKFREE, VOLATILITY)

    blackscholes_scalar(call_gold, put_gold, *inputs)
    jitted_bs(call_numba, put_numba, *inputs)

    # Only the call prices are compared.
    abs_err = np.abs(call_gold - call_numba)
    l1_norm = abs_err.sum() / np.abs(call_gold).sum()
    print("L1 norm: %E" % l1_norm)
    print("Max absolute error: %E" % abs_err.max())
    self.assertAlmostEqual(abs_err.max(), 0)
def gen_ir(self, func, args_tuple, fastmath=False):
    """Compile ``func`` under a pinned CPU model and return its LLVM IR.

    The ``NUMBA_CPU_NAME`` and ``NUMBA_CPU_FEATURES`` settings are overridden
    for the duration of the compilation.  ``fastmath`` is wrapped in
    ``FastMathOptions`` and NRT is enabled.

    Returns the string produced by the compiled library's
    ``get_llvm_str()``.
    """
    with override_env_config("NUMBA_CPU_NAME", "skylake-avx512"):
        with override_env_config("NUMBA_CPU_FEATURES", ""):
            compile_flags = Flags()
            compile_flags.set('fastmath', FastMathOptions(fastmath))
            compile_flags.set('nrt', True)
            cres = compile_isolated(func, args_tuple, flags=compile_flags)
            # Fetch the IR while the overrides are still active.
            return cres.library.get_llvm_str()
def test_demangle(self):
    """Demangling a flags mangle string must reproduce ``summary()``.

    Checked for an empty ``Flags()``, for ``DEFAULT_FLAGS`` and for a
    ``Flags`` object with several options switched on.
    """
    def assert_roundtrip(candidate):
        demangled = candidate.demangle(candidate.get_mangle_string())
        # Demangle result MUST match summary()
        self.assertEqual(demangled, candidate.summary())

    customised = Flags()
    customised.no_cpython_wrapper = True
    customised.nrt = True
    customised.fastmath = True

    # empty flags, the defaults, then a non-default combination
    for candidate in (Flags(), DEFAULT_FLAGS, customised):
        assert_roundtrip(candidate)
def test_demangling_from_mangled_symbols(self):
    """Test demangling of flags from mangled symbol"""
    flags = Flags()
    flags.nrt = True
    flags.target_backend = "myhardware"

    # Use default mangler to mangle the string
    symbol = default_mangler(
        'foo',
        (types.int32,),
        abi_tags=[flags.get_mangle_string()],
    )

    # The ABI-tag starts with "B" and is preceded by its decimal length.
    prefix = "_Z3fooB"
    after_prefix = symbol[len(prefix):]
    length_match = re.match("[0-9]+", after_prefix)
    tag_length = int(length_match.group(0))

    # Extract the ABI tag: it begins right after the length digits.
    tag_start = len(prefix) + length_match.end()
    abi_mangled = symbol[tag_start:tag_start + tag_length]

    # Demangle and check
    self.assertEqual(Flags.demangle(abi_mangled), flags.summary())
def complex_constant(n):
    """Return ``n + 4`` plus the imaginary constant ``3j``."""
    tmp = n + 4
    return tmp + 3j


def long_constant(n):
    """Return ``n`` plus an integer literal far outside the 64-bit range."""
    return n + 100000000000000000000000000000000000000000000000


def delitem_usecase(x):
    """Delete every element of ``x`` in place via a full-slice ``del``."""
    del x[:]


# Flags object with ``force_pyobject`` switched on (consumers are outside
# this excerpt).
forceobj = Flags()
forceobj.force_pyobject = True


def loop_nest_3(x, y):
    """Accumulate ``i * j`` over a three-level loop nest and return the sum.

    The innermost loop runs ``x + y`` times per ``(i, j)`` pair; its index
    ``k`` is not used in the accumulated expression.
    """
    n = 0
    for i in range(x):
        for j in range(y):
            for k in range(x + y):
                n += i * j

    return n


def array_of_object(x):
    """Return ``x`` unchanged."""
    return x
def simple_class_user(obj): return obj.h def unsupported_parfor(a, b): return np.dot(a, b) # dot as gemm unsupported def supported_parfor(n): a = np.ones(n) for i in prange(n): a[i] = a[i] + np.sin(i) return a force_parallel_flags = Flags() force_parallel_flags.set("auto_parallel", ParallelOptions(True)) force_parallel_flags.set('nrt') class DebugTestBase(TestCase): all_dumps = set([ 'bytecode', 'cfg', 'ir', 'typeinfer', 'llvm', 'func_opt_llvm', 'optimized_llvm', 'assembly' ]) def assert_fails(self, *args, **kwargs): self.assertRaises(AssertionError, *args, **kwargs) def check_debug_output(self, out, dump_names):
class TestSVML(TestCase):
    """ Tests SVML behaves as expected """

    # env mutating, must not run in parallel
    _numba_parallel_test_ = False

    def __init__(self, *args):
        # Flag sets are created before the base initialiser runs so that
        # they exist on every instance.
        self.flags = Flags()
        self.flags.set('nrt')

        # flags for njit(fastmath=True)
        self.fastflags = Flags()
        self.fastflags.set('nrt')
        self.fastflags.set('fastmath', cpu.FastMathOptions(True))
        super(TestSVML, self).__init__(*args)

    def compile(self, func, *args, **kwargs):
        """Compile ``func`` for the types of ``args``, without and with
        fastmath, and return the pair ``(std, fast)`` of compile results.

        Keyword arguments are not supported.
        """
        assert not kwargs
        sig = tuple(numba.typeof(x) for x in args)

        std = compile_isolated(func, sig, flags=self.flags)
        fast = compile_isolated(func, sig, flags=self.fastflags)

        return std, fast

    def copy_args(self, *args):
        """Return a tuple of independent copies of ``args``.

        NumPy arrays and NumPy scalars are copied; other numbers are passed
        through as they are.  Raises ``ValueError`` for any other type.
        """
        new_args = []
        for x in args:
            if isinstance(x, np.ndarray):
                new_args.append(x.copy('k'))
            elif isinstance(x, np.number):
                new_args.append(x.copy())
            elif isinstance(x, numbers.Number):
                new_args.append(x)
            else:
                raise ValueError('Unsupported argument type encountered')
        return tuple(new_args)

    def check(self, pyfunc, *args, **kwargs):
        """Check numerical results and, optionally, the generated assembly.

        Recognised keyword arguments:

        - ``std_pattern``: substring expected in the non-fastmath assembly.
        - ``fast_pattern``: substring expected in the fastmath assembly.
        - ``cpu_name``: CPU model used for the assembly check
          (default ``'skylake-avx512'``).

        Remaining keyword arguments are forwarded to
        ``np.testing.assert_almost_equal``.
        """
        jitstd, jitfast = self.compile(pyfunc, *args)

        std_pattern = kwargs.pop('std_pattern', None)
        fast_pattern = kwargs.pop('fast_pattern', None)
        cpu_name = kwargs.pop('cpu_name', 'skylake-avx512')

        # python result
        py_expected = pyfunc(*self.copy_args(*args))

        # jit result
        jitstd_result = jitstd.entry_point(*self.copy_args(*args))

        # fastmath result
        jitfast_result = jitfast.entry_point(*self.copy_args(*args))

        # assert numerical equality
        np.testing.assert_almost_equal(jitstd_result, py_expected, **kwargs)
        np.testing.assert_almost_equal(jitfast_result, py_expected, **kwargs)

        # look for specific patterns in the asm for a given target
        with override_env_config('NUMBA_CPU_NAME', cpu_name), \
                override_env_config('NUMBA_CPU_FEATURES', ''):
            # recompile for overridden CPU
            jitstd, jitfast = self.compile(pyfunc, *args)
            if std_pattern:
                self.check_svml_presence(jitstd, std_pattern)
            if fast_pattern:
                self.check_svml_presence(jitfast, fast_pattern)

    def check_svml_presence(self, func, pattern):
        """Assert that ``pattern`` occurs in the assembly of ``func``."""
        asm = func.library.get_asm_str()
        self.assertIn(pattern, asm)

    def test_scalar_context(self):
        # SVML will not be used.
        pat = '$_sin' if config.IS_OSX else '$sin'
        self.check(math_sin_scalar, 7., std_pattern=pat)
        self.check(math_sin_scalar, 7., fast_pattern=pat)

    def test_svml(self):
        # loops both with and without fastmath should use SVML.
        # The high accuracy routines are dropped if `fastmath` is set
        std = "__svml_sin8_ha,"
        fast = "__svml_sin8,"  # No `_ha`!
        self.check(math_sin_loop, 10, std_pattern=std, fast_pattern=fast)

    def test_svml_disabled(self):
        # The script runs in a fresh interpreter so that the environment
        # variable is set before numba is imported.
        code = """if 1:
            import os
            import numpy as np
            import math

            def math_sin_loop(n):
                ret = np.empty(n, dtype=np.float64)
                for x in range(n):
                    ret[x] = math.sin(np.float64(x))
                return ret

            def check_no_svml():
                try:
                    # ban the use of SVML
                    os.environ['NUMBA_DISABLE_INTEL_SVML'] = '1'

                    # delay numba imports to account for env change as
                    # numba.__init__ picks up SVML and it is too late by
                    # then to override using `numba.config`
                    import numba
                    from numba import config
                    from numba.core import cpu
                    from numba.tests.support import override_env_config
                    from numba.core.compiler import compile_isolated, Flags

                    # compile for overridden CPU, with and without fastmath
                    with override_env_config('NUMBA_CPU_NAME', 'skylake-avx512'), \
                         override_env_config('NUMBA_CPU_FEATURES', ''):
                        sig = (numba.int32,)
                        f = Flags()
                        f.set('nrt')
                        std = compile_isolated(math_sin_loop, sig, flags=f)
                        f.set('fastmath', cpu.FastMathOptions(True))
                        fast = compile_isolated(math_sin_loop, sig, flags=f)
                        fns = std, fast

                        # assert no SVML call is present in the asm
                        for fn in fns:
                            asm = fn.library.get_asm_str()
                            assert '__svml_sin' not in asm
                finally:
                    # not really needed as process is separate
                    os.environ['NUMBA_DISABLE_INTEL_SVML'] = '0'
                    config.reload_config()
            check_no_svml()
            """
        popen = subprocess.Popen(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        out, err = popen.communicate()
        if popen.returncode != 0:
            raise AssertionError(
                "process failed with code %s: stderr follows\n%s\n" %
                (popen.returncode, err.decode()))

    def test_svml_working_in_non_isolated_context(self):
        @njit(fastmath={'fast'}, error_model="numpy")
        def impl(n):
            x = np.empty(n * 8, dtype=np.float64)
            ret = np.empty_like(x)
            for i in range(ret.size):
                ret[i] += math.cosh(x[i])
            return ret
        impl(1)
        # Same assertion helper as check_svml_presence uses.
        self.assertIn('intel_svmlcc', impl.inspect_llvm(impl.signatures[0]))
import numpy as np

from numba import njit
import unittest
from numba.core.compiler import compile_isolated, Flags
from numba.core import types, errors
from numba.tests.support import TestCase, MemoryLeakMixin, tag
from numba.np import numpy_support

# Pre-built Flags objects (their consumers are outside this excerpt).
# ``enable_pyobject`` switched on:
enable_pyobj_flags = Flags()
enable_pyobj_flags.enable_pyobject = True

# ``force_pyobject`` switched on:
force_pyobj_flags = Flags()
force_pyobj_flags.force_pyobject = True

# No option changed from the Flags() defaults:
no_pyobj_flags = Flags()


def int_tuple_iter_usecase():
    """Sum the items of a literal tuple of ints by iterating over it."""
    res = 0
    for i in (1, 2, 99, 3):
        res += i
    return res


def float_tuple_iter_usecase():
    """Sum the items of a literal tuple of floats by iterating over it."""
    res = 0.0
    for i in (1.5, 2.0, 99.3, 3.4):
        res += i
    return res
from functools import partial from itertools import permutations import numpy as np import unittest from numba.core.compiler import compile_isolated, Flags from numba import jit, njit, from_dtype, typeof from numba.core.errors import TypingError from numba.core import types, errors from numba.tests.support import (TestCase, MemoryLeakMixin, CompilationCache, tag) enable_pyobj_flags = Flags() enable_pyobj_flags.set("enable_pyobject") no_pyobj_flags = Flags() no_pyobj_flags.set('nrt') def from_generic(pyfuncs_to_use): """Decorator for generic check functions. Iterates over 'pyfuncs_to_use', calling 'func' with the iterated item as first argument. Example: @from_generic(numpy_array_reshape, array_reshape) def check_only_shape(pyfunc, arr, shape, expected_shape): # Only check Numba result to avoid Numpy bugs self.memory_leak_setup() got = generic_run(pyfunc, arr, shape) self.assertEqual(got.shape, expected_shape)
from numba.core.compiler import (compile_extra, compile_isolated, Flags, DEFAULT_FLAGS, CompilerBase, DefaultPassBuilder) from numba.core.typed_passes import IRLegalization from numba.core.untyped_passes import PreserveIR import unittest from numba.core.runtime import rtsys from numba.np import numpy_support from numba.pycc.platform import _external_compiler_ok try: import scipy except ImportError: scipy = None enable_pyobj_flags = Flags() enable_pyobj_flags.enable_pyobject = True force_pyobj_flags = Flags() force_pyobj_flags.force_pyobject = True no_pyobj_flags = Flags() nrt_flags = Flags() nrt_flags.nrt = True tag = testing.make_tag_decorator(['important', 'long_running']) _32bit = sys.maxsize <= 2**32 is_parfors_unsupported = _32bit skip_parfors_unsupported = unittest.skipIf(
# Tests numba.analysis functions import collections import numpy as np from numba.core.compiler import compile_isolated, run_frontend, Flags, StateDict from numba import jit, njit from numba.core import types, errors, ir, rewrites, ir_utils from numba.tests.support import TestCase, MemoryLeakMixin, SerialMixin from numba.core.analysis import dead_branch_prune, rewrite_semantic_constants _GLOBAL = 123 enable_pyobj_flags = Flags() enable_pyobj_flags.set("enable_pyobject") def compile_to_ir(func): func_ir = run_frontend(func) state = StateDict() state.func_ir = func_ir state.typemap = None state.calltypes = None # call this to get print etc rewrites rewrites.rewrite_registry.apply('before-inference', state) return func_ir class TestBranchPruneBase(MemoryLeakMixin, TestCase): """
nrt, ) from numba.core.extending import intrinsic, include_path from numba.core.typing import signature from numba.core.imputils import impl_ret_untracked from llvmlite import ir import llvmlite.binding as llvm import numba.core.typing.cffi_utils as cffi_support from numba.core.unsafe.nrt import NRT_get_api from numba.tests.support import (MemoryLeakMixin, TestCase, temp_directory, import_dynamic) from numba.core import cpu import unittest enable_nrt_flags = Flags() enable_nrt_flags.set("nrt") linux_only = unittest.skipIf(not sys.platform.startswith('linux'), 'linux only test') x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test') class Dummy(object): alive = 0 def __init__(self): type(self).alive += 1 def __del__(self):
import numpy as np

from numba import njit
import unittest
from numba.core.compiler import compile_isolated, Flags
from numba.core import types, errors
from numba.tests.support import TestCase, MemoryLeakMixin, tag
from numba.np import numpy_support

# Pre-built Flags objects (their consumers are outside this excerpt).
# ``enable_pyobject`` set:
enable_pyobj_flags = Flags()
enable_pyobj_flags.set("enable_pyobject")

# ``force_pyobject`` set:
force_pyobj_flags = Flags()
force_pyobj_flags.set("force_pyobject")

# No option changed from the Flags() defaults:
no_pyobj_flags = Flags()


def int_tuple_iter_usecase():
    """Sum the items of a literal tuple of ints by iterating over it."""
    res = 0
    for i in (1, 2, 99, 3):
        res += i
    return res


def float_tuple_iter_usecase():
    """Sum the items of a literal tuple of floats by iterating over it."""
    res = 0.0
    for i in (1.5, 2.0, 99.3, 3.4):
        res += i
    return res
import decimal
import itertools

import numpy as np

import unittest
from numba.core.compiler import compile_isolated, Flags
from numba import njit, typeof
from numba.core import utils, types, errors
from numba.tests.support import TestCase, tag
from numba.core.typing import arraydecl
from numba.core.types import intp, ellipsis, slice2_type, slice3_type

# Pre-built Flags objects (their consumers are outside this excerpt).
# ``enable_pyobject`` set:
enable_pyobj_flags = Flags()
enable_pyobj_flags.set("enable_pyobject")

# ``nrt`` set:
Noflags = Flags()
Noflags.set("nrt")


def slicing_1d_usecase(a, start, stop, step):
    """Return the slice ``a[start:stop:step]``."""
    return a[start:stop:step]


def slicing_1d_usecase2(a, start, stop, step):
    """Return a position-weighted sum of ``a[start:stop:step]``.

    Each element of the slice is multiplied by its 1-based position within
    the slice before being added to the total.
    """
    b = a[start:stop:step]
    total = 0
    # The slice length is read from ``shape[0]``, so ``b`` must expose a
    # ``shape`` attribute.
    for i in range(b.shape[0]):
        total += b[i] * (i + 1)
    return total
def test_mangled_flags_is_shorter(self):
    """The mangled form of a flags object is shorter than its summary."""
    # at least for these control cases
    control = Flags()
    control.nrt = True
    control.auto_parallel = True

    mangled_length = len(control.get_mangle_string())
    summary_length = len(control.summary())
    self.assertLess(mangled_length, summary_length)
def compile_parallel(self, func, arg_types):
    """Compile ``func`` with auto-parallel, NRT and fastmath enabled.

    Returns the ``entry_point`` of the result of ``compile_isolated``.
    """
    options = (
        ("auto_parallel", cpu.ParallelOptions(True)),
        ("nrt", True),
        ("fastmath", cpu.FastMathOptions(True)),
    )
    parallel_flags = Flags()
    for option_name, option_value in options:
        setattr(parallel_flags, option_name, option_value)

    cres = compile_isolated(func, arg_types, flags=parallel_flags)
    return cres.entry_point
import unittest
from numba.core.compiler import compile_isolated, Flags
from numba.core import types

# Flags object with ``force_pyobject`` switched on.
force_pyobj_flags = Flags()
force_pyobj_flags.force_pyobject = True

# Flags object with every option left at its default.
no_pyobj_flags = Flags()


def return_int(a, b):
    """Return ``a + b``."""
    return a + b


class TestPythonInt(unittest.TestCase):
    """Checks on the Python type of values returned by compiled code."""

    # Issue #474: ints should be returned rather than longs under Python 2,
    # as much as possible.

    def test_int_return_type(self, flags=force_pyobj_flags,
                             int_type=types.int64, operands=(3, 4)):
        """The compiled ``return_int`` returns the same type and value as
        the pure-Python function for ``operands``.
        """
        compiled = compile_isolated(
            return_int, (int_type, int_type), flags=flags
        ).entry_point

        expected = return_int(*operands)
        got = compiled(*operands)

        self.assertIs(type(got), type(expected))
        self.assertEqual(got, expected)
def simple_class_user(obj): return obj.h def unsupported_parfor(a, b): return np.dot(a, b) # dot as gemm unsupported def supported_parfor(n): a = np.ones(n) for i in prange(n): a[i] = a[i] + np.sin(i) return a force_parallel_flags = Flags() force_parallel_flags.auto_parallel = ParallelOptions(True) force_parallel_flags.nrt = True class DebugTestBase(TestCase): all_dumps = set([ 'bytecode', 'cfg', 'ir', 'typeinfer', 'llvm', 'func_opt_llvm', 'optimized_llvm', 'assembly' ]) def assert_fails(self, *args, **kwargs): self.assertRaises(AssertionError, *args, **kwargs) def check_debug_output(self, out, dump_names):
import collections
import types as pytypes

import numpy as np
from numba.core.compiler import compile_isolated, run_frontend, Flags, StateDict
from numba import jit, njit
from numba.core import types, errors, ir, rewrites, ir_utils, utils, cpu
from numba.core import postproc
from numba.core.inline_closurecall import InlineClosureCallPass
from numba.tests.support import TestCase, MemoryLeakMixin, SerialMixin
from numba.core.analysis import dead_branch_prune, rewrite_semantic_constants

_GLOBAL = 123

# Flags object with ``enable_pyobject`` switched on.
enable_pyobj_flags = Flags()
enable_pyobj_flags.enable_pyobject = True


def compile_to_ir(func):
    """Run the frontend on ``func`` and apply the 'before-inference' rewrites.

    Returns the IR object produced by ``run_frontend``.
    """
    frontend_ir = run_frontend(func)

    # The rewrite registry is driven through a state object; typing
    # information is left as None.
    rewrite_state = StateDict()
    rewrite_state.func_ir = frontend_ir
    rewrite_state.typemap = None
    rewrite_state.calltypes = None

    # call this to get print etc rewrites
    rewrites.rewrite_registry.apply('before-inference', rewrite_state)
    return frontend_ir
def complex_constant(n):
    """Return ``n + 4`` plus the imaginary constant ``3j``."""
    tmp = n + 4
    return tmp + 3j


def long_constant(n):
    """Return ``n`` plus an integer literal far outside the 64-bit range."""
    return n + 100000000000000000000000000000000000000000000000


def delitem_usecase(x):
    """Delete every element of ``x`` in place via a full-slice ``del``."""
    del x[:]


# Flags object with ``force_pyobject`` set (consumers are outside this
# excerpt).
forceobj = Flags()
forceobj.set("force_pyobject")


def loop_nest_3(x, y):
    """Accumulate ``i * j`` over a three-level loop nest and return the sum.

    The innermost loop runs ``x + y`` times per ``(i, j)`` pair; its index
    ``k`` is not used in the accumulated expression.
    """
    n = 0
    for i in range(x):
        for j in range(y):
            for k in range(x + y):
                n += i * j

    return n


def array_of_object(x):
    """Return ``x`` unchanged."""
    return x
from functools import partial from itertools import permutations import numpy as np import unittest from numba.core.compiler import compile_isolated, Flags from numba import jit, njit, from_dtype, typeof from numba.np.numpy_support import numpy_version from numba.core.errors import TypingError from numba.core import types, errors from numba.tests.support import (TestCase, MemoryLeakMixin, CompilationCache, tag) enable_pyobj_flags = Flags() enable_pyobj_flags.enable_pyobject = True no_pyobj_flags = Flags() no_pyobj_flags.nrt = True def from_generic(pyfuncs_to_use): """Decorator for generic check functions. Iterates over 'pyfuncs_to_use', calling 'func' with the iterated item as first argument. Example: @from_generic(numpy_array_reshape, array_reshape) def check_only_shape(pyfunc, arr, shape, expected_shape): # Only check Numba result to avoid Numpy bugs self.memory_leak_setup() got = generic_run(pyfunc, arr, shape)
from io import StringIO import numpy as np from numba.core import types from numba.core.compiler import compile_isolated, Flags from numba.tests.support import TestCase, tag, MemoryLeakMixin import unittest looplift_flags = Flags() looplift_flags.enable_pyobject = True looplift_flags.enable_looplift = True pyobject_looplift_flags = looplift_flags.copy() pyobject_looplift_flags.enable_pyobject_looplift = True def lift1(x): # Outer needs object mode because of np.empty() a = np.empty(3) for i in range(a.size): # Inner is nopython-compliant a[i] = x return a def lift2(x): # Outer needs object mode because of np.empty() a = np.empty((3, 4)) for i in range(a.shape[0]): for j in range(a.shape[1]): # Inner is nopython-compliant
import heapq as hq import itertools import numpy as np from numba import jit, typed from numba.core.compiler import Flags from numba.core.config import IS_WIN32 from numba.tests.support import TestCase, CompilationCache, MemoryLeakMixin no_pyobj_flags = Flags() no_pyobj_flags.nrt = True def heapify(x): return hq.heapify(x) def heappop(heap): return hq.heappop(heap) def heappush(heap, item): return hq.heappush(heap, item) def heappushpop(heap, item): return hq.heappushpop(heap, item) def heapreplace(heap, item):
nrt, ) from numba.core.extending import intrinsic, include_path from numba.core.typing import signature from numba.core.imputils import impl_ret_untracked from llvmlite import ir import llvmlite.binding as llvm import numba.core.typing.cffi_utils as cffi_support from numba.core.unsafe.nrt import NRT_get_api from numba.tests.support import (MemoryLeakMixin, TestCase, temp_directory, import_dynamic) from numba.core.registry import cpu_target import unittest enable_nrt_flags = Flags() enable_nrt_flags.nrt = True linux_only = unittest.skipIf(not sys.platform.startswith('linux'), 'linux only test') x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test') class Dummy(object): alive = 0 def __init__(self): type(self).alive += 1 def __del__(self):
def compile_parallel(self, func, arg_types):
    """Compile ``func`` with auto-parallel, NRT and fastmath set.

    Returns the ``entry_point`` of the result of ``compile_isolated``.
    """
    parallel_flags = Flags()
    parallel_flags.set('auto_parallel', cpu.ParallelOptions(True))
    parallel_flags.set('nrt')
    parallel_flags.set('fastmath', cpu.FastMathOptions(True))

    cres = compile_isolated(func, arg_types, flags=parallel_flags)
    return cres.entry_point
import heapq as hq
import itertools

import numpy as np

from numba import jit
from numba.core.compiler import Flags
from numba.tests.support import TestCase, CompilationCache, MemoryLeakMixin

# Flags object with ``nrt`` set (consumers are outside this excerpt).
no_pyobj_flags = Flags()
no_pyobj_flags.set("nrt")


# Each function below is a one-line wrapper that forwards its arguments to
# the heapq function of the same name and returns that call's result.

def heapify(x):
    """Call ``heapq.heapify(x)`` and return its result."""
    return hq.heapify(x)


def heappop(heap):
    """Call ``heapq.heappop(heap)`` and return its result."""
    return hq.heappop(heap)


def heappush(heap, item):
    """Call ``heapq.heappush(heap, item)`` and return its result."""
    return hq.heappush(heap, item)


def heappushpop(heap, item):
    """Call ``heapq.heappushpop(heap, item)`` and return its result."""
    return hq.heappushpop(heap, item)


def heapreplace(heap, item):
    """Call ``heapq.heapreplace(heap, item)`` and return its result."""
    return hq.heapreplace(heap, item)
def _cull_exports(self):
    """Compile every export entry into one library and return that library.

    Side effects on ``self``: ``exported_function_types``,
    ``function_environments`` and ``environment_gvs`` are reset and, when
    Python wrappers are exported, filled per entry; otherwise each entry's
    symbol is appended to ``dll_exports``.  After finalization, defined
    functions whose names are not in ``dll_exports`` have their visibility
    adjusted.
    """
    self.exported_function_types = {}
    self.function_environments = {}
    self.environment_gvs = {}

    library = self.context.codegen().create_library(self.module_name)

    # Generate IR for all exported functions
    flags = Flags()
    flags.set("no_compile")
    if not self.export_python_wrap:
        flags.set("no_cpython_wrapper")
        flags.set("no_cfunc_wrapper")
    if self.use_nrt:
        flags.set("nrt")
        # Compile NRT helpers
        nrt_module, _ = nrtdynmod.create_nrt_module(self.context)
        library.add_ir_module(nrt_module)

    for entry in self.export_entries:
        sig = entry.signature
        cres = compile_extra(self.typing_context, self.context,
                             entry.function, sig.args, sig.return_type,
                             flags, locals={}, library=library)

        llvm_func = cres.library.get_function(cres.fndesc.llvm_func_name)

        if not self.export_python_wrap:
            # The compiled function itself is the exported symbol.
            llvm_func.name = entry.symbol
            self.dll_exports.append(entry.symbol)
            continue

        # Only the CPython wrapper is externally visible; the compiled
        # function becomes internal.
        llvm_func.linkage = lc.LINKAGE_INTERNAL
        wrapper = cres.library.get_function(
            cres.fndesc.llvm_cpython_wrapper_name)
        wrapper.name = self._mangle_method_symbol(entry.symbol)
        wrapper.linkage = lc.LINKAGE_EXTERNAL

        fnty = cres.target_context.call_conv.get_function_type(
            cres.fndesc.restype, cres.fndesc.argtypes)
        self.exported_function_types[entry] = fnty
        self.function_environments[entry] = cres.environment
        self.environment_gvs[entry] = cres.fndesc.env_name

    if self.export_python_wrap:
        wrapper_module = library.create_ir_module("wrapper")
        self._emit_python_wrapper(wrapper_module)
        library.add_ir_module(wrapper_module)

    # Hide all functions in the DLL except those explicitly exported
    library.finalize()
    for fn in library.get_defined_functions():
        if fn.name in self.dll_exports:
            continue
        # Private/Internal linkage must have "default" visibility
        is_local = fn.linkage in {Linkage.private, Linkage.internal}
        fn.visibility = "default" if is_local else 'hidden'

    return library
from io import StringIO import numpy as np from numba.core import types from numba.core.compiler import compile_isolated, Flags from numba.tests.support import TestCase, tag, MemoryLeakMixin import unittest looplift_flags = Flags() looplift_flags.set("enable_pyobject") looplift_flags.set("enable_looplift") pyobject_looplift_flags = looplift_flags.copy() pyobject_looplift_flags.set("enable_pyobject_looplift") def lift1(x): # Outer needs object mode because of np.empty() a = np.empty(3) for i in range(a.size): # Inner is nopython-compliant a[i] = x return a def lift2(x): # Outer needs object mode because of np.empty() a = np.empty((3, 4)) for i in range(a.shape[0]): for j in range(a.shape[1]): # Inner is nopython-compliant