def __init__(self, arg_count, py_func): self._tm = default_type_manager #_dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), arg_count) # A mapping of signatures to entry points self.overloads = {} # A mapping of signatures to compile results self._compileinfos = {} # A list of nopython signatures self._npsigs = [] self.py_func = py_func # other parts of Numba assume the old Python 2 name for code object self.func_code = get_code_object(py_func) # but newer python uses a different name self.__code__ = self.func_code self._pysig = utils.pysignature(self.py_func) _argnames = tuple(self._pysig.parameters) _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), arg_count, _argnames) self.doc = py_func.__doc__ self._compile_lock = utils.NonReentrantLock() utils.finalize(self, self._make_finalizer())
def __init__(self, func): func = get_function_object(func) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) if code.co_cellvars: raise NotImplementedError("cell vars are not supported") table = utils.SortedMap(ByteCodeIter(code)) labels = set(dis.findlabels(code.co_code)) labels.add(0) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self._mark_lineno(table, code) super(ByteCode, self).__init__(func=func, func_qualname=func_qualname, is_generator=inspect.isgeneratorfunction(func), pysig=pysig, filename=code.co_filename, co_names=code.co_names, co_varnames=code.co_varnames, co_consts=code.co_consts, co_freevars=code.co_freevars, table=table, labels=list(sorted(labels)))
def __init__(self, arg_count, py_func): self._tm = default_type_manager # _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), arg_count) # A mapping of signatures to entry points self.overloads = {} # A mapping of signatures to compile results self._compileinfos = {} # A list of nopython signatures self._npsigs = [] self.py_func = py_func # other parts of Numba assume the old Python 2 name for code object self.func_code = get_code_object(py_func) # but newer python uses a different name self.__code__ = self.func_code self._pysig = utils.pysignature(self.py_func) _argnames = tuple(self._pysig.parameters) _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), arg_count, _argnames) self.doc = py_func.__doc__ self._compile_lock = utils.NonReentrantLock() utils.finalize(self, self._make_finalizer())
def __init__(self, py_func, locals={}, targetoptions={}): """ Parameters ---------- py_func: function object to be compiled locals: dict, optional Mapping of local variable names to Numba types. Used to override the types deduced by the type inference engine. targetoptions: dict, optional Target-specific config options. """ self.typingctx = self.targetdescr.typing_context self.targetctx = self.targetdescr.target_context pysig = utils.pysignature(py_func) arg_count = len(pysig.parameters) _OverloadedBase.__init__(self, arg_count, py_func, pysig) functools.update_wrapper(self, py_func) self.targetoptions = targetoptions self.locals = locals self._cache = NullCache() self.typingctx.insert_overloaded(self)
def __init__(self, py_func, locals={}, targetoptions={}): """ Parameters ---------- py_func: function object to be compiled locals: dict, optional Mapping of local variable names to Numba types. Used to override the types deduced by the type inference engine. targetoptions: dict, optional Target-specific config options. """ self.typingctx = self.targetdescr.typing_context self.targetctx = self.targetdescr.target_context pysig = utils.pysignature(py_func) arg_count = len(pysig.parameters) _OverloadedBase.__init__(self, arg_count, py_func, pysig) functools.update_wrapper(self, py_func) self.targetoptions = targetoptions self.locals = locals self.typingctx.insert_overloaded(self)
def __init__(self, py_func, locals={}, targetoptions={}, impl_kind='direct'): """ Parameters ---------- py_func: function object to be compiled locals: dict, optional Mapping of local variable names to Numba types. Used to override the types deduced by the type inference engine. targetoptions: dict, optional Target-specific config options. """ self.typingctx = self.targetdescr.typing_context self.targetctx = self.targetdescr.target_context pysig = utils.pysignature(py_func) arg_count = len(pysig.parameters) can_fallback = not targetoptions.get('nopython', False) _DispatcherBase.__init__(self, arg_count, py_func, pysig, can_fallback) functools.update_wrapper(self, py_func) self.targetoptions = targetoptions self.locals = locals self._cache = NullCache() compiler_class = self._impl_kinds[impl_kind] self._impl_kind = impl_kind self._compiler = compiler_class(py_func, self.targetdescr, targetoptions, locals) self._cache_hits = collections.Counter() self._cache_misses = collections.Counter() self._type = types.Dispatcher(self) self.typingctx.insert_global(self, self._type)
def sum_expand(self, args, kws): """ sum can be called with or without an axis parameter. """ pysig = None if kws: def sum_stub(axis): pass pysig = utils.pysignature(sum_stub) # rewrite args args = list(args) + [kws['axis']] kws = None args_len = len(args) assert args_len <= 1 if args_len == 0: # No axis parameter so the return type of the summation is a scalar # of the type of the array. out = signature(_expand_integer(self.this.dtype), *args, recvr=self.this) else: # There is an axis paramter so the return type of this summation is # an array of dimension one less than the input array. return_type = types.Array(dtype=_expand_integer(self.this.dtype), ndim=self.this.ndim - 1, layout='C') out = signature(return_type, *args, recvr=self.this) return out.replace(pysig=pysig)
def from_function(cls, pyfunc): """ Create the FunctionIdentity of the given function. """ func = get_function_object(pyfunc) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self = cls() self.func = pyfunc self.func_qualname = func_qualname self.func_name = func_qualname.split('.')[-1] self.code = code self.module = inspect.getmodule(func) self.is_generator = inspect.isgeneratorfunction(func) self.pysig = pysig self.filename = code.co_filename self.firstlineno = code.co_firstlineno self.arg_count = len(pysig.parameters) self.arg_names = list(pysig.parameters) return self
def sum_expand(self, args, kws): """ sum can be called with or without an axis parameter. """ pysig = None if kws: def sum_stub(axis): pass pysig = utils.pysignature(sum_stub) # rewrite args args = list(args) + [kws['axis']] kws = None args_len = len(args) assert args_len <= 1 if args_len == 0: # No axis parameter so the return type of the summation is a scalar # of the type of the array. out = signature(_expand_integer(self.this.dtype), *args, recvr=self.this) else: # There is an axis paramter so the return type of this summation is # an array of dimension one less than the input array. return_type = types.Array(dtype=_expand_integer(self.this.dtype), ndim=self.this.ndim-1, layout='C') out = signature(return_type, *args, recvr=self.this) return out.replace(pysig=pysig)
def __init__(self, func): func = get_function_object(func) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) # A map of {offset: ByteCodeInst} table = utils.SortedMap(ByteCodeIter(code)) labels = set(dis.findlabels(code.co_code)) labels.add(0) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self._mark_lineno(table, code) super(ByteCode, self).__init__(func=func, func_qualname=func_qualname, is_generator=inspect.isgeneratorfunction(func), pysig=pysig, filename=code.co_filename, co_names=code.co_names, co_varnames=code.co_varnames, co_consts=code.co_consts, co_freevars=code.co_freevars, table=table, labels=list(sorted(labels)))
def __init__(self, func, incols, outcols, kwargs): # Get signature of user function sig = pysignature(func) self.sig = sig self.incols = incols self.outcols = outcols self.kwargs = kwargs self.kernel = self.compile(func, sig.parameters.keys(), kwargs.keys())
def __init__(self, py_func, targetdescr, targetoptions, locals, pipeline_class): self.py_func = py_func self.targetdescr = targetdescr self.targetoptions = targetoptions self.locals = locals self.pysig = utils.pysignature(self.py_func) self.pipeline_class = pipeline_class
def _getargs(fn): """ Returns list of positional and keyword argument names in order. """ sig = utils.pysignature(fn) params = sig.parameters args = [k for k, v in params.items() if (v.kind & v.POSITIONAL_OR_KEYWORD) == v.POSITIONAL_OR_KEYWORD] return args
def __init__(self, func, incols, outcols, kwargs, pessimistic_nulls, cache_key): # Get signature of user function sig = pysignature(func) self.sig = sig self.incols = incols self.outcols = outcols self.kwargs = kwargs self.pessimistic_nulls = pessimistic_nulls self.cache_key = cache_key self.kernel = self.compile(func, sig.parameters.keys(), kwargs.keys())
def __init__(self, py_func, targetdescr, targetoptions, locals, pipeline_class): self.py_func = py_func self.targetdescr = targetdescr self.targetoptions = targetoptions self.locals = locals self.pysig = utils.pysignature(self.py_func) self.pipeline_class = pipeline_class # Remember key=(args, return_type) combinations that will fail # compilation to avoid compilation attempt on them. The values are # the exceptions. self._failed_cache = {}
def resolve_argsort(self, ary, args, kws): assert not args kwargs = dict(kws) kind = kwargs.pop('kind', types.StringLiteral('quicksort')) if kwargs: msg = "Unsupported keywords: {!r}" raise TypingError(msg.format([k for k in kwargs.keys()])) if ary.ndim == 1: def argsort_stub(kind='quicksort'): pass pysig = utils.pysignature(argsort_stub) sig = signature(types.Array(types.intp, 1, 'C'), kind).replace(pysig=pysig) return sig
def for_function(self, func): """Bind the sentry to the signature of *func*. Parameters ---------- func : Function A python function. Returns ------- obj : BoundLiteralArgs """ return self.for_pysig(utils.pysignature(func))
def resolve_argsort(self, ary, args, kws): assert not args kwargs = dict(kws) kind = kwargs.pop('kind', types.Const('quicksort')) if kwargs: msg = "Unsupported keywords: {!r}" raise TypingError(msg.format([k for k in kwargs.keys()])) if ary.ndim == 1: def argsort_stub(kind='quicksort'): pass pysig = utils.pysignature(argsort_stub) sig = signature(types.Array(types.intp, 1, 'C'), kind).replace(pysig=pysig) return sig
def _get_implementation(self, args, kws): impl = self.py_func(*args, **kws) # Check the generating function and implementation signatures are # compatible, otherwise compiling would fail later. pysig = utils.pysignature(self.py_func) implsig = utils.pysignature(impl) ok = len(pysig.parameters) == len(implsig.parameters) if ok: for pyparam, implparam in zip(pysig.parameters.values(), implsig.parameters.values()): # We allow the implementation to omit default values, but # if it mentions them, they should have the same value... if (pyparam.name != implparam.name or pyparam.kind != implparam.kind or (implparam.default is not implparam.empty and implparam.default != pyparam.default)): ok = False if not ok: raise TypeError("generated implementation %s should be compatible " "with signature '%s', but has signature '%s'" % (impl, pysig, implsig)) self.impls.add(impl) return impl
def _set_init(cls): """ Generate a wrapper for calling the constructor from pure Python. Note the wrapper will only accept positional arguments. """ init = cls.class_type.instance_type.methods['__init__'] init_sig = utils.pysignature(init) # get postitional and keyword arguments # offset by one to exclude the `self` arg args = _getargs(init_sig)[1:] cls._ctor_sig = init_sig ctor_source = _ctor_template.format(args=', '.join(args)) glbls = {"__numba_cls_": cls} exec_(ctor_source, glbls) ctor = glbls['ctor'] cls._ctor = njit(ctor)
def _type_me(self, argtys, kwtys): """ Implement AbstractTemplate.generic() for the typing class built by StencilFunc._install_type(). Return the call-site signature. """ if (self.neighborhood is not None and len(self.neighborhood) != argtys[0].ndim): raise ValueError("%d dimensional neighborhood specified " "for %d dimensional input array" % (len(self.neighborhood), argtys[0].ndim)) argtys_extra = argtys sig_extra = "" result = None if 'out' in kwtys: argtys_extra += (kwtys['out'], ) sig_extra += ", out=None" result = kwtys['out'] if 'neighborhood' in kwtys: argtys_extra += (kwtys['neighborhood'], ) sig_extra += ", neighborhood=None" # look in the type cache first if argtys_extra in self._type_cache: (_sig, _, _, _) = self._type_cache[argtys_extra] return _sig (real_ret, typemap, calltypes) = self.get_return_type(argtys) sig = signature(real_ret, *argtys_extra) dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format( ",".join(self.kernel_ir.arg_names), sig_extra)) exec_(dummy_text) in globals(), locals() dummy_func = eval("__numba_dummy_stencil") sig.pysig = utils.pysignature(dummy_func) self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra) ]) self._type_cache[argtys_extra] = (sig, result, typemap, calltypes) return sig
def _type_me(self, argtys, kwtys): """ Implement AbstractTemplate.generic() for the typing class built by StencilFunc._install_type(). Return the call-site signature. """ if (self.neighborhood is not None and len(self.neighborhood) != argtys[0].ndim): raise ValueError("%d dimensional neighborhood specified " "for %d dimensional input array" % (len(self.neighborhood), argtys[0].ndim)) argtys_extra = argtys sig_extra = "" result = None if 'out' in kwtys: argtys_extra += (kwtys['out'],) sig_extra += ", out=None" result = kwtys['out'] if 'neighborhood' in kwtys: argtys_extra += (kwtys['neighborhood'],) sig_extra += ", neighborhood=None" # look in the type cache first if argtys_extra in self._type_cache: (_sig, _, _, _) = self._type_cache[argtys_extra] return _sig (real_ret, typemap, calltypes) = self.get_return_type(argtys) sig = signature(real_ret, *argtys_extra) dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format( ",".join(self.kernel_ir.arg_names), sig_extra)) exec_(dummy_text) in globals(), locals() dummy_func = eval("__numba_dummy_stencil") sig.pysig = utils.pysignature(dummy_func) self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra)]) self._type_cache[argtys_extra] = (sig, result, typemap, calltypes) return sig
def from_function(cls, pyfunc): """ Create the FunctionIdentity of the given function. """ func = get_function_object(pyfunc) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self = cls() self.func = func self.func_qualname = func_qualname self.func_name = func_qualname.split('.')[-1] self.code = code self.module = inspect.getmodule(func) self.modname = (utils._dynamic_modname if self.module is None else self.module.__name__) self.is_generator = inspect.isgeneratorfunction(func) self.pysig = pysig self.filename = code.co_filename self.firstlineno = code.co_firstlineno self.arg_count = len(pysig.parameters) self.arg_names = list(pysig.parameters) # Even the same function definition can be compiled into # several different function objects with distinct closure # variables, so we make sure to disambiguate using an unique id. uid = next(cls._unique_ids) self.unique_name = '{}${}'.format(self.func_qualname, uid) return self
def from_function(cls, pyfunc): """ Create the FunctionIdentity of the given function. """ func = get_function_object(pyfunc) code = get_code_object(func) pysig = utils.pysignature(func) if not code: raise errors.ByteCodeSupportError( "%s does not provide its bytecode" % func) try: func_qualname = func.__qualname__ except AttributeError: func_qualname = func.__name__ self = cls() self.func = pyfunc self.func_qualname = func_qualname self.func_name = func_qualname.split('.')[-1] self.code = code self.module = inspect.getmodule(func) self.modname = (utils._dynamic_modname if self.module is None else self.module.__name__) self.is_generator = inspect.isgeneratorfunction(func) self.pysig = pysig self.filename = code.co_filename self.firstlineno = code.co_firstlineno self.arg_count = len(pysig.parameters) self.arg_names = list(pysig.parameters) # Even the same function definition can be compiled into # several different function objects with distinct closure # variables, so we make sure to disambiguate using an unique id. uid = next(cls._unique_ids) self.unique_name = '{}${}'.format(self.func_qualname, uid) return self
def _has_loc(fn): """Does function *fn* take ``loc`` argument? """ sig = utils.pysignature(fn) return 'loc' in sig.parameters
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args): # Overall approach: # 1) Construct a string containing a function definition for the stencil function # that will execute the stencil kernel. This function definition includes a # unique stencil function name, the parameters to the stencil kernel, loop # nests across the dimenions of the input array. Those loop nests use the # computed stencil kernel size so as not to try to compute elements where # elements outside the bounds of the input array would be needed. # 2) The but of the loop nest in this new function is a special sentinel # assignment. # 3) Get the IR of this new function. # 4) Split the block containing the sentinel assignment and remove the sentinel # assignment. Insert the stencil kernel IR into the stencil function IR # after label and variable renaming of the stencil kernel IR to prevent # conflicts with the stencil function IR. # 5) Compile the combined stencil function IR + stencil kernel IR into existence. # Copy the kernel so that our changes for this callsite # won't effect other callsites. (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(self.kernel_ir, calltypes) # The stencil kernel body becomes the body of a loop, for which args aren't needed. ir_utils.remove_args(kernel_copy.blocks) first_arg = kernel_copy.arg_names[0] in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap) name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks) ir_utils.apply_copy_propagate(kernel_copy.blocks, in_cps, name_var_table, typemap, copy_calltypes) if "out" in name_var_table: raise ValueError( "Cannot use the reserved word 'out' in stencil kernels.") sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table) if config.DEBUG_ARRAY_OPT == 1: print("name_var_table", name_var_table, sentinel_name) the_array = args[0] if config.DEBUG_ARRAY_OPT == 1: print("_stencil_wrapper", return_type, return_type.dtype, type(return_type.dtype), args) ir_utils.dump_blocks(kernel_copy.blocks) # We generate a Numba function to execute this stencil and here # create the unique name of this function. stencil_func_name = "__numba_stencil_%s_%s" % (hex( id(the_array)).replace("-", "_"), self.id) # We will put a loop nest in the generated function for each # dimension in the input array. Here we create the name for # the index variable for each dimension. index0, index1, ... index_vars = [] for i in range(the_array.ndim): index_var_name = ir_utils.get_unused_var_name( "index" + str(i), name_var_table) index_vars += [index_var_name] # Create extra signature for out and neighborhood. out_name = ir_utils.get_unused_var_name("out", name_var_table) neighborhood_name = ir_utils.get_unused_var_name( "neighborhood", name_var_table) sig_extra = "" if result is not None: sig_extra += ", {}=None".format(out_name) if "neighborhood" in dict(self.kws): sig_extra += ", {}=None".format(neighborhood_name) # Get a list of the standard indexed array names. standard_indexed = self.options.get("standard_indexing", []) if first_arg in standard_indexed: raise ValueError("The first argument to a stencil kernel must " "use relative indexing, not standard indexing.") if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0: raise ValueError("Standard indexing requested for an array name " "not present in the stencil kernel definition.") # Add index variables to getitems in the IR to transition the accesses # in the kernel from relative to regular Python indexing. Returns the # computed size of the stencil kernel and a list of the relatively indexed # arrays. kernel_size, relatively_indexed = self.add_indices_to_kernel( kernel_copy, index_vars, the_array.ndim, self.neighborhood, standard_indexed) if self.neighborhood is None: self.neighborhood = kernel_size if config.DEBUG_ARRAY_OPT == 1: print("After add_indices_to_kernel") ir_utils.dump_blocks(kernel_copy.blocks) # The return in the stencil kernel becomes a setitem for that # particular point in the iteration space. ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks, index_vars, out_name) if config.DEBUG_ARRAY_OPT == 1: print("After replace_return_with_setitem", ret_blocks) ir_utils.dump_blocks(kernel_copy.blocks) # Start to form the new function to execute the stencil kernel. func_text = "def {}({}{}):\n".format(stencil_func_name, ",".join(kernel_copy.arg_names), sig_extra) # Get loop ranges for each dimension, which could be either int # or variable. In the latter case we'll use the extra neighborhood # argument to the function. ranges = [] for i in range(the_array.ndim): if isinstance(kernel_size[i][0], int): lo = kernel_size[i][0] hi = kernel_size[i][1] else: lo = "{}[{}][0]".format(neighborhood_name, i) hi = "{}[{}][1]".format(neighborhood_name, i) ranges.append((lo, hi)) # If there are more than one relatively indexed arrays, add a call to # a function that will raise an error if any of the relatively indexed # arrays are of different size than the first input array. if len(relatively_indexed) > 1: func_text += " raise_if_incompatible_array_sizes(" + first_arg for other_array in relatively_indexed: if other_array != first_arg: func_text += "," + other_array func_text += ")\n" # Get the shape of the first input array. shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table) func_text += " {} = {}.shape\n".format(shape_name, first_arg) # If we have to allocate the output array (the out argument was not used) # then us numpy.full if the user specified a cval stencil decorator option # or np.zeros if they didn't to allocate the array. if result is None: return_type_name = numpy_support.as_dtype( return_type.dtype).type.__name__ if "cval" in self.options: cval = self.options["cval"] if return_type.dtype != typing.typeof.typeof(cval): raise ValueError( "cval type does not match stencil return type.") out_init = "{} = np.full({}, {}, dtype=np.{})\n".format( out_name, shape_name, cval, return_type_name) else: out_init = "{} = np.zeros({}, dtype=np.{})\n".format( out_name, shape_name, return_type_name) func_text += " " + out_init offset = 1 # Add the loop nests to the new function. for i in range(the_array.ndim): for j in range(offset): func_text += " " # ranges[i][0] is the minimum index used in the i'th dimension # but minimum's greater than 0 don't preclude any entry in the array. # So, take the minimum of 0 and the minimum index found in the kernel # and this will be a negative number (potentially -0). Then, we do # unary - on that to get the positive offset in this dimension whose # use is precluded. # ranges[i][1] is the maximum of 0 and the observed maximum index # in this dimension because negative maximums would not cause us to # preclude any entry in the array from being used. func_text += ("for {} in range(-min(0,{})," "{}[{}]-max(0,{})):\n").format( index_vars[i], ranges[i][0], shape_name, i, ranges[i][1]) offset += 1 for j in range(offset): func_text += " " # Put a sentinel in the code so we can locate it in the IR. We will # remove this sentinel assignment and replace it with the IR for the # stencil kernel body. func_text += "{} = 0\n".format(sentinel_name) func_text += " return {}\n".format(out_name) if config.DEBUG_ARRAY_OPT == 1: print("new stencil func text") print(func_text) # Force the new stencil function into existence. exec_(func_text) in globals(), locals() stencil_func = eval(stencil_func_name) if sigret is not None: pysig = utils.pysignature(stencil_func) sigret.pysig = pysig # Get the IR for the newly created stencil function. stencil_ir = compiler.run_frontend(stencil_func) ir_utils.remove_dels(stencil_ir.blocks) # rename all variables in stencil_ir afresh var_table = ir_utils.get_name_var_table(stencil_ir.blocks) new_var_dict = {} reserved_names = ( [sentinel_name, out_name, neighborhood_name, shape_name] + kernel_copy.arg_names + index_vars) for name, var in var_table.items(): if not name in reserved_names: new_var_dict[name] = ir_utils.mk_unique_var(name) ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict) stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1 # Shift lables in the kernel copy so they are guaranteed unique # and don't conflict with any labels in the stencil_ir. kernel_copy.blocks = ir_utils.add_offset_to_labels( kernel_copy.blocks, stencil_stub_last_label) new_label = max(kernel_copy.blocks.keys()) + 1 # Adjust ret_blocks to account for addition of the offset. ret_blocks = [x + stencil_stub_last_label for x in ret_blocks] if config.DEBUG_ARRAY_OPT == 1: print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label) print("before replace sentinel stencil_ir") ir_utils.dump_blocks(stencil_ir.blocks) print("before replace sentinel kernel_copy") ir_utils.dump_blocks(kernel_copy.blocks) # Search all the block in the stencil outline for the sentinel. for label, block in stencil_ir.blocks.items(): for i, inst in enumerate(block.body): if (isinstance(inst, ir.Assign) and inst.target.name == sentinel_name): # We found the sentinel assignment. loc = inst.loc scope = block.scope # split block across __sentinel__ # A new block is allocated for the statements prior to the # sentinel but the new block maintains the current block # label. prev_block = ir.Block(scope, loc) prev_block.body = block.body[:i] # The current block is used for statements after sentinel. block.body = block.body[i + 1:] # But the current block gets a new label. body_first_label = min(kernel_copy.blocks.keys()) # The previous block jumps to the minimum labelled block of # the parfor body. prev_block.append(ir.Jump(body_first_label, loc)) # Add all the parfor loop body blocks to the gufunc # function's IR. for (l, b) in kernel_copy.blocks.items(): stencil_ir.blocks[l] = b stencil_ir.blocks[new_label] = block stencil_ir.blocks[label] = prev_block # Add a jump from all the blocks that previously contained # a return in the stencil kernel to the block # containing statements after the sentinel. for ret_block in ret_blocks: stencil_ir.blocks[ret_block].append( ir.Jump(new_label, loc)) break else: continue break stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks) ir_utils.remove_dels(stencil_ir.blocks) assert (isinstance(the_array, types.Type)) array_types = args new_stencil_param_types = list(array_types) if config.DEBUG_ARRAY_OPT == 1: print("new_stencil_param_types", new_stencil_param_types) ir_utils.dump_blocks(stencil_ir.blocks) # Compile the combined stencil function with the replaced loop # body in it. new_func = compiler.compile_ir(self._typingctx, self._targetctx, stencil_ir, new_stencil_param_types, None, compiler.DEFAULT_FLAGS, {}) return new_func
def sum_expand(self, args, kws): """ sum can be called with or without an axis parameter, and with or without a dtype parameter """ pysig = None if 'axis' in kws and 'dtype' not in kws: def sum_stub(axis): pass pysig = utils.pysignature(sum_stub) # rewrite args args = list(args) + [kws['axis']] elif 'dtype' in kws and 'axis' not in kws: def sum_stub(dtype): pass pysig = utils.pysignature(sum_stub) # rewrite args args = list(args) + [kws['dtype']] elif 'dtype' in kws and 'axis' in kws: def sum_stub(axis, dtype): pass pysig = utils.pysignature(sum_stub) # rewrite args args = list(args) + [kws['axis'], kws['dtype']] args_len = len(args) assert args_len <= 2 if args_len == 0: # No axis or dtype parameter so the return type of the summation is a scalar # of the type of the array. out = signature(_expand_integer(self.this.dtype), *args, recvr=self.this) elif args_len == 1 and 'dtype' not in kws: # There is an axis parameter, either arg or kwarg if self.this.ndim == 1: # 1d reduces to a scalar return_type = self.this.dtype else: # the return type of this summation is an array of dimension one # less than the input array. return_type = types.Array(dtype=_expand_integer(self.this.dtype), ndim=self.this.ndim-1, layout='C') out = signature(return_type, *args, recvr=self.this) elif args_len == 1 and 'dtype' in kws: # No axis parameter so the return type of the summation is a scalar # of the dtype parameter. from .npydecl import _parse_dtype dtype, = args dtype = _parse_dtype(dtype) out = signature(dtype, *args, recvr=self.this) elif args_len == 2: # There is an axis and dtype parameter, either arg or kwarg from .npydecl import _parse_dtype dtype = _parse_dtype(args[1]) return_type = dtype if self.this.ndim != 1: # 1d reduces to a scalar, 2d and above reduce dim by 1 # the return type of this summation is an array of dimension one # less than the input array. return_type = types.Array(dtype=return_type, ndim=self.this.ndim-1, layout='C') out = signature(return_type, *args, recvr=self.this) else: pass return out.replace(pysig=pysig)
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args): # Overall approach: # 1) Construct a string containing a function definition for the stencil function # that will execute the stencil kernel. This function definition includes a # unique stencil function name, the parameters to the stencil kernel, loop # nests across the dimenions of the input array. Those loop nests use the # computed stencil kernel size so as not to try to compute elements where # elements outside the bounds of the input array would be needed. # 2) The but of the loop nest in this new function is a special sentinel # assignment. # 3) Get the IR of this new function. # 4) Split the block containing the sentinel assignment and remove the sentinel # assignment. Insert the stencil kernel IR into the stencil function IR # after label and variable renaming of the stencil kernel IR to prevent # conflicts with the stencil function IR. # 5) Compile the combined stencil function IR + stencil kernel IR into existence. # Copy the kernel so that our changes for this callsite # won't effect other callsites. (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes( self.kernel_ir, calltypes) # The stencil kernel body becomes the body of a loop, for which args aren't needed. ir_utils.remove_args(kernel_copy.blocks) first_arg = kernel_copy.arg_names[0] in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap) name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks) ir_utils.apply_copy_propagate( kernel_copy.blocks, in_cps, name_var_table, typemap, copy_calltypes) if "out" in name_var_table: raise ValueError("Cannot use the reserved word 'out' in stencil kernels.") sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table) if config.DEBUG_ARRAY_OPT == 1: print("name_var_table", name_var_table, sentinel_name) the_array = args[0] if config.DEBUG_ARRAY_OPT == 1: print("_stencil_wrapper", return_type, return_type.dtype, type(return_type.dtype), args) ir_utils.dump_blocks(kernel_copy.blocks) # We generate a Numba function to execute this stencil and here # create the unique name of this function. stencil_func_name = "__numba_stencil_%s_%s" % ( hex(id(the_array)).replace("-", "_"), self.id) # We will put a loop nest in the generated function for each # dimension in the input array. Here we create the name for # the index variable for each dimension. index0, index1, ... index_vars = [] for i in range(the_array.ndim): index_var_name = ir_utils.get_unused_var_name("index" + str(i), name_var_table) index_vars += [index_var_name] # Create extra signature for out and neighborhood. out_name = ir_utils.get_unused_var_name("out", name_var_table) neighborhood_name = ir_utils.get_unused_var_name("neighborhood", name_var_table) sig_extra = "" if result is not None: sig_extra += ", {}=None".format(out_name) if "neighborhood" in dict(self.kws): sig_extra += ", {}=None".format(neighborhood_name) # Get a list of the standard indexed array names. standard_indexed = self.options.get("standard_indexing", []) if first_arg in standard_indexed: raise ValueError("The first argument to a stencil kernel must " "use relative indexing, not standard indexing.") if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0: raise ValueError("Standard indexing requested for an array name " "not present in the stencil kernel definition.") # Add index variables to getitems in the IR to transition the accesses # in the kernel from relative to regular Python indexing. Returns the # computed size of the stencil kernel and a list of the relatively indexed # arrays. kernel_size, relatively_indexed = self.add_indices_to_kernel( kernel_copy, index_vars, the_array.ndim, self.neighborhood, standard_indexed) if self.neighborhood is None: self.neighborhood = kernel_size if config.DEBUG_ARRAY_OPT == 1: print("After add_indices_to_kernel") ir_utils.dump_blocks(kernel_copy.blocks) # The return in the stencil kernel becomes a setitem for that # particular point in the iteration space. ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks, index_vars, out_name) if config.DEBUG_ARRAY_OPT == 1: print("After replace_return_with_setitem", ret_blocks) ir_utils.dump_blocks(kernel_copy.blocks) # Start to form the new function to execute the stencil kernel. func_text = "def {}({}{}):\n".format(stencil_func_name, ",".join(kernel_copy.arg_names), sig_extra) # Get loop ranges for each dimension, which could be either int # or variable. In the latter case we'll use the extra neighborhood # argument to the function. ranges = [] for i in range(the_array.ndim): if isinstance(kernel_size[i][0], int): lo = kernel_size[i][0] hi = kernel_size[i][1] else: lo = "{}[{}][0]".format(neighborhood_name, i) hi = "{}[{}][1]".format(neighborhood_name, i) ranges.append((lo, hi)) # If there are more than one relatively indexed arrays, add a call to # a function that will raise an error if any of the relatively indexed # arrays are of different size than the first input array. if len(relatively_indexed) > 1: func_text += " raise_if_incompatible_array_sizes(" + first_arg for other_array in relatively_indexed: if other_array != first_arg: func_text += "," + other_array func_text += ")\n" # Get the shape of the first input array. shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table) func_text += " {} = {}.shape\n".format(shape_name, first_arg) # If we have to allocate the output array (the out argument was not used) # then us numpy.full if the user specified a cval stencil decorator option # or np.zeros if they didn't to allocate the array. if result is None: if "cval" in self.options: cval = self.options["cval"] if return_type.dtype != typing.typeof.typeof(cval): raise ValueError( "cval type does not match stencil return type.") out_init ="{} = np.full({}, {}, dtype=np.{})\n".format( out_name, shape_name, cval, return_type.dtype) else: out_init ="{} = np.zeros({}, dtype=np.{})\n".format( out_name, shape_name, return_type.dtype) func_text += " " + out_init offset = 1 # Add the loop nests to the new function. for i in range(the_array.ndim): for j in range(offset): func_text += " " # ranges[i][0] is the minimum index used in the i'th dimension # but minimum's greater than 0 don't preclude any entry in the array. # So, take the minimum of 0 and the minimum index found in the kernel # and this will be a negative number (potentially -0). Then, we do # unary - on that to get the positive offset in this dimension whose # use is precluded. # ranges[i][1] is the maximum of 0 and the observed maximum index # in this dimension because negative maximums would not cause us to # preclude any entry in the array from being used. func_text += ("for {} in range(-min(0,{})," "{}[{}]-max(0,{})):\n").format( index_vars[i], ranges[i][0], shape_name, i, ranges[i][1]) offset += 1 for j in range(offset): func_text += " " # Put a sentinel in the code so we can locate it in the IR. We will # remove this sentinel assignment and replace it with the IR for the # stencil kernel body. func_text += "{} = 0\n".format(sentinel_name) func_text += " return {}\n".format(out_name) if config.DEBUG_ARRAY_OPT == 1: print("new stencil func text") print(func_text) # Force the new stencil function into existence. exec_(func_text) in globals(), locals() stencil_func = eval(stencil_func_name) if sigret is not None: pysig = utils.pysignature(stencil_func) sigret.pysig = pysig # Get the IR for the newly created stencil function. stencil_ir = compiler.run_frontend(stencil_func) ir_utils.remove_dels(stencil_ir.blocks) # rename all variables in stencil_ir afresh var_table = ir_utils.get_name_var_table(stencil_ir.blocks) new_var_dict = {} reserved_names = ([sentinel_name, out_name, neighborhood_name, shape_name] + kernel_copy.arg_names + index_vars) for name, var in var_table.items(): if not name in reserved_names: new_var_dict[name] = ir_utils.mk_unique_var(name) ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict) stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1 # Shift lables in the kernel copy so they are guaranteed unique # and don't conflict with any labels in the stencil_ir. kernel_copy.blocks = ir_utils.add_offset_to_labels( kernel_copy.blocks, stencil_stub_last_label) new_label = max(kernel_copy.blocks.keys()) + 1 # Adjust ret_blocks to account for addition of the offset. ret_blocks = [x + stencil_stub_last_label for x in ret_blocks] if config.DEBUG_ARRAY_OPT == 1: print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label) print("before replace sentinel stencil_ir") ir_utils.dump_blocks(stencil_ir.blocks) print("before replace sentinel kernel_copy") ir_utils.dump_blocks(kernel_copy.blocks) # Search all the block in the stencil outline for the sentinel. for label, block in stencil_ir.blocks.items(): for i, inst in enumerate(block.body): if (isinstance( inst, ir.Assign) and inst.target.name == sentinel_name): # We found the sentinel assignment. loc = inst.loc scope = block.scope # split block across __sentinel__ # A new block is allocated for the statements prior to the # sentinel but the new block maintains the current block # label. prev_block = ir.Block(scope, loc) prev_block.body = block.body[:i] # The current block is used for statements after sentinel. block.body = block.body[i + 1:] # But the current block gets a new label. body_first_label = min(kernel_copy.blocks.keys()) # The previous block jumps to the minimum labelled block of # the parfor body. prev_block.append(ir.Jump(body_first_label, loc)) # Add all the parfor loop body blocks to the gufunc # function's IR. for (l, b) in kernel_copy.blocks.items(): stencil_ir.blocks[l] = b stencil_ir.blocks[new_label] = block stencil_ir.blocks[label] = prev_block # Add a jump from all the blocks that previously contained # a return in the stencil kernel to the block # containing statements after the sentinel. for ret_block in ret_blocks: stencil_ir.blocks[ret_block].append( ir.Jump(new_label, loc)) break else: continue break stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks) ir_utils.remove_dels(stencil_ir.blocks) assert(isinstance(the_array, types.Type)) array_types = args new_stencil_param_types = list(array_types) if config.DEBUG_ARRAY_OPT == 1: print("new_stencil_param_types", new_stencil_param_types) ir_utils.dump_blocks(stencil_ir.blocks) # Compile the combined stencil function with the replaced loop # body in it. new_func = compiler.compile_ir( self._typingctx, self._targetctx, stencil_ir, new_stencil_param_types, None, compiler.DEFAULT_FLAGS, {}) return new_func
def generic(self, args, kws): sig = signature(svc_type, types.intp) pysig = utils.pysignature(SVC_dummy) sig.pysig = pysig return sig
def generic(self, args, kws): sig = signature(mnb_type, types.intp) pysig = utils.pysignature(MultinomialNB_dummy) sig.pysig = pysig return sig