def test_issue_5087(self): # This is an odd issue. The exact number of print below is # necessary to trigger it. Too many or too few will alter the behavior. # Also note that the function below will not be executed. The problem # occurs at compilation. The definition below is invalid for execution. # The problem occurs in the bytecode analysis. def udt(): print print print for i in range: print print print print print print print print print print print print print print print print print print for j in range: print print print print print print print for k in range: for l in range: print print print print print print print print print print if print: for n in range: print else: print run_frontend(udt)
def test1(self):
    """Run copy propagation and dead-code removal on the IR of
    ``test_will_propagate`` (typed with three int64 arguments) and assert
    that ``findLhsAssign(ir, "x")`` is falsy afterwards.
    """
    typing_context = typing.Context()
    target_context = cpu.CPUContext(typing_context)
    func_ir = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(typing_context, target_context):
        typing_context.refresh()
        target_context.refresh()

        arg_types = (types.int64,) * 3
        inferred = compiler.type_inference_stage(
            typing_context, func_ir, arg_types, None)
        typemap, return_type, calltypes = inferred

        # The annotation object is built exactly as in a normal pipeline
        # run; it is not inspected by this test.
        type_annotation = type_annotations.TypeAnnotation(
            func_ir=func_ir,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=arg_types,
            return_type=return_type,
            html_output=config.HTML,
        )

        remove_dels(func_ir.blocks)
        in_cps, out_cps = copy_propagate(func_ir.blocks, typemap)
        apply_copy_propagate(
            func_ir.blocks,
            in_cps,
            get_name_var_table(func_ir.blocks),
            typemap,
            calltypes,
        )
        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)

        self.assertFalse(findLhsAssign(func_ir, "x"))
def test_test1(self):
    """Run the rewrite, type-inference and parfor passes on ``test1`` and
    check that ``countParfors`` reports exactly one parfor.

    ``test1`` is typed with five arguments, each a 1-d, 'C'-layout
    float64 array type.
    """
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx)
    test_ir = compiler.run_frontend(test1)

    with cpu_target.nested_context(typingctx, targetctx):
        one_arg = numba.types.npytypes.Array(
            numba.types.scalars.Float(name="float64"), 1, 'C')
        args = (one_arg, one_arg, one_arg, one_arg, one_arg)
        tp = TestPipeline(typingctx, targetctx, args, test_ir)

        numba.rewrites.rewrite_registry.apply(
            'before-inference', tp, tp.func_ir)

        tp.typemap, tp.return_type, tp.calltypes = \
            compiler.type_inference_stage(
                tp.typingctx, tp.func_ir, tp.args, None)

        # Constructed as a regular pipeline would; the resulting object is
        # not needed by the test, so it is not bound to a name.
        type_annotations.TypeAnnotation(
            func_ir=tp.func_ir,
            typemap=tp.typemap,
            calltypes=tp.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.args,
            return_type=tp.return_type,
            html_output=config.HTML)

        numba.rewrites.rewrite_registry.apply(
            'after-inference', tp, tp.func_ir)

        parfor_pass = numba.parfor.ParforPass(
            tp.func_ir, tp.typemap, tp.calltypes, tp.return_type,
            tp.typingctx)
        parfor_pass.run()

        # assertEqual (rather than assertTrue(x == 1)) so that a failure
        # reports the number of parfors that was actually found.
        self.assertEqual(countParfors(test_ir), 1)
def test_inline_var_dict_ret(self):
    """Check the variable-replacement dict returned by inline_closure_call.

    make sure inline_closure_call returns the variable replacement dict
    and it contains the original variable name used in locals
    """
    @numba.njit(locals={'b': numba.float64})
    def g(a):
        b = a + 1
        return b

    def test_impl():
        return g(1)

    func_ir = compiler.run_frontend(test_impl)
    blocks = list(func_ir.blocks.values())

    # Stays None when no call to a CPUDispatcher is found, so that the
    # test fails with a clear assertion instead of a NameError below.
    var_map = None
    for block in blocks:
        for i, stmt in enumerate(block.body):
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op == 'call'):
                func_def = guard(get_definition, func_ir, stmt.value.func)
                if (isinstance(func_def, (ir.Global, ir.FreeVar))
                        and isinstance(func_def.value, CPUDispatcher)):
                    py_func = func_def.value.py_func
                    _, var_map = inline_closure_call(
                        func_ir, py_func.__globals__, block, i, py_func)
                    # block.body was modified by the inlining; stop
                    # iterating over it.
                    break

    self.assertIsNotNone(
        var_map, "no call to a CPUDispatcher was found to inline")
    self.assertIn('b', var_map)
def test_inline_update_target_def(self):
    """Replace the ``b = 2`` assignment of ``test_impl`` by a dummy call,
    inline ``lambda: 2`` at that call site and check that two definitions
    are recorded for ``b`` afterwards.
    """

    def test_impl(a):
        if a == 1:
            b = 2
        else:
            b = 3
        return b

    func_ir = compiler.run_frontend(test_impl)

    for blk in list(func_ir.blocks.values()):
        for idx, stmt in enumerate(blk.body):
            # match b = 2 and replace with lambda: 2
            is_var_assign = (isinstance(stmt, ir.Assign)
                             and isinstance(stmt.value, ir.Var))
            if not is_var_assign:
                continue
            if guard(find_const, func_ir, stmt.value) != 2:
                continue

            # replace expr with a dummy call, keeping the recorded
            # definitions of the target in step with the statement
            func_ir._definitions[stmt.target.name].remove(stmt.value)
            callee = ir.Var(blk.scope, "myvar", loc=stmt.loc)
            stmt.value = ir.Expr.call(callee, (), (), stmt.loc)
            func_ir._definitions[stmt.target.name].append(stmt.value)

            inline_closure_call(func_ir, {}, blk, idx, lambda: 2)
            break

    self.assertEqual(len(func_ir._definitions['b']), 2)
def test_inline_update_target_def(self):
    """After swapping ``b = 2`` for a dummy call and inlining
    ``lambda: 2`` there, ``func_ir._definitions['b']`` must hold two
    entries.
    """

    def test_impl(a):
        if a == 1:
            b = 2
        else:
            b = 3
        return b

    func_ir = compiler.run_frontend(test_impl)
    snapshot = list(func_ir.blocks.values())

    for current in snapshot:
        for position, statement in enumerate(current.body):
            # match b = 2 and replace with lambda: 2
            matches = (
                isinstance(statement, ir.Assign)
                and isinstance(statement.value, ir.Var)
                and guard(find_const, func_ir, statement.value) == 2
            )
            if matches:
                # replace expr with a dummy call
                target_name = statement.target.name
                func_ir._definitions[target_name].remove(statement.value)
                statement.value = ir.Expr.call(
                    ir.Var(current.scope, "myvar", loc=statement.loc),
                    (),
                    (),
                    statement.loc,
                )
                func_ir._definitions[statement.target.name].append(
                    statement.value)
                inline_closure_call(
                    func_ir, {}, current, position, lambda: 2)
                break

    self.assertEqual(len(func_ir._definitions['b']), 2)
def test_inline_var_dict_ret(self):
    """inline_closure_call must return the variable replacement dict, and
    that dict must contain the original variable name used in ``locals``.
    """
    @numba.njit(locals={'b': numba.float64})
    def g(a):
        b = a + 1
        return b

    def test_impl():
        return g(1)

    func_ir = compiler.run_frontend(test_impl)

    # Sentinel: remains None if the loops below never reach
    # inline_closure_call, which would otherwise surface as a NameError
    # in the final assertion rather than as a test failure.
    var_map = None
    for block in list(func_ir.blocks.values()):
        for i, stmt in enumerate(block.body):
            is_call = (isinstance(stmt, ir.Assign)
                       and isinstance(stmt.value, ir.Expr)
                       and stmt.value.op == 'call')
            if not is_call:
                continue
            func_def = guard(get_definition, func_ir, stmt.value.func)
            if (isinstance(func_def, (ir.Global, ir.FreeVar))
                    and isinstance(func_def.value, CPUDispatcher)):
                py_func = func_def.value.py_func
                _, var_map = inline_closure_call(
                    func_ir, py_func.__globals__, block, i, py_func)
                break

    self.assertIsNotNone(
        var_map, "inline_closure_call was never invoked")
    self.assertIn('b', var_map)
def test1(self):
    """Type ``test_will_propagate`` with three int64 arguments, apply copy
    propagation and dead-code removal to its IR, then assert that
    ``findLhsAssign(ir, "x")`` is falsy.
    """
    tctx = typing.Context()
    cctx = cpu.CPUContext(tctx)
    ir_under_test = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(tctx, cctx):
        tctx.refresh()
        cctx.refresh()

        int64 = types.int64
        signature_args = (int64, int64, int64)
        typemap, return_type, calltypes = compiler.type_inference_stage(
            tctx, ir_under_test, signature_args, None)

        annotation_kwargs = dict(
            func_ir=ir_under_test,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=signature_args,
            return_type=return_type,
            html_output=config.HTML,
        )
        type_annotation = type_annotations.TypeAnnotation(
            **annotation_kwargs)

        remove_dels(ir_under_test.blocks)
        in_cps, out_cps = copy_propagate(ir_under_test.blocks, typemap)
        name_var_table = get_name_var_table(ir_under_test.blocks)
        apply_copy_propagate(
            ir_under_test.blocks, in_cps, name_var_table, typemap,
            calltypes)
        remove_dead(
            ir_under_test.blocks, ir_under_test.arg_names, ir_under_test)

        self.assertFalse(findLhsAssign(ir_under_test, "x"))
def test_find_const_global(self):
    """
    Check find_const() on values that come from globals (ir.Global) and
    from freevars (ir.FreeVar), both of which are treated as constants
    for compilation.
    """
    FREEVAR_C = 12

    def foo(a):
        b = GLOBAL_B
        c = FREEVAR_C
        return a + b + c

    f_ir = compiler.run_frontend(foo)

    # Constant resolved for each assignment target of interest; a target
    # that is never assigned in the first block stays absent (-> None).
    resolved = {}
    for inst in f_ir.blocks[0].body:
        if not isinstance(inst, ir.Assign):
            continue
        target_name = inst.target.name
        if target_name in ('b', 'c'):
            resolved[target_name] = ir_utils.guard(
                ir_utils.find_const, f_ir, inst.target)

    self.assertEqual(resolved.get('b'), GLOBAL_B)
    self.assertEqual(resolved.get('c'), FREEVAR_C)
def compile_to_ir(func):
    """Return the IR of ``func`` after the 'before-inference' rewrites.

    The rewrites are applied through a ``StateDict`` that carries the IR
    and has ``typemap`` and ``calltypes`` set to None.
    """
    ir_obj = run_frontend(func)

    pass_state = StateDict()
    pass_state.func_ir = ir_obj
    pass_state.typemap = pass_state.calltypes = None

    # call this to get print etc rewrites
    registry = rewrites.rewrite_registry
    registry.apply('before-inference', pass_state)
    return ir_obj
def test_mk_func_literal(self):
    """Type inference of ``mk_func_test_impl`` must produce at least one
    ``types.MakeFunctionLiteral`` entry in the typemap, i.e. the
    make_function reaches the typer as a literal.
    """
    func_ir = compiler.run_frontend(mk_func_test_impl)

    ctx = cpu_target.typing_context
    ctx.refresh()

    typemap, _ret_type, _calltypes = compiler.type_inference_stage(
        ctx, func_ir, (), None)

    literal_types = [
        inferred for inferred in typemap.values()
        if isinstance(inferred, types.MakeFunctionLiteral)
    ]
    self.assertTrue(len(literal_types) > 0)
def test_mk_func_literal(self):
    """make sure make_function is passed to typer class as a literal:
    the inferred typemap has to contain a ``types.MakeFunctionLiteral``.
    """
    ir_under_test = compiler.run_frontend(mk_func_test_impl)

    typing_context = cpu_target.typing_context
    typing_context.refresh()

    inference_result = type_inference_stage(
        typing_context, ir_under_test, (), None)
    typemap, _, _ = inference_result

    found_literal = False
    for var_type in typemap.values():
        if isinstance(var_type, types.MakeFunctionLiteral):
            found_literal = True
            break
    self.assertTrue(found_literal)
def generic(self, args, kws):
    """
    Type the overloaded function by compiling the appropriate
    implementation for the given args.

    Returns the resulting signature, or None when ``self._get_impl``
    yields no implementation for ``args``/``kws``.  As a side effect the
    entries of ``self._compiled_overloads`` (and, when inlining is
    possible, ``self._inline_overloads``) keyed by the signature's
    argument types are filled in.
    """
    disp, new_args = self._get_impl(args, kws)
    if disp is None:
        return None

    # Compile and type it for the given types
    disp_type = types.Dispatcher(disp)

    if self._inline.is_never_inline:
        # No inlining required: store the compiled overload for use in
        # the lowering phase.
        sig = disp_type.get_call_type(self.context, new_args, kws)
        self._compiled_overloads[sig.args] = disp_type.get_overload(sig)
        return sig

    # Inlining may happen.  Compiling here would produce functions that
    # are never used once inlined, so only run the compiler front end up
    # to type inference to compute a signature.
    from numba import compiler, typed_passes

    func_ir = compiler.run_frontend(disp_type.dispatcher.py_func)
    # NOTE: ``kws`` is deliberately rebound to the value returned by the
    # call template; it is used again further down.
    template, pysig, folded_args, kws = \
        disp_type.dispatcher.get_call_template(new_args, kws)
    typemap, return_type, calltypes = typed_passes.type_inference_stage(
        self.context, func_ir, folded_args, None)
    sig = Signature(return_type, folded_args, None)

    # this stores a load of info for the cost model function if supplied
    # it by default is None
    self._inline_overloads[sig.args] = {'folded_args': folded_args}

    # this stores the compiled overloads, if there's no compiled
    # overload available i.e. function is always inlined, the key still
    # needs to exist for type resolution
    # NOTE: If lowering is failing on a `_EmptyImplementationEntry`,
    #       the inliner has failed to inline this entry correctly.
    self._compiled_overloads[sig.args] = \
        _EmptyImplementationEntry('always inlined')

    if self._inline.is_always_inline:
        return sig

    # this branch is here because a user has supplied a function to
    # determine whether to inline or not. As a result both compiled
    # function and inliner info needed, delaying the computation of
    # this leads to an internal state mess at present. TODO: Fix!
    sig = disp_type.get_call_type(self.context, new_args, kws)
    self._compiled_overloads[sig.args] = disp_type.get_overload(sig)

    # store the inliner information, it's used later in the cost
    # model function call
    self._inline_overloads[sig.args] = {
        'folded_args': folded_args,
        'iinfo': _inline_info(func_ir, typemap, calltypes, sig),
    }
    return sig
def countParfors(test_func, args, **kws):
    """Return the number of ``numba.parfor.Parfor`` statements in the IR
    of ``test_func`` after the inline-closure, rewrite, type-inference,
    pre-parfor and parfor passes have run.

    ``args`` are the argument types used for type inference.  Keyword
    arguments, when given, are handed to ``cpu.ParallelOptions``;
    otherwise ``ParallelOptions(True)`` is used.
    """
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx)
    test_ir = compiler.run_frontend(test_func)
    options = cpu.ParallelOptions(kws if kws else True)

    with cpu_target.nested_context(typingctx, targetctx):
        tp = TestPipeline(typingctx, targetctx, args, test_ir)

        inline_closurecall.InlineClosureCallPass(tp.func_ir, options).run()

        registry = numba.rewrites.rewrite_registry
        registry.apply('before-inference', tp, tp.func_ir)

        inferred = compiler.type_inference_stage(
            tp.typingctx, tp.func_ir, tp.args, None)
        tp.typemap, tp.return_type, tp.calltypes = inferred

        type_annotations.TypeAnnotation(
            func_ir=tp.func_ir,
            typemap=tp.typemap,
            calltypes=tp.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.args,
            return_type=tp.return_type,
            html_output=config.HTML,
        )

        numba.parfor.PreParforPass(
            tp.func_ir, tp.typemap, tp.calltypes, tp.typingctx,
            options).run()

        numba.rewrites.rewrite_registry.apply(
            'after-inference', tp, tp.func_ir)

        numba.parfor.ParforPass(
            tp.func_ir, tp.typemap, tp.calltypes, tp.return_type,
            tp.typingctx, options).run()

    return sum(
        1
        for block in test_ir.blocks.values()
        for inst in block.body
        if isinstance(inst, numba.parfor.Parfor)
    )
def test_obj_func_match(self):
    """find_callname must match a method call on an object other than
    Array (see #3449): the ``append`` call in ``test_func`` has to be
    reported as a 2-tuple whose first item is ``'append'``.
    """
    def test_func():
        d = Dummy([1])
        d.val.append(2)

    func_ir = compiler.run_frontend(test_func)
    typemap, _, _ = compiler.type_inference_stage(
        cpu_target.typing_context, func_ir, (), None)

    # The value of statement 14 of the first block is the expression
    # handed to find_callname.
    call_expr = func_ir.blocks[0].body[14].value
    matched_call = numba.ir_utils.find_callname(
        func_ir, call_expr, typemap)

    is_append_match = (
        isinstance(matched_call, tuple)
        and len(matched_call) == 2
        and matched_call[0] == 'append'
    )
    self.assertTrue(is_append_match)
def compile_to_ir(func):
    """Return the IR of ``func`` after the 'before-inference' rewrites.

    The rewrite registry is driven with a minimal stand-in pipeline
    object whose only populated attribute is ``func_ir``.
    """
    func_ir = run_frontend(func)

    class MockPipeline(object):
        """Bare pipeline stand-in: everything but ``func_ir`` is None."""

        def __init__(self, func_ir):
            self.typingctx = self.targetctx = self.args = None
            self.func_ir = func_ir
            self.typemap = self.return_type = self.calltypes = None

    # call this to get print etc rewrites
    pipeline = MockPipeline(func_ir)
    rewrites.rewrite_registry.apply('before-inference', pipeline, func_ir)
    return func_ir
def test2(self):
    """remove_dead must keep the ``np.random.seed`` call in the IR."""

    def call_np_random_seed():
        np.random.seed(2)

    def seed_call_exists(func_ir):
        # True when the first block holds an assignment from a call
        # expression whose callee definition has attr == 'seed'.
        return any(
            isinstance(inst, ir.Assign)
            and isinstance(inst.value, ir.Expr)
            and inst.value.op == 'call'
            and func_ir.get_definition(inst.value.func).attr == 'seed'
            for inst in func_ir.blocks[0].body
        )

    seed_ir = compiler.run_frontend(call_np_random_seed)
    remove_dead(seed_ir.blocks, seed_ir.arg_names, seed_ir)
    self.assertTrue(seed_call_exists(seed_ir))
def test2(self):
    """After remove_dead, the first IR block of a function that calls
    ``np.random.seed(2)`` must still contain the ``seed`` call.
    """

    def call_np_random_seed():
        np.random.seed(2)

    def seed_call_exists(func_ir):
        for inst in func_ir.blocks[0].body:
            if not isinstance(inst, ir.Assign):
                continue
            expr = inst.value
            if not isinstance(expr, ir.Expr) or expr.op != 'call':
                continue
            if func_ir.get_definition(expr.func).attr == 'seed':
                return True
        return False

    ir_under_test = compiler.run_frontend(call_np_random_seed)
    remove_dead(
        ir_under_test.blocks, ir_under_test.arg_names, ir_under_test)
    self.assertTrue(seed_call_exists(ir_under_test))
def test_obj_func_match(self):
    """Test matching of an object method (other than Array see #3449):
    find_callname on the ``append`` call must give a 2-tuple starting
    with ``'append'``.
    """
    def test_func():
        d = Dummy([1])
        d.val.append(2)

    ir_under_test = compiler.run_frontend(test_func)
    typing_context = cpu_target.typing_context
    inference_result = compiler.type_inference_stage(
        typing_context, ir_under_test, (), None)
    typemap, _, _ = inference_result

    first_block = ir_under_test.blocks[0]
    matched_call = numba.ir_utils.find_callname(
        ir_under_test, first_block.body[14].value, typemap)

    self.assertTrue(
        isinstance(matched_call, tuple)
        and len(matched_call) == 2
        and matched_call[0] == 'append'
    )
def get_inner_ir(func):
    """Return the untyped Numba IR blocks of ``func``, prepared for being
    embedded elsewhere: dels are removed, the argument-loading
    assignments are dropped from the first block (in topological order),
    and every variable that is not an argument gets a fresh unique name.
    """
    # get untyped numba ir
    f_ir = numba_compiler.run_frontend(func)
    blocks = f_ir.blocks
    remove_dels(blocks)

    entry_label = find_topo_order(blocks)[0]
    entry_block = blocks[entry_label]

    # remove arg nodes
    entry_block.body = [
        stmt for stmt in entry_block.body
        if not (isinstance(stmt, ir.Assign)
                and isinstance(stmt.value, ir.Arg))
    ]

    # rename all variables to avoid conflict, except args
    renames = {
        name: mk_unique_var(name)
        for name in get_name_var_table(blocks)
        if name not in f_ir.arg_names
    }
    replace_var_names(blocks, renames)
    return blocks
def get_ir(self, pyfunc):
    """Return what ``compiler.run_frontend`` produces for ``pyfunc``."""
    func_ir = compiler.run_frontend(pyfunc)
    return func_ir
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
    """Build and compile the function that applies this stencil kernel.

    Parameters
    ----------
    result
        When None, the generated function allocates the output array
        itself; otherwise an extra ``out`` parameter is added to its
        signature.
    sigret
        When not None, its ``pysig`` attribute is set to the Python
        signature of the generated function.
    return_type
        Its ``dtype`` is used for the output allocation and is compared
        with the type of the ``cval`` option.
    typemap, calltypes
        Typing information of the kernel IR, used for copy propagation
        and for copying the kernel.
    *args
        Argument types; ``args[0]`` is the first input array type and
        must be a ``types.Type`` (its ``ndim`` sets the loop-nest depth).

    Returns
    -------
    The object returned by ``compiler.compile_ir`` for the combined IR.

    Raises
    ------
    ValueError
        If the kernel uses the reserved name ``out``, if the first
        argument is requested with standard indexing, if standard
        indexing names an argument the kernel does not have, or if the
        ``cval`` type differs from the stencil return dtype.
    """
    # Overall approach:
    # 1) Construct a string containing a function definition for the stencil function
    #    that will execute the stencil kernel.  This function definition includes a
    #    unique stencil function name, the parameters to the stencil kernel, loop
    #    nests across the dimensions of the input array.  Those loop nests use the
    #    computed stencil kernel size so as not to try to compute elements where
    #    elements outside the bounds of the input array would be needed.
    # 2) The body of the loop nest in this new function is a special sentinel
    #    assignment.
    # 3) Get the IR of this new function.
    # 4) Split the block containing the sentinel assignment and remove the sentinel
    #    assignment.  Insert the stencil kernel IR into the stencil function IR
    #    after label and variable renaming of the stencil kernel IR to prevent
    #    conflicts with the stencil function IR.
    # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

    # Copy the kernel so that our changes for this callsite
    # won't affect other callsites.
    (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
        self.kernel_ir, calltypes)
    # The stencil kernel body becomes the body of a loop, for which args aren't needed.
    ir_utils.remove_args(kernel_copy.blocks)
    first_arg = kernel_copy.arg_names[0]

    in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
    name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
    ir_utils.apply_copy_propagate(
        kernel_copy.blocks,
        in_cps,
        name_var_table,
        typemap,
        copy_calltypes)

    if "out" in name_var_table:
        raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

    sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
    if config.DEBUG_ARRAY_OPT == 1:
        print("name_var_table", name_var_table, sentinel_name)

    the_array = args[0]

    if config.DEBUG_ARRAY_OPT == 1:
        print("_stencil_wrapper", return_type, return_type.dtype,
              type(return_type.dtype), args)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # We generate a Numba function to execute this stencil and here
    # create the unique name of this function.
    stencil_func_name = "__numba_stencil_%s_%s" % (
        hex(id(the_array)).replace("-", "_"),
        self.id)

    # We will put a loop nest in the generated function for each
    # dimension in the input array.  Here we create the name for
    # the index variable for each dimension.  index0, index1, ...
    index_vars = []
    for i in range(the_array.ndim):
        index_var_name = ir_utils.get_unused_var_name("index" + str(i),
                                                      name_var_table)
        index_vars += [index_var_name]

    # Create extra signature for out and neighborhood.
    out_name = ir_utils.get_unused_var_name("out", name_var_table)
    neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
                                                     name_var_table)
    sig_extra = ""
    if result is not None:
        sig_extra += ", {}=None".format(out_name)
    if "neighborhood" in dict(self.kws):
        sig_extra += ", {}=None".format(neighborhood_name)

    # Get a list of the standard indexed array names.
    standard_indexed = self.options.get("standard_indexing", [])

    if first_arg in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must "
                         "use relative indexing, not standard indexing.")

    if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
        raise ValueError("Standard indexing requested for an array name "
                         "not present in the stencil kernel definition.")

    # Add index variables to getitems in the IR to transition the accesses
    # in the kernel from relative to regular Python indexing.  Returns the
    # computed size of the stencil kernel and a list of the relatively indexed
    # arrays.
    kernel_size, relatively_indexed = self.add_indices_to_kernel(
        kernel_copy, index_vars, the_array.ndim,
        self.neighborhood, standard_indexed)
    if self.neighborhood is None:
        self.neighborhood = kernel_size

    if config.DEBUG_ARRAY_OPT == 1:
        print("After add_indices_to_kernel")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # The return in the stencil kernel becomes a setitem for that
    # particular point in the iteration space.
    ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                  index_vars, out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("After replace_return_with_setitem", ret_blocks)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Start to form the new function to execute the stencil kernel.
    func_text = "def {}({}{}):\n".format(stencil_func_name,
                                         ",".join(kernel_copy.arg_names),
                                         sig_extra)

    # Get loop ranges for each dimension, which could be either int
    # or variable.  In the latter case we'll use the extra neighborhood
    # argument to the function.
    ranges = []
    for i in range(the_array.ndim):
        if isinstance(kernel_size[i][0], int):
            lo = kernel_size[i][0]
            hi = kernel_size[i][1]
        else:
            lo = "{}[{}][0]".format(neighborhood_name, i)
            hi = "{}[{}][1]".format(neighborhood_name, i)
        ranges.append((lo, hi))

    # If there are more than one relatively indexed arrays, add a call to
    # a function that will raise an error if any of the relatively indexed
    # arrays are of different size than the first input array.
    if len(relatively_indexed) > 1:
        func_text += " raise_if_incompatible_array_sizes(" + first_arg
        for other_array in relatively_indexed:
            if other_array != first_arg:
                func_text += "," + other_array
        func_text += ")\n"

    # Get the shape of the first input array.
    shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
    func_text += " {} = {}.shape\n".format(shape_name, first_arg)

    # If we have to allocate the output array (the out argument was not used)
    # then use numpy.full if the user specified a cval stencil decorator option
    # or np.zeros if they didn't to allocate the array.
    if result is None:
        if "cval" in self.options:
            cval = self.options["cval"]
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            out_init ="{} = np.full({}, {}, dtype=np.{})\n".format(
                out_name, shape_name, cval, return_type.dtype)
        else:
            out_init ="{} = np.zeros({}, dtype=np.{})\n".format(
                out_name, shape_name, return_type.dtype)
        func_text += " " + out_init

    offset = 1
    # Add the loop nests to the new function.
    for i in range(the_array.ndim):
        for j in range(offset):
            func_text += " "
        # ranges[i][0] is the minimum index used in the i'th dimension
        # but minimums greater than 0 don't preclude any entry in the array.
        # So, take the minimum of 0 and the minimum index found in the kernel
        # and this will be a negative number (potentially -0).  Then, we do
        # unary - on that to get the positive offset in this dimension whose
        # use is precluded.
        # ranges[i][1] is the maximum of 0 and the observed maximum index
        # in this dimension because negative maximums would not cause us to
        # preclude any entry in the array from being used.
        func_text += ("for {} in range(-min(0,{}),"
                      "{}[{}]-max(0,{})):\n").format(
                          index_vars[i],
                          ranges[i][0],
                          shape_name,
                          i,
                          ranges[i][1])
        offset += 1

    for j in range(offset):
        func_text += " "
    # Put a sentinel in the code so we can locate it in the IR.  We will
    # remove this sentinel assignment and replace it with the IR for the
    # stencil kernel body.
    func_text += "{} = 0\n".format(sentinel_name)
    func_text += " return {}\n".format(out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new stencil func text")
        print(func_text)

    # Force the new stencil function into existence.  The text is executed
    # in an explicit namespace seeded with this module's globals and the
    # function is then read back from that namespace.  The former
    # ``exec_(func_text) in globals(), locals()`` was a leftover of the
    # Python 2 exec statement: the ``in ...`` part was only a discarded
    # membership test, and the following ``eval`` relied on the
    # interpreter keeping exec-created names in the function's locals().
    namespace = dict(globals())
    exec_(func_text, namespace)
    stencil_func = namespace[stencil_func_name]
    if sigret is not None:
        pysig = utils.pysignature(stencil_func)
        sigret.pysig = pysig
    # Get the IR for the newly created stencil function.
    stencil_ir = compiler.run_frontend(stencil_func)
    ir_utils.remove_dels(stencil_ir.blocks)

    # rename all variables in stencil_ir afresh
    var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
    new_var_dict = {}
    reserved_names = ([sentinel_name, out_name, neighborhood_name,
                       shape_name] + kernel_copy.arg_names + index_vars)
    for name, var in var_table.items():
        if name not in reserved_names:
            new_var_dict[name] = ir_utils.mk_unique_var(name)
    ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

    stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

    # Shift labels in the kernel copy so they are guaranteed unique
    # and don't conflict with any labels in the stencil_ir.
    kernel_copy.blocks = ir_utils.add_offset_to_labels(
        kernel_copy.blocks, stencil_stub_last_label)
    new_label = max(kernel_copy.blocks.keys()) + 1
    # Adjust ret_blocks to account for addition of the offset.
    ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

    if config.DEBUG_ARRAY_OPT == 1:
        print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
        print("before replace sentinel stencil_ir")
        ir_utils.dump_blocks(stencil_ir.blocks)
        print("before replace sentinel kernel_copy")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Search all the blocks in the stencil outline for the sentinel.
    for label, block in stencil_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(kernel_copy.blocks.keys())

                # The previous block jumps to the minimum labelled block of
                # the parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc
                # function's IR.
                for (l, b) in kernel_copy.blocks.items():
                    stencil_ir.blocks[l] = b

                stencil_ir.blocks[new_label] = block
                stencil_ir.blocks[label] = prev_block
                # Add a jump from all the blocks that previously contained
                # a return in the stencil kernel to the block
                # containing statements after the sentinel.
                for ret_block in ret_blocks:
                    stencil_ir.blocks[ret_block].append(
                        ir.Jump(new_label, loc))
                break
        else:
            # Sentinel not in this block: keep searching.
            continue
        # Sentinel handled; the block dict was modified, so stop iterating.
        break

    stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
    ir_utils.remove_dels(stencil_ir.blocks)

    assert(isinstance(the_array, types.Type))
    array_types = args

    new_stencil_param_types = list(array_types)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new_stencil_param_types", new_stencil_param_types)
        ir_utils.dump_blocks(stencil_ir.blocks)

    # Compile the combined stencil function with the replaced loop
    # body in it.
    new_func = compiler.compile_ir(
        self._typingctx,
        self._targetctx,
        stencil_ir,
        new_stencil_param_types,
        None,
        compiler.DEFAULT_FLAGS,
        {})
    return new_func
def decorated(func):
    """Wrap ``func`` in a ``StencilFunc`` built from its front-end IR and
    the ``mode`` and ``options`` of the enclosing scope.
    """
    return StencilFunc(compiler.run_frontend(func), mode, options)
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args): # Overall approach: # 1) Construct a string containing a function definition for the stencil function # that will execute the stencil kernel. This function definition includes a # unique stencil function name, the parameters to the stencil kernel, loop # nests across the dimenions of the input array. Those loop nests use the # computed stencil kernel size so as not to try to compute elements where # elements outside the bounds of the input array would be needed. # 2) The but of the loop nest in this new function is a special sentinel # assignment. # 3) Get the IR of this new function. # 4) Split the block containing the sentinel assignment and remove the sentinel # assignment. Insert the stencil kernel IR into the stencil function IR # after label and variable renaming of the stencil kernel IR to prevent # conflicts with the stencil function IR. # 5) Compile the combined stencil function IR + stencil kernel IR into existence. # Copy the kernel so that our changes for this callsite # won't effect other callsites. (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(self.kernel_ir, calltypes) # The stencil kernel body becomes the body of a loop, for which args aren't needed. 
ir_utils.remove_args(kernel_copy.blocks) first_arg = kernel_copy.arg_names[0] in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap) name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks) ir_utils.apply_copy_propagate(kernel_copy.blocks, in_cps, name_var_table, typemap, copy_calltypes) if "out" in name_var_table: raise ValueError( "Cannot use the reserved word 'out' in stencil kernels.") sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table) if config.DEBUG_ARRAY_OPT == 1: print("name_var_table", name_var_table, sentinel_name) the_array = args[0] if config.DEBUG_ARRAY_OPT == 1: print("_stencil_wrapper", return_type, return_type.dtype, type(return_type.dtype), args) ir_utils.dump_blocks(kernel_copy.blocks) # We generate a Numba function to execute this stencil and here # create the unique name of this function. stencil_func_name = "__numba_stencil_%s_%s" % (hex( id(the_array)).replace("-", "_"), self.id) # We will put a loop nest in the generated function for each # dimension in the input array. Here we create the name for # the index variable for each dimension. index0, index1, ... index_vars = [] for i in range(the_array.ndim): index_var_name = ir_utils.get_unused_var_name( "index" + str(i), name_var_table) index_vars += [index_var_name] # Create extra signature for out and neighborhood. out_name = ir_utils.get_unused_var_name("out", name_var_table) neighborhood_name = ir_utils.get_unused_var_name( "neighborhood", name_var_table) sig_extra = "" if result is not None: sig_extra += ", {}=None".format(out_name) if "neighborhood" in dict(self.kws): sig_extra += ", {}=None".format(neighborhood_name) # Get a list of the standard indexed array names. 
standard_indexed = self.options.get("standard_indexing", []) if first_arg in standard_indexed: raise ValueError("The first argument to a stencil kernel must " "use relative indexing, not standard indexing.") if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0: raise ValueError("Standard indexing requested for an array name " "not present in the stencil kernel definition.") # Add index variables to getitems in the IR to transition the accesses # in the kernel from relative to regular Python indexing. Returns the # computed size of the stencil kernel and a list of the relatively indexed # arrays. kernel_size, relatively_indexed = self.add_indices_to_kernel( kernel_copy, index_vars, the_array.ndim, self.neighborhood, standard_indexed) if self.neighborhood is None: self.neighborhood = kernel_size if config.DEBUG_ARRAY_OPT == 1: print("After add_indices_to_kernel") ir_utils.dump_blocks(kernel_copy.blocks) # The return in the stencil kernel becomes a setitem for that # particular point in the iteration space. ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks, index_vars, out_name) if config.DEBUG_ARRAY_OPT == 1: print("After replace_return_with_setitem", ret_blocks) ir_utils.dump_blocks(kernel_copy.blocks) # Start to form the new function to execute the stencil kernel. func_text = "def {}({}{}):\n".format(stencil_func_name, ",".join(kernel_copy.arg_names), sig_extra) # Get loop ranges for each dimension, which could be either int # or variable. In the latter case we'll use the extra neighborhood # argument to the function. 
ranges = [] for i in range(the_array.ndim): if isinstance(kernel_size[i][0], int): lo = kernel_size[i][0] hi = kernel_size[i][1] else: lo = "{}[{}][0]".format(neighborhood_name, i) hi = "{}[{}][1]".format(neighborhood_name, i) ranges.append((lo, hi)) # If there are more than one relatively indexed arrays, add a call to # a function that will raise an error if any of the relatively indexed # arrays are of different size than the first input array. if len(relatively_indexed) > 1: func_text += " raise_if_incompatible_array_sizes(" + first_arg for other_array in relatively_indexed: if other_array != first_arg: func_text += "," + other_array func_text += ")\n" # Get the shape of the first input array. shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table) func_text += " {} = {}.shape\n".format(shape_name, first_arg) # If we have to allocate the output array (the out argument was not used) # then us numpy.full if the user specified a cval stencil decorator option # or np.zeros if they didn't to allocate the array. if result is None: return_type_name = numpy_support.as_dtype( return_type.dtype).type.__name__ if "cval" in self.options: cval = self.options["cval"] if return_type.dtype != typing.typeof.typeof(cval): raise ValueError( "cval type does not match stencil return type.") out_init = "{} = np.full({}, {}, dtype=np.{})\n".format( out_name, shape_name, cval, return_type_name) else: out_init = "{} = np.zeros({}, dtype=np.{})\n".format( out_name, shape_name, return_type_name) func_text += " " + out_init offset = 1 # Add the loop nests to the new function. for i in range(the_array.ndim): for j in range(offset): func_text += " " # ranges[i][0] is the minimum index used in the i'th dimension # but minimum's greater than 0 don't preclude any entry in the array. # So, take the minimum of 0 and the minimum index found in the kernel # and this will be a negative number (potentially -0). 
Then, we do # unary - on that to get the positive offset in this dimension whose # use is precluded. # ranges[i][1] is the maximum of 0 and the observed maximum index # in this dimension because negative maximums would not cause us to # preclude any entry in the array from being used. func_text += ("for {} in range(-min(0,{})," "{}[{}]-max(0,{})):\n").format( index_vars[i], ranges[i][0], shape_name, i, ranges[i][1]) offset += 1 for j in range(offset): func_text += " " # Put a sentinel in the code so we can locate it in the IR. We will # remove this sentinel assignment and replace it with the IR for the # stencil kernel body. func_text += "{} = 0\n".format(sentinel_name) func_text += " return {}\n".format(out_name) if config.DEBUG_ARRAY_OPT == 1: print("new stencil func text") print(func_text) # Force the new stencil function into existence. exec_(func_text) in globals(), locals() stencil_func = eval(stencil_func_name) if sigret is not None: pysig = utils.pysignature(stencil_func) sigret.pysig = pysig # Get the IR for the newly created stencil function. stencil_ir = compiler.run_frontend(stencil_func) ir_utils.remove_dels(stencil_ir.blocks) # rename all variables in stencil_ir afresh var_table = ir_utils.get_name_var_table(stencil_ir.blocks) new_var_dict = {} reserved_names = ( [sentinel_name, out_name, neighborhood_name, shape_name] + kernel_copy.arg_names + index_vars) for name, var in var_table.items(): if not name in reserved_names: new_var_dict[name] = ir_utils.mk_unique_var(name) ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict) stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1 # Shift lables in the kernel copy so they are guaranteed unique # and don't conflict with any labels in the stencil_ir. kernel_copy.blocks = ir_utils.add_offset_to_labels( kernel_copy.blocks, stencil_stub_last_label) new_label = max(kernel_copy.blocks.keys()) + 1 # Adjust ret_blocks to account for addition of the offset. 
ret_blocks = [x + stencil_stub_last_label for x in ret_blocks] if config.DEBUG_ARRAY_OPT == 1: print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label) print("before replace sentinel stencil_ir") ir_utils.dump_blocks(stencil_ir.blocks) print("before replace sentinel kernel_copy") ir_utils.dump_blocks(kernel_copy.blocks) # Search all the block in the stencil outline for the sentinel. for label, block in stencil_ir.blocks.items(): for i, inst in enumerate(block.body): if (isinstance(inst, ir.Assign) and inst.target.name == sentinel_name): # We found the sentinel assignment. loc = inst.loc scope = block.scope # split block across __sentinel__ # A new block is allocated for the statements prior to the # sentinel but the new block maintains the current block # label. prev_block = ir.Block(scope, loc) prev_block.body = block.body[:i] # The current block is used for statements after sentinel. block.body = block.body[i + 1:] # But the current block gets a new label. body_first_label = min(kernel_copy.blocks.keys()) # The previous block jumps to the minimum labelled block of # the parfor body. prev_block.append(ir.Jump(body_first_label, loc)) # Add all the parfor loop body blocks to the gufunc # function's IR. for (l, b) in kernel_copy.blocks.items(): stencil_ir.blocks[l] = b stencil_ir.blocks[new_label] = block stencil_ir.blocks[label] = prev_block # Add a jump from all the blocks that previously contained # a return in the stencil kernel to the block # containing statements after the sentinel. 
for ret_block in ret_blocks: stencil_ir.blocks[ret_block].append( ir.Jump(new_label, loc)) break else: continue break stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks) ir_utils.remove_dels(stencil_ir.blocks) assert (isinstance(the_array, types.Type)) array_types = args new_stencil_param_types = list(array_types) if config.DEBUG_ARRAY_OPT == 1: print("new_stencil_param_types", new_stencil_param_types) ir_utils.dump_blocks(stencil_ir.blocks) # Compile the combined stencil function with the replaced loop # body in it. new_func = compiler.compile_ir(self._typingctx, self._targetctx, stencil_ir, new_stencil_param_types, None, compiler.DEFAULT_FLAGS, {}) return new_func
def decorated(func):
    """Build a ``StencilFunc`` from the Python function ``func``.

    The compiler frontend is run over ``func`` and the resulting IR is
    handed to ``StencilFunc`` together with the free names ``mode`` and
    ``options``, which this closure reads from its enclosing scope.
    """
    stencil_kernel_ir = compiler.run_frontend(func)
    return StencilFunc(stencil_kernel_ir, mode, options)
def test_functionir(self):
    # this creates a function full of all sorts of things to ensure the IR
    # is pretty involved, it then compares two instances of the compiled
    # function IR to check the IR is the same invariant of objects, and then
    # a tiny mutation is made to the IR in the second function and detection
    # of this change is checked.

    def gen():
        # NOTE: the body of ``foo`` is a fixture, its exact statements
        # determine the IR under test and must not be restyled.
        _FREEVAR = 0xCAFE

        def foo(a, b, c=12, d=1j, e=None):
            f = a + b
            a += _FREEVAR
            g = np.zeros(c, dtype=np.complex64)
            h = f + g
            i = 1j / d
            if np.abs(i) > 0:
                k = h / i
                l = np.arange(1, c + 1)
                with objmode():
                    print(e, k)
                m = np.sqrt(l - g)
                if np.abs(m[0]) < 1:
                    n = 0
                    for o in range(a):
                        n += 0
                        if np.abs(n) < 3:
                            break
                    n += m[2]
                p = g / l
                q = []
                for r in range(len(p)):
                    q.append(p[r])
                    if r > 4 + 1:
                        with objmode(s='intp', t='complex128'):
                            s = 123
                            t = 5
                        if s > 122:
                            t += s
                    t += q[0] + _GLOBAL

            return f + o + r + t + r + a + n

        return foo

    x = gen()
    y = gen()
    x_ir = compiler.run_frontend(x)
    y_ir = compiler.run_frontend(y)

    # two independently generated instances must compare equal
    self.assertTrue(x_ir.equal_ir(y_ir))

    def check_diffstr(string, pointing_at=()):
        # For every item in ``pointing_at`` there must be at least one line
        # of ``string`` that starts with '->' and contains that item,
        # otherwise an AssertionError is raised.
        lines = string.splitlines()
        for item in pointing_at:
            for line in lines:
                if line.startswith('->'):
                    if item in line:
                        break
            else:
                raise AssertionError("Could not find %s " % item)

    self.assertIn("IR is considered equivalent", x_ir.diff_str(y_ir))

    # minor mutation, simply switch branch targets on last branch
    for label in reversed(list(y_ir.blocks.keys())):
        blk = y_ir.blocks[label]
        if isinstance(blk.body[-1], ir.Branch):
            ref = blk.body[-1]
            ref.truebr, ref.falsebr = ref.falsebr, ref.truebr
            break

    check_diffstr(x_ir.diff_str(y_ir), ['branch'])

    z = gen()
    self.assertFalse(x_ir.equal_ir(y_ir))

    z_ir = compiler.run_frontend(z)

    change_set = set()

    # second mutation: swap the first pair of adjacent Del statements found
    # when walking the blocks from the highest label downwards.
    for label in reversed(list(z_ir.blocks.keys())):
        blk = z_ir.blocks[label]
        # every statement of the block except the final one
        ref = blk.body[:-1]
        idx = None
        # look for two adjacent Del; the scan stops one element short of the
        # end of ``ref`` so that ``ref[i + 1]`` is always a valid index (a
        # trailing Del with no partner must not raise IndexError).
        for i in range(len(ref) - 1):
            if (isinstance(ref[i], ir.Del) and
                    isinstance(ref[i + 1], ir.Del)):
                idx = i
                break
        if idx is not None:
            b = blk.body
            change_set.add(str(b[idx + 1]))
            change_set.add(str(b[idx]))
            b[idx], b[idx + 1] = b[idx + 1], b[idx]
            break

    self.assertFalse(x_ir.equal_ir(z_ir))
    self.assertEqual(len(change_set), 2)
    for item in change_set:
        self.assertTrue(item.startswith('del '))
    check_diffstr(x_ir.diff_str(z_ir), change_set)

    def foo(a, b):
        c = a * 2
        d = c + b
        e = np.sqrt(d)
        return e

    def bar(a, b):  # same as foo
        c = a * 2
        d = c + b
        e = np.sqrt(d)
        return e

    def baz(a, b):
        c = a * 2
        d = b + c
        e = np.sqrt(d + 1)
        return e

    foo_ir = compiler.run_frontend(foo)
    bar_ir = compiler.run_frontend(bar)
    self.assertTrue(foo_ir.equal_ir(bar_ir))
    self.assertIn("IR is considered equivalent", foo_ir.diff_str(bar_ir))

    baz_ir = compiler.run_frontend(baz)
    self.assertFalse(foo_ir.equal_ir(baz_ir))
    tmp = foo_ir.diff_str(baz_ir)
    self.assertIn("Other block contains more statements", tmp)
    check_diffstr(tmp, ["c + b", "b + c"])
def _do_work(self, state, work_list, block, i, expr):
    """Try to inline the function called by ``expr`` into ``state.func_ir``.

    The definition behind ``expr.func`` is looked up in the IR, resolved to
    a Python-level object, and inspected for a ``targetoptions`` mapping
    carrying an ``'inline'`` entry.  When that entry permits inlining (and
    its cost model, if it has one, agrees) ``inline_closure_call`` is
    invoked for the statement at index ``i`` of ``block``.

    Returns True when ``inline_closure_call`` was invoked, False in every
    other case.  Raises ``GuardException`` if reading ``value`` from the
    definition fails.
    """
    from numba.inline_closurecall import (inline_closure_call,
                                          callee_ir_validator)
    from numba.compiler import run_frontend
    from numba.targets.cpu import InlineOptions

    # A definition for the call is not always available, e.g. it might be a
    # eval(str)/part generated awaiting update etc. (parfors).
    try:
        callee_def = state.func_ir.get_definition(expr.func)
    except Exception:
        if self._DEBUG:
            print("Cannot find definition for %s" % expr.func)
        return False

    def_op = getattr(callee_def, 'op', False)

    # Closure inlining is not handled here, another pass deals with that.
    if def_op == 'make_function':
        return False

    if def_op == 'getattr':
        # Walk the IR to find the python function via the module from
        # which it is imported, this should all be encoded in the IR.
        target = resolve_func_from_module(state.func_ir, callee_def)
    else:
        # This is likely a freevar or global.
        #
        # NOTE: getattr 'value' on a call may fail if it's an ir.Expr as
        # getattr is overloaded to look in _kws.
        try:
            target = getattr(callee_def, 'value', False)
        except Exception:
            raise GuardException

    # Nothing was found, so there is nothing to inline.
    if not target:
        return False

    # Dispatcher-like objects carry the kwargs supplied in the jit decorator
    # in 'targetoptions'; that is where 'inline' will be if it is present.
    jit_kwargs = getattr(target, 'targetoptions', False)
    if not jit_kwargs:
        return False

    inline_spec = jit_kwargs.get('inline', None)
    if inline_spec is None:
        # 'inline' has not been specified
        return False

    inline_opt = InlineOptions(inline_spec)
    if inline_opt.is_never_inline:
        return False

    # From here on the callee could be inlinable.
    callee = target.py_func
    if inline_opt.has_cost_model:
        # The cost model is called with the caller and callee IR and its
        # result decides whether the inline goes ahead.
        callee_ir = run_frontend(callee)
        if not inline_spec(state.func_ir, callee_ir):
            return False

    inline_closure_call(
        state.func_ir,
        callee.__globals__,
        block,
        i,
        callee,
        work_list=work_list,
        callee_validator=callee_ir_validator,
    )
    return True
def get_ir(self, pyfunc):
    """Return the result of running the compiler frontend on ``pyfunc``."""
    frontend_ir = compiler.run_frontend(pyfunc)
    return frontend_ir