def include_new_blocks(blocks, new_blocks, label, new_body, remove_non_return=True, work_list=None, func_ir=None):
    """Merge `new_blocks` into `blocks`, entered from the block at `label`.

    The new blocks are relabelled with `add_offset_to_labels` and added to
    `blocks`.  The block at `label` receives `new_body` (with a jump to the
    topologically first relabelled block appended) as its body, and a fresh
    empty block is created for whatever the caller emits next.

    When `remove_non_return` is true, `remove_return_from_block` is applied
    to the topologically last new block and a jump to the fresh block is
    appended to it.  When `work_list` is given, every relabelled block is
    re-scoped to the host block's scope, passed to `_add_definitions`
    together with `func_ir`, and appended to `work_list` as `(label, block)`.

    Returns the label of the fresh continuation block.
    """
    shifted = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(shifted)
    ir_utils._max_label = max(blocks)

    host_block = blocks[label]
    scope = host_block.scope
    loc = host_block.loc

    order = find_topo_order(shifted)
    entry_label = order[0]
    exit_block = shifted[order[-1]]
    if remove_non_return:
        remove_return_from_block(exit_block)

    # Terminate the host block with a jump into the spliced-in blocks.
    new_body.append(ir.Jump(entry_label, loc))
    host_block.body = new_body

    # Fresh block that the spliced-in code falls through to.
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    if remove_non_return:
        exit_block.body.append(ir.Jump(label, loc))

    if work_list is None:
        return label

    for inner_label in find_topo_order(shifted):
        inner_block = shifted[inner_label]
        inner_block.scope = scope
        numba.inline_closurecall._add_definitions(func_ir, inner_block)
        work_list.append((inner_label, inner_block))
    return label
def _loop_lift_modify_call_block(liftedloop, block, inputs, outputs, returnto):
    """
    Build the block that replaces `block` in the top-level function so that
    it calls the lifted loop instead.

    The new block loads `liftedloop` as a constant, calls it with the
    variables named in `inputs`, unpacks the result positionally into the
    variables named in `outputs`, and finally jumps to `returnto`.
    """
    scope, loc = block.scope, block.loc
    call_block = ir.Block(scope=scope, loc=loc)

    # Bind the lifted loop object to a temporary.
    loop_const = ir.Const(value=liftedloop, loc=loc)
    loop_var = scope.make_temp(loc=loc)
    call_block.append(ir.Assign(target=loop_var, value=loop_const, loc=loc))

    # Call it with the loop's input variables.
    call_args = []
    for name in inputs:
        call_args.append(scope.get_exact(name))
    call = ir.Expr.call(func=loop_var, args=call_args, kws=(), loc=loc)

    # The call result lives in its own temporary.
    result_var = scope.make_temp(loc=loc)
    call_block.append(ir.Assign(target=result_var, value=call, loc=loc))

    # Unpack the result: the i-th item goes to the i-th output variable.
    for position, out_name in enumerate(outputs):
        destination = scope.get_exact(out_name)
        item = ir.Expr.static_getitem(value=result_var, index=position,
                                      index_var=None, loc=loc)
        call_block.append(ir.Assign(target=destination, value=item, loc=loc))

    # Continue with the block that followed the loop.
    call_block.append(ir.Jump(target=returnto, loc=loc))
    return call_block
def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
    """Create a block that sets up a `range` loop's iterator variables.

    The block contains the argument nodes produced by `_mk_range_args`, an
    assignment of the `range` global, the `range(...)` call, a `getiter` on
    its result and a `$phi` copy of the iterator.  `typemap` and `calltypes`
    are updated for every variable and call created here.

    The block ends with `ir.Jump(-1, loc)`; the caller has to set the real
    target label of that jump.
    """
    # global variable holding the `range` builtin
    range_global_var = ir.Var(scope, mk_unique_var("$range_g_var"), loc)
    typemap[range_global_var.name] = get_global_func_typ(range)
    range_global_assign = ir.Assign(ir.Global('range', range, loc),
                                    range_global_var, loc)

    arg_nodes, args = _mk_range_args(typemap, start, stop, step, scope, loc)

    # call range(start, stop, step)
    call_expr = ir.Expr.call(range_global_var, args, (), loc)
    range_func_type = typemap[range_global_var.name]
    calltypes[call_expr] = range_func_type.get_call_type(
        typing.Context(), [types.intp] * len(args), {})
    call_var = ir.Var(scope, mk_unique_var("$range_c_var"), loc)
    typemap[call_var.name] = types.iterators.RangeType(types.intp)
    call_assign = ir.Assign(call_expr, call_var, loc)

    # getiter on the range object
    getiter_expr = ir.Expr.getiter(call_var, loc)
    calltypes[getiter_expr] = signature(types.range_iter64_type,
                                        types.range_state64_type)
    iterator_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
    typemap[iterator_var.name] = types.iterators.RangeIteratorType(types.intp)
    getiter_assign = ir.Assign(getiter_expr, iterator_var, loc)

    # $phi = iterator
    phi = ir.Var(scope, mk_unique_var("$phi"), loc)
    typemap[phi.name] = types.iterators.RangeIteratorType(types.intp)
    phi_copy = ir.Assign(iterator_var, phi, loc)

    # placeholder jump to the loop header; target is patched by the caller
    header_jump = ir.Jump(-1, loc)

    setup_nodes = [range_global_assign, call_assign, getiter_assign,
                   phi_copy, header_jump]
    result = ir.Block(scope, loc)
    result.body = arg_nodes + setup_nodes
    return result
def replace_return_with_setitem(self, blocks, exit_value_var, parfor_body_exit_label):
    """
    Replace every return statement in `blocks` with an assignment of the
    returned value to `exit_value_var` followed by a jump to
    `parfor_body_exit_label`.

    The statement immediately before each return is expected to be a
    'cast' assignment; it is dropped and the value that was being cast is
    assigned to `exit_value_var` directly.

    The blocks are modified in place; nothing is returned.
    """
    for block in blocks.values():
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Return):
                # previous stmt should have been a cast
                prev_stmt = new_body.pop()
                assert (isinstance(prev_stmt, ir.Assign)
                        and isinstance(prev_stmt.value, ir.Expr)
                        and prev_stmt.value.op == 'cast')
                # Assign the un-cast value, then leave the kernel body.
                new_body.append(
                    ir.Assign(prev_stmt.value.value, exit_value_var, loc))
                new_body.append(ir.Jump(parfor_body_exit_label, loc))
            else:
                new_body.append(stmt)
        block.body = new_body
def include_new_blocks(blocks, new_blocks, label, new_body):
    """Merge `new_blocks` into `blocks`, entered from the block at `label`.

    The new blocks are relabelled with `add_offset_to_labels` and added to
    `blocks`.  The block at `label` receives `new_body` (with a jump to the
    topologically first relabelled block appended) as its body.
    `remove_none_return_from_block` is applied to the topologically last
    relabelled block, which then jumps to a freshly created empty block.

    Returns the label of that fresh block.
    """
    shifted = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(shifted)
    ir_utils._max_label = max(blocks)

    host_block = blocks[label]
    scope = host_block.scope
    loc = host_block.loc

    order = find_topo_order(shifted)
    entry_label = order[0]
    exit_block = shifted[order[-1]]
    remove_none_return_from_block(exit_block)

    # Terminate the host block with a jump into the spliced-in blocks.
    new_body.append(ir.Jump(entry_label, loc))
    host_block.body = new_body

    # Fresh block that the spliced-in code continues into.
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    exit_block.body.append(ir.Jump(label, loc))
    return label
def _replace_returns(blocks, target, return_label):
    """
    Replace each return statement by an assignment of the returned value
    directly to `target`, followed by a jump to `return_label`.

    A return statement must be the last statement of its block.
    """
    for block in blocks.values():
        body = block.body
        # Iterate over a snapshot: the body grows when a return is rewritten.
        for position, stmt in enumerate(tuple(body)):
            if not isinstance(stmt, ir.Return):
                continue
            assert position + 1 == len(body)
            body[position] = ir.Assign(stmt.value, target, stmt.loc)
            body.append(ir.Jump(return_label, stmt.loc))
def _start_new_block(self, inst):
    """Open a new block at `inst.offset` and reset per-block state.

    The previously current block, if it exists and is not yet terminated,
    gets a jump to the new block appended.  Afterwards the DFA info for the
    new block is loaded and a fresh `Assigner` is installed.
    """
    self.loc = ir.Loc(filename=self.bytecode.filename, line=inst.lineno)
    previous = self.current_block
    self.insert_block(inst.offset)

    # Make sure the block we are leaving ends in a terminator.
    if previous is not None:
        if not previous.is_terminated:
            previous.append(ir.Jump(inst.offset, loc=self.loc))

    # DFA block info for the block that is now current.
    self.dfainfo = self.dfa.infos[self.current_block_offset]
    self.assigner = Assigner()
def run(self):
    """Rewrite the function IR block by block, in topological order.

    * A static setitem on a known dataframe variable (``df['col'] = arr``)
      is recorded in `self.df_vars` and removed from the IR.
    * An assignment is handed to `self._run_assign`.  A list result
      replaces the assignment; a dict result is treated as a set of blocks
      that is spliced in at this point, with the remaining statements going
      into a fresh block.
    * Every other statement is kept unchanged.

    Dead code is removed once all blocks have been processed.
    """
    dprint_func_ir(self.func_ir, "starting hiframes")
    for label in find_topo_order(self.func_ir.blocks):
        new_body = []
        for inst in self.func_ir.blocks[label].body:
            # df['col'] = arr
            if isinstance(inst, ir.StaticSetItem) and inst.target.name in self.df_vars:
                df_name = inst.target.name
                self.df_vars[df_name][inst.index] = inst.value
                self._update_df_cols()
                continue

            if not isinstance(inst, ir.Assign):
                new_body.append(inst)
                continue

            out_nodes = self._run_assign(inst)
            if isinstance(out_nodes, list):
                new_body.extend(out_nodes)
            elif isinstance(out_nodes, dict):
                # Splice the returned blocks in after the statements
                # collected so far.
                inner_blocks = add_offset_to_labels(out_nodes, ir_utils._max_label + 1)
                self.func_ir.blocks.update(inner_blocks)
                ir_utils._max_label = max(self.func_ir.blocks.keys())
                scope = self.func_ir.blocks[label].scope
                loc = self.func_ir.blocks[label].loc
                inner_topo_order = find_topo_order(inner_blocks)
                inner_first_label = inner_topo_order[0]
                inner_last_label = inner_topo_order[-1]
                remove_none_return_from_block(inner_blocks[inner_last_label])
                new_body.append(ir.Jump(inner_first_label, loc))
                self.func_ir.blocks[label].body = new_body
                # The rest of the original block continues in a new block.
                label = ir_utils.next_label()
                self.func_ir.blocks[label] = ir.Block(scope, loc)
                inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
                new_body = []
        self.func_ir.blocks[label].body = new_body

    remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    dprint_func_ir(self.func_ir, "after hiframes")
    if config.DEBUG_ARRAY_OPT == 1:
        print("df_vars: ", self.df_vars)
def replace_target(term, src, dst):
    """Return a terminator equal to `term` with target `src` changed to `dst`.

    Branches and jumps are rebuilt as new objects; any other terminator must
    have no targets and is returned unchanged.
    """
    def retarget(label):
        if label == src:
            return dst
        return label

    if isinstance(term, ir.Branch):
        new_true = retarget(term.truebr)
        new_false = retarget(term.falsebr)
        return ir.Branch(cond=term.cond, truebr=new_true, falsebr=new_false,
                         loc=term.loc)
    if isinstance(term, ir.Jump):
        return ir.Jump(target=retarget(term.target), loc=term.loc)

    assert not term.get_targets()
    return term
def _start_new_block(self, inst):
    """Open a new block at `inst.offset` and prepare it for interpretation.

    The previously current block, if it exists and is not yet terminated,
    gets a jump to the new block appended.  Then the DFA info for the new
    block is loaded, PHI nodes are inserted and every registered block
    action is called with the new block's offset and the block itself.
    """
    self.loc = ir.Loc(filename=self.bytecode.filename, line=inst.lineno)
    previous = self.current_block
    self.insert_block(inst.offset)

    # Make sure the block we are leaving ends in a terminator.
    if previous is not None:
        if not previous.is_terminated:
            previous.append(ir.Jump(inst.offset, loc=self.loc))

    # DFA block info for the block that is now current.
    self.dfainfo = self.dfa.infos[self.current_block_offset]
    # Insert PHI
    self._insert_phi()
    # Tell the listeners about the new block.
    for action in utils.dict_itervalues(self._block_actions):
        action(self.current_block_offset, self.current_block)
def _bypass_with_context(blocks, blk_start, blk_end, forwardvars):
    """Replace the head block of a with-context by one that skips to its end.

    The replacement block first copies variables according to
    `forwardvars` (for each ``key -> value`` pair, the variable named `key`
    is assigned to the variable named `value`) and then jumps to `blk_end`.

    *blocks* is modified in place.
    """
    head = blocks[blk_start]
    scope, loc = head.scope, head.loc

    bypass = ir.Block(scope=scope, loc=loc)
    for source_name, dest_name in forwardvars.items():
        source_var = scope.get_exact(source_name)
        dest_var = scope.get_exact(dest_name)
        bypass.append(ir.Assign(value=source_var, target=dest_var, loc=loc))
    bypass.append(ir.Jump(target=blk_end, loc=loc))

    blocks[blk_start] = bypass
def inline_calls_inner(func_ir, block, stmt, i, py_func):
    """Inline `py_func` at the call statement `stmt` (index `i` of `block`).

    The callee's IR is obtained from the frontend, relabelled and given
    fresh variable names, its arguments are replaced by the call's actual
    arguments, and its returns become assignments to `stmt.target`.  The
    calling block is split around the call and the callee blocks are added
    to `func_ir`.  Finally `inline_calls` is run again on `func_ir`.
    """
    call_expr = stmt.value
    scope = block.scope
    callee_ir = numba.compiler.run_frontend(py_func)

    # Move the callee's labels past every label used by the caller.
    caller_max = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_ir.blocks, caller_max + 1)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    # init _max_label global in ir_utils before using next_label()
    ir_utils._max_label = max(callee_blocks.keys())

    # Give every callee variable a fresh name in the caller's scope.
    new_var_dict = {}
    for name, var in get_name_var_table(callee_ir.blocks).items():
        new_var_dict[name] = scope.define(mk_unique_var(var.name), loc=var.loc)
    replace_vars(callee_ir.blocks, new_var_dict)

    # Substitute the actual arguments for the callee's parameters.
    # TODO: replace defaults (add to args)
    _replace_args(callee_ir.blocks, list(call_expr.args))

    # Split the calling block: statements after the call go to a new block.
    tail_block = ir.Block(scope, block.loc)
    tail_block.body = block.body[i + 1:]
    tail_label = ir_utils.next_label()
    func_ir.blocks[tail_label] = tail_block
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, stmt.loc))

    # Returns in the callee become assignments to the call's target.
    _replace_returns(callee_ir.blocks, stmt.target, tail_label)

    # Add the callee blocks to the caller.
    for callee_label, callee_block in callee_ir.blocks.items():
        func_ir.blocks[callee_label] = callee_block

    # Transform any remaining calls.
    inline_calls(func_ir)
def inline_new_blocks(func_ir, block, i, callee_blocks, work_list=None):
    """Inline `callee_blocks` in place of the `i`-th instruction of `block`.

    Adopted from inline_closure_call.  The callee blocks are relabelled and
    simplified, the calling block is split around instruction `i`, and the
    callee's returns become assignments to that instruction's target.  All
    new blocks are added to `func_ir`; when `work_list` is given they are
    also appended to it as `(label, block)` pairs.

    Returns the relabelled, simplified callee blocks.
    """
    scope = block.scope
    instr = block.body[i]

    # 1. relabel the callee blocks by adding an offset, then simplify
    callee_blocks = add_offset_to_labels(callee_blocks, ir_utils._max_label + 1)
    callee_blocks = ir_utils.simplify_CFG(callee_blocks)
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max(callee_blocks.keys())
    topo_order = find_topo_order(callee_blocks)

    # 5. split caller block into two
    tail_block = ir.Block(scope, block.loc)
    tail_block.body = block.body[i + 1:]
    tail_label = ir_utils.next_label()
    func_ir.blocks[tail_label] = tail_block
    new_blocks = [(tail_label, tail_block)]
    block.body = block.body[:i]
    entry_label = topo_order[0]
    block.body.append(ir.Jump(entry_label, instr.loc))

    # 6. replace Return with assignment to LHS
    numba.inline_closurecall._replace_returns(callee_blocks, instr.target, tail_label)
    # remove the old definition of instr.target too
    target_name = instr.target.name
    if target_name in func_ir._definitions:
        func_ir._definitions[target_name] = []

    # 7. insert all new blocks, and add back definitions
    for callee_label in topo_order:
        callee_block = callee_blocks[callee_label]
        # block scope must point to parent's
        callee_block.scope = scope
        numba.inline_closurecall._add_definitions(func_ir, callee_block)
        func_ir.blocks[callee_label] = callee_block
        new_blocks.append((callee_label, callee_block))

    if work_list is not None:
        for entry in new_blocks:
            work_list.append(entry)
    return callee_blocks
def replace_return_with_setitem(self, blocks, exit_value_var, parfor_body_exit_label):
    """
    Replace every return statement in `blocks` with an assignment of the
    returned value to `exit_value_var` followed by a jump to
    `parfor_body_exit_label`.

    The blocks are modified in place; nothing is returned.
    """
    for block in blocks.values():
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Return):
                # Store the kernel's result, then leave the kernel body.
                new_body.append(ir.Assign(stmt.value, exit_value_var, loc))
                new_body.append(ir.Jump(parfor_body_exit_label, loc))
            else:
                new_body.append(stmt)
        block.body = new_body
def _replace_returns(blocks, target, return_label):
    """
    Replace each return statement by an assignment of the returned value
    directly to `target`, followed by a jump to `return_label`.

    A return statement must be the last statement of its block.  Any
    earlier 'cast' assignment in the same block whose target is the
    returned variable is rewritten to assign the un-cast value.
    """
    for block in blocks.values():
        body = block.body
        seen_casts = []
        # Iterate over a snapshot: the body grows when a return is rewritten.
        for position, stmt in enumerate(tuple(body)):
            if isinstance(stmt, ir.Return):
                assert position + 1 == len(body)
                body[position] = ir.Assign(stmt.value, target, stmt.loc)
                body.append(ir.Jump(return_label, stmt.loc))
                # remove cast of the returned value
                returned_name = stmt.value.name
                for cast_stmt in seen_casts:
                    if cast_stmt.target.name == returned_name:
                        cast_stmt.value = cast_stmt.value.value
                continue
            is_cast = (isinstance(stmt, ir.Assign)
                       and isinstance(stmt.value, ir.Expr)
                       and stmt.value.op == 'cast')
            if is_cast:
                seen_casts.append(stmt)
def make_prologue():
    """
    Build the entry block of the function: it binds each name in
    `loopinfo.inputs` to the function argument at the same position and
    then jumps to the loop entry (`loopinfo.callfrom`).
    """
    call_site = blocks[loopinfo.callfrom]
    scope, loc = call_site.scope, call_site.loc
    prologue = ir.Block(scope=scope, loc=loc)

    # One `name = arg(index)` assignment per loop input.
    for position, input_name in enumerate(loopinfo.inputs):
        arg_value = ir.Arg(name=input_name, index=position, loc=loc)
        arg_var = ir.Var(scope=scope, name=input_name, loc=loc)
        prologue.append(ir.Assign(target=arg_var, value=arg_value, loc=loc))

    # Enter the loop.
    prologue.append(ir.Jump(target=loopinfo.callfrom, loc=loc))
    return prologue
def rewrite_single_backedge(loop):
    """
    Route every backedge of `loop` through one new tail block.

    Each loop-body block whose terminator targets the loop header is
    replaced in `newblocks` by a copy that targets the new tail block
    instead; the tail block itself only jumps back to the header.
    """
    header = loop.header
    tailkey = new_block_id()

    for key in loop.body:
        original = newblocks[key]
        if header not in original.terminator.get_targets():
            continue
        # Redirect the backedge to the tail block, on a copy of the block.
        rewritten = original.copy()
        rewritten.body[-1] = replace_target(original.terminator, header, tailkey)
        newblocks[key] = rewritten

    # The tail block carries the single remaining backedge.
    header_block = newblocks[header]
    tail = ir.Block(scope=header_block.scope, loc=header_block.loc)
    tail.append(ir.Jump(target=header, loc=tail.loc))
    newblocks[tailkey] = tail
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
    """Build and compile the function that applies the stencil kernel.

    `args` holds the argument types; `args[0]` is the first input array
    type and its `ndim` determines the depth of the generated loop nest.
    If `result` is None the generated function allocates the output array
    itself, otherwise it takes the output as an extra keyword parameter.
    If `sigret` is not None, its `pysig` attribute is set to the Python
    signature of the generated function.  `return_type.dtype` selects the
    dtype of an allocated output array.  `typemap` and `calltypes` are
    used when copying and copy-propagating the kernel IR.

    Returns the result of `compiler.compile_ir` for the combined IR.
    """
    # Overall approach:
    # 1) Construct a string containing a function definition for the stencil function
    #    that will execute the stencil kernel.  This function definition includes a
    #    unique stencil function name, the parameters to the stencil kernel, loop
    #    nests across the dimensions of the input array.  Those loop nests use the
    #    computed stencil kernel size so as not to try to compute elements where
    #    elements outside the bounds of the input array would be needed.
    # 2) The body of the loop nest in this new function is a special sentinel
    #    assignment.
    # 3) Get the IR of this new function.
    # 4) Split the block containing the sentinel assignment and remove the sentinel
    #    assignment.  Insert the stencil kernel IR into the stencil function IR
    #    after label and variable renaming of the stencil kernel IR to prevent
    #    conflicts with the stencil function IR.
    # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

    # Copy the kernel so that our changes for this callsite
    # won't affect other callsites.
    (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(self.kernel_ir, calltypes)
    # The stencil kernel body becomes the body of a loop, for which args aren't needed.
    ir_utils.remove_args(kernel_copy.blocks)
    first_arg = kernel_copy.arg_names[0]

    in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
    name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
    ir_utils.apply_copy_propagate(kernel_copy.blocks, in_cps, name_var_table, typemap, copy_calltypes)

    if "out" in name_var_table:
        raise ValueError(
            "Cannot use the reserved word 'out' in stencil kernels.")

    sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
    if config.DEBUG_ARRAY_OPT == 1:
        print("name_var_table", name_var_table, sentinel_name)

    the_array = args[0]

    if config.DEBUG_ARRAY_OPT == 1:
        print("_stencil_wrapper", return_type, return_type.dtype, type(return_type.dtype), args)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # We generate a Numba function to execute this stencil and here
    # create the unique name of this function.
    stencil_func_name = "__numba_stencil_%s_%s" % (hex(
        id(the_array)).replace("-", "_"), self.id)

    # We will put a loop nest in the generated function for each
    # dimension in the input array.  Here we create the name for
    # the index variable for each dimension.  index0, index1, ...
    index_vars = []
    for i in range(the_array.ndim):
        index_var_name = ir_utils.get_unused_var_name(
            "index" + str(i), name_var_table)
        index_vars += [index_var_name]

    # Create extra signature for out and neighborhood.
    out_name = ir_utils.get_unused_var_name("out", name_var_table)
    neighborhood_name = ir_utils.get_unused_var_name(
        "neighborhood", name_var_table)
    sig_extra = ""
    if result is not None:
        sig_extra += ", {}=None".format(out_name)
    if "neighborhood" in dict(self.kws):
        sig_extra += ", {}=None".format(neighborhood_name)

    # Get a list of the standard indexed array names.
    standard_indexed = self.options.get("standard_indexing", [])

    if first_arg in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must "
                         "use relative indexing, not standard indexing.")

    if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
        raise ValueError("Standard indexing requested for an array name "
                         "not present in the stencil kernel definition.")

    # Add index variables to getitems in the IR to transition the accesses
    # in the kernel from relative to regular Python indexing.  Returns the
    # computed size of the stencil kernel and a list of the relatively indexed
    # arrays.
    kernel_size, relatively_indexed = self.add_indices_to_kernel(
        kernel_copy, index_vars, the_array.ndim, self.neighborhood, standard_indexed)
    if self.neighborhood is None:
        self.neighborhood = kernel_size

    if config.DEBUG_ARRAY_OPT == 1:
        print("After add_indices_to_kernel")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # The return in the stencil kernel becomes a setitem for that
    # particular point in the iteration space.
    ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks, index_vars, out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("After replace_return_with_setitem", ret_blocks)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Start to form the new function to execute the stencil kernel.
    func_text = "def {}({}{}):\n".format(stencil_func_name, ",".join(kernel_copy.arg_names), sig_extra)

    # Get loop ranges for each dimension, which could be either int
    # or variable.  In the latter case we'll use the extra neighborhood
    # argument to the function.
    ranges = []
    for i in range(the_array.ndim):
        if isinstance(kernel_size[i][0], int):
            lo = kernel_size[i][0]
            hi = kernel_size[i][1]
        else:
            lo = "{}[{}][0]".format(neighborhood_name, i)
            hi = "{}[{}][1]".format(neighborhood_name, i)
        ranges.append((lo, hi))

    # If there are more than one relatively indexed arrays, add a call to
    # a function that will raise an error if any of the relatively indexed
    # arrays are of different size than the first input array.
    if len(relatively_indexed) > 1:
        func_text += " raise_if_incompatible_array_sizes(" + first_arg
        for other_array in relatively_indexed:
            if other_array != first_arg:
                func_text += "," + other_array
        func_text += ")\n"

    # Get the shape of the first input array.
    shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
    func_text += " {} = {}.shape\n".format(shape_name, first_arg)

    # If we have to allocate the output array (the out argument was not used)
    # then use numpy.full if the user specified a cval stencil decorator option
    # or np.zeros if they didn't to allocate the array.
    if result is None:
        return_type_name = numpy_support.as_dtype(
            return_type.dtype).type.__name__
        if "cval" in self.options:
            cval = self.options["cval"]
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            out_init = "{} = np.full({}, {}, dtype=np.{})\n".format(
                out_name, shape_name, cval, return_type_name)
        else:
            out_init = "{} = np.zeros({}, dtype=np.{})\n".format(
                out_name, shape_name, return_type_name)
        func_text += " " + out_init

    # `offset` is the number of indent units for the next generated line.
    offset = 1
    # Add the loop nests to the new function.
    for i in range(the_array.ndim):
        for j in range(offset):
            func_text += " "
        # ranges[i][0] is the minimum index used in the i'th dimension
        # but minimums greater than 0 don't preclude any entry in the array.
        # So, take the minimum of 0 and the minimum index found in the kernel
        # and this will be a negative number (potentially -0).  Then, we do
        # unary - on that to get the positive offset in this dimension whose
        # use is precluded.
        # ranges[i][1] is the maximum of 0 and the observed maximum index
        # in this dimension because negative maximums would not cause us to
        # preclude any entry in the array from being used.
        func_text += ("for {} in range(-min(0,{}),"
                      "{}[{}]-max(0,{})):\n").format(
            index_vars[i], ranges[i][0], shape_name, i, ranges[i][1])
        offset += 1

    for j in range(offset):
        func_text += " "
    # Put a sentinel in the code so we can locate it in the IR.  We will
    # remove this sentinel assignment and replace it with the IR for the
    # stencil kernel body.
    func_text += "{} = 0\n".format(sentinel_name)
    func_text += " return {}\n".format(out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new stencil func text")
        print(func_text)

    # Force the new stencil function into existence.
    # NOTE(review): `... in globals(), locals()` is only a tuple expression
    # evaluated after exec_ returns; presumably exec_ falls back to the
    # caller's namespaces when none are passed -- confirm.
    exec_(func_text) in globals(), locals()
    stencil_func = eval(stencil_func_name)
    if sigret is not None:
        pysig = utils.pysignature(stencil_func)
        sigret.pysig = pysig
    # Get the IR for the newly created stencil function.
    stencil_ir = compiler.run_frontend(stencil_func)
    ir_utils.remove_dels(stencil_ir.blocks)

    # rename all variables in stencil_ir afresh
    var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
    new_var_dict = {}
    reserved_names = (
        [sentinel_name, out_name, neighborhood_name, shape_name]
        + kernel_copy.arg_names + index_vars)
    for name, var in var_table.items():
        if not name in reserved_names:
            new_var_dict[name] = ir_utils.mk_unique_var(name)
    ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

    stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

    # Shift labels in the kernel copy so they are guaranteed unique
    # and don't conflict with any labels in the stencil_ir.
    kernel_copy.blocks = ir_utils.add_offset_to_labels(
        kernel_copy.blocks, stencil_stub_last_label)
    new_label = max(kernel_copy.blocks.keys()) + 1

    # Adjust ret_blocks to account for addition of the offset.
    ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

    if config.DEBUG_ARRAY_OPT == 1:
        print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
        print("before replace sentinel stencil_ir")
        ir_utils.dump_blocks(stencil_ir.blocks)
        print("before replace sentinel kernel_copy")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Search all the block in the stencil outline for the sentinel.
    for label, block in stencil_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(kernel_copy.blocks.keys())

                # The previous block jumps to the minimum labelled block of
                # the parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc
                # function's IR.
                for (l, b) in kernel_copy.blocks.items():
                    stencil_ir.blocks[l] = b

                stencil_ir.blocks[new_label] = block
                stencil_ir.blocks[label] = prev_block
                # Add a jump from all the blocks that previously contained
                # a return in the stencil kernel to the block
                # containing statements after the sentinel.
                for ret_block in ret_blocks:
                    stencil_ir.blocks[ret_block].append(
                        ir.Jump(new_label, loc))
                break
        else:
            # No sentinel in this block: keep searching the next block.
            continue
        # The sentinel was found and replaced: stop searching.
        break

    stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
    ir_utils.remove_dels(stencil_ir.blocks)

    assert (isinstance(the_array, types.Type))
    array_types = args

    new_stencil_param_types = list(array_types)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new_stencil_param_types", new_stencil_param_types)
        ir_utils.dump_blocks(stencil_ir.blocks)

    # Compile the combined stencil function with the replaced loop
    # body in it.
    new_func = compiler.compile_ir(self._typingctx, self._targetctx, stencil_ir, new_stencil_param_types, None, compiler.DEFAULT_FLAGS, {})
    return new_func
def test_jump(self):
    """Check `ir.Jump` comparison via `self.check`.

    Jumps to the same target (with either location) are passed as `same`;
    a jump to a different target is passed as `different`.
    """
    reference = ir.Jump(1, self.loc1)
    same_target_same_loc = ir.Jump(1, self.loc1)
    same_target_other_loc = ir.Jump(1, self.loc2)
    other_target = ir.Jump(2, self.loc1)
    self.check(
        reference,
        same=[same_target_same_loc, same_target_other_loc],
        different=[other_target],
    )
def inline_closure_call(self, block, i, callee):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`).

    `callee` must provide `code`, `defaults` and `closure` attributes.
    The callee's IR is relabelled, its local variables are renamed to
    temporaries of the caller's scope, its parameters and free variables
    are replaced by the actual arguments and closure variables, and its
    returns become assignments to the call's target.  `block` is split
    around the call and all callee blocks are added to `self.func_ir`.

    Returns a list of `(label, block)` pairs for every block that was
    added: first the block holding the statements after the call, then the
    callee blocks.

    Raises NotImplementedError when `callee.defaults` is neither a tuple,
    an `ir.Var` nor a string.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    _debug_print("Found closure call: ", instr, " with callee = ", callee)
    func_ir = self.func_ir
    # first, get the IR of the callee
    from_ir = self.get_ir_of_code(callee.code)
    from_blocks = from_ir.blocks

    # 1. relabel from_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    from_blocks = add_offset_to_labels(from_blocks, max_label + 1)
    from_ir.blocks = from_blocks
    min_label = min(from_blocks.keys())
    max_label = max(from_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    ir_utils.visit_vars_extensions = {}

    # 2. rename all local variables in from_ir with new locals created in func_ir
    from_scopes = _get_all_scopes(from_blocks)
    _debug_print("obj_IR has scopes: ", from_scopes)
    # one function should only have one local scope
    assert(len(from_scopes) == 1)
    from_scope = from_scopes[0]
    var_dict = {}
    for var in from_scope.localvars._con.values():
        # free variables are handled in step 4, not renamed here
        if var.name not in callee.code.co_freevars:
            var_dict[var.name] = scope.make_temp(var.loc)
    _debug_print("Before local var rename: var_dict = ", var_dict)
    _debug_dump(from_ir)
    replace_vars(from_blocks, var_dict)
    _debug_print("After local var rename: ")
    _debug_dump(from_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee.defaults:
        _debug_print("defaults", callee.defaults)
        if isinstance(callee.defaults, tuple):  # Python 3.5
            args = args + list(callee.defaults)
        elif isinstance(callee.defaults, ir.Var) or isinstance(callee.defaults, str):
            defaults = func_ir.get_definition(callee.defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc) for v in defaults.value]
        else:
            # Report the offending object itself; the local `defaults` is
            # not bound on this path.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(callee.defaults))
    _replace_args_with(from_blocks, args)
    _debug_print("After arguments rename: ")
    _debug_dump(from_ir)

    # 4. replace freevar with actual closure var
    if callee.closure:
        closure = func_ir.get_definition(callee.closure)
        assert(isinstance(closure, ir.Expr) and closure.op == 'build_tuple')
        assert(len(callee.code.co_freevars) == len(closure.items))
        _debug_print("callee's closure = ", closure)
        _replace_freevars(from_blocks, closure.items)
        _debug_print("After closure rename: ")
        _debug_dump(from_ir)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    _replace_returns(from_blocks, instr.target, new_label)

    # 7. insert all new blocks, and add back definitions
    for label, from_block in from_blocks.items():
        # block scope must point to parent's
        from_block.scope = scope
        _add_definition(func_ir, from_block)
        func_ir.blocks[label] = from_block
        new_blocks.append((label, from_block))
    _debug_print("After merge: ")
    _debug_dump(func_ir)
    return new_blocks
def op_JUMP_FORWARD(self, inst):
    """Append a jump to the instruction's jump target to the current block."""
    destination = inst.get_jump_target()
    self.current_block.append(ir.Jump(destination, loc=self.loc))
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None, arg_typs=None, typemap=None, calltypes=None, work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.

    When `work_list` is given, every block added to `func_ir` is appended to
    it as a `(label, block)` pair.

    Returns the relabelled and simplified callee blocks.

    Raises NotImplementedError when the callee's defaults are neither a
    tuple, an `ir.Var` nor a string.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, 'code') else callee.__code__
    callee_defaults = callee.defaults if hasattr(callee, 'defaults') else callee.__defaults__
    callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__
    # first, get the IR of the callee
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    # one function should only have one local scope
    assert(len(callee_scopes) == 1)
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        # free variables are handled in step 4, not renamed here
        if var.name not in callee_code.co_freevars:
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple):  # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, ir.Var) or isinstance(callee_defaults, str):
            defaults = func_ir.get_definition(callee_defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # Report the offending object itself; the local `defaults` is
            # not bound on this path.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(callee_defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # A tuple of cell objects: read each cell's content through
            # the C API.
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        # Typed pass: type the callee and merge its typing data into the
        # caller's.
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage(
            typingctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    # remove the old definition of instr.target too
    if (instr.target.name in func_ir._definitions):
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        callee_block = callee_blocks[label]
        callee_block.scope = scope
        _add_definitions(func_ir, callee_block)
        func_ir.blocks[label] = callee_block
        new_blocks.append((label, callee_block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for entry in new_blocks:
            work_list.append(entry)
    return callee_blocks
def op_BREAK_LOOP(self, inst):
    """Append a jump to the exit of the innermost syntax block, which must
    be a loop, to the current block."""
    innermost = self.syntax_blocks[-1]
    assert isinstance(innermost, ir.Loop)
    self.current_block.append(ir.Jump(target=innermost.exit, loc=self.loc))
def do_prune(take_truebr, blk):
    """Replace the terminator of `blk` by a jump to the kept branch target.

    The true target of `branch` is kept when `take_truebr` is true, the
    false target otherwise.  Returns 1 if the kept target equals the true
    target, else 0.
    """
    if take_truebr:
        keep = branch.truebr
    else:
        keep = branch.falsebr
    # replace the branch with a direct jump
    blk.body[-1] = ir.Jump(keep, loc=branch.loc)
    if keep == branch.truebr:
        return 1
    return 0