def _gen_col_var(self, out_var, args, col_var):
    """Generate IR blocks that compute the variance of a column.

    The mean is generated first via ``self._gen_col_mean``; the variance
    blocks (built from the template function ``f`` below) are then stitched
    onto the end of the mean blocks.  NaN entries are skipped, and the result
    is NaN when at most one non-NaN value is present (the divisor is
    ``count - 1``).

    Returns the combined label-keyed mapping of blocks; ``out_var`` holds the
    result.
    """
    scope = out_var.scope
    loc = out_var.loc

    # --- mean part -------------------------------------------------------
    mean_var = ir.Var(scope, mk_unique_var("mean_val"), loc)
    mean_blocks = self._gen_col_mean(mean_var, args, col_var)
    mean_blocks = add_offset_to_labels(mean_blocks, ir_utils._max_label + 1)
    ir_utils._max_label = max(mean_blocks.keys())
    mean_tail_label = find_topo_order(mean_blocks)[-1]
    mean_tail = mean_blocks[mean_tail_label]
    remove_none_return_from_block(mean_tail)

    # --- variance template -----------------------------------------------
    # NOTE: the template body is left untouched on purpose; its IR (including
    # the local names) is what gets spliced into the caller.
    def f(A, s, m):
        count = 0
        for i in numba.parfor.prange(len(A)):
            val = A[i]
            if not np.isnan(val):
                s += (val-m)**2
                count += 1
        if count <= 1:
            s = np.nan
        else:
            s = s/(count-1)

    var_blocks = get_inner_ir(f)
    # Substitute the template placeholders one at a time, in this order.
    substitutions = (
        ('A', col_var.name),
        ('s', out_var.name),
        ('m', mean_var.name),
    )
    for placeholder, real_name in substitutions:
        replace_var_names(var_blocks, {placeholder: real_name})

    # The accumulator starts at 0.0; the first variance block is merged into
    # the last mean block instead of being kept as a separate block.
    entry_block = var_blocks.pop(0)
    entry_block.body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
    mean_tail.body.extend(entry_block.body)

    var_blocks = add_offset_to_labels(var_blocks, ir_utils._max_label + 1)
    # The merged entry block was not relabelled by the call above, so the
    # target of its trailing jump is shifted by hand.
    mean_tail.body[-1].target += ir_utils._max_label + 1
    ir_utils._max_label = max(var_blocks.keys())

    mean_blocks.update(var_blocks)
    return mean_blocks
def include_new_blocks(blocks, new_blocks, label, new_body,
                       remove_non_return=True, work_list=None, func_ir=None):
    """Splice ``new_blocks`` into ``blocks`` right after ``new_body``.

    ``new_blocks`` is relabelled past ``ir_utils._max_label`` and merged into
    ``blocks``.  The block at ``label`` receives ``new_body`` terminated by a
    jump to the first inserted block, and a fresh empty block is created for
    the statements that follow.  When ``remove_non_return`` is true the return
    of the last inserted block is removed and replaced by a jump to that fresh
    block.  When ``work_list`` is given, every inserted block is re-scoped,
    registered with ``func_ir`` and appended to it as ``(label, block)``.

    Returns the label of the fresh continuation block.
    """
    spliced = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(spliced)
    ir_utils._max_label = max(blocks.keys())

    host_block = blocks[label]
    scope = host_block.scope
    loc = host_block.loc

    order = find_topo_order(spliced)
    entry_label, exit_label = order[0], order[-1]
    if remove_non_return:
        remove_return_from_block(spliced[exit_label])

    # Close the current block with a jump into the spliced region.
    new_body.append(ir.Jump(entry_label, loc))
    blocks[label].body = new_body

    # Continuation block for whatever comes after the spliced region.
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    if remove_non_return:
        spliced[exit_label].body.append(ir.Jump(label, loc))

    if work_list is not None:
        for spliced_label in find_topo_order(spliced):
            spliced_block = spliced[spliced_label]
            spliced_block.scope = scope
            numba.inline_closurecall._add_definitions(func_ir, spliced_block)
            work_list.append((spliced_label, spliced_block))

    return label
def inline_calls_inner(func_ir, block, stmt, i, py_func):
    """Inline ``py_func`` at the call statement ``stmt`` (index ``i`` of ``block``).

    The callee IR is obtained with ``run_frontend``, relabelled and renamed so
    it cannot clash with the caller, then wired in: the caller block is split
    at ``i``, the first half jumps into the callee, and callee returns become
    assignments to ``stmt.target`` followed by a jump to the second half.
    Finally ``inline_calls`` is re-run on ``func_ir``.
    """
    scope = block.scope
    callee_ir = numba.compiler.run_frontend(py_func)

    # Relabel the callee so its labels start after the caller's.
    caller_max = max(func_ir.blocks.keys())
    callee_ir.blocks = add_offset_to_labels(callee_ir.blocks, caller_max + 1)
    callee_labels = callee_ir.blocks.keys()
    entry_label = min(callee_labels)
    # ir_utils.next_label() relies on this global being up to date.
    ir_utils._max_label = max(callee_labels)

    # Give every callee variable a fresh name defined in the caller's scope.
    renames = {}
    for old_name, old_var in get_name_var_table(callee_ir.blocks).items():
        renames[old_name] = scope.define(mk_unique_var(old_var.name),
                                         loc=old_var.loc)
    replace_vars(callee_ir.blocks, renames)

    # Replace callee arguments with the actual call arguments.
    # TODO: replace defaults (add to args)
    _replace_args(callee_ir.blocks, list(stmt.value.args))

    # Split the caller block around the call statement.
    tail_block = ir.Block(scope, block.loc)
    tail_block.body = block.body[i + 1:]
    tail_label = ir_utils.next_label()
    func_ir.blocks[tail_label] = tail_block
    block.body = block.body[:i]
    block.body.append(ir.Jump(entry_label, stmt.loc))

    # Returns in the callee become assignments to the call's LHS.
    _replace_returns(callee_ir.blocks, stmt.target, tail_label)

    # Merge the callee blocks into the caller.
    for callee_label, callee_block in callee_ir.blocks.items():
        func_ir.blocks[callee_label] = callee_block

    # Other calls (including ones just inlined) are handled recursively.
    inline_calls(func_ir)
def include_new_blocks(blocks, new_blocks, label, new_body):
    """Splice ``new_blocks`` into ``blocks`` right after ``new_body``.

    ``new_blocks`` is relabelled past ``ir_utils._max_label`` and merged into
    ``blocks``.  The block at ``label`` receives ``new_body`` plus a jump to
    the first inserted block; the last inserted block has its ``None`` return
    removed and jumps to a freshly created, empty continuation block.

    Returns the label of that continuation block.
    """
    spliced = add_offset_to_labels(new_blocks, ir_utils._max_label + 1)
    blocks.update(spliced)
    ir_utils._max_label = max(blocks.keys())

    host_block = blocks[label]
    scope = host_block.scope
    loc = host_block.loc

    order = find_topo_order(spliced)
    entry_label, exit_label = order[0], order[-1]
    exit_block = spliced[exit_label]
    remove_none_return_from_block(exit_block)

    # Close the current block with a jump into the spliced region.
    new_body.append(ir.Jump(entry_label, loc))
    blocks[label].body = new_body

    # The spliced region falls through to a fresh continuation block.
    label = ir_utils.next_label()
    blocks[label] = ir.Block(scope, loc)
    exit_block.body.append(ir.Jump(label, loc))
    return label
def inline_new_blocks(func_ir, block, i, callee_blocks, work_list=None):
    """Inline ``callee_blocks`` in place of instruction ``i`` of ``block``.

    Adapted from ``inline_closure_call``.  The callee blocks are relabelled
    and simplified, the caller block is split at ``i``, callee returns are
    turned into assignments to the replaced instruction's target, and the
    callee blocks are added to ``func_ir`` with the caller's scope.  When
    ``work_list`` is given, the continuation block and all inserted blocks are
    appended to it as ``(label, block)`` pairs.

    Returns the relabelled, simplified callee blocks.
    """
    scope = block.scope
    replaced = block.body[i]

    # 1. relabel the callee blocks past the current maximum label
    callee_blocks = add_offset_to_labels(callee_blocks,
                                         ir_utils._max_label + 1)
    callee_blocks = ir_utils.simplify_CFG(callee_blocks)
    # keep the ir_utils global in sync before next_label() is used
    ir_utils._max_label = max(callee_blocks.keys())
    order = find_topo_order(callee_blocks)

    # 5. split the caller block into two
    tail_block = ir.Block(scope, block.loc)
    tail_block.body = block.body[i + 1:]
    tail_label = ir_utils.next_label()
    func_ir.blocks[tail_label] = tail_block
    added = [(tail_label, tail_block)]

    block.body = block.body[:i]
    block.body.append(ir.Jump(order[0], replaced.loc))

    # 6. replace Return with assignment to LHS
    numba.inline_closurecall._replace_returns(
        callee_blocks, replaced.target, tail_label)
    # the old definition of the target is no longer valid
    target_name = replaced.target.name
    if target_name in func_ir._definitions:
        func_ir._definitions[target_name] = []

    # 7. insert all new blocks, and add back definitions
    for callee_label in order:
        callee_block = callee_blocks[callee_label]
        # block scope must point to the parent's
        callee_block.scope = scope
        numba.inline_closurecall._add_definitions(func_ir, callee_block)
        func_ir.blocks[callee_label] = callee_block
        added.append((callee_label, callee_block))

    if work_list is not None:
        for entry in added:
            work_list.append(entry)

    return callee_blocks
def run(self):
    """Run the hiframes transformation over every block of ``self.func_ir``.

    Blocks are visited in topological order.  Static setitems on known
    dataframe variables are recorded in ``self.df_vars`` and dropped from the
    IR.  Assignments are passed to ``self._run_assign``: a list result
    replaces the statement, a dict result (new blocks) is spliced into the
    control flow at that point.  Everything else is copied unchanged.  Dead
    code is removed at the end.
    """
    dprint_func_ir(self.func_ir, "starting hiframes")

    for label in find_topo_order(self.func_ir.blocks):
        new_body = []
        for inst in self.func_ir.blocks[label].body:
            # df['col'] = arr
            if (isinstance(inst, ir.StaticSetItem)
                    and inst.target.name in self.df_vars):
                self.df_vars[inst.target.name][inst.index] = inst.value
                self._update_df_cols()
                continue

            if not isinstance(inst, ir.Assign):
                new_body.append(inst)
                continue

            out_nodes = self._run_assign(inst)
            if isinstance(out_nodes, list):
                new_body.extend(out_nodes)
            if isinstance(out_nodes, dict):
                # New blocks: relabel them and merge into the function IR.
                inner_blocks = add_offset_to_labels(
                    out_nodes, ir_utils._max_label + 1)
                self.func_ir.blocks.update(inner_blocks)
                ir_utils._max_label = max(self.func_ir.blocks.keys())

                scope = self.func_ir.blocks[label].scope
                loc = self.func_ir.blocks[label].loc
                inner_order = find_topo_order(inner_blocks)
                inner_first_label = inner_order[0]
                inner_last_label = inner_order[-1]
                remove_none_return_from_block(inner_blocks[inner_last_label])

                # Finish the current block with a jump into the new blocks...
                new_body.append(ir.Jump(inner_first_label, loc))
                self.func_ir.blocks[label].body = new_body
                # ...and continue filling a fresh block that the new blocks
                # jump back to.
                label = ir_utils.next_label()
                self.func_ir.blocks[label] = ir.Block(scope, loc)
                inner_blocks[inner_last_label].body.append(
                    ir.Jump(label, loc))
                new_body = []

        self.func_ir.blocks[label].body = new_body

    remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    dprint_func_ir(self.func_ir, "after hiframes")
    if config.DEBUG_ARRAY_OPT == 1:
        print("df_vars: ", self.df_vars)
def _create_gufunc_for_parfor_body(lowerer, parfor, typemap, typingctx,
                                   targetctx, flags, locals):
    '''
    Takes a parfor and creates a gufunc function for its body.
    There are two parts to this function.
    1) Code to iterate across the iteration space as defined by the schedule.
    2) The parfor body that does the work for a single point in the iteration
       space.
    Part 1 is created as Python text for simplicity with a sentinel assignment
    to mark the point in the IR where the parfor body should be added.
    This Python text is 'exec'ed into existence and its IR retrieved with
    run_frontend.  The IR is scanned for the sentinel assignment where that
    basic block is split and the IR for the parfor body inserted.

    Returns a tuple ``(kernel_func, parfor_args, kernel_sig)``: the compiled
    kernel, the (un-legalized) parameter names in call order, and the kernel
    signature.  ``typemap`` is extended with an entry for each reduction
    array.
    '''
    # TODO: need copy?
    # The parfor body and the main function body share ir.Var nodes.
    # We have to do some replacements of Var names in the parfor body to make
    # them legal parameter names.  If we don't copy then the Vars in the main
    # function also would incorrectly change their name.
    loop_body = copy.copy(parfor.loop_body)

    parfor_dim = len(parfor.loop_nests)
    loop_indices = [l.index_variable.name for l in parfor.loop_nests]

    # Get all the parfor params.
    parfor_params = parfor.params
    # Get just the outputs of the parfor.
    parfor_outputs = numba.parfor.get_parfor_outputs(parfor, parfor_params)
    # Get all parfor reduction vars, and operators.
    parfor_redvars, parfor_reddict = numba.parfor.get_parfor_reductions(
        parfor, parfor_params, lowerer.fndesc.calltypes)
    # Compute just the parfor inputs as a set difference.
    parfor_inputs = sorted(
        list(set(parfor_params) - set(parfor_outputs) - set(parfor_redvars)))

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("parfor_outputs = ", parfor_outputs, " ", type(parfor_outputs))
        print("parfor_inputs = ", parfor_inputs, " ", type(parfor_inputs))
        print("parfor_redvars = ", parfor_redvars, " ", type(parfor_redvars))

    # Reduction variables are represented as arrays, so they go under
    # different names.
    parfor_redarrs = []
    for var in parfor_redvars:
        arr = var + "_arr"
        parfor_redarrs.append(arr)
        typemap[arr] = types.npytypes.Array(typemap[var], 1, "C")

    # Reorder all the params so that inputs go first then outputs.
    parfor_params = parfor_inputs + parfor_outputs + parfor_redarrs

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("loop_indices = ", loop_indices, " ", type(loop_indices))
        print("loop_body = ", loop_body, " ", type(loop_body))
        _print_body(loop_body)

    # Some Var are not legal parameter names so create a dict of potentially
    # illegal param name to guaranteed legal name.
    param_dict = legalize_names(parfor_params + parfor_redvars)
    if config.DEBUG_ARRAY_OPT == 1:
        print("param_dict = ", sorted(param_dict.items()), " ",
              type(param_dict))

    # Some loop_indices are not legal parameter names so create a dict of
    # potentially illegal loop index to guaranteed legal name.
    ind_dict = legalize_names(loop_indices)
    # Compute a new list of legal loop index names.
    legal_loop_indices = [ind_dict[v] for v in loop_indices]
    if config.DEBUG_ARRAY_OPT == 1:
        print("ind_dict = ", sorted(ind_dict.items()), " ", type(ind_dict))
        print("legal_loop_indices = ", legal_loop_indices, " ",
              type(legal_loop_indices))
        for pd in parfor_params:
            print("pd = ", pd)
            print("pd type = ", typemap[pd], " ", type(typemap[pd]))

    # Get the types of each parameter.
    param_types = [typemap[v] for v in parfor_params]

    # Replace illegal parameter names in the loop body with legal ones.
    replace_var_names(loop_body, param_dict)
    # remember the name before legalizing as the actual arguments
    parfor_args = parfor_params
    # Change parfor_params to be legal names.
    parfor_params = [param_dict[v] for v in parfor_params]
    # Change parfor body to replace illegal loop index vars with legal ones.
    replace_var_names(loop_body, ind_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("legal parfor_params = ", parfor_params, " ",
              type(parfor_params))

    # Determine the unique name of the gufunc function.
    gufunc_name = "__numba_parfor_gufunc_%s" % (hex(hash(parfor)).replace(
        "-", "_"))
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_name ", type(gufunc_name), " ", gufunc_name)

    # One indentation unit of the generated source text.
    indent_unit = " "

    # Create the gufunc function.
    gufunc_txt = "def " + gufunc_name + \
        "(sched, " + (", ".join(parfor_params)) + "):\n"

    # Add initialization of reduction variables
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += indent_unit + param_dict[var] + \
            "=" + param_dict[arr] + "[0]\n"

    # For each dimension of the parfor, create a for loop in the generated
    # gufunc function.  Iterate across the proper values extracted from the
    # schedule.  The form of the schedule is start_dim0, start_dim1, ...,
    # start_dimN, end_dim0, end_dim1, ..., end_dimN
    for eachdim in range(parfor_dim):
        gufunc_txt += indent_unit * (eachdim + 1)
        gufunc_txt += ("for " + legal_loop_indices[eachdim] +
                       " in range(sched[" + str(eachdim) +
                       "], sched[" + str(eachdim + parfor_dim) +
                       "] + 1):\n")

    # Add the sentinel assignment so that we can find the loop body position
    # in the IR.
    gufunc_txt += indent_unit * (parfor_dim + 1)
    gufunc_txt += "__sentinel__ = 0\n"

    # Add assignments of reduction variables (for returning the value)
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += indent_unit + param_dict[arr] + \
            "[0] = " + param_dict[var] + "\n"
    gufunc_txt += indent_unit + "return None\n"

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt)

    # Force gufunc outline into existence.  The definition is captured in an
    # explicit namespace: relying on exec() to inject a name into this
    # function's implicit locals (and eval() to read it back) is not
    # guaranteed to work and fails on interpreters implementing PEP 667.
    # Module globals are still used so the generated function resolves names
    # exactly as before.
    gufunc_namespace = {}
    exec(gufunc_txt, globals(), gufunc_namespace)
    gufunc_func = gufunc_namespace[gufunc_name]
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func)

    # Get the IR for the gufunc outline.
    gufunc_ir = compiler.run_frontend(gufunc_func)
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump ", type(gufunc_ir))
        gufunc_ir.dump()
        print("loop_body dump ", type(loop_body))
        _print_body(loop_body)

    # rename all variables in gufunc_ir afresh
    var_table = get_name_var_table(gufunc_ir.blocks)
    new_var_dict = {}
    reserved_names = ["__sentinel__"] + \
        list(param_dict.values()) + legal_loop_indices
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = mk_unique_var(name)
    replace_var_names(gufunc_ir.blocks, new_var_dict)
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump after renaming ")
        gufunc_ir.dump()

    gufunc_param_types = [numba.types.npytypes.Array(numba.intp, 1, "C")
                          ] + param_types
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_param_types = ", type(gufunc_param_types), "\n",
              gufunc_param_types)

    # Shift every parfor.loop_body label strictly past the last label of the
    # gufunc stub to prevent label conflicts.  The "+ 1" matters: without it
    # a body block labelled 0 would land exactly on the stub's last label and
    # overwrite that block.  Labels are renumbered by rename_labels() below,
    # so only uniqueness matters here.
    gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1
    loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label)
    # new label for splitting sentinel block
    new_label = max(loop_body.keys()) + 1
    if config.DEBUG_ARRAY_OPT:
        _print_body(loop_body)

    # Search all the block in the gufunc outline for the sentinel assignment.
    for label, block in gufunc_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == "__sentinel__"):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after the
                # sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(loop_body.keys())
                # The previous block jumps to the minimum labelled block of
                # the parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc
                # function's IR.
                for (l, b) in loop_body.items():
                    gufunc_ir.blocks[l] = b
                body_last_label = max(loop_body.keys())
                gufunc_ir.blocks[new_label] = block
                gufunc_ir.blocks[label] = prev_block
                # Add a jump from the last parfor body block to the block
                # containing statements after the sentinel.
                gufunc_ir.blocks[body_last_label].append(
                    ir.Jump(new_label, loc))
                break
        else:
            continue
        # The block dict was modified above, so stop iterating it right away.
        break

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump before renaming")
        gufunc_ir.dump()

    gufunc_ir.blocks = rename_labels(gufunc_ir.blocks)
    remove_dels(gufunc_ir.blocks)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump")
        gufunc_ir.dump()

    kernel_func = compiler.compile_ir(typingctx, targetctx, gufunc_ir,
                                      gufunc_param_types, types.none, flags,
                                      locals)

    kernel_sig = signature(types.none, *gufunc_param_types)
    if config.DEBUG_ARRAY_OPT:
        print("kernel_sig = ", kernel_sig)

    return kernel_func, parfor_args, kernel_sig
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes,
                     *args):
    """Build and compile the function that applies this stencil to ``args``.

    Overall approach:
    1) Construct a string containing a function definition for the stencil
       function that will execute the stencil kernel.  This function
       definition includes a unique stencil function name, the parameters to
       the stencil kernel, loop nests across the dimensions of the input
       array.  Those loop nests use the computed stencil kernel size so as
       not to try to compute elements where elements outside the bounds of
       the input array would be needed.
    2) The body of the loop nest in this new function is a special sentinel
       assignment.
    3) Get the IR of this new function.
    4) Split the block containing the sentinel assignment and remove the
       sentinel assignment.  Insert the stencil kernel IR into the stencil
       function IR after label and variable renaming of the stencil kernel
       IR to prevent conflicts with the stencil function IR.
    5) Compile the combined stencil function IR + stencil kernel IR into
       existence.

    ``result`` being ``None`` means the output array is allocated inside the
    generated function; otherwise an ``out`` keyword parameter is added to
    it.  When ``sigret`` is not ``None`` its ``pysig`` attribute is set to the
    Python signature of the generated function.  ``self.neighborhood`` is set
    to the computed kernel size if it was ``None``.

    Raises ``ValueError`` for a kernel using the reserved name ``out``, for
    invalid ``standard_indexing`` options, or for a ``cval`` whose type does
    not match the return dtype.

    Returns the result of ``compiler.compile_ir`` for the combined IR.
    """
    # Copy the kernel so that our changes for this callsite
    # won't effect other callsites.
    (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
        self.kernel_ir, calltypes)
    # The stencil kernel body becomes the body of a loop, for which args
    # aren't needed.
    ir_utils.remove_args(kernel_copy.blocks)
    first_arg = kernel_copy.arg_names[0]

    in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
    name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
    ir_utils.apply_copy_propagate(
        kernel_copy.blocks,
        in_cps,
        name_var_table,
        typemap,
        copy_calltypes)

    if "out" in name_var_table:
        raise ValueError(
            "Cannot use the reserved word 'out' in stencil kernels.")

    sentinel_name = ir_utils.get_unused_var_name("__sentinel__",
                                                 name_var_table)
    if config.DEBUG_ARRAY_OPT == 1:
        print("name_var_table", name_var_table, sentinel_name)

    the_array = args[0]

    if config.DEBUG_ARRAY_OPT == 1:
        print("_stencil_wrapper", return_type, return_type.dtype,
              type(return_type.dtype), args)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # We generate a Numba function to execute this stencil and here
    # create the unique name of this function.
    stencil_func_name = "__numba_stencil_%s_%s" % (hex(
        id(the_array)).replace("-", "_"), self.id)

    # We will put a loop nest in the generated function for each
    # dimension in the input array.  Here we create the name for
    # the index variable for each dimension.  index0, index1, ...
    index_vars = []
    for i in range(the_array.ndim):
        index_var_name = ir_utils.get_unused_var_name(
            "index" + str(i), name_var_table)
        index_vars.append(index_var_name)

    # Create extra signature for out and neighborhood.
    out_name = ir_utils.get_unused_var_name("out", name_var_table)
    neighborhood_name = ir_utils.get_unused_var_name(
        "neighborhood", name_var_table)
    sig_extra = ""
    if result is not None:
        sig_extra += ", {}=None".format(out_name)
    if "neighborhood" in dict(self.kws):
        sig_extra += ", {}=None".format(neighborhood_name)

    # Get a list of the standard indexed array names.
    standard_indexed = self.options.get("standard_indexing", [])

    if first_arg in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must "
                         "use relative indexing, not standard indexing.")

    if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
        raise ValueError("Standard indexing requested for an array name "
                         "not present in the stencil kernel definition.")

    # Add index variables to getitems in the IR to transition the accesses
    # in the kernel from relative to regular Python indexing.  Returns the
    # computed size of the stencil kernel and a list of the relatively
    # indexed arrays.
    kernel_size, relatively_indexed = self.add_indices_to_kernel(
        kernel_copy, index_vars, the_array.ndim,
        self.neighborhood, standard_indexed)
    if self.neighborhood is None:
        self.neighborhood = kernel_size

    if config.DEBUG_ARRAY_OPT == 1:
        print("After add_indices_to_kernel")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # The return in the stencil kernel becomes a setitem for that
    # particular point in the iteration space.
    ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                  index_vars, out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("After replace_return_with_setitem", ret_blocks)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # One indentation unit of the generated source text.
    indent_unit = " "

    # Start to form the new function to execute the stencil kernel.
    func_text = "def {}({}{}):\n".format(stencil_func_name,
                                         ",".join(kernel_copy.arg_names),
                                         sig_extra)

    # Get loop ranges for each dimension, which could be either int
    # or variable.  In the latter case we'll use the extra neighborhood
    # argument to the function.
    ranges = []
    for i in range(the_array.ndim):
        if isinstance(kernel_size[i][0], int):
            lo = kernel_size[i][0]
            hi = kernel_size[i][1]
        else:
            lo = "{}[{}][0]".format(neighborhood_name, i)
            hi = "{}[{}][1]".format(neighborhood_name, i)
        ranges.append((lo, hi))

    # If there are more than one relatively indexed arrays, add a call to
    # a function that will raise an error if any of the relatively indexed
    # arrays are of different size than the first input array.
    if len(relatively_indexed) > 1:
        func_text += " raise_if_incompatible_array_sizes(" + first_arg
        for other_array in relatively_indexed:
            if other_array != first_arg:
                func_text += "," + other_array
        func_text += ")\n"

    # Get the shape of the first input array.
    shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
    func_text += " {} = {}.shape\n".format(shape_name, first_arg)

    # If we have to allocate the output array (the out argument was not used)
    # then use numpy.full if the user specified a cval stencil decorator
    # option or np.zeros if they didn't to allocate the array.
    if result is None:
        return_type_name = numpy_support.as_dtype(
            return_type.dtype).type.__name__
        if "cval" in self.options:
            cval = self.options["cval"]
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            out_init = "{} = np.full({}, {}, dtype=np.{})\n".format(
                out_name, shape_name, cval, return_type_name)
        else:
            out_init = "{} = np.zeros({}, dtype=np.{})\n".format(
                out_name, shape_name, return_type_name)
        func_text += indent_unit + out_init

    offset = 1
    # Add the loop nests to the new function.
    for i in range(the_array.ndim):
        func_text += indent_unit * offset
        # ranges[i][0] is the minimum index used in the i'th dimension
        # but minimum's greater than 0 don't preclude any entry in the array.
        # So, take the minimum of 0 and the minimum index found in the kernel
        # and this will be a negative number (potentially -0).  Then, we do
        # unary - on that to get the positive offset in this dimension whose
        # use is precluded.
        # ranges[i][1] is the maximum of 0 and the observed maximum index
        # in this dimension because negative maximums would not cause us to
        # preclude any entry in the array from being used.
        func_text += ("for {} in range(-min(0,{}),"
                      "{}[{}]-max(0,{})):\n").format(
                          index_vars[i],
                          ranges[i][0],
                          shape_name,
                          i,
                          ranges[i][1])
        offset += 1

    func_text += indent_unit * offset
    # Put a sentinel in the code so we can locate it in the IR.  We will
    # remove this sentinel assignment and replace it with the IR for the
    # stencil kernel body.
    func_text += "{} = 0\n".format(sentinel_name)
    func_text += " return {}\n".format(out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new stencil func text")
        print(func_text)

    # Force the new stencil function into existence.  The namespaces are
    # passed explicitly: the previous form `exec_(text) in globals(),
    # locals()` was a mis-ported Python 2 exec statement that called exec_
    # with no namespaces at all and then relied on eval() finding the new
    # name in this function's implicit locals.  Module globals are used so
    # the generated code resolves names as before.
    stencil_namespace = {}
    exec_(func_text, globals(), stencil_namespace)
    stencil_func = stencil_namespace[stencil_func_name]
    if sigret is not None:
        pysig = utils.pysignature(stencil_func)
        sigret.pysig = pysig

    # Get the IR for the newly created stencil function.
    stencil_ir = compiler.run_frontend(stencil_func)
    ir_utils.remove_dels(stencil_ir.blocks)

    # rename all variables in stencil_ir afresh
    var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
    new_var_dict = {}
    reserved_names = (
        [sentinel_name, out_name, neighborhood_name, shape_name]
        + kernel_copy.arg_names + index_vars)
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = ir_utils.mk_unique_var(name)
    ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

    stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

    # Shift labels in the kernel copy so they are guaranteed unique
    # and don't conflict with any labels in the stencil_ir.
    kernel_copy.blocks = ir_utils.add_offset_to_labels(
        kernel_copy.blocks, stencil_stub_last_label)
    new_label = max(kernel_copy.blocks.keys()) + 1
    # Adjust ret_blocks to account for addition of the offset.
    ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

    if config.DEBUG_ARRAY_OPT == 1:
        print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
        print("before replace sentinel stencil_ir")
        ir_utils.dump_blocks(stencil_ir.blocks)
        print("before replace sentinel kernel_copy")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Search all the block in the stencil outline for the sentinel.
    for label, block in stencil_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(kernel_copy.blocks.keys())

                # The previous block jumps to the minimum labelled block of
                # the stencil kernel body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the stencil kernel blocks to the stencil
                # function's IR.
                for (l, b) in kernel_copy.blocks.items():
                    stencil_ir.blocks[l] = b

                stencil_ir.blocks[new_label] = block
                stencil_ir.blocks[label] = prev_block
                # Add a jump from all the blocks that previously contained
                # a return in the stencil kernel to the block
                # containing statements after the sentinel.
                for ret_block in ret_blocks:
                    stencil_ir.blocks[ret_block].append(
                        ir.Jump(new_label, loc))
                break
        else:
            continue
        # The block dict was modified above, so stop iterating it right away.
        break

    stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
    ir_utils.remove_dels(stencil_ir.blocks)

    assert (isinstance(the_array, types.Type))
    array_types = args

    new_stencil_param_types = list(array_types)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new_stencil_param_types", new_stencil_param_types)
        ir_utils.dump_blocks(stencil_ir.blocks)

    # Compile the combined stencil function with the replaced loop
    # body in it.
    new_func = compiler.compile_ir(
        self._typingctx,
        self._targetctx,
        stencil_ir,
        new_stencil_param_types,
        None,
        compiler.DEFAULT_FLAGS,
        {})
    return new_func
def inline_closure_call(self, block, i, callee):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    The callee IR is relabelled, its locals are replaced by fresh temporaries
    of the caller's scope, formal parameters are replaced by the call
    arguments (plus defaults), free variables by the closure items, and
    returns by assignments to the call's target.  The caller block is split
    around the call.

    Returns a list of ``(label, block)`` pairs: the continuation block first,
    followed by every inlined block.

    Raises ``NotImplementedError`` when ``callee.defaults`` has an
    unsupported form.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    _debug_print("Found closure call: ", instr, " with callee = ", callee)
    func_ir = self.func_ir
    # first, get the IR of the callee
    from_ir = self.get_ir_of_code(callee.code)
    from_blocks = from_ir.blocks
    # 1. relabel from_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    from_blocks = add_offset_to_labels(from_blocks, max_label + 1)
    from_ir.blocks = from_blocks
    min_label = min(from_blocks.keys())
    max_label = max(from_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    # NOTE(review): this clears a module-level registry in ir_utils for every
    # caller, not just this pass -- confirm that is intended.
    ir_utils.visit_vars_extensions = {}
    # 2. rename all local variables in from_ir with new locals created in
    #    func_ir
    from_scopes = _get_all_scopes(from_blocks)
    _debug_print("obj_IR has scopes: ", from_scopes)
    # one function should only have one local scope
    assert(len(from_scopes) == 1)
    from_scope = from_scopes[0]
    var_dict = {}
    for var in from_scope.localvars._con.values():
        # free variables are handled in step 4, not renamed here
        if var.name not in callee.code.co_freevars:
            var_dict[var.name] = scope.make_temp(var.loc)
    _debug_print("Before local var rename: var_dict = ", var_dict)
    _debug_dump(from_ir)
    replace_vars(from_blocks, var_dict)
    _debug_print("After local var rename: ")
    _debug_dump(from_ir)
    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee.defaults:
        _debug_print("defaults", callee.defaults)
        if isinstance(callee.defaults, tuple):  # Python 3.5
            args = args + list(callee.defaults)
        elif isinstance(callee.defaults, (ir.Var, str)):
            defaults = func_ir.get_definition(callee.defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # The local `defaults` is not bound on this path, so the message
            # must be built from callee.defaults; formatting `defaults` here
            # raised UnboundLocalError instead of NotImplementedError.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee.defaults))
    _replace_args_with(from_blocks, args)
    _debug_print("After arguments rename: ")
    _debug_dump(from_ir)
    # 4. replace freevar with actual closure var
    if callee.closure:
        closure = func_ir.get_definition(callee.closure)
        assert(isinstance(closure, ir.Expr)
               and closure.op == 'build_tuple')
        assert(len(callee.code.co_freevars) == len(closure.items))
        _debug_print("callee's closure = ", closure)
        _replace_freevars(from_blocks, closure.items)
        _debug_print("After closure rename: ")
        _debug_dump(from_ir)
    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i+1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))
    # 6. replace Return with assignment to LHS
    _replace_returns(from_blocks, instr.target, new_label)
    # 7. insert all new blocks, and add back definitions
    for label, block in from_blocks.items():
        # block scope must point to parent's
        block.scope = scope
        _add_definition(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))
    _debug_print("After merge: ")
    _debug_dump(func_ir)
    return new_blocks
def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap,
                   calltypes):
    """Get typed IR from stencil bytecode.

    A copy of ``sf.kernel_ir`` is type-inferred with a dummy pipeline, its
    labels and variables are made unique, and its ``ir.Arg`` assignments are
    replaced by the values in ``input_dict`` (keyed by argument index).
    ``typemap`` and ``calltypes`` (the caller's typing tables) are extended
    with the kernel's variable and call types.

    Returns ``(stencil_func_ir, return_type, arg_to_arr_dict)`` where
    ``arg_to_arr_dict`` maps kernel argument names to the names of the
    variables that replaced them.
    """
    from numba.targets.cpu import CPUContext
    from numba.targets.registry import cpu_target
    from numba.annotations import type_annotations
    from numba.compiler import type_inference_stage

    # Untyped IR; the blocks are deep-copied so the IR stored in the
    # StencilFunc object is not modified.
    stencil_func_ir = sf.kernel_ir.copy()
    stencil_blocks = copy.deepcopy(stencil_func_ir.blocks)
    stencil_func_ir.blocks = stencil_blocks

    if "out" in ir_utils.get_name_var_table(stencil_func_ir.blocks):
        raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

    # get typed IR with a dummy pipeline (similar to test_parfors.py)
    targetctx = CPUContext(typingctx)
    with cpu_target.nested_context(typingctx, targetctx):
        tp = DummyPipeline(typingctx, targetctx, args, stencil_func_ir)

        numba.rewrites.rewrite_registry.apply(
            'before-inference', tp, tp.func_ir)

        inferred = type_inference_stage(
            tp.typingctx, tp.func_ir, tp.args, None)
        tp.typemap, tp.return_type, tp.calltypes = inferred

        type_annotations.TypeAnnotation(
            func_ir=tp.func_ir,
            typemap=tp.typemap,
            calltypes=tp.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.args,
            return_type=tp.return_type,
            html_output=numba.config.HTML)

    # make block labels unique
    stencil_blocks = ir_utils.add_offset_to_labels(stencil_blocks,
                                                   ir_utils.next_label())
    min_label = min(stencil_blocks.keys())
    max_label = max(stencil_blocks.keys())
    ir_utils._max_label = max_label

    debug = config.DEBUG_ARRAY_OPT == 1
    if debug:
        print("Initial stencil_blocks")
        ir_utils.dump_blocks(stencil_blocks)

    # Rename every typed variable and record the new variable's type in the
    # overall function's typemap.
    var_dict = {}
    for old_name, var_type in tp.typemap.items():
        fresh_var = ir.Var(scope, mk_unique_var(old_name), loc)
        var_dict[old_name] = fresh_var
        typemap[fresh_var.name] = var_type
    ir_utils.replace_vars(stencil_blocks, var_dict)

    if debug:
        print("After replace_vars")
        ir_utils.dump_blocks(stencil_blocks)

    # add call types to overall function
    for call_node, call_type in tp.calltypes.items():
        calltypes[call_node] = call_type

    # replace arg with arr
    arg_to_arr_dict = {}
    for stencil_block in stencil_blocks.values():
        for stmt in stencil_block.body:
            if not (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Arg)):
                continue
            arg_node = stmt.value
            if config.DEBUG_ARRAY_OPT == 1:
                print("input_dict", input_dict, arg_node.index,
                      arg_node.name, arg_node.index in input_dict)
            arg_to_arr_dict[arg_node.name] = input_dict[arg_node.index].name
            stmt.value = input_dict[arg_node.index]

    if debug:
        print("arg_to_arr_dict", arg_to_arr_dict)
        print("After replace arg with arr")
        ir_utils.dump_blocks(stencil_blocks)

    ir_utils.remove_dels(stencil_blocks)
    stencil_func_ir.blocks = stencil_blocks
    return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None,
                        arg_typs=None, typemap=None, calltypes=None,
                        work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.

    If `work_list` is not None, every ``(label, block)`` pair created by the
    inlining (the tail of the split caller block followed by the callee
    blocks in topological order) is appended to it.

    Returns the dict of relabelled callee blocks that were merged into
    `func_ir`.

    Raises NotImplementedError when the callee's defaults are neither a
    tuple, an ``ir.Var`` nor a variable name.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, 'code') else callee.__code__
    callee_defaults = (callee.defaults if hasattr(callee, 'defaults')
                       else callee.__defaults__)
    callee_closure = (callee.closure if hasattr(callee, 'closure')
                      else callee.__closure__)
    # first, get the IR of the callee
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    #    reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in
    #    func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    #    one function should only have one local scope
    assert(len(callee_scopes) == 1)
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        # free variables are resolved from the closure in step 4
        if var.name not in callee_code.co_freevars:
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple):  # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, (ir.Var, str)):
            defaults = func_ir.get_definition(callee_defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # `defaults` is not bound on this path; report the offending
            # value itself so the intended error is the one raised.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee_defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # a tuple of cell objects: read each cell's content through the
            # C API
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        # typed pass: infer types for the callee and merge them into the
        # caller's typing data structures
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage(
            typingctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    #    (topological order is computed before the returns are rewritten)
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    #    remove the old definition of instr.target too
    if instr.target.name in func_ir._definitions:
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        callee_block = callee_blocks[label]
        callee_block.scope = scope
        _add_definitions(func_ir, callee_block)
        func_ir.blocks[label] = callee_block
        new_blocks.append((label, callee_block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for labelled_block in new_blocks:
            work_list.append(labelled_block)
    return callee_blocks
def _create_gufunc_for_parfor_body(
        lowerer,
        parfor,
        typemap,
        typingctx,
        targetctx,
        flags,
        locals,
        has_aliases,
        index_var_typ,
        races):
    '''
    Takes a parfor and creates a gufunc function for its body.
    There are two parts to this function.
    1) Code to iterate across the iteration space as defined by the schedule.
    2) The parfor body that does the work for a single point in the iteration space.
    Part 1 is created as Python text for simplicity with a sentinel assignment to mark the point
    in the IR where the parfor body should be added.
    This Python text is 'exec'ed into existence and its IR retrieved with run_frontend.
    The IR is scanned for the sentinel assignment where that basic block is split and the IR
    for the parfor body inserted.

    `races` is a set of variable names; after the reduction variables are
    removed from it, a ParallelSafetyWarning is issued for each remaining
    name and the names are passed to replace_var_with_array.

    `typemap` is updated in place (an entry is added for each reduction
    array). `flags.noalias` is forced to True during compilation when
    `has_aliases` is false and is restored afterwards, also when compilation
    raises.

    Returns a tuple (kernel_func, parfor_args, kernel_sig): the compiled
    kernel, the names of the actual arguments before legalization, and the
    kernel's signature.
    '''
    loc = parfor.init_block.loc

    # The parfor body and the main function body share ir.Var nodes.
    # We have to do some replacements of Var names in the parfor body to make them
    # legal parameter names. If we don't copy then the Vars in the main function also
    # would incorrectly change their name.
    loop_body = copy.copy(parfor.loop_body)
    remove_dels(loop_body)

    parfor_dim = len(parfor.loop_nests)
    loop_indices = [l.index_variable.name for l in parfor.loop_nests]

    # Get all the parfor params.
    parfor_params = parfor.params
    # Get just the outputs of the parfor.
    parfor_outputs = numba.parfor.get_parfor_outputs(parfor, parfor_params)
    # Get all parfor reduction vars, and operators.
    parfor_redvars, parfor_reddict = numba.parfor.get_parfor_reductions(
        parfor, parfor_params, lowerer.fndesc.calltypes)
    # Compute just the parfor inputs as a set difference.
    parfor_inputs = sorted(
        set(parfor_params) - set(parfor_outputs) - set(parfor_redvars))

    races = races.difference(set(parfor_redvars))
    for race in races:
        warnings.warn_explicit(
            "Variable %s used in parallel loop may be written "
            "to simultaneously by multiple workers and may result "
            "in non-deterministic or unintended results." % race,
            ParallelSafetyWarning, loc.filename, loc.line)
    replace_var_with_array(races, loop_body, typemap, lowerer.fndesc.calltypes)

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("parfor_outputs = ", parfor_outputs, " ", type(parfor_outputs))
        print("parfor_inputs = ", parfor_inputs, " ", type(parfor_inputs))
        print("parfor_redvars = ", parfor_redvars, " ", type(parfor_redvars))

    # Reduction variables are represented as arrays, so they go under
    # different names.
    parfor_redarrs = []
    for var in parfor_redvars:
        arr = var + "_arr"
        parfor_redarrs.append(arr)
        typemap[arr] = types.npytypes.Array(typemap[var], 1, "C")

    # Reorder all the params so that inputs go first then outputs.
    parfor_params = parfor_inputs + parfor_outputs + parfor_redarrs

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("loop_indices = ", loop_indices, " ", type(loop_indices))
        print("loop_body = ", loop_body, " ", type(loop_body))
        _print_body(loop_body)

    # Some Var are not legal parameter names so create a dict of potentially illegal
    # param name to guaranteed legal name.
    param_dict = legalize_names(parfor_params + parfor_redvars)
    if config.DEBUG_ARRAY_OPT == 1:
        print("param_dict = ", sorted(param_dict.items()), " ",
              type(param_dict))

    # Some loop_indices are not legal parameter names so create a dict of potentially illegal
    # loop index to guaranteed legal name.
    ind_dict = legalize_names(loop_indices)
    # Compute a new list of legal loop index names.
    legal_loop_indices = [ind_dict[v] for v in loop_indices]
    if config.DEBUG_ARRAY_OPT == 1:
        print("ind_dict = ", sorted(ind_dict.items()), " ", type(ind_dict))
        print("legal_loop_indices = ", legal_loop_indices, " ",
              type(legal_loop_indices))
        for pd in parfor_params:
            print("pd = ", pd)
            print("pd type = ", typemap[pd], " ", type(typemap[pd]))

    # Get the types of each parameter.
    param_types = [typemap[v] for v in parfor_params]

    # Replace illegal parameter names in the loop body with legal ones.
    replace_var_names(loop_body, param_dict)
    # remember the name before legalizing as the actual arguments
    parfor_args = parfor_params
    # Change parfor_params to be legal names.
    parfor_params = [param_dict[v] for v in parfor_params]
    parfor_params_orig = parfor_params

    # `ascontig` is a hard-coded switch, currently off. When on, input
    # arrays are received under a "...param" name and converted with
    # np.ascontiguousarray at the top of the generated function.
    parfor_params = []
    ascontig = False
    for pindex in range(len(parfor_params_orig)):
        if (ascontig and pindex < len(parfor_inputs)
                and isinstance(param_types[pindex], types.npytypes.Array)):
            parfor_params.append(parfor_params_orig[pindex] + "param")
        else:
            parfor_params.append(parfor_params_orig[pindex])

    # Change parfor body to replace illegal loop index vars with legal ones.
    replace_var_names(loop_body, ind_dict)
    loop_body_var_table = get_name_var_table(loop_body)
    sentinel_name = get_unused_var_name("__sentinel__", loop_body_var_table)

    if config.DEBUG_ARRAY_OPT == 1:
        print("legal parfor_params = ", parfor_params, " ",
              type(parfor_params))

    # Determine the unique name of the gufunc function.
    gufunc_name = "__numba_parfor_gufunc_%s" % (
        hex(hash(parfor)).replace("-", "_"))
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_name ", type(gufunc_name), " ", gufunc_name)

    gufunc_txt = ""

    # Create the gufunc function.
    gufunc_txt += "def " + gufunc_name + \
        "(sched, " + (", ".join(parfor_params)) + "):\n"

    for pindex in range(len(parfor_inputs)):
        if ascontig and isinstance(param_types[pindex], types.npytypes.Array):
            gufunc_txt += (" " + parfor_params_orig[pindex]
                           + " = np.ascontiguousarray("
                           + parfor_params[pindex] + ")\n")

    # Add initialization of reduction variables
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += " " + param_dict[var] + \
            "=" + param_dict[arr] + "[0]\n"

    # For each dimension of the parfor, create a for loop in the generated gufunc function.
    # Iterate across the proper values extracted from the schedule.
    # The form of the schedule is start_dim0, start_dim1, ..., start_dimN, end_dim0,
    # end_dim1, ..., end_dimN
    for eachdim in range(parfor_dim):
        # one indentation unit per enclosing level
        gufunc_txt += " " * (eachdim + 1)
        sched_dim = eachdim
        gufunc_txt += ("for " + legal_loop_indices[eachdim]
                       + " in range(sched[" + str(sched_dim)
                       + "], sched[" + str(sched_dim + parfor_dim)
                       + "] + np.uint8(1)):\n")

    if config.DEBUG_ARRAY_OPT_RUNTIME:
        gufunc_txt += " " * (parfor_dim + 1)
        gufunc_txt += "print("
        for eachdim in range(parfor_dim):
            gufunc_txt += ("\"" + legal_loop_indices[eachdim] + "\","
                           + legal_loop_indices[eachdim] + ",")
        gufunc_txt += ")\n"

    # Add the sentinel assignment so that we can find the loop body position
    # in the IR.
    gufunc_txt += " " * (parfor_dim + 1)
    gufunc_txt += sentinel_name + " = 0\n"
    # Add assignments of reduction variables (for returning the value)
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        gufunc_txt += " " + param_dict[arr] + \
            "[0] = " + param_dict[var] + "\n"
    gufunc_txt += " return None\n"

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt)
    # Force gufunc outline into existence.
    globls = {"np": np}
    locls = {}
    exec_(gufunc_txt, globls, locls)
    gufunc_func = locls[gufunc_name]

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func)
    # Get the IR for the gufunc outline.
    gufunc_ir = compiler.run_frontend(gufunc_func)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump ", type(gufunc_ir))
        gufunc_ir.dump()
        print("loop_body dump ", type(loop_body))
        _print_body(loop_body)

    # rename all variables in gufunc_ir afresh
    var_table = get_name_var_table(gufunc_ir.blocks)
    new_var_dict = {}
    reserved_names = [sentinel_name] + \
        list(param_dict.values()) + legal_loop_indices
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = mk_unique_var(name)
    replace_var_names(gufunc_ir.blocks, new_var_dict)
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump after renaming ")
        gufunc_ir.dump()

    # The schedule array comes first, followed by the parfor parameters.
    gufunc_param_types = [
        numba.types.npytypes.Array(index_var_typ, 1, "C")] + param_types
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_param_types = ", type(gufunc_param_types), "\n",
              gufunc_param_types)

    gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1

    # Add gufunc stub last label to each parfor.loop_body label to prevent
    # label conflicts.
    loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label)
    # new label for splitting sentinel block
    new_label = max(loop_body.keys()) + 1

    # If enabled, add a print statement after every assignment.
    if config.DEBUG_ARRAY_OPT_RUNTIME:
        for label, block in loop_body.items():
            new_block = block.copy()
            new_block.clear()
            loc = block.loc
            scope = block.scope
            for inst in block.body:
                new_block.append(inst)
                # Append print after assignment
                if isinstance(inst, ir.Assign):
                    # Only apply to numbers
                    if typemap[inst.target.name] not in types.number_domain:
                        continue

                    # Make constant string
                    strval = "{} =".format(inst.target.name)
                    strconsttyp = types.Const(strval)

                    lhs = ir.Var(scope, mk_unique_var("str_const"), loc)
                    assign_lhs = ir.Assign(
                        value=ir.Const(value=strval, loc=loc),
                        target=lhs, loc=loc)
                    typemap[lhs.name] = strconsttyp
                    new_block.append(assign_lhs)

                    # Make print node
                    print_node = ir.Print(args=[lhs, inst.target],
                                          vararg=None, loc=loc)
                    new_block.append(print_node)
                    sig = numba.typing.signature(types.none,
                                                 typemap[lhs.name],
                                                 typemap[inst.target.name])
                    lowerer.fndesc.calltypes[print_node] = sig
            loop_body[label] = new_block

    if config.DEBUG_ARRAY_OPT:
        print("parfor loop body")
        _print_body(loop_body)

    # Hoisted statements are placed in the first block of the outline, just
    # before its final statement.
    wrapped_blocks = wrap_loop_body(loop_body)
    hoisted = hoist(parfor_params, loop_body, typemap, wrapped_blocks)
    start_block = gufunc_ir.blocks[min(gufunc_ir.blocks.keys())]
    start_block.body = start_block.body[:-1] + hoisted + [start_block.body[-1]]
    unwrap_loop_body(loop_body)

    if config.DEBUG_ARRAY_OPT:
        print("After hoisting")
        _print_body(loop_body)

    # Search all the block in the gufunc outline for the sentinel assignment.
    # The dict is modified only right before breaking out of both loops.
    for label, block in gufunc_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the sentinel
                # but the new block maintains the current block label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after the sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(loop_body.keys())

                # The previous block jumps to the minimum labelled block of the
                # parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc function's
                # IR.
                for (l, b) in loop_body.items():
                    gufunc_ir.blocks[l] = b
                body_last_label = max(loop_body.keys())
                gufunc_ir.blocks[new_label] = block
                gufunc_ir.blocks[label] = prev_block
                # Add a jump from the last parfor body block to the block containing
                # statements after the sentinel.
                gufunc_ir.blocks[body_last_label].append(
                    ir.Jump(new_label, loc))
                break
        else:
            continue
        break

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump before renaming")
        gufunc_ir.dump()

    gufunc_ir.blocks = rename_labels(gufunc_ir.blocks)
    remove_dels(gufunc_ir.blocks)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump")
        gufunc_ir.dump()
        print("flags", flags)
        print("typemap", typemap)

    old_alias = flags.noalias
    if not has_aliases:
        if config.DEBUG_ARRAY_OPT:
            print("No aliases found so adding noalias flag.")
        flags.noalias = True
    try:
        kernel_func = compiler.compile_ir(
            typingctx,
            targetctx,
            gufunc_ir,
            gufunc_param_types,
            types.none,
            flags,
            locals)
    finally:
        # `flags` belongs to the caller: always undo the temporary change,
        # even when compilation fails.
        flags.noalias = old_alias

    kernel_sig = signature(types.none, *gufunc_param_types)
    if config.DEBUG_ARRAY_OPT:
        print("kernel_sig = ", kernel_sig)

    return kernel_func, parfor_args, kernel_sig
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
    """Generate and compile the function that runs this stencil's kernel.

    `result`: when None, the generated function allocates the output array
    itself; otherwise an extra ``out=None`` keyword parameter is added to
    the generated signature.
    `sigret`: when not None, its ``pysig`` attribute is set to the Python
    signature of the generated function.
    `return_type`: its ``dtype`` is used when allocating the output array
    and is checked against the type of the ``cval`` option.
    `typemap`, `calltypes`: typing data used when copying the kernel IR and
    applying copy propagation to the copy.
    `args`: the argument types; ``args[0]`` must be a ``types.Type`` and its
    ``ndim`` determines the depth of the generated loop nest.

    Side effect: ``self.neighborhood`` is set to the computed kernel size
    when it was None.

    Returns the result of ``compiler.compile_ir`` on the combined IR.

    Raises ValueError if the kernel uses a variable named ``out``, if
    standard indexing is requested for the first argument or for a name
    that is not a kernel argument, or if the type of ``cval`` differs from
    ``return_type.dtype``.
    """
    # Overall approach:
    # 1) Construct a string containing a function definition for the stencil function
    #    that will execute the stencil kernel. This function definition includes a
    #    unique stencil function name, the parameters to the stencil kernel, loop
    #    nests across the dimensions of the input array. Those loop nests use the
    #    computed stencil kernel size so as not to try to compute elements where
    #    elements outside the bounds of the input array would be needed.
    # 2) The body of the loop nest in this new function is a special sentinel
    #    assignment.
    # 3) Get the IR of this new function.
    # 4) Split the block containing the sentinel assignment and remove the sentinel
    #    assignment. Insert the stencil kernel IR into the stencil function IR
    #    after label and variable renaming of the stencil kernel IR to prevent
    #    conflicts with the stencil function IR.
    # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

    # Copy the kernel so that our changes for this callsite
    # won't affect other callsites.
    (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
        self.kernel_ir, calltypes)
    # The stencil kernel body becomes the body of a loop, for which args aren't needed.
    ir_utils.remove_args(kernel_copy.blocks)
    first_arg = kernel_copy.arg_names[0]

    in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
    name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
    ir_utils.apply_copy_propagate(
        kernel_copy.blocks,
        in_cps,
        name_var_table,
        typemap,
        copy_calltypes)

    if "out" in name_var_table:
        raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

    sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
    if config.DEBUG_ARRAY_OPT == 1:
        print("name_var_table", name_var_table, sentinel_name)

    the_array = args[0]

    if config.DEBUG_ARRAY_OPT == 1:
        print("_stencil_wrapper", return_type, return_type.dtype,
              type(return_type.dtype), args)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # We generate a Numba function to execute this stencil and here
    # create the unique name of this function.
    stencil_func_name = "__numba_stencil_%s_%s" % (
        hex(id(the_array)).replace("-", "_"),
        self.id)

    # We will put a loop nest in the generated function for each
    # dimension in the input array. Here we create the name for
    # the index variable for each dimension. index0, index1, ...
    index_vars = []
    for i in range(the_array.ndim):
        index_var_name = ir_utils.get_unused_var_name("index" + str(i),
                                                      name_var_table)
        index_vars.append(index_var_name)

    # Create extra signature for out and neighborhood.
    out_name = ir_utils.get_unused_var_name("out", name_var_table)
    neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
                                                     name_var_table)
    sig_extra = ""
    if result is not None:
        sig_extra += ", {}=None".format(out_name)
    if "neighborhood" in dict(self.kws):
        sig_extra += ", {}=None".format(neighborhood_name)

    # Get a list of the standard indexed array names.
    standard_indexed = self.options.get("standard_indexing", [])

    if first_arg in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must "
                         "use relative indexing, not standard indexing.")

    if set(standard_indexed) - set(kernel_copy.arg_names):
        raise ValueError("Standard indexing requested for an array name "
                         "not present in the stencil kernel definition.")

    # Add index variables to getitems in the IR to transition the accesses
    # in the kernel from relative to regular Python indexing. Returns the
    # computed size of the stencil kernel and a list of the relatively indexed
    # arrays.
    kernel_size, relatively_indexed = self.add_indices_to_kernel(
        kernel_copy, index_vars, the_array.ndim,
        self.neighborhood, standard_indexed)
    if self.neighborhood is None:
        self.neighborhood = kernel_size

    if config.DEBUG_ARRAY_OPT == 1:
        print("After add_indices_to_kernel")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # The return in the stencil kernel becomes a setitem for that
    # particular point in the iteration space.
    ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                  index_vars, out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("After replace_return_with_setitem", ret_blocks)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Start to form the new function to execute the stencil kernel.
    func_text = "def {}({}{}):\n".format(
        stencil_func_name, ",".join(kernel_copy.arg_names), sig_extra)

    # Get loop ranges for each dimension, which could be either int
    # or variable. In the latter case we'll use the extra neighborhood
    # argument to the function.
    ranges = []
    for i in range(the_array.ndim):
        if isinstance(kernel_size[i][0], int):
            lo = kernel_size[i][0]
            hi = kernel_size[i][1]
        else:
            lo = "{}[{}][0]".format(neighborhood_name, i)
            hi = "{}[{}][1]".format(neighborhood_name, i)
        ranges.append((lo, hi))

    # If there are more than one relatively indexed arrays, add a call to
    # a function that will raise an error if any of the relatively indexed
    # arrays are of different size than the first input array.
    if len(relatively_indexed) > 1:
        func_text += " raise_if_incompatible_array_sizes(" + first_arg
        for other_array in relatively_indexed:
            if other_array != first_arg:
                func_text += "," + other_array
        func_text += ")\n"

    # Get the shape of the first input array.
    shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
    func_text += " {} = {}.shape\n".format(shape_name, first_arg)

    # If we have to allocate the output array (the out argument was not used)
    # then use numpy.full if the user specified a cval stencil decorator option
    # or np.zeros if they didn't to allocate the array.
    if result is None:
        if "cval" in self.options:
            cval = self.options["cval"]
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            out_init = "{} = np.full({}, {}, dtype=np.{})\n".format(
                out_name, shape_name, cval, return_type.dtype)
        else:
            out_init = "{} = np.zeros({}, dtype=np.{})\n".format(
                out_name, shape_name, return_type.dtype)
        func_text += " " + out_init

    offset = 1
    # Add the loop nests to the new function.
    for i in range(the_array.ndim):
        func_text += " " * offset
        # ranges[i][0] is the minimum index used in the i'th dimension
        # but minimum's greater than 0 don't preclude any entry in the array.
        # So, take the minimum of 0 and the minimum index found in the kernel
        # and this will be a negative number (potentially -0). Then, we do
        # unary - on that to get the positive offset in this dimension whose
        # use is precluded.
        # ranges[i][1] is the maximum of 0 and the observed maximum index
        # in this dimension because negative maximums would not cause us to
        # preclude any entry in the array from being used.
        func_text += ("for {} in range(-min(0,{}),"
                      "{}[{}]-max(0,{})):\n").format(
                          index_vars[i],
                          ranges[i][0],
                          shape_name,
                          i,
                          ranges[i][1])
        offset += 1

    func_text += " " * offset
    # Put a sentinel in the code so we can locate it in the IR. We will
    # remove this sentinel assignment and replace it with the IR for the
    # stencil kernel body.
    func_text += "{} = 0\n".format(sentinel_name)
    func_text += " return {}\n".format(out_name)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new stencil func text")
        print(func_text)

    # Force the new stencil function into existence. The definition is
    # executed against the module globals (the generated text refers to
    # module-level names) and captured in an explicit namespace, so the
    # lookup does not depend on writes to the locals() dict persisting.
    namespace = {}
    exec_(func_text, globals(), namespace)
    stencil_func = namespace[stencil_func_name]
    if sigret is not None:
        pysig = utils.pysignature(stencil_func)
        sigret.pysig = pysig
    # Get the IR for the newly created stencil function.
    stencil_ir = compiler.run_frontend(stencil_func)
    ir_utils.remove_dels(stencil_ir.blocks)

    # rename all variables in stencil_ir afresh
    var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
    new_var_dict = {}
    reserved_names = ([sentinel_name, out_name, neighborhood_name,
                       shape_name] + kernel_copy.arg_names + index_vars)
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = ir_utils.mk_unique_var(name)
    ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

    stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

    # Shift labels in the kernel copy so they are guaranteed unique
    # and don't conflict with any labels in the stencil_ir.
    kernel_copy.blocks = ir_utils.add_offset_to_labels(
        kernel_copy.blocks, stencil_stub_last_label)
    new_label = max(kernel_copy.blocks.keys()) + 1
    # Adjust ret_blocks to account for addition of the offset.
    ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

    if config.DEBUG_ARRAY_OPT == 1:
        print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
        print("before replace sentinel stencil_ir")
        ir_utils.dump_blocks(stencil_ir.blocks)
        print("before replace sentinel kernel_copy")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Search all the block in the stencil outline for the sentinel.
    # The dict is modified only right before breaking out of both loops.
    for label, block in stencil_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(kernel_copy.blocks.keys())

                # The previous block jumps to the minimum labelled block of
                # the stencil kernel body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the stencil kernel blocks to the stencil
                # function's IR.
                for (l, b) in kernel_copy.blocks.items():
                    stencil_ir.blocks[l] = b

                stencil_ir.blocks[new_label] = block
                stencil_ir.blocks[label] = prev_block
                # Add a jump from all the blocks that previously contained
                # a return in the stencil kernel to the block
                # containing statements after the sentinel.
                for ret_block in ret_blocks:
                    stencil_ir.blocks[ret_block].append(
                        ir.Jump(new_label, loc))
                break
        else:
            continue
        break

    stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
    ir_utils.remove_dels(stencil_ir.blocks)

    assert(isinstance(the_array, types.Type))
    array_types = args

    new_stencil_param_types = list(array_types)

    if config.DEBUG_ARRAY_OPT == 1:
        print("new_stencil_param_types", new_stencil_param_types)
        ir_utils.dump_blocks(stencil_ir.blocks)

    # Compile the combined stencil function with the replaced loop
    # body in it.
    new_func = compiler.compile_ir(
        self._typingctx,
        self._targetctx,
        stencil_ir,
        new_stencil_param_types,
        None,
        compiler.DEFAULT_FLAGS,
        {})
    return new_func
def _create_gufunc_for_parfor_body(
        lowerer,
        parfor,
        typemap,
        typingctx,
        targetctx,
        flags,
        locals,
        has_aliases,
        index_var_typ):
    '''
    Takes a parfor and creates a gufunc function for its body.

    There are two parts to this function.
    1) Code to iterate across the iteration space as defined by the schedule.
    2) The parfor body that does the work for a single point in the
       iteration space.

    Part 1 is created as Python text for simplicity with a sentinel
    assignment to mark the point in the IR where the parfor body should be
    added.  This Python text is 'exec'ed into existence and its IR retrieved
    with run_frontend.  The IR is scanned for the sentinel assignment where
    that basic block is split and the IR for the parfor body inserted.

    Parameters:
        lowerer: its ``fndesc.calltypes`` is passed to the reduction
            analysis and receives signatures for any debug print nodes
            added here.
        parfor: the parfor node; ``loop_body``, ``loop_nests`` and
            ``params`` are read.
        typemap: variable name -> type mapping.  It is MUTATED: an entry
            is added for every ``<redvar>_arr`` reduction array (and for
            debug string constants when DEBUG_ARRAY_OPT_RUNTIME is set).
        typingctx, targetctx: forwarded to ``compiler.compile_ir``.
        flags: compilation flags forwarded to ``compiler.compile_ir``.
            ``flags.noalias`` is temporarily forced to True while compiling
            when ``has_aliases`` is false and is always restored afterwards,
            even if compilation raises.
        locals: forwarded to ``compiler.compile_ir``.
        has_aliases: whether aliasing was detected among the parfor arrays.
        index_var_typ: element type of the 1-D C-ordered ``sched`` array
            that is the first parameter of the generated function.

    Returns:
        A tuple ``(kernel_func, parfor_args, kernel_sig)``: the result of
        compiling the combined IR, the parameter names before legalization
        (inputs, then outputs, then reduction arrays) and the kernel's
        signature.
    '''
    # The parfor body and the main function body share ir.Var nodes.
    # We have to do some replacements of Var names in the parfor body to
    # make them legal parameter names.  If we don't copy then the Vars in
    # the main function also would incorrectly change their name.
    loop_body = copy.copy(parfor.loop_body)
    remove_dels(loop_body)

    parfor_dim = len(parfor.loop_nests)
    loop_indices = [nest.index_variable.name for nest in parfor.loop_nests]

    # Classify the parfor parameters into outputs, reduction variables and
    # (by set difference) plain inputs.  The reduction operator dictionary
    # returned alongside the reduction variables is not needed here.
    parfor_params = parfor.params
    parfor_outputs = numba.parfor.get_parfor_outputs(parfor, parfor_params)
    parfor_redvars, _ = numba.parfor.get_parfor_reductions(
        parfor, parfor_params, lowerer.fndesc.calltypes)
    parfor_inputs = sorted(
        set(parfor_params) - set(parfor_outputs) - set(parfor_redvars))

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("parfor_outputs = ", parfor_outputs, " ", type(parfor_outputs))
        print("parfor_inputs = ", parfor_inputs, " ", type(parfor_inputs))
        print("parfor_redvars = ", parfor_redvars, " ", type(parfor_redvars))

    # Reduction variables are represented as arrays, so they go under
    # different names.
    parfor_redarrs = []
    for var in parfor_redvars:
        arr = var + "_arr"
        parfor_redarrs.append(arr)
        typemap[arr] = types.npytypes.Array(typemap[var], 1, "C")

    # Reorder all the params so that inputs go first then outputs.
    parfor_params = parfor_inputs + parfor_outputs + parfor_redarrs

    if config.DEBUG_ARRAY_OPT == 1:
        print("parfor_params = ", parfor_params, " ", type(parfor_params))
        print("loop_indices = ", loop_indices, " ", type(loop_indices))
        print("loop_body = ", loop_body, " ", type(loop_body))
        _print_body(loop_body)

    # Some Var are not legal parameter names so create a dict of potentially
    # illegal param name to guaranteed legal name.
    param_dict = legalize_names(parfor_params + parfor_redvars)
    if config.DEBUG_ARRAY_OPT == 1:
        print(
            "param_dict = ",
            sorted(param_dict.items()),
            " ",
            type(param_dict))

    # Same legalization for the loop index names.
    ind_dict = legalize_names(loop_indices)
    legal_loop_indices = [ind_dict[v] for v in loop_indices]
    if config.DEBUG_ARRAY_OPT == 1:
        print("ind_dict = ", sorted(ind_dict.items()), " ", type(ind_dict))
        print(
            "legal_loop_indices = ",
            legal_loop_indices,
            " ",
            type(legal_loop_indices))
        for pd in parfor_params:
            print("pd = ", pd)
            print("pd type = ", typemap[pd], " ", type(typemap[pd]))

    # Get the types of each parameter.
    param_types = [typemap[v] for v in parfor_params]

    # Replace illegal parameter names in the loop body with legal ones.
    replace_var_names(loop_body, param_dict)
    # Remember the names before legalizing as the actual arguments.
    parfor_args = parfor_params
    # Legal names of the parameters, in argument order.
    parfor_params_orig = [param_dict[v] for v in parfor_params]

    # With ascontig enabled, array inputs are received under a "...param"
    # name and rebound through np.ascontiguousarray in the generated code.
    # The switch is hard-wired off, so names pass through unchanged.
    ascontig = False
    parfor_params = []
    for pindex, legal_name in enumerate(parfor_params_orig):
        if (ascontig and pindex < len(parfor_inputs)
                and isinstance(param_types[pindex], types.npytypes.Array)):
            parfor_params.append(legal_name + "param")
        else:
            parfor_params.append(legal_name)

    # Change parfor body to replace illegal loop index vars with legal ones.
    replace_var_names(loop_body, ind_dict)
    loop_body_var_table = get_name_var_table(loop_body)
    sentinel_name = get_unused_var_name("__sentinel__", loop_body_var_table)

    if config.DEBUG_ARRAY_OPT == 1:
        print(
            "legal parfor_params = ",
            parfor_params,
            " ",
            type(parfor_params))

    # Determine the unique name of the gufunc function.
    gufunc_name = "__numba_parfor_gufunc_%s" % (
        hex(hash(parfor)).replace("-", "_"))
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_name ", type(gufunc_name), " ", gufunc_name)

    # Build the source text of the gufunc outline piece by piece.
    indent = " "  # one indentation level in the generated source
    txt_parts = [
        "def " + gufunc_name + "(sched, " + ", ".join(parfor_params) + "):\n"]

    for pindex in range(len(parfor_inputs)):
        if ascontig and isinstance(param_types[pindex], types.npytypes.Array):
            txt_parts.append(
                indent + parfor_params_orig[pindex]
                + " = np.ascontiguousarray(" + parfor_params[pindex] + ")\n")

    # Add initialization of reduction variables.
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        txt_parts.append(
            indent + param_dict[var] + "=" + param_dict[arr] + "[0]\n")

    # For each dimension of the parfor, create a for loop in the generated
    # gufunc function.  Iterate across the proper values extracted from the
    # schedule.  The form of the schedule is start_dim0, start_dim1, ...,
    # start_dimN, end_dim0, end_dim1, ..., end_dimN.
    for eachdim in range(parfor_dim):
        txt_parts.append(
            indent * (eachdim + 1)
            + "for " + legal_loop_indices[eachdim]
            + " in range(sched[" + str(eachdim)
            + "], sched[" + str(eachdim + parfor_dim)
            + "] + np.uint8(1)):\n")

    # Everything placed inside the innermost loop uses this indentation.
    inner_indent = indent * (parfor_dim + 1)

    if config.DEBUG_ARRAY_OPT_RUNTIME:
        # Print every loop index (name and value) on each iteration.
        txt_parts.append(inner_indent + "print(")
        for eachdim in range(parfor_dim):
            txt_parts.append(
                "\"" + legal_loop_indices[eachdim] + "\","
                + legal_loop_indices[eachdim] + ",")
        txt_parts.append(")\n")

    # Add the sentinel assignment so that we can find the loop body position
    # in the IR.
    txt_parts.append(inner_indent + sentinel_name + " = 0\n")
    # Add assignments of reduction variables (for returning the value).
    for arr, var in zip(parfor_redarrs, parfor_redvars):
        txt_parts.append(
            indent + param_dict[arr] + "[0] = " + param_dict[var] + "\n")
    txt_parts.append(indent + "return None\n")
    gufunc_txt = "".join(txt_parts)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt)
    # Force gufunc outline into existence.
    globls = {"np": np}
    locls = {}
    exec_(gufunc_txt, globls, locls)
    gufunc_func = locls[gufunc_name]

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func)
    # Get the IR for the gufunc outline.
    gufunc_ir = compiler.run_frontend(gufunc_func)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump ", type(gufunc_ir))
        gufunc_ir.dump()
        print("loop_body dump ", type(loop_body))
        _print_body(loop_body)

    # Rename all variables in gufunc_ir afresh, except the sentinel, the
    # legalized parameters and the legalized loop indices.
    var_table = get_name_var_table(gufunc_ir.blocks)
    reserved_names = set(
        [sentinel_name] + list(param_dict.values()) + legal_loop_indices)
    new_var_dict = {}
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = mk_unique_var(name)
    replace_var_names(gufunc_ir.blocks, new_var_dict)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump after renaming ")
        gufunc_ir.dump()

    # The schedule array comes first, followed by the parfor parameters.
    gufunc_param_types = [
        numba.types.npytypes.Array(index_var_typ, 1, "C")] + param_types
    if config.DEBUG_ARRAY_OPT:
        print(
            "gufunc_param_types = ",
            type(gufunc_param_types),
            "\n",
            gufunc_param_types)

    gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1

    # Add gufunc stub last label to each parfor.loop_body label to prevent
    # label conflicts.
    loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label)
    # New label for the block holding the statements after the sentinel.
    new_label = max(loop_body.keys()) + 1

    # If enabled, add a print statement after every assignment.
    if config.DEBUG_ARRAY_OPT_RUNTIME:
        for label, block in loop_body.items():
            new_block = block.copy()
            new_block.clear()
            loc = block.loc
            scope = block.scope
            for inst in block.body:
                new_block.append(inst)
                # Append print after assignment
                if isinstance(inst, ir.Assign):
                    # Only apply to numbers
                    if typemap[inst.target.name] not in types.number_domain:
                        continue

                    # Make constant string
                    strval = "{} =".format(inst.target.name)
                    strconsttyp = types.Const(strval)

                    lhs = ir.Var(scope, mk_unique_var("str_const"), loc)
                    assign_lhs = ir.Assign(
                        value=ir.Const(value=strval, loc=loc),
                        target=lhs, loc=loc)
                    typemap[lhs.name] = strconsttyp
                    new_block.append(assign_lhs)

                    # Make print node
                    print_node = ir.Print(
                        args=[lhs, inst.target], vararg=None, loc=loc)
                    new_block.append(print_node)
                    sig = numba.typing.signature(
                        types.none,
                        typemap[lhs.name],
                        typemap[inst.target.name])
                    lowerer.fndesc.calltypes[print_node] = sig
            # Replacing the value of an existing key does not resize the
            # dict, so this is safe while iterating over it.
            loop_body[label] = new_block

    if config.DEBUG_ARRAY_OPT:
        print("parfor loop body")
        _print_body(loop_body)

    # Insert the instructions returned by hoist() into the first block of
    # the outline, just before that block's last statement.
    wrapped_blocks = wrap_loop_body(loop_body)
    hoisted = hoist(parfor_params, loop_body, typemap, wrapped_blocks)
    start_block = gufunc_ir.blocks[min(gufunc_ir.blocks.keys())]
    start_block.body = start_block.body[:-1] + hoisted + [start_block.body[-1]]
    unwrap_loop_body(loop_body)

    if config.DEBUG_ARRAY_OPT:
        print("After hoisting")
        _print_body(loop_body)

    # Search all the blocks in the gufunc outline for the sentinel
    # assignment.  Iterate over a snapshot because blocks are added to
    # gufunc_ir.blocks once the sentinel is found.
    for label, block in list(gufunc_ir.blocks.items()):
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # Split the block across the sentinel.  A new block is
                # allocated for the statements prior to the sentinel but
                # the new block maintains the current block label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after the
                # sentinel, and is re-registered under new_label below.
                block.body = block.body[i + 1:]
                body_first_label = min(loop_body.keys())

                # The previous block jumps to the minimum labelled block of
                # the parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc
                # function's IR.
                gufunc_ir.blocks.update(loop_body)
                body_last_label = max(loop_body.keys())
                gufunc_ir.blocks[new_label] = block
                gufunc_ir.blocks[label] = prev_block
                # Add a jump from the last parfor body block to the block
                # containing statements after the sentinel.
                gufunc_ir.blocks[body_last_label].append(
                    ir.Jump(new_label, loc))
                break
        else:
            continue
        break

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump before renaming")
        gufunc_ir.dump()

    gufunc_ir.blocks = rename_labels(gufunc_ir.blocks)
    remove_dels(gufunc_ir.blocks)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump")
        gufunc_ir.dump()
        print("flags", flags)
        print("typemap", typemap)

    # flags belongs to the caller, so the noalias override must be undone
    # on every exit path, including a failed compilation.
    old_alias = flags.noalias
    if not has_aliases:
        if config.DEBUG_ARRAY_OPT:
            print("No aliases found so adding noalias flag.")
        flags.noalias = True
    try:
        kernel_func = compiler.compile_ir(
            typingctx,
            targetctx,
            gufunc_ir,
            gufunc_param_types,
            types.none,
            flags,
            locals)
    finally:
        flags.noalias = old_alias

    kernel_sig = signature(types.none, *gufunc_param_types)
    if config.DEBUG_ARRAY_OPT:
        print("kernel_sig = ", kernel_sig)

    return kernel_func, parfor_args, kernel_sig