def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope, loc):
    """Compute the (exclusive) upper loop bound for one stencil dimension.

    Returns *dim_size* unchanged when the stencil has no trailing
    neighborhood (``end_length == 0``).  Otherwise appends IR nodes to
    *gen_nodes* that call the jitted ``_compute_last_ind`` helper at
    runtime and returns the intp-typed ir.Var holding its result.
    """
    last_ind = dim_size
    if end_length != 0:
        # set last index to size minus stencil size to avoid invalid
        # memory access
        index_const = ir.Var(scope, mk_unique_var("stencil_const_var"), loc)
        self.typemap[index_const.name] = types.intp
        # end_length may be a compile-time number or an ir.Var computed
        # earlier; both are funneled into an intp-typed variable here.
        if isinstance(end_length, numbers.Number):
            const_assign = ir.Assign(ir.Const(end_length, loc),
                                     index_const, loc)
        else:
            const_assign = ir.Assign(end_length, index_const, loc)
        gen_nodes.append(const_assign)

        last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
        self.typemap[last_ind.name] = types.intp

        # Emit: last_ind = _compute_last_ind(dim_size, end_length)
        g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
        check_func = numba.njit(_compute_last_ind)
        func_typ = types.functions.Dispatcher(check_func)
        self.typemap[g_var.name] = func_typ
        g_obj = ir.Global("_compute_last_ind", check_func, loc)
        g_assign = ir.Assign(g_obj, g_var, loc)
        gen_nodes.append(g_assign)
        index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
        self.calltypes[index_call] = func_typ.get_call_type(
            self.typingctx, [types.intp, types.intp], {})
        index_assign = ir.Assign(index_call, last_ind, loc)
        gen_nodes.append(index_assign)
    return last_ind
def test_var(self):
    """ir.Var equality is by name only; loc and scope do not matter."""
    base = ir.Var(None, 'foo', self.loc1)
    # Same name and loc.
    twin = ir.Var(None, 'foo', self.loc1)
    # Same name, different loc.
    moved = ir.Var(None, 'foo', self.loc2)
    # Same name, explicit scope.
    scoped = ir.Var(ir.Scope(None, ir.unknown_loc), 'foo', self.loc1)
    # Different name.
    renamed = ir.Var(None, 'bar', self.loc1)
    self.check(base, same=[twin, moved, scoped], different=[renamed])
def handle_border(slice_fn_ty, dim, scope, loc, slice_func_var,
                  stmts, border_inds, border_tuple_items,
                  other_arg, other_first):
    # Handle the border for start or end of the index range.
    # NOTE(review): this is a closure — it relies on `self`, `ndims`,
    # `out_arr` and `zero_var` from the enclosing scope, which are not
    # visible in this chunk; confirm against the enclosing function.
    # ---- Generate call to slice func.
    sig = self.typingctx.resolve_function_type(
        slice_fn_ty, (types.intp,) * 2, {})
    si = border_inds[dim]
    assert(isinstance(si, (int, ir.Var)))
    si_var = ir.Var(scope, mk_unique_var("$border_ind"), loc)
    self.typemap[si_var.name] = types.intp
    # The border index may be a compile-time int or an existing ir.Var.
    if isinstance(si, int):
        si_assign = ir.Assign(ir.Const(si, loc), si_var, loc)
    else:
        si_assign = ir.Assign(si, si_var, loc)
    stmts.append(si_assign)
    # other_first chooses slice(other, si) versus slice(si, other),
    # i.e. whether this is the end or the start border of the range.
    slice_callexpr = ir.Expr.call(
        func=slice_func_var,
        args=(other_arg, si_var) if other_first else (si_var, other_arg),
        kws=(), loc=loc)
    self.calltypes[slice_callexpr] = sig
    # ---- Generate slice var
    border_slice_var = ir.Var(scope, mk_unique_var("$slice"), loc)
    self.typemap[border_slice_var.name] = types.slice2_type
    slice_assign = ir.Assign(slice_callexpr, border_slice_var, loc)
    stmts.append(slice_assign)

    # Build the full index tuple with this dimension replaced by the slice.
    border_tuple_items[dim] = border_slice_var
    border_ind_var = ir.Var(scope, mk_unique_var(
        "$border_index_tuple_var"), loc)
    self.typemap[border_ind_var.name] = types.containers.UniTuple(
        types.slice2_type, ndims)
    tuple_call = ir.Expr.build_tuple(border_tuple_items, loc)
    tuple_assign = ir.Assign(tuple_call, border_ind_var, loc)
    stmts.append(tuple_assign)

    # out_arr[border_ind_var] = zero_var — fill the border region.
    setitem_call = ir.SetItem(out_arr, border_ind_var, zero_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[border_ind_var.name],
        self.typemap[out_arr.name].dtype
    )
    stmts.append(setitem_call)
def replace_var_with_array_in_block(vars, block, typemap, calltypes):
    """Rewrite assignments to variables named in *vars* as SetItems.

    Each ``v = value`` (for ``v`` in *vars*) becomes ``v[0] = value`` so
    the scalar can be carried in a 1-element array; recurses into nested
    parfors.  Returns the rewritten statement list (the caller installs
    it; ``block.body`` is not mutated here).
    """
    new_block = []
    for inst in block.body:
        if isinstance(inst, ir.Assign) and inst.target.name in vars:
            # First emit the constant index 0 ...
            const_node = ir.Const(0, inst.loc)
            const_var = ir.Var(inst.target.scope,
                               mk_unique_var("$const_ind_0"), inst.loc)
            typemap[const_var.name] = types.uintp
            const_assign = ir.Assign(const_node, const_var, inst.loc)
            new_block.append(const_assign)

            # ... then target[0] = value.  The 1D array type is built
            # using the target's recorded (scalar) type as the dtype.
            setitem_node = ir.SetItem(inst.target, const_var, inst.value,
                                      inst.loc)
            # NOTE(review): const_var is typed uintp above but the SetItem
            # signature declares the index as intp — looks inconsistent;
            # confirm which type the lowering actually expects.
            calltypes[setitem_node] = signature(
                types.none,
                types.npytypes.Array(typemap[inst.target.name], 1, "C"),
                types.intp,
                typemap[inst.target.name],
            )
            new_block.append(setitem_node)
            continue
        elif isinstance(inst, parfor.Parfor):
            # Recurse into the parfor's init block and loop body.
            replace_var_with_array_internal(vars, {0: inst.init_block},
                                            typemap, calltypes)
            replace_var_with_array_internal(vars, inst.loop_body,
                                            typemap, calltypes)
        new_block.append(inst)
    return new_block
class CheckEquality(unittest.TestCase):
    """Shared IR fixtures plus a helper asserting __eq__/__ne__ behavior."""

    # Named variables in no scope; equality should hinge on the name only.
    var_a = ir.Var(None, "a", ir.unknown_loc)
    var_b = ir.Var(None, "b", ir.unknown_loc)
    var_c = ir.Var(None, "c", ir.unknown_loc)
    var_d = ir.Var(None, "d", ir.unknown_loc)
    var_e = ir.Var(None, "e", ir.unknown_loc)
    loc1 = ir.Loc("mock", 1, 0)
    loc2 = ir.Loc("mock", 2, 0)
    loc3 = ir.Loc("mock", 3, 0)

    def check(self, base, same=(), different=()):
        """Assert *base* equals everything in *same* and differs from
        everything in *different*.

        Defaults are immutable tuples instead of the shared-mutable-list
        ``[]`` anti-pattern; callers passing lists are unaffected.
        """
        for s in same:
            self.assertTrue(base == s)
        for d in different:
            self.assertTrue(base != d)
class CheckEquality(unittest.TestCase):
    """Shared IR fixtures plus a helper asserting __eq__/__ne__ behavior."""

    # Named variables in no scope; equality should hinge on the name only.
    var_a = ir.Var(None, 'a', ir.unknown_loc)
    var_b = ir.Var(None, 'b', ir.unknown_loc)
    var_c = ir.Var(None, 'c', ir.unknown_loc)
    var_d = ir.Var(None, 'd', ir.unknown_loc)
    var_e = ir.Var(None, 'e', ir.unknown_loc)
    loc1 = ir.Loc('mock', 1, 0)
    loc2 = ir.Loc('mock', 2, 0)
    loc3 = ir.Loc('mock', 3, 0)

    def check(self, base, same=(), different=()):
        """Assert *base* equals everything in *same* and differs from
        everything in *different*.

        Defaults are immutable tuples instead of the shared-mutable-list
        ``[]`` anti-pattern; callers passing lists are unaffected.
        """
        for s in same:
            self.assertTrue(base == s)
        for d in different:
            self.assertTrue(base != d)
def test_inline_update_target_def(self):
    """inline_closure_call must keep func_ir._definitions consistent for
    the assignment target at the inlined call site."""

    def test_impl(a):
        if a == 1:
            b = 2
        else:
            b = 3
        return b

    func_ir = compiler.run_frontend(test_impl)
    blocks = list(func_ir.blocks.values())
    for block in blocks:
        for i, stmt in enumerate(block.body):
            # match b = 2 and replace with lambda: 2
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Var)
                    and guard(find_const, func_ir, stmt.value) == 2):
                # replace expr with a dummy call
                func_ir._definitions[stmt.target.name].remove(stmt.value)
                stmt.value = ir.Expr.call(
                    ir.Var(block.scope, "myvar", loc=stmt.loc),
                    (), (), stmt.loc)
                func_ir._definitions[stmt.target.name].append(stmt.value)
                inline_closure_call(func_ir, {}, block, i, lambda: 2)
                break

    # 'b' must still have exactly two definitions (one per branch).
    self.assertEqual(len(func_ir._definitions['b']), 2)
def _dbgprint_after_each_array_assignments(lowerer, loop_body, typemap):
    """Debug helper: insert an ir.Print after every numeric assignment in
    *loop_body* so values can be traced at runtime.

    Mutates *loop_body* in place, replacing each block with an
    instrumented copy; call types for the Print nodes are registered on
    ``lowerer.fndesc.calltypes``.
    """
    for label, block in loop_body.items():
        new_block = block.copy()
        new_block.clear()
        loc = block.loc
        scope = block.scope
        for inst in block.body:
            new_block.append(inst)
            # Append print after assignment
            if isinstance(inst, ir.Assign):
                # Only apply to numbers
                if typemap[inst.target.name] not in types.number_domain:
                    continue

                # Make constant string
                strval = "{} =".format(inst.target.name)
                strconsttyp = types.StringLiteral(strval)

                lhs = ir.Var(scope, mk_unique_var("str_const"), loc)
                assign_lhs = ir.Assign(value=ir.Const(value=strval, loc=loc),
                                       target=lhs, loc=loc)
                typemap[lhs.name] = strconsttyp
                new_block.append(assign_lhs)

                # Make print node
                print_node = ir.Print(args=[lhs, inst.target], vararg=None,
                                      loc=loc)
                new_block.append(print_node)
                sig = numba.typing.signature(types.none,
                                             typemap[lhs.name],
                                             typemap[inst.target.name])
                lowerer.fndesc.calltypes[print_node] = sig
        loop_body[label] = new_block
def replace_return_with_setitem(self, blocks, index_vars, out_name):
    """
    Find return statements in the IR and replace them with a SetItem
    call of the value "returned" by the kernel into the result array.
    Returns the block labels that contained return statements.
    """
    ret_blocks = []

    for label, block in blocks.items():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Return):
                ret_blocks.append(label)
                # If 1D array then avoid the tuple construction.
                if len(index_vars) == 1:
                    rvar = ir.Var(scope, out_name, loc)
                    ivar = ir.Var(scope, index_vars[0], loc)
                    new_body.append(ir.SetItem(rvar, ivar, stmt.value, loc))
                else:
                    # Convert the string names of the index variables into
                    # ir.Var's.
                    var_index_vars = []
                    for one_var in index_vars:
                        index_var = ir.Var(scope, one_var, loc)
                        var_index_vars += [index_var]

                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    # Build a tuple from the index ir.Var's.
                    tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    rvar = ir.Var(scope, out_name, loc)
                    # Write the return statements original value into
                    # the array using the tuple index.
                    si = ir.SetItem(rvar, s_index_var, stmt.value, loc)
                    new_body.append(si)
            else:
                new_body.append(stmt)
        block.body = new_body
    return ret_blocks
def _gen_rebalances(self, rebalance_arrs, blocks):
    """Insert a rebalance call after every assignment producing an array
    named in *rebalance_arrs*.

    The original producer is redirected into a temporary, then
    ``sdc.distributed_api.rebalance_array(tmp)`` is compiled to IR and
    its result is stored into the original target.  Recurses into
    parfor init blocks and loop bodies.
    """
    for block in blocks.values():
        new_body = []
        for inst in block.body:
            # TODO: handle hiframes filter etc.
            if isinstance(inst, Parfor):
                self._gen_rebalances(rebalance_arrs, {0: inst.init_block})
                self._gen_rebalances(rebalance_arrs, inst.loop_body)
            if isinstance(inst, ir.Assign) and inst.target.name in rebalance_arrs:
                out_arr = inst.target
                self.func_ir._definitions[out_arr.name].remove(inst.value)
                # hold inst results in tmp array
                tmp_arr = ir.Var(out_arr.scope,
                                 mk_unique_var("rebalance_tmp"),
                                 out_arr.loc)
                self.typemap[tmp_arr.name] = self.typemap[out_arr.name]
                inst.target = tmp_arr
                nodes = [inst]

                def f(in_arr):  # pragma: no cover
                    out_a = sdc.distributed_api.rebalance_array(in_arr)

                f_block = compile_to_numba_ir(
                    f,
                    {'sdc': sdc},
                    self.typingctx,
                    (self.typemap[tmp_arr.name],),
                    self.typemap,
                    self.calltypes).blocks.popitem()[1]
                replace_arg_nodes(f_block, [tmp_arr])
                nodes += f_block.body[:-3]  # remove none return
                nodes[-1].target = out_arr
                # update definitions
                dumm_block = ir.Block(out_arr.scope, out_arr.loc)
                dumm_block.body = nodes
                build_definitions({0: dumm_block}, self.func_ir._definitions)
                new_body += nodes
            else:
                new_body.append(inst)
        block.body = new_body
def assign_inplace(self, rhs, typ, name) -> ir.Var:
    """Assign *rhs* to the variable called *name*, creating it if needed.

    Parameters
    ----------
    rhs : object
        The value
    typ : types.Type
        type of the value
    name : str
        variable name to store to

    Returns
    -------
    res : ir.Var
    """
    target = ir.Var(self._scope, name, self._loc)
    # Keep any pre-existing type binding; only register typ for new names.
    self._typemap.setdefault(target.name, typ)
    self._lowerer.lower_inst(ir.Assign(rhs, target, self._loc))
    return target
def assign(self, rhs, typ, name="pf_assign") -> ir.Var:
    """Assign *rhs* to a freshly named variable.

    Parameters
    ----------
    rhs : object
        The value
    typ : types.Type
        type of the value
    name : str
        variable name to store to

    Returns
    -------
    res : ir.Var
    """
    where = self._loc
    # A unique name is generated so repeated calls never collide.
    fresh = ir.Var(self._scope, mk_unique_var(name), where)
    self._typemap[fresh.name] = typ
    self._lowerer.lower_inst(ir.Assign(rhs, fresh, where))
    return fresh
def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                     body_blocks, dispatcher_factory, extra):
    """Implement the ``with parallel_chunksize(n)`` context: call
    ``numba.set_parallel_chunksize(n)`` on entry to the with-region and
    restore the saved previous chunksize on exit.
    """
    ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
    assert extra is not None
    args = extra["args"]
    # The context manager takes exactly one argument (the chunk size).
    assert len(args) == 1
    arg = args[0]
    scope = blocks[blk_start].scope
    loc = blocks[blk_start].loc
    # Normalize an ir.Arg into an ir.Var so it can appear in an Assign.
    if isinstance(arg, ir.Arg):
        arg = ir.Var(scope, arg.name, loc)

    set_state = []
    restore_state = []

    # global for Numba itself
    gvar = scope.redefine("$ngvar", loc)
    set_state.append(ir.Assign(ir.Global('numba', numba, loc), gvar, loc))
    # getattr for set chunksize function in Numba
    spcattr = ir.Expr.getattr(gvar, 'set_parallel_chunksize', loc)
    spcvar = scope.redefine("$spc", loc)
    set_state.append(ir.Assign(spcattr, spcvar, loc))
    # call set_parallel_chunksize
    orig_pc_var = scope.redefine("$save_pc", loc)
    cs_var = scope.redefine("$cs_var", loc)
    set_state.append(ir.Assign(arg, cs_var, loc))
    spc_call = ir.Expr.call(spcvar, [cs_var], (), loc)
    # set_parallel_chunksize returns the previous value; save it so the
    # exit block can restore it.
    set_state.append(ir.Assign(spc_call, orig_pc_var, loc))

    restore_spc_call = ir.Expr.call(spcvar, [orig_pc_var], (), loc)
    restore_state.append(ir.Assign(restore_spc_call, orig_pc_var, loc))

    # Splice set_state just before the entry block's terminator and
    # prepend restore_state to the exit block.
    blocks[blk_start].body = (blocks[blk_start].body[1:-1]
                              + set_state
                              + [blocks[blk_start].body[-1]])
    blocks[blk_end].body = restore_state + blocks[blk_end].body
    func_ir._definitions = build_definitions(blocks)
    ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
def _lower_parfor_gufunc(lowerer, parfor):
    """Lowerer that handles LLVM code generation for parfor.
    This function lowers a parfor IR node to LLVM. The general approach
    is as follows:
    1) The code from the parfor's init block is lowered normally
       in the context of the current function.
    2) The body of the parfor is transformed into a gufunc function.
    3) Code is inserted into the main function that calls do_scheduling
       to divide the iteration space for each thread, allocates
       reduction arrays, calls the gufunc function, and then invokes
       the reduction function across the reduction arrays to produce
       the final reduction values.
    """
    typingctx = lowerer.context.typing_context
    targetctx = lowerer.context
    # We copy the typemap here because for race condition variable we'll
    # update their type to array so they can be updated by the gufunc.
    orig_typemap = lowerer.fndesc.typemap
    # replace original typemap with copy and restore the original at the end.
    lowerer.fndesc.typemap = copy.copy(orig_typemap)
    if config.DEBUG_ARRAY_OPT:
        print("lowerer.fndesc", lowerer.fndesc, type(lowerer.fndesc))
    typemap = lowerer.fndesc.typemap
    varmap = lowerer.varmap

    if config.DEBUG_ARRAY_OPT:
        print("_lower_parfor_parallel")
        parfor.dump()

    loc = parfor.init_block.loc
    scope = parfor.init_block.scope

    # produce instructions for init_block
    if config.DEBUG_ARRAY_OPT:
        print("init_block = ", parfor.init_block, type(parfor.init_block))
    for instr in parfor.init_block.body:
        if config.DEBUG_ARRAY_OPT:
            print("lower init_block instr = ", instr)
        lowerer.lower_inst(instr)

    # Pre-allocate storage for race variables not yet in the varmap.
    for racevar in parfor.races:
        if racevar not in varmap:
            rvtyp = typemap[racevar]
            rv = ir.Var(scope, racevar, loc)
            lowerer._alloca_var(rv.name, rvtyp)

    alias_map = {}
    arg_aliases = {}
    numba.parfors.parfor.find_potential_aliases_parfor(
        parfor, parfor.params, typemap, lowerer.func_ir, alias_map,
        arg_aliases)
    if config.DEBUG_ARRAY_OPT:
        print("alias_map", alias_map)
        print("arg_aliases", arg_aliases)

    # run get_parfor_outputs() and get_parfor_reductions() before
    # gufunc creation since Jumps are modified so CFG of loop_body
    # dict will become invalid
    assert parfor.params is not None

    parfor_output_arrays = numba.parfors.parfor.get_parfor_outputs(
        parfor, parfor.params)

    # compile parfor body as a separate function to be used with GUFuncWrapper
    flags = copy.copy(parfor.flags)
    flags.error_model = "numpy"

    # Can't get here unless flags.set('auto_parallel', ParallelOptions(True))
    index_var_typ = typemap[parfor.loop_nests[0].index_variable.name]

    # index variables should have the same type, check rest of indices
    for l in parfor.loop_nests[1:]:
        assert typemap[l.index_variable.name] == index_var_typ

    numba.parfors.parfor.sequential_parfor_lowering = True
    loop_ranges = [(l.start, l.stop, l.step) for l in parfor.loop_nests]

    # The global flag above must be reset even if gufunc creation fails.
    try:
        (
            func,
            func_args,
            func_sig,
            func_arg_types,
            modified_arrays,
        ) = _create_gufunc_for_parfor_body(
            lowerer,
            parfor,
            typemap,
            typingctx,
            targetctx,
            flags,
            loop_ranges,
            {},
            bool(alias_map),
            index_var_typ,
            parfor.races,
        )
    finally:
        numba.parfors.parfor.sequential_parfor_lowering = False

    # get the shape signature
    get_shape_classes = parfor.get_shape_classes

    num_inputs = len(func_args) - len(parfor_output_arrays)
    if config.DEBUG_ARRAY_OPT:
        print("func", func, type(func))
        print("func_args", func_args, type(func_args))
        print("func_sig", func_sig, type(func_sig))
        print("num_inputs = ", num_inputs)
        print("parfor_outputs = ", parfor_output_arrays)

    # call the func in parallel by wrapping it with ParallelGUFuncBuilder
    if config.DEBUG_ARRAY_OPT:
        print("loop_nests = ", parfor.loop_nests)
        print("loop_ranges = ", loop_ranges)

    gu_signature = _create_shape_signature(
        parfor.get_shape_classes,
        num_inputs,
        func_args,
        func_sig,
        parfor.races,
        typemap,
    )

    generate_kernel_launch_ops(
        lowerer,
        func,
        gu_signature,
        func_sig,
        func_args,
        num_inputs,
        func_arg_types,
        loop_ranges,
        modified_arrays,
    )

    if config.DEBUG_ARRAY_OPT:
        sys.stdout.flush()

    # Restore the original typemap of the function that was replaced
    # temporarily at the beginning of this function.
    lowerer.fndesc.typemap = orig_typemap
def _add_index_offsets(self, index_list, index_offsets, new_body,
                       scope, loc):
    """ Does the actual work of adding loop index variables to the
        relative index constants or variables.

        Returns the list of combined index values (plain ints when
        everything is constant, otherwise ir.Var's); generated IR is
        appended to *new_body*.
    """
    assert len(index_list) == len(index_offsets)

    # shortcut if all values are integer
    if all([isinstance(v, int) for v in index_list + index_offsets]):
        # add offsets in all dimensions
        return list(map(add, index_list, index_offsets))

    out_nodes = []
    index_vars = []
    for i in range(len(index_list)):
        # new_index = old_index + offset
        old_index_var = index_list[i]
        # Materialize a constant index into an intp-typed ir.Var.
        if isinstance(old_index_var, int):
            old_index_var = ir.Var(scope,
                                   mk_unique_var("old_index_var"), loc)
            self.typemap[old_index_var.name] = types.intp
            const_assign = ir.Assign(ir.Const(index_list[i], loc),
                                     old_index_var, loc)
            out_nodes.append(const_assign)

        offset_var = index_offsets[i]
        # Likewise materialize a constant offset.
        if isinstance(offset_var, int):
            offset_var = ir.Var(scope,
                                mk_unique_var("offset_var"), loc)
            self.typemap[offset_var.name] = types.intp
            const_assign = ir.Assign(ir.Const(index_offsets[i], loc),
                                     offset_var, loc)
            out_nodes.append(const_assign)

        if (isinstance(old_index_var, slice)
                or isinstance(self.typemap[old_index_var.name],
                              types.misc.SliceType)):
            # only one arg can be slice
            assert self.typemap[offset_var.name] == types.intp
            index_var = self._add_offset_to_slice(old_index_var,
                                                  offset_var, out_nodes,
                                                  scope, loc)
            index_vars.append(index_var)
            continue

        if (isinstance(offset_var, slice)
                or isinstance(self.typemap[offset_var.name],
                              types.misc.SliceType)):
            # only one arg can be slice
            assert self.typemap[old_index_var.name] == types.intp
            index_var = self._add_offset_to_slice(offset_var,
                                                  old_index_var, out_nodes,
                                                  scope, loc)
            index_vars.append(index_var)
            continue

        # Common case: both are intp — emit old_index + offset.
        index_var = ir.Var(scope,
                           mk_unique_var("offset_stencil_index"), loc)
        self.typemap[index_var.name] = types.intp
        index_call = ir.Expr.binop(operator.add, old_index_var,
                                   offset_var, loc)
        self.calltypes[index_call] = self.typingctx.resolve_function_type(
            operator.add, (types.intp, types.intp), {})
        index_assign = ir.Assign(index_call, index_var, loc)
        out_nodes.append(index_assign)
        index_vars.append(index_var)

    new_body.extend(out_nodes)
    return index_vars
def run(self):
    """Rewrite array-method calls ``A.func(...)`` into
    ``numba_dppy.dpnp.func(A, ...)``.

    First pass over a getattr rewires ``A.func`` to the dpnp module
    function and remembers ``A``; a later call through that variable
    gets ``A`` prepended to its argument list and its call type
    recomputed.  Returns True if anything was replaced.

    Fix: the definition recorded for ``g_dppy_var`` was ``getattr_dpnp``
    (the definition of ``dpnp_var``), a copy-paste slip; it is now the
    Global node ``g_dppy``, consistent with the companion rename pass.
    """
    typingctx = self.state.typingctx
    # save array arg to call
    # call_varname -> array
    func_ir = self.state.func_ir
    blocks = func_ir.blocks
    saved_arr_arg = {}
    topo_order = find_topo_order(blocks)
    replaced = False

    for label in topo_order:
        block = blocks[label]
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(
                    stmt.value, ir.Expr):
                lhs = stmt.target.name
                rhs = stmt.value
                # replace A.func with np.func, and save A in saved_arr_arg
                if (rhs.op == "getattr"
                        and rhs.attr in self.function_name_map
                        and isinstance(self.typemap[rhs.value.name],
                                       types.npytypes.Array)):
                    rhs = stmt.value
                    arr = rhs.value
                    saved_arr_arg[lhs] = arr
                    scope = arr.scope
                    loc = arr.loc

                    # Build: g_dppy_var = global(numba_dppy)
                    g_dppy_var = ir.Var(scope,
                                        mk_unique_var("$load_global"), loc)
                    self.typemap[g_dppy_var.name] = types.misc.Module(
                        numba_dppy)
                    g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                    g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                    # Build: dpnp_var = getattr(g_dppy_var, 'dpnp')
                    dpnp_var = ir.Var(scope,
                                      mk_unique_var("$load_attr"), loc)
                    self.typemap[dpnp_var.name] = types.misc.Module(
                        numba_dppy.dpnp)
                    getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc)
                    dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                    # Point the original getattr at the dpnp module.
                    rhs.value = dpnp_var
                    new_body.append(g_dppy_assign)
                    new_body.append(dpnp_assign)

                    # Record the correct defining node for each variable.
                    func_ir._definitions[g_dppy_var.name] = [g_dppy]
                    func_ir._definitions[dpnp_var.name] = [getattr_dpnp]

                    # update func var type
                    func = getattr(numba_dppy.dpnp, rhs.attr)
                    func_typ = get_dpnp_func_typ(func)

                    self.typemap.pop(lhs)
                    self.typemap[lhs] = func_typ
                    replaced = True

                if rhs.op == "call" and rhs.func.name in saved_arr_arg:
                    # add array as first arg
                    arr = saved_arr_arg[rhs.func.name]
                    # update call type signature to include array arg
                    old_sig = self.calltypes.pop(rhs)
                    # argsort requires kws for typing so sig.args can't be used
                    # reusing sig.args since some types become Const in sig
                    argtyps = old_sig.args[:len(rhs.args)]
                    kwtyps = {
                        name: self.typemap[v.name]
                        for name, v in rhs.kws
                    }
                    self.calltypes[rhs] = self.typemap[
                        rhs.func.name].get_call_type(
                            typingctx,
                            [self.typemap[arr.name]] + list(argtyps),
                            kwtyps,
                        )
                    rhs.args = [arr] + rhs.args

            new_body.append(stmt)
        block.body = new_body
    return replaced
def run(self):
    """
    This function rewrites the name of NumPy functions that exist in
    self.function_name_map e.g np.sum(a) would produce the following:

    np.sum() --> numba_dppy.dpnp.sum()

    ---------------------------------------------------------------------------------------
    Numba IR Before Rewrite:
    ---------------------------------------------------------------------------------------

        $2load_global.0 = global(np: <module 'numpy' from 'numpy/__init__.py'>) ['$2load_global.0']
        $4load_method.1 = getattr(value=$2load_global.0, attr=sum) ['$2load_global.0', '$4load_method.1']
        $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                          kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

    ---------------------------------------------------------------------------------------
    Numba IR After Rewrite:
    ---------------------------------------------------------------------------------------

        $dppy_replaced_var.0 = global(numba_dppy: <module 'numba_dppy' from 'numba_dppy/__init__.py'>) ['$dppy_replaced_var.0']
        $dpnp_var.1 = getattr(value=$dppy_replaced_var.0, attr=dpnp) ['$dpnp_var.1', '$dppy_replaced_var.0']
        $4load_method.1 = getattr(value=$dpnp_var.1, attr=sum) ['$4load_method.1', '$dpnp_var.1']
        $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                          kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

    ---------------------------------------------------------------------------------------

    Returns True when at least one getattr was rewritten.
    """
    func_ir = self.state.func_ir
    blocks = func_ir.blocks
    topo_order = find_topo_order(blocks)
    replaced = False

    for label in topo_order:
        block = blocks[label]
        saved_arr_arg = {}
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(
                    stmt.value, ir.Expr):
                lhs = stmt.target.name
                rhs = stmt.value
                # replace np.FOO with name from self.function_name_map["FOO"]
                # e.g. np.sum will be replaced with numba_dppy.dpnp.sum
                if (rhs.op == "getattr"
                        and rhs.attr in self.function_name_map):
                    # Resolve the object being getattr'd to check it is a
                    # module (or module attribute) we want to redirect.
                    module_node = block.find_variable_assignment(
                        rhs.value.name).value
                    if (isinstance(module_node, ir.Global)
                            and module_node.value.__name__
                            in self.function_name_map[rhs.attr][0]) or (
                            isinstance(module_node, ir.Expr)
                            and module_node.attr
                            in self.function_name_map[rhs.attr][0]):
                        rhs = stmt.value
                        rhs.attr = self.function_name_map[rhs.attr][1]

                        global_module = rhs.value
                        saved_arr_arg[lhs] = global_module

                        scope = global_module.scope
                        loc = global_module.loc

                        g_dppy_var = ir.Var(
                            scope, mk_unique_var("$2load_global"), loc)
                        # We are trying to rename np.function_name/np.linalg.function_name with
                        # numba_dppy.dpnp.function_name.
                        # Hence, we need to have a global variable representing module numba_dppy.
                        # Next, we add attribute dpnp to global module numba_dppy to
                        # represent numba_dppy.dpnp.
                        g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                        g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                        dpnp_var = ir.Var(
                            scope, mk_unique_var("$4load_attr"), loc)
                        getattr_dpnp = ir.Expr.getattr(
                            g_dppy_var, "dpnp", loc)
                        dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                        rhs.value = dpnp_var
                        new_body.append(g_dppy_assign)
                        new_body.append(dpnp_assign)
                        func_ir._definitions[dpnp_var.name] = [
                            getattr_dpnp
                        ]
                        func_ir._definitions[g_dppy_var.name] = [g_dppy]
                        replaced = True

            new_body.append(stmt)
        block.body = new_body
    return replaced
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """ Converts a set of stencil kernel blocks to a parfor.

    Returns the list of generated IR nodes: any bound-computation nodes,
    the Parfor itself, and a final assignment of the output array to
    *target*.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT >= 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)
    ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                  self.typemap, self.calltypes)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(numba.parfors.parfor.LoopNest(
            parfor_vars[i], start_ind, last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    for_replacing_ret = []

    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        # Multi-dim: index with a tuple built from the per-dim vars.
        parfor_ind_var = ir.Var(
            scope, mk_unique_var("$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    if out_arr is None:
        # No output supplied: allocate one with np.full(shape, cval/0).
        in_arr_typ = self.typemap[in_arr.name]

        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
            return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx, self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)
    else:  # out is present
        if "cval" in stencil_func.options:  # do out[:] = cval
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            cval_ty = typing.typeof.typeof(cval)
            if not self.typingctx.can_convert(cval_ty, return_type.dtype):
                msg = "cval type does not match stencil return type."
                raise ValueError(msg)

            # get slice ref
            slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
            slice_fn_ty = self.typingctx.resolve_value_type(slice)
            self.typemap[slice_var.name] = slice_fn_ty
            slice_g = ir.Global('slice', slice, loc)
            slice_assigned = ir.Assign(slice_g, slice_var, loc)
            init_block.body.append(slice_assigned)

            sig = self.typingctx.resolve_function_type(
                slice_fn_ty, (types.none,) * 2, {})

            callexpr = ir.Expr.call(func=slice_var, args=(), kws=(),
                                    loc=loc)

            self.calltypes[callexpr] = sig
            slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"),
                                    loc)
            self.typemap[slice_inst_var.name] = types.slice2_type
            slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
            init_block.body.append(slice_assign)

            # get const val for cval
            cval_const_val = ir.Const(return_type.dtype(cval), loc)
            cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"),
                                    loc)
            self.typemap[cval_const_var.name] = return_type.dtype
            cval_const_assign = ir.Assign(cval_const_val,
                                          cval_const_var, loc)
            init_block.body.append(cval_const_assign)

            # do setitem on `out` array
            setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
                                           slice_inst_var, cval_const_var,
                                           loc)
            init_block.body.append(setitemexpr)
            sig = signature(types.none, self.typemap[out_arr.name],
                            self.typemap[slice_inst_var.name],
                            self.typemap[out_arr.name].dtype)
            self.calltypes[setitemexpr] = sig

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)

    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    dummy_loc = ir.Loc("stencilparfor_dummy", -1)
    ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
    cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc),
                                  ret_const_var, dummy_loc)
    stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)

    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(ret_const_var, dummy_loc),
    )
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # Drop the dummy return added above now that the CFG is simplified.
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfors.parfor.Parfor(loopnests, init_block,
                                         stencil_blocks, loc,
                                         parfor_ind_var, equiv_set,
                                         pattern, self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                          standard_indexed, typemap, calltypes):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Parameters:
        kernel: the kernel's Numba IR; its blocks are rewritten in place.
        index_names: per-dimension names of the parfor loop index variables
            that get added to each relative index.
        ndim: number of dimensions of the relatively indexed input array.
        neighborhood: user-specified [(start, end), ...] per dimension, or
            None, in which case the neighborhood is inferred from the
            constant indices found in the kernel.
        standard_indexed: names of arrays that must NOT be rewritten
            (they use absolute indexing).
        typemap: variable name -> type mapping; new temporaries are added.
        calltypes: call IR node -> signature mapping; extended for the
            slice_addition calls generated here.

    Returns:
        (neighborhood, relatively_indexed): the (possibly inferred)
        neighborhood and the set of array names that were relatively
        indexed.

    Raises:
        ValueError: for neighborhood/ndim mismatch, assignments to input
            arrays, non-constant indices without an explicit neighborhood,
            or indices whose length doesn't match the array dimensionality.
    """
    const_dict = {}
    # Collected constant index values/tuples, used later to infer the
    # neighborhood when the user didn't supply one.
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT >= 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    if neighborhood is None:
        need_to_calc_kernel = True
    else:
        need_to_calc_kernel = False
        if len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d " \
                "dimensional input array" % (len(neighborhood), ndim))

    # Map of tuple-valued variable names to their element lists, so tuple
    # indices like a[-1, 1] can be resolved to their constant parts.
    tuple_table = ir_utils.get_tuple_table(kernel.blocks)

    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT >= 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value
            # Reject any form of store into an array passed to the kernel.
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in kernel.arg_names) or
               (isinstance(stmt, ir.SetItem)
                    and stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil " \
                    "kernels is not allowed.")
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['getitem', 'static_getitem']
                    and stmt.value.value.name in kernel.arg_names
                    and stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    stmt_index_var = stmt.value.index_var
                    # allow static_getitem since rewrite passes are applied
                    #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts += [tuple_table[stmt_index_var.name]]
                    elif stmt_index_var.name in const_dict:
                        kernel_consts += [const_dict[stmt_index_var.name]]
                    else:
                        raise ValueError("stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in stmt.value.index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # If the array is indexed with a slice then we
                    # have to add the index value with a call to
                    # slice_addition.
                    if isinstance(stmt_index_var_typ, types.misc.SliceType):
                        # Bind a fresh dispatcher for the slice_addition
                        # helper and record its type so typing still holds.
                        sa_var = ir.Var(scope,
                            ir_utils.mk_unique_var("slice_addition"), loc)
                        sa_func = numba.njit(slice_addition)
                        sa_func_typ = types.functions.Dispatcher(sa_func)
                        typemap[sa_var.name] = sa_func_typ
                        g_sa = ir.Global("slice_addition", sa_func, loc)
                        new_body.append(ir.Assign(g_sa, sa_var, loc))
                        slice_addition_call = ir.Expr.call(
                            sa_var, [stmt_index_var, index_var], (), loc)
                        calltypes[slice_addition_call] = \
                            sa_func_typ.get_call_type(
                                self._typingctx,
                                [stmt_index_var_typ, types.intp], {})
                        new_body.append(
                            ir.Assign(slice_addition_call, tmpvar, loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                    else:
                        # Integer relative index: absolute index is simply
                        # relative + loop index.
                        acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                else:
                    index_vars = []
                    sum_results = []
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    const_index_vars = []
                    ind_stencils = []

                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        # Constant var holding this dimension number, used
                        # to getitem the dim-th element of the index tuple.
                        tmpname = ir_utils.mk_unique_var("const_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc), tmpvar, loc))
                        const_index_vars += [tmpvar]
                        index_var = ir.Var(scope, index_names[dim], loc)
                        index_vars += [index_var]

                        tmpname = ir_utils.mk_unique_var("ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils += [tmpvar]
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_vars[dim], loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))
                        # Get the type of this particular part of the index tuple.
                        one_index_typ = stmt_index_var_typ[dim]
                        # If the array is indexed with a slice then we
                        # have to add the index value with a call to
                        # slice_addition.
                        if isinstance(one_index_typ, types.misc.SliceType):
                            sa_var = ir.Var(scope,
                                ir_utils.mk_unique_var("slice_addition"), loc)
                            sa_func = numba.njit(slice_addition)
                            sa_func_typ = types.functions.Dispatcher(sa_func)
                            typemap[sa_var.name] = sa_func_typ
                            g_sa = ir.Global("slice_addition", sa_func, loc)
                            new_body.append(ir.Assign(g_sa, sa_var, loc))
                            slice_addition_call = ir.Expr.call(
                                sa_var, [getitemvar, index_vars[dim]], (), loc)
                            calltypes[slice_addition_call] = \
                                sa_func_typ.get_call_type(
                                    self._typingctx,
                                    [one_index_typ, types.intp], {})
                            new_body.append(
                                ir.Assign(slice_addition_call, tmpvar, loc))
                        else:
                            acc_call = ir.Expr.binop(
                                operator.add, getitemvar,
                                index_vars[dim], loc)
                            new_body.append(
                                ir.Assign(acc_call, tmpvar, loc))

                    # Rebuild the absolute index tuple and redirect the
                    # original getitem through it.
                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(
                        ir.Assign(
                            ir.Expr.getitem(stmt.value.value, s_index_var, loc),
                            stmt.target, loc))
            else:
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the size of the kernel by finding the maximum absolute value
        # index used in the kernel specification.
        neighborhood = [[0, 0] for _ in range(ndim)]
        if len(kernel_consts) == 0:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, tuple) or isinstance(index, list):
                for i in range(len(index)):
                    te = index[i]
                    # Tuple elements may be IR vars holding constants;
                    # resolve them through const_dict first.
                    if isinstance(te, ir.Var) and te.name in const_dict:
                        te = const_dict[te.name]
                    if isinstance(te, int):
                        neighborhood[i][0] = min(neighborhood[i][0], te)
                        neighborhood[i][1] = max(neighborhood[i][1], te)
                    else:
                        raise ValueError(
                            "stencil kernel index is not constant,"
                            "'neighborhood' option required")
                index_len = len(index)
            elif isinstance(index, int):
                neighborhood[0][0] = min(neighborhood[0][0], index)
                neighborhood[0][1] = max(neighborhood[0][1], index)
                index_len = 1
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")
            if index_len != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

    return (neighborhood, relatively_indexed)
def _replace_stencil_accesses(self, stencil_ir, parfor_vars, in_args,
                              index_offsets, stencil_func, arg_to_arr_dict):
    """ Convert relative indexing in the stencil kernel to standard indexing
        by adding the loop index variables to the corresponding dimensions
        of the array index tuples.

        Parameters:
            stencil_ir: typed IR of the stencil kernel; its blocks are
                rewritten in place.
            parfor_vars: the parfor loop index variables added to each
                relative index.
            in_args: kernel input-array variables; in_args[0] must be
                relatively indexed.
            index_offsets: optional extra offsets folded into each index
                before the loop indices are added.
            stencil_func: the StencilFunc carrying the 'neighborhood' and
                'standard_indexing' options.
            arg_to_arr_dict: mapping from kernel argument name to the
                caller's array name.

        Returns:
            (start_lengths, end_lengths): per-dimension minimum and maximum
            relative offsets seen (or converted from the user-supplied
            neighborhood).

        Raises:
            ValueError: for standard indexing on unknown/first arguments,
                assignments to input arrays, non-integer indices without a
                known neighborhood, or a kernel with no relative accesses.
    """
    stencil_blocks = stencil_ir.blocks
    in_arr = in_args[0]
    in_arg_names = [x.name for x in in_args]

    if "standard_indexing" in stencil_func.options:
        for x in stencil_func.options["standard_indexing"]:
            if x not in arg_to_arr_dict:
                raise ValueError("Standard indexing requested for an array " \
                    "name not present in the stencil kernel definition.")
        standard_indexed = [arg_to_arr_dict[x] for x in
                            stencil_func.options["standard_indexing"]]
    else:
        standard_indexed = []

    if in_arr.name in standard_indexed:
        raise ValueError("The first argument to a stencil kernel must use " \
            "relative indexing, not standard indexing.")

    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    # replace access indices, find access lengths in each dimension
    need_to_calc_kernel = stencil_func.neighborhood is None

    # If we need to infer the kernel size then initialize the minimum and
    # maximum seen indices for each dimension to 0.  If we already have
    # the neighborhood calculated then just convert from neighborhood format
    # to the separate start and end lengths format used here.
    if need_to_calc_kernel:
        start_lengths = ndims * [0]
        end_lengths = ndims * [0]
    else:
        start_lengths = [x[0] for x in stencil_func.neighborhood]
        end_lengths = [x[1] for x in stencil_func.neighborhood]

    # Get all the tuples defined in the stencil blocks.
    tuple_table = ir_utils.get_tuple_table(stencil_blocks)

    found_relative_index = False

    # For all blocks in the stencil kernel...
    for label, block in stencil_blocks.items():
        new_body = []
        # For all statements in those blocks...
        for stmt in block.body:
            # Reject assignments to input arrays.
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in in_arg_names) or
               ((isinstance(stmt, ir.SetItem) or
                    isinstance(stmt, ir.StaticSetItem))
                    and stmt.target.name in in_arg_names)):
                raise ValueError(
                    "Assignments to arrays passed to stencil kernels is not allowed."
                )
            # We found a getitem for some array.  If that array is an input
            # array and isn't in the list of standard indexed arrays then
            # update min and max seen indices if we are inferring the
            # kernel size and create a new tuple where the relative offsets
            # are added to loop index vars to get standard indexing.
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['static_getitem', 'getitem']
                    and stmt.value.value.name in in_arg_names
                    and stmt.value.value.name not in standard_indexed):
                index_list = stmt.value.index
                # handle 1D case
                if ndims == 1:
                    index_list = [index_list]
                else:
                    # For N-D, resolve the index tuple variable to its
                    # element list when possible.
                    if hasattr(index_list, 'name') and \
                            index_list.name in tuple_table:
                        index_list = tuple_table[index_list.name]
                # indices can be inferred as constant in simple expressions
                # like -c where c is constant
                # handled here since this is a common stencil index pattern
                stencil_ir._definitions = ir_utils.build_definitions(
                    stencil_blocks)
                index_list = [_get_const_index_expr(
                    stencil_ir, self.func_ir, v) for v in index_list]
                if index_offsets:
                    index_list = self._add_index_offsets(index_list,
                                list(index_offsets), new_body, scope, loc)

                # update min and max indices
                if need_to_calc_kernel:
                    # all indices should be integer to be able to calculate
                    # neighborhood automatically
                    if (isinstance(index_list, ir.Var) or
                        any([not isinstance(v, int) for v in index_list])):
                        raise ValueError("Variable stencil index only "
                            "possible with known neighborhood")
                    start_lengths = list(map(min, start_lengths,
                                             index_list))
                    end_lengths = list(map(max, end_lengths, index_list))
                    found_relative_index = True

                # update access indices
                index_vars = self._add_index_offsets(parfor_vars,
                            list(index_list), new_body, scope, loc)

                # new access index tuple
                if ndims == 1:
                    ind_var = index_vars[0]
                else:
                    ind_var = ir.Var(scope, mk_unique_var(
                        "$parfor_index_ind_var"), loc)
                    self.typemap[ind_var.name] = types.containers.UniTuple(
                        types.intp, ndims)
                    tuple_call = ir.Expr.build_tuple(index_vars, loc)
                    tuple_assign = ir.Assign(tuple_call, ind_var, loc)
                    new_body.append(tuple_assign)

                # getitem return type is scalar if all indices are integer
                if all([self.typemap[v.name] == types.intp
                        for v in index_vars]):
                    getitem_return_typ = self.typemap[
                        stmt.value.value.name].dtype
                else:
                    # getitem returns an array
                    getitem_return_typ = self.typemap[
                        stmt.value.value.name]
                # new getitem with the new index var
                getitem_call = ir.Expr.getitem(stmt.value.value, ind_var,
                                               loc)
                self.calltypes[getitem_call] = signature(
                    getitem_return_typ,
                    self.typemap[stmt.value.value.name],
                    self.typemap[ind_var.name])
                # Replace the relative getitem with the absolute one; the
                # original Assign node (and its target) is kept.
                stmt.value = getitem_call

            new_body.append(stmt)
        block.body = new_body
    if need_to_calc_kernel and not found_relative_index:
        raise ValueError("Stencil kernel with no accesses to " \
            "relatively indexed arrays.")

    return start_lengths, end_lengths
def gen_parquet_read(self, file_name, lhs):
    """Generate IR nodes that read a parquet file into one array per column.

    Parameters:
        file_name: IR variable holding the parquet file name.
        lhs: the temporary target variable of the read call; used to look
            up user-supplied schema/conversion info in self.locals via
            self.reverse_copies.

    Returns:
        (col_names, col_arrs, out_nodes): column names, the IR variables
        that will hold each column's array, and the IR nodes performing
        the read.

    Raises:
        ValueError: if no schema was supplied and the file name is not a
            compile-time constant string.
    """
    scope = file_name.scope
    loc = file_name.loc

    table_types = None
    # lhs is temporary and will possibly be assigned to user variable
    assert lhs.name.startswith('$')
    # User may have declared the table schema in locals under the
    # user-visible name; consume (pop) it so it isn't treated as a
    # regular local type annotation.
    if lhs.name in self.reverse_copies and \
            self.reverse_copies[lhs.name] in self.locals:
        table_types = self.locals[self.reverse_copies[lhs.name]]
        self.locals.pop(self.reverse_copies[lhs.name])

    convert_types = {}
    # user-specified type conversion
    if lhs.name in self.reverse_copies and \
            (self.reverse_copies[lhs.name] + ':convert') in self.locals:
        convert_types = self.locals[self.reverse_copies[lhs.name] + ':convert']
        self.locals.pop(self.reverse_copies[lhs.name] + ':convert')

    if table_types is None:
        # No user schema: the file name must be a constant so the schema
        # can be read from the file at compile time.
        fname_def = guard(get_definition, self.func_ir, file_name)
        if (not isinstance(fname_def, (ir.Const, ir.Global, ir.FreeVar))
                or not isinstance(fname_def.value, str)):
            raise ValueError("Parquet schema not available")
        file_name_str = fname_def.value
        col_names, col_types = parquet_file_schema(file_name_str)
        # remove Pandas index if exists
        # TODO: handle index properly when indices are supported
        _rm_pd_index(col_names, col_types)
    else:
        col_names = list(table_types.keys())
        col_types = list(table_types.values())

    out_nodes = []

    # get arrow readers once
    def init_arrow_readers(fname):
        arrow_readers = get_arrow_readers(unicode_to_char_ptr(fname))

    f_block = compile_to_numba_ir(init_arrow_readers,
                                  {'get_arrow_readers': _get_arrow_readers,
                                   'unicode_to_char_ptr': unicode_to_char_ptr,
                                   }).blocks.popitem()[1]

    replace_arg_nodes(f_block, [file_name])
    # NOTE(review): body[:-3] appears to drop the trailing cast/return
    # boilerplate of the compiled block, leaving the last assignment's
    # target as the readers handle — confirm against compile_to_numba_ir.
    out_nodes += f_block.body[:-3]
    arrow_readers_var = out_nodes[-1].target

    col_arrs = []
    for i, cname in enumerate(col_names):
        # get column type from schema
        c_type = col_types[i]
        if cname in convert_types:
            c_type = convert_types[cname].dtype

        # create a variable for column and assign type
        varname = mk_unique_var(cname)
        #self.locals[varname] = c_type
        cvar = ir.Var(scope, varname, loc)
        col_arrs.append(cvar)

        out_nodes += get_column_read_nodes(c_type, cvar,
                                           arrow_readers_var, i)

    # delete arrow readers
    def cleanup_arrow_readers(readers):
        s = del_arrow_readers(readers)

    f_block = compile_to_numba_ir(cleanup_arrow_readers,
                                  {'del_arrow_readers': _del_arrow_readers,
                                   }).blocks.popitem()[1]
    replace_arg_nodes(f_block, [arrow_readers_var])
    out_nodes += f_block.body[:-3]
    return col_names, col_arrs, out_nodes
def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap,
                   calltypes):
    """get typed IR from stencil bytecode

    Runs type inference on a copy of the stencil kernel's IR with a dummy
    pipeline, renames its variables/labels to be unique within the overall
    function, and replaces Arg nodes with the caller's array variables.

    Parameters:
        sf: the StencilFunc whose kernel_ir is to be typed.
        typingctx: typing context used for inference.
        args: argument types for the kernel.
        scope, loc: scope and location of the enclosing function, used for
            the renamed variables.
        input_dict: maps kernel argument index to the caller's IR variable.
        typemap: the overall function's typemap; extended in place with the
            renamed kernel variables.
        calltypes: the overall function's calltypes; extended in place.

    Returns:
        (stencil_func_ir, return_type, arg_to_arr_dict) where
        arg_to_arr_dict maps kernel argument name to the caller's array
        variable name.

    Raises:
        ValueError: if the kernel uses the reserved name 'out'.
    """
    from numba.core.cpu import CPUContext
    from numba.core.registry import cpu_target
    from numba.core.annotations import type_annotations
    from numba.core.typed_passes import type_inference_stage

    # get untyped IR
    stencil_func_ir = sf.kernel_ir.copy()
    # copy the IR nodes to avoid changing IR in the StencilFunc object
    stencil_blocks = copy.deepcopy(stencil_func_ir.blocks)
    stencil_func_ir.blocks = stencil_blocks

    name_var_table = ir_utils.get_name_var_table(stencil_func_ir.blocks)
    if "out" in name_var_table:
        raise ValueError(
            "Cannot use the reserved word 'out' in stencil kernels.")

    # get typed IR with a dummy pipeline (similar to test_parfors.py)
    targetctx = CPUContext(typingctx)
    with cpu_target.nested_context(typingctx, targetctx):
        tp = DummyPipeline(typingctx, targetctx, args, stencil_func_ir)

        # Apply untyped rewrites (e.g. getitem -> static_getitem) before
        # inference, matching the normal compilation pipeline order.
        rewrites.rewrite_registry.apply('before-inference', tp.state)

        tp.state.typemap, tp.state.return_type, tp.state.calltypes = \
            type_inference_stage(
                tp.state.typingctx, tp.state.func_ir, tp.state.args, None)

        type_annotations.TypeAnnotation(
            func_ir=tp.state.func_ir,
            typemap=tp.state.typemap,
            calltypes=tp.state.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.state.args,
            return_type=tp.state.return_type,
            html_output=config.HTML)

    # make block labels unique
    stencil_blocks = ir_utils.add_offset_to_labels(stencil_blocks,
                                                   ir_utils.next_label())
    min_label = min(stencil_blocks.keys())
    max_label = max(stencil_blocks.keys())
    # Advance the global label counter so later-created labels don't
    # collide with the offset kernel labels.
    ir_utils._max_label = max_label

    if config.DEBUG_ARRAY_OPT >= 1:
        print("Initial stencil_blocks")
        ir_utils.dump_blocks(stencil_blocks)

    # rename variables,
    var_dict = {}
    for v, typ in tp.state.typemap.items():
        new_var = ir.Var(scope, mk_unique_var(v), loc)
        var_dict[v] = new_var
        typemap[new_var.name] = typ  # add new var type for overall function
    ir_utils.replace_vars(stencil_blocks, var_dict)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("After replace_vars")
        ir_utils.dump_blocks(stencil_blocks)

    # add call types to overall function
    for call, call_typ in tp.state.calltypes.items():
        calltypes[call] = call_typ

    arg_to_arr_dict = {}
    # replace arg with arr
    for block in stencil_blocks.values():
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and \
                    isinstance(stmt.value, ir.Arg):
                if config.DEBUG_ARRAY_OPT >= 1:
                    print("input_dict", input_dict, stmt.value.index,
                          stmt.value.name, stmt.value.index in input_dict)
                arg_to_arr_dict[stmt.value.name] = input_dict[
                    stmt.value.index].name
                stmt.value = input_dict[stmt.value.index]

    if config.DEBUG_ARRAY_OPT >= 1:
        print("arg_to_arr_dict", arg_to_arr_dict)
        print("After replace arg with arr")
        ir_utils.dump_blocks(stencil_blocks)

    ir_utils.remove_dels(stencil_blocks)
    stencil_func_ir.blocks = stencil_blocks
    return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict