def test_global(self):
    """Check which ``ir.Global`` nodes are expected to compare as the same.

    Nodes that differ only in location are listed as ``same``; nodes whose
    name or value differs are listed as ``different``.
    """
    reference = ir.Global('foo', 0, self.loc1)
    identical = ir.Global('foo', 0, self.loc1)
    other_loc = ir.Global('foo', 0, self.loc2)
    other_name = ir.Global('bar', 0, self.loc1)
    other_value = ir.Global('foo', 1, self.loc1)
    self.check(
        reference,
        same=[identical, other_loc],
        different=[other_name, other_value],
    )
def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope, loc):
    """Return the value to use as the last index of one stencil dimension.

    When ``end_length`` is zero, ``dim_size`` is returned as is and no IR is
    generated.  Otherwise assignments are appended to ``gen_nodes`` that call
    the jitted ``_compute_last_ind`` helper with ``dim_size`` and
    ``end_length``, and the variable holding the call result is returned.
    """
    if end_length != 0:
        # Set last index to size minus stencil size to avoid invalid
        # memory access.
        offset_var = ir.Var(scope, mk_unique_var("stencil_const_var"), loc)
        self.typemap[offset_var.name] = types.intp
        # Plain numbers are wrapped in a constant; anything else is assigned
        # directly.
        offset_rhs = (
            ir.Const(end_length, loc)
            if isinstance(end_length, numbers.Number)
            else end_length
        )
        gen_nodes.append(ir.Assign(offset_rhs, offset_var, loc))

        result_var = ir.Var(scope, mk_unique_var("last_ind"), loc)
        self.typemap[result_var.name] = types.intp

        helper_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
        helper = numba.njit(_compute_last_ind)
        helper_typ = types.functions.Dispatcher(helper)
        self.typemap[helper_var.name] = helper_typ
        helper_global = ir.Global("_compute_last_ind", helper, loc)
        gen_nodes.append(ir.Assign(helper_global, helper_var, loc))

        helper_call = ir.Expr.call(helper_var, [dim_size, offset_var], (), loc)
        self.calltypes[helper_call] = helper_typ.get_call_type(
            self.typingctx, [types.intp, types.intp], {})
        gen_nodes.append(ir.Assign(helper_call, result_var, loc))
        return result_var

    return dim_size
def run_pass(self, state):
    """Insert a ``free_omnisci_buffer`` call ahead of block terminators.

    The pass only acts when some assignment in the function binds a free
    variable whose name is in ``BufferMeta.class_names``.  In that case, for
    every block containing a return instruction, a call to
    ``free_omnisci_buffer`` on the returned value is inserted before the
    block terminator.

    Returns ``False`` when no such free variable exists (IR untouched) and
    ``True`` otherwise, even if no block contained a return instruction.
    """
    func_ir = state.func_ir  # get the FunctionIR object

    uses_buffer = any(
        isinstance(assign.value, ir.FreeVar)
        and assign.value.name in BufferMeta.class_names
        for block in func_ir.blocks.values()
        for assign in block.find_insts(ir.Assign)
    )
    if not uses_buffer:
        return False  # the IR is left unchanged

    for block in func_ir.blocks.values():
        loc = block.loc
        scope = block.scope
        for ret in block.find_insts(ir.Return):
            # Bind the free function to a temporary ...
            free_fn = ir.Global("free_omnisci_buffer_fn", free_omnisci_buffer, loc)
            fn_var = scope.make_temp(loc)
            load_fn = ir.Assign(free_fn, fn_var, loc)
            block.insert_before_terminator(load_fn)

            # ... and call it on the value being returned.
            free_call = ir.Expr.call(func=fn_var, args=[ret.value], kws=(), loc=loc)
            result_var = scope.make_temp(loc)
            call_stmt = ir.Assign(free_call, result_var, block.loc)
            block.insert_before_terminator(call_stmt)
            # Only the first return instruction of a block is processed.
            break

    return True  # we changed the IR
def op_COMPARE_OP(self, inst, lhs, rhs, res):
    """Emit IR for a COMPARE_OP instruction.

    ``in`` / ``not in`` have their operands swapped; ``not in`` is emitted as
    an ``in`` binop followed by a unary ``not``.  ``exception match`` becomes
    a call to ``eh.exception_match``.  Every other operator is forwarded to
    ``self._binop``.
    """
    op = dis.cmp_op[inst.arg]
    if op in ('in', 'not in'):
        # Operands are swapped for membership tests.
        lhs, rhs = rhs, lhs

    if op == 'not in':
        self._binop('in', lhs, rhs, res)
        contained = self.get(res)
        negated = ir.Expr.unary('not', value=contained, loc=self.loc)
        self.store(negated, res)
        return

    if op == 'exception match':
        match_fn = ir.Global(
            "exception_match",
            eh.exception_match,
            loc=self.loc,
        )
        match_name = '$exc_match'
        self.store(value=match_fn, name=match_name, redefine=True)
        left = self.get(lhs)
        right = self.get(rhs)
        match_call = ir.Expr.call(
            self.get(match_name),
            args=(left, right),
            kws=(),
            loc=self.loc,
        )
        self.store(match_call, res)
        return

    self._binop(op, lhs, rhs, res)
def op_DELETE_SLICE_3(self, inst, base, start, stop, slicevar, indexvar):
    """Emit IR for ``del base[start:stop]``.

    The ``slice`` builtin is stored under ``slicevar``, the result of
    ``slice(start, stop)`` under ``indexvar``, and a ``DelItem`` statement is
    appended to the current block.
    """
    container = self.get(base)
    lower = self.get(start)
    upper = self.get(stop)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    slice_fn = self.get(slicevar)
    make_slice = ir.Expr.call(slice_fn, (lower, upper), (), loc=self.loc)
    self.store(value=make_slice, name=indexvar)

    delete = ir.DelItem(container, self.get(indexvar), loc=self.loc)
    self.current_block.append(delete)
def op_SLICE_3(self, inst, base, start, stop, res, slicevar, indexvar):
    """Emit IR for ``res = base[start:stop]``.

    The ``slice`` builtin is stored under ``slicevar`` and the result of
    ``slice(start, stop)`` under ``indexvar``; the getitem expression is
    stored under ``res``.
    """
    container = self.get(base)
    lower = self.get(start)
    upper = self.get(stop)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    slice_fn = self.get(slicevar)
    make_slice = ir.Expr.call(slice_fn, (lower, upper), (), loc=self.loc)
    self.store(value=make_slice, name=indexvar)

    lookup = ir.Expr.getitem(container, self.get(indexvar), loc=self.loc)
    self.store(value=lookup, name=res)
def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar):
    """Emit IR for ``del base[:]``.

    The ``slice`` builtin is stored under ``slicevar``, a ``None`` constant
    under ``nonevar``, and the result of ``slice(None, None)`` under
    ``indexvar``; a ``DelItem`` statement is appended to the current block.
    """
    container = self.get(base)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    bound = self.get(nonevar)

    slice_fn = self.get(slicevar)
    make_slice = ir.Expr.call(slice_fn, (bound, bound), (), loc=self.loc)
    self.store(value=make_slice, name=indexvar)

    delete = ir.DelItem(container, self.get(indexvar), loc=self.loc)
    self.current_block.append(delete)
def op_BUILD_SLICE(self, inst, start, stop, step, res, slicevar):
    """Emit IR for a ``slice(start, stop[, step])`` call stored under ``res``.

    ``step`` is only passed to the call when it is not ``None``.  The
    ``slice`` builtin itself is stored under ``slicevar``.
    """
    lower = self.get(start)
    upper = self.get(stop)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)

    if step is None:
        call_args = (lower, upper)
    else:
        call_args = (lower, upper, self.get(step))

    make_slice = ir.Expr.call(self.get(slicevar), call_args, (), loc=self.loc)
    self.store(value=make_slice, name=res)
def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar):
    """Emit IR for ``res = base[:]``.

    The ``slice`` builtin is stored under ``slicevar``, a ``None`` constant
    under ``nonevar``, and the result of ``slice(None, None)`` under
    ``indexvar``; the getitem expression is stored under ``res``.
    """
    container = self.get(base)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    bound = self.get(nonevar)

    slice_fn = self.get(slicevar)
    make_slice = ir.Expr.call(slice_fn, (bound, bound), (), loc=self.loc)
    self.store(value=make_slice, name=indexvar)

    lookup = ir.Expr.getitem(container, self.get(indexvar), loc=self.loc)
    self.store(value=lookup, name=res)
def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar,
                     indexvar):
    """Emit IR for ``base[start:] = value``.

    A ``None`` constant is stored under ``nonevar``, the ``slice`` builtin
    under ``slicevar``, and the result of ``slice(start, None)`` under
    ``indexvar``; a ``SetItem`` statement is appended to the current block.
    """
    container = self.get(base)
    lower = self.get(start)

    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    upper = self.get(nonevar)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    slice_fn = self.get(slicevar)
    make_slice = ir.Expr.call(slice_fn, (lower, upper), (), loc=self.loc)
    self.store(value=make_slice, name=indexvar)

    assign = ir.SetItem(container, self.get(indexvar), self.get(value),
                        loc=self.loc)
    self.current_block.append(assign)
def _inject_call(self, func, gv_name, res_name=None):
    """Inject a zero-argument call to the Python function *func*.

    Parameters
    ----------
    func : callable
        The function object to be called.
    gv_name : str
        The variable name under which the function object is stored.
    res_name : str; optional
        The variable name under which the call result is stored.  When
        falsy (e.g. ``None``), ``'$callres_' + gv_name`` is used.
    """
    self.store(
        value=ir.Global(gv_name, func, loc=self.loc),
        name=gv_name,
        redefine=True,
    )
    call_expr = ir.Expr.call(self.get(gv_name), (), (), loc=self.loc)
    if not res_name:
        res_name = '$callres_{}'.format(gv_name)
    self.store(value=call_expr, name=res_name, redefine=True)
def _op_JUMP_IF(self, inst, pred, iftrue):
    """Emit a conditional branch on ``bool(pred)``.

    When ``iftrue`` is true the branch jumps to the instruction's jump target
    if the predicate holds and falls through to ``inst.next`` otherwise; when
    ``iftrue`` is false the two destinations are exchanged.
    """
    destinations = {
        True: inst.get_jump_target(),
        False: inst.next,
    }
    truebr, falsebr = destinations[iftrue], destinations[not iftrue]

    # Coerce the predicate with an explicit call to the ``bool`` builtin.
    bool_name = "bool%s" % (inst.offset)
    self.store(value=ir.Global("bool", bool, loc=self.loc), name=bool_name)
    coerced = ir.Expr.call(self.get(bool_name), (self.get(pred),), (),
                           loc=self.loc)

    pred_name = "$%spred" % (inst.offset)
    condition = self.store(value=coerced, name=pred_name)
    branch = ir.Branch(cond=condition, truebr=truebr, falsebr=falsebr,
                       loc=self.loc)
    self.current_block.append(branch)
def mutate_with_body(self, func_ir, blocks, blk_start, blk_end,
                     body_blocks, dispatcher_factory, extra):
    """Wrap a with-region in calls to ``numba.set_parallel_chunksize``.

    The single entry of ``extra["args"]`` is passed to
    ``numba.set_parallel_chunksize`` at the end of the start block (just
    before its last statement) and the call result is kept in ``$save_pc``.
    At the top of the end block the function is called again with that saved
    value.  The first statement of the start block is dropped, and
    ``func_ir._definitions`` is rebuilt from ``blocks``.

    ``body_blocks`` and ``dispatcher_factory`` are not used here.
    """
    ir_utils.dprint_func_ir(func_ir, "Before with changes", blocks=blocks)
    assert extra is not None
    args = extra["args"]
    assert len(args) == 1
    chunksize = args[0]

    entry_block = blocks[blk_start]
    scope = entry_block.scope
    loc = entry_block.loc
    if isinstance(chunksize, ir.Arg):
        chunksize = ir.Var(scope, chunksize.name, loc)

    # global for Numba itself
    numba_var = scope.redefine("$ngvar", loc)
    load_numba = ir.Assign(ir.Global('numba', numba, loc), numba_var, loc)

    # getattr for set chunksize function in Numba
    setter_attr = ir.Expr.getattr(numba_var, 'set_parallel_chunksize', loc)
    setter_var = scope.redefine("$spc", loc)
    load_setter = ir.Assign(setter_attr, setter_var, loc)

    # call set_parallel_chunksize
    saved_var = scope.redefine("$save_pc", loc)
    size_var = scope.redefine("$cs_var", loc)
    load_size = ir.Assign(chunksize, size_var, loc)
    apply_size = ir.Assign(
        ir.Expr.call(setter_var, [size_var], (), loc), saved_var, loc)

    # call it again with the value saved by the first call
    undo_size = ir.Assign(
        ir.Expr.call(setter_var, [saved_var], (), loc), saved_var, loc)

    set_state = [load_numba, load_setter, load_size, apply_size]
    restore_state = [undo_size]

    entry_body = entry_block.body
    entry_block.body = entry_body[1:-1] + set_state + [entry_body[-1]]
    exit_block = blocks[blk_end]
    exit_block.body = restore_state + exit_block.body

    func_ir._definitions = build_definitions(blocks)
    ir_utils.dprint_func_ir(func_ir, "After with changes", blocks=blocks)
def bind_global_function(self, fobj, ftype, args, kws=None):
    """Binds a global function to a variable.

    Parameters
    ----------
    fobj : object
        The function to be bound.  Its ``__name__`` is used for the global
        name, and ``"<__name__>_func"`` for the variable name.
    ftype : types.Type
        Type assigned to the variable holding the function.
    args : Sequence[types.Type]
        Positional argument types used to resolve the call signature.
    kws : Mapping[str, types.Type]; optional
        Keyword argument types used to resolve the call signature.  ``None``
        (the default) means no keyword arguments.

    Returns
    -------
    callable: _CallableNode
    """
    # A fresh dict per call: a ``{}`` default would be one object shared by
    # every invocation.
    if kws is None:
        kws = {}

    loc = self._loc
    gvname = f"{fobj.__name__}"
    varname = f"{gvname}_func"

    func_sig = self._typingctx.resolve_function_type(ftype, args, kws)
    func_var = self.assign(
        rhs=ir.Global(gvname, fobj, loc=loc),
        typ=ftype,
        name=varname,
    )
    return _CallableNode(func=func_var, sig=func_sig)
def add_indices_to_kernel(self, kernel, index_names, ndim,
                          neighborhood, standard_indexed, typemap, calltypes):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Parameters
    ----------
    kernel
        Kernel IR.  The bodies of ``kernel.blocks`` are replaced in place;
        ``kernel.arg_names`` identifies the kernel's arguments.
    index_names
        Names of the per-dimension index variables; ``index_names[d]`` is
        added to the relative index of dimension ``d``.
    ndim
        Number of dimensions of the input array.
    neighborhood
        User supplied neighborhood, or ``None`` to have it computed from the
        constant indices used in the kernel.
    standard_indexed
        Names of arguments whose getitems are left untouched.
    typemap
        Read to find the type of each index variable; updated in place when
        a ``slice_addition`` call is emitted.
    calltypes
        Updated in place when a ``slice_addition`` call is emitted.

    Returns
    -------
    tuple
        ``(neighborhood, relatively_indexed)`` where ``relatively_indexed``
        is the set of argument names that were indexed relatively.

    Raises
    ------
    ValueError
        If ``neighborhood`` does not have ``ndim`` entries, if the kernel
        assigns into one of its arguments, if an index is not constant while
        the neighborhood has to be computed, if no relative access exists
        while the neighborhood has to be computed, or if an index does not
        match ``ndim``.
    """
    # Constants seen so far: variable name -> Python value.
    const_dict = {}
    # Constant indices collected from relative getitems (only filled when
    # the neighborhood has to be computed).
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT >= 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    if neighborhood is None:
        need_to_calc_kernel = True
    else:
        need_to_calc_kernel = False
        if len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d " \
                "dimensional input array" % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)

    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT >= 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in kernel.arg_names) or
               (isinstance(stmt, ir.SetItem)
                    and stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil " \
                    "kernels is not allowed.")
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['getitem', 'static_getitem']
                    and stmt.value.value.name in kernel.arg_names
                    and stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    stmt_index_var = stmt.value.index_var
                    # allow static_getitem since rewrite passes are applied
                    #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts += [tuple_table[stmt_index_var.name]]
                    elif stmt_index_var.name in const_dict:
                        kernel_consts += [const_dict[stmt_index_var.name]]
                    else:
                        raise ValueError(
                            "stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in stmt.value.index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # If the array is indexed with a slice then we
                    # have to add the index value with a call to
                    # slice_addition.
                    if isinstance(stmt_index_var_typ, types.misc.SliceType):
                        sa_var = ir.Var(
                            scope,
                            ir_utils.mk_unique_var("slice_addition"),
                            loc)
                        sa_func = numba.njit(slice_addition)
                        sa_func_typ = types.functions.Dispatcher(sa_func)
                        typemap[sa_var.name] = sa_func_typ
                        g_sa = ir.Global("slice_addition", sa_func, loc)
                        new_body.append(ir.Assign(g_sa, sa_var, loc))
                        slice_addition_call = ir.Expr.call(
                            sa_var, [stmt_index_var, index_var], (), loc)
                        calltypes[
                            slice_addition_call] = sa_func_typ.get_call_type(
                                self._typingctx,
                                [stmt_index_var_typ, types.intp], {})
                        new_body.append(
                            ir.Assign(slice_addition_call, tmpvar, loc))
                        # Re-issue the getitem with the absolute index.
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                    else:
                        acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        # Re-issue the getitem with the absolute index.
                        new_body.append(
                            ir.Assign(
                                ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                                stmt.target, loc))
                else:
                    index_vars = []
                    sum_results = []
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    const_index_vars = []
                    ind_stencils = []

                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        tmpname = ir_utils.mk_unique_var("const_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc), tmpvar, loc))
                        const_index_vars += [tmpvar]
                        index_var = ir.Var(scope, index_names[dim], loc)
                        index_vars += [index_var]

                        tmpname = ir_utils.mk_unique_var(
                            "ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils += [tmpvar]
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_vars[dim], loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))
                        # Get the type of this particular part of the index tuple.
                        one_index_typ = stmt_index_var_typ[dim]
                        # If the array is indexed with a slice then we
                        # have to add the index value with a call to
                        # slice_addition.
                        if isinstance(one_index_typ, types.misc.SliceType):
                            sa_var = ir.Var(
                                scope,
                                ir_utils.mk_unique_var("slice_addition"),
                                loc)
                            sa_func = numba.njit(slice_addition)
                            sa_func_typ = types.functions.Dispatcher(
                                sa_func)
                            typemap[sa_var.name] = sa_func_typ
                            g_sa = ir.Global("slice_addition", sa_func, loc)
                            new_body.append(ir.Assign(g_sa, sa_var, loc))
                            slice_addition_call = ir.Expr.call(
                                sa_var, [getitemvar, index_vars[dim]], (), loc)
                            calltypes[
                                slice_addition_call] = sa_func_typ.get_call_type(
                                    self._typingctx,
                                    [one_index_typ, types.intp], {})
                            new_body.append(
                                ir.Assign(slice_addition_call, tmpvar, loc))
                        else:
                            acc_call = ir.Expr.binop(
                                operator.add, getitemvar, index_vars[dim], loc)
                            new_body.append(
                                ir.Assign(acc_call, tmpvar, loc))

                    # Rebuild the index tuple from the per-dimension results
                    # and re-issue the getitem with it.
                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(
                        ir.Assign(
                            ir.Expr.getitem(stmt.value.value, s_index_var, loc),
                            stmt.target, loc))
            else:
                # Anything that is not a relative getitem is kept unchanged.
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the size of the kernel by finding the maximum absolute value
        # index used in the kernel specification.
        neighborhood = [[0, 0] for _ in range(ndim)]
        if len(kernel_consts) == 0:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, tuple) or isinstance(index, list):
                for i in range(len(index)):
                    te = index[i]
                    # Tuple elements may be variables bound to constants.
                    if isinstance(te, ir.Var) and te.name in const_dict:
                        te = const_dict[te.name]
                    if isinstance(te, int):
                        neighborhood[i][0] = min(neighborhood[i][0], te)
                        neighborhood[i][1] = max(neighborhood[i][1], te)
                    else:
                        raise ValueError(
                            "stencil kernel index is not constant,"
                            "'neighborhood' option required")
                index_len = len(index)
            elif isinstance(index, int):
                neighborhood[0][0] = min(neighborhood[0][0], index)
                neighborhood[0][1] = max(neighborhood[0][1], index)
                index_len = 1
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")
            if index_len != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

    return (neighborhood, relatively_indexed)
def op_LOAD_GLOBAL(self, inst, res):
    """Emit an ``ir.Global`` for the global named by ``inst.arg``.

    The name is read from ``self.code_names`` and its value is obtained with
    ``self.get_global_value``; the resulting node is stored under ``res``.
    """
    global_name = self.code_names[inst.arg]
    global_value = self.get_global_value(global_name)
    self.store(ir.Global(global_name, global_value, loc=self.loc), res)
def run(self):
    """
    This function rewrites the name of NumPy functions that exist in
    self.function_name_map, e.g. np.sum(a) would produce the following:

    np.sum() --> numba_dppy.dpnp.sum()

    ---------------------------------------------------------------------
    Numba IR Before Rewrite:
    ---------------------------------------------------------------------

        $2load_global.0 = global(np: <module 'numpy' from 'numpy/__init__.py'>) ['$2load_global.0']
        $4load_method.1 = getattr(value=$2load_global.0, attr=sum) ['$2load_global.0', '$4load_method.1']
        $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                               kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

    ---------------------------------------------------------------------
    Numba IR After Rewrite:
    ---------------------------------------------------------------------

        $dppy_replaced_var.0 = global(numba_dppy: <module 'numba_dppy' from 'numba_dppy/__init__.py'>) ['$dppy_replaced_var.0']
        $dpnp_var.1 = getattr(value=$dppy_replaced_var.0, attr=dpnp) ['$dpnp_var.1', '$dppy_replaced_var.0']
        $4load_method.1 = getattr(value=$dpnp_var.1, attr=sum) ['$4load_method.1', '$dpnp_var.1']
        $8call_method.3 = call $4load_method.1(a, func=$4load_method.1, args=[Var(a, test_rewrite.py:7)],
                                               kws=(), vararg=None) ['$4load_method.1', '$8call_method.3', 'a']

    ---------------------------------------------------------------------

    A ``getattr`` is only rewritten when the variable it reads from is
    assigned in the same block, either from a global whose ``__name__`` or
    from another ``getattr`` whose attribute is listed in
    ``self.function_name_map[attr][0]``.  Anything else is left untouched.

    Returns
    -------
    bool
        True if at least one lookup was rewritten.
    """
    func_ir = self.state.func_ir
    blocks = func_ir.blocks
    topo_order = find_topo_order(blocks)
    replaced = False

    for label in topo_order:
        block = blocks[label]
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(
                    stmt.value, ir.Expr):
                rhs = stmt.value
                # replace np.FOO with name from self.function_name_map["FOO"]
                # e.g. np.sum will be replaced with numba_dppy.dpnp.sum
                if (rhs.op == "getattr"
                        and rhs.attr in self.function_name_map):
                    entry = self.function_name_map[rhs.attr]

                    # find_variable_assignment returns None when the
                    # variable is not assigned in this block (e.g. the
                    # module was loaded in an earlier block); such lookups
                    # cannot be matched and are left as they are.
                    defn = block.find_variable_assignment(rhs.value.name)
                    module_node = defn.value if defn is not None else None

                    if isinstance(module_node, ir.Global):
                        # Globals that are not modules (e.g. arrays) may
                        # lack ``__name__``.
                        source_name = getattr(
                            module_node.value, "__name__", None)
                    elif (isinstance(module_node, ir.Expr)
                            and module_node.op == "getattr"):
                        source_name = module_node.attr
                    else:
                        source_name = None

                    if source_name is not None and source_name in entry[0]:
                        rhs.attr = entry[1]
                        global_module = rhs.value
                        scope = global_module.scope
                        loc = global_module.loc

                        g_dppy_var = ir.Var(scope,
                                            mk_unique_var("$2load_global"),
                                            loc)
                        # We are trying to rename
                        # np.function_name/np.linalg.function_name with
                        # numba_dppy.dpnp.function_name.
                        # Hence, we need to have a global variable
                        # representing module numba_dppy.
                        # Next, we add attribute dpnp to global module
                        # numba_dppy to represent numba_dppy.dpnp.
                        g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                        g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                        dpnp_var = ir.Var(scope,
                                          mk_unique_var("$4load_attr"), loc)
                        getattr_dpnp = ir.Expr.getattr(
                            g_dppy_var, "dpnp", loc)
                        dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                        # Point the original getattr at numba_dppy.dpnp and
                        # emit the two new definitions ahead of it.
                        rhs.value = dpnp_var
                        new_body.append(g_dppy_assign)
                        new_body.append(dpnp_assign)
                        func_ir._definitions[dpnp_var.name] = [
                            getattr_dpnp
                        ]
                        func_ir._definitions[g_dppy_var.name] = [g_dppy]
                        replaced = True

            new_body.append(stmt)
        block.body = new_body
    return replaced
def run(self):
    """Rewrite array-method calls ``A.func(...)`` into ``dpnp.func(A, ...)``.

    For every ``getattr`` on a variable typed as a NumPy array whose
    attribute is in ``self.function_name_map``, the lookup is redirected to
    ``numba_dppy.dpnp`` and the function variable is retyped.  Calls through
    such a variable get the array prepended to their positional arguments
    and their call signature recomputed.

    ``self.typemap``, ``self.calltypes`` and ``func_ir._definitions`` are
    updated in place.

    Returns
    -------
    bool
        True if at least one attribute lookup was rewritten.
    """
    typingctx = self.state.typingctx
    func_ir = self.state.func_ir
    blocks = func_ir.blocks
    # save array arg to call
    # call_varname -> array
    saved_arr_arg = {}
    topo_order = find_topo_order(blocks)
    replaced = False

    for label in topo_order:
        block = blocks[label]
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(
                    stmt.value, ir.Expr):
                lhs = stmt.target.name
                rhs = stmt.value

                # replace A.func with np.func, and save A in saved_arr_arg
                if (rhs.op == "getattr"
                        and rhs.attr in self.function_name_map
                        and isinstance(self.typemap[rhs.value.name],
                                       types.npytypes.Array)):
                    arr = rhs.value
                    saved_arr_arg[lhs] = arr
                    scope = arr.scope
                    loc = arr.loc

                    g_dppy_var = ir.Var(scope,
                                        mk_unique_var("$load_global"), loc)
                    self.typemap[g_dppy_var.name] = types.misc.Module(
                        numba_dppy)
                    g_dppy = ir.Global("numba_dppy", numba_dppy, loc)
                    g_dppy_assign = ir.Assign(g_dppy, g_dppy_var, loc)

                    dpnp_var = ir.Var(scope, mk_unique_var("$load_attr"),
                                      loc)
                    self.typemap[dpnp_var.name] = types.misc.Module(
                        numba_dppy.dpnp)
                    getattr_dpnp = ir.Expr.getattr(g_dppy_var, "dpnp", loc)
                    dpnp_assign = ir.Assign(getattr_dpnp, dpnp_var, loc)

                    rhs.value = dpnp_var
                    new_body.append(g_dppy_assign)
                    new_body.append(dpnp_assign)
                    # Each new variable is defined by the node assigned to
                    # it: the module global for g_dppy_var, the getattr for
                    # dpnp_var.
                    func_ir._definitions[g_dppy_var.name] = [g_dppy]
                    func_ir._definitions[dpnp_var.name] = [getattr_dpnp]

                    # update func var type
                    func = getattr(numba_dppy.dpnp, rhs.attr)
                    func_typ = get_dpnp_func_typ(func)
                    self.typemap.pop(lhs)
                    self.typemap[lhs] = func_typ
                    replaced = True

                if rhs.op == "call" and rhs.func.name in saved_arr_arg:
                    # add array as first arg
                    arr = saved_arr_arg[rhs.func.name]
                    # update call type signature to include array arg
                    old_sig = self.calltypes.pop(rhs)
                    # Positional types are taken from the old signature
                    # (some types become Const in the signature); keyword
                    # types are looked up in the typemap since e.g. argsort
                    # requires kws for typing.
                    argtyps = old_sig.args[:len(rhs.args)]
                    kwtyps = {
                        name: self.typemap[v.name]
                        for name, v in rhs.kws
                    }
                    self.calltypes[rhs] = self.typemap[
                        rhs.func.name].get_call_type(
                            typingctx,
                            [self.typemap[arr.name]] + list(argtyps),
                            kwtyps,
                        )
                    # ``args`` may be a tuple; normalise before prepending.
                    rhs.args = [arr] + list(rhs.args)

            new_body.append(stmt)
        block.body = new_body
    return replaced
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """ Converts a set of stencil kernel blocks to a parfor.

    Parameters
    ----------
    label
        Block label used to fetch the equivalence set from
        ``self.array_analysis``.
    in_args
        Stencil input arguments; ``in_args[0]`` is the input array whose
        type, scope and location drive the generated code.
    out_arr
        Output array variable, or ``None`` to have one created with
        ``np.full`` in the parfor init block.
    stencil_ir
        Kernel IR whose ``blocks`` become the parfor body.
    index_offsets, arg_to_arr_dict
        Forwarded to ``self._replace_stencil_accesses``.
    target
        Variable that is assigned the output array after the parfor.
    return_type
        Stencil return type; ``return_type.dtype`` is used as element type.
    stencil_func
        Stencil function object; its ``options`` are checked for ``"cval"``.

    Returns
    -------
    list
        Generated IR nodes, ending with the parfor followed by the
        assignment of the output array to ``target``.

    Raises
    ------
    ValueError
        If a ``cval`` option is present and its type does not match (or,
        when ``out_arr`` is given, cannot be converted to)
        ``return_type.dtype``.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT >= 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)

    ir_utils.apply_copy_propagate(
        stencil_blocks,
        in_cps,
        name_var_table,
        self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(
            numba.parfors.parfor.LoopNest(parfor_vars[i], start_ind,
                                          last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        # Several dimensions: the index is a tuple built from the
        # per-dimension loop variables.
        parfor_ind_var = ir.Var(scope,
                                mk_unique_var("$parfor_index_tuple_var"),
                                loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    if out_arr is None:
        # No output array given: create one with np.full(shape, cval-or-0,
        # dtype) in the init block.
        in_arr_typ = self.typemap[in_arr.name]

        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        # Load ``np`` and then the attribute named after the return dtype.
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)
    else:  # out is present
        if "cval" in stencil_func.options:  # do out[:] = cval
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            cval_ty = typing.typeof.typeof(cval)
            if not self.typingctx.can_convert(cval_ty, return_type.dtype):
                msg = "cval type does not match stencil return type."
                raise ValueError(msg)

            # get slice ref
            slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
            slice_fn_ty = self.typingctx.resolve_value_type(slice)
            self.typemap[slice_var.name] = slice_fn_ty
            slice_g = ir.Global('slice', slice, loc)
            slice_assigned = ir.Assign(slice_g, slice_var, loc)
            init_block.body.append(slice_assigned)

            sig = self.typingctx.resolve_function_type(
                slice_fn_ty, (types.none, ) * 2, {})

            # NOTE(review): the signature above is resolved for two ``none``
            # arguments while the call expression is built with empty
            # ``args`` -- confirm this is intended.
            callexpr = ir.Expr.call(func=slice_var, args=(), kws=(),
                                    loc=loc)

            self.calltypes[callexpr] = sig
            slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"),
                                    loc)
            self.typemap[slice_inst_var.name] = types.slice2_type
            slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
            init_block.body.append(slice_assign)

            # get const val for cval
            cval_const_val = ir.Const(return_type.dtype(cval), loc)
            cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"),
                                    loc)
            self.typemap[cval_const_var.name] = return_type.dtype
            cval_const_assign = ir.Assign(cval_const_val,
                                          cval_const_var, loc)
            init_block.body.append(cval_const_assign)

            # do setitem on `out` array
            setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
                                           slice_inst_var, cval_const_var,
                                           loc)
            init_block.body.append(setitemexpr)
            sig = signature(types.none, self.typemap[out_arr.name],
                            self.typemap[slice_inst_var.name],
                            self.typemap[out_arr.name].dtype)
            self.calltypes[setitemexpr] = sig

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # The exit block stores the kernel result at the current parfor index.
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    dummy_loc = ir.Loc("stencilparfor_dummy", -1)
    ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
    cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc), ret_const_var,
                                  dummy_loc)
    stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)

    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(ret_const_var, dummy_loc),
    )
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # Drop the last statement of the highest-labelled block again.
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfors.parfor.Parfor(loopnests, init_block,
                                         stencil_blocks, loc,
                                         parfor_ind_var, equiv_set,
                                         pattern, self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def op_PRINT_ITEM(self, inst, item, printvar, res):
    """Emit IR for ``print(item)``.

    The ``print`` builtin is stored under ``printvar`` and the call
    expression under ``res``.
    """
    value = self.get(item)
    self.store(value=ir.Global("print", print, loc=self.loc), name=printvar)
    print_fn = self.get(printvar)
    print_call = ir.Expr.call(print_fn, (value,), (), loc=self.loc)
    self.store(value=print_call, name=res)
def op_PRINT_NEWLINE(self, inst, printvar, res):
    """Emit IR for an argument-less ``print()`` call.

    The ``print`` builtin is stored under ``printvar`` and the call
    expression under ``res``.
    """
    self.store(value=ir.Global("print", print, loc=self.loc), name=printvar)
    print_fn = self.get(printvar)
    print_call = ir.Expr.call(print_fn, (), (), loc=self.loc)
    self.store(value=print_call, name=res)