def inline_array(array_var, expr, stmts, list_vars, dels):
    """Inline a 1-D ``numpy.array(<list>)`` call as explicit initialization.

    ``require`` is used to check that ``expr`` is a call to ``numpy.array``,
    that its first argument is one of ``list_vars``, and that the typed
    result is a one-dimensional array-compatible type.  The function then
    emits IR that allocates ``array_var`` through ``np.empty`` and stores
    every element of the list with a ``SetItem``.

    Extra statements produced will be appended to "stmts", followed by
    everything in ``dels``.

    NOTE(review): ``func_ir``, ``typemap``, ``calltypes``, ``scope``,
    ``context`` and ``debug_print`` are free names in this block; they are
    presumably provided by an enclosing scope that is not visible here.

    Returns True once all statements have been appended.
    """
    callname = guard(find_callname, func_ir, expr)
    require(callname and callname[1] == 'numpy' and callname[0] == 'array')
    list_var = expr.args[0]
    require(list_var.name in list_vars)
    ret_type = calltypes[expr].return_type
    require(isinstance(ret_type, types.ArrayCompatible)
            and ret_type.ndim == 1)

    loc = expr.loc
    array_typ = typemap[array_var.name]
    debug_print("inline array_var = ", array_var, " list_var = ", list_var)
    elem_typ = array_typ.dtype
    elements, _ = find_build_sequence(func_ir, list_var)
    intp = types.intp

    # Constant holding the element count, plus a 1-tuple wrapping it.
    size_var = ir.Var(scope, mk_unique_var("size"), loc)
    size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
    typemap[size_var.name] = intp
    typemap[size_tuple_var.name] = types.UniTuple(intp, 1)
    stmts.append(_new_definition(
        func_ir, size_var, ir.Const(len(elements), loc=loc), loc))
    stmts.append(_new_definition(
        func_ir, size_tuple_var,
        ir.Expr.build_tuple(items=[size_var], loc=loc), loc))

    # array_var = np.empty(size)
    empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
    fnty = get_np_ufunc_typ(np.empty)
    # The resolved signature itself is not used afterwards; the call is kept
    # because it is part of the original sequence of operations.
    context.resolve_function_type(fnty, (intp,), {})
    typemap[empty_func.name] = fnty
    stmts.append(_new_definition(
        func_ir, empty_func, ir.Global('empty', np.empty, loc=loc), loc))
    empty_call = ir.Expr.call(empty_func, [size_var], {}, loc=loc)
    calltypes[empty_call] = typing.signature(array_typ, intp)
    stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

    # array_var[k] = <k-th list element>, one constant index per element.
    for position, element in enumerate(elements):
        index_var = ir.Var(scope, mk_unique_var("index"), loc)
        typemap[index_var.name] = intp
        stmts.append(_new_definition(
            func_ir, index_var, ir.Const(position, loc), loc))
        store = ir.SetItem(array_var, index_var, element, loc)
        calltypes[store] = typing.signature(
            types.none, array_typ, intp, elem_typ)
        stmts.append(store)

    stmts.extend(dels)
    return True
def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope, loc):
    """Return the last-index value to use for one stencil dimension.

    When ``end_length != 0`` is false, ``dim_size`` is returned unchanged and
    nothing is emitted.  Otherwise statements are appended to ``gen_nodes``
    that place ``end_length`` in a fresh variable, load a jitted
    ``_compute_last_ind`` as a global and call it with
    ``(dim_size, <end_length variable>)``; the variable that receives the
    call result is returned.  Types for the new variables and for the call
    are recorded in ``self.typemap`` and ``self.calltypes``.
    """
    if end_length != 0:
        # set last index to size minus stencil size to avoid invalid
        # memory access
        offset_var = ir.Var(scope, mk_unique_var("stencil_const_var"), loc)
        self.typemap[offset_var.name] = types.intp
        # A plain number is wrapped in a Const; anything else is assigned
        # to the variable as-is.
        offset_value = (ir.Const(end_length, loc)
                        if isinstance(end_length, numbers.Number)
                        else end_length)
        gen_nodes.append(ir.Assign(offset_value, offset_var, loc))

        result_var = ir.Var(scope, mk_unique_var("last_ind"), loc)
        self.typemap[result_var.name] = types.intp

        helper_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
        jitted = numba.njit(_compute_last_ind)
        helper_typ = types.functions.Dispatcher(jitted)
        self.typemap[helper_var.name] = helper_typ
        gen_nodes.append(
            ir.Assign(ir.Global("_compute_last_ind", jitted, loc),
                      helper_var, loc))

        call = ir.Expr.call(helper_var, [dim_size, offset_var], (), loc)
        self.calltypes[call] = helper_typ.get_call_type(
            self.typingctx, [types.intp, types.intp], {})
        gen_nodes.append(ir.Assign(call, result_var, loc))
        return result_var

    return dim_size
def run(self):
    """Find all calls to StencilFuncs in the IR and convert them to parfors.

    A call whose callee is a ``StencilFunc`` is replaced, in place, by the
    nodes returned from ``self._mk_stencil_parfor``.  A call that resolves to
    ``numba.stencil`` has its right-hand side replaced by ``Const(0)``.
    Returns ``None``; the function IR is modified in place.

    Raises:
        ValueError: if any argument of a stencil call has a tuple type.
    """
    from numba.stencil import StencilFunc

    # Map every callee variable name that refers to a StencilFunc to it.
    call_table, _ = get_call_table(self.func_ir.blocks)
    stencil_funcs = {
        varname: definitions[0]
        for varname, definitions in call_table.items()
        if isinstance(definitions[0], StencilFunc)
    }
    if not stencil_funcs:
        return  # nothing to transform

    for label, block in self.func_ir.blocks.items():
        # Walk a snapshot back to front so that splicing nodes into
        # block.body leaves the positions still to be visited valid.
        for pos, stmt in reversed(list(enumerate(block.body))):
            if not (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op == 'call'):
                continue
            call = stmt.value

            if call.func.name in stencil_funcs:
                # Found a call to a StencilFunc.
                kws = dict(call.kws)
                # Argument position -> argument variable.
                input_dict = dict(enumerate(call.args))
                in_args = call.args
                arg_typemap = tuple(self.typemap[arg.name] for arg in in_args)
                if any(isinstance(arg_type, types.BaseTuple)
                       for arg_type in arg_typemap):
                    raise ValueError("Tuple parameters not supported " \
                        "for stencil kernels in parallel=True mode.")

                out_arr = kws.get('out')

                # Get the StencilFunc object corresponding to this call.
                sf = stencil_funcs[call.func.name]
                stencil_blocks, rt, arg_to_arr_dict = get_stencil_blocks(
                    sf, self.typingctx, arg_typemap, block.scope, block.loc,
                    input_dict, self.typemap, self.calltypes)
                index_offsets = sf.options.get('index_offsets', None)
                gen_nodes = self._mk_stencil_parfor(
                    label, in_args, out_arr, stencil_blocks, index_offsets,
                    stmt.target, rt, sf, arg_to_arr_dict)
                block.body = (block.body[:pos] + gen_nodes
                              + block.body[pos + 1:])
            elif (guard(find_callname, self.func_ir, call)
                    == ('stencil', 'numba')):
                # Found a call to a stencil via numba.stencil():
                # remove dummy stencil() call
                stmt.value = ir.Const(0, stmt.loc)
def _loop_lift_modify_call_block(liftedloop, block, inputs, outputs, returnto):
    """
    Build the block that replaces a lifted loop in the top-level function.

    The new block (same scope and location as ``block``) loads
    ``liftedloop`` as a constant, calls it with the variables named in
    ``inputs``, unpacks element ``k`` of the result into the variable named
    ``outputs[k]``, and finally jumps to ``returnto``.  The new block is
    returned; ``block`` itself is only read.
    """
    scope, loc = block.scope, block.loc
    call_block = ir.Block(scope=scope, loc=loc)

    def emit(target, value):
        # Every statement except the final jump is a plain assignment.
        call_block.append(ir.Assign(target=target, value=value, loc=loc))

    # load loop
    loop_const = ir.Const(value=liftedloop, loc=loc)
    loop_var = scope.make_temp(loc=loc)
    emit(loop_var, loop_const)

    # call loop
    call_args = [scope.get_exact(name) for name in inputs]
    call_expr = ir.Expr.call(func=loop_var, args=call_args, kws=(), loc=loc)
    # temp variable for the return value
    result_var = scope.make_temp(loc=loc)
    emit(result_var, call_expr)

    # unpack return value
    for position, out_name in enumerate(outputs):
        out_var = scope.get_exact(out_name)
        emit(out_var,
             ir.Expr.static_getitem(value=result_var, index=position,
                                    index_var=None, loc=loc))

    # jump to next block
    call_block.append(ir.Jump(target=returnto, loc=loc))
    return call_block
def _gen_col_var(self, out_var, args, col_var):
    """Build IR blocks that compute a NaN-skipping variance of ``col_var``.

    The blocks returned by ``self._gen_col_mean`` (which fill a fresh
    ``mean_val`` variable -- presumably with the column mean; confirm in
    ``_gen_col_mean``) are relabelled and then extended with the IR of the
    kernel ``f`` below.  In that kernel ``A`` is renamed to ``col_var``,
    ``s`` to ``out_var`` and ``m`` to the mean variable, so ``out_var`` ends
    up holding ``sum((val - m)**2) / (count - 1)`` over the non-NaN entries,
    or NaN when at most one such entry exists.

    Side effect: ``ir_utils._max_label`` is advanced twice so the labels of
    the two block groups do not collide.

    Returns the combined ``{label: block}`` dictionary.
    """
    loc = out_var.loc
    scope = out_var.scope
    # calculate mean first
    mean_var = ir.Var(scope, mk_unique_var("mean_val"), loc)
    f_mean_blocks = self._gen_col_mean(mean_var, args, col_var)
    # shift the mean blocks past every label handed out so far
    f_mean_blocks = add_offset_to_labels(f_mean_blocks, ir_utils._max_label+1)
    ir_utils._max_label = max(f_mean_blocks.keys())
    # the topologically last mean block is where the variance code is attached
    m_last_label = find_topo_order(f_mean_blocks)[-1]
    remove_none_return_from_block(f_mean_blocks[m_last_label])
    # Kernel whose IR is extracted below; the parameter names A, s and m are
    # significant because they are substituted by name afterwards.
    def f(A, s, m):
        count = 0
        for i in numba.parfor.prange(len(A)):
            val = A[i]
            if not np.isnan(val):
                s += (val-m)**2
                count += 1
        if count <= 1:
            s = np.nan
        else:
            s = s/(count-1)
    f_blocks = get_inner_ir(f)
    replace_var_names(f_blocks, {'A': col_var.name})
    replace_var_names(f_blocks, {'s': out_var.name})
    replace_var_names(f_blocks, {'m': mean_var.name})
    # initialise the accumulator: out_var = 0.0
    f_blocks[0].body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
    # attach first var block to last mean block
    f_mean_blocks[m_last_label].body.extend(f_blocks[0].body)
    f_blocks.pop(0)
    f_blocks = add_offset_to_labels(f_blocks, ir_utils._max_label+1)
    # add offset to jump of first f_block since it didn't go through call
    f_mean_blocks[m_last_label].body[-1].target += ir_utils._max_label+1
    ir_utils._max_label = max(f_blocks.keys())
    f_mean_blocks.update(f_blocks)
    return f_mean_blocks
def _gen_size_call(self, var, i):
    """Build the IR statements that read dimension ``i`` of ``var.shape``.

    Three assignments are returned, in order: the ``shape`` attribute
    lookup, a constant holding ``i``, and the ``static_getitem`` that
    extracts the size.  Types of the three new variables are recorded in
    ``self.typemap``; the getitem expression is registered in
    ``self.calltypes`` with the value ``None``.
    """
    scope, loc, base = var.scope, var.loc, var.name
    suffix = str(i)
    ndims = self._get_ndims(base)

    # attr call: A_sh_attr = getattr(A, shape)
    shape_var = ir.Var(scope, mk_unique_var(base + "_sh_attr" + suffix), loc)
    self.typemap[shape_var.name] = types.containers.UniTuple(
        types.intp, ndims)
    shape_assign = ir.Assign(ir.Expr.getattr(var, "shape", loc),
                             shape_var, loc)

    # const var for dim: $constA0 = Const(0)
    dim_var = ir.Var(scope, mk_unique_var("$const" + base + suffix), loc)
    self.typemap[dim_var.name] = types.intp
    dim_assign = ir.Assign(ir.Const(i, loc), dim_var, loc)

    # get size: Asize0 = A_sh_attr[0]
    size_var = ir.Var(scope, mk_unique_var(base + "size" + suffix), loc)
    self.typemap[size_var.name] = types.intp
    size_expr = ir.Expr.static_getitem(shape_var, i, dim_var, loc)
    self.calltypes[size_expr] = None
    size_assign = ir.Assign(size_expr, size_var, loc)

    return [shape_assign, dim_assign, size_assign]
def rewrite_statement(func_ir, stmt, new_val):
    """
    Rewrites the stmt as a ir.Const new_val and fixes up the entries in
    func_ir._definitions

    Args:
        func_ir: function IR whose ``_definitions`` table lists the values
            assigned to ``stmt.target``.
        stmt: assignment statement to rewrite in place.
        new_val: Python value wrapped in the replacement ``ir.Const``.

    Raises:
        ValueError: if the value being replaced is not present in the
            definitions list of ``stmt.target`` (from ``list.index``).
    """
    # Remember the value being replaced *before* overwriting it, so the
    # lookup in the definitions list does not depend on any name bound
    # outside this function.
    old_value = stmt.value
    stmt.value = ir.Const(new_val, stmt.loc)
    defns = func_ir._definitions[stmt.target.name]
    defns[defns.index(old_value)] = stmt.value
def _gen_rolling_init(self, win_size, func, center):
    """Generate the IR nodes that set up a rolling-window stencil.

    Each small kernel below is compiled to Numba IR, its argument nodes are
    replaced with the actual IR variables, and its body (minus the trailing
    return sequence) is appended to ``nodes``; the target of the last
    appended node then holds the kernel's value.

    Values computed (``w`` is ``win_size``):
      * left length:  ``-w + 1``, or ``-(w // 2)`` when ``center`` is truthy
      * right length: ``0``, or ``w // 2`` when ``center`` is truthy
      * window tuple: ``((left, right),)``
      * index offsets: ``(right,)``, or ``(left,)`` when ``func == 'apply'``

    Args:
        win_size: IR variable holding the window size; its scope and
            location are used for the generated zero constant.
        func: name of the rolling function; only ``'apply'`` is treated
            specially.
        center: whether the window is centered.

    Returns:
        ``(index_offsets, win_tuple, nodes)`` -- two IR variables and the
        list of generated statements.
    """
    nodes = []
    scope = win_size.scope
    loc = win_size.loc
    # Default right length is the constant 0.  The third argument of ir.Var
    # is the source location (the scope object was passed here before).
    right_length = ir.Var(scope, mk_unique_var('zero_var'), loc)
    nodes.append(ir.Assign(ir.Const(0, loc), right_length, loc))

    def f(w):
        return -w + 1
    f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
    replace_arg_nodes(f_block, [win_size])
    nodes.extend(f_block.body[:-2])  # remove none return
    left_length = nodes[-1].target

    if center:
        def f(w):
            return -(w // 2)
        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [win_size])
        nodes.extend(f_block.body[:-2])  # remove none return
        left_length = nodes[-1].target

        def f(w):
            return (w // 2)
        f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [win_size])
        nodes.extend(f_block.body[:-2])  # remove none return
        right_length = nodes[-1].target

    def f(a, b):
        return ((a, b), )
    f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
    replace_arg_nodes(f_block, [left_length, right_length])
    nodes.extend(f_block.body[:-2])  # remove none return
    win_tuple = nodes[-1].target

    index_offsets = [right_length]
    if func == 'apply':
        index_offsets = [left_length]

    def f(a):
        return (a, )
    f_block = compile_to_numba_ir(f, {}).blocks.popitem()[1]
    replace_arg_nodes(f_block, index_offsets)
    nodes.extend(f_block.body[:-2])  # remove none return
    index_offsets = nodes[-1].target

    return index_offsets, win_tuple, nodes
def convert_size_to_var(size_var, typemap, scope, loc, nodes):
    """Return an ``ir.Var`` for ``size_var``, creating one for integers.

    An ``ir.Var`` argument is returned unchanged.  For an ``int`` a fresh
    ``$alloc_size`` variable is created, the assignment of the constant to
    it is appended to ``nodes``, and -- only when ``typemap`` is truthy --
    its type is recorded as ``types.intp``.  Any other argument type trips
    the assertion.
    """
    if not isinstance(size_var, int):
        assert isinstance(size_var, ir.Var)
        return size_var

    const_size_var = ir.Var(scope, mk_unique_var("$alloc_size"), loc)
    if typemap:
        typemap[const_size_var.name] = types.intp
    nodes.append(ir.Assign(ir.Const(size_var, loc), const_size_var, loc))
    return const_size_var
def _handle_h5_File_call(self, assign, lhs, rhs):
    """
    Handle h5py.File calls like:
      f = h5py.File(file_name, mode)

    A new variable holding the constant 0 (parallel arg = False for this
    stage) is appended to the call's argument list.  Returns the statements
    that replace the call: the constant's assignment followed by ``assign``.
    """
    scope, loc = lhs.scope, lhs.loc
    flag_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
    flag_assign = ir.Assign(ir.Const(0, loc), flag_var, loc)
    rhs.args.append(flag_var)
    return [flag_assign, assign]
def _gen_h5write(self, f_id, dset_var, arr_var):
    """Generate the IR statements for a call to ``hpat.pio_api.h5write``.

    The call receives, in order: ``f_id``, ``dset_var``, the dimension count
    (taken from ``self.h5_dsets_sizes[dset_var.name]``) as an ``np.int32``
    constant, a tuple of zeros as starts, ``arr_var.shape`` as sizes, a zero
    constant, and ``arr_var``.  Returns the list of statements; the call
    result is stored in a fresh ``$pio_ret_var`` variable.
    """
    scope = dset_var.scope
    loc = dset_var.loc

    def fresh(prefix):
        # New uniquely named IR variable in the dataset variable's scope.
        return ir.Var(scope, mk_unique_var(prefix), loc)

    # g_pio_var = Global(hpat.pio_api)
    api_var = fresh("$pio_g_var")
    api_assign = ir.Assign(ir.Global('pio_api', hpat.pio_api, loc),
                           api_var, loc)
    # attr call: h5write_attr = getattr(g_pio_var, h5write)
    write_getattr = ir.Expr.getattr(api_var, "h5write", loc)
    write_var = fresh("$h5write_attr")
    write_attr_assign = ir.Assign(write_getattr, write_var, loc)

    # ndims args
    ndims = len(self.h5_dsets_sizes[dset_var.name])
    ndims_var = fresh("$h5_ndims")
    ndims_assign = ir.Assign(ir.Const(np.int32(ndims), loc), ndims_var, loc)
    # sizes arg
    sizes_var = fresh("$h5_sizes")
    sizes_assign = ir.Assign(ir.Expr.getattr(arr_var, 'shape', loc),
                             sizes_var, loc)

    zero_var = fresh("$const_zero")
    zero_assign = ir.Assign(ir.Const(0, loc), zero_var, loc)
    # starts: assign to zeros
    starts_var = fresh("$h5_starts")
    starts_assign = ir.Assign(
        ir.Expr.build_tuple([zero_var] * ndims, loc), starts_var, loc)

    # err = h5write(f_id)
    err_var = fresh("$pio_ret_var")
    write_call = ir.Expr.call(
        write_var,
        [f_id, dset_var, ndims_var, starts_var, sizes_var, zero_var, arr_var],
        (), loc)

    return [
        api_assign,
        write_attr_assign,
        ndims_assign,
        zero_assign,
        starts_assign,
        sizes_assign,
        ir.Assign(write_call, err_var, loc),
    ]
def _gen_h5read_call(self, f_id, dset, start_vars, size_vars, lhs_var, scope, loc, out):
    """Append to ``out`` the IR statements calling ``hpat.pio_api.h5read``.

    The call receives, in order: ``f_id``, ``dset``, ``len(size_vars)`` as
    an ``np.int32`` constant, a tuple built from ``start_vars`` (all zeros
    when ``start_vars`` is empty or None), a tuple built from
    ``size_vars``, a zero constant, and ``lhs_var``.  The call result goes
    to a fresh ``$h5_err_var`` variable.  Nothing is returned; ``out`` is
    extended in place.
    """
    def fresh(prefix):
        # New uniquely named IR variable in the given scope.
        return ir.Var(scope, mk_unique_var(prefix), loc)

    # g_pio_var = Global(hpat.pio_api)
    api_var = fresh("$pio_g_var")
    api_assign = ir.Assign(ir.Global('pio_api', hpat.pio_api, loc),
                           api_var, loc)
    # attr call: h5size_attr = getattr(g_pio_var, h5read)
    read_getattr = ir.Expr.getattr(api_var, "h5read", loc)
    read_var = fresh("$h5read_attr")
    out.extend([api_assign, ir.Assign(read_getattr, read_var, loc)])

    # ndims args
    ndims = len(size_vars)
    ndims_var = fresh("$h5_ndims")
    ndims_assign = ir.Assign(ir.Const(np.int32(ndims), loc), ndims_var, loc)
    # sizes arg
    sizes_var = fresh("$h5_sizes")
    sizes_assign = ir.Assign(ir.Expr.build_tuple(size_vars, loc),
                             sizes_var, loc)

    zero_var = fresh("$const_zero")
    zero_assign = ir.Assign(ir.Const(0, loc), zero_var, loc)
    # starts: assign to zeros
    if not start_vars:
        start_vars = [zero_var] * ndims
    starts_var = fresh("$h5_starts")
    starts_assign = ir.Assign(ir.Expr.build_tuple(start_vars, loc),
                              starts_var, loc)
    out.extend([ndims_assign, zero_assign, starts_assign, sizes_assign])

    err_var = fresh("$h5_err_var")
    read_call = ir.Expr.call(
        read_var,
        [f_id, dset, ndims_var, starts_var, sizes_var, zero_var, lhs_var],
        (), loc)
    out.append(ir.Assign(read_call, err_var, loc))
    return
def _replace_freevars(blocks, args):
    """
    Replace ir.FreeVar(...) with real variables from parent function

    Every assignment whose value is an ``ir.FreeVar`` gets that value
    replaced by ``args[index]``: directly when the entry is an ``ir.Var``,
    otherwise wrapped in an ``ir.Const`` at the statement's location.  The
    blocks are modified in place.
    """
    for block in blocks.values():
        for assign in block.find_insts(ir.Assign):
            freevar = assign.value
            if not isinstance(freevar, ir.FreeVar):
                continue
            position = freevar.index
            assert position < len(args)
            replacement = args[position]
            if isinstance(replacement, ir.Var):
                assign.value = replacement
            else:
                assign.value = ir.Const(replacement, assign.loc)
def _mk_range_args(typemap, start, stop, step, scope, loc):
    """Build the argument variables for a ``range`` call.

    Each of ``start``, ``stop`` and ``step`` may be an ``ir.Var`` (used
    as-is) or an ``int`` (assigned to a fresh variable through an
    ``ir.Const``; the variable is typed ``types.intp`` when ``typemap`` is
    truthy).  Any other type trips an assertion.

    Returns:
        ``(nodes, range_args)`` where ``nodes`` are the constant
        assignments that were generated and ``range_args`` is
        ``[stop]`` when ``start == 0 and step == 1``,
        ``[start, stop]`` when ``step == 1``,
        and ``[start, stop, step]`` otherwise.
    """
    nodes = []

    def as_var(value, prefix):
        # Materialize an int as a typed IR variable; pass variables through.
        if isinstance(value, ir.Var):
            return value
        assert isinstance(value, int)
        var = ir.Var(scope, mk_unique_var(prefix), loc)
        if typemap:
            typemap[var.name] = types.intp
        # ir.Assign needs the location as its third argument; the start and
        # step assignments used to omit it.
        nodes.append(ir.Assign(ir.Const(value, loc), var, loc))
        return var

    g_stop_var = as_var(stop, "$range_stop")
    if start == 0 and step == 1:
        return nodes, [g_stop_var]

    g_start_var = as_var(start, "$range_start")
    if step == 1:
        return nodes, [g_start_var, g_stop_var]

    g_step_var = as_var(step, "$range_step")
    return nodes, [g_start_var, g_stop_var, g_step_var]
def _handle_h5_File_call(self, assign, lhs, rhs):
    """
    Handle h5py.File calls like:
      f = h5py.File(file_name, mode)

    When ``rhs`` resolves to ``h5py.File``, its first argument is remembered
    in ``self.h5_files`` under the target's name and a new variable holding
    the constant 0 (parallel arg = False for this stage) is appended to the
    call's arguments.  Returns the replacement statements in that case and
    ``None`` for any other call.
    """
    if guard(find_callname, self.func_ir, rhs) != ('File', 'h5py'):
        return None

    self.h5_files[lhs.name] = rhs.args[0]
    scope, loc = lhs.scope, lhs.loc
    flag_var = ir.Var(scope, mk_unique_var("$const_parallel"), loc)
    flag_assign = ir.Assign(ir.Const(0, loc), flag_var, loc)
    rhs.args.append(flag_var)
    return [flag_assign, assign]
def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar):
    """Emit IR for ``res = base[slice(None, None)]``.

    ``slicevar``, ``nonevar`` and ``indexvar`` name the temporaries that
    receive the ``slice`` global, the ``None`` constant and the constructed
    slice object respectively.
    """
    target = self.get(base)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    none_var = self.get(nonevar)

    slice_call = ir.Expr.call(self.get(slicevar), (none_var, none_var), (),
                              loc=self.loc)
    self.store(value=slice_call, name=indexvar)

    self.store(
        value=ir.Expr.getitem(target, self.get(indexvar), loc=self.loc),
        name=res)
def _gen_col_sum(self, out_var, args, col_var):
    """Build IR blocks that compute a NaN-skipping sum of ``col_var``.

    The IR of the kernel ``f`` below is extracted, ``A`` is renamed to
    ``col_var`` and ``s`` to ``out_var``, and an ``out_var = 0.0``
    assignment is placed at the start of block 0.  ``out_var`` therefore
    ends up holding the sum of the non-NaN entries, or NaN when there are
    none.  ``args`` is not used in this function.

    Returns the ``{label: block}`` dictionary of the kernel.
    """
    # Kernel whose IR is extracted below; the parameter names A and s are
    # significant because they are substituted by name afterwards.
    def f(A, s):
        count = 0
        for i in numba.parfor.prange(len(A)):
            val = A[i]
            if not np.isnan(val):
                s += val
                count += 1
        if not count:
            s = np.nan
    f_blocks = get_inner_ir(f)
    replace_var_names(f_blocks, {'A': col_var.name})
    replace_var_names(f_blocks, {'s': out_var.name})
    loc = out_var.loc
    # initialise the accumulator: out_var = 0.0
    f_blocks[0].body.insert(0, ir.Assign(ir.Const(0.0, loc), out_var, loc))
    return f_blocks
def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar):
    """Emit IR for ``del base[slice(None, None)]``.

    ``slicevar``, ``nonevar`` and ``indexvar`` name the temporaries that
    receive the ``slice`` global, the ``None`` constant and the constructed
    slice object.  The resulting ``DelItem`` is appended to the current
    block.
    """
    target = self.get(base)

    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)
    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    none_var = self.get(nonevar)

    slice_call = ir.Expr.call(self.get(slicevar), (none_var, none_var), (),
                              loc=self.loc)
    self.store(value=slice_call, name=indexvar)

    self.current_block.append(
        ir.DelItem(target, self.get(indexvar), loc=self.loc))
def fix_dependencies(expr, varlist):
    """Double check if all variables in varlist are defined before
    expr is used. Try to move constant definition when the check fails.
    Bails out by raising GuardException if it can't be moved.

    For every variable that is not yet available at the assignment of
    ``expr``, and whose definition is an ``ir.Const``, a fresh variable
    with the same constant is defined immediately before that assignment.
    All such definitions are kept, in order, when several variables need
    one.

    NOTE(review): ``blocks``, ``livemap``, ``usedefs`` and ``func_ir`` are
    free names in this block; they are presumably provided by an enclosing
    scope that is not visible here.

    Returns:
        The variable list to use in place of ``varlist`` (original
        variables where available, the new constant variables otherwise).

    Raises:
        GuardException: if a missing variable is not a constant, or if no
            assignment of ``expr`` is found in any block.
    """
    debug_print = _make_debug_print("fix_dependencies")
    for label, block in blocks.items():
        scope = block.scope
        body = block.body
        defined = set()
        for i, inst in enumerate(body):
            if not isinstance(inst, ir.Assign):
                continue
            defined.add(inst.target.name)
            if inst.value == expr:
                new_varlist = []
                # Definitions to place right before position i.  They are
                # accumulated so that a later insertion does not discard an
                # earlier one when the body is rebuilt.
                hoisted = []
                for var in varlist:
                    # var must be defined before this inst, or live
                    # and not later defined.
                    if (var.name in defined or
                            (var.name in livemap[label] and
                             not (var.name in usedefs.defmap[label]))):
                        debug_print(var.name, " already defined")
                        new_varlist.append(var)
                    else:
                        debug_print(var.name, " not yet defined")
                        var_def = get_definition(func_ir, var.name)
                        if not isinstance(var_def, ir.Const):
                            raise GuardException
                        loc = var.loc
                        new_var = ir.Var(scope, mk_unique_var("new_var"), loc)
                        new_const = ir.Const(var_def.value, loc)
                        hoisted.append(
                            _new_definition(func_ir, new_var, new_const, loc))
                        block.body = body[:i] + hoisted + body[i:]
                        new_varlist.append(new_var)
                return new_varlist
    # when expr is not found in block
    raise GuardException
def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar, indexvar):
    """Emit IR for ``base[slice(start, None)] = value``.

    ``nonevar``, ``slicevar`` and ``indexvar`` name the temporaries that
    receive the ``None`` constant, the ``slice`` global and the constructed
    slice object.  The resulting ``SetItem`` is appended to the current
    block.
    """
    target = self.get(base)
    lower = self.get(start)

    self.store(value=ir.Const(None, loc=self.loc), name=nonevar)
    none_var = self.get(nonevar)
    self.store(value=ir.Global("slice", slice, loc=self.loc), name=slicevar)

    slice_call = ir.Expr.call(self.get(slicevar), (lower, none_var), (),
                              loc=self.loc)
    self.store(value=slice_call, name=indexvar)

    self.current_block.append(
        ir.SetItem(target, self.get(indexvar), self.get(value),
                   loc=self.loc))
def apply(self):
    """
    Rewrite `var = call <print function>(...)` as a sequence of
    `print(...)` and `var = const(None)`.

    Statements that are not keys of ``self.prints`` are carried over
    unchanged.  Returns a new block; ``self.block`` is left untouched.
    """
    rewritten = self.block.copy()
    rewritten.clear()
    for inst in self.block.body:
        if inst not in self.prints:
            rewritten.append(inst)
            continue
        call = self.prints[inst]
        rewritten.append(
            ir.Print(args=call.args, vararg=call.vararg, loc=call.loc))
        rewritten.append(
            ir.Assign(value=ir.Const(None, loc=call.loc),
                      target=inst.target, loc=inst.loc))
    return rewritten
def _gen_h5size(self, f_id, dset, ndims, scope, loc, out):
    """Append to ``out`` one ``hpat.pio_api.h5size`` call per dimension.

    For each dimension index ``0 .. ndims - 1`` an ``np.int32`` constant is
    assigned to a fresh variable and ``h5size(f_id, dset, <dim>)`` is called
    with it.  Returns the list of variables receiving the call results, in
    dimension order; ``out`` is extended in place.
    """
    def fresh(prefix):
        # New uniquely named IR variable in the given scope.
        return ir.Var(scope, mk_unique_var(prefix), loc)

    # g_pio_var = Global(hpat.pio_api)
    api_var = fresh("$pio_g_var")
    api_assign = ir.Assign(ir.Global('pio_api', hpat.pio_api, loc),
                           api_var, loc)
    # attr call: h5size_attr = getattr(g_pio_var, h5size)
    size_getattr = ir.Expr.getattr(api_var, "h5size", loc)
    size_func_var = fresh("$h5size_attr")
    out.extend([api_assign, ir.Assign(size_getattr, size_func_var, loc)])

    size_vars = []
    for dim in range(ndims):
        dim_var = fresh("$h5_dim_var")
        out.append(ir.Assign(ir.Const(np.int32(dim), loc), dim_var, loc))
        size_var = fresh("$h5_size_var")
        size_vars.append(size_var)
        size_call = ir.Expr.call(size_func_var, [f_id, dset, dim_var], (), loc)
        out.append(ir.Assign(size_call, size_var, loc))
    return size_vars
def op_LOAD_CONST(self, inst, res):
    """Store the code constant selected by ``inst.arg`` into ``res``.

    The value is read from ``self.code_consts`` and wrapped in an
    ``ir.Const`` at the current location.
    """
    self.store(ir.Const(self.code_consts[inst.arg], loc=self.loc), res)
def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood, standard_indexed):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Args:
        kernel: kernel IR; its ``blocks`` are rewritten in place and its
            ``arg_names`` identify the array arguments.
        index_names: names of the per-dimension index variables.
        ndim: number of dimensions of the input array.
        neighborhood: per-dimension ``[min, max]`` offsets, or ``None`` to
            derive them from the constant indices used in the kernel.
        standard_indexed: names of array arguments whose accesses are left
            untouched.

    Returns:
        ``(neighborhood, relatively_indexed)`` -- the given or computed
        neighborhood and the set of argument names that were accessed with
        relative indexing.

    Raises:
        ValueError: if the neighborhood length differs from ``ndim``; if
            the kernel assigns to one of its array arguments; if a relative
            index is not constant while no neighborhood was given; if the
            kernel has no relative accesses; or if an index does not match
            the array dimensionality.
    """
    const_dict = {}
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    if neighborhood is None:
        need_to_calc_kernel = True
    else:
        need_to_calc_kernel = False
        if len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d " \
                "dimensional input array" % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)

    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT == 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in kernel.arg_names) or
               (isinstance(stmt, ir.SetItem)
                    and stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil " \
                    "kernels is not allowed.")
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['getitem', 'static_getitem']
                    and stmt.value.value.name in kernel.arg_names
                    and stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    # allow static_getitem since rewrite passes are applied
                    stmt_index_var = stmt.value.index_var

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts.append(tuple_table[stmt_index_var.name])
                    elif stmt_index_var.name in const_dict:
                        kernel_consts.append(const_dict[stmt_index_var.name])
                    else:
                        raise ValueError(
                            "stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in stmt.value.index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                             index_var, loc)
                    new_body.append(ir.Assign(acc_call, tmpvar, loc))
                    new_body.append(ir.Assign(
                        ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                        stmt.target, loc))
                else:
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    ind_stencils = []

                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        const_name = ir_utils.mk_unique_var("const_index")
                        const_var = ir.Var(scope, const_name, loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc), const_var, loc))
                        index_var = ir.Var(scope, index_names[dim], loc)

                        tmpname = ir_utils.mk_unique_var("ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils.append(tmpvar)
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(stmt_index_var,
                                                      const_var, loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))
                        acc_call = ir.Expr.binop(operator.add, getitemvar,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))

                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(ir.Assign(
                        ir.Expr.getitem(stmt.value.value, s_index_var, loc),
                        stmt.target, loc))
            else:
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the size of the kernel by finding the maximum absolute value
        # index used in the kernel specification.
        neighborhood = [[0, 0] for _ in range(ndim)]
        if not kernel_consts:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, (tuple, list)):
                index_len = len(index)
            elif isinstance(index, int):
                index_len = 1
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")
            # Check the rank before touching `neighborhood`, so an index
            # with too many elements raises this ValueError rather than an
            # IndexError from the lookups below.
            if index_len != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

            if isinstance(index, int):
                neighborhood[0][0] = min(neighborhood[0][0], index)
                neighborhood[0][1] = max(neighborhood[0][1], index)
                continue

            for i, te in enumerate(index):
                if isinstance(te, ir.Var) and te.name in const_dict:
                    te = const_dict[te.name]
                if isinstance(te, int):
                    neighborhood[i][0] = min(neighborhood[i][0], te)
                    neighborhood[i][1] = max(neighborhood[i][1], te)
                else:
                    raise ValueError(
                        "stencil kernel index is not constant,"
                        "'neighborhood' option required")

    return (neighborhood, relatively_indexed)
def _gen_rolling_call(self, args, col_var, win_size, center, func, out_var):
    """Generate the IR nodes implementing a rolling-window operation.

    The kernel is either the user function given to ``apply`` or a small
    function generated from source text for one of the built-in
    aggregations.  It is turned into a stencil call over ``col_var`` writing
    to ``out_var``; afterwards the leading ``w - 1`` output entries (or,
    when ``center`` is truthy, the first and last ``w // 2`` entries) are
    set to NaN.

    Args:
        args: positional arguments of the rolling call; exactly one (the
            kernel) for ``'apply'``, none for the built-in aggregations.
        col_var: IR variable of the input column.
        win_size: window size, either a Python ``int`` or an IR variable.
        center: whether the window is centered.
        func: one of ``'apply'``, ``'sum'``, ``'mean'``, ``'min'``,
            ``'max'``, ``'std'``, ``'var'``.
        out_var: IR variable receiving the result array.

    Returns:
        The list of IR nodes: initialization, stencil call, NaN fill.

    Raises:
        ValueError: on a wrong number of ``args`` or an unsupported
            ``func``.
    """
    loc = col_var.loc
    scope = col_var.scope
    if func == 'apply':
        if len(args) != 1:
            raise ValueError("One argument expected for rolling apply")
        kernel_func = guard(get_definition, self.func_ir, args[0])
    elif func in ['sum', 'mean', 'min', 'max', 'std', 'var']:
        if len(args) != 0:
            raise ValueError(
                "No argument expected for rolling {}".format(func))
        g_pack = "np"
        if func in ['std', 'var', 'mean']:
            g_pack = "hpat.hiframes_api"
        if isinstance(win_size, int) and win_size < LARGE_WIN_SIZE:
            # unroll small constant windows into explicit element accesses
            # (threshold is LARGE_WIN_SIZE, defined outside this block)
            kernel_args = ','.join(
                ['a[{}]'.format(-i) for i in range(win_size)])
            kernel_expr = '{}.{}(np.array([{}]))'.format(
                g_pack, func, kernel_args)
            if func == 'sum':  # simplify sum
                kernel_expr = '+'.join(
                    ['a[{}]'.format(-i) for i in range(win_size)])
        else:
            kernel_expr = '{}.{}(a[(-w+1):1])'.format(g_pack, func)
        # The generated text contains only names from the fixed list above
        # and integer offsets, never caller-supplied strings.
        func_text = 'def g(a, w):\n return {}\n'.format(kernel_expr)
        loc_vars = {}
        exec(func_text, {}, loc_vars)
        kernel_func = loc_vars['g']
    else:
        # Fail early and explicitly; otherwise kernel_func would be unbound
        # when the stencil call is generated below.
        raise ValueError(
            "Unsupported rolling function: {}".format(func))

    col_var, init_nodes = self._fix_rolling_array(col_var, func)

    if isinstance(win_size, int):
        # materialize a constant window size as an IR variable
        win_size_var = ir.Var(scope, mk_unique_var("win_size"), loc)
        init_nodes.append(
            ir.Assign(ir.Const(win_size, loc), win_size_var, loc))
        win_size = win_size_var

    index_offsets, win_tuple, option_nodes = self._gen_rolling_init(
        win_size, func, center)

    init_nodes += option_nodes
    other_args = [win_size]
    if func == 'apply':
        other_args = None
    options = {'neighborhood': win_tuple}
    fir_globals = self.func_ir.func_id.func.__globals__
    stencil_nodes = gen_stencil_call(col_var, out_var, kernel_func,
                                     index_offsets, fir_globals, other_args,
                                     options)

    def f(A, w):
        A[0:w - 1] = np.nan
    f_block = compile_to_numba_ir(f, {'np': np}).blocks.popitem()[1]
    replace_arg_nodes(f_block, [out_var, win_size])
    setitem_nodes = f_block.body[:-3]  # remove none return

    if center:
        def f1(A, w):
            A[0:w // 2] = np.nan

        def f2(A, w):
            n = len(A)
            A[n - (w // 2):n] = np.nan
        f_block = compile_to_numba_ir(f1, {'np': np}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [out_var, win_size])
        setitem_nodes1 = f_block.body[:-3]  # remove none return
        f_block = compile_to_numba_ir(f2, {'np': np}).blocks.popitem()[1]
        replace_arg_nodes(f_block, [out_var, win_size])
        setitem_nodes2 = f_block.body[:-3]  # remove none return
        setitem_nodes = setitem_nodes1 + setitem_nodes2

    return init_nodes + stencil_nodes + setitem_nodes
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_blocks, index_offsets, target, return_type, stencil_func, arg_to_arr_dict):
    """
    Converts a set of stencil kernel blocks to a parfor.

    Steps, in order:
      1. copy-propagate and dead-code-eliminate ``stencil_blocks``;
      2. create one parfor index variable per dimension of the first input
         array and rewrite the kernel's relative accesses with them;
      3. build one loop nest per dimension from the start/end lengths;
      4. replace the kernel's return by a ``SetItem`` into the output array;
      5. when ``out_arr`` is ``None``, generate an init block that creates
         the output with ``np.full(shape, cval-or-0, dtype)``.

    Args:
        label: label of the block containing the stencil call (used to look
            up the array-analysis equivalence set).
        in_args: argument variables of the call; the first is the array
            that determines dimensionality and shape.
        out_arr: explicit output array variable, or ``None``.
        stencil_blocks: ``{label: block}`` IR of the kernel (modified in
            place).
        index_offsets: value of the stencil's ``index_offsets`` option.
        target: variable that receives the output array.
        return_type: array type of the stencil result.
        stencil_func: the ``StencilFunc``; its ``options`` may hold
            ``cval``.
        arg_to_arr_dict: passed through to ``_replace_stencil_accesses``.

    Returns:
        List of generated IR nodes ending with the parfor and the
        assignment of the output array to ``target``.

    Raises:
        ValueError: if ``cval`` is given and its type differs from the
            stencil's return dtype.
    """
    gen_nodes = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)

    ir_utils.apply_copy_propagate(
        stencil_blocks,
        in_cps,
        name_var_table,
        self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr.name)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(
            in_arr_dim_sizes[i], end_lengths[i], gen_nodes, scope, loc)
        start_ind = self._get_stencil_start_ind(
            start_lengths[i], gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(
            numba.parfor.LoopNest(parfor_vars[i], start_ind, last_ind, 1))

    # replace return value to setitem to output array
    return_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(return_node, ir.Return)

    # Pop trailing statements until the 'cast' assignment that feeds the
    # return is found; its operand is the value to store.
    last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    while not isinstance(last_node, ir.Assign) or not isinstance(
            last_node.value, ir.Expr) or not last_node.value.op == 'cast':
        last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(last_node, ir.Assign)
    assert isinstance(last_node.value, ir.Expr)
    assert last_node.value.op == 'cast'
    return_val = last_node.value.value

    # create parfor index var
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        parfor_ind_var = ir.Var(
            scope, mk_unique_var("$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        stencil_blocks[max(stencil_blocks.keys())].body.append(tuple_assign)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a missing output (None) triggers allocation.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # in_arr_shape = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        # fill value: cval if given, else 0, as the return dtype
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        # dtype object: np.<dtype name>
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(
            dtype_g_np_var, return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(
            dtype_np_attr_call, dtype_attr_var, loc)
        init_block.body.append(dtype_attr_assign)

        # out_arr = np.full(shape, fill, dtype)
        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    setitem_call = ir.SetItem(out_arr, parfor_ind_var, return_val, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)
    stencil_blocks[max(stencil_blocks.keys())].body.append(setitem_call)

    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set)
    parfor.patterns = [('stencil', [start_lengths, end_lengths])]
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def inline_closure_call(self, block, i, callee):
    """Inline the body of `callee` at its callsite (`i`-th instruction of
    `block`).

    The callee's IR is obtained through `self.get_ir_of_code(callee.code)`,
    relabelled so its block labels do not collide with those already in
    `self.func_ir`, and rewritten so that its locals, formal parameters and
    free variables refer to the caller's variables.  `block` is then split
    at the call: everything before the call stays in `block` (which now ends
    with a jump into the inlined body) and everything after the call moves
    to a freshly labelled block.

    Parameters
    ----------
    block : the caller block containing the call; mutated in place.
    i : index into `block.body` of the assignment whose value is the call.
    callee : the closure being called; its `code`, `defaults` and `closure`
        attributes are read.

    Returns
    -------
    A list of `(label, block)` pairs: first the block holding the
    instructions that followed the call, then every inlined callee block.

    Raises
    ------
    NotImplementedError
        If `callee.defaults` is truthy but is neither a tuple, an `ir.Var`
        nor a `str`.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    _debug_print("Found closure call: ", instr, " with callee = ", callee)
    func_ir = self.func_ir
    # first, get the IR of the callee
    from_ir = self.get_ir_of_code(callee.code)
    from_blocks = from_ir.blocks

    # 1. relabel from_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    from_blocks = add_offset_to_labels(from_blocks, max_label + 1)
    from_ir.blocks = from_blocks
    min_label = min(from_blocks.keys())
    max_label = max(from_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    ir_utils.visit_vars_extensions = {}

    # 2. rename all local variables in from_ir with new locals created in
    #    func_ir
    from_scopes = _get_all_scopes(from_blocks)
    _debug_print("obj_IR has scopes: ", from_scopes)
    # one function should only have one local scope
    assert(len(from_scopes) == 1)
    from_scope = from_scopes[0]
    var_dict = {}
    for var in from_scope.localvars._con.values():
        # free variables are substituted in step 4, not renamed here
        if var.name not in callee.code.co_freevars:
            var_dict[var.name] = scope.make_temp(var.loc)
    _debug_print("Before local var rename: var_dict = ", var_dict)
    _debug_dump(from_ir)
    replace_vars(from_blocks, var_dict)
    _debug_print("After local var rename: ")
    _debug_dump(from_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee.defaults:
        _debug_print("defaults", callee.defaults)
        if isinstance(callee.defaults, tuple):  # Python 3.5
            args = args + list(callee.defaults)
        elif isinstance(callee.defaults, (ir.Var, str)):
            defaults = func_ir.get_definition(callee.defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # Report the offending object itself; the local `defaults` is
            # only bound in the branch above and is not available here.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee.defaults))
    _replace_args_with(from_blocks, args)
    _debug_print("After arguments rename: ")
    _debug_dump(from_ir)

    # 4. replace freevar with actual closure var
    if callee.closure:
        closure = func_ir.get_definition(callee.closure)
        assert(isinstance(closure, ir.Expr)
               and closure.op == 'build_tuple')
        assert(len(callee.code.co_freevars) == len(closure.items))
        _debug_print("callee's closure = ", closure)
        _replace_freevars(from_blocks, closure.items)
        _debug_print("After closure rename: ")
        _debug_dump(from_ir)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i+1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    _replace_returns(from_blocks, instr.target, new_label)

    # 7. insert all new blocks, and add back definitions
    #    (a distinct loop name keeps the `block` parameter intact)
    for label, inlined_block in from_blocks.items():
        # block scope must point to parent's
        inlined_block.scope = scope
        _add_definition(func_ir, inlined_block)
        func_ir.blocks[label] = inlined_block
        new_blocks.append((label, inlined_block))
    _debug_print("After merge: ")
    _debug_dump(func_ir)
    return new_blocks
def dead_branch_prune(func_ir, called_args):
    """
    Fold away conditional branches whose outcome is already decided by the
    arguments the function is called with. The IR is edited in place.

    func_ir is the IR to prune
    called_args are the actual arguments with which the function is called
    """
    from .ir_utils import get_definition, guard, find_const, GuardException

    DEBUG = 0

    class Unknown(object):
        # marker meaning "this operand could not be resolved to a constant"
        pass

    def find_branches(func_ir):
        # gather every block terminated by a Branch whose condition has a
        # definition that can be looked up
        found = []
        for blk in func_ir.blocks.values():
            terminator = blk.body[-1]
            if not isinstance(terminator, ir.Branch):
                continue
            cond_defn = guard(get_definition, func_ir, terminator.cond.name)
            if cond_defn is not None:
                found.append((terminator, cond_defn, blk))
        return found

    def trace_prune(branch, condition, take_truebr, lhs_cond, rhs_cond):
        # debug-only report of which side of the branch is being discarded
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, lhs_cond, rhs_cond,
                  condition.fn)

    def do_prune(branch, take_truebr, blk):
        # swap the conditional terminator for an unconditional jump to the
        # surviving target and report which side survived (1 == true side)
        if take_truebr:
            keep = branch.truebr
        else:
            keep = branch.falsebr
        blk.body[-1] = ir.Jump(keep, loc=branch.loc)
        return 1 if keep == branch.truebr else 0

    def prune_by_type(branch, condition, blk, *conds):
        # type-driven pruning: only applies when at least one operand is a
        # NoneType
        lhs_cond, rhs_cond = conds
        involves_none = (isinstance(lhs_cond, types.NoneType)
                         or isinstance(rhs_cond, types.NoneType))
        if not involves_none:
            return False, None
        take_truebr = condition.fn(lhs_cond, rhs_cond)
        trace_prune(branch, condition, take_truebr, lhs_cond, rhs_cond)
        return True, do_prune(branch, take_truebr, blk)

    def prune_by_value(branch, condition, blk, *conds):
        # value-driven pruning: both operands are constants, so evaluate
        lhs_cond, rhs_cond = conds
        take_truebr = condition.fn(lhs_cond, rhs_cond)
        trace_prune(branch, condition, take_truebr, lhs_cond, rhs_cond)
        return True, do_prune(branch, take_truebr, blk)

    def resolve_input_arg_const(input_arg):
        """
        Map a named input argument onto a constant, when that is possible
        """
        arg_ty = called_args[func_ir.arg_names.index(input_arg)]

        # argument is None itself
        if isinstance(arg_ty, types.NoneType):
            return arg_ty

        # argument was omitted, so look at the kwarg default
        if isinstance(arg_ty, types.Omitted):
            default = arg_ty.value
            if isinstance(default, types.NoneType):
                return default
            if default is None:
                return types.NoneType('none')

        # For a literal type hand back the type itself: comparisons such as
        # `x == None` remain decidable because e.g. types.int64 can never be
        # None/NoneType, so the branch is still prunable.
        return getattr(arg_ty, 'literal_type', Unknown())

    if DEBUG > 1:
        print("before".center(80, '-'))
        print(func_ir.dump())

    # Candidate branches are those whose binop condition has
    #  * operands that are input args resolvable to a constant, and/or
    #  * operands that are plain constants;
    # when both operands resolve, the branch becomes a jump.
    branch_info = find_branches(func_ir)
    # conditions made irrelevant by pruning, paired with the side taken
    nullified_conditions = []

    for branch, condition, blk in branch_info:
        if not (isinstance(condition, ir.Expr) and condition.op == 'binop'):
            continue

        const_conds = []
        prune = prune_by_value
        for arg in (condition.lhs, condition.rhs):
            resolved_const = Unknown()
            if arg.name in func_ir.arg_names:
                # operand is one of the function's own arguments
                resolved_const = resolve_input_arg_const(arg.name)
                prune = prune_by_type
            else:
                # operand may be a constant; guard() is unusable here
                # because the constant may legitimately be None
                try:
                    resolved_const = find_const(func_ir, arg)
                    if resolved_const is None:
                        resolved_const = types.NoneType('none')
                except GuardException:
                    pass

            if not isinstance(resolved_const, Unknown):
                const_conds.append(resolved_const)

        # both sides must have resolved for the branch to be decidable
        if len(const_conds) != 2:
            continue

        prune_stat, taken = prune(branch, condition, blk, *const_conds)
        if prune_stat:
            nullified_conditions.append((condition, taken))

    # CAUTION.
    # Evaluating the condition expression is frequently what upsets type
    # inference, so once the branch has become an unconditional jump the
    # expression would ideally disappear. It may however still be
    # referenced elsewhere (e.g. dels), and DCE cannot be run safely at this
    # point because typing has not happened yet. Instead the condition is
    # overwritten with a harmless constant that typing accepts and that DCE
    # may remove, together with its references, after typing. The constant
    # must encode the branch that was taken, since object mode fall back
    # would read it in place of the original condition. func_ir._definitions
    # and func_ir._consts are refreshed as well so the IR stays self
    # consistent.
    deadcond = [entry[0] for entry in nullified_conditions]
    for _, cond, blk in branch_info:
        if cond not in deadcond:
            continue
        # true/false bit recording which side of the branch was kept
        branch_bit = nullified_conditions[deadcond.index(cond)][1]
        for stmt in blk.body:
            if not (isinstance(stmt, ir.Assign) and stmt.value is cond):
                continue
            stmt.value = ir.Const(branch_bit, loc=stmt.loc)
            # swap the recorded definition over to the new constant
            defns = func_ir._definitions[stmt.target.name]
            defns[defns.index(cond)] = stmt.value

    # Dropping unreachable blocks only needs the CFG, so it is safe here.
    cfg = compute_cfg_from_blocks(func_ir.blocks)
    for dead in cfg.dead_nodes():
        del func_ir.blocks[dead]

    # rewritten conditions mean the cached constants are stale
    if nullified_conditions:
        func_ir._consts = consts.ConstantInference(func_ir)

    if DEBUG > 1:
        print("after".center(80, '-'))
        print(func_ir.dump())
def _add_index_offsets(self, index_list, index_offsets, new_body, scope, loc): """ Does the actual work of adding loop index variables to the relative index constants or variables. """ assert len(index_list) == len(index_offsets) # shortcut if all values are integer if all([isinstance(v, int) for v in index_list+index_offsets]): # add offsets in all dimensions return list(map(add, index_list, index_offsets)) out_nodes = [] index_vars = [] for i in range(len(index_list)): # new_index = old_index + offset old_index_var = index_list[i] if isinstance(old_index_var, int): old_index_var = ir.Var(scope, mk_unique_var("old_index_var"), loc) self.typemap[old_index_var.name] = types.intp const_assign = ir.Assign(ir.Const(index_list[i], loc), old_index_var, loc) out_nodes.append(const_assign) offset_var = index_offsets[i] if isinstance(offset_var, int): offset_var = ir.Var(scope, mk_unique_var("offset_var"), loc) self.typemap[offset_var.name] = types.intp const_assign = ir.Assign(ir.Const(index_offsets[i], loc), offset_var, loc) out_nodes.append(const_assign) if (isinstance(old_index_var, slice) or isinstance(self.typemap[old_index_var.name], types.misc.SliceType)): # only one arg can be slice assert self.typemap[offset_var.name] == types.intp index_var = self._add_offset_to_slice(old_index_var, offset_var, out_nodes, scope, loc) index_vars.append(index_var) continue if (isinstance(offset_var, slice) or isinstance(self.typemap[offset_var.name], types.misc.SliceType)): # only one arg can be slice assert self.typemap[old_index_var.name] == types.intp index_var = self._add_offset_to_slice(offset_var, old_index_var, out_nodes, scope, loc) index_vars.append(index_var) continue index_var = ir.Var(scope, mk_unique_var("offset_stencil_index"), loc) self.typemap[index_var.name] = types.intp index_call = ir.Expr.binop('+', old_index_var, offset_var, loc) self.calltypes[index_call] = ir_utils.find_op_typ('+', [types.intp, types.intp]) index_assign = ir.Assign(index_call, index_var, loc) 
out_nodes.append(index_assign) index_vars.append(index_var) new_body.extend(out_nodes) return index_vars
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """ Converts a set of stencil kernel blocks to a parfor.

        Parameters
        ----------
        label : label used to look up the equivalence set in
            self.array_analysis.
        in_args : stencil input arguments; in_args[0] is treated as the
            input array that fixes the number of dimensions, scope and loc.
        out_arr : variable of the output array, or None.  When None, an
            output array is allocated in the parfor init block through
            np.full, using the shape of the input array and a fill value of
            stencil_func.options["cval"] if present, otherwise 0.
        stencil_ir : IR of the stencil kernel; its blocks are modified.
        index_offsets, arg_to_arr_dict : forwarded unchanged to
            self._replace_stencil_accesses.
        target : variable that receives the output array after the parfor.
        return_type : its dtype types the per-element result and a newly
            allocated output array.
        stencil_func : the stencil function object; its options are read.

        Returns
        -------
        The list of generated nodes: whatever the start/last index helpers
        were given the chance to add, followed by the parfor and the
        assignment of the output array to target.

        Raises
        ------
        ValueError
            If "cval" is given and its type differs from return_type.dtype.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)
    ir_utils.apply_copy_propagate(
        stencil_blocks,
        in_cps,
        name_var_table,
        self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars: one intp loop index per input-array dimension
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for _ in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(
            start_lengths[i], gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                               start_ind, last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    # (nodes collected here are placed in the exit block before the setitem)
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        parfor_ind_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a genuinely missing output array triggers the
    # allocation below.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                             in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError("cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        # emit `np.<dtype name>` so it can be passed as the dtype argument
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        # the new output array has the same dimension sizes as the input
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # out_arr[parfor index] = exit value, placed in the exit block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype
    )
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(0, ir.Loc("stencilparfor_dummy", -1)))
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # drop the last statement of the highest-labelled block again
    # (the dummy return added just above)
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set, pattern,
                                 self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes