def test1(self):
    """Check that ``x`` is no longer assigned after copy propagation.

    The front end and type inference are run on ``test_will_propagate``
    for three ``int64`` arguments.  Dels are stripped, copy propagation is
    computed and applied, dead code is removed, and the test then asserts
    that ``findLhsAssign(test_ir, "x")`` is falsy.
    """
    type_ctx = typing.Context()
    target_ctx = cpu.CPUContext(type_ctx)
    func_ir = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(type_ctx, target_ctx):
        type_ctx.refresh()
        target_ctx.refresh()

        arg_types = (types.int64,) * 3
        inferred = compiler.type_inference_stage(
            type_ctx, func_ir, arg_types, None)
        typemap, return_type, calltypes = inferred

        # The annotation is constructed as part of the scenario; the
        # resulting object is not inspected by this test.
        type_annotations.TypeAnnotation(
            func_ir=func_ir,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=arg_types,
            return_type=return_type,
            html_output=config.HTML,
        )

        remove_dels(func_ir.blocks)
        in_copies, _out_copies = copy_propagate(func_ir.blocks, typemap)
        name_var_table = get_name_var_table(func_ir.blocks)
        apply_copy_propagate(
            func_ir.blocks, in_copies, name_var_table, typemap, calltypes)

        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
        self.assertFalse(findLhsAssign(func_ir, "x"))
def test1(self):
    """Verify that dead-code removal after copy propagation drops ``x``.

    ``test_will_propagate`` is lowered to IR and typed for three ``int64``
    arguments.  After removing dels, computing and applying copy
    propagation, and running ``remove_dead``, ``findLhsAssign`` must not
    report an assignment to ``"x"``.
    """
    typing_context = typing.Context()
    target_context = cpu.CPUContext(typing_context)
    ir_under_test = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(typing_context, target_context):
        typing_context.refresh()
        target_context.refresh()

        signature_args = (types.int64,) * 3
        typemap, return_type, calltypes = compiler.type_inference_stage(
            typing_context, ir_under_test, signature_args, None)

        # Constructed for parity with the compilation pipeline; the object
        # is not used further in this test.
        type_annotations.TypeAnnotation(
            func_ir=ir_under_test,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=signature_args,
            return_type=return_type,
            html_output=config.HTML,
        )

        remove_dels(ir_under_test.blocks)
        copies_in, _copies_out = copy_propagate(ir_under_test.blocks, typemap)
        var_table = get_name_var_table(ir_under_test.blocks)
        apply_copy_propagate(
            ir_under_test.blocks, copies_in, var_table, typemap, calltypes)

        remove_dead(
            ir_under_test.blocks, ir_under_test.arg_names, ir_under_test)
        self.assertFalse(findLhsAssign(ir_under_test, "x"))
def compare_ir(self, ir_list):
    """Assert that every IR in ``ir_list`` dumps to the same text.

    Each function IR first has ``remove_dead`` applied to it (in place),
    is then dumped into a string buffer, and the collected dumps are
    required to collapse to exactly one distinct value.
    """
    def dump_after_dead_code_removal(func_ir):
        # Normalise the IR before dumping so that only live code is compared.
        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
        buf = utils.StringIO()
        func_ir.dump(file=buf)
        return buf.getvalue()

    dumps = [dump_after_dead_code_removal(func_ir) for func_ir in ir_list]
    distinct_dumps = set(dumps)
    # assert all outputs are equal
    self.assertTrue(len(distinct_dumps) == 1)
def compare_ir(self, ir_list):
    """Check that all IRs in ``ir_list`` are textually identical.

    ``remove_dead`` is run on each IR (mutating it), the IR is dumped to an
    in-memory buffer, and the test asserts that the set of dump strings has
    exactly one member.
    """
    seen = []
    for candidate in ir_list:
        remove_dead(candidate.blocks, candidate.arg_names, candidate)
        sink = utils.StringIO()
        candidate.dump(file=sink)
        seen.append(sink.getvalue())

    # assert all outputs are equal
    unique_count = len(set(seen))
    self.assertTrue(unique_count == 1)
def run_pass(self, state):
    """Run the loop-conversion part of ``ParforPass`` on ``state.func_ir``.

    A ``numba.parfor.ParforPass`` is built from the fields of ``state``.
    Dels are removed from the IR, the pass's array analysis and
    ``_convert_loop`` are run over the blocks, dead code is removed, and
    ``numba.parfor.get_parfor_params`` is called with the pass's fusion
    settings.

    Always returns ``True``.
    """
    func_ir = state.func_ir
    annotation = state.type_annotation
    flags = state.flags

    parfor_pass = numba.parfor.ParforPass(
        func_ir,
        annotation.typemap,
        annotation.calltypes,
        state.return_type,
        state.typingctx,
        flags.auto_parallel,
        flags,
        state.parfor_diagnostics,
    )

    remove_dels(func_ir.blocks)
    parfor_pass.array_analysis.run(func_ir.blocks)
    parfor_pass._convert_loop(func_ir.blocks)
    remove_dead(
        func_ir.blocks, func_ir.arg_names, func_ir, annotation.typemap)
    numba.parfor.get_parfor_params(
        func_ir.blocks,
        parfor_pass.options.fusion,
        parfor_pass.nested_fusion_info,
    )
    return True
def run(self):
    """Run inline closure call pass.

    Every block of ``self.func_ir`` is scanned for an assignment whose
    value is a ``call`` expression and whose callee definition (looked up
    with ``self.func_ir.get_definition``) is a ``make_function``
    expression.  Such a call is handed to ``self.inline_closure_call`` and
    the blocks it returns are queued for scanning as well.

    If anything was inlined, dels are removed, ``remove_dead`` is repeated
    until it reports no further change, and block labels are renamed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    _debug_print("START InlineClosureCall")
    while work_list:
        _, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if not isinstance(instr, ir.Assign):
                continue
            expr = instr.value
            if not (isinstance(expr, ir.Expr) and expr.op == 'call'):
                continue
            try:
                func_def = self.func_ir.get_definition(expr.func)
            except KeyError:
                # No definition recorded for the callee: nothing to inline.
                func_def = None
            _debug_print("found call to ", expr.func, " def = ", func_def)
            if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                new_blocks = self.inline_closure_call(block, i, func_def)
                work_list.extend(new_blocks)
                modified = True
                # current block is modified, skip the rest
                break
    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further removed
        while remove_dead(self.func_ir.blocks, self.func_ir.arg_names):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
def test2(self):
    """Check that ``remove_dead`` keeps the call to ``np.random.seed``.

    The IR of a function whose only statement is ``np.random.seed(2)`` is
    run through ``remove_dead``; block 0 must still contain an assignment
    whose value is a call to something whose definition has ``attr ==
    'seed'``.
    """
    def call_np_random_seed():
        np.random.seed(2)

    def seed_call_exists(func_ir):
        def is_seed_call(stmt):
            # The definition lookup only happens for call assignments.
            return (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op == 'call'
                    and func_ir.get_definition(stmt.value.func).attr == 'seed')

        return any(is_seed_call(stmt) for stmt in func_ir.blocks[0].body)

    test_ir = compiler.run_frontend(call_np_random_seed)
    remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
    self.assertTrue(seed_call_exists(test_ir))
def run(self):
    """Rewrite the IR through the IO handlers, then clean it up.

    Blocks are visited in the order given by ``find_topo_order``.  In each
    block, ``ir.Assign`` statements are replaced by the list returned from
    ``self._run_assign``, ``ir.StaticSetItem`` statements by the list
    returned from ``self._run_static_setitem``, and every other statement
    is kept as is.  Afterwards ``remove_dead`` is repeated until it reports
    no change and the function's definitions are recomputed.
    """
    dprint_func_ir(self.func_ir, "starting IO")
    for label in find_topo_order(self.func_ir.blocks):
        rewritten = []
        # copies are collected before running the pass since
        # variables typed in locals are assigned late
        self._get_reverse_copies(self.func_ir.blocks[label].body)
        for stmt in self.func_ir.blocks[label].body:
            if isinstance(stmt, ir.Assign):
                replacement = self._run_assign(stmt)
            elif isinstance(stmt, ir.StaticSetItem):
                replacement = self._run_static_setitem(stmt)
            else:
                replacement = [stmt]
            rewritten.extend(replacement)
        self.func_ir.blocks[label].body = rewritten

    # iterative remove dead to make sure all extra code (e.g. df vars) is
    # removed
    still_changing = True
    while still_changing:
        still_changing = remove_dead(
            self.func_ir.blocks, self.func_ir.arg_names, self.func_ir)

    self.func_ir._definitions = get_definitions(self.func_ir.blocks)
    dprint_func_ir(self.func_ir, "after IO")
    if debug_prints():
        print("h5 files: ", self.h5_files)
        print("h5 dsets: ", self.h5_dsets)
def run(self):
    """Run inline closure call pass.

    Phase 1 walks a work list of ``(label, block)`` pairs.  For every
    assignment whose value is a ``call`` expression it tries, in order,
    ``self._inline_reduction``, ``self._inline_closure`` and
    ``self._inline_stencil`` (each wrapped in ``guard``).  The first two
    change the block structure, so scanning of the current block stops as
    soon as one of them succeeds.

    Phase 2 runs only when ``enable_inline_arraycall`` is set.  It merges
    adjacent blocks if phase 1 changed anything, computes the CFG, and
    calls ``_inline_arraycall`` on every loop, largest body first.

    If anything was modified, dels are removed, ``remove_dead`` is repeated
    until it reports no change, and block labels are renamed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        _, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if not isinstance(instr, ir.Assign):
                continue
            expr = instr.value
            if not (isinstance(expr, ir.Expr) and expr.op == 'call'):
                continue
            call_name = guard(find_callname, self.func_ir, expr)
            func_def = guard(get_definition, self.func_ir, expr.func)

            if guard(self._inline_reduction,
                     work_list, block, i, expr, call_name):
                modified = True
                break  # because block structure changed

            if guard(self._inline_closure,
                     work_list, block, i, func_def):
                modified = True
                break  # because block structure changed

            if guard(self._inline_stencil,
                     instr, call_name, func_def):
                modified = True

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir.blocks)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        visited = []
        # We go over all loops, bigger loops first (outer first)
        loop_keys = sorted(loops.keys(),
                           key=lambda key: len(loops[key].body),
                           reverse=True)
        for k in loop_keys:
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.parallel_options.comprehension):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                          self.func_ir):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def run(self):
    """Run inline closure call pass.

    Phase 1 walks a work list of ``(label, block)`` pairs.  For every
    assignment whose value is a ``call`` expression it tries, in order,
    ``self._inline_reduction``, ``self._inline_closure`` and
    ``self._inline_stencil`` (each wrapped in ``guard``).  The first two
    change the block structure, so scanning of the current block stops as
    soon as one of them succeeds.

    Phase 2 runs only when ``enable_inline_arraycall`` is set.  It merges
    adjacent blocks if phase 1 changed anything, computes the CFG, and
    calls ``_inline_arraycall`` (passing ``self.swapped`` and the
    comprehension option) on every loop, largest body first.

    If anything was modified, dels are removed, ``remove_dead`` is repeated
    until it reports no change, and block labels are renamed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        _, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if not isinstance(instr, ir.Assign):
                continue
            expr = instr.value
            if not (isinstance(expr, ir.Expr) and expr.op == 'call'):
                continue
            call_name = guard(find_callname, self.func_ir, expr)
            func_def = guard(get_definition, self.func_ir, expr.func)

            if guard(self._inline_reduction,
                     work_list, block, i, expr, call_name):
                modified = True
                break  # because block structure changed

            if guard(self._inline_closure,
                     work_list, block, i, func_def):
                modified = True
                break  # because block structure changed

            if guard(self._inline_stencil,
                     instr, call_name, func_def):
                modified = True

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir.blocks)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        visited = []
        # We go over all loops, bigger loops first (outer first)
        loop_keys = sorted(loops.keys(),
                           key=lambda key: len(loops[key].body),
                           reverse=True)
        for k in loop_keys:
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.swapped,
                     self.parallel_options.comprehension):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                          self.func_ir):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def test2(self):
    """Ensure the ``np.random.seed`` call survives dead-code removal.

    After ``remove_dead`` runs on the IR of a function that only calls
    ``np.random.seed(2)``, block 0 must still hold a call assignment whose
    callee definition has ``attr == 'seed'``.
    """
    def call_np_random_seed():
        np.random.seed(2)

    def seed_call_exists(func_ir):
        for stmt in func_ir.blocks[0].body:
            if not isinstance(stmt, ir.Assign):
                continue
            value = stmt.value
            if not isinstance(value, ir.Expr) or value.op != 'call':
                continue
            if func_ir.get_definition(value.func).attr == 'seed':
                return True
        return False

    test_ir = compiler.run_frontend(call_np_random_seed)
    remove_dead(test_ir.blocks, test_ir.arg_names, test_ir)
    self.assertTrue(seed_call_exists(test_ir))
def run(self):
    """Run the hiframes rewrite over ``self.func_ir``.

    Blocks are visited in ``find_topo_order`` order.  Static setitems on
    known dataframe variables are recorded in ``self.df_vars`` and dropped
    from the block; assignments are rewritten through ``self._run_assign``;
    everything else is kept.  Afterwards the IO pass is run (when h5py is
    available), followed by dead-code removal, closure inlining and type
    inference, whose results are stored in ``self.typemap``,
    ``self.return_type`` and ``self.calltypes``.

    NOTE(review): the indentation of this method was lost in the source
    this was recovered from; ``remove_dead`` and everything after it are
    assumed to run unconditionally (outside ``if config._has_h5py``) --
    confirm against the upstream file.
    """
    dprint_func_ir(self.func_ir, "starting hiframes")
    topo_order = find_topo_order(self.func_ir.blocks)
    for label in topo_order:
        new_body = []
        for inst in self.func_ir.blocks[label].body:
            # df['col'] = arr
            if isinstance(
                    inst, ir.StaticSetItem) and inst.target.name in self.df_vars:
                # Record the column on the dataframe; the setitem itself is
                # not copied into new_body.
                df_name = inst.target.name
                self.df_vars[df_name][inst.index] = inst.value
                self._update_df_cols()
            elif isinstance(inst, ir.Assign):
                out_nodes = self._run_assign(inst)
                # A list result replaces the statement in place.
                if isinstance(out_nodes, list):
                    new_body.extend(out_nodes)
                # A dict result is a set of new blocks; include_new_blocks
                # returns the label that the rest of this block's
                # statements are written to, so `label` is rebound here.
                if isinstance(out_nodes, dict):
                    label = include_new_blocks(self.func_ir.blocks, out_nodes, label, new_body)
                    new_body = []
            else:
                new_body.append(inst)
        self.func_ir.blocks[label].body = new_body

    # Definitions are recomputed because the block bodies were rewritten.
    self.func_ir._definitions = _get_definitions(self.func_ir.blocks)
    #remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    if config._has_h5py:
        io_pass = pio.PIO(self.func_ir, self.locals)
        io_pass.run()
    remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    # InlineClosureCallPass is given a flags object; a minimal stand-in with
    # only `auto_parallel` (set to True) is built here.
    DummyFlags = namedtuple('DummyFlags', 'auto_parallel')
    inline_pass = InlineClosureCallPass(self.func_ir, DummyFlags(True))
    inline_pass.run()
    self.typemap, self.return_type, self.calltypes = numba_compiler.type_inference_stage(
        self.typingctx, self.func_ir, self.args, None)
    self.fix_series_filter(self.func_ir.blocks)
    self.func_ir._definitions = _get_definitions(self.func_ir.blocks)
    dprint_func_ir(self.func_ir, "after hiframes")
    if numba.config.DEBUG_ARRAY_OPT == 1:
        print("df_vars: ", self.df_vars)
    return
def run(self):
    """Run inline closure call pass.

    Phase 1 scans a work list of ``(label, block)`` pairs for assignments
    whose value is a ``call`` expression resolving (through
    ``guard(get_definition, ...)``) to a ``make_function`` expression.
    Such calls are inlined with ``self.inline_closure_call`` and the
    returned blocks are queued for scanning.

    Phase 2 runs only when ``enable_inline_arraycall`` is set.  It merges
    adjacent blocks if phase 1 changed anything, computes the CFG, and
    calls ``_inline_arraycall`` on every loop, largest body first.

    If anything was modified, dels are removed, ``remove_dead`` is repeated
    until it reports no change, and block labels are renamed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        _, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if not isinstance(instr, ir.Assign):
                continue
            expr = instr.value
            if not (isinstance(expr, ir.Expr) and expr.op == 'call'):
                continue
            func_def = guard(get_definition, self.func_ir, expr.func)
            debug_print("found call to ", expr.func, " def = ", func_def)
            if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                new_blocks = self.inline_closure_call(block, i, func_def)
                work_list.extend(new_blocks)
                modified = True
                # current block is modified, skip the rest
                break

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        visited = []
        # We go over all loops, bigger loops first (outer first)
        loop_keys = sorted(loops.keys(),
                           key=lambda key: len(loops[key].body),
                           reverse=True)
        for k in loop_keys:
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.flags.auto_parallel):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while remove_dead(self.func_ir.blocks, self.func_ir.arg_names):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def run(self):
    """Run the hiframes rewrite over ``self.func_ir``.

    Blocks are visited in ``find_topo_order`` order.  Static setitems on
    known dataframe variables are recorded in ``self.df_vars`` and dropped
    from the block; assignments are rewritten through ``self._run_assign``;
    everything else is kept.  When ``_run_assign`` returns a dict of
    blocks, those blocks are spliced into the function: the current block
    jumps into them and they jump on to a fresh block that receives the
    remaining statements.  Dead code is removed once at the end.
    """
    dprint_func_ir(self.func_ir, "starting hiframes")
    topo_order = find_topo_order(self.func_ir.blocks)
    for label in topo_order:
        new_body = []
        for inst in self.func_ir.blocks[label].body:
            # df['col'] = arr
            if isinstance(inst, ir.StaticSetItem) and inst.target.name in self.df_vars:
                # Record the column on the dataframe; the setitem itself is
                # not copied into new_body.
                df_name = inst.target.name
                self.df_vars[df_name][inst.index] = inst.value
                self._update_df_cols()
            elif isinstance(inst, ir.Assign):
                out_nodes = self._run_assign(inst)
                # A list result replaces the statement in place.
                if isinstance(out_nodes, list):
                    new_body.extend(out_nodes)
                # A dict result is a set of blocks to splice in here.
                if isinstance(out_nodes, dict):
                    # Shift the new blocks' labels past the current maximum
                    # before merging them into the function.
                    inner_blocks = add_offset_to_labels(out_nodes, ir_utils._max_label+1)
                    self.func_ir.blocks.update(inner_blocks)
                    ir_utils._max_label = max(self.func_ir.blocks.keys())
                    scope = self.func_ir.blocks[label].scope
                    loc = self.func_ir.blocks[label].loc
                    inner_topo_order = find_topo_order(inner_blocks)
                    inner_first_label = inner_topo_order[0]
                    inner_last_label = inner_topo_order[-1]
                    remove_none_return_from_block(inner_blocks[inner_last_label])
                    # Finish the current block with a jump into the new
                    # blocks ...
                    new_body.append(ir.Jump(inner_first_label, loc))
                    self.func_ir.blocks[label].body = new_body
                    # ... and continue in a fresh block that the last new
                    # block jumps to.  `label` is rebound so the remaining
                    # statements are stored under the new block.
                    label = ir_utils.next_label()
                    self.func_ir.blocks[label] = ir.Block(scope, loc)
                    inner_blocks[inner_last_label].body.append(ir.Jump(label, loc))
                    new_body = []
            else:
                new_body.append(inst)
        self.func_ir.blocks[label].body = new_body
    remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    dprint_func_ir(self.func_ir, "after hiframes")
    if config.DEBUG_ARRAY_OPT==1:
        print("df_vars: ", self.df_vars)
    return
def run(self):
    """Rewrite the IR through the IO handlers, then remove dead code.

    Blocks are visited in the order given by ``find_topo_order``.  In each
    block, ``ir.Assign`` statements are replaced by the list returned from
    ``self._run_assign``, ``ir.StaticSetItem`` statements by the list
    returned from ``self._run_static_setitem``, and every other statement
    is kept as is.  ``remove_dead`` is then run once.
    """
    dprint_func_ir(self.func_ir, "starting IO")
    for label in find_topo_order(self.func_ir.blocks):
        rewritten = []
        # copies are collected before running the pass since
        # variables typed in locals are assigned late
        self._get_reverse_copies(self.func_ir.blocks[label].body)
        for stmt in self.func_ir.blocks[label].body:
            if isinstance(stmt, ir.Assign):
                replacement = self._run_assign(stmt)
            elif isinstance(stmt, ir.StaticSetItem):
                replacement = self._run_static_setitem(stmt)
            else:
                replacement = [stmt]
            rewritten.extend(replacement)
        self.func_ir.blocks[label].body = rewritten

    remove_dead(self.func_ir.blocks, self.func_ir.arg_names)
    dprint_func_ir(self.func_ir, "after IO")
    if config.DEBUG_ARRAY_OPT == 1:
        print("h5 files: ", self.h5_files)
        print("h5 dsets: ", self.h5_dsets)
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_blocks,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """ Converts a set of stencil kernel blocks to a parfor.

    ``in_args[0]`` is used as the input array: its type gives the number
    of dimensions and its scope/loc are used for every generated node.
    ``stencil_blocks`` is modified in place (copy propagation, dead-code
    removal, stencil-access replacement, and the trailing return replaced
    by a setitem into the output array).

    When ``out_arr`` is ``None`` an output array is created in the
    parfor's init block with ``np.full``, filled with the stencil's
    ``cval`` option if present and with ``0`` otherwise.

    Returns the list of generated nodes, ending with the parfor followed
    by the assignment of the output array to ``target``.

    Raises ``ValueError`` if ``cval`` is given and its type differs from
    ``return_type.dtype``.
    """
    gen_nodes = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)
    ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                  self.typemap, self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr.name)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(start_lengths[i], gen_nodes,
                                                scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(
            numba.parfor.LoopNest(parfor_vars[i], start_ind, last_ind,
                                  1))

    # replace return value to setitem to output array
    return_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(return_node, ir.Return)

    # Pop statements off the last block until the 'cast' assignment is
    # reached; its operand is the value the kernel returns.
    last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    while not isinstance(last_node, ir.Assign) or not isinstance(
            last_node.value, ir.Expr) or not last_node.value.op == 'cast':
        last_node = stencil_blocks[max(stencil_blocks.keys())].body.pop()
    assert isinstance(last_node, ir.Assign)
    assert isinstance(last_node.value, ir.Expr)
    assert last_node.value.op == 'cast'
    return_val = last_node.value.value

    # create parfor index var
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        # Multi-dimensional case: the index is a tuple built from the
        # per-dimension index variables at the end of the kernel body.
        parfor_ind_var = ir.Var(scope,
                                mk_unique_var("$parfor_index_tuple_var"),
                                loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        stencil_blocks[max(
            stencil_blocks.keys())].body.append(tuple_assign)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a missing output array triggers allocation.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # shape_var = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        # zero_var = fill value (cval option, or 0)
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        # out_arr = np.full(shape_var, zero_var, np.<dtype name>)
        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx, self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    # out_arr[parfor_ind_var] = return_val, appended to the last block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, return_val, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)
    stencil_blocks[max(stencil_blocks.keys())].body.append(setitem_call)

    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set)
    parfor.patterns = [('stencil', [start_lengths, end_lengths])]
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def run(self):
    """Run inline closure call pass.

    Phase 1 scans a work list of ``(label, block)`` pairs for assignments
    whose value is a ``call`` expression:

    * when ``self.flags.auto_parallel`` is off and the call resolves to
      ``reduce`` (builtin or ``functools``), it is replaced by an inlined
      sequential reduction loop;
    * when the callee definition is a ``make_function`` expression, the
      closure is inlined.

    In both cases the blocks returned by ``inline_closure_call`` are
    queued for scanning and the rest of the current block is skipped.

    Phase 2 runs only when ``enable_inline_arraycall`` is set.  It merges
    adjacent blocks if phase 1 changed anything, computes the CFG, and
    calls ``_inline_arraycall`` on every loop, largest body first.

    If anything was modified, dels are removed, ``remove_dead`` is repeated
    until it reports no change, and block labels are renamed.

    Raises ``TypeError`` if a ``reduce`` call does not have exactly three
    arguments.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        _, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if not isinstance(instr, ir.Assign):
                continue
            expr = instr.value
            if not (isinstance(expr, ir.Expr) and expr.op == 'call'):
                continue

            # inline reduce() when parallel is off
            if not self.flags.auto_parallel:
                call_name = guard(find_callname, self.func_ir, expr)
                if (call_name == ('reduce', 'builtin') or
                        call_name == ('reduce', 'functools')):
                    if len(expr.args) != 3:
                        raise TypeError("invalid reduce call, "
                                        "three arguments including initial "
                                        "value required")
                    check_reduce_func(self.func_ir, expr.args[0])

                    # This function is the template that gets inlined in
                    # place of the reduce() call; keep its body as is.
                    def reduce_func(f, A, v):
                        s = v
                        it = iter(A)
                        for a in it:
                            s = f(s, a)
                        return s

                    new_blocks = inline_closure_call(
                        self.func_ir,
                        self.func_ir.func_id.func.__globals__,
                        block, i, reduce_func)
                    work_list.extend(new_blocks)
                    modified = True
                    # current block is modified, skip the rest
                    break

            func_def = guard(get_definition, self.func_ir, expr.func)
            debug_print("found call to ", expr.func, " def = ", func_def)
            if isinstance(func_def, ir.Expr) and func_def.op == "make_function":
                new_blocks = inline_closure_call(
                    self.func_ir,
                    self.func_ir.func_id.func.__globals__,
                    block, i, func_def)
                work_list.extend(new_blocks)
                modified = True
                # current block is modified, skip the rest
                break

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        visited = []
        # We go over all loops, bigger loops first (outer first)
        loop_keys = sorted(loops.keys(),
                           key=lambda key: len(loops[key].body),
                           reverse=True)
        for k in loop_keys:
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.flags.auto_parallel):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while remove_dead(self.func_ir.blocks, self.func_ir.arg_names):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """ Converts a set of stencil kernel blocks to a parfor.

    ``in_args[0]`` is used as the input array: its type gives the number
    of dimensions and its scope/loc are used for every generated node.
    The blocks of ``stencil_ir`` are modified in place (copy propagation,
    dead-code removal, stencil-access replacement).  A new exit block is
    added that stores the kernel's result into the output array, and the
    kernel's returns are redirected to it by
    ``self.replace_return_with_setitem``.

    When ``out_arr`` is ``None`` an output array is created in the
    parfor's init block with ``np.full``, filled with the stencil's
    ``cval`` option if present and with ``0`` otherwise.

    Returns the list of generated nodes, ending with the parfor followed
    by the assignment of the output array to ``target``.

    Raises ``ValueError`` if ``cval`` is given and its type differs from
    ``return_type.dtype``.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)

    ir_utils.apply_copy_propagate(
        stencil_blocks,
        in_cps,
        name_var_table,
        self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(
            start_lengths[i], gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                               start_ind, last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    # Statements collected here are placed in the exit block, ahead of the
    # setitem.
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        # Multi-dimensional case: the index is a tuple built from the
        # per-dimension index variables.
        parfor_ind_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: only a missing output array triggers allocation.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # shape_var = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(types.intp,
                                                             in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        # zero_var = fill value (cval option, or 0)
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError("cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        # out_arr = np.full(shape_var, zero_var, np.<dtype name>)
        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # out_arr[parfor_ind_var] = exit_value_var, placed in the exit block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype
    )
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    stencil_blocks[parfor_body_exit_label].body.append(ir.Return(0,
                                        ir.Loc("stencilparfor_dummy", -1)))
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # Drop the dummy return again now that the CFG has been simplified.
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set, pattern, self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes