def run(self):
    """Run inline closure call pass.

    Scans every block of ``self.func_ir`` for call expressions and tries,
    in order, to inline reduce() calls, closure calls, and stencil calls.
    If inlining happened, optionally rewrites array-building loops
    (``_inline_arraycall``) and then cleans up the IR (dead-code
    elimination, label renaming).
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        label, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    # guard() turns GuardException into a None result, so a
                    # failed lookup/inline attempt just falls through.
                    call_name = guard(find_callname, self.func_ir, expr)
                    func_def = guard(get_definition, self.func_ir, expr.func)

                    if guard(self._inline_reduction,
                             work_list, block, i, expr, call_name):
                        modified = True
                        break  # because block structure changed

                    if guard(self._inline_closure,
                             work_list, block, i, func_def):
                        modified = True
                        break  # because block structure changed

                    if guard(self._inline_stencil,
                             instr, call_name, func_def):
                        modified = True

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir.blocks)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
        visited = []
        # We go over all loops, bigger loops first (outer first)
        for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.parallel_options.comprehension):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                           self.func_ir)):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def run(self):
    """Run inline closure call pass.

    Same driver as the other variant in this file, but additionally
    threads ``self.swapped`` through to ``_inline_arraycall``.  Scans
    every block for call expressions, inlining reduce() calls, closure
    calls, and stencil calls, then optionally rewrites array-building
    loops and cleans up the IR.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        label, block = work_list.pop()
        for i, instr in enumerate(block.body):
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    # guard() turns GuardException into a None result.
                    call_name = guard(find_callname, self.func_ir, expr)
                    func_def = guard(get_definition, self.func_ir, expr.func)

                    if guard(self._inline_reduction,
                             work_list, block, i, expr, call_name):
                        modified = True
                        break  # because block structure changed

                    if guard(self._inline_closure,
                             work_list, block, i, func_def):
                        modified = True
                        break  # because block structure changed

                    if guard(self._inline_stencil,
                             instr, call_name, func_def):
                        modified = True

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir.blocks)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
        visited = []
        # We go over all loops, bigger loops first (outer first)
        for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.swapped,
                     self.parallel_options.comprehension):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names,
                           self.func_ir)):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def copy_propagate(blocks, typemap):
    """compute copy propagation information for each block using fixed-point
    iteration on data flow equations:
        in_b = intersect(predec(B))
        out_b = gen_b | (in_b - kill_b)

    Returns a 2-tuple ``(in_copies, out_copies)`` mapping each block label
    to its set of ``(l, r)`` copy pairs on entry and exit respectively.
    """
    cfg = compute_cfg_from_blocks(blocks)
    entry = cfg.entry_point()

    # format: dict of block labels to copies as tuples
    # label -> (l,r)
    c_data = init_copy_propagate_data(blocks, entry, typemap)
    (gen_copies, all_copies, kill_copies, in_copies, out_copies) = c_data

    old_point = None
    new_point = copy.deepcopy(out_copies)
    # comparison works since dictionary of built-in types
    while old_point != new_point:
        for label in blocks.keys():
            # the entry block keeps its initial in/out sets
            if label == entry:
                continue
            predecs = [i for i, _d in cfg.predecessors(label)]
            # in_b = intersect(predec(B)); seeded with the first
            # predecessor's out set (copied so &= does not mutate it)
            in_copies[label] = out_copies[predecs[0]].copy()
            for p in predecs:
                in_copies[label] &= out_copies[p]
            # out_b = gen_b | (in_b - kill_b)
            out_copies[label] = (gen_copies[label]
                                 | (in_copies[label] - kill_copies[label]))
        old_point = new_point
        # deep copy so the convergence comparison sees a snapshot
        new_point = copy.deepcopy(out_copies)
    if config.DEBUG_ARRAY_OPT == 1:
        print("copy propagate out_copies:", out_copies)
    return in_copies, out_copies
def remove_dead(blocks, args, typemap=None, alias_map=None, arg_aliases=None):
    """dead code elimination using liveness and CFG info.
    Returns True if something has been removed, or False if nothing is removed.

    Parameters: ``blocks`` is the label->Block dict, ``args`` the function
    argument names; ``typemap``/``alias_map``/``arg_aliases`` are optional
    and recomputed via ``find_potential_aliases`` when not supplied.
    """
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap)
    # recompute alias info only when the caller did not provide it
    if alias_map is None or arg_aliases is None:
        alias_map, arg_aliases = find_potential_aliases(blocks, args, typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("alias map:", alias_map)
    # keep set for easier search
    alias_set = set(alias_map.keys())

    call_table, _ = get_call_table(blocks)

    removed = False
    for label, block in blocks.items():
        # find live variables at each statement to delete dead assignment
        lives = {v.name for v in block.terminator.list_vars()}
        # find live variables at the end of block
        for out_blk, _data in cfg.successors(label):
            lives |= live_map[out_blk]
        # arguments and their aliases are always considered live
        lives |= arg_aliases
        removed |= remove_dead_block(block, lives, call_table, arg_aliases,
                                     alias_map, alias_set, typemap)
    return removed
def simplify_CFG(blocks): """transform chains of blocks that have no loop into a single block""" # first, inline single-branch-block to its predecessors cfg = compute_cfg_from_blocks(blocks) def find_single_branch(label): block = blocks[label] return len(block.body) == 1 and isinstance(block.body[0], ir.Branch) single_branch_blocks = list(filter(find_single_branch, blocks.keys())) for label in single_branch_blocks: inst = blocks[label].body[0] predecessors = cfg.predecessors(label) delete_block = True for (p, q) in predecessors: block = blocks[p] if isinstance(block.body[-1], ir.Jump): block.body[-1] = copy.copy(inst) else: delete_block = False if delete_block: del blocks[label] cfg = compute_cfg_from_blocks(blocks) label_map = {} for node in cfg.nodes(): # find nodes with one successors, that has one predecessor successors = [n for n, _ in cfg.successors(node)] if len(successors) == 1: next_node = successors[0] next_preds = list(cfg.predecessors(successors[0])) if len(next_preds) == 1: # nodes could have been replaced with previous nodes node = label_map.get(node, node) next_node = label_map.get(next_node, next_node) assert isinstance(blocks[node].body[-1], ir.Jump) assert blocks[node].body[-1].target == next_node # remove next_node and append it's body to node blocks[node].body.pop() blocks[node].body.extend(blocks[next_node].body) blocks.pop(next_node) label_map[next_node] = node return rename_labels(blocks)
def run(self):
    """Run inline closure call pass.

    Older variant: only inlines calls whose callee definition is a
    ``make_function`` expression (closures), via the
    ``self.inline_closure_call`` method.  Afterwards optionally rewrites
    array-building loops and cleans up the IR if anything changed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        label, block = work_list.pop()
        for i in range(len(block.body)):
            instr = block.body[i]
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    func_def = guard(get_definition, self.func_ir, expr.func)
                    debug_print("found call to ", expr.func,
                                " def = ", func_def)
                    if isinstance(func_def, ir.Expr) \
                            and func_def.op == "make_function":
                        new_blocks = self.inline_closure_call(block, i,
                                                              func_def)
                        # newly created blocks get re-scanned for more
                        # inlinable calls
                        for block in new_blocks:
                            work_list.append(block)
                        modified = True
                        # current block is modified, skip the rest
                        break

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
        visited = []
        # We go over all loops, bigger loops first (outer first)
        for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.flags.auto_parallel):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")
def remove_dead(blocks, args):
    """dead code elimination using liveness and CFG info"""
    cfg = compute_cfg_from_blocks(blocks)
    use_defs = compute_use_defs(blocks)
    liveness = compute_live_map(cfg, blocks, use_defs.usemap, use_defs.defmap)
    arg_aliases = find_potential_aliases(blocks, args)
    for label, block in blocks.items():
        # seed with the variables the block terminator itself reads, so
        # dead-assignment removal never touches them
        live = set(v.name for v in block.terminator.list_vars())
        # anything live on entry to a successor is live at the end here
        for succ, _edge in cfg.successors(label):
            live |= liveness[succ]
        # arguments (and their aliases) must survive to every exit point
        if label in cfg.exit_points():
            live |= arg_aliases
        remove_dead_block(block, live, arg_aliases)
    return
def loop_lifting(func_ir, typingctx, targetctx, flags, locals):
    """
    Loop lifting transformation.
    Given a interpreter `func_ir` returns a 2 tuple of
    `(toplevel_interp, [loop0_interp, loop1_interp, ....])`
    """
    blocks = func_ir.blocks.copy()
    cfg = compute_cfg_from_blocks(blocks)
    livemap = func_ir.variable_lifetime.livemap
    candidates = _loop_lift_get_candidate_infos(cfg, blocks, livemap)
    # Lift each candidate loop into its own compiled unit; every call
    # rewrites `blocks` in place and returns the lifted loop object.
    lifted_loops = [
        _loop_lift_modify_blocks(func_ir, info, blocks,
                                 typingctx, targetctx, flags, locals)
        for info in candidates
    ]
    # Derive the top-level IR from the rewritten blocks
    return func_ir.derive(blocks=blocks), lifted_loops
def loop_lifting(func_ir, typingctx, targetctx, flags, locals):
    """
    Loop lifting transformation.
    Given a interpreter `func_ir` returns a 2 tuple of
    `(toplevel_interp, [loop0_interp, loop1_interp, ....])`
    """
    outer_blocks = func_ir.blocks.copy()
    cfg = compute_cfg_from_blocks(outer_blocks)
    infos = _loop_lift_get_candidate_infos(
        cfg, outer_blocks, func_ir.variable_lifetime.livemap)
    lifted = []
    for info in infos:
        # Each call excises one loop from `outer_blocks` (mutating it)
        # and yields the dispatcher for the extracted loop.
        loop_unit = _loop_lift_modify_blocks(
            func_ir, info, outer_blocks, typingctx, targetctx, flags, locals)
        lifted.append(loop_unit)

    # Make main IR
    main = func_ir.derive(blocks=outer_blocks)
    return main, lifted
def simplify_CFG(blocks):
    """transform chains of blocks that have no loop into a single block

    Older variant: only performs the pairwise merge (a block with a unique
    successor whose unique predecessor is that block), mutating ``blocks``
    in place and returning None.
    """
    cfg = compute_cfg_from_blocks(blocks)
    label_map = {}
    for node in cfg.nodes():
        # find nodes with one successors, that has one predecessor
        successors = [n for n, _ in cfg.successors(node)]
        if len(successors) == 1:
            next_node = successors[0]
            next_preds = list(cfg.predecessors(successors[0]))
            if len(next_preds) == 1:
                # nodes could have been replaced with previous nodes
                node = label_map.get(node, node)
                next_node = label_map.get(next_node, next_node)
                assert isinstance(blocks[node].body[-1], ir.Jump)
                assert blocks[node].body[-1].target == next_node
                # remove next_node and append its body to node
                blocks[node].body.pop()
                blocks[node].body.extend(blocks[next_node].body)
                blocks.pop(next_node)
                # record the merge so chained merges follow to the head
                label_map[next_node] = node
    return
def loop_lifting(interp, typingctx, targetctx, flags, locals):
    """
    Loop lifting transformation.
    Given an interpreter `interp`, returns a 2-tuple of
    `(toplevel_interp, [loop0_interp, loop1_interp, ....])`

    Oldest variant: operates on ``interp.bytecode`` and rebuilds the main
    interpreter via ``Interpreter.from_blocks``.
    """
    blocks = interp.blocks.copy()
    cfg = compute_cfg_from_blocks(blocks)
    loopinfos = _loop_lift_get_candidate_infos(
        cfg, blocks, interp.variable_lifetime.livemap)
    loops = []
    for loopinfo in loopinfos:
        # rewrites `blocks` in place and returns the lifted loop
        lifted = _loop_lift_modify_blocks(interp.bytecode, loopinfo, blocks,
                                          typingctx, targetctx, flags, locals)
        loops.append(lifted)

    # make main interpreter
    main = Interpreter.from_blocks(bytecode=interp.bytecode,
                                   blocks=blocks,
                                   used_globals=interp.used_globals)
    return main, loops
def find_topo_order(blocks):
    """find topological order of blocks such that true branches are visited
    first (e.g. for_break test in test_dataflow).
    """
    cfg = compute_cfg_from_blocks(blocks)
    post_order = []
    seen = set()

    def _dfs_rec(node):
        # classic DFS post-order; reversing it below yields a topo order
        if node not in seen:
            seen.add(node)
            succs = cfg._succs[node]
            last_inst = blocks[node].body[-1]
            if isinstance(last_inst, ir.Branch):
                # visit the false branch first in DFS so that, after the
                # final reversal, the true branch comes earlier
                succs = [last_inst.falsebr, last_inst.truebr]
            for dest in succs:
                # skip back edges to avoid infinite recursion on loops
                if (node, dest) not in cfg._back_edges:
                    _dfs_rec(dest)
            post_order.append(node)

    _dfs_rec(cfg.entry_point())
    post_order.reverse()
    return post_order
def merge_adjacent_blocks(func_ir): cfg = compute_cfg_from_blocks(func_ir.blocks) # merge adjacent blocks removed = [] for label in list(func_ir.blocks.keys()): if label in removed: continue succs = list(cfg.successors(label)) if len(succs) != 1: continue next_label = succs[0][0] preds = list(cfg.predecessors(next_label)) if len(preds) != 1 or preds[0][0] != label: continue block = func_ir.blocks[label] next_block = func_ir.blocks[next_label] if block.scope != next_block.scope: continue # merge removed.append(next_label) block.body = block.body[:(len(block.body) - 1)] for stmts in next_block.body: block.body.append(stmts) del func_ir.blocks[next_label]
def _fix_nested_array(func_ir):
    """Look for assignment like: a[..] = b, where both a and b are numpy
    arrays, and try to eliminate array b by expanding a with an extra
    dimension.  Mutates ``func_ir`` in place.
    """
    blocks = func_ir.blocks
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    # no statements treated as dead for liveness purposes here
    empty_deadmap = dict([(label, set()) for label in blocks.keys()])
    livemap = compute_live_variables(cfg, blocks, usedefs.defmap,
                                     empty_deadmap)

    def find_array_def(arr):
        """Find numpy array definition such as
            arr = numba.unsafe.ndarray.empty_inferred(...).
        If it is arr = b[...], find array definition of b recursively.
        Raises GuardException when no such definition is found.
        """
        arr_def = func_ir.get_definition(arr)
        _make_debug_print("find_array_def")(arr, arr_def)
        if isinstance(arr_def, ir.Expr):
            if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
                return arr_def
            elif arr_def.op == 'getitem':
                return find_array_def(arr_def.value)
        raise GuardException

    def fix_dependencies(expr, varlist):
        """Double check if all variables in varlist are defined before
        expr is used. Try to move constant definition when the check fails.
        Bails out by raising GuardException if it can't be moved.
        """
        debug_print = _make_debug_print("fix_dependencies")
        for label, block in blocks.items():
            scope = block.scope
            body = block.body
            defined = set()
            for i in range(len(body)):
                inst = body[i]
                if isinstance(inst, ir.Assign):
                    defined.add(inst.target.name)
                    if inst.value == expr:
                        new_varlist = []
                        for var in varlist:
                            # var must be defined before this inst, or live
                            # and not later defined.
                            if (var.name in defined or
                                (var.name in livemap[label] and
                                 not (var.name in usedefs.defmap[label]))):
                                debug_print(var.name, " already defined")
                                new_varlist.append(var)
                            else:
                                debug_print(var.name, " not yet defined")
                                var_def = get_definition(func_ir, var.name)
                                if isinstance(var_def, ir.Const):
                                    # re-materialize the constant just
                                    # before `expr` under a fresh name
                                    loc = var.loc
                                    new_var = ir.Var(scope,
                                                     mk_unique_var("new_var"),
                                                     loc)
                                    new_const = ir.Const(var_def.value, loc)
                                    new_vardef = _new_definition(
                                        func_ir, new_var, new_const, loc)
                                    new_body = []
                                    new_body.extend(body[:i])
                                    new_body.append(new_vardef)
                                    new_body.extend(body[i:])
                                    block.body = new_body
                                    new_varlist.append(new_var)
                                else:
                                    # only constants can be safely moved
                                    raise GuardException
                        return new_varlist
        # when expr is not found in block
        raise GuardException

    def fix_array_assign(stmt):
        """For assignment like lhs[idx] = rhs, where both lhs and rhs are
        arrays, do the following:
        1. find the definition of rhs, which has to be a call to
           numba.unsafe.ndarray.empty_inferred
        2. find the source array creation for lhs, insert an extra
           dimension of size of b.
        3. replace the definition of
           rhs = numba.unsafe.ndarray.empty_inferred(...) with
           rhs = lhs[idx]
        Returns True on success; raises GuardException (via require) when
        the pattern does not match.
        """
        require(isinstance(stmt, ir.SetItem))
        require(isinstance(stmt.value, ir.Var))
        debug_print = _make_debug_print("fix_array_assign")
        debug_print("found SetItem: ", stmt)
        lhs = stmt.target
        # Find the source array creation of lhs
        lhs_def = find_array_def(lhs)
        debug_print("found lhs_def: ", lhs_def)
        rhs_def = get_definition(func_ir, stmt.value)
        debug_print("found rhs_def: ", rhs_def)
        require(isinstance(rhs_def, ir.Expr))
        # look through a 'cast' wrapper around the rhs definition
        if rhs_def.op == 'cast':
            rhs_def = get_definition(func_ir, rhs_def.value)
            require(isinstance(rhs_def, ir.Expr))
        require(_find_unsafe_empty_inferred(func_ir, rhs_def))
        # Find the array dimension of rhs
        dim_def = get_definition(func_ir, rhs_def.args[0])
        require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple')
        debug_print("dim_def = ", dim_def)
        extra_dims = [get_definition(func_ir, x, lhs_only=True)
                      for x in dim_def.items]
        debug_print("extra_dims = ", extra_dims)
        # Expand size tuple when creating lhs_def with extra_dims
        size_tuple_def = get_definition(func_ir, lhs_def.args[0])
        require(isinstance(size_tuple_def, ir.Expr) and
                size_tuple_def.op == 'build_tuple')
        debug_print("size_tuple_def = ", size_tuple_def)
        extra_dims = fix_dependencies(size_tuple_def, extra_dims)
        size_tuple_def.items += extra_dims
        # In-place modify rhs_def to be getitem
        rhs_def.op = 'getitem'
        rhs_def.value = get_definition(func_ir, lhs, lhs_only=True)
        rhs_def.index = stmt.index
        # drop the stale call-expression keywords
        del rhs_def._kws['func']
        del rhs_def._kws['args']
        del rhs_def._kws['vararg']
        del rhs_def._kws['kws']
        # success
        return True

    for label in find_topo_order(func_ir.blocks):
        block = func_ir.blocks[label]
        for stmt in block.body:
            # on success the SetItem itself becomes redundant and is removed
            if guard(fix_array_assign, stmt):
                block.body.remove(stmt)
def canonicalize_cfg_single_backedge(blocks):
    """
    Rewrite loops that have multiple backedges.

    Each such loop gets a new tail block; every backedge is redirected to
    jump to the tail, which carries the single backedge to the header.
    Returns a new block dict; ``blocks`` itself is not mutated.
    """
    cfg = compute_cfg_from_blocks(blocks)
    newblocks = blocks.copy()

    def new_block_id():
        # labels are ints; one past the current maximum is always free
        return max(newblocks.keys()) + 1

    def has_multiple_backedges(loop):
        # count blocks in the loop body whose terminator targets the header
        count = 0
        for k in loop.body:
            blk = blocks[k]
            edges = blk.terminator.get_targets()
            # is a backedge?
            if loop.header in edges:
                count += 1
                if count > 1:
                    # early exit
                    return True
        return False

    def yield_loops_with_multiple_backedges():
        for lp in cfg.loops().values():
            if has_multiple_backedges(lp):
                yield lp

    def replace_target(term, src, dst):
        # return a new terminator with jumps to `src` redirected to `dst`
        def replace(target):
            return (dst if target == src else target)

        if isinstance(term, ir.Branch):
            return ir.Branch(cond=term.cond,
                             truebr=replace(term.truebr),
                             falsebr=replace(term.falsebr),
                             loc=term.loc)
        elif isinstance(term, ir.Jump):
            return ir.Jump(target=replace(term.target), loc=term.loc)
        else:
            # any other terminator must have no targets to rewrite
            assert not term.get_targets()
            return term

    def rewrite_single_backedge(loop):
        """
        Add new tail block that gathers all the backedges
        """
        header = loop.header
        tailkey = new_block_id()
        for blkkey in loop.body:
            blk = newblocks[blkkey]
            if header in blk.terminator.get_targets():
                newblk = blk.copy()
                # rewrite backedge into jumps to new tail block
                newblk.body[-1] = replace_target(blk.terminator, header,
                                                 tailkey)
                newblocks[blkkey] = newblk
        # create new tail block
        entryblk = newblocks[header]
        tailblk = ir.Block(scope=entryblk.scope, loc=entryblk.loc)
        # add backedge
        tailblk.append(ir.Jump(target=header, loc=tailblk.loc))
        newblocks[tailkey] = tailblk

    for loop in yield_loops_with_multiple_backedges():
        rewrite_single_backedge(loop)

    return newblocks
def run(self):
    """Run inline closure call pass.

    Oldest variant: directly inlines calls to ``reduce`` (builtin or
    functools, only when auto-parallel is off) using a hand-written
    three-argument reduce implementation, and inlines closure calls whose
    callee is a ``make_function`` expression.  Afterwards optionally
    rewrites array-building loops and cleans up the IR if anything
    changed.
    """
    modified = False
    work_list = list(self.func_ir.blocks.items())
    debug_print = _make_debug_print("InlineClosureCallPass")
    debug_print("START")
    while work_list:
        label, block = work_list.pop()
        for i in range(len(block.body)):
            instr = block.body[i]
            if isinstance(instr, ir.Assign):
                lhs = instr.target
                expr = instr.value
                if isinstance(expr, ir.Expr) and expr.op == 'call':
                    # inline reduce() when parallel is off
                    if not self.flags.auto_parallel:
                        call_name = guard(find_callname, self.func_ir, expr)
                        if (call_name == ('reduce', 'builtin')
                                or call_name == ('reduce', 'functools')):
                            # only the 3-argument form (with an initial
                            # value) is supported
                            if len(expr.args) != 3:
                                raise TypeError("invalid reduce call, "
                                    "three arguments including initial "
                                    "value required")
                            check_reduce_func(self.func_ir, expr.args[0])

                            # template whose IR replaces the reduce() call
                            def reduce_func(f, A, v):
                                s = v
                                it = iter(A)
                                for a in it:
                                    s = f(s, a)
                                return s

                            new_blocks = inline_closure_call(self.func_ir,
                                self.func_ir.func_id.func.__globals__,
                                block, i, reduce_func)
                            for block in new_blocks:
                                work_list.append(block)
                            modified = True
                            # current block is modified, skip the rest
                            break
                    func_def = guard(get_definition, self.func_ir, expr.func)
                    debug_print("found call to ", expr.func,
                                " def = ", func_def)
                    if isinstance(func_def, ir.Expr) \
                            and func_def.op == "make_function":
                        new_blocks = inline_closure_call(self.func_ir,
                            self.func_ir.func_id.func.__globals__,
                            block, i, func_def)
                        for block in new_blocks:
                            work_list.append(block)
                        modified = True
                        # current block is modified, skip the rest
                        break

    if enable_inline_arraycall:
        # Identify loop structure
        if modified:
            # Need to do some cleanups if closure inlining kicked in
            merge_adjacent_blocks(self.func_ir)
        cfg = compute_cfg_from_blocks(self.func_ir.blocks)
        debug_print("start inline arraycall")
        _debug_dump(cfg)
        loops = cfg.loops()
        sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
        visited = []
        # We go over all loops, bigger loops first (outer first)
        for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True):
            visited.append(k)
            if guard(_inline_arraycall, self.func_ir, cfg, visited,
                     loops[k], self.flags.auto_parallel):
                modified = True
        if modified:
            _fix_nested_array(self.func_ir)

    if modified:
        remove_dels(self.func_ir.blocks)
        # repeat dead code elimination until nothing can be further
        # removed
        while (remove_dead(self.func_ir.blocks, self.func_ir.arg_names)):
            pass
        self.func_ir.blocks = rename_labels(self.func_ir.blocks)
    debug_print("END")