def _get_instructions(func, definition_line=None):
    """Return the bytecode of ``func`` as ``FakeBytecodeInstruction`` objects.

    Every instruction yielded by ``get_instructions`` is copied into a
    ``FakeBytecodeInstruction`` built from its offset, opname, argval and
    starts_line.  The argval is carried over only when it is a ``str`` or
    an ``int``; any other kind of value is replaced by ``None``.

    NOTE(review): ``definition_line`` is accepted but never used -- the
    underlying call always passes ``first_line=None``.  Confirm whether it
    was meant to be forwarded.
    """
    return [
        FakeBytecodeInstruction(
            ins.offset,
            ins.opname,
            ins.argval if isinstance(ins.argval, (str, int)) else None,
            ins.starts_line,
        )
        for ins in get_instructions(func, first_line=None)
    ]
def print_structured_flow(fn, dom_tree, bb_list):
    """Print the disassembly of ``fn`` annotated with block structure.

    Each block in ``bb_list`` is first given a 0-based ``num`` attribute in
    list order.  Every instruction is then printed through its
    ``disassemble()`` method.  Before an instruction that starts a block,
    one marker line (LOOP / FOR / FINALLY / EXCEPT) is printed for each
    matching flag of that block; after an instruction whose offset equals
    some block's ``reach_offset``, an "END of block range" line is printed
    for each such block, numbered from 1.

    ``dom_tree`` is accepted but not used here.
    """
    print("\n" + "-" * 40)

    for index, block in enumerate(bb_list):
        block.num = index

    block_starting_at = {block.start_offset: block for block in bb_list}

    blocks_ending_at = {}
    for block in bb_list:
        # Blocks lacking ``reach_offset`` are dead code; leave them out.
        if hasattr(block, 'reach_offset'):
            blocks_ending_at.setdefault(block.reach_offset, []).append(block)

    # Checked in this order; the first equal flag decides the label.
    flag_labels = (
        (BB_LOOP, "LOOP"),
        (BB_FOR, "FOR"),
        (BB_FINALLY, "FINALLY"),
        (BB_EXCEPT, "EXCEPT"),
    )

    for inst in get_instructions(fn):
        offset = inst.offset
        starting = block_starting_at.get(offset)
        if starting:
            for flag in starting.flags:
                for marker, label in flag_labels:
                    if flag == marker:
                        print(label)
                        break
        print(inst.disassemble())
        for block in blocks_ending_at.get(offset, ()):
            print("END of block range: BB num: %s" % (block.num + 1, ))
def print_structured_flow(fn, cfg, current, cs_marks):
    """Print the disassembly of ``fn`` interleaved with control-structure marks.

    ``cs_marks`` maps an instruction offset to a sequence of mark strings.
    Marks that do not start with ``'end'`` are printed (upper-cased) before
    the instruction; marks that do are printed (upper-cased) after it, with
    ``'end_continue'`` shown as ``CONTINUE``.

    ``cfg`` and ``current`` are accepted but not used here.
    """
    print("\n" + "-" * 40)
    for inst in get_instructions(fn):
        offset = inst.offset
        marks = cs_marks[offset] if offset in cs_marks else ()

        trailing = []
        for mark in marks:
            if mark.startswith('end'):
                # Hold "end" marks back until the instruction is shown.
                trailing.append('CONTINUE' if mark == 'end_continue' else mark)
            else:
                print(mark.upper())

        print(inst.disassemble())

        for mark in trailing:
            print(mark.upper())
def test_get_instructions():
    """``dis.get_instructions`` yields at least one instruction for the sample source."""
    instructions = list(dis.get_instructions(TEST_SOURCE_CODE))
    assert len(instructions) > 0
def basic_blocks(version, is_pypy, fn):
    """Create a list of basic blocks found in a code object.

    The instruction stream of ``fn`` is walked twice.  The first pass
    collects every offset that a jump instruction targets.  The second
    pass accumulates instructions into a block and closes the block (via
    ``BB.add_bb``) before a jump target and after break, most jump, and
    no-follow instructions.

    Args:
        version: forwarded, together with ``is_pypy``, to ``BBMgr``.
        is_pypy: forwarded to ``BBMgr``.
        fn: object handed to ``get_instructions`` to obtain the
            instructions to analyze.

    Returns:
        ``BB.bb_list``, the blocks accumulated by the ``BBMgr``; the last
        block recorded by ``add_bb`` has its ``follow_offset`` cleared and
        any trailing instructions are appended as a final ``BasicBlock``.
    """

    BB = BBMgr(version, is_pypy)

    # Pass 1: get jump targets.
    jump_targets = set()
    for inst in get_instructions(fn):
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)
        if op in BB.JUMP_INSTRUCTIONS:
            # Absolute jumps carry the target directly; otherwise the
            # argument is added to the offset of the following instruction.
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = inst.arg
            else:
                jump_offset = follow_offset + inst.arg
            jump_targets.add(jump_offset)
            pass

    # State of the block currently being accumulated.
    start_offset = 0  # offset of the first instruction of the open block
    end_offset = -1  # offset of the instruction being looked at
    jump_offsets = set()  # jump targets recorded for the open block
    prev_offset = -1  # offset of the previously seen instruction
    # Stack of loop-end offsets; the initial -1 keeps ``[-1]`` indexing
    # valid while no loop is open.
    endloop_offsets = [-1]
    flags = set([BB_ENTRY])

    # Pass 2: split the stream into blocks.
    for inst in get_instructions(fn):
        prev_offset = end_offset
        end_offset = inst.offset
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)

        # Track loop nesting so a break knows where it lands.
        if op == BB.opcode.SETUP_LOOP:
            jump_offset = follow_offset + inst.arg
            endloop_offsets.append(jump_offset)
        elif offset == endloop_offsets[-1]:
            endloop_offsets.pop()
        pass

        if op in BB.LOOP_INSTRUCTIONS:
            flags.add(BB_LOOP)
        elif op in BB.BREAK_INSTRUCTIONS:
            # A break jumps to the end of the innermost open loop and
            # closes the current block.
            flags.add(BB_BREAK)
            jump_offsets.add(endloop_offsets[-1])
            flags, jump_offsets = BB.add_bb(start_offset, end_offset,
                                            follow_offset, flags, jump_offsets)
            start_offset = follow_offset

        if offset in jump_targets:
            # Fallthrough path and jump target path.
            # This instruction definitely starts a new basic block.
            # Close off any prior basic block.
            if start_offset < end_offset:
                flags, jump_offsets = BB.add_bb(start_offset, prev_offset,
                                                end_offset, flags, jump_offsets)
                start_offset = end_offset

        # Add block flags for certain classes of instructions
        if op in BB.BLOCK_INSTRUCTIONS:
            flags.add(BB_BLOCK)
        elif op in BB.EXCEPT_INSTRUCTIONS:
            flags.add(BB_EXCEPT)
        elif op in BB.FINALLY_INSTRUCTIONS:
            flags.add(BB_FINALLY)
        elif op in BB.FOR_INSTRUCTIONS:
            flags.add(BB_FOR)
        elif op in BB.JUMP_INSTRUCTIONS:
            # Some sort of jump instruction.
            # While in theory an absolute jump could be part of the
            # same (extended) basic block, for our purposes we would like to
            # call them two basic blocks as that probably mirrors
            # the code more simply.

            # Figure out where we jump to and add it to this
            # basic block's jump offsets.
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = inst.arg
            else:
                jump_offset = follow_offset + inst.arg

            jump_offsets.add(jump_offset)
            if op in BB.JUMP_UNCONDITONAL:
                flags.add(BB_JUMP_UNCONDITIONAL)
                flags, jump_offsets = BB.add_bb(start_offset, end_offset,
                                                follow_offset, flags, jump_offsets)
                start_offset = follow_offset
            elif op != BB.opcode.SETUP_LOOP:
                # Every other jump except SETUP_LOOP also ends the block.
                flags, jump_offsets = BB.add_bb(start_offset, end_offset,
                                                follow_offset, flags, jump_offsets)
                start_offset = follow_offset
                pass
        elif op in BB.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            flags, jump_offsets = BB.add_bb(start_offset, end_offset,
                                            follow_offset, flags, jump_offsets)
            start_offset = follow_offset
            pass
        pass

    # The last recorded block has nothing following it.
    if len(BB.bb_list):
        BB.bb_list[-1].follow_offset = None

    # Add remaining instructions?
    if start_offset <= end_offset:
        BB.bb_list.append(
            BasicBlock(start_offset, end_offset, None,
                       flags=flags, jump_offsets=jump_offsets))

    return BB.bb_list
def test_iteration(self):
    """Iterating ``dis.Bytecode`` matches ``dis.get_instructions`` for each source."""
    sources = [_f, _C(1).__init__, "a=1", _f.__code__]
    for obj in sources:
        with self.subTest(obj=obj):
            from_bytecode = list(dis.Bytecode(obj))
            from_generator = list(dis.get_instructions(obj))
            self.assertEqual(from_bytecode, from_generator)
def test_doubly_nested(self):
    """Instructions of the function returned by ``outer()()`` match the expected list."""
    # The calls run under captured_stdout(); only the result is kept.
    with captured_stdout():
        innermost = outer()()
    instructions = list(
        dis.get_instructions(innermost, first_line=expected_inner_line)
    )
    self.assertEqual(instructions, expected_opinfo_inner)
def test_nested(self):
    """Instructions of the function returned by ``outer()`` match the expected list."""
    # The call runs under captured_stdout(); only the result is kept.
    with captured_stdout():
        nested = outer()
    instructions = list(dis.get_instructions(nested, first_line=expected_f_line))
    self.assertEqual(instructions, expected_opinfo_f)
def test_outer(self):
    """Instructions of ``outer`` itself match the expected list."""
    instructions = list(dis.get_instructions(outer, first_line=expected_outer_line))
    self.assertEqual(instructions, expected_opinfo_outer)
def test_first_line_set_to_None(self):
    """Passing ``first_line=None`` explicitly yields the expected instructions."""
    instructions = list(dis.get_instructions(simple, first_line=None))
    self.assertEqual(instructions, expected_opinfo_simple)
def test_default_first_line(self):
    """Omitting ``first_line`` yields the expected instructions for ``simple``."""
    instructions = list(dis.get_instructions(simple))
    self.assertEqual(instructions, expected_opinfo_simple)
def basic_blocks(version, is_pypy, fn, first_line=None):
    """Split the instructions of a code object into basic blocks.

    The instructions are materialized once and walked twice: the first
    pass collects every offset that a jump instruction targets; the second
    pass accumulates instructions into a block and closes the block (via
    ``BB.add_bb``) before a jump target and after break, for, most jump,
    and no-follow instructions.  Blocks flagged ``BB_TRY`` are pushed on a
    stack so exception-handler offsets can be recorded on them.

    Args:
        version: forwarded, together with ``is_pypy``, to ``BBMgr``.
        is_pypy: forwarded to ``BBMgr``.
        fn: object handed to ``get_instructions``.
        first_line: forwarded to ``get_instructions`` as ``first_line``.

    Returns:
        The ``BBMgr`` instance; its ``bb_list`` holds the blocks, with an
        extra ``BB_EXIT`` block added last at the final offset plus one.
    """

    BB = BBMgr(version, is_pypy)

    # Pass 1: get jump targets.
    jump_targets = set()
    instructions = list(get_instructions(fn, first_line=first_line))
    for inst in instructions:
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)
        if op in BB.JUMP_INSTRUCTIONS:
            # Absolute jumps carry the target directly; otherwise the
            # argument is added to the offset of the following instruction.
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = inst.arg
            else:
                jump_offset = follow_offset + inst.arg
            jump_targets.add(jump_offset)
            pass

    # State of the block currently being accumulated.
    start_offset = 0  # offset of the first instruction of the open block
    end_offset = -1  # offset of the instruction being looked at
    jump_offsets = set()  # jump targets recorded for the open block
    prev_offset = -1  # offset of the previously seen instruction
    # Stack of loop-end offsets; the initial -1 keeps ``[-1]`` indexing
    # valid while no loop is open.
    endloop_offsets = [-1]
    flags = set([BB_ENTRY])
    end_try_offset_stack = []  # argvals of the try instructions seen so far
    try_stack = []  # blocks that carry BB_TRY, innermost last
    end_try_offset = None
    loop_offset = None  # offset of the loop instruction in the open block, if any

    # Pass 2: split the stream into blocks.
    for i, inst in enumerate(instructions):
        prev_offset = end_offset
        end_offset = inst.offset
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)

        # NOTE(review): ``end_try_offset`` starts as None and is only
        # reassigned inside this branch, so the branch looks unreachable
        # as written -- confirm.
        if offset == end_try_offset:
            if len(end_try_offset_stack):
                end_try_offset = end_try_offset_stack[-1]
                end_try_offset_stack.pop()
            else:
                end_try_offset = None

        # Track loop nesting so a break knows where it lands.
        if op in BB.LOOP_INSTRUCTIONS:
            jump_offset = follow_offset + inst.arg
            endloop_offsets.append(jump_offset)
            loop_offset = offset
        elif offset == endloop_offsets[-1]:
            endloop_offsets.pop()
        pass

        if op in BB.LOOP_INSTRUCTIONS:
            flags.add(BB_LOOP)
        elif op in BB.BREAK_INSTRUCTIONS:
            # A break jumps to the end of the innermost open loop and
            # closes the current block.
            flags.add(BB_BREAK)
            jump_offsets.add(endloop_offsets[-1])
            block, flags, jump_offsets = BB.add_bb(start_offset,
                                                   end_offset, loop_offset,
                                                   follow_offset,
                                                   flags, jump_offsets)
            loop_offset = None
            if BB_TRY in block.flags:
                try_stack.append(block)
            start_offset = follow_offset

        if offset in jump_targets:
            # Fallthrough path and jump target path.
            # This instruction definitely starts a new basic block.
            # Close off any prior basic block.
            if start_offset < end_offset:
                block, flags, jump_offsets = BB.add_bb(start_offset,
                                                       prev_offset, loop_offset,
                                                       end_offset, flags,
                                                       jump_offsets)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = end_offset
                pass

        # Add block flags for certain classes of instructions
        if op in BB.JUMP_CONDITONAL:
            flags.add(BB_JUMP_CONDITIONAL)

        if op in BB.POP_BLOCK_INSTRUCTIONS:
            flags.add(BB_POP_BLOCK)
            # A pop-block opcode that is the first instruction of its
            # block gets the "starts" flag instead of the plain one.
            if start_offset == offset:
                flags.add(BB_STARTS_POP_BLOCK)
                flags.remove(BB_POP_BLOCK)
        elif op in BB.EXCEPT_INSTRUCTIONS:
            # NOTE(review): this tests the running interpreter's version
            # (``sys.version_info``), not the ``version`` argument --
            # confirm that is intended.
            if (sys.version_info[0:2] <= (2, 7)):
                # In Python up to 2.7, three POP_TOPs at the beginning of a
                # block indicate an exception handler.  We also check
                # that we are nested inside a "try".
                if len(try_stack) == 0 or start_offset != offset:
                    continue
                pass
                # NOTE(review): indexes two instructions ahead without a
                # bounds check.
                if (instructions[i+1].opcode != BB.opcode.opmap['POP_TOP'] or
                    instructions[i+2].opcode != BB.opcode.opmap['POP_TOP']):
                    continue
            flags.add(BB_EXCEPT)
            # Record the handler's start on the innermost try block.
            try_stack[-1].exception_offsets.add(start_offset)
            pass
        elif op in BB.TRY_INSTRUCTIONS:
            end_try_offset_stack.append(inst.argval)
            flags.add(BB_TRY)
        elif op in BB.END_FINALLY_INSTRUCTIONS:
            flags.add(BB_END_FINALLY)
            try_stack[-1].exception_offsets.add(start_offset)
        elif op in BB.FOR_INSTRUCTIONS:
            # A "for" instruction ends its block and records its argval
            # as a jump target of that block.
            flags.add(BB_FOR)
            jump_offsets.add(inst.argval)
            block, flags, jump_offsets = BB.add_bb(start_offset,
                                                   end_offset, loop_offset,
                                                   follow_offset,
                                                   flags, jump_offsets)
            loop_offset = None
            start_offset = follow_offset
        elif op in BB.JUMP_INSTRUCTIONS:
            # Some sort of jump instruction.
            # Figure out where we jump to and add it to this
            # basic block's jump offsets.
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = inst.arg
            else:
                jump_offset = inst.argval

            jump_offsets.add(jump_offset)
            if op in BB.JUMP_UNCONDITONAL:
                flags.add(BB_JUMP_UNCONDITIONAL)
                if jump_offset == follow_offset:
                    flags.add(BB_JUMP_TO_FALLTHROUGH)
                    pass
                block, flags, jump_offsets = BB.add_bb(start_offset,
                                                       end_offset, loop_offset,
                                                       follow_offset,
                                                       flags, jump_offsets)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = follow_offset
            elif op != BB.opcode.SETUP_LOOP:
                # Every other jump except SETUP_LOOP also ends the block.
                if op in BB.FINALLY_INSTRUCTIONS:
                    flags.add(BB_FINALLY)

                block, flags, jump_offsets = BB.add_bb(start_offset,
                                                       end_offset, loop_offset,
                                                       follow_offset,
                                                       flags, jump_offsets)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                start_offset = follow_offset
                pass
        elif op in BB.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            last_block, flags, jump_offsets = BB.add_bb(start_offset,
                                                        end_offset, loop_offset,
                                                        follow_offset,
                                                        flags, jump_offsets)
            loop_offset = None
            start_offset = follow_offset
            pass
        pass

    # The last recorded block has nothing following it; the first one is
    # remembered as the start block.
    if len(BB.bb_list):
        BB.bb_list[-1].follow_offset = None
        BB.start_block = BB.bb_list[0]

    # Add remaining instructions?
    if start_offset <= end_offset:
        BB.bb_list.append(BasicBlock(start_offset, end_offset, loop_offset, None,
                                     flags=flags, jump_offsets=jump_offsets))
        loop_offset = None
        pass

    # Add an artificial block where we can link the exits of other blocks
    # to. This helps in computing reverse dominators.
    BB.add_bb(end_offset+1, end_offset+1, None, None, set([BB_EXIT]), [])
    return BB
def augment_instructions(fn, cfg, version_tuple):
    """Augment instructions in fn with dominator information.

    Walks the instructions of ``fn`` and returns a new list in which each
    real instruction is wrapped as an ``ExtendedInstruction`` carrying the
    basic block and dominator node current at that point, interleaved with
    pseudo instructions ``DOM_START``, ``BB_START``, ``BB_END`` and
    ``DOM_END`` (opcodes 1000-1003) marking where blocks and dominator
    regions begin and end.

    Args:
        fn: object handed to ``get_instructions``.
        cfg: control-flow graph; ``entry_node``, ``dom_tree`` and
            ``offset2block`` are read from it.
        version_tuple: compared against ``(3, 6)`` to pick the step (2 or
            1) used to reach the offset of the dummy block after the last
            instruction.

    Returns:
        The list of ``ExtendedInstruction`` objects, pseudo instructions
        included.

    NOTE(review): the nesting of the BB_END / DOM_END emission inside the
    per-instruction loop should be confirmed against the project history.
    """
    current_block = cfg.entry_node

    dom_tree = cfg.dom_tree
    # Map each basic block to its node in the dominator tree.
    bb2dom_node = {node.bb: node for node in dom_tree.nodes}
    # block_stack = [current_block]

    # Blocks discovered so far, keyed by their first / last offset.  Both
    # maps are seeded with the entry node and grow as blocks are entered.
    starts = {current_block.start_offset: current_block}
    # Dominator nodes grouped by the ``reach_offset`` at which they end.
    dom_reach_ends = {}
    ends = {current_block.end_offset: current_block}
    augmented_instrs = []
    bb = None
    dom = None
    offset = 0
    for inst in get_instructions(fn):
        offset = inst.offset
        new_bb = starts.get(offset, None)
        if new_bb:
            bb = new_bb
            # FIXME: if a basic block is only its own dominator we don't have that
            # listed separately
            new_dom = bb2dom_node.get(bb, dom)
            if new_dom is not None:
                dom = new_dom
                dom_number = dom.bb.number
                # Remember where this dominator region ends so the matching
                # DOM_END can be emitted there.
                reach_ends = dom_reach_ends.get(dom.reach_offset, [])
                reach_ends.append(dom)
                dom_reach_ends[dom.reach_offset] = reach_ends
                pseudo_inst = ExtendedInstruction(
                    "DOM_START",
                    1000,
                    "pseudo",
                    0,
                    dom_number,
                    dom_number,
                    f"Dominator {dom_number}",
                    True,
                    offset,
                    None,
                    False,
                    False,
                    bb,
                    dom,
                )
                augmented_instrs.append(pseudo_inst)
            pseudo_inst = ExtendedInstruction(
                "BB_START",
                1001,
                "pseudo",
                0,
                bb.number,
                bb.number,
                f"Basic Block {bb.number}",
                True,
                offset,
                None,
                False,
                False,
                bb,
                dom,
            )
            augmented_instrs.append(pseudo_inst)
            # Register the blocks reachable from this one so their starts
            # and ends are recognized when their offsets come up.
            if bb.follow_offset:
                follow_bb = cfg.offset2block[bb.follow_offset].bb
                starts[follow_bb.start_offset] = follow_bb
                ends[follow_bb.end_offset] = follow_bb
            for successor_bb in bb.successors:
                starts[successor_bb.start_offset] = successor_bb
                ends[successor_bb.end_offset] = successor_bb
                pass
            pass
        elif bb is None:
            # FIXME: this shouldn't be needed
            bb = dom.bb

        # The real instruction, tagged with the current block and dominator.
        extended_inst = ExtendedInstruction(
            inst.opname,
            inst.opcode,
            inst.optype,
            inst.inst_size,
            inst.arg,
            inst.argval,
            inst.argrepr,
            inst.has_arg,
            inst.offset,
            inst.starts_line,
            inst.is_jump_target,
            inst.has_extended_arg,
            bb,
            dom,
        )
        augmented_instrs.append(extended_inst)
        pass

        # ``bb`` becomes None here unless a known block ends at this offset.
        bb = ends.get(offset, None)
        if bb:
            pseudo_inst = ExtendedInstruction(
                "BB_END",
                1002,
                "pseudo",
                0,
                bb.number,
                bb.number,
                f"Basic Block {bb.number}",
                True,
                offset,
                None,
                False,
                False,
                bb,
                dom,
            )
            augmented_instrs.append(pseudo_inst)
        dom_list = dom_reach_ends.get(offset, None)
        if dom_list is not None:
            # Emit in reverse order of registration.
            # NOTE(review): DOM_END's argrepr says "Basic Block", unlike
            # DOM_START's "Dominator" -- confirm that is intended.
            for dom in reversed(dom_list):
                dom_number = dom.bb.number
                pseudo_inst = ExtendedInstruction(
                    "DOM_END",
                    1003,
                    "pseudo",
                    0,
                    dom_number,
                    dom_number,
                    f"Basic Block {dom_number}",
                    True,
                    offset,
                    None,
                    False,
                    False,
                    dom.bb,
                    dom,
                )
                augmented_instrs.append(pseudo_inst)
                pass
            pass

    # We have a dummy bb at the end+1.
    # Add the end dominator info for that which should exist
    if version_tuple >= (3, 6):
        offset += 2
    else:
        offset += 1
    # FIXME: DRY with above
    dom_list = dom_reach_ends.get(offset, None)
    if dom_list is not None:
        for dom in reversed(dom_list):
            dom_number = dom.bb.number
            pseudo_inst = ExtendedInstruction(
                "DOM_END",
                1003,
                "pseudo",
                0,
                dom_number,
                dom_number,
                f"Basic Block {dom_number}",
                True,
                offset,
                None,
                False,
                False,
                dom.bb,
                dom,
            )
            augmented_instrs.append(pseudo_inst)
            pass

    # for inst in augmented_instrs:
    #     print(inst)
    return augmented_instrs
def basic_blocks(
    fn_or_code,
    version=PYTHON_VERSION_TRIPLE,
    is_pypy=IS_PYPY,
    more_precise_returns=False,
    print_instructions=False,
):
    """Create a list of basic blocks found in a code object.

    `more_precise_returns` indicates whether the RETURN_VALUE should be
    modeled as a jump to the end of the enclosing function or not. See
    comment in code as to why this might be useful.

    The instructions are materialized once and walked twice: the first
    pass collects every offset that a jump instruction targets; the second
    pass accumulates instructions into a block and closes the block (via
    ``BB.add_bb``) before a jump target and after break, for, most jump,
    and no-follow instructions.

    Args:
        fn_or_code: object handed to ``get_instructions``.
        version: forwarded to ``BBMgr`` and ``get_jump_val``; also
            compared against version tuples to pick the exit-block offset
            and to decide which jumps end a block.
        is_pypy: forwarded to ``BBMgr``.
        more_precise_returns: when true, every block that ends in a return
            instruction gets the exit block's offset added to its
            ``jump_offsets`` and its ``edge_count`` incremented.
        print_instructions: when true, each instruction is printed as the
            second pass visits it.

    Returns:
        The ``BBMgr`` instance holding the blocks in ``bb_list``.

    Raises:
        IndexError: if ``get_instructions`` yields nothing, since the last
            instruction is indexed unconditionally.
    """

    BB = BBMgr(version, is_pypy)

    # Pass 1: get jump targets.
    jump_targets = set()
    instructions = list(get_instructions(fn_or_code))
    for inst in instructions:
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)
        if op in BB.JUMP_INSTRUCTIONS:
            jump_value = get_jump_val(inst.arg, version)
            # Absolute jumps use the value directly; otherwise it is added
            # to the offset of the following instruction.
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = jump_value
            else:
                jump_offset = follow_offset + jump_value
            jump_targets.add(jump_offset)
            pass

    # Add an artificial block where we can link the exits of other blocks
    # to. This helps when there is a "raise" not in any try block and
    # in computing reverse dominators.
    end_offset = instructions[-1].offset
    if version >= (3, 6):
        end_bb_offset = end_offset + 2
    else:
        end_bb_offset = end_offset + 1

    end_block, _, _ = BB.add_bb(
        end_bb_offset, end_bb_offset, None, None, set([BB_EXIT]), []
    )

    # State of the block currently being accumulated.
    start_offset = 0  # offset of the first instruction of the open block
    end_offset = -1  # offset of the instruction being looked at
    jump_offsets = set()  # jump targets recorded for the open block
    prev_offset = -1  # offset of the previously seen instruction
    # Stack of loop-end offsets; the initial -1 keeps ``[-1]`` indexing
    # valid while no loop is open.
    endloop_offsets = [-1]
    flags = set([BB_ENTRY])
    end_try_offset_stack = []  # argvals of the try instructions seen so far
    # Seeded with the exit block so ``try_stack[-1]`` always exists.
    try_stack = [end_block]
    end_try_offset = None
    loop_offset = None  # offset of the loop instruction in the open block, if any
    return_blocks = []  # blocks closed by a return instruction

    # Pass 2: split the stream into blocks.
    for i, inst in enumerate(instructions):
        if print_instructions:
            print(inst)
        prev_offset = end_offset
        end_offset = inst.offset
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)

        # NOTE(review): ``end_try_offset`` starts as None and is only
        # reassigned inside this branch, so the branch looks unreachable
        # as written -- confirm.
        if offset == end_try_offset:
            if len(end_try_offset_stack):
                end_try_offset = end_try_offset_stack[-1]
                end_try_offset_stack.pop()
            else:
                end_try_offset = None

        # Track loop nesting so a break knows where it lands.
        if op in BB.LOOP_INSTRUCTIONS:
            jump_offset = follow_offset + inst.arg
            endloop_offsets.append(jump_offset)
            loop_offset = offset
        elif offset == endloop_offsets[-1]:
            endloop_offsets.pop()
        pass

        if op in BB.LOOP_INSTRUCTIONS:
            flags.add(BB_LOOP)
        elif op in BB.BREAK_INSTRUCTIONS:
            # A break jumps to the end of the innermost open loop and
            # closes the current block.
            flags.add(BB_BREAK)
            jump_offsets.add(endloop_offsets[-1])
            block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            if BB_TRY in block.flags:
                try_stack.append(block)
            start_offset = follow_offset

        if offset in jump_targets:
            # Fallthrough path and jump target path.
            # This instruction definitely starts a new basic block.
            # Close off any prior basic block.
            if start_offset < end_offset:
                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    prev_offset,
                    loop_offset,
                    end_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = end_offset
                pass

        # Add block flags for certain classes of instructions
        if op in BB.JUMP_CONDITONAL:
            flags.add(BB_JUMP_CONDITIONAL)

        if op in BB.POP_BLOCK_INSTRUCTIONS:
            flags.add(BB_POP_BLOCK)
            # A pop-block opcode that is the first instruction of its
            # block gets the "starts" flag instead of the plain one.
            if start_offset == offset:
                flags.add(BB_STARTS_POP_BLOCK)
                flags.remove(BB_POP_BLOCK)
        elif op in BB.EXCEPT_INSTRUCTIONS:
            # NOTE(review): this tests the running interpreter's version
            # (``sys.version_info``), not the ``version`` argument --
            # confirm that is intended.
            if sys.version_info[0:2] <= (2, 7):
                # In Python up to 2.7, three POP_TOPs at the beginning of a
                # block indicate an exception handler.  We also check
                # that we are nested inside a "try".
                if len(try_stack) == 0 or start_offset != offset:
                    continue
                pass
                # NOTE(review): indexes two instructions ahead without a
                # bounds check.
                if (
                    instructions[i + 1].opcode != BB.opcode.opmap["POP_TOP"]
                    or instructions[i + 2].opcode != BB.opcode.opmap["POP_TOP"]
                ):
                    continue
            flags.add(BB_EXCEPT)
            # Record the handler's start on the innermost try block.
            try_stack[-1].exception_offsets.add(start_offset)
            pass
        elif op in BB.TRY_INSTRUCTIONS:
            end_try_offset_stack.append(inst.argval)
            flags.add(BB_TRY)
        elif op in BB.END_FINALLY_INSTRUCTIONS:
            flags.add(BB_END_FINALLY)
            try_stack[-1].exception_offsets.add(start_offset)
        elif op in BB.FOR_INSTRUCTIONS:
            # A "for" instruction ends its block and records its argval
            # as a jump target of that block.
            flags.add(BB_FOR)
            jump_offsets.add(inst.argval)
            block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            start_offset = follow_offset
        elif op in BB.JUMP_INSTRUCTIONS:
            # Some sort of jump instruction.
            # Figure out where we jump to and add it to this
            # basic block's jump offsets.
            jump_offset = inst.argval

            jump_offsets.add(jump_offset)
            if op in BB.JUMP_UNCONDITONAL:
                flags.add(BB_JUMP_UNCONDITIONAL)
                if jump_offset == follow_offset:
                    flags.add(BB_JUMP_TO_FALLTHROUGH)
                    pass
                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    end_offset,
                    loop_offset,
                    follow_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = follow_offset
            elif version[:2] >= (3, 9) or (
                version[:2] < (3, 8) and op != BB.opcode.SETUP_LOOP
            ):
                # Other jumps end the block for 3.9 and later, and for
                # versions before 3.8 unless the opcode is SETUP_LOOP.
                if op in BB.FINALLY_INSTRUCTIONS:
                    flags.add(BB_FINALLY)

                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    end_offset,
                    loop_offset,
                    follow_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                start_offset = follow_offset
                pass
        elif op in BB.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            if op in BB.RETURN_INSTRUCTIONS:
                flags.add(BB_RETURN)

            last_block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            start_offset = follow_offset
            # Remember return blocks so they can be linked to the exit
            # block below when requested.
            if op in BB.RETURN_INSTRUCTIONS:
                return_blocks.append(last_block)
            pass
        pass

    # If the bytecode comes from Python, then there is possibly an
    # advantage in treating a return in a block as an instruction
    # which flows to the next instruction, since that will treat
    # blocks with unreachable instructions the way Python source
    # does - the code after that exists.
    #
    # However if you care about analysis, then
    # hook RETURN_VALUE instructions to the exit block offset.
    if more_precise_returns:
        for block in return_blocks:
            block.jump_offsets.add(end_bb_offset)
            block.edge_count += 1

    # The last recorded block has nothing following it; the first one is
    # remembered as the start block.
    if len(BB.bb_list):
        BB.bb_list[-1].follow_offset = None
        BB.start_block = BB.bb_list[0]

    # Add remaining instructions?
    if start_offset <= end_offset:
        BB.bb_list.append(
            BasicBlock(
                start_offset,
                end_offset,
                loop_offset,
                None,
                flags=flags,
                jump_offsets=jump_offsets,
            )
        )
        loop_offset = None
        pass

    return BB