def do_pass_two(bbs: Set[int], rad: RAD) -> Dict[int, Node]:
    '''Run pass two of basic block discovery.

    For every leader address in `bbs` that `rad` reports as in range,
    instructions are disassembled sequentially into a BasicBlock until
    something ends the block.  The finished block is wrapped in a
    FunctionNode or PredicateNode and stored in the returned dictionary
    under the address reported by the block itself.

    A block whose disassembly fails (address out of range or not
    executable) before a terminating instruction is reached produces no
    node.'''
    note("Starting pass two")

    # Nodes built so far, keyed by basic block address.
    nodes: Dict[int, Node] = {}

    # Number of basic blocks started (not necessarily stored).
    count = 0

    for leader in bbs:
        debug(f"Possible basic block at {hex(leader)}")
        if not rad.in_range(leader):
            continue

        # Begin a fresh basic block at this leader.
        block = BasicBlock()
        count += 1
        cursor = leader

        while True:
            # Fetch the instruction at the cursor; a failure simply ends
            # the block without storing a node.
            try:
                insn = rad.at(cursor)
            except (AddressException, NotExecutableException):
                break

            # Address immediately after this instruction.
            following = insn.address + insn.size
            block.add(insn)

            if InstructionTests.is_call(insn):
                if DebugOpts.CALL_ENDS_BB:
                    # The call terminates the block; the successor is
                    # the instruction after the call.
                    nodes[block.get_address()] = FunctionNode(block, following)
                    break
                # Otherwise assume the call returns and keep going.
            elif (InstructionTests.is_branch(insn)
                  or InstructionTests.is_jump(insn)):
                # Branches and jumps always terminate the block.
                if not insn.mnemonic.endswith('jmp'):
                    # Conditional branch: two successors.
                    ender = PredicateNode(block, int(insn.op_str, 0), following)
                else:
                    target = insn.operands[0]
                    if OperandTests.is_imm(target):
                        ender = FunctionNode(block, int(insn.op_str, 0))
                    elif OperandTests.is_mem(target):
                        # Only RIP-relative memory targets can be
                        # resolved; anything else gets successor 0.
                        disp = OperandTests.is_rip_relative(target)
                        successor = following + disp if disp is not None else 0
                        ender = FunctionNode(block, successor)
                    else:
                        ender = FunctionNode(block, 0)
                nodes[block.get_address()] = ender
                break
            elif InstructionTests.is_interrupt(insn):
                if DebugOpts.SYSCALL_ENDS_BB:
                    # The system call terminates the block.
                    nodes[block.get_address()] = FunctionNode(block, following)
                    break
                # Otherwise assume the system call returns and keep going.
            elif (insn.mnemonic == 'hlt' or InstructionTests.is_ret(insn)
                  or InstructionTests.is_interrupt_return(insn)):
                # Halt or return: the block ends with successor 0.
                nodes[block.get_address()] = FunctionNode(block, 0)
                break

            # Control falls through to the next instruction.  If that
            # address starts another basic block, close this one there.
            cursor = following
            if cursor in bbs:
                nodes[block.get_address()] = FunctionNode(block, cursor)
                break

    note("Finished pass two")
    note(f"Wrote {count} basic blocks")
    note(f"Generated {len(nodes)} nodes")
    return nodes
def do_pass_one(explore: List[int], rad: RAD) -> Set[int]:
    '''Find basic block leaders in a program.

    Starting from the initial leaders in `explore`, instructions are
    traced through the random access disassembler `rad`, and every address
    found to begin a basic block is collected.

    The `explore` list is used in place as the work stack, so it is empty
    when this function returns.

    Returns the set of potential basic block leaders (addresses).  The
    initial leaders are always part of the result.'''
    note("Starting pass one")

    # bbs holds the basic block leaders discovered so far.  scheduled holds
    # every address that has ever been placed on the explore stack, so that
    # no address is queued (and disassembled) more than once and the
    # membership test is a set lookup rather than a scan of the stack.
    bbs = set(explore)
    scheduled = set(explore)

    def add_explore(addr: int):
        '''Add an address to be explored, unless it has already been
        scheduled at some point.'''
        if addr not in scheduled:
            scheduled.add(addr)
            explore.append(addr)

    def add_leader(addr: int):
        '''Add a leader to the set of leaders, and also to the set of
        addresses to be explored.'''
        debug(f"adding leader: {hex(addr)}")
        if addr not in bbs:
            bbs.add(addr)
            add_explore(addr)

    # Disassemble, follow the links, and collect basic block leaders.  The
    # explore list is treated as a stack for instruction tracing and does
    # not always contain only leaders; leaders go in the bbs set.  Once the
    # stack is empty, bbs contains every potential leader discovered.
    while explore:
        # Get the next address from the stack.
        address = explore.pop()

        # Disassemble at the address.  An address that is out of range or
        # not executable is ignored.
        try:
            i = rad.at(address)
        except (AddressException, NotExecutableException):
            continue

        # The address one byte past the end of the current instruction.
        # This is likely the address of the next instruction in sequence.
        nextaddr = i.address + i.size

        # Based on the instruction type, determine the next address(es).
        # There are three things we can do here.
        # (1) Add an address to the set of leaders (and the explore stack)
        # (2) Add an address to the explore stack (it is not a leader)
        # (3) Do nothing
        if InstructionTests.is_call(i):
            debug(f"found call at {hex(i.address)}; target is a leader")
            # The call target is a basic block leader.  If calls end the
            # basic block, the address after the call is also a leader;
            # otherwise it is only explored.  All calls are assumed to
            # return.
            if OperandTests.is_imm(i.operands[0]):
                add_leader(i.operands[0].value.imm)
            elif OperandTests.is_mem(i.operands[0]):
                # We can only handle RIP-based addressing.
                disp = OperandTests.is_rip_relative(i.operands[0])
                if disp is not None:
                    add_leader(nextaddr + disp)
            if DebugOpts.CALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif InstructionTests.is_branch(i) or InstructionTests.is_jump(i):
            # Use the same test as do_pass_two: any mnemonic ending in
            # 'jmp' is an unconditional jump.  An exact comparison with
            # 'jmp' would send other such mnemonics down the conditional
            # branch path below, which reads an immediate operand.
            # NOTE(review): presumably this covers prefixed forms such as
            # 'bnd jmp' -- confirm against the disassembler's mnemonics.
            if i.mnemonic.endswith('jmp'):
                debug(f"found jump at {hex(i.address)}; target is leader")
                if OperandTests.is_imm(i.operands[0]):
                    # The target of the jump is the leader of a basic block.
                    add_leader(i.operands[0].value.imm)
                elif OperandTests.is_mem(i.operands[0]):
                    # We can only handle RIP-based addressing.
                    disp = OperandTests.is_rip_relative(i.operands[0])
                    if disp is not None:
                        add_leader(nextaddr + disp)
            else:
                debug(
                    f"found branch at {hex(i.address)}; true and false branches are leaders"
                )
                # This is a conditional branch.  Both the target of the
                # branch and the instruction following it are leaders.
                add_leader(i.operands[0].value.imm)
                add_leader(nextaddr)

        elif InstructionTests.is_interrupt(i):
            debug(f"found interrupt at {hex(i.address)}; possible leader")
            # This is an interrupt.  Assume we return and continue.
            if DebugOpts.SYSCALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif (i.mnemonic == 'hlt' or InstructionTests.is_ret(i)
              or InstructionTests.is_interrupt_return(i)):
            debug(f"found halt or return at {hex(i.address)}")
            # These end the basic block and flow does not continue to the
            # next instruction, so nothing is added to explore.

        else:
            # Assume this instruction flows to the next instruction in
            # sequence, but that instruction is not necessarily a leader.
            add_explore(nextaddr)

    note("Pass one complete")
    note(f"Discovered {len(bbs)} potential basic blocks")
    return bbs