Ejemplo n.º 1
0
def _jump_target(i, nextaddr: int) -> int:
    '''Return the successor address to record for an unconditional jump.

    An immediate operand is parsed from the instruction's operand string.  A
    RIP-relative memory operand yields nextaddr plus its displacement.  Every
    other operand form yields 0, the value this module also records for
    returns and halts.'''
    operand = i.operands[0]
    if OperandTests.is_imm(operand):
        return int(i.op_str, 0)
    if OperandTests.is_mem(operand):
        disp = OperandTests.is_rip_relative(operand)
        if disp is not None:
            return nextaddr + disp
    return 0


def do_pass_two(bbs: Set[int], rad: RAD) -> Dict[int, Node]:
    '''Run pass two of basic block discovery.

    For every leader address in bbs that rad reports as in range, instructions
    are disassembled in sequence and collected into a BasicBlock until one of
    the following ends the block:

      * a call (only when DebugOpts.CALL_ENDS_BB is set) or an interrupt (only
        when DebugOpts.SYSCALL_ENDS_BB is set); successor is the next address
      * an unconditional jump; successor as computed by _jump_target
      * any other branch; a PredicateNode with the branch target and the next
        address
      * a halt, return, or interrupt return; successor is 0
      * the next address is itself a leader in bbs; successor is that address
      * the next address cannot be disassembled; successor is 0

    Returns a dictionary mapping each block's address (bb.get_address()) to
    the FunctionNode or PredicateNode built for it.  A leader whose very first
    instruction cannot be disassembled contributes no node.'''

    note("Starting pass two")

    # Dictionary to hold nodes.
    nodes: Dict[int, Node] = {}

    # Number of leaders for which a block was started (this includes leaders
    # that turn out to hold no decodable instruction).
    count = 0
    for address in bbs:
        debug(f"Possible basic block at {hex(address)}")
        if not rad.in_range(address):
            continue

        # Create a basic block starting at this location.
        bb = BasicBlock()
        count += 1

        # Tracked locally so an empty block can be told apart from a
        # truncated one without relying on the BasicBlock interface.
        have_code = False

        while True:
            # Disassemble the instruction.
            try:
                i = rad.at(address)
            except (AddressException, NotExecutableException):
                # Disassembly cannot continue.  Keep whatever was collected so
                # far instead of dropping it; 0 is the successor value used
                # elsewhere in this function when no target is known.
                # NOTE(review): confirm 0 is the right "no successor" marker
                # for FunctionNode.
                if have_code:
                    nodes[bb.get_address()] = FunctionNode(bb, 0)
                break

            # Add the instruction to the basic block.
            nextaddr = i.address + i.size
            bb.add(i)
            have_code = True

            if InstructionTests.is_call(i):
                if DebugOpts.CALL_ENDS_BB:
                    # The call ends the basic block.
                    nodes[bb.get_address()] = FunctionNode(bb, nextaddr)
                    break
                # Otherwise assume the call returns and keep going.

            elif InstructionTests.is_branch(i) or InstructionTests.is_jump(i):
                # A branch or jump always ends the basic block.
                if i.mnemonic.endswith('jmp'):
                    nodes[bb.get_address()] = FunctionNode(
                        bb, _jump_target(i, nextaddr))
                else:
                    nodes[bb.get_address()] = PredicateNode(
                        bb, int(i.op_str, 0), nextaddr)
                break

            elif InstructionTests.is_interrupt(i):
                if DebugOpts.SYSCALL_ENDS_BB:
                    # The system call ends the basic block.
                    nodes[bb.get_address()] = FunctionNode(bb, nextaddr)
                    break
                # Otherwise assume the system call returns and keep going.

            elif (i.mnemonic == 'hlt' or InstructionTests.is_ret(i)
                  or InstructionTests.is_interrupt_return(i)):
                # A halt or return ends the basic block.
                nodes[bb.get_address()] = FunctionNode(bb, 0)
                break

            # The basic block continues at the next instruction, unless that
            # address starts another basic block.
            address = nextaddr
            if address in bbs:
                nodes[bb.get_address()] = FunctionNode(bb, address)
                break

    note("Finished pass two")
    note(f"Wrote {count} basic blocks")
    note(f"Generated {len(nodes)} nodes")

    return nodes
Ejemplo n.º 2
0
def do_pass_one(explore: List[int], rad: RAD) -> Set[int]:
    '''Find basic block leaders in a program.

    explore is the list of initial leader addresses; it is read but not
    modified.  rad is an initialized random access disassembler, used through
    rad.at(address).

    Returns the set of potential basic block leader addresses: the initial
    leaders plus every leader discovered by tracing instructions from them.
    Addresses for which rad.at raises AddressException or
    NotExecutableException are skipped.'''

    note("Starting pass one")

    # bbs is the set of basic block leaders discovered so far.  pending is a
    # private stack of addresses still to be traced (a copy, so the caller's
    # list is left intact), and scheduled records every address that has ever
    # been pushed so that no address is disassembled more than once per push.
    bbs = set(explore)
    pending = list(explore)
    scheduled = set(explore)

    def add_explore(addr: int):
        '''Schedule an address for exploration unless it has already been
        scheduled at some point.'''
        if addr not in scheduled:
            scheduled.add(addr)
            pending.append(addr)

    def add_leader(addr: int):
        '''Add a leader to the set of leaders, and schedule it for
        exploration.'''
        debug(f"adding leader: {hex(addr)}")
        if addr not in bbs:
            bbs.add(addr)
            add_explore(addr)

    # Trace instructions until nothing is left to explore.  The stack holds
    # both leaders and plain fall-through addresses; only add_leader puts an
    # address into bbs.
    while pending:
        # Get the next address from the stack.
        address = pending.pop()

        # Disassemble at the address.
        try:
            i = rad.at(address)
        except (AddressException, NotExecutableException):
            # Out of range or not executable; ignore and continue.
            continue

        # Address one byte past the end of the current instruction.  This is
        # likely the address of the next instruction in sequence.
        nextaddr = i.address + i.size

        # Based on the instruction type, determine the next address(es).
        # There are three things we can do here.
        #   (1) Add an address to the set of leaders (and the explore stack)
        #   (2) Add an address to the explore stack (it is not a leader)
        #   (3) Do nothing
        if InstructionTests.is_call(i):
            debug(f"found call at {hex(i.address)}; target is a leader")
            # The call target is a leader when it can be computed.  We assume
            # all calls return, so the next address is explored too; it is a
            # leader only if calls end the basic block.
            target = i.operands[0]
            if OperandTests.is_imm(target):
                add_leader(target.value.imm)
            elif OperandTests.is_mem(target):
                # We can only handle RIP-based addressing.
                disp = OperandTests.is_rip_relative(target)
                if disp is not None:
                    add_leader(nextaddr + disp)
            if DebugOpts.CALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif InstructionTests.is_branch(i) or InstructionTests.is_jump(i):
            target = i.operands[0]
            # Unconditional jumps are recognized the same way do_pass_two
            # recognizes them (mnemonic ends in 'jmp'), so both passes agree
            # on which instructions have a fall-through successor.
            if i.mnemonic.endswith('jmp'):
                debug(f"found jump at {hex(i.address)}; target is leader")
                if OperandTests.is_imm(target):
                    # The target of the jump is the leader of a basic block.
                    add_leader(target.value.imm)
                elif OperandTests.is_mem(target):
                    # We can only handle RIP-based addressing.
                    disp = OperandTests.is_rip_relative(target)
                    if disp is not None:
                        add_leader(nextaddr + disp)
            else:
                debug(
                    f"found branch at {hex(i.address)}; true and false branches are leaders"
                )
                # This is a conditional branch.  Both the target of the branch
                # and the instruction following the branch are leaders.  The
                # immediate is only read when the operand really is one.
                if OperandTests.is_imm(target):
                    add_leader(target.value.imm)
                add_leader(nextaddr)

        elif InstructionTests.is_interrupt(i):
            debug(f"found interrupt at {hex(i.address)}; possible leader")
            # This is an interrupt.  Assume we return and continue.
            if DebugOpts.SYSCALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif (i.mnemonic == 'hlt' or InstructionTests.is_ret(i)
              or InstructionTests.is_interrupt_return(i)):
            # These end the basic block and flow does not continue to the
            # next instruction, so nothing is scheduled.
            debug(f"found halt or return at {hex(i.address)}")

        else:
            # Assume this instruction flows to the next instruction in
            # sequence, but that instruction is not necessarily a leader.
            add_explore(nextaddr)

    note("Pass one complete")
    note(f"Discovered {len(bbs)} potential basic blocks")

    return bbs