def generate_refs(self):
    """Record cross-references from non-init functions into init segments.

    Every function found in the ``self.non_init_segm`` address ranges is
    scanned item by item.  Each data or code reference leaving an item is
    tested with ``self.is_ea_in_segs`` against ``self.init_segm``; matching
    references are stored in ``self.refs[func]['from']`` as ``(item, xref)``
    tuples, and the data/code references pointing at the function itself
    are stored in ``self.refs[func]['to']``.

    Functions without any matching outgoing reference get no entry.
    """
    for start, end in self.non_init_segm:
        for func in idautils.Functions(start, end):
            # References *to* the function do not depend on the item being
            # scanned, so they are gathered once per function instead of
            # once per matching outgoing reference.
            to_refs_collected = False
            for item in idautils.FuncItems(func):
                for xref in chain(idautils.DataRefsFrom(item),
                                  idautils.CodeRefsFrom(item, 1)):
                    if not self.is_ea_in_segs(xref, self.init_segm):
                        continue
                    if not self.refs.get(func, None):
                        self.refs[func] = {'to': set(), 'from': set()}
                    entry = self.refs[func]
                    entry['from'].add((item, xref))
                    if not to_refs_collected:
                        entry['to'].update(
                            chain(idautils.DataRefsTo(func),
                                  idautils.CodeRefsTo(func, 1)))
                        to_refs_collected = True
Example #2
0
def extract_function_calls_from(f, bb, insn):
    """yield a "calls from" characteristic for each target of a call instruction

    the feature matters mostly at function scope, but it is collected here,
    one instruction at a time. instructions that are not calls yield nothing.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    # flow=False: only explicit targets, not the ordinary fall-through
    for target in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), target
Example #3
0
def goto_ref(addresses, code=False, data=False):
    """Yield the reference targets leaving each of the given addresses.

    Code references are included when ``code`` is true and data references
    when ``data`` is true.  A target equal to ``address + 4`` is dropped
    (presumably the fall-through of a 4-byte instruction -- TODO confirm).
    """
    for address in addresses:
        targets = []
        if code:
            targets.extend(idautils.CodeRefsFrom(address, 0))
        if data:
            targets.extend(idautils.DataRefsFrom(address))

        for target in targets:
            if target != address + 4:
                yield target
def retrieveExterns(bl, ea_externs):
    """Collect the externals referenced by the instructions of a block.

    args:
        bl: sequence whose first two items are the start and (exclusive)
            end address of the block.
        ea_externs: mapping keyed by address; its values are what gets
            collected.

    returns:
        A list holding, for every head visited from ``start`` up to
        ``end``, the ``ea_externs`` value of the first code reference that
        is a key of ``ea_externs``.  Heads without such a reference
        contribute nothing.
    """
    externs = []
    start, end = bl[0], bl[1]
    inst_addr = start
    while inst_addr < end:
        refs = idautils.CodeRefsFrom(inst_addr, 1)
        # Take the first matching reference explicitly rather than indexing
        # a list and swallowing every exception when it is empty.
        hit = next((ref for ref in refs if ref in ea_externs), None)
        if hit is not None:
            externs.append(ea_externs[hit])
        inst_addr = idc.next_head(inst_addr)
    return externs
Example #5
0
 def propagate_dead_code(self, ea, op_map):
     """Mark ``ea`` as dead when it has no live predecessor, then recurse.

     A predecessor counts as live when it is not already a key of
     ``self.marked_addresses`` and ``self.dead_br_of_op(ea, x, op_map)`` is
     false for it.  If no live predecessor remains, the item at ``ea`` is
     coloured ``0x0000ff``, recorded in ``self.marked_addresses`` and the
     same check is applied to each of its code successors.
     """
     live_prevs = [
         x for x in idautils.CodeRefsTo(ea, True)
         # `in` works on both Python 2 and 3; dict.has_key is Python 2 only.
         if x not in self.marked_addresses
         and not self.dead_br_of_op(ea, x, op_map)
     ]
     if live_prevs:
         return

     idc.SetColor(ea, idc.CIC_ITEM, 0x0000ff)
     self.marked_addresses[ea] = None
     # Materialise the successors before recursing, as the recursion marks
     # further addresses.
     for succ in list(idautils.CodeRefsFrom(ea, True)):
         self.propagate_dead_code(succ, op_map)
Example #6
0
def recoverBlock(startEA):
    """Build a ``Block`` starting at ``startEA`` by following instructions.

    Instructions are consumed while their only code reference is the next
    instruction, or while they are calls.  The block ends at the first
    instruction that cannot be decoded, at an unconditional jump, or at an
    instruction whose code references differ from plain fall-through; the
    non-external references of that last instruction become ``succs``.
    """
    blk = Block(startEA)
    ea = startEA

    while True:
        decoded = idautils.DecodeInstruction(ea)
        if decoded is None:
            if idc.Byte(ea) == 0xCC:
                # an undecodable 0xCC byte is included in the block
                blk.endEA = ea + 1
            else:
                sys.stdout.write(
                    "WARNING: Couldn't decode insn at: {0:x}. Ending block.\n".
                    format(ea))
                blk.endEA = ea
            return blk

        fallthrough = ea + decoded.size
        targets = list(idautils.CodeRefsFrom(ea, 1))

        if targets == [fallthrough] or isCall(ea):
            # A lone reference to the next instruction normally keeps the
            # block going; a jump to the next instruction still ends it.
            if not isUnconditionalJump(ea):
                ea = fallthrough
                continue

        # The block ends here.  With no references at all (e.g. a return)
        # the loop below simply adds no successors.
        blk.endEA = fallthrough
        for target in targets:
            # do not decode external code refs
            if not isExternalReference(target):
                blk.succs.append(target)
        return blk
Example #7
0
def get_static_successors(inst):
    """Yield the statically known successors of an instruction.

    Successors by instruction kind:

    * direct function call: the fall-through address, unless the callee's
      function flags carry ``FUNC_NORET``;
    * any other call (indirect call, system call): the fall-through;
    * conditional branch: the fall-through, then the branch target;
    * direct jump: the jump target;
    * indirect jump: the code references of the jump, but only when IDA
      reports switch information for it;
    * fall-through instruction: the next address.

    The kinds are mutually exclusive.  In particular a direct call is
    handled by its own branch only, so its fall-through is yielded at most
    once and is suppressed for callees flagged as `noreturn`.
    """
    # Direct function call. The successor will be the fall-through instruction
    # unless the target of the function call looks like a `noreturn` function.
    if inst.is_direct_function_call():
        called_ea = get_direct_branch_target(inst.ea)
        flags = idc.GetFunctionFlags(called_ea)

        if 0 < flags and (flags & idaapi.FUNC_NORET):
            log.debug("Call to noreturn function {:08x} at {:08x}".format(
                called_ea, inst.ea))
        else:
            yield inst.next_ea  # Not recognised as a `noreturn` function.

    elif inst.is_call():  # Indirect function call, system call.
        yield inst.next_ea

    elif inst.is_conditional_branch():
        yield inst.next_ea
        yield get_direct_branch_target(inst.ea)

    elif inst.is_direct_jump():
        yield get_direct_branch_target(inst.ea)

    elif inst.is_indirect_jump():
        si = idaapi.get_switch_info_ex(inst.ea)
        if si:
            for case_ea in idautils.CodeRefsFrom(inst.ea, True):
                yield case_ea

    elif inst.is_fall_through():
        yield inst.next_ea

    else:
        log.debug("No static successors of {:08x}".format(inst.ea))
Example #8
0
def is_basic_block_tight_loop(bb):
    """report whether a basic block branches back to its own start

    true when the head preceding the block's end address has a code
    reference to the block's start address. blocks whose start is not
    strictly below that head are never reported.

    args:
        bb (IDA BasicBlock)
    """
    last_head = idc.prev_head(bb.end_ea)
    if not (bb.start_ea < last_head):
        return False
    return any(
        target == bb.start_ea
        for target in idautils.CodeRefsFrom(last_head, True)
    )
Example #9
0
def analyse_indirect_jump(block, jump_inst, blocks):
    """Analyse an indirect jump and try to determine its targets.

    When IDA has switch information for the jump, every code reference
    leaving it is looked up with ``program.get_basic_block`` and the
    resulting block is flagged ``address_is_taken``.  Without switch
    information only the first log line is emitted.  The ``block`` and
    ``blocks`` arguments are not read.
    """
    log.info("Analysing indirect jump at {:08x}".format(jump_inst.ea))

    switch_info = idaapi.get_switch_info_ex(jump_inst.ea)
    if not switch_info:
        return

    table_size = switch_info.get_jtable_size()
    log.info(
        "IDA identified a jump table at {:08x} with {} targets".format(
            jump_inst.ea, table_size))

    # a set, so each distinct target is handled once
    for case_ea in set(idautils.CodeRefsFrom(jump_inst.ea, True)):
        program.get_basic_block(case_ea).address_is_taken = True
Example #10
0
def isElfThunk(ea):
    """Tell whether ``ea`` is an unconditional jump to an external reference.

    Returns ``(True, name)`` where ``name`` is ``getFunctionName`` of the
    first external code reference of the jump, or ``(False, None)`` when
    the binary is not a linked ELF, ``ea`` is not an unconditional jump, or
    none of its code references is external.
    """
    if not isLinkedElf():
        return False, None

    if not isUnconditionalJump(ea):
        return False, None

    for target in idautils.CodeRefsFrom(ea, 0):
        if isExternalReference(target):
            return True, getFunctionName(target)

    return False, None
Example #11
0
def extract_insn_cross_section_cflow(f, bb, insn):
    """inspect the instruction for a CALL or JMP that crosses section boundaries

    yields one "cross section flow" characteristic, located at the
    instruction, for every code reference that is not a known import, has
    a segment, and whose segment differs from the instruction's segment.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    for ref in idautils.CodeRefsFrom(insn.ea, False):
        # membership is tested on the mapping itself; `.keys()` would build
        # and scan a list on Python 2
        if ref in get_imports(f.ctx):
            # ignore API calls
            continue

        ref_seg = idaapi.getseg(ref)
        if not ref_seg:
            # handle IDA API bug
            continue

        if ref_seg == idaapi.getseg(insn.ea):
            continue

        yield Characteristic("cross section flow"), insn.ea
Example #12
0
def check_for_api_call(ctx, insn):
    """yield "<first>.<second>" import names for the targets of a call instruction

    each code reference of the call is looked up in ``get_imports(ctx)``.
    when the target is not an import but lies in a function flagged
    ``FUNC_THUNK``, the data references of the target are looked up instead.
    instructions that are not calls yield nothing.
    """
    if not idaapi.is_call_insn(insn):
        return

    for target in idautils.CodeRefsFrom(insn.ea, False):
        entry = get_imports(ctx).get(target, ())
        if entry:
            yield "%s.%s" % (entry[0], entry[1])
            continue

        # check if call to thunk
        # TODO: first instruction might not always be the thunk
        func = idaapi.get_func(target)
        if not func or not (func.flags & idaapi.FUNC_THUNK):
            continue

        for thunk_target in idautils.DataRefsFrom(target):
            # TODO: always data ref for thunk??
            entry = get_imports(ctx).get(thunk_target, ())
            if entry:
                yield "%s.%s" % (entry[0], entry[1])
Example #13
0
def ShowFunctionsBrowser(mea=None, show_runtime=False, show_string=True, mynav=None):
    """Open a ``FunctionsBrowser`` on the code references leaving an address.

    args:
        mea: address to browse from; the current screen address when None.
        show_runtime: stored on the browser as ``show_runtime_functions``.
        show_string: stored on the browser as ``show_string``.
        mynav: stored on the browser as ``mynav``.

    The user is asked for the maximum recursion level; nothing is shown if
    the dialog yields no (or a zero) value.  Any error is reported on the
    output instead of being raised.
    """
    try:
        ea = idc.ScreenEA() if mea is None else mea

        num = idc.AskLong(3, "Maximum recursion level")
        if not num:
            return

        result = list(idautils.CodeRefsFrom(ea, idc.BADADDR))
        g = FunctionsBrowser("Code Refs Browser %s" % idc.GetFunctionName(ea), ea, result)
        g.max_level = num
        # honour the caller's choice; this used to be forced to True
        g.show_string = show_string
        g.show_runtime_functions = show_runtime
        g.mynav = mynav
        g.Show()
    except Exception:
        # single-argument form prints the same text on Python 2 and 3
        print("Error %s" % (sys.exc_info()[1],))
Example #14
0
def verify_ref(addresses, name, code=False, data=False):
    """Yield the addresses that reference the symbol called ``name``.

    The symbol is resolved with ``locate``; nothing is yielded when that
    gives ``idc.BADADDR``.  For each address its code references (if
    ``code``) and data references (if ``data``) are gathered, and the
    address is yielded once if any of them equals the symbol and is not
    ``address + 4``.
    """
    symbol = locate(name)
    if symbol == idc.BADADDR:
        return

    for address in addresses:
        candidates = []
        if code:
            candidates.extend(idautils.CodeRefsFrom(address, 1))
        if data:
            candidates.extend(idautils.DataRefsFrom(address))

        if any(address + 4 != ref and symbol == ref for ref in candidates):
            yield address
Example #15
0
def GetCodeRefsFrom(ea):
    """Return the code references leaving the function that contains ``ea``.

    The function is resolved through its name, and every code head of every
    chunk is scanned for non-flow code references.  A reference is skipped
    when the function it resolves to is the scanned function itself, or
    when that function's address is already in the result.

    Each reference appears at most once in the returned list.  The
    membership check alone is made on the resolved function address, which
    can differ from the reference, so the reference itself is checked too.
    """
    name = idc.GetFunctionName(ea)
    f_start = idc.LocByName(name)

    ret = []
    seen = set()  # mirrors `ret` for constant-time membership tests
    for chunk in idautils.Chunks(f_start):
        astart = chunk[0]
        aend = chunk[1]
        for head in idautils.Heads(astart, aend):
            # Only instructions carry code references of interest
            if not idc.isCode(idc.GetFlags(head)):
                continue
            for ref in idautils.CodeRefsFrom(head, 0):
                loc = idc.LocByName(idc.GetFunctionName(ref))
                if loc == f_start or loc in seen or ref in seen:
                    continue
                ret.append(ref)
                seen.add(ref)

    return ret
Example #16
0
    def forward_analysis(self):
        """
        Explore paths forward from self.ea until no active path is left.

        Starts with a single Path at self.ea. Each active path is advanced
        head by head; at a 'CBZ' or 'B' mnemonic it is replaced by one deep
        copy per code reference not already on its route. Paths (or copies)
        that are no longer active are collected in self.dead_paths.
        :return: None
        """
        self.active_paths = [Path(self.ea, self)]
        while self.active_paths:
            # Changes are collected here and applied after the for loop, so
            # self.active_paths is not modified while it is being iterated.
            add = []
            remove = []
            for p in self.active_paths:
                ea = p.route[-1]  # the latest step
                # NOTE(review): a path that reaches self.ctx_end while still
                # active leaves this loop without being added to `remove`;
                # confirm that Path.add_step deactivates it, otherwise the
                # outer while loop would not terminate.
                while ea != self.ctx_end:
                    ea = idc.NextHead(ea)
                    if idc.GetMnem(ea) in ['CBZ', 'B']:
                        # Fork: one successor path per branch destination.
                        for des in idautils.CodeRefsFrom(ea, 1):
                            if des in p.route:  # avoid loop
                                continue
                            successor_path = copy.deepcopy(p)
                            # the branch address is appended to the route
                            # directly; the destination goes through add_step
                            successor_path.route.append(ea)
                            successor_path.add_step(des)
                            if successor_path.active:
                                add.append(successor_path)
                            else:
                                self.dead_paths.append(successor_path)
                        remove.append(
                            p
                        )  # the path is not dead; it is replaced by its successors.
                        break
                    else:
                        p.add_step(ea)
                        if p.active:
                            continue
                        else:
                            # the step deactivated the path: retire it
                            remove.append(p)
                            self.dead_paths.append(p)
                            break

            self.active_paths.extend(add)
            for p in remove:
                self.active_paths.remove(p)
Example #17
0
File: insn.py Project: clayne/capa
def extract_insn_api_features(f, bb, insn):
    """parse instruction API features

    only "call" and "jmp" instructions are considered. features come from
    the names reported by check_for_api_call, and from the name of the
    first code reference target when that target is the start of a function
    flagged FUNC_LIB.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        call dword [0x00473038]
    """
    if insn.get_canon_mnem() not in ("call", "jmp"):
        return

    for api in check_for_api_call(f.ctx, insn):
        dll, _, symbol = api.rpartition(".")
        symbols = capa.features.extractors.helpers.generate_symbols(dll, symbol)
        for name in symbols:
            yield API(name), insn.ea

    # extract IDA/FLIRT recognized API functions
    targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if not targets:
        return

    first_target = targets[0]
    callee = idaapi.get_func(first_target)
    if not callee or callee.start_ea != first_target:
        # not a function (start)
        return

    if not (callee.flags & idaapi.FUNC_LIB):
        return

    name = idaapi.get_name(callee.start_ea)
    yield API(name), insn.ea
    if name.startswith("_"):
        # some linkers may prefix linked routines with a `_` to avoid name collisions.
        # extract features for both the mangled and un-mangled representations.
        # e.g. `_fwrite` -> `fwrite`
        # see: https://stackoverflow.com/a/2628384/87207
        yield API(name[1:]), insn.ea
Example #18
0
def find_function_callees(func_ea, maxlvl):
    """Return the functions reachable from ``func_ea`` within ``maxlvl`` levels.

    The starting function is level 1.  A function at level ``n`` is scanned
    for code references leaving its address range only while ``n < maxlvl``;
    the references found form level ``n + 1``.  Every visited address that
    has a function name is returned, the starting function included.

    The level is tracked per call depth.  Counting every processed function
    as a new level made the result for ``maxlvl > 2`` depend on the
    arbitrary order in which a set handed out pending addresses.  Results
    for ``maxlvl <= 2`` are unchanged apart from ordering.

    args:
        func_ea: address of the function to start from.
        maxlvl: maximum call depth to explore.

    returns:
        list of addresses, level by level, sorted within each level.
    """
    ALL_XREFS = 0

    callees = []
    visited = set()
    frontier = set((func_ea, ))
    lvl = 0

    while frontier:
        lvl += 1
        visited |= frontier
        next_frontier = set()

        # sorted() makes the output order deterministic
        for ea in sorted(frontier):
            if not idc.GetFunctionName(ea):
                continue
            callees.append(ea)

            func_end = idc.FindFuncEnd(ea)
            if func_end == idaapi.BADADDR:
                continue

            if lvl >= maxlvl:
                # deepest allowed level: report the function, do not expand
                continue

            for line in idautils.Heads(ea, func_end):
                if not ida_bytes.isCode(get_flags(line)):
                    continue
                for ref in idautils.CodeRefsFrom(line, ALL_XREFS):
                    # keep only references leaving [ea, func_end]
                    if not (ea <= ref <= func_end):
                        next_frontier.add(ref)

        frontier = next_frontier - visited

    return callees
Example #19
0
def _enqueue_flow_chunk(ref, chunks, chunks_todo):
    """Queue the flow chunk holding ``ref`` unless ``chunks`` already covers it."""
    if address_in_chunks(ref, chunks):
        return
    new_chunk = get_flow_code_from_address(ref)
    # Add the chunk both to the work queue and to the set of known chunks
    if new_chunk is not None:
        chunks_todo.append(new_chunk)
        chunks.add(new_chunk)


def process_function(arch, func_ea):
    """Walk the chunks of a function and record its contents in a packet.

    Processing starts with the flow chunk containing ``func_ea``; chunks
    reached through branches are queued as they are discovered, and any
    chunk reported by ``get_chunks`` that was not reached that way is
    processed afterwards.  For every head the comments are recorded; for
    every code head the instruction is handed to
    ``arch.process_instruction`` and its calls, branches and data
    references are added to the packet.

    The finished packet and a ``FunctionAnalyzer`` built from it are passed
    to ``instrumentation``.  Imports reached through calls are also added
    to ``imported_functions``, a name that is not defined in this function
    (presumably a module-level set -- verify).

    args:
        arch: object providing ``process_instruction``, ``is_call``,
            ``is_conditional_branch`` and ``is_unconditional_branch``.
        func_ea: address of the function entry point.
    """
    packet = DismantlerDataPacket()

    ida_chunks = get_chunks(func_ea)
    chunks = set()

    # Add to the chunks only the main block, containing the
    # function entry point
    #
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    # Make "ida_chunks" a set for faster searches within
    ida_chunks = set(ida_chunks)
    ida_chunks_idx = dict(zip([c[0] for c in ida_chunks], ida_chunks))

    func = idaapi.get_func(func_ea)
    # The function comments are attached to the first head processed
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Copy the list of chunks into a queue to process
    #
    chunks_todo = [c for c in chunks]

    while True:

        # If no chunks are left in the queue, fall back to the chunks IDA
        # knows about that have not been processed yet; exit when there
        # are none of those either
        if not chunks_todo:

            if ida_chunks:
                chunks_todo.extend(ida_chunks)
            else:
                break

        chunk_start, chunk_end = chunks_todo.pop()
        # `in` works on both Python 2 and 3; dict.has_key is Python 2 only
        if chunk_start in ida_chunks_idx:
            ida_chunks.remove(ida_chunks_idx[chunk_start])
            del ida_chunks_idx[chunk_start]

        for head in idautils.Heads(chunk_start, chunk_end):

            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()

            if idc.isCode(idc.GetFlags(head)):

                instruction = arch.process_instruction(packet, head)

                # The non-flow references are fetched once per head; they
                # were previously rebuilt for every reference in the loop
                non_flow_refs = list(idautils.CodeRefsFrom(head, 0))

                # if there are other references than
                # flow add them all.
                if non_flow_refs:

                    # for each reference, including flow ones
                    for ref_idx, ref in enumerate(
                            idautils.CodeRefsFrom(head, 1)):

                        if arch.is_call(instruction):

                            # These two conditions must remain separated,
                            # it's necessary to enter the enclosing "if"
                            # whenever the instruction is a call, otherwise
                            # it will be added as an unconditional jump in
                            # the last branch
                            #
                            if ref in non_flow_refs:
                                packet.add_direct_call(head, ref)

                        elif ref_idx > 0 and arch.is_conditional_branch(
                                instruction):
                            # The ref_idx is > 0 in order to avoid processing the
                            # normal flow reference which would effectively imply
                            # that the conditional branch is processed twice.
                            # It's done this way instead of changing the loop's head
                            # from CodeRefsFrom(head, 1) to CodeRefsFrom(head, 0) in
                            # order to avoid altering the behavior of other conditions
                            # which rely on it being so.

                            # FIXME
                            # The reference is not checked to point to
                            # valid, defined code. This could lead to a
                            # failure when exporting if such a situation
                            # appears; an isHead() or isCode() check would
                            # probably address it.

                            packet.add_conditional_branch_true(head, ref)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

                            # If the target is not in our chunk list
                            _enqueue_flow_chunk(ref, chunks, chunks_todo)

                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, ref)

                            # If the target is not in our chunk list
                            _enqueue_flow_chunk(ref, chunks, chunks_todo)

                for ref in idautils.DataRefsFrom(head):
                    packet.add_data_reference(head, ref)

                    # Get a data reference from the current reference's
                    # location. For instance, if 'ref' points to a valid
                    # address and such address contains a data reference
                    # to code.
                    targets = list(idautils.DataRefsFrom(ref))
                    target = targets[0] if targets else None

                    if target is None and arch.is_call(instruction):
                        imp_name = idc.Name(ref)

                        imp_module = get_import_module_name(ref)

                        imported_functions.add((ref, imp_name, imp_module))
                        packet.add_indirect_virtual_call(head, ref)

                    elif target is not None and idc.isHead(target):
                        # for calls "routed" through this reference
                        if arch.is_call(instruction):
                            packet.add_indirect_call(head, target)

                        # for unconditional jumps "routed" through this reference
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, target)

                        # for conditional "routed" through this reference
                        elif arch.is_conditional_branch(instruction):
                            packet.add_conditional_branch_true(head, target)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
Example #20
0
    def block(self, block):
        '''
        Computes the signatures of one basic block.

        Returns a tuple: (formal signature, fuzzy signature, [immediate, values], [called, function, names])

        Both signatures are the result of self.sighash applied to the
        concatenated pieces gathered for the block. The immediate values
        are returned as a list, in order of appearance, and may repeat.
        '''
        formal = []
        fuzzy = []
        functions = []
        immediates = []

        ea = block.startEA
        while ea < block.endEA:
            idaapi.decode_insn(ea)

            # Get a list of all data/code references from the current instruction
            drefs = list(idautils.DataRefsFrom(ea))
            crefs = list(idautils.CodeRefsFrom(ea, False))

            # Add all instruction mnemonics to the formal block hash
            formal.append(idc.GetMnem(ea))

            # If this is a call instruction, be sure to note the name of the function
            # being called. This is used to apply call-based signatures to functions.
            #
            # For fuzzy signatures, we can't use the actual name or EA of the function,
            # but rather just want to note that a function call was made.
            #
            # Formal signatures already have the call instruction mnemonic, which is more
            # specific than just saying that a call was made.
            if idaapi.is_call_insn(ea):
                for cref in crefs:
                    func_name = idc.Name(cref)
                    if func_name:
                        functions.append(func_name)
                        fuzzy.append("funcref")
            # If there are data references from the instruction, check to see if any of them
            # are strings. These are looked up in the pre-generated strings dictionary.
            #
            # String values are easily identifiable, and are used as part of both the fuzzy
            # and the formal signatures.
            #
            # It is more difficult to determine if non-string values are constants or not;
            # for both fuzzy and formal signatures, just use "data" to indicate that some data
            # was referenced.
            elif drefs:
                for dref in drefs:
                    # `in` works on both Python 2 and 3; has_key is Python 2 only
                    if dref in self.strings:
                        value = self.strings[dref].value
                        formal.append(value)
                        fuzzy.append(value)
                    else:
                        formal.append("dataref")
                        fuzzy.append("dataref")
            # If there are no data or code references from the instruction, use every operand as
            # part of the formal signature.
            #
            # Fuzzy signatures are only concerned with interesting immediate values, that is, values
            # that are at least 0xFFFF, are not memory addresses, and are not displayed as
            # negative values.
            elif not crefs:
                for n in range(len(idaapi.cmd.Operands)):
                    opnd_text = idc.GetOpnd(ea, n)
                    formal.append(opnd_text)

                    operand = idaapi.cmd.Operands[n]
                    if operand.type != idaapi.o_imm or opnd_text.startswith('-'):
                        continue
                    # an address without flags is taken not to be a memory address
                    if operand.value >= 0xFFFF and idaapi.getFlags(operand.value) == 0:
                        fuzzy.append(str(operand.value))
                        immediates.append(operand.value)

            ea = idc.NextHead(ea)

        return (self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)),
                immediates, functions)
Example #21
0
    def get_stack_strings(self, functions):
        """
        Finds all the stack strings it can in the given functions.

        Each function is traced instruction by instruction with a
        BranchingTraceState; jumps whose target lies inside the function
        spawn additional states. Once all states of a function are
        exhausted, tracing restarts at the first item address that was
        never visited. The strings gathered on the states are added to
        self.strings; nothing is returned.

        Parameters set globally:
            STRING_GAP_TOLERANCE - the gap allowed between string characters.
            MAX_CHARACTER_WIDTH  - the maximum character size, in bytes
            ASCII                - Whether character values must be 0-127
        """
        stack_strings = []
        for func in functions:
            state = tracing.BranchingTraceState(func.start_ea)
            state.strs = set()
            states = [state]
            # Item addresses of the function that no state has visited yet
            func_eas = []
            ea = state.ea
            while ea < func.end_ea:
                func_eas.append(ea)
                ea += idc.get_item_size(ea)
            while states:
                state = states.pop()
                while state.ea < func.end_ea:
                    try:
                        func_eas.remove(state.ea)
                    except ValueError:
                        # already visited, or not an item start of this function
                        pass
                    state.visited_eas.append(state.ea)
                    mnemonic = idc.print_insn_mnem(state.ea)
                    if mnemonic in IGNORED_MNEMONICS:
                        pass
                    elif "pop" in mnemonic:
                        reg = tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                        if reg:
                            value = state.stack.get(idc.get_spd(state.ea), None)
                            if value is not None:
                                state.regs[reg[0]] = value
                            else:
                                self.clear_reg_if_needed(reg, state.regs)
                    elif "push" in mnemonic:
                        # Deliberately ignored: idc.get_spd was not tracking the
                        # stack pointer correctly here, and pushes have rarely
                        # been seen as part of a stack string.
                        pass
                    elif "mov" in mnemonic:
                        self.handle_mov(state)
                    elif (
                        (
                            "xor" in mnemonic
                            and tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                            == tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_SECOND))
                        )
                        or ("lea" in mnemonic and idc.print_operand(state.ea, POS_SECOND) == "[0]")
                        or (
                            "sub" in mnemonic
                            and tracing.get_opnd_replacement(state.ea, POS_FIRST)
                            == tracing.get_opnd_replacement(state.ea, POS_SECOND)
                        )
                    ):
                        # These forms all zero their first operand
                        reg = tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                        if reg:
                            state.regs[reg[0]] = (0, state.ea)
                    elif "loop" in mnemonic or "movsb" in mnemonic:
                        state.regs["rcx"] = (0, state.ea)
                    elif mnemonic in JUMPS:
                        try:
                            target = next(idautils.CodeRefsFrom(state.ea, 0))
                        except StopIteration:
                            target = None
                        if target and target not in state.visited_eas:
                            if func.end_ea > target >= func.start_ea:
                                # Trace the branch target later, from a copy of
                                # the current state sharing the same string set
                                state.visited_eas.append(target)
                                new_state = tracing.BranchingTraceState(target, state)
                                new_state.strs = state.strs
                                states.append(new_state)
                            else:
                                self.report_strings(state.strs, state.stack)
                        # Always follow an unconditional jump
                        if mnemonic == "jmp":
                            break
                    elif (
                        "rep" in idc.GetDisasm(state.ea).split(" ")[0]
                        and "scas" not in idc.GetDisasm(state.ea).split(" ")[1]
                    ):
                        self.report_strings(state.strs, state.stack)
                    elif "lea" in mnemonic:
                        self.handle_lea(state)
                    elif "call" in mnemonic:
                        self.handle_call(state)
                    elif "ret" in mnemonic:
                        break
                    elif (
                        idc.get_operand_type(state.ea, POS_FIRST) == idc.o_reg
                    ):  # If we find a target register we were tracking, stop tracking it.
                        self.clear_reg_if_needed(
                            tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST)), state.regs
                        )
                    state.ea += idc.get_item_size(state.ea)
                self.report_strings(state.strs, state.stack)
                if not states and func_eas:
                    # Restart from the first address no state has reached
                    new_state = tracing.BranchingTraceState(func_eas[0])
                    new_state.strs = set()
                    states.append(new_state)
                stack_strings.extend(state.strs)
        self.strings.update(stack_strings)
Example #22
0
 def make_po_pair(ea, alive):
     """Pair ``alive`` with the first code reference of ``ea`` that differs from it.

     Raises IndexError when ``ea`` has no code reference other than ``alive``.
     """
     others = []
     for target in idautils.CodeRefsFrom(ea, True):
         if target != alive:
             others.append(target)
     return alive, others[0]
Example #23
0
def main():
    """Redirect the instrumentation call of selected functions.

    First the functions whose names contain 'afl_maybe_log_fun' and
    'afl_maybe_log' are located.  Then every function longer than 0x28
    bytes is checked for a ``call`` at a fixed offset from its start (0x1A
    on 64-bit, 0x08 on 32-bit) whose target name contains 'afl_maybe_log'.
    The candidates are sorted by size and walked from the largest down; a
    candidate is patched when it is smaller than 0x200 bytes, or when it
    lies in the lower 90% of the sorted list and a random draw out of 100
    falls below 80.

    Patching writes ``addr_afl_maybe_log - addr_call - 5`` as a dword at
    ``addr_call + 1`` (presumably the relative displacement of a 5-byte
    call -- verify).  Errors are printed, not raised.
    """
    idaapi.msg("alter instrument function\n")

    addr_afl_maybe_log_fun = 0
    addr_afl_maybe_log = 0
    fun_info = []

    try:
        for func in idautils.Functions():
            fun_name = idc.GetFunctionName(func)
            # Substring tests: `find(...) > 0` skipped names that *start*
            # with the text.  The longer name must be tested first because
            # it contains the shorter one.
            if 'afl_maybe_log_fun' in fun_name:
                addr_afl_maybe_log_fun = func
            elif 'afl_maybe_log' in fun_name:
                addr_afl_maybe_log = func
            if addr_afl_maybe_log_fun and addr_afl_maybe_log:
                break

        if not addr_afl_maybe_log_fun or not addr_afl_maybe_log:
            print("don't find add_afl_maybe_fun\n")
            return

        print("find add_afl_maybe_fun ok\n")

        # find instrumented function
        for func in idautils.Functions():

            f_end = idc.FindFuncEnd(func)

            if f_end - func <= 0x28:
                continue

            # call    __afl_maybe_log
            if idc.__EA64__:  # 64bit
                addr_call = func + 0x1A
            else:  # 32bit
                addr_call = func + 0x08

            mnem = idc.GetMnem(addr_call)
            if mnem != 'call':
                continue

            for to in idautils.CodeRefsFrom(addr_call, False):
                fun_name = idc.GetFunctionName(to)
                if 'afl_maybe_log' not in fun_name:
                    continue
                fun_info.append((func, f_end - func, addr_call))

        fun_info.sort(key=lambda x: x[1])
        num = len(fun_info)
        print(num)
        for i in range(num - 1, -1, -1):
            func_start, func_size, addr_call = fun_info[i]
            print(hex(func_start))
            # `and` binds tighter than `or`; the parentheses only make the
            # existing grouping explicit.
            if func_size < 0x200 or (i < num * 90.0 / 100.0
                                     and random.randint(0, 99) < 80):
                # remove fun instrumented
                idc.PatchDword(addr_call + 1,
                               addr_afl_maybe_log - addr_call - 5)
            else:
                print(hex(func_start))

    except Exception as e:
        print(e)

    print('analyse time: ' + str(time.time() - g_time_start) + 's\n')
Example #24
0
def instructionHandler(M, F, B, inst, new_eas):
    """Recover the instruction at address `inst` and record it in block `B`.

    The instruction bytes are added to `B` via addInst(), and the resulting
    instruction object is annotated with whatever this function can derive
    from IDA's cross references: external/internal call targets, jump
    targets, relocation offset and data references.

    Args:
        M: passed through to handleDataRelocation() for data references.
        F: passed through to handleJmpTable() for jump-table instructions.
        B: block object the instruction is added to (via addInst()).
        inst: effective address of the instruction to process.
        new_eas: set that receives internal code addresses referenced by
            this instruction which are not yet in RECOVERED_EAS.

    Returns:
        A tuple ``(I, flag)``. ``I`` is the object returned by addInst(), or
        None for alignment items and HLT instructions. ``flag`` is True for
        an undecodable 0xCC byte, for an alignment item, and for a call or
        unconditional jump to an external function for which
        doesNotReturn() is true; it is False in every other case.
        NOTE(review): presumably the flag tells the caller that the block
        ends here -- confirm against the caller.

    Raises:
        Exception: if the instruction cannot be decoded and the byte at
            `inst` is not 0xCC.

    Side effects:
        Sets the module-level FUNCTIONS_NEED_TRAMPOLINE to True when the
        instruction is an indirect call.
    """
    insn_t = idautils.DecodeInstruction(inst)
    if not insn_t:
        # handle jumps after noreturn functions
        if idc.Byte(inst) == 0xCC:
            # Record the lone 0xCC byte as its own instruction.
            I = addInst(B, inst, [0xCC])
            return I, True
        else:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))

    # check for align instruction
    pf = idc.GetFlags(inst)
    if idaapi.isAlign(pf):
        return None, True

    # skip HLTs -- they are privileged, and are used in ELFs after a noreturn call
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    inst_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(inst_bytes))

    I = addInst(B, inst, inst_bytes)

    # Jump-table instructions are fully handled by handleJmpTable(); none of
    # the cross-reference processing below is applied to them.
    if isJmpTable(inst):
        handleJmpTable(I, F, inst, new_eas)
        return I, False

    if isIndirectCall(inst):
        global FUNCTIONS_NEED_TRAMPOLINE
        FUNCTIONS_NEED_TRAMPOLINE = True

    #check for code refs from here
    # (second argument 0: ordinary flow to the next instruction is excluded)
    crefs = []
    for cref in idautils.CodeRefsFrom(inst, 0):
        crefs.append(cref)
        fn = getFunctionName(cref)
        if isCall(inst):

            # An ELF thunk is treated like an external reference, using the
            # replacement name reported by isElfThunk().
            elfy, fn_replace = isElfThunk(cref)
            if elfy:
                fn = fn_replace

            if isExternalReference(cref) or elfy:
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL CALL: {0}\n".format(fn))

                if doesNotReturn(fn):
                    return I, True
            else:
                I.call_target = cref

                # Queue the callee for recovery if it has not been seen yet.
                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

                DEBUG("INTERNAL CALL: {0}\n".format(fn))
        elif isUnconditionalJump(inst):
            if isExternalReference(cref):
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL JMP: {0}\n".format(fn))

                if doesNotReturn(fn):
                    DEBUG("Nonreturn JMP\n")
                    return I, True
            else:
                DEBUG("INTERNAL JMP: {0:x}\n".format(cref))
                I.true_target = cref

    #true: jump to where we have a code-ref
    #false: continue as we were
    # NOTE(review): unconditional print to stdout for every instruction,
    # unlike the DEBUG() calls around it, and Python 2-only syntax -- looks
    # like leftover debugging; confirm before removing.
    print hex(inst), crefs
    if isConditionalJump(inst):
        # NOTE(review): crefs[0] raises IndexError if no non-flow code
        # reference was found for a conditional jump -- confirm that this
        # cannot happen.
        I.true_target = crefs[0]
        # Fall-through target is the address right after this instruction.
        I.false_target = inst+len(inst_bytes)
        return I, False

    # -1 from findRelocOffset() means no relocation offset was found.
    relo_off = findRelocOffset(inst, len(inst_bytes))
    if relo_off != -1:
        I.reloc_offset = relo_off

    for dref in idautils.DataRefsFrom(inst):
        # Skip targets already handled above as code references.
        if dref in crefs:
            continue

        if inValidSegment(dref):
            if isExternalReference(dref):
                fn = getFunctionName(dref)

                fn = handleExternalRef(fn)
                # The external symbol is recorded either as data or as a
                # call target, depending on isExternalData().
                if isExternalData(fn):
                    I.ext_data_name = fn
                    sys.stdout.write("EXTERNAL DATA REF FROM {0:x} to {1}\n".format(inst, fn))
                else:
                    I.ext_call_name = fn
                    sys.stdout.write("EXTERNAL CODE REF FROM {0:x} to {1}\n".format(inst, fn))

            elif isInternalCode(dref):
                # A data reference that points at internal code is stored as
                # a call target and queued for recovery.
                DEBUG("\t\tCode Ref from {0:x} to {1:x}\n".format(inst, dref))
                I.call_target = dref
                if dref not in RECOVERED_EAS:
                    new_eas.add(dref)
            else:
                # Plain internal data; dref_size is only used in the debug
                # message below.
                dref_size = idc.ItemSize(dref)
                I.data_offset = handleDataRelocation(M, dref, new_eas)
                DEBUG("\t\tData Ref: {0:x}, size: {1}, offset : {2:x}\n".format(
                    dref, dref_size, I.data_offset))
        else:
            DEBUG("Data not in valid segment {0:x}\n".format(dref))

    # if we have a mov sth, imm with imm that it's likely a fn pointer,
    # we add that pointer to the list of ones to disassemble
    # TODO: use also some other info to assume this
    if insn_t[1].type == idaapi.o_imm and insn_t.itype == idaapi.NN_mov and inValidSegment(insn_t[1].value):
        ref = insn_t[1].value
        if isInternalCode(ref) and ref not in RECOVERED_EAS:
            new_eas.add(ref)

    # Fallback for calls: if there is no non-flow code reference, exactly one
    # reference when flow is included, and operand 0 is a near address, that
    # single reference is taken as the call target.
    if isCall(inst):
            coderefs = [i for i in idautils.CodeRefsFrom(inst, 0)]
            coderefs_normal = [i for i in idautils.CodeRefsFrom(inst, 1)]
            if len(coderefs) == 0 and len(coderefs_normal) == 1 and insn_t[0].type == idaapi.o_near:
                    for cref in coderefs_normal:
                            I.call_target = cref
                            if cref not in RECOVERED_EAS:
                                    new_eas.add(cref)

    return I, False
Example #25
0
def recoverBlock(F, startEA, need_trampolines):
    """Build the Block that starts at `startEA`.

    Instructions are decoded one after another from `startEA` until one of
    them ends the block. The returned Block has `endEA` set and `succs`
    filled with the successor addresses found for the terminating
    instruction.

    Args:
        F: passed through to handleJmpTable() / handleLikeLoadJmpTable().
        startEA: address of the first instruction of the block.
        need_trampolines: set that receives the entries of a "likely" jump
            table when an indirect jump is matched with it.

    Returns:
        The Block object created for `startEA`.

    Side effects:
        Adds code/data cross references to the IDA database for memory
        operands that point into a valid segment, and adds recognised jump
        tables to the module-level JMPTABLES set.
    """
    b = Block(startEA)
    curEA = startEA

    # TODO: link some metadata to any block to keep track
    #       of this table, because the indirect jmp
    #       may be in a follower block and not directly in
    #       the block where the address is loaded
    likelyJmpTable = None

    while True:
        insn_t = idautils.DecodeInstruction(curEA)
        if insn_t is None:
            # Undecodable: a 0xCC byte is included in the block, anything
            # else ends the block right before it. No successors are added
            # in either case.
            if idc.Byte(curEA) == 0xCC:
                b.endEA = curEA+1
                return b
            else:
                sys.stdout.write("WARNING: Couldn't decode insn at: {0:x}. Ending block.\n".format(curEA))
                b.endEA = curEA
                return b

        # check for xrefs
        # j is the index of the operand currently being examined.
        j = 0
        for op in insn_t:
            # if it is a MEM operand
            if op.type == idaapi.o_mem and inValidSegment(op.addr):
                if isCall(curEA):
                    # Call through memory: code xref (call near) if the
                    # target is internal code, otherwise a data read.
                    if isInternalCode(op.addr):
                            idaapi.add_cref(curEA, op.addr, idaapi.fl_CN)
                    else:
                            idaapi.add_dref(curEA, op.addr, idaapi.dr_R)
                elif isUnconditionalJump(curEA) or isConditionalJump(curEA):
                    # Jump through memory: code xref (jump near) if the
                    # target is internal code, otherwise a data read.
                    if isInternalCode(op.addr):
                            idaapi.add_cref(curEA, op.addr, idaapi.fl_JN)
                    else:
                            idaapi.add_dref(curEA, op.addr, idaapi.dr_R)
                else:
                    # Any other instruction: operand 0 is recorded as a
                    # write reference, every other operand as a read.
                    if j == 0:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_W)
                    else:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_R)

            j += 1

        nextEA = curEA+insn_t.size

        # Second argument 1: ordinary flow to the next instruction is
        # included in the references.
        crefs = idautils.CodeRefsFrom(curEA, 1)

        # get curEA follows
        follows = [cref for cref in crefs]
        if isJmpTable(curEA):
            # this is a jmptable (according to IDA)
            # XXX: we assume jmp tables found by IDA don't overlap
            #      with others
            jmpentries = set()
            jmpt = handleJmpTable(None, F, curEA, jmpentries)
            follows = list(jmpentries.union(set(follows)))

            JMPTABLES.add(jmpt)
        elif isIndirectJmp(curEA) and likelyJmpTable is not None:
            # this is an indirect jmp and in the same block there
            # was a mov to take the address of a "likely" jmptable
            for ref in likelyJmpTable.entries():
                need_trampolines.add(ref)
            # NOTE(review): the `+` assumes entries() returns a list --
            # confirm.
            follows = list(set(likelyJmpTable.entries() + follows))

            JMPTABLES.add(likelyJmpTable)
            likelyJmpTable = None
        elif isLikeLoadJmpTable(curEA):
            # this is an instruction which take the address of a
            # switch table (or something we *think* is a jmp table)
            likelyJmpTable = handleLikeLoadJmpTable(curEA, F)

        # A rep-prefixed instruction ends the block with two successors: the
        # next instruction and the instruction itself.
        if isRepPrefix(curEA):
            sys.stdout.write("Found rep prefix at {0:#x}\n".format(curEA))
            b.succs.append(nextEA)
            b.succs.append(curEA)
            b.endEA = nextEA
            return b

        # Data in the instruction stream ends the block before it, with no
        # successors.
        if isDataInst(curEA):
            sys.stdout.write("Found data in middle of code at {0:#x}\n".format(curEA))
            b.endEA = curEA
            return b

        # A call always ends the block. The fall-through address is a
        # successor unless IDA reports that the first non-flow call target
        # does not return.
        if isCall(curEA):
            sys.stdout.write("Found call\n")
            fcrefs = idautils.CodeRefsFrom(curEA, 0)
            ffollows = [cref for cref in fcrefs]

            if len(ffollows) == 0 or idaapi.func_does_return(ffollows[0]):
                b.succs.append(nextEA)

            b.endEA = nextEA
            return b

        # An int instruction ends the block and falls through.
        if isInt(curEA):
            sys.stdout.write("Found int\n")
            b.endEA = nextEA
            b.succs.append(nextEA)
            return b

        # NOTE(review): calls already returned above, so the
        # `or isCall(curEA)` term looks unreachable here -- confirm.
        if (follows == [nextEA] and not isUnconditionalJump(curEA)) or isCall(curEA):
            # read next instruction
            curEA = nextEA
        # check if we need to make a new block
        elif len(follows) == 0:
            # this is a ret, no follows
            b.endEA = nextEA
            return b
        else:
            # this block has several follow blocks
            b.endEA = nextEA
            for f in follows:
                # do not decode external code refs
                if not isExternalReference(f):
                    b.succs.append(f)
            return b
Example #26
0
 def crefs_from(self):
     """Code references leaving this line, as returned by idautils.CodeRefsFrom.

     Ordinary flow to the next instruction is included (flow argument 1).
     """
     include_flow = 1
     source_ea = self.ea
     return idautils.CodeRefsFrom(source_ea, include_flow)
Example #27
0
def GetFunEdgesAndBbls(function_ea):
    """
    Get the instrumented basic blocks (bbls) and edges of a function.

    A bbl is a list [head, tail, call_num, mem_num]: head is the address of
    its first instruction, tail the address recorded for its last one,
    call_num the number of 'call' instructions counted in it. mem_num is
    never updated here and stays 0.

    @function_ea - function address
    @return - tuple (bbls, edges_d, edges_count, SingleBBS, MultiBBS)
        bbls        - dict, head -> bbl (instrumented bbls only)
        edges_d     - dict, head of bbl -> list of heads of successor bbls
        edges_count - number of successor entries stored in edges_d
        SingleBBS   - dict, head -> the single predecessor bbl
        MultiBBS    - dict, head -> list of predecessor bbls
      Everything is empty / 0 when IsInstrumentIns(function_ea) is false.
    """
    bbl = []  # bbl info [head, tail, call_num, mem_num]
    SingleBBS = {}  # head -> pred_bbl
    MultiBBS = {}  # head -> [pred_bbls]
    bbls = {}  # head -> bbl
    bbls2 = {}  # tail -> bbl
    edges_d = {}  # dict struct.  head -> of (head, ..., head)
    edges_count = 0
    edges_s_t = set()  # tmp edges set of (source ins, target ins)
    edges_d_t = {}  # tmp edges dict.

    if not IsInstrumentIns(function_ea):
        return bbls, edges_d, edges_count, SingleBBS, MultiBBS

    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)

    boundaries = set((f_start, ))  # head of bbl

    # Pass 1: collect bbl boundaries and instruction-level edges.
    for head in idautils.Heads(f_start, f_end):
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        # Only instructions are of interest.
        if not idc.isCode(idc.GetFlags(head)):
            continue

        # Get the references made from the current instruction
        # and keep only the ones local to the function.
        # can't use ref >= f_start, avoid recursion
        refs = set()
        for ref in idautils.CodeRefsFrom(head, 0):
            if f_start < ref < f_end:
                refs.add(ref)
        if not refs:
            continue

        # If the flow continues also to the next (address-wise)
        # instruction, we add a reference to it.
        # For instance, a conditional jump will not branch
        # if the condition is not met, so we save that
        # reference as well.
        next_head = idc.NextHead(head, f_end)
        if next_head != idaapi.BADADDR and idc.isFlow(
                idc.GetFlags(next_head)):
            refs.add(next_head)

        # Update the boundaries found so far.
        boundaries.update(refs)
        for r in refs:  # enum all of next ins
            # If the flow could also come from the address
            # previous to the destination of the branching
            # an edge is created.
            if idc.isFlow(idc.GetFlags(r)):
                prev_head = idc.PrevHead(r, f_start)
                # Compare against BADADDR rather than a hard-coded
                # 0xffffffff so the "no previous head" case is also
                # recognised when addresses are 64 bits wide.
                if prev_head != idaapi.BADADDR:
                    edges_s_t.add((prev_head, r))
            edges_s_t.add((head, r))

    last_head = 0
    # NOTE: We can handle if jump xrefs to chunk address space.

    # Pass 2: get bbls. head of bbl is first ins addr, tail of bbl is last
    # ins addr.
    # NOTE(review): last_head is not updated for a head that is itself a
    # boundary, so a one-instruction bbl gets the tail of an earlier
    # instruction -- kept as in the original; confirm whether intended.
    for head in idautils.Heads(f_start, f_end):
        mnem = idc.GetMnem(head)
        if head in boundaries:
            if bbl:
                if bbl[0] == head:
                    continue
                # Close the bbl that was open so far.
                bbl[1] = last_head
                bbls[bbl[0]] = bbl
                bbls2[bbl[1]] = bbl
            bbl = [head, 0, 0, 0]
        elif mnem.startswith('j'):
            after_jump = head + idc.ItemSize(head)
            if bbl and bbl[0] == after_jump:
                continue
            # A jump closes the current bbl; the next one starts right
            # after it.
            bbl[1] = head
            bbls[bbl[0]] = bbl
            bbls2[bbl[1]] = bbl
            bbl = [after_jump, 0, 0, 0]
        else:
            last_head = head
        if mnem.startswith('call'):
            bbl[2] += 1

    # add last basic block
    if bbl and bbl[0] != f_end:
        bbl[1] = f_end
        bbls[bbl[0]] = bbl
        bbls2[bbl[1]] = bbl

    # edges set -> dict, keyed by the head of the bbl whose tail is the
    # source instruction of the edge.
    for e in edges_s_t:
        if e[0] in bbls2:
            bbl_head = bbls2[e[0]][0]
            edges_d_t.setdefault(bbl_head, []).append(e[1])
        else:
            print('edge (%x, %x) can not find head bbl.' %
                  (e[0], e[1]))  # a small case. e1 flow e0.

    # revise edges. head bbl and tail bbl of edges must be instrumented.
    for e0 in edges_d_t:
        if not IsInstrumentIns(e0):  # e0 don't instrumented, skip.
            continue

        for e1 in edges_d_t[e0]:
            if IsInstrumentIns(e1):  # e0 e1 both instrumented, add edge.
                targets = [e1]
            else:
                # e1 don't instrumented, recursively looks for
                # instrumented child bbls
                targets = LookForInsChildBbls(e1, edges_d_t, [])
            for target in targets:
                edges_d.setdefault(e0, []).append(target)
                edges_count += 1

    # revise bbls. bbl must be instrumented.
    # Iterate over a snapshot of the keys: entries are removed in the loop.
    for b in list(bbls.keys()):
        if not IsInstrumentIns(b):
            bbls.pop(b)

    # Build the predecessor maps: a head with exactly one predecessor lives
    # in SingleBBS and is moved to MultiBBS when a second one shows up.
    for e0 in edges_d:
        if e0 not in bbls:
            print('error:%x have no head' % (e0))  # error
            continue
        for e1 in edges_d[e0]:
            if e1 in MultiBBS:
                MultiBBS[e1].append(bbls[e0])  # add Pred
            elif e1 in SingleBBS:
                MultiBBS[e1] = [SingleBBS[e1], bbls[e0]]  # add Pred
                SingleBBS.pop(e1)  # remove from SingleBBS
            else:
                SingleBBS[e1] = bbls[e0]  # add Pred

    return bbls, edges_d, edges_count, SingleBBS, MultiBBS
Example #28
0
 def getCodeOutRefs(self, offset):
     """Return a list of (offset, target) pairs, one per code reference
     reported by idautils.CodeRefsFrom for `offset` (flow included)."""
     out_refs = []
     for target in idautils.CodeRefsFrom(offset, True):
         out_refs.append((offset, target))
     return out_refs
Example #29
0
# Walk every head of every function and record each one for which IDA has
# switch information as (address, number of cases, interesting calls).
for f in idautils.Functions():
    func = idaapi.get_func(f)

    for h in idautils.Heads(func.startEA, func.endEA):
        res = idaapi.get_switch_info_ex(h)
        if res is None:
            # No switch information at this head.
            continue

        # number of cases
        num_cases = res.get_jtable_size()

        # Single parenthesised argument: prints the same text whether print
        # is a statement (Python 2) or a function.
        print('0x%08x: switch (%d cases)' % (h, num_cases))

        # get cases
        # NOTE(review): xrefs is not used below in this snippet -- confirm
        # whether the case-processing code is missing.
        xrefs = idautils.CodeRefsFrom(h, 1)

        interesting_calls = []

        switches.append((h, num_cases, interesting_calls))


# http://dvlabs.tippingpoint.com/blog/2011/05/11/mindshare-extending-ida-custviews
class SwitchViewer(idaapi.simplecustviewer_t):
    """Subclass of idaapi.simplecustviewer_t that keeps the switch data it is given."""

    def __init__(self, data):
        """Store `data` on the instance as `self.switches`.

        NOTE(review): the base-class initializer is not called in the
        visible code -- confirm whether that happens elsewhere.
        """

        # data should be a 3-tuple
        #
        # (address, number of cases, list of interesting calls)
        # NOTE(review): presumably a list of such 3-tuples, given the
        # plural attribute name -- confirm against the caller.
        self.switches = data
Example #30
0
def index_idb(sdb_path):
    """
    Index the current idb.

    Everything is written through an SDBGen created for `sdb_path`, in three
    separate transactions:

    1. every line returned by iter_lines(), with its type (code/data),
       canonicalized disassembly text and raw bytes;
    2. every cross reference leaving those lines (code jump, code flow,
       code-to-data, data-to-code, data-to-data) whose target line exists;
    3. every non-chunked function whose end lies after its start.

    Finally the full-text index is filled and the SDBGen is closed.

    sdb_path -- passed to SDBGen(); the location of the database to build.
    """
    sdbgen = SDBGen(sdb_path)

    sdbgen.begin_transaction()
    # Index all lines:
    for line_addr in iter_lines():
        # Get line attributes:
        if is_line_code(line_addr):
            line_type = LineTypes.CODE
        else:
            line_type = LineTypes.DATA

        line_text = canonicalize_line_text(idc.GetDisasm(line_addr))
        line_data = idc.GetManyBytes(line_addr, idc.ItemSize(line_addr))
        # Make sure that we don't insert Nones:
        if line_data is None:
            line_data = ""

        # Index the line:
        sdbgen.add_line(line_addr, line_type, line_text, line_data)

    sdbgen.commit_transaction()

    sdbgen.begin_transaction()
    # Index all xrefs:
    for line_addr in iter_lines():
        if is_line_code(line_addr):
            # Line is code:
            # Code xrefs:
            no_flow_crefs = set(idautils.CodeRefsFrom(line_addr, 0))
            all_crefs = set(idautils.CodeRefsFrom(line_addr, 1))
            # Flow references are the ones that only show up when flow is
            # included. (Subtracting the other way round always yields an
            # empty set, because no_flow_crefs is a subset of all_crefs.)
            flow_crefs = all_crefs.difference(no_flow_crefs)

            for nf_cref in no_flow_crefs:
                if not is_line_exists(nf_cref):
                    logger.warning(
                        'Code line: nf_cref = 0x{:x} is nonexistent. '
                        'line_addr = 0x{:x}'.format(nf_cref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_JUMP, line_addr, nf_cref)

            for f_cref in flow_crefs:
                if not is_line_exists(f_cref):
                    logger.warning(
                        'Code line: f_cref = 0x{:x} is nonexistent. '
                        'line_addr = 0x{:x}'.format(f_cref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_FLOW, line_addr, f_cref)

            # Code to Data xrefs:
            for dref in idautils.DataRefsFrom(line_addr):
                if not is_line_exists(dref):
                    logger.warning('Code line: dref = 0x{:x} is nonexistent. '
                                   'line_addr = 0x{:x}'.format(
                                       dref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_TO_DATA, line_addr, dref)

        else:
            # Line is data (Not code):
            for dref in idautils.DataRefsFrom(line_addr):
                if not is_line_exists(dref):
                    logger.warning('Data line: dref = {:x} is nonexistent. '
                                   'line_addr = 0x{:x}'.format(
                                       dref, line_addr))
                    continue

                # The xref type depends on what the target line is.
                if is_line_code(dref):
                    sdbgen.add_xref(XrefTypes.DATA_TO_CODE, line_addr, dref)
                else:
                    sdbgen.add_xref(XrefTypes.DATA_TO_DATA, line_addr, dref)

    sdbgen.commit_transaction()

    sdbgen.begin_transaction()
    # Index all functions:
    for func_addr in idautils.Functions():
        # We skip chunked functions:
        if is_func_chunked(func_addr):
            logger.warning('Function at 0x{:x} is chunked'.format(func_addr))
            continue

        func_end = idc.GetFunctionAttr(func_addr, idc.FUNCATTR_END)

        # Make sure that start is before end:
        if func_end <= func_addr:
            logger.warning('Function at {:x} has end {:x}'\
                    .format(func_addr,func_end))
            continue

        # Every address in [func_addr, func_end) is handed to add_function.
        line_addresses = xrange(func_addr, func_end)
        func_name = idc.GetFunctionName(func_addr)
        sdbgen.add_function(func_addr, func_name, line_addresses)

    sdbgen.commit_transaction()

    sdbgen.fill_lines_fts()
    sdbgen.close()