Exemple #1
0
    def argv(self, func):
        '''
        Attempts to identify what types of arguments are passed to a given
        function. Currently unused.

        For every cross reference to the start of func that comes from a call
        instruction, the delay slot (if any), the call itself and up to ten
        instruction slots before the call are scanned backwards. Whenever an
        instruction changes an operand that maps to an argument register and
        has a type-1 cross reference to a string longer than four characters,
        the matching slot of the result list is set to str.

        @func - The function whose callers are inspected.

        Yields the same args list object once per cross reference to func
        (one slot per entry in self.arch.argv; None means "unknown").
        Nothing is yielded when the architecture is unknown.
        '''
        args = [None for x in self.arch.argv]

        if not self.arch.unknown:
            start_ea = ida_shims.start_ea(func)
            for xref in idautils.XrefsTo(start_ea):
                if idaapi.is_call_insn(xref.frm):
                    # Remember the call site under its own name; the inner
                    # xref loop below must not be able to overwrite it.
                    call_ea = xref.frm

                    # The result is not used here; the call is kept from the
                    # original code.
                    # NOTE(review): possibly needed only for decoder side
                    # effects on older IDA versions - confirm before removing.
                    ida_shims.decode_insn(call_ea)

                    ea = call_ea + (self.arch.delay_slot *
                                    self.arch.insn_size)
                    end_ea = call_ea - (self.arch.insn_size * 10)

                    while ea >= end_ea:
                        # Stop at the end of a basic block or at any call
                        # other than the one being analyzed.
                        if idaapi.is_basic_block_end(ea) or \
                                (ea != call_ea and idaapi.is_call_insn(ea)):
                            break

                        insn = ida_shims.decode_insn(ea)
                        features = ida_shims.get_canon_feature(insn)
                        ops = ida_shims.get_operands(insn)

                        for n in range(0, len(self.CHANGE_OPND)):
                            if ops[n].type in [
                                    idaapi.o_reg, idaapi.o_displ,
                                    idaapi.o_phrase
                            ]:
                                try:
                                    regname = self.arch.registers[ops[n].reg]
                                    index = self.arch.argv.index(regname)
                                except ValueError:
                                    # Not an argument register.
                                    continue

                                if features & self.CHANGE_OPND[n]:
                                    for data_xref in idautils.XrefsFrom(ea):
                                        # TODO: Where is this xref type defined?
                                        if data_xref.type == 1:
                                            string = \
                                                ida_shims.get_strlit_contents(
                                                    data_xref.to)
                                            if string and len(string) > 4:
                                                args[index] = str
                                            break

                        ea -= self.arch.insn_size

                yield args
def _get_api(sea):
    """Count non-API calls and collect the names of API calls in a function.

    Args:
        sea: Address handed to ``idc.GetFunctionFlags`` and
            ``idautils.FuncItems`` to identify the function.

    Returns:
        A ``(calls, api)`` tuple. ``calls`` counts call instructions whose
        target could not be resolved or is neither library code nor a thunk;
        ``api`` lists the names of call targets flagged as library code or
        thunk. For a function that is itself library code or a thunk the
        result is ``(0, [])``.
    """
    calls = 0
    api = []
    flags = idc.GetFunctionFlags(sea)
    # ignore library functions
    if flags & idc.FUNC_LIB or flags & idc.FUNC_THUNK:
        return calls, api

    # The original test was `api_flags & FUNC_LIB is True`, which compares an
    # int with True by identity and is therefore never satisfied; a plain
    # bit-mask test is what was intended.
    api_mask = idaapi.FUNC_LIB | idaapi.FUNC_THUNK

    for instr in idautils.FuncItems(sea):
        if not idaapi.is_call_insn(instr):
            continue

        # Take the first far xref with a usable target as the callee.
        target = None
        for xref in idautils.XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is None:
                calls += 1
                continue
            target = xref.to
            break

        if target is None:
            calls += 1
            continue

        api_flags = idc.GetFunctionFlags(target)
        if api_flags & api_mask:
            name = idc.NameEx(0, target)
            if name:
                api.append(name)
        else:
            calls += 1
    return calls, api
Exemple #3
0
def get_apis(func_addr):
    """Collect the names of the functions called from a function.

    Args:
        func_addr: Address handed to ``GetFunctionFlags`` and ``FuncItems``
            to identify the function.

    Returns:
        ``None`` when the function is flagged as library code or a thunk.
        Otherwise a ``(calls, apis)`` tuple where ``calls`` counts call
        instructions whose target could not be resolved and ``apis`` holds
        ``GetFunctionName`` of every resolved call target.
    """
    calls = 0
    apis = []
    flags = GetFunctionFlags(func_addr)
    # ignore library functions
    if flags & FUNC_LIB or flags & FUNC_THUNK:
        logging.debug("get_apis: Library code or thunk")
        return None

    for instr in FuncItems(func_addr):
        if not idaapi.is_call_insn(instr):
            continue

        # In theory an API address should only have one xref. The xrefs
        # approach is used to obtain the call target address.
        target = None
        for xref in XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is None:
                calls += 1
                continue
            target = xref.to
            break

        # move on to the next instruction if no target could be found
        if target is None:
            calls += 1
            continue

        name = GetFunctionName(target)
        # Was a bare Python 2 `print` statement; routed through logging like
        # the other diagnostic in this function.
        logging.debug("get_apis: call target %s", name)
        apis.append(name)
    return (calls, apis)
def _get_api(sea):
    """Count non-API calls and collect the names of API calls in a function.

    Args:
        sea: Address handed to ``idc.GetFunctionFlags`` and
            ``idautils.FuncItems`` to identify the function.

    Returns:
        A ``(calls, api)`` tuple. ``calls`` counts call instructions whose
        target could not be resolved or is neither library code nor a thunk;
        ``api`` lists the names of call targets flagged as library code or
        thunk. For a function that is itself library code or a thunk the
        result is ``(0, [])``.
    """
    calls = 0
    api = []
    flags = idc.GetFunctionFlags(sea)
    # ignore library functions
    if flags & idc.FUNC_LIB or flags & idc.FUNC_THUNK:
        return calls, api

    # `api_flags & FUNC_LIB is True` (the original test) compares an int with
    # True by identity and never matches; use a bit-mask test instead.
    api_mask = idaapi.FUNC_LIB | idaapi.FUNC_THUNK

    for instr in idautils.FuncItems(sea):
        if not idaapi.is_call_insn(instr):
            continue

        # Take the first far xref with a usable target as the callee.
        target = None
        for xref in idautils.XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is None:
                calls += 1
                continue
            target = xref.to
            break

        if target is None:
            calls += 1
            continue

        api_flags = idc.GetFunctionFlags(target)
        if api_flags & api_mask:
            name = idc.NameEx(0, target)
            if name:
                api.append(name)
        else:
            calls += 1
    return calls, api
Exemple #5
0
    def _find_leafs(self):
        '''
        Finds every function that contains no call instruction and appends a
        Function record (leaf=True) for it to self.functions, then sorts
        self.functions by the records' xrefs attribute, largest first.
        '''
        # Loop through every function
        for func_ea in idautils.Functions():
            func = idaapi.get_func(func_ea)
            if not func:
                continue

            start_ea = ida_shims.start_ea(func)
            end_ea = ida_shims.end_ea(func)
            leaf_function = True

            # Loop through all instructions in this function looking
            # for call instructions; if found, then this is not a leaf.
            # IDA function ranges exclude their end address, so the loop must
            # stop before end_ea; otherwise the first item following the
            # function would be inspected as well.
            ea = start_ea
            while ea < end_ea:
                # Result unused; call kept from the original code.
                # NOTE(review): possibly redundant - confirm before removing.
                ida_shims.decode_insn(ea)
                if idaapi.is_call_insn(ea):
                    leaf_function = False
                    break

                ea = ida_shims.next_head(ea)

            if leaf_function:
                self.functions.append(
                    Function(start=start_ea,
                             end=end_ea,
                             leaf=True,
                             loop=self.has_loop(func),
                             argc=self.argp.argc(func)))

        # Sort leafs by xref count, largest first
        self.functions.sort(key=lambda f: f.xrefs, reverse=True)
Exemple #6
0
    def is_call(self):
        """
            Tell whether the instruction at this object's address is a call.

            :return bool: the result of ``idaapi.is_call_insn`` for ``self.ea``.
        """
        address = self.ea
        return idaapi.is_call_insn(address)
Exemple #7
0
    def _add_child_subs(self, root, ea, tid):
        if ea == 0 or root.text(4) == "y":
            return

        for x in idautils.FuncItems(ea):
            if idaapi.is_call_insn(x):
                fname, target_addr, is_api, callee_id = self._logged_call(
                    x, tid)
                if self._valid_call(x, target_addr) and fname:
                    current_root = QTreeWidgetItem(root, [
                        fname,
                        hex(int(x)), "0",
                        hex(int(target_addr)), "n",
                        hex(int(is_api)),
                        hex(int(tid)),
                        hex(int(callee_id))
                    ])
                    current_root.setFlags(current_root.flags()
                                          & ~QtCore.Qt.ItemIsEditable)
                    try:
                        self._tags[get_api_tag(fname)].append(
                            [root, current_root])
                    except KeyError:
                        self._tags[get_api_tag(fname)] = [[root, current_root]]
                    self._tags["All"].append(current_root)
                    self._add_child_subs(current_root, target_addr, tid)
        root.setText(4, "y")
Exemple #8
0
    def _find_leafs(self):
        '''
        Finds every function that contains no call instruction and appends a
        Function record (leaf=True) for it to self.functions, then sorts
        self.functions by the records' xrefs attribute, largest first.

        Instructions are visited in fixed steps of self.arch.insn_size.
        '''
        # Loop through every function
        for func_ea in idautils.Functions():
            func = idaapi.get_func(func_ea)
            if not func:
                continue

            leaf_function = True
            ea = func.startEA

            # Loop through all instructions in this function looking
            # for call instructions; if found, then this is not a leaf.
            # IDA function ranges exclude their end address, so the loop must
            # stop before func.endEA; otherwise the first instruction after
            # the function would be inspected as well.
            while ea < func.endEA:
                idaapi.decode_insn(ea)
                if idaapi.is_call_insn(ea):
                    leaf_function = False
                    break

                ea += self.arch.insn_size

            if leaf_function:
                self.functions.append(
                    Function(
                        start=func.startEA,
                        end=func.endEA,
                        leaf=True,
                        loop=self.has_loop(func),
                        argc=self.argp.argc(func),
                    )
                )

        # Sort leafs by xref count, largest first
        self.functions.sort(key=lambda f: f.xrefs, reverse=True)
Exemple #9
0
def enum_calls_using_var_arg(func_addr):
    """Find call sites of a function whose nearest preceding push uses a register.

    For every call instruction that references ``func_addr`` the preceding
    instructions are walked in reverse (at most five extra steps) until a
    ``push`` is found. The call site is reported when the first operand of
    that ``push`` is a register.

    Args:
        func_addr: Address of the called function.

    Returns:
        ``None`` when ``func_addr`` is falsy or ``BADADDR``; otherwise a list
        of call-site addresses.
    """
    if not func_addr or func_addr == BADADDR:
        return None

    walk_limit = 5
    varg_calls = []
    call_sites = {
        xref.frm
        for xref in XrefsTo(func_addr, 0) if idaapi.is_call_insn(xref.frm)
    }
    for call_ea in call_sites:
        walk = 0
        prev_insn = DecodePreviousInstruction(call_ea)

        # starting at the call instruction, walk the instructions in reverse
        # order until we find a push or hit the limit.
        # NOTE(review): assumes stricmp() is truthy when the strings differ
        # (C semantics) - confirm against its definition.
        while (prev_insn is not None
               and stricmp(prev_insn.get_canon_mnem(), "push")
               and walk < walk_limit):
            prev_insn = DecodePreviousInstruction(prev_insn.ea)
            walk = walk + 1

        # DecodePreviousInstruction yields None when nothing can be decoded;
        # skip the call site instead of failing on the attribute access.
        if prev_insn is None:
            print("[!] No decodable instruction before xref at %x." % call_ea)
            continue
        if walk >= walk_limit:
            print("[!] Reached walk limit for xref at %x." % call_ea)
            continue
        if prev_insn.Op1.type == o_reg:
            varg_calls.append(call_ea)

    return varg_calls
Exemple #10
0
def trace_data(ea, min_ea, op_type, op_val):
    '''
    Walk backwards from ea towards min_ea and follow the operand described
    by (op_type, op_val) through LDR, MOV and STR instructions.

    Whenever such an instruction writes the tracked operand, e.g.:
    LDR R0, R3
    it is passed to mark_instruction() and tracking continues with that
    instruction's source operand.

    The walk stops early when the tracked operand is a register with
    operand value 0 and a call instruction is reached.

    Always returns None; the result of the trace is the set of instructions
    handed to mark_instruction().
    '''
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)
        if ea == idc.BADADDR:
            # No previous head inside the range.
            break

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # BL/BLX that will modify the R0
            return None

        mnem = idc.GetMnem(ea)
        if mnem in ('LDR', 'MOV'):
            src_op = 1
            dest_op = 0
        elif mnem == 'STR':
            src_op = 0
            dest_op = 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val:
            mark_instruction(ea)
            # Continue the trace with whatever was copied into the operand.
            op_type = idc.GetOpType(ea, src_op)
            op_val = idc.GetOperandValue(ea, src_op)
Exemple #11
0
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    :param ea: address of the function to start from.
    :param path: optional set of already visited addresses; it is updated in
        place. A fresh set is created when omitted, so separate top-level
        calls no longer share (and keep growing) one default set.
    :return: the set containing ea and the addresses of every function
        reached through calls, excluding thunk and library functions.
    """
    if path is None:
        path = set()
    path.add(ea)

    #
    # extract all the call instructions from the current function
    #

    call_instructions = []
    instruction_info = idaapi.insn_t()
    for address in idautils.FuncItems(ea):

        # decode the instruction
        if not idaapi.decode_insn(instruction_info, address):
            continue

        # check if this instruction is a call
        if not idaapi.is_call_insn(instruction_info):
            continue

        # save this address as a call instruction
        call_instructions.append(address)

    #
    # inspect the far cross references of every call instruction found above
    #

    for call_ea in call_instructions:
        for r in idautils.XrefsFrom(call_ea, idaapi.XREF_FAR):
            if not r.iscode:
                continue

            # get the function pointed at by this call
            func = idaapi.get_func(r.to)
            if not func:
                continue

            # ignore calls to imports / library calls / thunks
            if (func.flags & (idaapi.FUNC_THUNK | idaapi.FUNC_LIB)) != 0:
                continue

            #
            # if we have not traversed to the destination function that this
            # call references, recurse down to it to continue our traversal
            #

            if r.to not in path:
                graph_down(r.to, path)

    return path
Exemple #12
0
 def highlight(self, color=COLOR):
     """
     Toggle the highlight of every call instruction in the database.

     Call instructions currently colored self.COLOR are reset to
     idc.DEFCOLOR, those colored idc.DEFCOLOR are set to self.COLOR, and
     any other color is left untouched. The color argument is accepted but
     not used; self.COLOR is always the highlight color.
     """
     for ea in idautils.Heads():
         is_call = idaapi.isCode(
             idaapi.getFlags(ea)) and idaapi.is_call_insn(ea)
         if not is_call:
             continue

         existing = idaapi.get_item_color(ea)
         if existing == self.COLOR:
             replacement = idc.DEFCOLOR
         elif existing == idc.DEFCOLOR:
             replacement = self.COLOR
         else:
             # Colored by something else; leave it alone.
             continue
         idaapi.set_item_color(ea, replacement)
Exemple #13
0
    def argv(self, func):
        '''
        Attempts to identify what types of arguments are passed to a given function.
        Currently unused.

        For every cross reference to func.startEA that comes from a call
        instruction, the delay slot (if any), the call itself and up to ten
        instruction slots before the call are scanned backwards. Whenever an
        instruction changes an operand that maps to an argument register and
        has a type-1 cross reference to a string longer than four characters,
        the matching slot of the result list is set to str.

        @func - The function whose callers are inspected.

        Yields the same args list object once per calling instruction
        (one slot per entry in self.arch.argv; None means "unknown").
        '''
        args = [None for x in self.arch.argv]

        for xref in idautils.XrefsTo(func.startEA):
            if idaapi.is_call_insn(xref.frm):
                # Remember the call site under its own name; the inner xref
                # loop below must not be able to overwrite it.
                call_ea = xref.frm
                idaapi.decode_insn(call_ea)

                ea = call_ea + (self.arch.delay_slot * self.arch.insn_size)
                end_ea = (call_ea - (self.arch.insn_size * 10))

                while ea >= end_ea:
                    # Stop searching if we've reached a conditional block or another call
                    if idaapi.is_basic_block_end(ea) or (
                            ea != call_ea and idaapi.is_call_insn(ea)):
                        break

                    idaapi.decode_insn(ea)
                    features = idaapi.cmd.get_canon_feature()

                    for n in range(0, len(self.CHANGE_OPND)):
                        if idaapi.cmd.Operands[n].type in [
                                idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
                        ]:
                            try:
                                regname = self.arch.registers[
                                    idaapi.cmd.Operands[n].reg]
                                index = self.arch.argv.index(regname)
                            except ValueError:
                                # Not an argument register.
                                continue

                            if features & self.CHANGE_OPND[n]:
                                for data_xref in idautils.XrefsFrom(ea):
                                    # TODO: Where is this xref type defined?
                                    if data_xref.type == 1:
                                        string = idc.GetString(data_xref.to)
                                        if string and len(string) > 4:
                                            args[index] = str
                                        break

                    ea -= self.arch.insn_size

                yield args
Exemple #14
0
 def highlight(self):
     """
     Toggle the highlight of every call instruction in the database.

     Call instructions currently colored self.COLOR are reset to
     idc.DEFCOLOR, those colored idc.DEFCOLOR are set to self.COLOR, and
     any other color is left untouched.
     """
     for ea in idautils.Heads():
         item_flags = ida_shims.get_full_flags(ea)
         if not (ida_shims.is_code(item_flags) and idaapi.is_call_insn(ea)):
             continue

         existing = idaapi.get_item_color(ea)
         if existing == self.COLOR:
             replacement = idc.DEFCOLOR
         elif existing == idc.DEFCOLOR:
             replacement = self.COLOR
         else:
             # Colored by something else; leave it alone.
             continue
         idaapi.set_item_color(ea, replacement)
Exemple #15
0
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
    """
    detect a `call $+5` instruction.

    yields a ("call $+5" characteristic, address) pair when insn is a call
    whose first operand value equals the instruction address plus five.
    f and bb are not used.
    """
    if idaapi.is_call_insn(insn):
        expected_target = insn.ea + 5
        if expected_target == idc.get_operand_value(insn.ea, 0):
            yield Characteristic("call $+5"), insn.ea
Exemple #16
0
    def argv(self, func):
        '''
        Attempts to identify what types of arguments are passed to a given function.
        Currently unused.

        For every cross reference to func.startEA that comes from a call
        instruction, the delay slot (if any), the call itself and up to ten
        instruction slots before the call are scanned backwards. Whenever an
        instruction changes an operand that maps to an argument register and
        has a type-1 cross reference to a string longer than four characters,
        the matching slot of the result list is set to str.

        @func - The function whose callers are inspected.

        Yields the same args list object once per cross reference to func
        (one slot per entry in self.arch.argv; None means "unknown").
        Nothing is yielded when the architecture is unknown.
        '''
        args = [None for x in self.arch.argv]

        if not self.arch.unknown:
            for xref in idautils.XrefsTo(func.startEA):
                if idaapi.is_call_insn(xref.frm):
                    # Remember the call site under its own name; the inner
                    # xref loop below must not be able to overwrite it.
                    call_ea = xref.frm
                    idaapi.decode_insn(call_ea)

                    ea = call_ea + (self.arch.delay_slot * self.arch.insn_size)
                    end_ea = (call_ea - (self.arch.insn_size * 10))

                    while ea >= end_ea:
                        # Stop searching if we've reached a conditional block or another call
                        if idaapi.is_basic_block_end(ea) or (ea != call_ea and idaapi.is_call_insn(ea)):
                            break

                        idaapi.decode_insn(ea)
                        features = idaapi.cmd.get_canon_feature()

                        for n in range(0, len(self.CHANGE_OPND)):
                            if idaapi.cmd.Operands[n].type in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
                                try:
                                    regname = self.arch.registers[idaapi.cmd.Operands[n].reg]
                                    index = self.arch.argv.index(regname)
                                except ValueError:
                                    # Not an argument register.
                                    continue

                                if features & self.CHANGE_OPND[n]:
                                    for data_xref in idautils.XrefsFrom(ea):
                                        # TODO: Where is this xref type defined?
                                        if data_xref.type == 1:
                                            string = idc.GetString(data_xref.to)
                                            if string and len(string) > 4:
                                                args[index] = str
                                            break

                        ea -= self.arch.insn_size

                yield args
Exemple #17
0
    def trace(self, ea):
        '''
        Given an EA where an argument register is set, attempt to trace what
        function call that argument is passed to.

        @ea - The address of an instruction that modifies a function argument
        register.

        Returns a tuple of (function EA, argv index, argument register name) on
        success.
        Returns (None, None, None) on failure or when the architecture is
        unknown.
        '''
        # Nothing can be resolved for an unknown architecture, so bail out
        # before doing any decoding work.
        if self.arch.unknown:
            return (None, None, None)

        insn = ida_shims.decode_insn(ea)
        features = ida_shims.get_canon_feature(insn)

        for n in range(0, len(self.CHANGE_OPND)):
            ops = ida_shims.get_operands(insn)
            if ops[n].type not in [idaapi.o_reg, idaapi.o_displ,
                                   idaapi.o_phrase]:
                continue

            try:
                regname = self.arch.registers[ops[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register.
                continue

            if not features & self.CHANGE_OPND[n]:
                continue

            # Scan forward with a dedicated cursor so that ea and insn keep
            # describing the instruction being analyzed for the remaining
            # operands.
            scan_ea = ea - (self.arch.delay_slot * self.arch.insn_size)

            while True:
                ida_shims.decode_insn(scan_ea)

                if idaapi.is_call_insn(scan_ea):
                    for xref in idautils.XrefsFrom(scan_ea):
                        if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                            return (xref.to, index, regname)
                    # If we couldn't figure out where the function call
                    # was going to, just quit
                    break

                try:
                    is_block_end = idaapi.is_basic_block_end(scan_ea)
                except TypeError:
                    is_block_end = idaapi.is_basic_block_end(scan_ea, True)

                if is_block_end:
                    break

                # TODO: Use idc.NextHead(ea) instead...
                scan_ea += self.arch.insn_size

            # The scan may have replaced the decoder's current instruction;
            # decode the original one again before looking at its next
            # operand.
            insn = ida_shims.decode_insn(ea)

        return (None, None, None)
Exemple #18
0
def extract_function_calls_from(f, bb, insn):
    """extract the targets called from an instruction

    most relevant at the function scope, however, its most efficient to extract at the instruction scope

    yields a ("calls from" characteristic, target) pair for every code
    reference leaving insn, provided insn is a call instruction.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    for target in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), target
Exemple #19
0
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """ extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
        does not include calls like => call ds:dword_ABD4974

        most relevant at the function or basic block scope;
        however, its most efficient to extract at the instruction scope

        yields an ("indirect call" characteristic, address) pair when insn is
        a call whose first operand is of type o_reg, o_phrase or o_displ.

        args:
            f (IDA func_t)
            bb (IDA BasicBlock)
            insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    first_operand_type = idc.get_operand_type(insn.ea, 0)
    if first_operand_type in (idc.o_reg, idc.o_phrase, idc.o_displ):
        yield Characteristic("indirect call"), insn.ea
Exemple #20
0
    def trace(self, ea):
        '''
        Given an EA where an argument register is set, attempt to trace what
        function call that argument is passed to.

        @ea - The address of an instruction that modifies a function argument register.

        Returns a tuple of (function EA, argv index, argument register name) on success.
        Returns (None, None, None) on failure or when the architecture is unknown.
        '''
        # Nothing can be resolved for an unknown architecture, so bail out
        # before doing any decoding work.
        if self.arch.unknown:
            return (None, None, None)

        idaapi.decode_insn(ea)
        features = idaapi.cmd.get_canon_feature()

        for n in range(0, len(self.CHANGE_OPND)):
            if idaapi.cmd.Operands[n].type not in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
                continue

            try:
                regname = self.arch.registers[idaapi.cmd.Operands[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register.
                continue

            if not features & self.CHANGE_OPND[n]:
                continue

            # Scan forward with a dedicated cursor so that ea keeps pointing
            # at the instruction being analyzed for the remaining operands.
            scan_ea = ea - (self.arch.delay_slot * self.arch.insn_size)

            while True:
                idaapi.decode_insn(scan_ea)

                if idaapi.is_call_insn(scan_ea):
                    for xref in idautils.XrefsFrom(scan_ea):
                        if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                            return (xref.to, index, regname)
                    # If we couldn't figure out where the function call was going to, just quit
                    break

                try:
                    is_block_end = idaapi.is_basic_block_end(scan_ea)
                except TypeError:
                    is_block_end = idaapi.is_basic_block_end(scan_ea, True)

                if is_block_end:
                    break

                # TODO: Use idc.NextHead(ea) instead...
                scan_ea += self.arch.insn_size

            # The scan overwrote idaapi.cmd; decode the original instruction
            # again so the next operand check looks at the right one.
            idaapi.decode_insn(ea)

        return (None, None, None)
def IsPrevInsnCall(ea):
    """
    Check whether the instruction preceding a return address is a CALL.

    Each entry of CallPattern supplies an offset (index 0) that is added to
    ea and a byte list (index 1) that must match the bytes found at the
    resulting address.

    Returns the address of the matching call instruction, or False when no
    pattern matches.
    """
    for entry in CallPattern:
        # candidate address of the calling instruction
        candidate = ea + entry[0]
        expected = entry[1]
        observed = list(GetDataList(candidate, len(expected), 1))
        # the bytes have to match and IDA has to agree that it is a call
        if observed == expected and idaapi.is_call_insn(candidate):
            return candidate
    return False
Exemple #22
0
def check_previous_inst_is_call(return_addr, is_64bit):
    """Look for a call instruction that ends exactly at return_addr.

    Candidate call lengths of 2, 3, 5, 6 and 7 bytes (plus 9 when is_64bit
    is set) are tried in that order.

    Returns (True, call address) for the first candidate that is a call
    instruction, can be created as an instruction and has the mnemonic
    "call"; (False, None) when no candidate qualifies. A ValueError raised
    while checking a candidate skips that candidate.
    """
    candidate_lengths = [2, 3, 5, 6, 7] + ([9] if is_64bit else [])

    for length in candidate_lengths:
        call_addr = return_addr - length

        try:
            looks_like_call = (idaapi.is_call_insn(call_addr)
                               and idc.create_insn(call_addr)
                               and print_insn_mnem(call_addr) == "call")
        except ValueError:
            continue

        if looks_like_call:
            return (True, call_addr)

    return (False, None)
Exemple #23
0
def enum_calls_in_function(fva):
    '''
    yield the call instructions in the given function.

    Args:
      fva (int): the starting address of a function

    Yields:
      tuple[int, str]: the address of a call instruction, and the disassembly
      line at that address with runs of whitespace collapsed to one space
    '''
    for ea in enum_function_addrs(fva):
        if idaapi.is_call_insn(ea):
            disasm = ida_lines.generate_disassembly(ea, 16, True, False)[1][0]
            # replace consecutive whitespace with a single space; the pattern
            # is a raw string so that `\s` is not an invalid string escape
            disasm = re.sub(r"\s\s+", " ", disasm)
            yield ea, disasm
Exemple #24
0
def IsPrevInsnCall(ea):
    """
    Check whether the instruction preceding a return address is a CALL.

    Each (delta, opcodes) entry of CallPattern supplies an offset that is
    added to ea and the byte list that must be found at the resulting
    address.

    Returns the address of the matching call instruction, or None when ea is
    BADADDR, smaller than 10, or no pattern matches.
    """
    if ea == idaapi.BADADDR or ea < 10:
        return None

    for delta, opcodes in CallPattern:
        # candidate address of the calling instruction
        candidate = ea + delta
        observed = list(GetDataList(candidate, len(opcodes), 1))
        if observed != opcodes:
            continue
        # the bytes match; IDA has to agree that it is a call
        if idaapi.is_call_insn(candidate):
            return candidate
    return None
Exemple #25
0
def get_apis(func_addr):
    """Count non-API calls and collect the names of API calls in a function.

    Args:
        func_addr: Address handed to ``GetFunctionFlags`` and
            ``idc.GetFunctionAttr`` to identify the function.

    Returns:
        ``(0, "Library code or thunk")`` when the function itself is flagged
        as library code or a thunk. Otherwise ``(calls, apis)``: ``calls``
        counts call instructions whose target could not be resolved or is
        neither library code nor a thunk, and ``apis`` lists the names of
        call targets flagged as library code or thunk.
    """
    calls = 0
    apis = []
    flags = GetFunctionFlags(func_addr)
    # ignore library functions
    if flags & FUNC_LIB or flags & FUNC_THUNK:
        return (calls, "Library code or thunk")

    # `api_flags & FUNC_LIB == True` (the original test) compares the masked
    # bit with 1 and never matches FUNC_LIB; use a bit-mask test instead.
    api_mask = idaapi.FUNC_LIB | idaapi.FUNC_THUNK

    start = idc.GetFunctionAttr(func_addr, FUNCATTR_START)
    end = idc.GetFunctionAttr(func_addr, FUNCATTR_END)
    cur_addr = start
    while cur_addr <= end:
        if idaapi.is_call_insn(cur_addr):
            # In theory an API address should only have one xref. The xrefs
            # approach is used to obtain the call target address.
            target = None
            for xref in XrefsFrom(cur_addr, idaapi.XREF_FAR):
                if xref.to is None:
                    calls += 1
                    continue
                target = xref.to
                break

            if target is None:
                # api address could not be found
                calls += 1
            else:
                api_flags = GetFunctionFlags(target)
                # check for lib code (api)
                if api_flags & api_mask:
                    tmp_api_name = NameEx(0, target)
                    if tmp_api_name:
                        apis.append(tmp_api_name)
                else:
                    calls += 1

        # Advance exactly once per instruction; advancing inside the xref
        # loop as well made the walk skip instructions.
        cur_addr = idc.NextHead(cur_addr, end)
    return (calls, apis)
Exemple #26
0
def check_for_api_call(ctx, insn):
    """ yield "<a>.<b>" strings for the imports called by an instruction

    for every code reference leaving a call instruction the import table
    returned by get_imports(ctx) is consulted. when the reference itself is
    not an import but lies in a thunk function, the data references of the
    thunk are looked up instead.
    NOTE(review): the two formatted fields are presumably module and symbol
    name - confirm against get_imports.
    """
    if idaapi.is_call_insn(insn):
        for ref in idautils.CodeRefsFrom(insn.ea, False):
            direct = get_imports(ctx).get(ref, ())
            if direct:
                yield "%s.%s" % (direct[0], direct[1])
                continue

            # check if call to thunk
            # TODO: first instruction might not always be the thunk
            func = idaapi.get_func(ref)
            if not (func and (func.flags & idaapi.FUNC_THUNK)):
                continue

            for thunk_ref in idautils.DataRefsFrom(ref):
                # TODO: always data ref for thunk??
                via_thunk = get_imports(ctx).get(thunk_ref, ())
                if via_thunk:
                    yield "%s.%s" % (via_thunk[0], via_thunk[1])
Exemple #27
0
def extract_insn_bytes_features(f, bb, insn):
    """ extract the byte sequences referenced by an instruction

        yields a (Bytes feature, instruction address) pair for every data
        reference whose bytes could be read and are not all zero; call
        instructions are ignored.

        args:
            f (IDA func_t)
            bb (IDA BasicBlock)
            insn (IDA insn_t)

        example:
            push    offset iid_004118d4_IShellLinkA ; riid
    """
    if not idaapi.is_call_insn(insn):
        for data_ea in idautils.DataRefsFrom(insn.ea):
            blob = capa.features.extractors.ida.helpers.read_bytes_at(data_ea, MAX_BYTES_FEATURE_SIZE)
            if not blob:
                continue
            if capa.features.extractors.helpers.all_zeros(blob):
                continue
            yield Bytes(blob), insn.ea
Exemple #28
0
def extract_insn_bytes_features(f, bb, insn):
    """extract the byte sequence referenced by an instruction

    yields a (Bytes feature, instruction address) pair when the data
    reference found for insn differs from the instruction's own address and
    the bytes read there are not empty and not all zero; call instructions
    are ignored.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if not idaapi.is_call_insn(insn):
        data_ea = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
        if data_ea == insn.ea:
            return

        blob = capa.features.extractors.ida.helpers.read_bytes_at(data_ea, MAX_BYTES_FEATURE_SIZE)
        if not blob:
            return
        if not capa.features.extractors.helpers.all_zeros(blob):
            yield Bytes(blob), insn.ea
Exemple #29
0
def get_func_code_refs_from(func_ea, iaddrs):
    """Return the set of references leaving the given instruction addresses.

    For each address in iaddrs the last qualifying far cross reference is
    kept: data references always qualify, code references only when they do
    not land in the function starting at func_ea. The kept value (BADADDR
    when nothing qualified) is added to the result if it is a real address,
    or if the instruction is a call or an indirect jump.
    """
    code_refs = set()

    for addr in iaddrs:
        target = idaapi.BADADDR

        for xr in idautils.XrefsFrom(addr, idaapi.XREF_FAR):
            if not xr.iscode:
                target = xr.to
                continue

            # code reference: ignore it when it stays inside this function
            owner = idaapi.get_func(xr.to)
            if not owner or owner.startEA != func_ea:
                target = xr.to

        keep = (target != idaapi.BADADDR or idaapi.is_call_insn(addr) or idaapi.is_indirect_jump_insn(addr))
        if keep:
            code_refs.add(target)

    return code_refs
Exemple #30
0
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    Args:
        ea: address of the function to start from; it is always added to the
            result.
        path: optional set of addresses that were already visited. It is
            updated in place and also returned, so a caller may pass its own
            set to accumulate several traversals. When omitted, a fresh set is
            created for this call.

    Returns:
        The set holding `ea` plus every call target reached from it, skipping
        targets that are not inside a known function and functions flagged as
        thunk or library code.
    """
    if path is None:
        # A `set()` default would be built once at definition time and shared
        # by every call, leaking visited addresses between unrelated runs.
        path = set()

    path.add(ea)

    # snapshot the call instructions of the target function before descending
    call_sites = [x for x in idautils.FuncItems(ea) if idaapi.is_call_insn(x)]

    for call_ea in call_sites:
        for xref in idautils.XrefsFrom(call_ea, idaapi.XREF_FAR):
            if not xref.iscode:
                continue

            # get the function pointed at by this call
            callee = idaapi.get_func(xref.to)
            if not callee:
                continue

            # ignore calls to imports / library calls / thunks
            if (callee.flags & (idaapi.FUNC_THUNK | idaapi.FUNC_LIB)) != 0:
                continue

            # only descend into destinations that have not been traversed yet
            if xref.to not in path:
                graph_down(xref.to, path)

    return path
	def analyzeFunction(self, funcea):
		# https://reverseengineering.stackexchange.com/questions/9352/finding-all-api-calls-in-a-function
		# Copy + Paste from Stack Overflow - Lika Boss
		n_flags = set() 
		dism_addr = list(idautils.FuncItems(funcea))
		for instr in dism_addr:
			tmp_api_address = ""
			if idaapi.is_call_insn(instr):
				for xref in idautils.XrefsFrom(instr, idaapi.XREF_FAR):
					if xref.to == None:
						continue
					tmp_api_address = xref.to
					break
				# get next instr since api address could not be found
				if tmp_api_address == "":
					continue
				api_flags = idc.GetFunctionFlags(tmp_api_address)
	
				# check for lib code (api)
				if (api_flags & idaapi.FUNC_LIB and api_flags & idaapi.FUNC_STATICDEF):
					tmp_api_name = idc.NameEx(0, tmp_api_address)
					if tmp_api_name:
						t_flags = self.processFunction( funcea, tmp_api_name)
						n_flags = ( t_flags| n_flags )
		# Rename function if flags populated
		# 	Skip of this isn't the first run
		sflags = "".join(set(n_flags))
		if len(n_flags) > 0 and self.rename:
			fn = idc.GetFunctionName(funcea)
			if not fn.startswith(sflags):
				print "Renaming - ", fn, " with - ", sflags
				idc.MakeName(funcea, str(sflags + "_" + fn ))
		tbl = [ funcea, idc.GetFunctionName(funcea), sflags ]
		for f in definitions.PEAPIs.keys():
			if definitions.PEAPIs[f]['flag'] in sflags:
				tbl.append('*')
			else:
				tbl.append('')

		data.append( tbl )
Exemple #32
0
 def get_apis(self, func_ea):
     """Return the names of the call targets found in a function.

     Every call instruction of the function at `func_ea` is resolved to the
     destination of its first far cross-reference; the result of
     GetFunctionName for that destination is appended to the list without
     further filtering. Call instructions without such a cross-reference
     contribute nothing.

     Args:
         func_ea: an address inside the function to inspect.

     Returns:
         A list with one entry per resolved call instruction, in instruction
         order. Duplicates are kept.
     """
     apis = []
     for instr in FuncItems(func_ea):
         if not idaapi.is_call_insn(instr):
             continue

         # None marks "no destination found"; addresses are integers, so no
         # valid destination can be mistaken for the sentinel
         target = None
         for xref in XrefsFrom(instr, idaapi.XREF_FAR):
             if xref.to is not None:
                 target = xref.to
                 break
         if target is None:
             continue

         apis.append(GetFunctionName(target))
     return apis
Exemple #33
0
def get_func_code_refs_from(func_ea, iaddrs):
    """Return the set of targets referenced from a function's instructions.

    The far cross-references of every address in `iaddrs` are inspected in
    order and the last acceptable destination wins: data references always
    qualify, code references qualify only when they do not point into the
    function starting at `func_ea`. When nothing qualifies, idaapi.BADADDR is
    recorded for call and indirect-jump instructions and nothing is recorded
    for any other instruction.
    """
    found = set()

    for ea in iaddrs:
        dest = idaapi.BADADDR

        for xr in idautils.XrefsFrom(ea, idaapi.XREF_FAR):
            stays_local = False
            if xr.iscode:
                target_func = idaapi.get_func(xr.to)
                stays_local = bool(target_func and target_func.startEA == func_ea)
            if not stays_local:
                dest = xr.to

        keep = dest != idaapi.BADADDR
        if not keep:
            # unresolved calls / indirect jumps are still recorded (as BADADDR)
            keep = idaapi.is_call_insn(ea) or idaapi.is_indirect_jump_insn(ea)
        if keep:
            found.add(dest)

    return found
Exemple #34
0
    def __init__(self, functionName):
        import idautils
        import idc
        import idaapi
        super(FunctionGraph, self).__init__()

        start_addr = 0
        if type(functionName) == type('str'):
          start_addr = idc.LocByName(functionName)
        else:
          start_addr = idaapi.get_func(functionName).startEA
          print 'using 0x%x as function start' % (start_addr)

        self.start_addr = start_addr

        end_addr = idc.FindFuncEnd(start_addr)

        self.start_addr = start_addr
        self.end_addr = end_addr

        self.name = functionName

        for h in idautils.Heads(start_addr, end_addr):
            if h == idc.BADADDR:
                continue
            if not idc.isCode(idc.GetFlags(h)):
                continue


            self.add_node(h)
            refs = set(filter(lambda x: x <= end_addr and x >= start_addr, idautils.CodeRefsFrom(h,1)))
            nh = idc.NextHead(h, end_addr)
            if nh != idc.BADADDR and \
              (idaapi.isFlow(idaapi.get_flags_ex(nh,0)) or idaapi.is_call_insn(h)):
                refs.add(nh)

            for r in refs:
                self.connect(h, r)
Exemple #35
0
def find_rec(ea, func, maxdepth, all=True, depth=0, path=None, processed=None):
    """Apply `func` to every item of a function and of the functions it calls.

    Args:
        ea: address of the function to process.
        func: callable invoked as func(addr, path) for every item address of
            each visited function; `path` is the list of call-instruction
            addresses leading from the starting function to the current one.
        maxdepth: deepest call level to descend to; nothing happens once
            `depth` exceeds it.
        all: when equal to True, every call target is descended into, even if
            it was visited before; otherwise targets already listed in
            `processed` are skipped.
        depth: current call level (callers normally leave the default).
        path: list used as the call-site stack; a fresh list is created when
            omitted.
        processed: list of function addresses already visited; updated in
            place. A fresh list is created when omitted.

    Returns:
        None.
    """
    # Lists as default values would be created once and shared by every call,
    # so `processed` would remember functions from earlier, unrelated runs.
    if path is None:
        path = []
    if processed is None:
        processed = []

    if depth > maxdepth:
        return

    processed.append(ea)

    # Call func for each address in the function
    for addr in list(FuncItems(ea)):
        func(addr, path)

    # For each call instruction in the function descend into that call.
    # The items are enumerated again because func ran in between.
    for addr in [x for x in FuncItems(ea) if idaapi.is_call_insn(x)]:
        xrefs = list(CodeRefsFrom(addr, 0))

        # Only calls that reference code known by IDA can be followed
        if not xrefs:
            continue
        xref = xrefs[0]

        # `== True` is kept on purpose: truthy values other than True/1 have
        # always selected the "skip already processed" behaviour.
        if all == True or xref not in processed:
            # Find further calls in the function below
            path.append(addr)
            find_rec(xref, func, maxdepth, all, depth + 1, path, processed)
            path.pop()
def trace_data(ea, min_ea, op_type, op_val):
    '''
    Walk backwards from ea towards min_ea and mark every LDR/MOV/STR
    instruction that writes the operand currently being tracked, e.g:
    LDR R0, R3

    The tracked operand starts as (op_type, op_val). Whenever an instruction
    whose destination operand matches is found, it is passed to
    mark_instruction and tracking continues with that instruction's source
    operand.

    The walk ends when min_ea is reached, when no previous head exists, or
    when register operand 0 is being tracked and a call instruction is met
    (BL/BLX will modify R0).

    The function always returns None; its only effect is the calls to
    mark_instruction.
    '''
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)
        if ea == idc.BADADDR:
            # no earlier head inside the range: nothing left to inspect
            break

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # BL/BLX that will modify the R0
            return None

        mnem = idc.GetMnem(ea)
        if mnem in ('LDR', 'MOV'):
            src_op, dest_op = 1, 0
        elif mnem == 'STR':
            # STR stores operand 0 into operand 1
            src_op, dest_op = 0, 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and \
                idc.GetOperandValue(ea, dest_op) == op_val:
            mark_instruction(ea)
            # keep following the value through its source operand
            op_type = idc.GetOpType(ea, src_op)
            op_val = idc.GetOperandValue(ea, src_op)

    return None
def trace_param(ea, min_ea, op_type, op_val):
    '''
    trace_param: ea, min_ea, op_type, op_val

    Taking ea as start, this function does basic backtrace of
    an operand (defined by op_type and op_val) until it finds
    a data reference which we consider the "source". It stops
    when ea < min_ea (usually the function start).

    It does not support arithmetic or complex modifications of
    the source. This will be improved on future versions.

    Only LDR, MOV and STR instructions are examined. Returns the
    address of the data reference that was found, or None when the
    trace is abandoned (call clobbering the tracked R0, function
    argument, unrecognised disassembly text, unsupported source
    operand) or when the walk runs out of instructions.
    '''
    # displ_re and var_re are module-level regexes used on the disassembly text
    # below; msgsend is declared here but not used in this function.
    global displ_re, msgsend, var_re

    ea_call = ea  # starting address; not read again in this function
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # We have a BL/BLX that will modify the R0
            # we're tracking
            #
            return None

        # Work out which operand is written and which one is read
        if idc.GetMnem(ea) in ['LDR', 'MOV']:
            src_op = 1
            dest_op = 0
        elif idc.GetMnem(ea) == 'STR':
            src_op = 0
            dest_op = 1
        else:
            continue


        if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val:
            # Found, see where it comes from
            if idc.GetOpType(ea, src_op) == idc.o_mem:
                # Got the final reference
                refs = list(idautils.DataRefsFrom(ea))
                if not refs:
                    # No xref recorded: read the dword stored at the operand's address
                    local_ref = idc.GetOperandValue(ea, src_op)
                    far_ref = idc.Dword(local_ref)
                else:
                    # Follow the first data reference of each hop until an
                    # address without outgoing data references is reached.
                    # NOTE(review): a cycle of data references would keep this
                    # loop running - confirm that cannot happen.
                    while len(refs) > 0:
                        far_ref = refs[0]
                        refs = list(idautils.DataRefsFrom(refs[0]))
                return far_ref
            elif idc.GetOpType(ea, src_op) == idc.o_displ:
                if ', [SP' in idc.GetDisasm(ea):
                    if 'arg_' in idc.GetDisasm(ea):
                        # We don't track function arguments
                        return None

                    # We're tracking an stack variable
                    try:
                        var_name = var_re.search(idc.GetDisasm(ea)).group('varname')
                    except:
                        print '%08x: Unable to recognize variable' % ea
                        return None

                    # Scan backwards (beginning with the current instruction)
                    # for the STR that mentions the same stack variable; ea is
                    # moved by this inner loop, so the outer loop resumes from
                    # wherever it stops.
                    while ea != idc.BADADDR and ea > min_ea:
                        if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea):
                            # New reg to track
                            op_val = idc.GetOperandValue(ea, dest_op)
                            break
                        ea = idc.PrevHead(ea, min_ea)
                else:
                    # New reg to track
                    if '[LR]' in idc.GetDisasm(ea):
                        # Optimizations use LR as general reg
                        op_val = 14
                    else:
                        try:
                            op_val = int(displ_re.search(idc.GetDisasm(ea)).group('regnum'))
                        except:
                            print '%08x: Unable to recognize register' % ea
                            return None
            elif idc.GetOpType(ea, src_op) == idc.o_reg:
                # Direct reg-reg assignment
                # (only op_val is updated; op_type keeps its current value)
                op_val = idc.GetOperandValue(ea, src_op)
            else:
                # We don't track o_phrase or other complex source operands :(
                return None
    return None
Exemple #38
0
    def block(self, block):
        '''
        Compute the signatures of one basic block.

        Walks the heads from block.startEA up to (not including) block.endEA and
        builds two token streams:

          * formal: every mnemonic, plus string values / "dataref" for data
            references, plus every operand text of instructions that have
            neither data nor code references.
          * fuzzy: "funcref" for each named call target, string values /
            "dataref" for data references, and the decimal text of each
            interesting immediate.

        An immediate is interesting when its operand text does not start with
        '-', its value is at least 0xFFFF and idaapi.getFlags() of the value
        is 0.

        Returns a tuple: (sighash of the joined formal tokens, sighash of the
        joined fuzzy tokens, [interesting, immediate, values],
        [called, function, names])
        '''
        formal_tokens = []
        fuzzy_tokens = []
        called_names = []
        interesting_imms = []

        cur = block.startEA
        while cur < block.endEA:
            # fills idaapi.cmd, which the operand loop below reads
            idaapi.decode_insn(cur)

            data_refs = list(idautils.DataRefsFrom(cur))
            code_refs = list(idautils.CodeRefsFrom(cur, False))

            # the mnemonic always takes part in the formal signature
            formal_tokens.append(idc.GetMnem(cur))

            if idaapi.is_call_insn(cur):
                # Remember the names of called functions for call-based
                # signatures. The fuzzy stream only notes that a call was made;
                # the formal stream already has the call mnemonic.
                for target in code_refs:
                    callee = idc.Name(target)
                    if not callee:
                        continue
                    called_names.append(callee)
                    fuzzy_tokens.append("funcref")
            elif data_refs:
                # Known strings contribute their value to both streams; any
                # other referenced data is reduced to a generic marker.
                for target in data_refs:
                    if self.strings.has_key(target):
                        token = self.strings[target].value
                    else:
                        token = "dataref"
                    formal_tokens.append(token)
                    fuzzy_tokens.append(token)
            elif not code_refs:
                # No references at all: every operand goes into the formal
                # stream, interesting immediates into the fuzzy one.
                for n in range(0, len(idaapi.cmd.Operands)):
                    text = idc.GetOpnd(cur, n)
                    formal_tokens.append(text)

                    operand = idaapi.cmd.Operands[n]
                    if operand.type != idaapi.o_imm or text.startswith('-'):
                        continue
                    if operand.value < 0xFFFF:
                        continue
                    if idaapi.getFlags(operand.value) != 0:
                        continue

                    fuzzy_tokens.append(str(operand.value))
                    interesting_imms.append(operand.value)

            cur = idc.NextHead(cur)

        formal_sig = self.sighash(''.join(formal_tokens))
        fuzzy_sig = self.sighash(''.join(fuzzy_tokens))
        return (formal_sig, fuzzy_sig, interesting_imms, called_names)
Exemple #39
0
    def block(self, block):
        '''
        Produce the formal and fuzzy signatures of a basic block.

        Every head between block.startEA (inclusive) and block.endEA
        (exclusive) is visited. The formal token list receives each mnemonic,
        the value of referenced strings (or "dataref"), and all operand texts
        of reference-free instructions. The fuzzy token list receives
        "funcref" per named call target, the same string / "dataref" tokens,
        and the decimal text of immediates that do not display as negative,
        are at least 0xFFFF and for which idaapi.getFlags() returns 0.

        Returns a tuple: (sighash of the joined formal tokens, sighash of the
        joined fuzzy tokens, [collected, immediate, values],
        [called, function, names])
        '''
        formal_seq, fuzzy_seq = [], []
        callees, imm_list = [], []

        addr = block.startEA
        while addr < block.endEA:
            # decode first: idaapi.cmd is consulted for the operands below
            idaapi.decode_insn(addr)

            drefs_here = list(idautils.DataRefsFrom(addr))
            crefs_here = list(idautils.CodeRefsFrom(addr, False))

            formal_seq.append(idc.GetMnem(addr))

            if idaapi.is_call_insn(addr):
                # call: record named targets; fuzzy only notes "a call happened"
                for dst in crefs_here:
                    dst_name = idc.Name(dst)
                    if dst_name:
                        callees.append(dst_name)
                        fuzzy_seq.append("funcref")
            elif drefs_here:
                # data access: known strings by value, anything else generically
                for dst in drefs_here:
                    if self.strings.has_key(dst):
                        marker = self.strings[dst].value
                    else:
                        marker = "dataref"
                    formal_seq.append(marker)
                    fuzzy_seq.append(marker)
            elif not crefs_here:
                # plain instruction: operands feed the formal signature and
                # qualifying immediates feed the fuzzy one
                operand_count = len(idaapi.cmd.Operands)
                n = 0
                while n < operand_count:
                    shown = idc.GetOpnd(addr, n)
                    formal_seq.append(shown)

                    op = idaapi.cmd.Operands[n]
                    is_candidate = op.type == idaapi.o_imm and not shown.startswith('-')
                    if is_candidate and op.value >= 0xFFFF and idaapi.getFlags(op.value) == 0:
                        fuzzy_seq.append(str(op.value))
                        imm_list.append(op.value)
                    n += 1

            addr = idc.NextHead(addr)

        return (
            self.sighash(''.join(formal_seq)),
            self.sighash(''.join(fuzzy_seq)),
            imm_list,
            callees,
        )
Exemple #40
0
 def is_call(self):
     """Is the instruction a call instruction.

     Returns the result of idaapi.is_call_insn for the address stored in
     self._ea.
     """
     return idaapi.is_call_insn(self._ea)
Exemple #41
0
 def is_call(self):
     """Is the instruction a call instruction.

     Returns the result of idaapi.is_call_insn for the value stored in
     self._insn.
     """
     return idaapi.is_call_insn(self._insn)
Exemple #42
0
	def _profile_function(self):
		"""Record where the highlighted identifier occurs in the current function.

		The function is the one named at ScreenEA(). Each instruction between
		its start and end is decoded and checked for the highlighted identifier
		in, in this order: the mnemonic; for call instructions, the names of
		cross-reference targets; otherwise the operand texts; and finally the
		two comments returned by idc.GetCommentEx.

		Matches are stored in self.xrefs, keyed by address, with the function
		offset, mnemonic, access type (self.READ / self.WRITE), direction
		relative to ScreenEA() (self.UP / self.DOWN / self.THIS) and the
		disassembly text. Nothing is scanned when no identifier is highlighted.
		"""
		current_ea = ScreenEA()
		current_function = idc.GetFunctionName(current_ea)
		current_function_ea = idc.LocByName(current_function)

		if current_function:
			self.function = current_function

		ea = start_ea = idc.GetFunctionAttr(current_function_ea,  idc.FUNCATTR_START)
		end_ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_END)

		self.highlighted = idaapi.get_highlighted_identifier()

		while ea < end_ea and ea != idc.BADADDR and self.highlighted:

			# per-instruction state
			i = 0
			match = False
			optype = self.READ
			comment = None

			# populates idaapi.cmd, used for the canon feature lookup below and
			# for the instruction size at the bottom of the loop
			idaapi.decode_insn(ea)
			
			mnem = idc.GetMnem(ea)

			if self.highlighted in mnem:
				match = True
			elif idaapi.is_call_insn(ea):
				for xref in idautils.XrefsFrom(ea):
					# NOTE(review): 21 is presumably the "ordinary flow" xref
					# type (fl_F) - confirm against the IDA SDK
					if xref.type != 21:
						name = idc.Name(xref.to)
						if name and self.highlighted in name:
							match = True
							break
			else:	
				# walk the operands until GetOpnd returns an empty value
				while True:
					opnd = idc.GetOpnd(ea, i)
					if opnd:
						if self.highlighted in opnd:
							match = True
							# a match in an operand the instruction writes to
							# upgrades the access type to WRITE
							if (idaapi.insn_t_get_canon_feature(idaapi.cmd.itype) & self.OPND_WRITE_FLAGS[i]):
								optype = self.WRITE
						i += 1
					else:
						break

			if not match:
				# fall back to the comments: GetCommentEx(ea, 0) first, then
				# GetCommentEx(ea, 1)
				comment = idc.GetCommentEx(ea, 0)
				if comment and self.highlighted in comment:
					match = True
				else:
					comment = idc.GetCommentEx(ea, 1)
					if comment and self.highlighted in comment:
						match = True
					else:
						comment = None

			if match:
				if ea > current_ea:
					direction = self.DOWN
				elif ea < current_ea:
					direction = self.UP
				else:
					direction = self.THIS

				self.xrefs[ea] = {
					'offset' 	: idc.GetFuncOffset(ea),
					'mnem'	 	: mnem,
					'type'		: optype,
					'direction'	: direction,
					'text'		: idc.GetDisasm(ea),
				}

			# advance by the size of the instruction decoded above
			ea += idaapi.cmd.size
Exemple #43
-1
 def highlight(self, color=COLOR):
     """Toggle the item colour of call instructions.

     Every head returned by idautils.Heads() that is code and a call
     instruction is switched from self.COLOR back to idc.DEFCOLOR, or from
     idc.DEFCOLOR to self.COLOR. Items carrying any other colour are left
     alone.

     NOTE(review): the `color` argument is not read in the lines visible
     here; self.COLOR is used instead - confirm whether that is intended.
     """
     for ea in idautils.Heads():
         if idaapi.isCode(idaapi.getFlags(ea)) and idaapi.is_call_insn(ea):
             current_color = idaapi.get_item_color(ea)
             if current_color == self.COLOR:
                 # already highlighted: restore the default colour
                 idaapi.set_item_color(ea, idc.DEFCOLOR)
             elif current_color == idc.DEFCOLOR:
                 idaapi.set_item_color(ea, self.COLOR)