예제 #1
0
def instructionHandler(M, B, inst, new_eas):
    """Recover the instruction at address ``inst`` into block ``B``.

    The instruction is added to ``B`` through ``addInst`` and then annotated
    with the control-flow and data references IDA reports for it.

    Args:
        M: module object; only forwarded to ``addDataReference``.
        B: block object the instruction is added to.
        inst: effective address of the instruction.
        new_eas: set-like collection; addresses discovered here that are not
            in ``RECOVERED_EAS`` are ``add``-ed to it.

    Returns:
        A tuple ``(I, flag)``. ``I`` is the object returned by ``addInst``
        (``None`` for a skipped HLT). ``flag`` is True for the 0xCC filler
        byte, for calls/jumps to non-returning externals, for a ``call $+5``
        whose target was not recovered yet, and for calls followed by
        non-code; it is False otherwise. Presumably the caller uses it to
        stop following fall-through flow -- confirm against the caller.

    Raises:
        Exception: the bytes at ``inst`` cannot be decoded and are not 0xCC.
    """
    insn_t = idautils.DecodeInstruction(inst)
    if not insn_t:
        # handle jumps after noreturn functions
        if idc.Byte(inst) == 0xCC:
            I = addInst(B, inst, [0xCC])
            return I, True
        else:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))

    # skip HLTs -- they are privileged, and are used in ELFs after a noreturn call
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    inst_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(inst_bytes))

    I = addInst(B, inst, inst_bytes)

    if isJmpTable(inst):
        handleJmpTable(I, inst, new_eas)
        return I, False

    # Materialise the code refs once: they are indexed and searched below.
    crefs = list(idautils.CodeRefsFrom(inst, 0))

    is_call = isCall(inst)
    isize = insn_t.size
    next_ea = inst + isize

    had_refs = False

    # this is a call $+5, needs special handling
    if not crefs and is_call and isize == 5:
        selfCallEA = next_ea
        DEBUG("INTERNAL CALL $+5: {0:x}\n".format(selfCallEA))
        DEBUG("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True

        if selfCallEA not in RECOVERED_EAS:
            DEBUG("Adding new EA: {0:x}\n".format(selfCallEA))
            new_eas.add(selfCallEA)
            I.call_target = selfCallEA
            return I, True

    for cref in crefs:
        had_refs = True
        fn = getFunctionName(cref)
        if is_call:

            # An ELF thunk is treated like an external call under the
            # replacement name reported by isElfThunk.
            elfy, fn_replace = isElfThunk(cref)
            if elfy:
                fn = fn_replace

            if isExternalReference(cref) or elfy:
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL CALL: {0}\n".format(fn))

                if doesNotReturn(fn):
                    return I, True
            else:
                I.call_target = cref

                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

                DEBUG("INTERNAL CALL: {0}\n".format(fn))
        elif isUnconditionalJump(inst):
            if isExternalReference(cref):
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL JMP: {0}\n".format(fn))

                if doesNotReturn(fn):
                    DEBUG("Nonreturn JMP\n")
                    return I, True
            else:
                DEBUG("INTERNAL JMP: {0:x}\n".format(cref))
                I.true_target = cref

    # true: jump to where we have a code-ref
    # false: continue as we were
    if isConditionalJump(inst):
        # NOTE(review): assumes at least one code ref was reported for a
        # conditional jump; crefs[0] raises IndexError otherwise.
        I.true_target = crefs[0]
        I.false_target = inst + len(inst_bytes)
        return I, False

    if is_call and isNotCode(next_ea):
        DEBUG("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True
        return I, True

    relo_off = findRelocOffset(inst, len(inst_bytes))
    if relo_off != -1:
        I.reloc_offset = relo_off

    for dref in idautils.DataRefsFrom(inst):
        had_refs = True
        if dref in crefs:
            continue
        addDataReference(M, I, inst, dref, new_eas)
        DEBUG("instr refering data")
        if isUnconditionalJump(inst):
            # Qualified like every other xref query in this function; the
            # bare name is only defined if the module star-imports idautils.
            xdrefs = idautils.DataRefsFrom(dref)
            for xref in xdrefs:
                DEBUG("xref : {0:x}\n".format(xref))
                # check if it refers to some instructions; link control flow
                if isExternalReference(xref):
                    fn = getFunctionName(xref)
                    fn = handleExternalRef(fn)
                    I.ext_call_name = fn
                    DEBUG("EXTERNAL CALL : {0}\n".format(fn))

    if not had_refs and isLinkedElf():
        for op in insn_t.Operands:
            if op.type == idc.o_imm:
                # we have an immediate.. check if it is in a data section
                begin_a = op.value
                end_a = begin_a + idc.ItemSize(begin_a)
                if isInData(begin_a, end_a):
                    # add data reference
                    addDataReference(M, I, inst, begin_a, new_eas)
                # immediates pointing into code are not handled here

    return I, False
def trace_param(ea, min_ea, op_type, op_val):
    '''
    trace_param: ea, min_ea, op_type, op_val

    Taking ea as start, this function does a basic backtrace of
    an operand (defined by op_type and op_val) until it finds
    a data reference which we consider the "source". The walk goes
    backwards one head at a time and ends when ea equals min_ea
    (usually the function start) or idc.BADADDR.

    Only LDR, MOV and STR instructions are inspected; every other
    mnemonic is skipped. It does not support arithmetic or complex
    modifications of the source.

    Returns the source address: the last address in the chain of data
    references starting at the matching instruction or, when an o_mem
    source has no data reference, the dword read at the source operand
    value. Returns None when the source cannot be determined.
    '''
    # displ_re and var_re are used below through .search(...).group(...);
    # msgsend is declared global but not used in this function.
    global displ_re, msgsend, var_re

    # NOTE(review): ea_call is assigned but never read in this function.
    ea_call = ea
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # We have a call (BL/BLX) that will modify the R0
            # we're tracking, so the trace cannot continue.
            return None

        # Pick which operand is written (dest_op) and which is read (src_op).
        if idc.GetMnem(ea) in ['LDR', 'MOV']:
            src_op = 1
            dest_op = 0
        elif idc.GetMnem(ea) == 'STR':
            src_op = 0
            dest_op = 1
        else:
            continue


        if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val:
            # Found, see where it comes from
            if idc.GetOpType(ea, src_op) == idc.o_mem:
                # Got the final reference
                refs = list(idautils.DataRefsFrom(ea))
                if not refs:
                    # No xref recorded: read the dword at the operand address.
                    local_ref = idc.GetOperandValue(ea, src_op)
                    far_ref = idc.Dword(local_ref)
                else:
                    # Follow the first data reference repeatedly until an
                    # address with no outgoing data reference is reached.
                    while len(refs) > 0:
                        far_ref = refs[0]
                        refs = list(idautils.DataRefsFrom(refs[0]))
                return far_ref
            elif idc.GetOpType(ea, src_op) == idc.o_displ:
                if ', [SP' in idc.GetDisasm(ea):
                    if 'arg_' in idc.GetDisasm(ea):
                        # We don't track function arguments
                        return None

                    # We're tracking a stack variable
                    try:
                        var_name = var_re.search(idc.GetDisasm(ea)).group('varname')
                    except:
                        print '%08x: Unable to recognize variable' % ea
                        return None

                    # Walk further back (this moves the outer loop's ea too)
                    # looking for the STR that mentions the same variable.
                    while ea != idc.BADADDR and ea > min_ea:
                        if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea):
                            # New reg to track
                            op_val = idc.GetOperandValue(ea, dest_op)
                            break
                        ea = idc.PrevHead(ea, min_ea)
                else:
                    # New reg to track
                    if '[LR]' in idc.GetDisasm(ea):
                        # Optimizations use LR as general reg
                        op_val = 14
                    else:
                        # A data reference here is treated as the final
                        # reference; follow the chain as in the o_mem case.
                        refs = list(idautils.DataRefsFrom(ea))
                        if refs:
                          while len(refs) > 0:
                            far_ref = refs[0]
                            refs = list(idautils.DataRefsFrom(refs[0]))
                          return far_ref

                        # Otherwise continue tracking the base register
                        # number extracted from the disassembly text.
                        try:
                            op_val = int(displ_re.search(idc.GetDisasm(ea)).group('regnum'))
                        except:
                            print '%08x: Unable to recognize register' % ea
                            return None
            elif idc.GetOpType(ea, src_op) == idc.o_reg:
                # Direct reg-reg assignment
                op_val = idc.GetOperandValue(ea, src_op)
            else:
                # We don't track o_phrase or other complex source operands :(
                return None
    return None
예제 #3
0
def instructionHandler(M, B, inst, new_eas):
    """Translate the instruction at ``inst`` into an entry of block ``B``.

    The instruction is registered through ``addInst`` and then decorated
    with call/jump targets, external names, relocation offset and data
    offsets, based on the code and data references IDA reports.

    Args:
        M: module object; only forwarded to ``handleDataRelocation``.
        B: block object receiving the instruction.
        inst: effective address of the instruction.
        new_eas: set-like collection collecting addresses that are not in
            ``RECOVERED_EAS`` yet.

    Returns:
        ``(instruction, flag)``. The instruction is ``None`` for a skipped
        HLT. The flag is True for the 0xCC filler byte, non-returning
        external calls/jumps, a ``call $+5`` to a new address, and calls
        followed by non-code; False in every other case.

    Raises:
        Exception: the bytes cannot be decoded and are not 0xCC.
    """
    decoded = idautils.DecodeInstruction(inst)
    if not decoded:
        # Undecodable bytes are only tolerated when they are the 0xCC byte
        # (handles jumps after noreturn functions).
        if idc.Byte(inst) != 0xCC:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))
        return addInst(B, inst, [0xCC]), True

    # HLT is privileged and shows up in ELFs after a noreturn call: skip it.
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    raw_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(raw_bytes))

    insn = addInst(B, inst, raw_bytes)

    if isJmpTable(inst):
        handleJmpTable(insn, inst, new_eas)
        return insn, False

    # Snapshot of the non-flow code references leaving this instruction.
    code_targets = list(idautils.CodeRefsFrom(inst, 0))

    calling = isCall(inst)
    length = idautils.DecodeInstruction(inst).size
    fallthrough = inst + length

    # A 5-byte call without any code reference is a "call $+5".
    if calling and length == 5 and not code_targets:
        DEBUG("INTERNAL CALL $+5: {0:x}\n".format(fallthrough))
        sys.stdout.write("LOCAL NORETURN CALL!\n")
        insn.local_noreturn = True

        if fallthrough not in RECOVERED_EAS:
            DEBUG("Adding new EA: {0:x}\n".format(fallthrough))
            new_eas.add(fallthrough)
            insn.call_target = fallthrough
            return insn, True

    for target in code_targets:
        name = getFunctionName(target)

        if calling:
            is_thunk, thunk_name = isElfThunk(target)
            if is_thunk:
                name = thunk_name

            if not (isExternalReference(target) or is_thunk):
                # Call into code that belongs to this binary.
                insn.call_target = target
                if target not in RECOVERED_EAS:
                    new_eas.add(target)
                DEBUG("INTERNAL CALL: {0}\n".format(name))
                continue

            name = handleExternalRef(name)
            insn.ext_call_name = name
            DEBUG("EXTERNAL CALL: {0}\n".format(name))
            if doesNotReturn(name):
                return insn, True
            continue

        if not isUnconditionalJump(inst):
            continue

        if not isExternalReference(target):
            DEBUG("INTERNAL JMP: {0:x}\n".format(target))
            insn.true_target = target
            continue

        name = handleExternalRef(name)
        insn.ext_call_name = name
        DEBUG("EXTERNAL JMP: {0}\n".format(name))
        if doesNotReturn(name):
            DEBUG("Nonreturn JMP\n")
            return insn, True

    # Conditional jump: taken edge is the first code ref, the other edge is
    # the address right after the instruction bytes.
    if isConditionalJump(inst):
        insn.true_target = code_targets[0]
        insn.false_target = inst + len(raw_bytes)
        return insn, False

    if calling and isNotCode(fallthrough):
        sys.stdout.write("LOCAL NORETURN CALL!\n")
        insn.local_noreturn = True
        return insn, True

    reloc = findRelocOffset(inst, len(raw_bytes))
    if reloc != -1:
        insn.reloc_offset = reloc

    for data_target in idautils.DataRefsFrom(inst):
        # Addresses already handled as code references are not repeated.
        if data_target in code_targets:
            continue

        if not inValidSegment(data_target):
            DEBUG("Data not in valid segment {0:x}\n".format(data_target))
            continue

        if isExternalReference(data_target):
            ext_name = handleExternalRef(getFunctionName(data_target))
            if isExternalData(ext_name):
                insn.ext_data_name = ext_name
                sys.stdout.write("EXTERNAL DATA REF FROM {0:x} to {1}\n".format(inst, ext_name))
            else:
                insn.ext_call_name = ext_name
                sys.stdout.write("EXTERNAL CODE REF FROM {0:x} to {1}\n".format(inst, ext_name))
        elif isInternalCode(data_target):
            insn.call_target = data_target
            if data_target not in RECOVERED_EAS:
                new_eas.add(data_target)
        else:
            item_size = idc.ItemSize(data_target)
            DEBUG("\t\tData Ref: {0:x}, size: {1}\n".format(
                data_target, item_size))
            insn.data_offset = handleDataRelocation(M, data_target, new_eas)

    return insn, False
예제 #4
0
파일: rizzo.py 프로젝트: p-chandra/Shortcut
    def block(self, block):
        '''
        Build the signature data for one basic block.

        Every instruction head from the block's start EA up to (but not
        including) its end EA is visited. Returns a tuple:
        (self.sighash of the joined formal parts,
        self.sighash of the joined fuzzy parts,
        [immediate, values], [called, function, names])

        The immediates are returned as a list in encounter order; nothing
        here removes duplicates.
        '''
        formal_parts = []
        fuzzy_parts = []
        called_names = []
        immediate_values = []

        cur = ida_shims.start_ea(block)
        while cur < ida_shims.end_ea(block):
            insn = ida_shims.decode_insn(cur)

            # All data/code references leaving the current instruction.
            data_refs = list(idautils.DataRefsFrom(cur))
            code_refs = list(idautils.CodeRefsFrom(cur, False))

            # The mnemonic always goes into the formal signature.
            formal_parts.append(ida_shims.print_insn_mnem(cur))

            if idaapi.is_call_insn(cur):
                # Calls: remember the name of each named target so that
                # call-based signatures can be applied. The fuzzy signature
                # only records that a function was referenced; the formal
                # one already has the call mnemonic.
                for target in code_refs:
                    callee = ida_shims.get_name(target)
                    if callee:
                        called_names.append(callee)
                        fuzzy_parts.append("funcref")
            elif data_refs:
                # Data references: strings known to self.strings contribute
                # their value to both signatures, anything else is recorded
                # as a generic "dataref".
                for ref in data_refs:
                    if ref in self.strings:
                        text = self.strings[ref].value
                        formal_parts.append(text)
                        fuzzy_parts.append(text)
                    else:
                        formal_parts.append("dataref")
                        fuzzy_parts.append("dataref")
            elif not code_refs:
                # No references at all: every operand's text is part of the
                # formal signature. The fuzzy signature only takes immediates
                # that are not printed as negative, are >= 0xFFFF and whose
                # value has no flags as an address.
                operands = ida_shims.get_operands(insn)
                for idx in range(len(operands)):
                    operand = operands[idx]
                    rendered = ida_shims.print_operand(cur, idx)
                    formal_parts.append(rendered)

                    if operand.type != idaapi.o_imm or rendered.startswith('-'):
                        continue
                    if operand.value < 0xFFFF:
                        continue
                    if ida_shims.get_full_flags(operand.value) == 0:
                        fuzzy_parts.append(str(operand.value))
                        immediate_values.append(operand.value)

            cur = ida_shims.next_head(cur)

        formal_hash = self.sighash(''.join(formal_parts))
        fuzzy_hash = self.sighash(''.join(fuzzy_parts))
        return (formal_hash, fuzzy_hash, immediate_values, called_names)
예제 #5
0
    print hex(addr), idc.GetDisasm(addr)

# Query every cross-reference TO the data at the given address.
# Example:
#0x100001378L text "UTF-16LE", '\UNC\',0
#0x100001723L lea     r11, aUnc; "\\UNC\\"
ea = here()
print('%s %s' % (hex(ea), idc.GetDisasm(ea)))
for addr in idautils.DataRefsTo(ea):
    print('%s %s' % (hex(addr), idc.GetDisasm(addr)))

# Query every data address referenced FROM the given address.
# Example:
#0x100001723L lea     r11, aUnc; "\\UNC\\"
#0x100001378L text "UTF-16LE", '\UNC\',0
ea = here()
print('%s %s' % (hex(ea), idc.GetDisasm(ea)))
for addr in idautils.DataRefsFrom(ea):
    print('%s %s' % (hex(addr), idc.GetDisasm(addr)))

# Cross-reference types
#0 = 'Data_Unknown'
#1 = 'Data_Offset'
#2 = 'Data_Write'
#3 = 'Data_Read'
#4 = 'Data_Text'
#5 = 'Data_Informational'
#16 = 'Code_Far_Call'
#17 = 'Code_Near_Call'
#18 = 'Code_Far_Jump'
#19 = 'Code_Near_Jump'
#20 = 'Code_User'
#21 = 'Ordinary_Flow'
def trace_param(ea, min_ea, op_type, op_val):
    '''
    trace_param: ea, min_ea, op_type, op_val

    Taking ea as start, this function does a basic backtrace of
    an operand (defined by op_type and op_val) until it finds
    a data reference which we consider the "source". The walk goes
    backwards one head at a time and ends when ea equals min_ea
    (usually the function start) or idc.BADADDR.

    Only LDR, MOV and STR instructions are inspected. It does not
    support arithmetic or complex modifications of the source.

    Returns:
      - the source address (last address in the chain of data references,
        or the dword read at the source operand value when the matching
        instruction has no data reference), or
      - when the walk ends at or before min_ea while still tracking
        register 0-3: 'pself' (R0), 'selector' (R1),
        f_info['fparam_type'] (R2) or f_info['sparam_name'] (R3), where
        f_info is get_func_info(ea), or
      - None when the source cannot be determined.
    '''
    global displ_re, msgsend, var_re

    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # A call (BL/BLX) overwrites the R0 we're tracking.
            # TODO: resolve more situations
            return None

        # Pick which operand is written (dest_op) and which is read (src_op).
        mnem = idc.GetMnem(ea)
        if mnem in ['LDR', 'MOV']:
            src_op = 1
            dest_op = 0
        elif mnem == 'STR':
            src_op = 0
            dest_op = 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val:
            # Found, see where it comes from
            src_type = idc.GetOpType(ea, src_op)
            if src_type == idc.o_mem or src_type == idc.o_imm:
                # Got the final reference
                refs = list(idautils.DataRefsFrom(ea))
                if not refs:
                    # No xref recorded: read the dword at the operand address.
                    local_ref = idc.GetOperandValue(ea, src_op)
                    far_ref = idc.Dword(local_ref)
                else:
                    # Follow the first data reference repeatedly until an
                    # address with no outgoing data reference is reached.
                    while len(refs) > 0:
                        far_ref = refs[0]
                        refs = list(idautils.DataRefsFrom(refs[0]))
                # The original had a body-less "if far_ref:" here, which is
                # a syntax error; the value is returned as-is.
                return far_ref
            elif src_type == idc.o_displ:
                disasm = idc.GetDisasm(ea)
                if ', [SP' in disasm:
                    if 'arg_' in disasm:
                        # We don't track function arguments
                        return None

                    # We're tracking a stack variable
                    try:
                        var_name = var_re.search(disasm).group('varname')
                    except Exception:
                        print('%08x: Unable to recognize variable' % ea)
                        return None

                    # Walk further back (this moves the outer loop's ea too)
                    # looking for the STR that mentions the same variable.
                    while ea != idc.BADADDR and ea > min_ea:
                        if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea):
                            # New reg to track
                            op_val = idc.GetOperandValue(ea, dest_op)
                            break
                        ea = idc.PrevHead(ea, min_ea)
                else:
                    # New reg to track
                    if '[LR]' in disasm:
                        # Optimizations use LR as general reg
                        op_val = 14
                    else:
                        try:
                            op_val = int(displ_re.search(disasm).group('regnum'))
                        except Exception:
                            print('%08x: Unable to recognize register' % ea)
                            return None
            elif src_type == idc.o_reg:
                # Direct reg-reg assignment
                op_val = idc.GetOperandValue(ea, src_op)
            else:
                # We don't track o_phrase or other complex source operands :(
                return None

    # Still tracking R0-R3 at the start of the range: the register was
    # assigned by a function parameter.
    if ea <= min_ea and op_type == idc.o_reg and op_val in range(4):
        f_info = get_func_info(ea)
        return ['pself', 'selector', f_info['fparam_type'], f_info['sparam_name']][op_val]
    return None



def fix_callgraph(msgsend, segname, class_param, sel_param): #class_param == 0, sel_param == 1
    '''
    fix_callgraph: msgsend, segname, class_param, sel_param

    Given the msgsend flavour address as a parameter, looks
    for the parameters (class and selector, identified by
    class_param and sel_param) and creates a new segment where
    it places a set of dummy calls named as classname_methodname
    (we use method instead of selector most of the time).

    Every cross-reference to msgsend whose class or method could be
    resolved by trace_param is patched into a branch to the function
    named '_[class_method]': an existing location with that name if
    there is one, otherwise a new 4-byte stub in the new segment.

    Returns None. Patching stops at the first call whose branch offset
    does not pass the range check (Thumb and ARM alike).
    '''

    t1 = time.time()
    if not msgsend:
        print('ERROR: msgSend not found')
        return

    total = 0
    resolved = 0
    call_table = dict()

    for xref in idautils.XrefsTo(msgsend, idaapi.XREF_ALL):
        total += 1
        ea_call = xref.frm
        func_start = idc.GetFunctionAttr(ea_call, idc.FUNCATTR_START)
        if not func_start or func_start == idc.BADADDR:
            continue

        # Selector register (sel_param == 1 in the calls in this file).
        method_name_ea = trace_param(ea_call, func_start, idc.o_reg, sel_param)
        if method_name_ea and idc.isASCII(idc.GetFlags(method_name_ea)):
            method_name = idc.GetString(method_name_ea, -1, idc.ASCSTR_C)
            if not method_name:
                method_name = '_unk_method'
        else:
            method_name = '_unk_method'

        # Class register (class_param == 0 for plain msgSend).
        class_name_ea = trace_param(ea_call, func_start, idc.o_reg, class_param)
        if class_name_ea:
            class_name = idc.Name(class_name_ea)
            if not class_name:
                class_name = '_unk_class'
        else:
            class_name = '_unk_class'

        if method_name == '_unk_method' and class_name == '_unk_class':
            continue

        # Using this name convention, if the class and method
        # are identified by IDA, the patched call will point to
        # the REAL call and not one of our dummy functions
        #
        class_name = class_name.replace('_OBJC_CLASS_$_', '')
        class_name = class_name.replace('_OBJC_METACLASS_$_', '')
        new_name = '_[' + class_name + '_' + method_name + ']'
        print('%08x: %s' % (ea_call, new_name))
        call_table[ea_call] = new_name
        resolved += 1

    print('\nFinal stats:\n\t%d total calls, %d resolved' % (total, resolved))
    print('\tAnalysis took %.2f seconds' % (time.time() - t1))

    if resolved == 0:
        print('Nothing to patch.')
        return

    print('Adding new segment to store new nullsubs')

    # segment size = opcode ret (4 bytes) * num_calls
    seg_size = resolved * 4
    seg_start = idc.MaxEA() + 4
    idaapi.add_segm(0, seg_start, seg_start + seg_size, segname, 'CODE')

    print('Patching database...')
    seg_ptr = seg_start
    for ea, new_name in call_table.items():
        existing = idc.LocByName(new_name)
        if existing != idc.BADADDR:
            offset = existing - ea
        else:
            # create code and name it
            idc.PatchDword(seg_ptr, 0xE12FFF1E) # BX LR
            idc.MakeName(seg_ptr, new_name)
            idc.MakeCode(seg_ptr)
            idc.MakeFunction(seg_ptr, seg_ptr + 4)
            idc.MakeRptCmt(seg_ptr, new_name)
            offset = seg_ptr - ea
            seg_ptr += 4

        # patch the msgsend call
        if idc.GetReg(ea, "T") == 1:
            if offset > 0 and offset & 0xFF800000:
                print('Offset too far for Thumb (%08x) Stopping [%08x]' % (offset, ea))
                return

            off1 = (offset & 0x7FF000) >> 12
            off2 = (offset & 0xFFF) // 2
            w1 = (0xF000 | off1)
            w2 = (0xE800 | off2) - 1
            idc.PatchWord(ea, w1)
            idc.PatchWord(ea + 2, w2)
        else:
            if offset > 0 and offset & 0xFF000000:
                print('Offset too far (%08x) Stopping [%08x]' % (offset, ea))
                # Stop like the Thumb branch does instead of patching a
                # branch whose offset does not fit the instruction.
                return
            dw = (0xFA000000 | ((offset - 8) >> 2))
            if dw < 0:
                dw = dw & 0xFAFFFFFF
            idc.PatchDword(ea, dw)


def make_offsets(segname):
    '''
    Walk the segment named `segname` in 4-byte steps and, for each slot,
    turn operand 0 of the slot into an offset and then do the same at
    the address stored in the slot.
    '''
    cursor = idc.SegByBase(idc.SegByName(segname))
    limit = idc.SegEnd(cursor)

    while cursor < limit:
        # First the slot itself, then the location its dword points at.
        idc.OpOffset(cursor, 0)
        idc.OpOffset(idc.Dword(cursor), 0)
        cursor += 4

# Script entry point: convert the class reference segments to offsets,
# re-run analysis, patch the callers of both msgSend flavours, then
# re-run analysis again.
if __name__ == '__main__':
    print 'Preparing class references segments'
    make_offsets('__objc_classrefs') # TODO: document what these two segments contain
    make_offsets('__objc_superrefs')
    # Re-analyze the whole address range after the offset conversion.
    idaapi.analyze_area(idc.MinEA(), idc.MaxEA())
    print 'Fixing callgraph'
    # Arguments after the segment name are class_param and sel_param.
    fix_callgraph(idc.LocByName('_objc_msgSend'), 'msgSend', 0, 1)
    fix_callgraph(idc.LocByName('_objc_msgSendSuper2'), 'msgSendSuper', 3, 1)
    # Re-analyze again so the patched calls are picked up.
    idaapi.analyze_area(idc.MinEA(), idc.MaxEA())
    print 'Done.'
예제 #7
0
def _get_ida_func_surrogate(func, arch):
    """Build a plain-dict description ("surrogate") of an IDA function.

    Args:
        func: object exposing ``startEA``; the function to describe.
        arch: architecture name. When it equals ``'arm'`` the value of the
            ``T`` register at each block start is added to the block's
            ``sea``.

    Returns:
        A dict with the keys ``name``, ``id``, ``call`` (always an empty
        list), ``sea``, ``see``, ``api``, ``comments`` and ``blocks``. Each
        entry of ``blocks`` is a dict with ``id``, ``sea``, ``eea``,
        ``name``, ``dat``, ``src``, ``call`` and, when the block bytes could
        be read, ``bytes`` (lower-case hex string).
    """
    start = func.startEA

    func_surrogate = dict()
    func_surrogate['name'] = idc.GetFunctionName(start)
    func_surrogate['id'] = start
    # ignore call-graph at this moment
    func_surrogate['call'] = list()
    func_surrogate['sea'] = start
    func_surrogate['see'] = idc.FindFuncEnd(start)
    # api is optional
    func_surrogate['api'] = _get_api(start)[1]
    func_surrogate['blocks'] = list()

    # comments
    func_surrogate['comments'] = []
    func_surrogate['comments'].extend(get_comments(start))

    for bb in idaapi.FlowChart(idaapi.get_func(start)):

        block = dict()
        block['id'] = bb.id
        block['sea'] = bb.startEA
        # Compare by value: "is" tests object identity and only matches
        # when both strings happen to be the same interned object.
        if arch == 'arm':
            # for arm; the last bit indicates thumb mode.
            block['sea'] += idc.GetReg(bb.startEA, 'T')
        block['eea'] = bb.endEA
        block['name'] = 'loc_' + format(bb.startEA, 'x').upper()
        dat = {}
        block['dat'] = dat
        s = idc.GetManyBytes(bb.startEA, bb.endEA - bb.startEA)
        if s is not None:
            block['bytes'] = "".join("{:02x}".format(ord(c)) for c in s)

        func_surrogate['comments'].extend(get_comments(bb.startEA))

        instructions = list()
        for head in idautils.Heads(bb.startEA, bb.endEA):
            # Each instruction is [address, mnemonic, operand, ...].
            ins = list()
            ins.append(str(hex(head)).rstrip("L").upper().replace("0X", "0x"))
            opr = idc.GetMnem(head)
            if opr == "":
                # Heads without a mnemonic are not recorded.
                continue
            ins.append(opr)
            for i in range(5):
                opd = idc.GetOpnd(head, i)
                if opd == "":
                    continue
                ins.append(opd)
            instructions.append(ins)

            # Hex of the qword at each referenced address, keyed by the
            # instruction address; with several refs the last one wins.
            for ref in idautils.DataRefsFrom(head):
                dat[head] = binascii.hexlify(struct.pack("<Q", idc.Qword(ref)))

        block['src'] = instructions

        # flow chart: ids of the successor blocks
        block['call'] = [succ.id for succ in bb.succs()]
        func_surrogate['blocks'].append(block)
    return func_surrogate
예제 #8
0
 def drefs_from(self):
     """Destination addresses of data references from this line.

     Returns the result of ``idautils.DataRefsFrom(self.ea)`` unchanged,
     i.e. an iterable of addresses.
     """
     return idautils.DataRefsFrom(self.ea)
예제 #9
0
def track_param(ea, min_ea, op_type, op_val):
  '''
  track_param: ea, min_ea, op_type, op_val

  Taking ea as start, this function does a basic backtrace of
  an operand (defined by op_type and op_val) until it finds
  a data reference which we consider the "source". The walk goes
  backwards one head at a time and ends when ea equals min_ea
  (usually the function start) or idc.BADADDR.

  Only 'lea' and 'mov' instructions are inspected. It does not
  support arithmetic or complex modifications of the source.

  Returns the source address (last address in the chain of data
  references, or the dword read at the operand value when there is
  no data reference), or None when it cannot be determined.
  '''
  # var_re is used below through .search(...).group('varname');
  # msgsend is declared global but not used in this function.
  global msgsend, var_re

  # NOTE(review): ea_call is assigned but never read in this function.
  ea_call = ea
  while ea != idc.BADADDR and ea != min_ea:
    ea = idc.PrevHead(ea, min_ea)

    if idc.GetMnem(ea) not in ['lea', 'mov']:
      continue

    # Operand 0 is the destination, operand 1 the source.
    if idc.GetOpType(ea, 0) == op_type and idc.GetOperandValue(ea, 0) == op_val:
      if idc.GetOpType(ea, 1) == idc.o_displ:
        if ', [esp' in idc.GetDisasm(ea) or ', [ebp' in idc.GetDisasm(ea):
          if 'arg_' in idc.GetDisasm(ea):
            # We don't track function arguments
            return None

          # We only track stack variables
          try:
            var_name = var_re.search(idc.GetDisasm(ea)).group('varname')
            op_type = idc.GetOpType(ea, 1)
          except:
            print '%08x: Unable to recognize variable' % ea
            return None

          # NOTE(review): "or" binds looser than "and", so this condition
          # is: mnem == 'mov' or (mnem == 'lea' and var_name in disasm).
          # The scan also starts at the current instruction, which is
          # itself a mov/lea whose text contains var_name, so the loop
          # appears to stop on its first iteration. Confirm the intent.
          while ea != idc.BADADDR and ea > min_ea:
            if idc.GetMnem(ea) == 'mov' or idc.GetMnem(ea) == 'lea' and var_name in idc.GetDisasm(ea):
              # New reg to track
              op_val = idc.GetOperandValue(ea, 0)
              break
            ea = idc.PrevHead(ea, min_ea)

      elif idc.GetOpType(ea, 1) == idc.o_mem:
        # Got the final reference
        refs = list(idautils.DataRefsFrom(ea))
        if not refs:
          # No xref recorded: read the dword at the operand address.
          local_ref = idc.GetOperandValue(ea, 1)
          far_ref = idc.Dword(local_ref)
        else:
          # Follow the first data reference repeatedly until an address
          # with no outgoing data reference is reached.
          while len(refs) > 0:
            far_ref = refs[0]
            refs = list(idautils.DataRefsFrom(refs[0]))
        return far_ref

      elif idc.GetOpType(ea, 1) == idc.o_reg:
        # Direct reg-reg assignment: keep tracking the source register.
        op_val = idc.GetOperandValue(ea, 1)
        op_type =  idc.GetOpType(ea, 1)
      else:
        # We don't track o_phrase or other complex source operands :(
        return None

  return None
예제 #10
0
파일: ida.py 프로젝트: ohio813/Stuff
 def dataRefs(self, ea):
     """Return ``idautils.DataRefsFrom(ea)``: the data references from address ``ea``."""
     return idautils.DataRefsFrom(ea)
예제 #11
0
def _ida_string_at(addr):
    """Return ``str(s)`` for the ``IDAStrings`` entry located at ``addr``, else None."""
    for s in IDAStrings:
        if s.ea == addr:
            return str(s)
    return None


def trace_arg_bwd(ea, arg_num):
    """Walk backwards from ``ea`` looking for the string loaded into an argument.

    Starting at ``ea`` (presumably the address of a call -- confirm against
    the caller), instructions are visited in reverse. ``MOV`` propagates
    the tracked operand to its source; ``LDR``/``ADR``/``ADR.W`` and
    ``ADD <reg>, PC`` into a tracked operand end the search when their
    single reference points at an entry of ``IDAStrings``.

    When the start of a basic block is reached the walk continues in the
    predecessor block, but only if there is exactly one.

    Args:
        ea: address to start the backward walk from.
        arg_num: argument index; 0-3 map to R0-R3, larger values to "SP".

    Returns:
        ``str(s)`` of the matching ``IDAStrings`` entry, or ``""`` when no
        string is found or the walk has to give up (no function or block at
        ``ea``, zero or several predecessors, an already visited block, or
        an instruction that cannot be decoded).
    """
    ARCH = "ARM32"
    CALL_ARGS = {"ARM32": ["R0", "R1", "R2", "R3"]}

    args = CALL_ARGS[ARCH]

    # Arguments beyond the register list are tracked through "SP".
    if len(args) <= arg_num:
        arg_into = "SP"
    else:
        arg_into = args[arg_num]

    func = idaapi.get_func(ea)
    if func is None:
        # ea is not inside a function: nothing to walk.
        return ""
    fc = idaapi.FlowChart(func)

    for block in fc:
        if block.startEA <= ea < block.endEA:
            break
    else:
        # No basic block contains ea.
        return ""

    # original sink
    arg_in = set([arg_into])

    # Block starts already walked; guards against cycling forever through
    # a loop made of single-predecessor blocks.
    visited = set([block.startEA])

    while ea >= block.startEA:

        ############ BEGINNING OF TRACING ############

        mnem = idc.GetMnem(ea)

        if mnem == "MOV":
            # Propagate to the new register. If only the source is tracked
            # we do not add the destination: going backwards, that is not
            # the value that ended up being used.
            if idc.GetOpnd(ea, 0) in arg_in:
                arg_in.add(idc.GetOpnd(ea, 1))

        elif mnem in ("LDR", "ADR", "ADR.W"):
            if idc.GetOpnd(ea, 0) in arg_in:
                # There should be a data reference to a string here. We want
                # the one of type 1 (Data_Offset), as opposed to 3 (Data_Read).
                refs = [r for r in idautils.XrefsFrom(ea) if r.type == 1]
                if len(refs) == 1:
                    found = _ida_string_at(refs[0].to)
                    if found is not None:
                        return found

        elif mnem == "ADD":
            if idc.GetOpnd(ea, 1) == "PC" and idc.GetOpnd(ea, 0) in arg_in:
                # There should be a single data reference to a string here.
                drefs = list(idautils.DataRefsFrom(ea))
                if len(drefs) == 1:
                    found = _ida_string_at(drefs[0])
                    if found is not None:
                        return found

        ############ END OF TRACING ############

        if ea == block.startEA:
            # block.preds() was observed to return no predecessors, so they
            # are rebuilt from the successor lists of every block.
            preds = []
            for b in fc:
                for s in b.succs():
                    if s.startEA == block.startEA:
                        preds.append(b)

            if len(preds) != 1:
                # 0 or multiple predecessor blocks, giving up.
                return ""

            block = preds[0]
            if block.startEA in visited:
                return ""
            visited.add(block.startEA)

            # Continue at the last instruction of the predecessor.
            prev = idautils.DecodePreviousInstruction(block.endEA)
            if prev is None:
                return ""
            ea = block.endEA - prev.size

        else:
            prev = idautils.DecodePreviousInstruction(ea)
            if prev is None:
                return ""
            ea -= prev.size

    return ""
예제 #12
0
def process_function(arch, func_ea):
    """Collect comments, control flow and data references of one function.

    Starting from the flow chunk that contains ``func_ea``, every head of
    every reachable chunk is visited. Comments, calls, branches and data
    references are recorded in a fresh ``DismantlerDataPacket``. Chunks
    reported by ``get_chunks(func_ea)`` that were not reached by following
    branches are processed afterwards. Finally a ``FunctionAnalyzer`` is
    built for the packet and both are handed to ``instrumentation``.

    Arguments:
        arch: architecture helper providing ``process_instruction``,
            ``is_call``, ``is_conditional_branch`` and
            ``is_unconditional_branch``.
        func_ea: entry address of the function to process.

    Side effects:
        Adds ``(ref, name, module)`` tuples to the module-level
        ``imported_functions`` for calls through data references that do
        not themselves reference anything.
    """
    packet = DismantlerDataPacket()

    ida_chunks = get_chunks(func_ea)
    chunks = set()

    # Seed the visited set only with the main chunk, i.e. the one that
    # contains the function entry point.
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    # Keep IDA's chunks as a set, plus an index by start address, so a chunk
    # can be dropped cheaply once a chunk with that start has been processed.
    ida_chunks = set(ida_chunks)
    ida_chunks_idx = dict((c[0], c) for c in ida_chunks)

    func = idaapi.get_func(func_ea)
    # Function-level comments are attached to the first head that is visited.
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Work queue of chunks still to be processed.
    chunks_todo = list(chunks)

    def _queue_flow_chunk(ref):
        # Schedule the flow chunk containing `ref` unless `ref` already
        # lies inside a chunk that has been seen.
        if address_in_chunks(ref, chunks):
            return
        new_chunk = get_flow_code_from_address(ref)
        if new_chunk is not None:
            chunks_todo.append(new_chunk)
            chunks.add(new_chunk)

    while True:

        # When the queue runs dry, fall back to the chunks IDA knows about
        # that have not been processed yet; stop when none are left.
        if not chunks_todo:
            if not ida_chunks:
                break
            chunks_todo.extend(ida_chunks)

        chunk_start, chunk_end = chunks_todo.pop()
        if chunk_start in ida_chunks_idx:
            ida_chunks.remove(ida_chunks_idx.pop(chunk_start))

        for head in idautils.Heads(chunk_start, chunk_end):

            # Merge pending comments with the regular and repeatable
            # comments of this head into a single comment.
            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None]).strip()
            if comment:
                packet.add_comment(head, comment)
            comments = []

            if not idc.isCode(idc.GetFlags(head)):
                continue

            instruction = arch.process_instruction(packet, head)

            # Code references excluding ordinary flow. They do not change
            # while the references of this head are walked, so fetch once.
            non_flow_refs = list(idautils.CodeRefsFrom(head, 0))

            # Only look at the references when something other than plain
            # flow leaves this instruction.
            if non_flow_refs:

                # Walk every reference, including the flow one.
                for ref_idx, ref in enumerate(idautils.CodeRefsFrom(head, 1)):

                    if arch.is_call(instruction):
                        # These two conditions must remain separated: the
                        # outer branch has to be taken for every call,
                        # otherwise the call would be added as an
                        # unconditional jump by the last branch below.
                        if ref in non_flow_refs:
                            packet.add_direct_call(head, ref)

                    elif ref_idx > 0 and arch.is_conditional_branch(
                            instruction):
                        # ref_idx > 0 skips the normal flow reference, which
                        # would otherwise make the conditional branch be
                        # processed twice. The loop deliberately keeps
                        # iterating CodeRefsFrom(head, 1) so the other
                        # branches of this chain see the same references.

                        # FIXME
                        # The reference is not checked to point to valid,
                        # defined code. This could lead to a failure when
                        # exporting; an isHead()/isCode() test on the flags
                        # of `ref` would probably address it.
                        packet.add_conditional_branch_true(head, ref)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))
                        _queue_flow_chunk(ref)

                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, ref)
                        _queue_flow_chunk(ref)

            for ref in idautils.DataRefsFrom(head):
                packet.add_data_reference(head, ref)

                # Follow one more level: the first data reference leaving
                # the referenced location, if there is one.
                targets = list(idautils.DataRefsFrom(ref))
                target = targets[0] if targets else None

                if target is None and arch.is_call(instruction):
                    # Call through a location that references nothing
                    # further: record it as an imported function.
                    imp_name = idc.Name(ref)
                    imp_module = get_import_module_name(ref)

                    imported_functions.add((ref, imp_name, imp_module))
                    packet.add_indirect_virtual_call(head, ref)

                # idc.isHead() classifies a flags value, not an address, so
                # the flags of the target are fetched first (same pattern as
                # the isCode() test on `head` above).
                elif target is not None and idc.isHead(idc.GetFlags(target)):
                    # for calls "routed" through this reference
                    if arch.is_call(instruction):
                        packet.add_indirect_call(head, target)

                    # for unconditional jumps "routed" through this reference
                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, target)

                    # for conditional jumps "routed" through this reference
                    elif arch.is_conditional_branch(instruction):
                        packet.add_conditional_branch_true(head, target)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
예제 #13
0
    def block(self, block):
        """
        Build the signature data for one basic block.

        Every instruction head from block.start_ea up to (not including)
        block.end_ea is visited and contributes to a "formal" and a "fuzzy"
        token list, which are joined and hashed with self.sighash().

        Returns a tuple:
            (formal signature hash, fuzzy signature hash,
             [immediate, values], [called, function, names])

        Both lists are in encounter order and may contain duplicates.
        """
        formal = []
        fuzzy = []
        functions = []
        immediates = []

        ea = block.start_ea
        insn = idaapi.insn_t()
        while ea < block.end_ea:
            # decode_insn() fills `insn` in place; a falsy result means the
            # decode failed and `insn` must not be trusted for this address.
            decoded = bool(idaapi.decode_insn(insn, ea))

            # Get a list of all data/code references from the current instruction
            drefs = list(idautils.DataRefsFrom(ea))
            crefs = list(idautils.CodeRefsFrom(ea, False))

            # Add all instruction mnemonics to the formal block hash
            formal.append(idc.print_insn_mnem(ea))

            if idaapi.is_call_insn(ea):
                # Note the name of every called function; these are used for
                # call-based signatures. The fuzzy signature cannot use the
                # name or address, so it only records that a call was made.
                # The formal signature already has the call mnemonic.
                for cref in crefs:
                    func_name = idc.get_name(cref, ida_name.GN_VISIBLE)
                    if not func_name:
                        continue
                    functions.append(func_name)
                    fuzzy.append('funcref')
            elif drefs:
                # Data references found in the pre-generated strings
                # dictionary contribute the string value to both signatures;
                # anything else is recorded as a generic "dataref".
                for dref in drefs:
                    if dref in self.strings:
                        token = self.strings[dref].value
                    else:
                        token = 'dataref'
                    formal.append(token)
                    fuzzy.append(token)
            elif not crefs:
                # No data or code references: every operand slot's text goes
                # into the formal signature. The fuzzy signature only keeps
                # immediates that are not below 0xFFFF, are not displayed as
                # negative, and whose value has no flags in the database
                # (i.e. does not look like a mapped address).
                #
                # The operands are read from the instruction decoded above;
                # a freshly constructed insn_t carries no operand data.
                for n in range(len(insn.ops)):
                    opnd_text = idc.print_operand(ea, n)
                    formal.append(opnd_text)

                    if not decoded:
                        continue

                    operand = insn.ops[n]
                    if operand.type != idaapi.o_imm or opnd_text.startswith('-'):
                        continue

                    if operand.value < 0xFFFF:
                        continue

                    if idaapi.get_full_flags(operand.value) != 0:
                        continue

                    fuzzy.append(str(operand.value))
                    immediates.append(operand.value)

            ea = idc.next_head(ea)

        return self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions
예제 #14
0
    def objc_msgsend_xref(self,
                          call_ea,
                          objc_self,
                          objc_selector,
                          create_xref=True):
        '''
        Resolve the Objective-C method behind a selector load and optionally
        add a code cross-reference to its implementation.

        call_ea : location of the call/jmp to objc_msgSend (direct or
                  indirect); accepted but not used in this method
        objc_self : ea where the `self` argument is set; accepted but not
                  used in this method
        objc_selector : ea of the instruction that loads the selector; the
                  lookup starts here and the xref is created from here
        create_xref : when true, add a code xref (idc.fl_CF) from
                  objc_selector to the resolved method

        The chain followed is:
            objc_selector -> __objc_selrefs -> __objc_methname
            <- __objc_const entry -> __text

        Returns True when a target method was found, False otherwise.

        Known limitation: the `self` argument is ignored, so when several
        methods share the same name the last candidate visited wins.
        '''

        # Disassembly text is only needed for the debug trace below.
        disasm_text = idc.GetDisasm(objc_selector)
        if self.debugflag:
            print(">>> objc_msgsend_xref 0x%08x %s" %
                  (objc_selector, disasm_text))

        # Outbound reference into __objc_selrefs; if there are several, the
        # last one is used.
        selref_hits = [
            ref for ref in idautils.DataRefsFrom(objc_selector)
            if idc.get_segm_name(ref) == "__objc_selrefs"
        ]
        selref_ea = selref_hits[-1] if selref_hits else None
        if not selref_ea:
            return False

        # From the selector reference on to the method name string, again
        # keeping the last match.
        methname_hits = [
            ref for ref in idautils.DataRefsFrom(selref_ea)
            if idc.get_segm_name(ref) == "__objc_methname"
        ]
        methname_ea = methname_hits[-1] if methname_hits else None
        if not methname_ea:
            return False

        # Walk the inbound references to the method name, looking for
        # __objc_const entries that reference the __objc2_meth structure.
        # BUG: when the binary has multiple objc methods by the same name,
        # this logic fails; the `self` register would have to be tracked.
        meth_struct_id = ida_struct.get_struc_id("__objc2_meth")
        struct_seen = False
        impl_ea = None
        for const_ref in idautils.DataRefsTo(methname_ea):
            if idc.get_segm_name(const_ref) != "__objc_const":
                continue

            # Once set, the flag stays set for all later entries as well.
            if meth_struct_id in list(idautils.DataRefsFrom(const_ref)):
                struct_seen = True
            if not struct_seen:
                continue

            # TODO: check against the `self` argument here to make sure it
            # is the proper class.
            for out_ref in idautils.DataRefsFrom(const_ref):
                # assumption: the implementation always lives in __text
                if idc.get_segm_name(out_ref) != "__text":
                    continue
                if self.debugflag:
                    owner_name = idc.get_name(
                        idc.get_func_attr(out_ref, idc.FUNCATTR_START))
                    print("0x%08x checking for the proper method -- %s" %
                          (out_ref, owner_name))
                # remember the function pointer; later matches overwrite it
                impl_ea = out_ref

        if not impl_ea:
            return False

        # A target was found in this database; without one no xref is made.
        if self.debugflag:
            print("Found target method 0x%08x" % impl_ea)
        if create_xref:
            ida_xref.add_cref(objc_selector, impl_ea, idc.fl_CF)

        return True