def get_strings_per_function(self, start_func):
    """Collect the strings a function references and record them as its comment.

    Every head between ``start_func`` and ``idc.FindFuncEnd(start_func)`` is
    inspected; each data reference leaving a head is resolved through
    ``self.get_string_type``.  Results longer than two characters are kept
    and counted in ``self.string_counter``.  If anything was kept, the
    function comment is set to ``STR <count># "s1" "s2" ...`` with newlines
    stripped from the individual strings.

    :param start_func: address used to look the function up.
    """
    func_obj = idaapi.get_func(start_func)
    if not func_obj:
        print("func_obj return 0")
        return

    self.clear_comments(start_func, func_obj)

    collected = []
    for head in idautils.Heads(start_func, idc.FindFuncEnd(start_func)):
        try:
            # All references of a head are resolved before any is stored:
            # a StringException on one of them discards the whole head.
            resolved = [
                self.get_string_type(target)
                for target in idautils.DataRefsFrom(head)
            ]
            for text in resolved:
                if len(text) > 2:
                    collected.append(text)
                    self.string_counter += 1
        except StringException:
            continue

    if not collected:
        return

    quoted = ''.join('"' + text.replace('\n', '') + '" ' for text in collected)
    idaapi.set_func_cmt(func_obj,
                        'STR {}# {}'.format(len(collected), quoted), 1)
Beispiel #2
0
def create_pe():
    """Instrument the ``.text`` instructions and emit the new section data.

    Steps, in order:

    1. Locate the segment named ``.text`` (the bounds stay 0 if none exists;
       if several match, the last one wins).
    2. Call ``instrument`` on the disassembly of every function head that
       lies inside those bounds.
    3. Concatenate the ``build_section_data`` output for every entry of the
       module-level ``ori_op`` / ``args0`` / ``args1`` / ``args2`` lists,
       remembering the offset at which each entry starts.
    4. Prepend the result of ``add_dispatch_function`` and write everything
       to ``INPUT_PE + '_newSectionData'``.
    5. Hand the data to ``insert_section``.
    """
    text_start = text_end = 0
    for seg in Segments():
        if idc.SegName(seg) == ".text":
            text_start = idc.SegStart(seg)
            text_end = idc.SegEnd(seg)

    for func in idautils.Functions():
        for each_step in idautils.Heads(func, idc.FindFuncEnd(func)):
            op = idc.GetDisasm(each_step)
            if text_start <= each_step < text_end:
                instrument(op, each_step)

    section_data = ''
    offsets = []
    for index in range(len(ori_op)):
        # Offsets are relative to the start of the generated code, i.e.
        # measured before the dispatch function is prepended below.
        offsets.append(len(section_data))
        section_data += build_section_data(args0[index], args1[index],
                                           args2[index])

    # add dispatch function
    len_funs = len(section_data)
    section_data = add_dispatch_function(ori_address, offsets) + section_data

    # The context manager closes the file even if the write fails.
    with open(INPUT_PE + '_newSectionData', 'wb') as section_file:
        section_file.write(section_data)

    insert_section(len(section_data), section_data, len_funs)
Beispiel #3
0
    def codeify(self, ea=idc.BADADDR):
        """Turn undefined bytes in code segments into functions or code.

        Starting at ``ea`` the address space is walked with ``idc.NextAddr``.
        Addresses whose segment type equals ``self.CODE`` and that already
        belong to a named function are skipped by jumping to the function
        end; for the others ``idc.MakeFunction`` is tried first and
        ``idc.MakeCode`` second.  A summary is reported through ``self.say``.

        :param ea: start address.  When it is ``idc.BADADDR`` the start comes
            from ``self.get_start_ea(self.CODE)``, falling back to
            ``idc.FirstSeg()``.
        """
        func_count = 0
        code_count = 0

        if ea == idc.BADADDR:
            ea = self.get_start_ea(self.CODE)
            if ea == idc.BADADDR:
                ea = idc.FirstSeg()

        self.say("\nLooking for undefined code starting at: %s:0x%X" %
                 (idc.SegName(ea), ea))

        while ea != idc.BADADDR:
            try:
                if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) == self.CODE:
                    if idc.GetFunctionName(ea) != '':
                        ea = idc.FindFuncEnd(ea)
                        continue
                    if idc.MakeFunction(ea):
                        func_count += 1
                    elif idc.MakeCode(ea):
                        code_count += 1
            except Exception:
                # Best effort: a failure at one address must not stop the
                # scan.  Unlike a bare ``except``, this still lets
                # KeyboardInterrupt / SystemExit abort the loop.
                pass

            ea = idc.NextAddr(ea)

        self.say("Created %d new functions and %d new code blocks\n" %
                 (func_count, code_count))
Beispiel #4
0
    def run(self):
        """Count instruction and instruction-group usage per function.

        Functions are taken from the segment that contains ``idc.BeginEA()``.
        For each one, fresh copies of ``self.instructions`` and
        ``self.groups`` are used as counters while walking the heads of the
        function; the totals are then passed to
        ``self.writeInstructionFeatures`` and
        ``self.writeInstructionGroupFeatures``.

        :return: dict mapping each function name to its start address.
        """
        self.loadInstructionList()
        self.loadInstructionGroups()
        functionNamesToEA = {}

        ea = idc.BeginEA()
        for funcea in Functions(idc.SegStart(ea), idc.SegEnd(ea)):
            functionInstructions = copy.deepcopy(self.instructions)
            functionGroups = copy.deepcopy(self.groups)
            instructionSum = 0
            allGroupSum = 0
            functionName = idc.GetFunctionName(funcea)
            functionNamesToEA[functionName] = funcea

            # The function end does not change during the walk; look it up once.
            functionEnd = idc.FindFuncEnd(funcea)
            currentea = funcea
            while currentea != idc.BADADDR and currentea < functionEnd:
                currentInstruction = idc.GetMnem(currentea)
                if currentInstruction in self.instructions:
                    functionInstructions[currentInstruction] += 1
                    instructionSum += 1

                for group in self.groups:
                    # Index 0 holds the per-mnemonic counters of the group,
                    # index 1 the total for the group.
                    if currentInstruction in self.groups[group][0]:
                        functionGroups[group][1] += 1
                        functionGroups[group][0][currentInstruction] += 1
                        allGroupSum += 1

                currentea = idc.NextHead(currentea)

            self.writeInstructionFeatures(self.instructions, instructionSum,
                                          functionInstructions, functionName)
            self.writeInstructionGroupFeatures(self.groups, allGroupSum,
                                               functionGroups, functionName)
        return functionNamesToEA
Beispiel #5
0
    def run(self, arg):
        """Export every name in the database to two symbol files.

        Next to the IDB (same path, extension replaced) two files are
        written, each with one ``"%08X %08X;%s"`` line per entry of
        ``Names()``:

        * ``.sym`` uses the address minus the PE image base,
        * ``.raw.sym`` uses ``idaapi.get_fileregion_offset(address)``.

        The size column is the function length when
        ``idc.GetFunctionFlags(address)`` is not -1, otherwise 4.

        :param arg: unused by this method.
        """
        print("Running")
        PE = peutils_t()
        print("Image base is %016X" % PE.imagebase)
        print("Exporting functions...")
        basePath = os.path.splitext(idc.GetIdbPath())[0]
        filename = basePath + ".sym"
        rawOffsetsFilename = basePath + ".raw.sym"

        count = 0
        # Context managers close both files even if an export step fails.
        with open(filename, 'w') as f, \
                open(rawOffsetsFilename, 'w') as rawOffsetsFile:
            for address, name in Names():
                offset = address - PE.imagebase
                rawOffset = idaapi.get_fileregion_offset(address)
                if idc.GetFunctionFlags(address) != -1:
                    size = idc.FindFuncEnd(address) - address
                else:
                    size = 4

                count += 1
                f.write("%08X %08X;%s\n" % (offset, size, name))
                rawOffsetsFile.write("%08X %08X;%s\n" %
                                     (rawOffset, size, name))

        # The counter covers every exported name, not only functions.
        print("%d functions exported" % count)
def find_unusual_xors(functions):
    """Return the functions that contain a suspicious XOR instruction.

    An XOR is of interest when its two operands differ.  For such an
    instruction the neighbouring heads are looked up; if at least one of them
    is non-zero and ``is_security_cookie`` rejects the instruction, the
    function is recorded and the scan moves on to the next function.

    :param functions: iterable of function start addresses.
    :return: list of start addresses, at most one entry per scanned function.
    """
    # TODO find xors in tight loops
    suspects = []
    for func_va in functions:
        insn_va = func_va
        while insn_va != idaapi.BADADDR and insn_va < idc.FindFuncEnd(func_va):
            differing_xor = (
                idc.GetMnem(insn_va) == "xor"
                and idc.GetOpnd(insn_va, 0) != idc.GetOpnd(insn_va, 1))
            if differing_xor:
                g_logger.debug(
                    "suspicious XOR instruction at 0x%08X in function 0x%08X: %s",
                    insn_va, func_va, idc.GetDisasm(insn_va))
                prev_va = idc.PrevHead(insn_va)
                next_va = idc.NextHead(insn_va)
                prev_text = idc.GetDisasm(prev_va)
                next_text = idc.GetDisasm(next_va)
                if prev_text and next_text:
                    g_logger.debug("Instructions: %s;  %s;  %s", prev_text,
                                   idc.GetDisasm(insn_va), next_text)
                if prev_va or next_va:
                    if not is_security_cookie(insn_va, prev_va, next_va):
                        g_logger.debug("unusual XOR: %s",
                                       idc.GetDisasm(insn_va))
                        suspects.append(func_va)
                        # One hit is enough for this function.
                        break
                    g_logger.debug("XOR related to security cookie: %s",
                                   idc.GetDisasm(insn_va))
            insn_va = idc.NextHead(insn_va)
    return suspects
Beispiel #7
0
    def __init__(self):
        """Name the stack slots that hold saved registers in every function.

        For each function from ``idautils.Functions()`` the code steps through
        addresses in increments of ``self.INSIZE``, starting at the function
        start and covering at most ``self.INSIZE * self.SEARCH_DEPTH`` bytes.
        ``sw``/``sd`` instructions that store a register listed in
        ``self.ARCH['savedregs']`` to a ``var_XX($sp)`` operand get a local
        variable named ``saved_<reg>`` (the register name without its first
        character).  Progress is reported with print statements.
        """
        print "Naming saved register locations...",

        for ea in idautils.Functions():
            mea = ea
            # Registers already named in this function; each is named once.
            named_regs = []
            # Set when a 'j*'/'b*' mnemonic was seen: exactly one further
            # instruction is examined before the scan of this function stops.
            # NOTE(review): presumably to cover a branch delay slot - confirm.
            last_iteration = False

            while mea < (ea + (self.INSIZE * self.SEARCH_DEPTH)):
                mnem = idc.GetMnem(mea)

                if mnem in ['sw', 'sd']:
                    reg = idc.GetOpnd(mea, 0)
                    dst = idc.GetOpnd(mea, 1)

                    if reg in self.ARCH['savedregs'] and reg not in named_regs and dst.endswith('($sp)') and 'var_' in dst:
                        # The text between 'var_' and '(' is parsed as a hex offset.
                        offset = int(dst.split('var_')[1].split('(')[0], 16)
                        idc.MakeLocal(ea, idc.FindFuncEnd(ea), "[sp-%d]" % offset, "saved_%s" % reg[1:])
                        named_regs.append(reg)

                if last_iteration:
                    break
                elif mnem.startswith('j') or mnem.startswith('b'):
                    last_iteration = True

                mea += self.INSIZE

        print "done."
Beispiel #8
0
    def codeify(self, ea=idc.BADADDR):
        """Turn undefined bytes in code segments into functions or code.

        Starting at ``ea`` the address space is walked with ``idc.NextAddr``.
        Addresses whose segment type equals ``self.CODE`` and that already
        belong to a named function are skipped by jumping to the function
        end; for the others ``idc.MakeFunction`` is tried first and
        ``idc.MakeCode`` second.  Progress and a final summary are printed,
        plus a warning when ``self.get_start_ea(self.DATA)`` finds nothing.

        :param ea: start address.  When it is ``idc.BADADDR`` the start comes
            from ``self.get_start_ea(self.CODE)``, falling back to
            ``idc.FirstSeg()``.
        """
        func_count = 0
        code_count = 0

        if ea == idc.BADADDR:
            ea = self.get_start_ea(self.CODE)
            if ea == idc.BADADDR:
                ea = idc.FirstSeg()

        # A single parenthesised argument prints the same text whether
        # ``print`` is a statement or a function.
        print("\nLooking for undefined code starting at: %s:0x%X" % (
            idc.SegName(ea), ea))

        if self.get_start_ea(self.DATA) == idc.BADADDR:
            print("WARNING: No data segments defined! I don't know where the code segment ends and the data segment begins.")

        while ea != idc.BADADDR:
            try:
                if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) == self.CODE:
                    if idc.GetFunctionName(ea) != '':
                        ea = idc.FindFuncEnd(ea)
                        continue
                    if idc.MakeFunction(ea):
                        func_count += 1
                    elif idc.MakeCode(ea):
                        code_count += 1
            except Exception:
                # Best effort: a failure at one address must not stop the
                # scan.  Unlike a bare ``except``, this still lets
                # KeyboardInterrupt / SystemExit abort the loop.
                pass

            ea = idc.NextAddr(ea)

        print("Created %d new functions and %d new code blocks\n" % (
            func_count, code_count))
    def make_new_functions_heuristic_push_regs(self, makefunction=False):
        '''
        Scan from idc.MinEA() to idc.MaxEA() for code that is not yet part of
        a function and whose mnemonic is PUSH, treating it as a likely
        function prologue.

        With makefunction=True, idc.MakeFunction is called on each such
        address; otherwise the scan only walks over the candidates.
        '''
        cursor = idc.MinEA()
        limit = idc.MaxEA()

        while cursor < limit:
            if self.printflag:
                print("EA %08x" % cursor)

            # Already inside a defined function: jump straight to its end.
            if idc.GetFunctionAttr(cursor, idc.FUNCATTR_START) != idc.BADADDR:
                cursor = idc.FindFuncEnd(cursor)
                continue

            # Candidate prologue: a code item whose mnemonic is PUSH.
            push_here = (ida_bytes.isCode(ida_bytes.getFlags(cursor))
                         and idc.GetMnem(cursor) == "PUSH")
            if push_here:
                if makefunction:
                    if self.printflag:
                        print("Converting code to function @ %08x" % cursor)
                    idc.MakeFunction(cursor)

                # If a function exists here now, continue after it.
                created_end = idc.FindFuncEnd(cursor)
                if created_end != idc.BADADDR:
                    cursor = created_end
                    continue

            # Otherwise advance to the next code item, or by one byte when
            # the search finds nothing.
            following = ida_search.find_code(cursor, idc.SEARCH_DOWN)
            cursor = following if following != idc.BADADDR else cursor + 1
Beispiel #10
0
    def make_funcs_from_prof(self, binprof):
        """
        Create IDA functions for the function addresses of a single
        callgrind profile.

        Each key of ``binprof`` is converted with ``int()`` and passed to
        ``idc.MakeFunction`` together with ``idc.FindFuncEnd`` of that
        address.  Returns the number of calls that reported success.
        """
        addresses = (int(key) for key in binprof.keys())
        return sum(1 for ea in addresses
                   if idc.MakeFunction(ea, idc.FindFuncEnd(ea)))
def _process_mod_init_func_for_metaclasses(func, found_metaclass):
    """Emulate a __mod_init_func function and report OSMetaClass constructor calls.

    The function is run through ``_emulate_arm64`` with a BL callback.  A
    call is reported via ``found_metaclass(X0, name, X3, X2 or None)`` when
    X0, X1 and X3 are all truthy, X1 lies in a segment whose name ends with
    ``__TEXT.__cstring`` and X0 lies in a named segment.
    """
    _log(4, 'Processing function {}', idc.GetFunctionName(func))

    def on_BL(addr, reg):
        this_ptr, name_ptr, class_size = reg['X0'], reg['X1'], reg['X3']
        if not this_ptr or not name_ptr or not class_size:
            return
        _log(5, 'Have call to {:#x}({:#x}, {:#x}, ?, {:#x})', addr, this_ptr,
             name_ptr, class_size)
        # OSMetaClass::OSMetaClass(this, className, superclass, classSize)
        name_in_cstring = idc.SegName(name_ptr).endswith("__TEXT.__cstring")
        if name_in_cstring and idc.SegName(this_ptr):
            found_metaclass(this_ptr, idc.GetString(name_ptr), class_size,
                            reg['X2'] or None)

    _emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL)
Beispiel #12
0
    def getFunc(self, ea=None, next=False):
        if ea == None:
            ea = idaapi.get_screen_ea()

        if next:
            ea = idc.NextFunction(ea)
            if ea == -1:
                return (0xFFFFFFFFL, 0xFFFFFFFFL)

        if ea < 0:
            return (0xFFFFFFFFL, 0xFFFFFFFFL)
        elif idc.GetFunctionName(ea) == idc.GetFunctionName(idc.PrevAddr(ea)):
            ea = idc.PrevFunction(ea)
        return (ea, idc.FindFuncEnd(ea))
Beispiel #13
0
    def add_func(self, ea):
        """Register a newly created function for processing.

        Every address from ``ea`` up to the function end has its object
        version deleted, then ``ea`` is queued in
        ``self.addresses_to_process`` and ``self.segment_address_to_process``
        and an automatic "Create function" comment is recorded.

        :param ea: start address of the new function.
        """
        # Invalidate all addresses in this function (they now depend,
        # relatively, on this function and not on plain code).
        logger.warning("Warning : deletion of objects not implemented")
        # TODO : implement deletion of objects inside newly created function range
        # TODO : use function chunks to iterate over function code
        func_end = idc.FindFuncEnd(ea)
        # FindFuncEnd yields BADADDR when there is no function at ``ea``;
        # iterating up to that value would walk the entire address space.
        if func_end != idc.BADADDR:
            # The end is looked up once instead of on every loop iteration.
            func_end = int(func_end)
            ea_index = int(ea)
            while ea_index < func_end:
                self.delete_object_version_for_ea(ea_index)
                ea_index += 1

        # self.update_object_version_from_idb(ea)
        self.addresses_to_process.add(ea)
        self.segment_address_to_process.add(ea)
        self.repo_manager.add_auto_comment(ea, "Create function")
Beispiel #14
0
def find_function_strings(func_ea):
    """Return ``String`` objects for the data references of a function.

    Every head of the function is examined; each data reference from it is
    wrapped as ``String(head, reference)``.  References for which the
    constructor raises ``StringParsingException`` are skipped.

    :param func_ea: function start address.
    :return: list of ``String`` objects, or ``None`` when
        ``idc.FindFuncEnd`` returns ``BADADDR``.
    """
    end_ea = idc.FindFuncEnd(func_ea)
    if end_ea == idaapi.BADADDR:
        return None

    found = []
    for head in idautils.Heads(func_ea, end_ea):
        for target in idautils.DataRefsFrom(head):
            try:
                parsed = String(head, target)
            except StringParsingException:
                continue
            found.append(parsed)
    return found
Beispiel #15
0
def get_list_of_function_instr(addr, mode):
    """Return the instruction offsets of the function at ``addr``.

    All chunks reported by ``enumerate_function_chunks(addr)`` are walked.
    For every head that is code, its address minus the image base is
    stored as a lowercase hex string without the ``0x`` prefix.

    :param addr: function entry point.
    :param mode: currently unused (see TODO).
    :return: list of hex strings.
    :raises Exception: if a head equals ``idaapi.BADADDR``.
    """
    #TODO follow subcalls MODE_INSTRUMENT_SUBCALLS
    chunks = enumerate_function_chunks(addr)
    image_base = idaapi.get_imagebase(addr)
    list_of_addr = list()
    for chunk in chunks:
        for head in idautils.Heads(chunk[0], chunk[1]):
            # The previous check compared the integer head against a hex
            # *string*, which can never be equal; compare against the
            # invalid-address constant instead.
            if head == idaapi.BADADDR:
                raise Exception("Invalid head for parsing")
            # If the element is an instruction
            if isCode(idc.GetFlags(head)):
                list_of_addr.append(format(head - image_base, 'x'))
    # The collected list used to be built and then discarded.
    return list_of_addr
def find_shifts(functions):
    """Score functions by the variety of shift/rotate mnemonics they use.

    For every function the distinct mnemonics among ``shl``, ``shr``,
    ``sar``, ``sal``, ``rol`` and ``ror`` are collected; the score is one
    minus the fraction of those six that do *not* occur.

    :param functions: iterable of function start addresses.
    :return: dict mapping each start address to its score (a float).
    """
    # TODO better to compare number of shifts to overall instruction count?
    # TODO find shifts in tight loops
    known_shifts = {"shl", "shr", "sar", "sal", "rol", "ror"}
    total_kinds = len(known_shifts)
    scores = {}
    for func_va in functions:
        seen = set()
        insn_va = func_va
        while insn_va != idaapi.BADADDR and insn_va < idc.FindFuncEnd(func_va):
            mnem = idc.GetMnem(insn_va)
            if mnem in known_shifts:
                seen.add(mnem)
                g_logger.debug("shift instruction: %s va: 0x%x function: 0x%x",
                               idc.GetDisasm(insn_va), insn_va, func_va)
            insn_va = idc.NextHead(insn_va)
        missing = total_kinds - len(seen)
        scores[func_va] = 1 - (missing / float(total_kinds))
    return scores
def get_list_of_function_instr(addr):
    """Collect instruction offsets and call names for the function at ``addr``.

    Walks every chunk from ``enumerate_function_chunks`` and, for each head
    that is code, records the result of ``get_call_name`` (when not None)
    and the head's offset from the image base as a hex string without the
    ``0x`` prefix or a trailing ``L``.

    NOTE(review): as visible here the function ends without a ``return``, so
    both lists are discarded and callers receive ``None`` - confirm what the
    intended return value is.
    """
    f_start = addr
    # NOTE(review): f_end is assigned but not used below.
    f_end = idc.FindFuncEnd(addr)
    chunks = enumerate_function_chunks(f_start)
    list_of_addr = list()
    list_of_calls = list()
    image_base = idaapi.get_imagebase(addr)
    for chunk in chunks:
        for head in idautils.Heads(chunk[0], chunk[1]):
            # If the element is an instruction
            # NOTE(review): hex() returns a string, so if ``head`` is an
            # integer address this comparison is never true - confirm.
            if head == hex(0xffffffffL):
                raise Exception("Invalid head for parsing")
            if idc.isCode(idc.GetFlags(head)):
                call_name = get_call_name(head)
                if call_name != None:
                    list_of_calls.append(call_name)
                # Convert to an image-relative offset rendered as bare hex.
                head = head - image_base
                head = str(hex(head))
                head = head.replace("L", "")
                head = head.replace("0x", "")
                list_of_addr.append(head)
Beispiel #18
0
def GetFuncInputSurrogate(func, binaryName):
    """Describe one function and its basic blocks as nested dicts.

    The result has the form ``{'name': binaryName, 'functions': [function]}``
    where ``function`` carries the name, start/end addresses and one entry
    per basic block of the flow chart.  Each block entry holds its id,
    start/end addresses, the disassembly text of its heads (one
    ``"<address> <disasm> \\r\\n"`` line per head) and the ids of its
    successor blocks.
    """
    start_ea = func.startEA
    # NOTE(review): the function object itself, not its address, is passed
    # here - confirm that this GetFunctionName accepts it.
    func_name = GetFunctionName(func)

    block_entries = list()
    function = {
        'name': func_name,
        'id': start_ea,
        # ignore call-graph at this moment
        'call': list(),
        'sea': start_ea,
        'see': idc.FindFuncEnd(start_ea),
        'blocks': block_entries,
    }

    # basic block content
    for bblock in idaapi.FlowChart(idaapi.get_func(start_ea)):
        block_id = bblock.id
        block_start = bblock.startEA
        block_end = bblock.endEA

        listing = [
            '%s %s \r\n' % (str(head),
                            unicode(idc.GetDisasm(head), errors='replace'))
            for head in idautils.Heads(bblock.startEA, bblock.endEA)
        ]

        block_entries.append({
            'id': block_id,
            'sea': block_start,
            'eea': block_end,
            'src': ''.join(listing),
            # flow chart: ids of the successor blocks
            'call': [succ.id for succ in bblock.succs()],
        })

    return {'name': binaryName, 'functions': [function]}
Beispiel #19
0
def get_list_of_function_instr(addr):
    '''
    The function returns a list of instructions from a function
    @addr - is function entry point
    @return - list of instruction's addresses, each given as the offset
              from the image base rendered as lowercase hex without "0x"
    '''
    chunks = enumerate_function_chunks(addr)
    image_base = idaapi.get_imagebase(addr)
    list_of_addr = list()
    for chunk in chunks:
        for head in idautils.Heads(chunk[0], chunk[1]):
            # The previous check compared the integer head against a hex
            # *string*, which can never be equal; compare against the
            # invalid-address constant instead.
            if head == idaapi.BADADDR:
                raise Exception("Invalid head for parsing")
            # If the element is an instruction
            if idc.isCode(idc.GetFlags(head)):
                list_of_addr.append(format(head - image_base, 'x'))
    # Return the list the docstring promises; it used to be discarded.
    return list_of_addr
Beispiel #20
0
def find_function_callees(func_ea, maxlvl):
    """Collect functions reachable through code references from ``func_ea``.

    A work set of addresses is processed until it is empty.  Every popped
    address that has a function name is added to the result.  Its outgoing
    code references (those leaving the ``[start, end]`` range of the
    function) are queued only while the number of expanded functions is
    below ``maxlvl``.

    :param func_ea: start address of the root function.
    :param maxlvl: limit on how many functions get their references expanded.
    :return: list of function addresses in the order they were visited.
    """
    ALL_XREFS = 0

    callees = []
    visited = set()
    pending = set((func_ea, ))
    lvl = 0

    while pending:
        func_ea = pending.pop()
        visited.add(func_ea)

        if not idc.GetFunctionName(func_ea):
            continue
        callees.append(func_ea)

        func_end = idc.FindFuncEnd(func_ea)
        if func_end == idaapi.BADADDR:
            continue

        # ``lvl`` counts expanded functions, not call depth.
        lvl += 1
        if lvl >= maxlvl:
            continue

        all_refs = set()
        for line in idautils.Heads(func_ea, func_end):
            if not ida_bytes.isCode(get_flags(line)):
                continue

            # Keep only targets outside the current function's range.
            refs = set(target
                       for target in idautils.CodeRefsFrom(line, ALL_XREFS)
                       if target < func_ea or target > func_end)
            all_refs |= refs

        all_refs -= visited
        pending |= all_refs

    return callees
Beispiel #21
0
    def make_funcs_from_profiles(self, firstprof, secondprof):
        """
        Create IDA functions for the function addresses that appear in both
        callgrind profiles.

        Keys are taken in the iteration order of ``firstprof``; each common
        key is converted with ``int()`` and passed to ``idc.MakeFunction``
        together with ``idc.FindFuncEnd`` of that address.

        Returns the number of ``idc.MakeFunction`` calls that succeeded.
        """
        # A set gives constant-time membership tests.  The former second
        # pass over ``secondprof`` could never add anything: every key found
        # in both profiles is already selected while walking ``firstprof``.
        second_keys = set(secondprof.keys())

        count = 0
        for key in firstprof.keys():
            if key not in second_keys:
                continue
            ea = int(key)
            if idc.MakeFunction(ea, idc.FindFuncEnd(ea)):
                count += 1
        return count
Beispiel #22
0
def main():
    """Redirect the AFL logging call in a selection of instrumented functions.

    1. Find the addresses of the functions whose names contain
       ``afl_maybe_log_fun`` and ``afl_maybe_log``; stop with a message if
       either is missing.
    2. Collect every function longer than 0x28 bytes that has a ``call`` at
       a fixed offset from its start (0x1A when ``idc.__EA64__`` is set,
       0x08 otherwise) whose target name contains ``afl_maybe_log``.
    3. Sort the candidates by size and, going from largest to smallest,
       patch the call of every function smaller than 0x200 bytes, and of
       roughly 80% (random) of the others in the lower 90% of the list, so
       that it targets ``afl_maybe_log``.

    Any exception is printed instead of propagated; the elapsed time since
    ``g_time_start`` is printed at the end.
    """
    idaapi.msg("alter instrument function\n")

    addr_afl_maybe_log_fun = 0
    addr_afl_maybe_log = 0
    fun_info = []

    try:
        for func in idautils.Functions():
            fun_name = idc.GetFunctionName(func)
            # Substring tests: ``find(...) > 0`` used to miss a name that
            # *starts* with the text (index 0).  The longer name is tested
            # first because it contains the shorter one.
            if 'afl_maybe_log_fun' in fun_name:
                addr_afl_maybe_log_fun = func
            elif 'afl_maybe_log' in fun_name:
                addr_afl_maybe_log = func
            if addr_afl_maybe_log_fun and addr_afl_maybe_log:
                break

        if not addr_afl_maybe_log_fun or not addr_afl_maybe_log:
            print("don't find add_afl_maybe_fun\n")
            return

        print("find add_afl_maybe_fun ok\n")

        # find instrumented function
        for func in idautils.Functions():

            f_end = idc.FindFuncEnd(func)

            if f_end - func <= 0x28:
                continue

            # call    __afl_maybe_log
            if idc.__EA64__:  # 64bit
                addr_call = func + 0x1A
            else:  # 32bit
                addr_call = func + 0x08

            mnem = idc.GetMnem(addr_call)
            if mnem != 'call':
                continue

            for to in idautils.CodeRefsFrom(addr_call, False):
                fun_name = idc.GetFunctionName(to)
                if 'afl_maybe_log' not in fun_name:
                    continue
                # (function start, function size, address of the call)
                fun_info.append((func, f_end - func, addr_call))

        fun_info.sort(key=lambda x: x[1])
        num = len(fun_info)
        print(num)
        for i in range(num - 1, -1, -1):
            print(hex(fun_info[i][0]))
            is_small = fun_info[i][1] < 0x200
            # remove fun instrumented #or i < num/3
            if is_small or (i < num * 90.0 / 100.0
                            and random.randint(0, 99) < 80):
                # Writes ``target - call - 5`` at call + 1.
                # NOTE(review): presumably the rel32 operand of a 5-byte
                # call instruction - confirm.
                idc.PatchDword(fun_info[i][2] + 1,
                               addr_afl_maybe_log - fun_info[i][2] - 5)
            else:
                # Functions left untouched are printed a second time.
                print(hex(fun_info[i][0]))

        #idc.SaveBase('')
        #idc.Exit(0)

    except Exception as e:
        print(e)

    print('analyse time: ' + str(time.time() - g_time_start) + 's\n')
Beispiel #23
0
def create_pe():
    """Record opcode 4-gram matches, instrument ``.text`` and emit the section.

    While walking every head of every function, a sliding window of the last
    four mnemonics (and their addresses) is maintained.  Whenever the window
    matches one of the hard-coded patterns, the relevant addresses are
    appended to the module-level ``addr_to_fix`` list.  Heads inside the
    ``.text`` segment are additionally passed to ``instrument``.

    Afterwards 1024 random samples of ``addr_to_fix`` plus the full list are
    written as CSV files below ``crafted\\<INPUT_PE>``, the section data is
    assembled from ``ori_op`` / ``args0`` / ``args1`` / ``args2``, prefixed
    with the dispatch function, written to ``INPUT_PE + '_newSectionData'``
    and handed to ``insert_section``.
    """
    # Bounds of the ".text" segment; both stay 0 when no such segment exists.
    text_start = text_end = 0
    for seg in Segments():
        if idc.SegName(seg) == ".text":
            text_start = idc.SegStart(seg)
            text_end = idc.SegEnd(seg)
    for func in idautils.Functions():
        # The window is reset for every function, so no 4-gram spans two
        # functions.
        fourG_1 = ''
        fourG_2 = ''
        fourG_3 = ''
        fourG_4 = ''
        fourG_1_addr = 0
        fourG_2_addr = 0
        fourG_3_addr = 0
        fourG_4_addr = 0

        start_address = func
        end_address = idc.FindFuncEnd(func)
        for each_step in idautils.Heads(start_address, end_address):
            opcode = idc.GetMnem(each_step)
            #traverse 4 Gram
            # Shift the window: slot 1 is the oldest mnemonic, slot 4 the
            # current one.
            fourG_1 = fourG_2
            fourG_1_addr = fourG_2_addr
            fourG_2 = fourG_3
            fourG_2_addr = fourG_3_addr
            fourG_3 = fourG_4
            fourG_3_addr = fourG_4_addr
            fourG_4 = opcode
            fourG_4_addr = each_step

            if fourG_1 == 'mov' and fourG_2 == 'push' and fourG_3 == 'pop' and fourG_4 == 'mov':
                print "mov push pop mov", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov push pop mov-mov1', fourG_1_addr])
                addr_to_fix.append(['mov push pop mov-mov2', fourG_4_addr])
            if fourG_1 == 'mov' and fourG_2 == 'mov' and fourG_3 == 'push' and fourG_4 == 'xor':
                print "mov mov push xor", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov mov push xor-mov1', fourG_1_addr])
                addr_to_fix.append(['mov mov push xor-mov2', fourG_2_addr])
                addr_to_fix.append(['mov mov push xor-xor1', fourG_4_addr])
            if fourG_1 == 'mov' and fourG_2 == 'add' and fourG_3 == 'mov' and fourG_4 == 'mov':
                print "mov add mov mov", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov add mov mov-mov1', fourG_1_addr])
                addr_to_fix.append(['mov add mov mov-mov3', fourG_4_addr])
            # The two patterns below only constrain a prefix of the window.
            if fourG_1 == 'mov' and fourG_2 == 'rep':
                print "mov rep", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov rep-mov1', fourG_1_addr])
            if fourG_1 == 'push' and fourG_2 == 'call' and fourG_3 == 'pop':
                print "push call pop", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['push call pop-call1', fourG_2_addr])

            op = idc.GetDisasm(each_step)
            if each_step >= text_start and each_step < text_end:
                instrument(op, each_step)

    #print "3 of 4",3*len(addr_to_fix)/4
    name = ['operation', 'address']
    # NOTE(review): the directory name is concatenated into a shell command;
    # confirm that INPUT_PE can never contain shell metacharacters.
    os.system("mkdir " + "crafted\\" + INPUT_PE)
    for i in range(0, 1024):
        tmp_list = []
        # Sample three quarters of the recorded addresses; the sample size
        # relies on ``/`` producing an integer here.
        tmp_list = random.sample(addr_to_fix, 3 * len(addr_to_fix) / 4)

        df_tmp = pd.DataFrame(columns=name, data=tmp_list)
        df_tmp.to_csv("crafted\\" + INPUT_PE + "\\" + str(i), encoding='gbk')
        # The bare string below is disabled legacy code kept as a literal.
        '''
        tmp2 = open("crafted\\"+INPUT_PE+"\\"+str(i),'w')
        for each in tmp_list:
            tmp2.write(str(each[0])+','+str(each[1]))
            tmp2.write('\n')
        tmp2.close()
        '''
    df_tmp1 = pd.DataFrame(columns=name, data=addr_to_fix)
    df_tmp1.to_csv("crafted\\" + INPUT_PE + "\\" + INPUT_PE + "_addr_to_fix",
                   encoding='gbk')
    # The bare string below is disabled legacy code kept as a literal.
    '''
    tmp1 = open("crafted\\" + INPUT_PE + "_addr_to_fix",'w')
    for each in addr_to_fix:
        tmp1.write(str(each[0])+','+str(each[1]))
        tmp1.write('\n')
    tmp1.close()
    '''

    section_data = ''
    offsets = []
    for index in range(len(ori_op)):
        # Offset of this entry, measured before the dispatch function is
        # prepended.
        offsets.append(len(section_data))
        section_data += build_section_data(args0[index], args1[index],
                                           args2[index])

    # add dispatch function
    len_funs = len(section_data)
    section_data = add_dispatch_function(ori_address, offsets) + section_data

    section_file = open(INPUT_PE + '_newSectionData', 'wb')
    section_file.write(section_data)
    section_file.close()
    # NOTE(review): section_size is assigned but not used afterwards.
    section_size = len(section_data)
    insert_section(len(section_data), section_data, len_funs)
def _get_ida_func_surrogate(func, arch):
    """Build a dictionary describing a function and its basic blocks.

    The result holds the function name, id, start (``sea``) and end
    (``see``) addresses, the second element of ``_get_api(start)``, the
    collected comments and one entry per basic block of the flow chart.
    Each block entry carries its id, start/end addresses, a ``loc_<HEX>``
    name, the raw bytes as a hex string (when they could be read), the
    instructions as ``[address, mnemonic, operand...]`` lists, the data
    values referenced by its heads and the ids of its successor blocks.

    :param func: function object exposing ``startEA``.
    :param arch: architecture name; for ``'arm'`` the value of the ``T``
        register at the block start is added to the block's ``sea``.
    :return: the function description dict.
    """
    func_surrogate = dict()
    func_surrogate['name'] = idc.GetFunctionName(func.startEA)
    func_surrogate['id'] = func.startEA
    # ignore call-graph at this moment
    func_surrogate['call'] = list()
    func_surrogate['sea'] = func.startEA
    func_surrogate['see'] = idc.FindFuncEnd(func.startEA)
    # api is optional
    func_surrogate['api'] = _get_api(func.startEA)[1]
    func_surrogate['blocks'] = list()

    # comments
    func_surrogate['comments'] = []
    func_surrogate['comments'].extend(get_comments(func.startEA))

    for bb in idaapi.FlowChart(idaapi.get_func(func.startEA)):

        block = dict()
        block['id'] = bb.id
        block['sea'] = bb.startEA
        # Compare by value: ``is`` tests object identity, which only matches
        # an equal string when the interpreter happens to share the object.
        if arch == 'arm':
            # for arm; the last bit indicates thumb mode.
            block['sea'] += idc.GetReg(bb.startEA, 'T')
        block['eea'] = bb.endEA
        block['name'] = 'loc_' + format(bb.startEA, 'x').upper()
        dat = {}
        block['dat'] = dat
        s = idc.GetManyBytes(bb.startEA, bb.endEA - bb.startEA)
        if s is not None:
            block['bytes'] = "".join("{:02x}".format(ord(c)) for c in s)

        func_surrogate['comments'].extend(get_comments(bb.startEA))

        instructions = list()
        for head in idautils.Heads(bb.startEA, bb.endEA):
            ins = list()
            ins.append(str(hex(head)).rstrip("L").upper().replace("0X", "0x"))
            opr = idc.GetMnem(head)
            # Heads without a mnemonic are skipped entirely, including
            # their data references.
            if opr == "":
                continue
            ins.append(opr)
            for i in range(5):
                opd = idc.GetOpnd(head, i)
                if opd == "":
                    continue
                ins.append(opd)
            instructions.append(ins)

            refs = list(idautils.DataRefsFrom(head))
            for ref in refs:
                # Keyed by head, so with several references only the value
                # of the last one is kept.
                dat[head] = binascii.hexlify(struct.pack("<Q", idc.Qword(ref)))

        block['src'] = instructions

        # flow chart
        block_calls = list()
        for success_block in bb.succs():
            block_calls.append(success_block.id)
        block['call'] = block_calls
        func_surrogate['blocks'].append(block)
    return func_surrogate
Beispiel #25
0
def GetFunBbls(function_ea):
    """
    Get bbls of function.

    Works in two passes over the heads between the function start and
    idc.FindFuncEnd: the first collects block boundary addresses, the second
    cuts the head sequence at those boundaries and after every instruction
    whose mnemonic starts with 'j'.

    @function_ea - function address
    @return - bbls of function: a list of address lists; a block that was
              closed by the code below ends with the address where the next
              block begins (or the function end)
    """
    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)

    # The function start is always a boundary.
    boundaries = set((f_start, ))

    # Pass 1: collect boundaries from code references.
    for head in idautils.Heads(f_start, f_end):
        # If the element is an instruction
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        if idc.isCode(idc.GetFlags(head)):

            # Get the references made from the current instruction
            # and keep only the ones local to the function.
            refs = idautils.CodeRefsFrom(head, 0)
            refs_filtered = set()
            for ref in refs:
                if ref >= f_start and ref < f_end:
                    refs_filtered.add(ref)
            refs = refs_filtered

            if refs:
                # If the flow continues also to the next (address-wise)
                # instruction, we add a reference to it.
                # For instance, a conditional jump will not branch
                # if the condition is not met, so we save that
                # reference as well.
                next_head = idc.NextHead(head, f_end)
                if next_head != idaapi.BADADDR and idc.isFlow(
                        idc.GetFlags(next_head)):
                    refs.add(next_head)

                # Update the boundaries found so far.
                boundaries.update(refs)

    #end of for head in idautils.Heads(chunk[0], chunk[1]):

    bbls = []
    bbl = []  # a list of heads
    # NOTE: We can handle if jump xrefs to chunk address space.

    # Pass 2: cut the head sequence into blocks.
    for head in idautils.Heads(f_start, f_end):
        if head in boundaries:
            #print('%d') % head
            if len(bbl) > 0:
                # The open block already starts at this head; nothing to close.
                if bbl[0] == head:
                    continue
                # Close the open block at this boundary.
                bbl.append(head)
                bbls.append(bbl)
                bbl = []
            # Start a new block at this boundary.
            bbl.append(head)
        #elif self.GetInstructionType(head) == self.BRANCH_INSTRUCTION:
        elif idc.GetMnem(head).startswith('j'):
            # A 'j*' instruction ends the open block right behind itself
            # (its address plus its size), unless the open block already
            # starts there.
            if len(bbl) > 0 and bbl[0] == head + idc.ItemSize(head):
                continue
            bbl.append(head + idc.ItemSize(head))
            bbls.append(bbl)
            bbl = []
            bbl.append(head + idc.ItemSize(head))
        else:
            pass
    # add last basic block
    if len(bbl) and bbl[0] != f_end:
        bbl.append(f_end)
        bbls.append(bbl)

    return bbls
Beispiel #26
0
def GetFunEdgesAndBbls(function_ea):
    """
    Collect the basic blocks and intra-function edges of a function.

    Only the linear range [function_ea, idc.FindFuncEnd(function_ea)) is
    scanned. Blocks whose head is rejected by IsInstrumentIns() are
    removed from the result; an edge that points to such a block is
    replaced by edges to whatever LookForInsChildBbls() returns for it.

    @function_ea - function start address
    @return - tuple (bbls, edges_d, edges_count, SingleBBS, MultiBBS)
        bbls        - dict: block head -> [head, tail, call_num, mem_num]
                      (mem_num is never incremented and stays 0)
        edges_d     - dict: block head -> list of successor heads
        edges_count - number of successors stored in edges_d
        SingleBBS   - dict: head -> its only predecessor bbl
        MultiBBS    - dict: head -> list of predecessor bbls
      When function_ea itself is rejected by IsInstrumentIns() every
      container is empty and edges_count is 0.
    @raise Exception - if idautils.Heads() yields idaapi.BADADDR
    """
    bbl = []  # block being built: [head, tail, call_num, mem_num]
    SingleBBS = {}  # head -> pred_bbl
    MultiBBS = {}  # head -> [pred_bbls]
    bbls = {}  # head -> bbl
    bbls2 = {}  # tail -> bbl
    edges_d = {}  # head -> [head, ..., head]
    edges_count = 0
    edges_s_t = set()  # raw edges as (source instruction, target head)
    edges_d_t = {}  # raw edges grouped by the head of the source block

    if not IsInstrumentIns(function_ea):
        return bbls, edges_d, edges_count, SingleBBS, MultiBBS

    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)

    boundaries = set((f_start, ))  # heads of basic blocks

    # Pass 1: find block boundaries and raw edges.
    for head in idautils.Heads(f_start, f_end):
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        if not idc.isCode(idc.GetFlags(head)):
            continue

        # Keep only the non-flow references that stay inside the function.
        # f_start itself is excluded on purpose (avoids recursion edges).
        refs = set(ref for ref in idautils.CodeRefsFrom(head, 0)
                   if f_start < ref < f_end)
        if not refs:
            continue

        # A conditional jump may also fall through, so the next
        # (address-wise) instruction is a target as well.
        next_head = idc.NextHead(head, f_end)
        if next_head != idaapi.BADADDR and idc.isFlow(
                idc.GetFlags(next_head)):
            refs.add(next_head)

        boundaries.update(refs)
        for r in refs:
            # If execution can also fall into r from the instruction
            # right before it, that instruction is an edge source too.
            if idc.isFlow(idc.GetFlags(r)):
                prev_head = idc.PrevHead(r, f_start)
                # Compare against BADADDR rather than a hard-coded 32-bit
                # value so the "no previous head" case is also detected
                # when addresses are 64 bits wide.
                if prev_head != idaapi.BADADDR:
                    edges_s_t.add((prev_head, r))
            edges_s_t.add((head, r))

    # Pass 2: build the blocks. The head of a bbl is its first
    # instruction address, the tail is its last instruction address.
    last_head = 0
    for head in idautils.Heads(f_start, f_end):
        mnem = idc.GetMnem(head)
        if head in boundaries:
            if not bbl or bbl[0] != head:
                if bbl:
                    # Close the block that ends just before this boundary.
                    bbl[1] = last_head
                    bbls[bbl[0]] = bbl
                    bbls2[bbl[1]] = bbl
                bbl = [head, 0, 0, 0]
            # The boundary instruction belongs to the current block, so it
            # is the newest tail candidate. Without this a block made of a
            # single instruction would be closed with a tail taken from an
            # earlier block (or 0).
            last_head = head
        elif mnem.startswith('j'):
            next_ea = head + idc.ItemSize(head)
            if len(bbl) > 0 and bbl[0] == next_ea:
                continue
            # A jump terminates the current block; the next one starts
            # right behind it.
            bbl[1] = head
            bbls[bbl[0]] = bbl
            bbls2[bbl[1]] = bbl
            bbl = [next_ea, 0, 0, 0]
        else:
            last_head = head

        if mnem.startswith('call'):
            bbl[2] += 1

    # Add the last basic block. Its tail is recorded as f_end.
    if len(bbl) and bbl[0] != f_end:
        bbl[1] = f_end
        bbls[bbl[0]] = bbl
        bbls2[bbl[1]] = bbl

    # Raw edge set -> dict keyed by the head of the block owning the source.
    for src, dst in edges_s_t:
        if src in bbls2:
            edges_d_t.setdefault(bbls2[src][0], []).append(dst)
        else:
            print('edge (%x, %x) can not find head bbl.' %
                  (src, dst))  # a small case. e1 flow e0.

    # Revise edges: both ends of an edge must be instrumented.
    for e0 in edges_d_t:
        if not IsInstrumentIns(e0):
            continue

        for e1 in edges_d_t[e0]:
            if IsInstrumentIns(e1):
                targets = [e1]
            else:
                # e1 is not instrumented: look (recursively) for its
                # instrumented child blocks and link e0 to those instead.
                targets = LookForInsChildBbls(e1, edges_d_t, [])
            for target in targets:
                edges_d.setdefault(e0, []).append(target)
                edges_count += 1

    # Revise bbls: drop the blocks that are not instrumented. Iterate over
    # a snapshot of the keys because the dict is modified in the loop.
    for b in list(bbls):
        if not IsInstrumentIns(b):
            bbls.pop(b)

    # Classify every edge target by its number of predecessors.
    for e0 in edges_d:
        if e0 not in bbls:
            print('error:%x have no head' % (e0))  # error
            continue
        for e1 in edges_d[e0]:
            if e1 in MultiBBS:
                MultiBBS[e1].append(bbls[e0])
            elif e1 in SingleBBS:
                # Second predecessor found: promote to MultiBBS.
                MultiBBS[e1] = [SingleBBS.pop(e1), bbls[e0]]
            else:
                SingleBBS[e1] = bbls[e0]

    return bbls, edges_d, edges_count, SingleBBS, MultiBBS
def GetFuncInputSurrogate(func):
    """
    Build a plain-dict description of a function and its basic blocks.

    @func - object exposing startEA; it is also handed to
            GetFunctionName() to obtain the name
    @return - dict with the keys 'name', 'id', 'call' (always an empty
              list, the call graph is ignored), 'sea', 'see' and
              'blocks'. Every entry of 'blocks' is a dict with the keys
              'id', 'sea', 'eea', 'name', 'dat', 'src', 'call' and, when
              the block bytes could be read, 'bytes'.
    """
    is_arm = idaapi.get_inf_structure().procName.lower() == 'arm'

    entry_ea = func.startEA
    function = {
        'name': GetFunctionName(func),
        'id': entry_ea,
        # ignore call-graph at this moment
        'call': [],
        'sea': entry_ea,
        'see': idc.FindFuncEnd(entry_ea),
        'blocks': [],
    }

    for bblock in idaapi.FlowChart(idaapi.get_func(entry_ea)):
        block_start = bblock.startEA
        block_end = bblock.endEA

        reported_start = block_start
        if is_arm:
            # On ARM the value of the 'T' register at the block start is
            # added to the reported start address.
            reported_start += idc.GetReg(block_start, 'T')

        dat = {}  # instruction address -> hex of the qword it references
        sblock = {
            'id': bblock.id,
            'sea': reported_start,
            'eea': block_end,
            'name': 'loc_' + format(block_start, 'x').upper(),
            'dat': dat,
        }

        raw = idc.GetManyBytes(block_start, block_end - block_start)
        if raw is not None:
            sblock['bytes'] = "".join(["%02x" % ord(ch) for ch in raw])

        # One entry per instruction: [address, mnemonic, operands...]
        src = []
        for head in idautils.Heads(block_start, block_end):
            mnem = idc.GetMnem(head)
            if mnem == "":
                continue

            operands = [idc.GetOpnd(head, idx) for idx in range(5)]
            src.append(["0x%X" % head, mnem] +
                       [opd for opd in operands if opd != ""])

            # With several data references the last one wins.
            for ref in idautils.DataRefsFrom(head):
                dat[head] = binascii.hexlify(
                    struct.pack("<Q", idc.Qword(ref)))

        sblock['src'] = src

        # flow chart: ids of the successor blocks
        sblock['call'] = [succ_block.id for succ_block in bblock.succs()]
        function['blocks'].append(sblock)

    return function
Beispiel #28
0
def process_function(arch, func_ea):
    """
    Walk every chunk of a function and record what is found in a packet.

    The walk starts with the flow chunk containing func_ea, follows
    conditional and unconditional branch targets into further chunks, and
    finally processes whatever chunks get_chunks() reported that were not
    reached that way. For each head the comments are recorded; for each
    code head the instruction, its calls, branches and data references
    are added to a DismantlerDataPacket.

    Side effects: entries are added to the module-level
    imported_functions set, and the finished packet and a
    FunctionAnalyzer built from it are handed to instrumentation.

    @arch - architecture helper providing process_instruction(),
            is_call(), is_conditional_branch() and
            is_unconditional_branch()
    @func_ea - entry address of the function
    @return - None
    """
    packet = DismantlerDataPacket()

    # Chunks reported by IDA, as a set for fast removal, plus an index
    # from chunk start address to the chunk itself.
    ida_chunks = set(get_chunks(func_ea))
    ida_chunks_idx = {c[0]: c for c in ida_chunks}

    # Seed the visited set with the main block only, i.e. the one that
    # contains the function entry point.
    chunks = set()
    entry_chunk = get_flow_code_from_address(func_ea)
    if entry_chunk:
        chunks.add(entry_chunk)

    # The function comments are attached to the first head processed.
    func = idaapi.get_func(func_ea)
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Queue of chunks still to process.
    chunks_todo = list(chunks)

    while True:

        # Once the queue drains, fall back to the IDA chunks that were
        # not visited yet; stop when those are exhausted as well.
        if not chunks_todo:
            if not ida_chunks:
                break
            chunks_todo.extend(ida_chunks)

        chunk_start, chunk_end = chunks_todo.pop()
        if chunk_start in ida_chunks_idx:
            ida_chunks.remove(ida_chunks_idx.pop(chunk_start))

        for head in idautils.Heads(chunk_start, chunk_end):

            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()

            if not idc.isCode(idc.GetFlags(head)):
                continue

            instruction = arch.process_instruction(packet, head)

            # Non-flow code references, queried once per instruction.
            non_flow_refs = list(idautils.CodeRefsFrom(head, 0))

            # Only if there are references other than plain flow.
            if non_flow_refs:

                # For each reference, including the flow ones.
                for ref_idx, ref in enumerate(idautils.CodeRefsFrom(head, 1)):

                    if arch.is_call(instruction):

                        # These two conditions must remain separated: the
                        # enclosing "if" has to be entered whenever the
                        # instruction is a call, otherwise it would be
                        # added as an unconditional jump further down.
                        if ref in non_flow_refs:
                            packet.add_direct_call(head, ref)

                    elif ref_idx > 0 and arch.is_conditional_branch(
                            instruction):
                        # ref_idx > 0 skips the normal flow reference,
                        # which would otherwise make the conditional
                        # branch be processed twice.

                        # FIXME (from the original author): the reference
                        # is not checked to point to valid, defined code;
                        # an isHead()/isCode() test may be needed here.
                        packet.add_conditional_branch_true(head, ref)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))
                        _enqueue_flow_chunk(ref, chunks, chunks_todo)

                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, ref)
                        _enqueue_flow_chunk(ref, chunks, chunks_todo)

            for ref in idautils.DataRefsFrom(head):
                packet.add_data_reference(head, ref)

                # First data reference made from the referenced location
                # itself, if any (e.g. 'ref' is a pointer to code).
                target = next(iter(idautils.DataRefsFrom(ref)), None)

                if target is None and arch.is_call(instruction):
                    imp_name = idc.Name(ref)
                    imp_module = get_import_module_name(ref)

                    imported_functions.add((ref, imp_name, imp_module))
                    packet.add_indirect_virtual_call(head, ref)

                elif target is not None and idc.isHead(target):
                    # for calls "routed" through this reference
                    if arch.is_call(instruction):
                        packet.add_indirect_call(head, target)

                    # for unconditional jumps "routed" through this
                    # reference
                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, target)

                    # for conditional jumps "routed" through this
                    # reference
                    elif arch.is_conditional_branch(instruction):
                        packet.add_conditional_branch_true(head, target)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)


def _enqueue_flow_chunk(ref, chunks, chunks_todo):
    """Queue the flow chunk containing ref unless ref is already covered.

    The new chunk is appended to chunks_todo (still to process) and added
    to chunks (everything visited so far).
    """
    if address_in_chunks(ref, chunks):
        return
    new_chunk = get_flow_code_from_address(ref)
    if new_chunk is not None:
        chunks_todo.append(new_chunk)
        chunks.add(new_chunk)
Beispiel #29
0
def deobfuscate_function(addr):
    """
    Locate the jump table used by an obfuscated function and print the
    boundaries of the subroutine derived from it.

    The search runs backwards from a "MOV PC" instruction: the LDR.W that
    sets the moved register, then the PC-relative ADD that sets the LDR.W
    base, then the instruction that loads the ADD's destination register.
    Every failed step ends the function early, most of them with a
    "[DEOBF]" diagnostic.

    addr must be the start of a function (the first function chunk).
    Nothing is returned; the result is only printed.
    """
    if addr != idc.FirstFuncFchunk(addr):
        print("[DEOBF] Address %X is not the start of a function." % addr)
        return

    # Static data.
    func_start = addr
    func_end = idc.FindFuncEnd(addr)

    # 1. Find MOV PC
    (mov_addr, mov_match) = search_forward(func_start, func_end, regex_mov)

    if mov_addr is None:
        # Functions without a MOV PC are silently ignored.
        return

    # 2. Find LDR.W ..
    ldr_addr = search_register_modifier_backward(mov_addr, func_start,
                                                 mov_match.group(1))
    if ldr_addr is None:
        print("[DEOBF] No LDR.W was found in %s" % idc.GetFunctionName(
            func_start))
        return

    ldr_match = regex_ldrw.match(idc.GetDisasm(ldr_addr))
    if ldr_match is None:
        print("[DEOBF] Modifier of %s found from %X is not a LDR.W" % (
            mov_match.group(1), mov_addr))
        return

    # 3. Find table offset
    add_addr = search_register_modifier_backward(ldr_addr, func_start,
                                                 ldr_match.group(2))
    if add_addr is None:
        # TODO: Check if belongs to a previously found graph.
        return

    if idc.GetOpnd(add_addr, 1) != 'PC':
        print("[DEOBF] ADD does not use PC at %X" % add_addr)
        return

    base_reg = idc.GetOpnd(add_addr, 0)
    ldr2_addr = search_register_modifier_backward(idc.PrevHead(add_addr),
                                                  func_start, base_reg)
    if ldr2_addr is None:
        # The helper signals "not found" with None (see the checks above);
        # bail out instead of handing None to idc.GetOperandValue().
        print("[DEOBF] No modifier of %s was found above %X" % (
            base_reg, add_addr))
        return

    # Address of the literal holding the table offset (e.g. loc_80054).
    literal_addr = idc.GetOperandValue(ldr2_addr, 1)
    # The offset stored there.
    table_offset = idc.Dword(literal_addr)
    # Address of the table.
    # NOTE(review): the "+ 2" is presumably the PC read-ahead of the ADD;
    # confirm against the target architecture.
    table_start = table_offset + idc.NextHead(add_addr) + 2

    # 4. Read table.
    # Non-zero dwords are collected until the next named address is
    # reached. NOTE(review): nothing else bounds this loop.
    table = []
    table_addr = table_start

    while True:
        table_entry = idc.Dword(table_addr)
        if table_entry > 0:
            table.append(table_entry)
        table_addr = table_addr + 4
        if idc.Name(table_addr):
            break

    # - We also have to add the starting block to the table.
    table.append(func_start)

    # 5. Find subroutine boundary
    (sub_start, sub_end) = find_subroutine_boundary(table_start, table)

    print("Start: %X - End: %X" % (sub_start, sub_end))
    def print_deepest_path(self):
        """
        Write one CSV row per function describing the instructions found
        along the deepest path discovered from that function's node.

        For every node in self.root whose function flags contain neither
        FUNC_LIB nor FUNC_THUNK, a depth-first walk over node.child
        records the longest path seen; the instructions of every block on
        that path are rendered by ext_instruction() and written as
        [name, hex(start), hex(end), ops] to
        file_store_path + '\\test.csv'.

        NOTE(review): the CSV file is opened here but not closed in the
        visible part of this method.
        """
        # CS_OP_INVALID = 0,  ///< uninitialized/invalid operand.
        # CS_OP_REG,      1   ///< Register operand.
        # CS_OP_IMM,      2   ///< Immediate operand.
        # CS_OP_MEM,      3   ///< Memory operand.
        # NOTE(review): the operand types tested below come from
        # idc.GetOpType(), whose numbering is presumably IDA's own
        # (o_reg = 1, o_displ = 4) rather than the list above - confirm.
        def ext_instruction(file_name, addr_start, addr_end):
            """
            Return a space-separated string with one token per head in
            [addr_start, addr_end).

            A token has the form "<mnemonic>_<op1>_<op2>", where each
            operand is its type number, or its text when the type is 1
            or 4, passed through tran(). A call whose target is flagged
            FUNC_LIB or FUNC_THUNK becomes "call_<target>_0" instead.

            file_name is only used by the commented-out writerow() below.
            """
            name_fun = GetFunctionName(addr_start)  # only used by the commented-out writerow
            row = ''
            for addr in Heads(addr_start, addr_end):

                ins = ''
                thisOperand = idc.GetMnem(addr)  # despite the name, this is the mnemonic
                oPtype1 = idc.GetOpType(addr, 0)
                oPtype2 = idc.GetOpType(addr, 1)
                # assemblydata = parametertype(oPtype1)+' '+parametertype(oPtype2)
                # For operand types 1 and 4 keep the operand text instead
                # of the type number.
                if (oPtype1 == 1 or oPtype1 == 4):
                    oPtype1 = idc.GetOpnd(addr, 0)
                if (oPtype2 == 1 or oPtype2 == 4):
                    oPtype2 = idc.GetOpnd(addr, 1)
                if thisOperand == "call":
                    call_fun_name = GetOpnd(addr, 0)
                    keyInstr = LocByName(call_fun_name)
                    fflags = idc.get_func_flags(keyInstr)
                    # Calls into library or thunk functions keep the
                    # callee name in the token.
                    if (fflags & idc.FUNC_LIB) or (fflags & idc.FUNC_THUNK):
                        ins = thisOperand + '_' + idc.GetOpnd(addr, 0) + '_0'
                        row = row + ' ' + ins
                        continue
                ins = str(thisOperand)+'_'+tran(str(oPtype1)) + \
                    '_'+tran(str(oPtype2))
                row = row + ' ' + ins
            return row
            # file_name.writerow([name_fun, hex(addr_start), hex(addr_end), row])

        # NOTE: these three names are locals of print_deepest_path. The
        # nested functions and the loop below declare the same names as
        # "global", so they read and write module-level variables, not
        # these locals.
        deepset = list()
        path = list()
        all_path = list()

        def print_path_deepest(head):
            """
            Depth-first walk over head.child that keeps, in the global
            deepset, a copy of the longest path seen so far.

            Nodes already recorded in the global all_path are never
            entered again, so each node is visited at most once per walk.
            """
            global deepset
            global path
            global all_path
            # print(deepset)
            if head is None:
                return
            if head in all_path:
                return
            # print(path)
            all_path.append(head)
            path.append(head)
            # print("********\n",path)
            if len(deepset) < len(path):
                # print(len(deepset),len(path))
                deepset = [i for i in path]
                # print(deepset)
            for elem in head.child:
                print_path_deepest(elem)
            # Leave this node again before returning to the caller.
            path.remove(head)
            # print("#######\n",path)

        def print_path_all(head):
            """
            Append every node reachable from head through .child to the
            global path, each node once.

            Not called anywhere in the visible part of this method.
            """
            global path
            # print(deepset)
            if head is None:
                return
            if head in path:
                return
            path.append(head)
            for elem in head.child:
                print_path_all(elem)

        f = open(file_store_path + '\\' + 'test' + '.csv', 'wb')
        saveFile = csv.writer(f)
        saveFile.writerow(["name", "start", "end", "Op"])
        for fun_node in self.root:
            name_fun = GetFunctionName(fun_node.addr)
            fflags = idc.get_func_flags(fun_node.addr)
            # Library and thunk functions are skipped.
            if not ((fflags & idc.FUNC_LIB) or (fflags & idc.FUNC_THUNK)):
                # Reset the shared (module-level) walk state for this
                # function.
                global path
                path = list()
                global deepset
                deepset = list()
                global all_path
                all_path = list()
                # print(deepset)
                # path.clear()

                print_path_deepest(fun_node)
                row_fun = ''
                fun_addr_end = idc.FindFuncEnd(fun_node.addr)
                # Concatenate the instruction tokens of every block on
                # the deepest path.
                # NOTE(review): startEA and end_ea mix two attribute
                # naming styles on the same block object - confirm both
                # exist on it.
                for bb in deepset:
                    ins_bb = ext_instruction(saveFile, bb.block.startEA,
                                             bb.block.end_ea)
                    row_fun = row_fun + ' ' + ins_bb
                saveFile.writerow(
                    [name_fun,
                     hex(fun_node.addr),
                     hex(fun_addr_end), row_fun])