def disassemble(self):
    """
    Dump .text, .rodata, .data, .eh_frame, .got to file

    The .text disassembly is written to '<file>.temp'. The contents of the
    data-like sections are written to '<section>.info' files in the current
    directory. Only the .text, .rodata and .data dumps are passed to
    self.checkret; the exit status of the remaining dumps is ignored.
    """
    print(colored('1: DISASSEMBLE', 'green'))

    target = self.file
    listing = target + '.temp'
    # Keep only the lines starting with a space and, of those, fields 3-6
    hexcols = " | grep \"^ \" | cut -d \" \" -f3,4,5,6 > "

    def dump(section, outfile):
        # Run `objdump -s` on one section and return the shell exit status
        return os.system(config.objdump + " -s -j " + section + " " + target
                         + hexcols + outfile)

    ret = os.system(config.objdump + ' -Dr -j .text ' + target + ' > ' + listing)
    self.checkret(ret, listing)

    # x86 only: rewrite PC relative code in the freshly created listing
    if not ELF_utils.elf_arm():
        if ELF_utils.elf_32():
            pic_process.picprocess32(target)
        else:
            extern_symbol_process.globalvar(target)
            pic_process.picprocess64(target)

    self.checkret(dump('.rodata', 'rodata.info'), 'rodata.info')
    self.checkret(dump('.data', 'data.info'), 'data.info')
    dump('.eh_frame', 'eh_frame.info')
    if not ELF_utils.elf_arm():
        dump('.eh_frame_hdr', 'eh_frame_hdr.info')
    dump('.got', 'got.info')
def data_output(self):
    """
    Save data sections to files

    Labels are inserted via self.process, external .got symbols are resolved,
    a section header is prepended to every section list and the non-empty
    entries are appended to 'final_data.s' in the order
    .rodata, .data, .got, .bss.
    """
    self.process(self.locations)
    self.process(self.data_labels, True)
    self.gotexternals()

    # Give the first .rodata entry an extra 's_dummy' label
    if self.rodata_list:
        first_label, first_data = self.rodata_list[0]
        self.rodata_list[0] = ('s_dummy:\n' + first_label, first_data)

    if ELF_utils.elf_64():
        dataalign = '\n.align 16'
    elif ELF_utils.elf_arm():
        dataalign = '\n.align 2'
    else:
        dataalign = ''

    self.rodata_list.insert(0, ('.section .rodata' + dataalign, ''))
    self.got_list.insert(0, ('.section .got', ''))
    self.data_list.insert(0, ('.section .data' + dataalign, ''))
    self.bss_list.insert(0, ('.section .bss' + dataalign, ''))

    def createout(entries):
        # Join label and value of every entry that is not completely empty
        return '\n'.join(
            [e[0] + e[1] for e in entries if len(e[0]) + len(e[1]) > 0])

    with open('final_data.s', 'a') as f:
        f.write(createout(self.rodata_list) + '\n')
        for section in (self.data_list, self.got_list, self.bss_list):
            f.write('\n' + createout(section) + '\n')
def traverse(self):
    """
    Analyze and modify instructions

    The scan is only performed for 32 bit non-ARM binaries; the label list is
    returned in every case.
    :return: list of generated labels
    """
    is_x86_32 = ELF_utils.elf_32() and not ELF_utils.elf_arm()
    if is_x86_32:
        self.scan()
    labels = unify_int_list(self.label)
    return labels
def picprocess32(filepath):
    """
    PC relative operation in x86 32 bit code such as:
        call   804c452 <__x86.get_pc_thunk.bx>
        add    $0x2b8e,%ebx
        mov    $0x10, (%ebx)
    This operation usually loads into %ebx the address of the _GLOBAL_OFFSET_TABLE_
    Further adjustments are operated in the analysis phase

    Nothing is done unless the target is a 32 bit, non-ARM executable.
    :param filepath: path to target executable
    """
    if not ELF_utils.elf_32():
        return
    if not ELF_utils.elf_exe():
        return
    if ELF_utils.elf_arm():
        return
    text_process_strip(filepath)
def lib32_processing(self, instrs, funcs):
    """
    Process PC relative code for x86 32 binaries

    For any other kind of binary the instruction list is returned untouched.
    Otherwise every address reported by the helper is recorded in self.label
    as a (section name, address) pair.
    :param instrs: instruction list
    :param funcs: function list
    :return: instruction list produced by the helper, or instrs itself
    """
    if not ELF_utils.elf_32() or ELF_utils.elf_arm():
        return instrs

    helper = lib32_helper(instrs, funcs)
    self.label += [(self.check_sec(addr).sec_name, addr)
                   for addr in helper.traverse()]
    return helper.get_instrs()
def traverse32(self, l, startaddr):
    """
    Traverse data section, find pointers using 32bit alignment
    and substitute them with labels

    Four consecutive entries at a time are decoded as one little-endian
    32 bit value. When the value is recognised as a pointer, the first entry
    is replaced by a '.long <label>' declaration and the other three are
    blanked. The list is modified in place.
    :param l: .byte declaration list
    :param startaddr: section start address
    """
    i = 0
    holei = 0
    # Skip the excluded ranges whose second element lies below the section start
    # (presumably self.exclude holds sorted (begin, end) pairs -- TODO confirm)
    while holei < len(self.exclude) and startaddr > self.exclude[holei][1]:
        holei += 1
    while i < len(l) - 3:
        # Characters 8-9 of each entry's text are taken as one hex byte
        # (presumably the entries look like '.byte 0xNN' -- TODO confirm);
        # reversing the four entries makes the first one the least significant
        val = int(''.join(map(lambda e: e[1][8:10], reversed(l[i:i + 4]))), 16)
        s = self.check_sec(val)
        if s is not None:
            # check_sec returned a section object for the value
            if self.assumption_two:
                # With assumption_two set no data label is generated here
                self.in_jmptable = False
            elif not ELF_utils.elf_arm() or self.checkifprobd2dARM(val):
                if s.sec_name == '.plt' and val in self.plt_symbols:
                    # Known .plt entry: reference the symbol name directly
                    l[i] = (l[i][0], '.long ' + self.plt_symbols[val])
                    l[i + 1:i + 4] = [('', '')] * 3
                else:
                    self.data_labels.insert(0, (s.sec_name, val))
                    l[i] = (l[i][0], '.long S_0x%X' % val)
                    l[i + 1:i + 4] = [('', '')] * 3
        else:
            if ELF_utils.elf_arm():
                # Clear the lowest bit (presumably the Thumb mode bit)
                val = val & (-2)
            if self.check_text(val):
                # Without assumption_three every value accepted by check_text
                # is labelled
                c = bbn_byloc(val, self.begin_addrs) if self.assumption_three else True
                if c or (not c and self.check_jmptable(l[i][0], val)):
                    if c and self.check_jmptable_1(l[i][0]):
                        self.in_jmptable = True
                        self.cur_func_name = self.fn_byloc(val)
                    else:
                        self.in_jmptable = False
                    self.text_labels.insert(0, val)
                    l[i] = (l[i][0], '.long S_0x%X' % val)
                    l[i + 1:i + 4] = [('', '')] * 3
                else:
                    self.in_jmptable = False
            else:
                self.in_jmptable = False
        if holei < len(self.exclude) and self.exclude[holei][0] <= startaddr + i + 4 <= self.exclude[holei][1]:
            # The next word falls into the current excluded range: jump to the
            # range's second element and advance to the next multiple of four
            i = self.exclude[holei][1] - startaddr
            i += 4 - i % 4
            holei += 1
        else:
            i += 4
def main(filepath, instrument=False):
    """
    Init processing

    Only stripped executables are processed; for anything else an error
    message is written to stderr and nothing else happens.
    :param filepath: path to executable
    :param instrument: True to apply instrumentation
    """
    if not (ELF_utils.elf_strip() and ELF_utils.elf_exe()):
        sys.stderr.write(
            'Error: binary is not stripped or is a shared library\n')
        return

    init = Init(filepath)
    init.disassemble()
    init.process()
    init.ailProcess(instrument)
def add_func_label(self, ufuncs, instrs):
    """
    Insert function labels

    Both lists are walked in parallel. When an instruction sits at the
    beginning address of the current function and does not carry the
    function name yet, the name is prepended to its label.
    :param ufuncs: function list
    :param instrs: instruction list (updated in place)
    :return: instruction list with function declarations
    """
    fidx = 0
    iidx = 0
    while fidx != len(ufuncs) and iidx != len(instrs):
        func = ufuncs[fidx]
        instr = instrs[iidx]
        iloc = get_loc(instr)
        if func.func_begin_addr == iloc.loc_addr and func.func_name not in iloc.loc_label:
            lab = '\n' + func.func_name + ' : '
            if ELF_utils.elf_arm():
                lab = '\n.thumb_func' + lab
            iloc.loc_label = lab + iloc.loc_label
            instrs[iidx] = set_loc(instr, iloc)
            # Stay on the same instruction and look at the next function
            fidx += 1
            continue
        if func.func_begin_addr < iloc.loc_addr:
            # The function begins before this instruction: move on to the next one
            fidx += 1
        iidx += 1
    return instrs
def picprocess64(filepath):
    """
    PC relative operations in x86 64 bit code
    typical instruction disassembled by objdump like this
        4005c9:   48 8b 05 58 08 20 00    mov 0x200858(%rip),%rax  # 600e28 <__libc_start_main@plt+0x200a28>
    is rewritten so that the displacement is replaced by the absolute
    target taken from the trailing objdump comment
        4005c9:   ...................     mov 0x600e28(%rip),%rax
    The trailing '# ...' comment is dropped from every rewritten line.
    The listing '<filepath>.temp' is updated in place; nothing is done for
    binaries that are not 64 bit.
    :param filepath: path to target executable
    """
    if not ELF_utils.elf_64():
        return

    with open(filepath + '.temp') as f:
        lines = f.readlines()

    # The optional '-' is part of the operand: without it a negative
    # displacement such as -0x7(%rip) would be rewritten to
    # -0x<target>(%rip), i.e. the absolute target would end up negated.
    pat = re.compile(r'-?0x[0-9a-f]+\(%rip\)')

    for i, l in enumerate(lines):
        if '#' not in l:
            continue
        m = pat.search(l)
        if m is None:
            continue
        items = l.split('#')
        target = items[1].split()
        if not target:
            # '#' with nothing after it: no resolved address to use
            continue
        code = items[0].replace(m.group(0), '0x' + target[0] + '(%rip)')
        lines[i] = code + '\n'

    with open(filepath + '.temp', 'w') as f:
        f.writelines(lines)
def set_datas(self, funcs):
    """
    Load data values and perform analysis

    Collects sections and data, derives the sorted label/function/address
    tables used by the analysis and finally resolves data references with
    the 32 bit or the 64 bit routine.
    :param funcs: function list
    """
    self.section_collect()
    self.data_collect()
    self.locations = self.label_locate()

    self.label_set = set([entry[1] for entry in self.label])
    self.label_arr = sorted(self.label_set)

    # Functions ordered by their beginning address
    ordered = sorted(funcs, key=lambda f: f.func_begin_addr)
    self.fl_sort = [ft(f.func_name, f.func_begin_addr, f.func_end_addr)
                    for f in ordered]

    # One hexadecimal address per line, possibly followed by ':'
    self.text_mem_addrs = [int(a.strip().rstrip(':'), 16)
                           for a in read_file('text_mem.info')]
    self.text_mem_arr = self.text_mem_addrs
    self.label_mem_arr = sorted(self.label_mem_addrs)

    self.set_assumption_flag()
    self.set_excluded_ranges()
    self.begin_addrs = [f.func_begin_addr for f in funcs]

    if ELF_utils.elf_32():
        self.data_refer_solve()
    else:
        self.data_refer_solve_64()
def process(self):
    """
    Traverse instruction list and insert generated S_ labels in the correct positions

    self.locs and the cleaned, sorted destination list are walked in
    parallel; a location whose address equals a destination gets an
    'S_<addr> : ' label unless its label already contains it.
    """
    def do_update(current, new):
        # Append the new label on its own line unless it is already present
        return current if new in current else current + '\n' + new

    des1 = self.clean_sort(self.des)
    li = 0
    di = 0
    while di != len(des1) and li != len(self.locs):
        loc = self.locs[li]
        dest = des1[di]
        if dest == loc.loc_addr:
            lhs = 'S_' + dec_hex(loc.loc_addr)
            if ELF_utils.elf_arm() and not isinstance(self.instr_list[li][0], Types.InlineData):
                lhs = '.thumb_func\n' + lhs
            self.locs[li].loc_label = do_update(loc.loc_label, lhs + ' : ')
            di += 1
            li += 1
        elif dest < loc.loc_addr:
            # Destination lies before this location: drop it, keep the location
            di += 1
        else:
            li += 1
def perform(instrs, funcs):
    """
    Append a 'mov eax, eax' instruction to the list for non-ARM binaries

    :param instrs: instruction list (extended in place)
    :param funcs: function list (unused)
    :return: the instruction list
    """
    if ELF_utils.elf_arm():
        return instrs

    eax_to_eax = Types.TripleInstr((
        'mov',
        Types.RegClass('eax'),
        Types.RegClass('eax'),
        Types.Loc('', 0, True),
        False,
    ))
    instrs.append(eax_to_eax)
    return instrs
def init_array_dump(self):
    """
    Append the collected init array entries to 'final_data.s' as a .ctors section

    Currently disabled: the method returns immediately and the code below
    is never executed.
    """
    # This seems creating problems rather than solving them
    return

    if self.init_array_list and not ELF_utils.elf_arm():
        entries = ['.long ' + s.strip() for s in self.init_array_list]
        with open('final_data.s', 'a') as f:
            f.write('\n\n.section .ctors,"aw",@progbits\n')
            f.write('.align 4\n')
            f.write('\n'.join(entries))
            f.write('\n')
def pp_print_file(ilist):
    """
    Write instruction string list to file

    'final.s' is overwritten with a .text section header, extra Thumb
    directives on ARM, and the instructions separated by newlines.
    :param ilist: string list
    """
    with open('final.s', 'w') as out:
        chunks = ['.section .text\n']
        if ELF_utils.elf_arm():
            chunks.append('.syntax unified\n.align 2\n.thumb\n')
        chunks.append('\n'.join(ilist))
        chunks.append('\n\n')
        out.writelines(chunks)
def main(instrument=False):
    """
    Transform malformed code and add main symbol

    'final.s' is read, patched line by line (executables with a non-empty
    symbol in 'main.info' only) and written back. Afterwards inline symbols
    are updated when 'inline_symbols.txt' exists.
    :param instrument: True to insert instrumentation code
    """
    with open("final.s") as f:
        lines = f.readlines()

    if ELF_utils.elf_exe():
        with open('main.info') as f:
            main_symbol1 = f.readline().strip()

        if main_symbol1 != '':
            def fix_instruction(text):
                # Only the first matching rule is applied to a line
                if '__gmon_start__' in text:
                    return ''
                if 'lea 0x7FFFFFFC(,%ebx,0x4),%edi' in text:
                    return text.replace('0x7FFFFFFC', '0x7FFFFFFFFFFFFFFC')
                if 'movzbl $S_' in text:
                    return text.replace('movzbl $S_', 'movzbl S_')
                if 'jmpq ' in text and '*' not in text:
                    return text.replace('jmpq ', 'jmp ')
                if 'repz retq' in text:
                    # to solve the error of 'expecting string instruction after `repz'
                    return text.replace('repz retq', 'retq')
                if 'repz ret' in text:
                    return text.replace('repz ret', 'ret')
                if 'nop' in text:
                    return text.replace('nop', ' ')
                return text

            def rename_main(text):
                # Turn the label of the discovered symbol into a global 'main'
                if main_symbol1 + ' :' in text:
                    rep = '.globl main\nmain : '
                    if instrument:
                        rep += '\n'.join(
                            [e['plain'].beforemain for e in config.instrumentors]) + '\n'
                    return text.replace(main_symbol1 + ' : ', rep)
                if main_symbol1 in text:
                    return text.replace(main_symbol1, 'main')
                return text

            lines = [rename_main(fix_instruction(l)) for l in lines]

    with open("final.s", 'w') as f:
        f.writelines(lines)
        if instrument:
            trailer = [e['plain'].aftercode for e in config.instrumentors]
            f.write('\n'.join(trailer) + '\n')

    if os.path.isfile('inline_symbols.txt'):
        inline_update.main()
def post_analyze(il, re):
    """
    Make final adjustments and write code to file

    Locations are unified, an ARM specific alignment pass is applied on ARM
    binaries, the instructions are rendered to strings, global labels are
    adjusted and the result is written with pp_print_file.
    :param il: instruction list
    :param re: symbol reconstruction object
    """
    unified = re.unify_loc(il)
    if ELF_utils.elf_arm():
        unified = re.alignvldrARM(unified)
    rendered = pp_print_list(unified)
    adjusted = re.adjust_globallabel(Analysis.global_bss(), rendered)
    pp_print_file(adjusted)
def textProcess(self):
    """
    Code disassembly dump

    Produces 'instrs.info' from the '<file>.temp' listing and then
    'text_mem.info' from the first column of 'instrs.info'.
    """
    # useless_func_del.main(self.file)
    if not ELF_utils.elf_arm():
        extern_symbol_process.pltgot(self.file)
        listing = self.file + ".temp"
        os.system("cat " + listing + " | grep \"^ \" | cut -f1,3 > instrs.info")
    else:
        # NOTE(review): arm_process is presumably responsible for creating
        # instrs.info on ARM -- confirm
        arm_process.arm_process(self.file)
    os.system("cut -f 1 instrs.info > text_mem.info")
def gotexternals(self):
    """
    Replace external symbols in .got

    'gotglobals.info' is expected to hold one '<hex address> <symbol>' pair
    per line; everything after '@' in the symbol is dropped. Each symbol
    located inside .got replaces the entry at its offset in self.got_list,
    and the entries that follow it within the same word are blanked.
    """
    def parse(line):
        fields = line.split()
        return (int(fields[0], 16), fields[1].split('@')[0])

    with open('gotglobals.info') as f:
        syms = sorted([parse(line) for line in f], key=lambda e: e[0])

    gotsec = self.sec['.got']
    if ELF_utils.elf_32():
        datatype, skiplen = '.long ', 3
    else:
        datatype, skiplen = '.quad ', 7

    for addr, name in syms:
        # Ignore symbols that do not fall inside the .got section
        if not gotsec.sec_begin_addr <= addr < gotsec.sec_begin_addr + gotsec.sec_size:
            continue
        off = addr - gotsec.sec_begin_addr
        self.got_list[off] = ('S_' + dec_hex(addr) + ': ', datatype + name)
        self.got_list[off + 1:off + 1 + skiplen] = [('', '')] * skiplen
def main():
    """
    Run the library post-processing tasks on 'final.s'

    Nothing is done unless the target is a shared library. The file is
    rewritten with the lines returned by task3_4().
    """
    if not ELF_utils.elf_lib():
        return

    with open('final.s') as src:
        # The lines read here are superseded by the result of task3_4() below
        contents = src.readlines()

    task1()
    task2()
    contents = task3_4()
    # task5() is currently disabled

    # write back
    with open('final.s', 'w') as dst:
        dst.writelines(contents)
def perform(instrs, funcs):
    """
    Perform gfree instrumentation

    After the three protection passes, instructions are rewritten on non-ARM
    binaries; on ARM the IT instructions are removed when
    config.gfree_ARMITdelete is set.
    :param instrs: list of program's instruction
    :param funcs: list of function objects
    :return: instrumented list of instructions
    """
    gfree = GfreeInstrumentation(instrs, funcs)
    gfree.findfreebranches()
    gfree.indirectprotection()
    gfree.returnprotection()

    if ELF_utils.elf_arm():
        if config.gfree_ARMITdelete:
            gfree.remove_its()
    else:
        gfree.rewrite_instr()

    return gfree.instrs
def generatefuncID(self):
    """
    Generate unique function identifier

    Random 32 bit values are drawn until one is found that has not been
    handed out before and contains none of alignmentenforce.badbytes.
    :return: integer for x86, integer tuple for ARM
    """
    while True:
        fid = pack('<I', random.getrandbits(32))
        if fid in self.fIDset:
            continue
        if any(b in alignmentenforce.badbytes for b in fid):
            continue
        self.fIDset.add(fid)
        if ELF_utils.elf_arm():
            # Two unsigned 16 bit halves
            return unpack('<HH', fid)
        return unpack('<i', fid)[0]
def bswapsub(reg, loc):
    """
    Replace target register in bswap containing ret encoding bytes

    The value is moved to edi/rdi, swapped there and moved back; the scratch
    register is saved on the stack around the sequence.
    :param reg: bswapped register
    :param loc: instuction location
    :return: instruction list with replaced bswap
    """
    # Scratch register of the same width as reg (names starting with 'e' -> edi)
    scratch_name = 'edi' if reg[0].lower() == 'e' else 'rdi'
    sub = Types.RegClass(scratch_name)
    # Register pushed/popped: full width for the current architecture
    stack_name = 'edi' if ELF_utils.elf_32() else 'rdi'
    substack = Types.RegClass(stack_name)

    block = [
        Types.DoubleInstr(('push', substack, None, False)),
        Types.TripleInstr(('mov', sub, reg, None, False)),
        Types.DoubleInstr(('bswap', sub, None, False)),
        Types.TripleInstr(('mov', reg, sub, None, False)),
        Types.DoubleInstr(('pop', substack, None, False)),
    ]
    return set_inlineblocklocation(loc, block)
def p_exp(exp):
    """
    String from expression

    The expression type is tested in a fixed order; the ARM specific
    expression types are only considered for ARM binaries.
    :param exp: expression
    :return: expression string, None when the type is not handled
    """
    if isinstance(exp, Types.Const):
        return p_const(exp)
    if isinstance(exp, Types.Symbol):
        return p_symbol(exp)
    if isinstance(exp, Types.AssistOpClass):
        return p_assist(exp)
    if isinstance(exp, Types.Ptr):
        return p_ptraddr(exp)
    if isinstance(exp, Types.RegClass):
        return p_reg(exp)
    if isinstance(exp, Types.Label):
        return str(exp)
    if ELF_utils.elf_arm():
        if isinstance(exp, Types.ShiftExp):
            return p_shift(exp)
        if isinstance(exp, Types.RegList):
            return p_reglist(exp)
        if isinstance(exp, Types.TBExp):
            return p_tbexp(exp)
    return None
def visit_heuristic_analysis(self, instrs):
    """
    Reconstruct symbolic information

    The addresses parsed from self.deslist are prepended to self.symbol_list
    once the instructions have been visited.
    :param instrs: instruction list
    :return: instruction list with labels
    """
    def in_text(instr):
        # Result of check_text for the instruction's own address
        return self.check_text(get_loc(instr).loc_addr)

    self.instr_list = instrs
    if ELF_utils.elf_arm():
        self.pcreloffARM(instrs)
        instrs = [self.vinst2ARM(pair) for pair in enumerate(instrs)]
        self.doublemovARM(instrs)
    else:
        instrs = [self.vinst2(in_text, instr) for instr in instrs]

    # Use the text after the first 'x' of each entry as a hexadecimal address
    new_symbols = [int(des.split('x')[1], 16) for des in self.deslist]
    self.symbol_list = new_symbols + self.symbol_list
    return instrs
def main():
    """
    Normalize the lines of 'instrs.info' for 32 bit binaries

    Every line is stripped of leading/trailing whitespace and written back
    terminated by a single newline. Nothing is done for other binaries.
    """
    if not ELF_utils.elf_32():
        return

    with open('instrs.info') as f:
        # Lines ending in 'nop' get no special treatment: the rewrite that
        # used to be applied to them is disabled
        lines = [raw.strip() + "\n" for raw in f.readlines()]

    with open('instrs.info', 'w') as f:
        f.writelines(lines)
def sectionProcess(self):
    """
    Dump section boundaries

    Columns 2, 4, 5 and 6 of the `readelf -SW` rows matching a section name
    pattern are written to sections.info, text_sec.info, init_sec.info and
    plt_sec.info. The objdump output for .init_array (stderr included) goes
    to a freshly created init_array.info.
    """
    badsec = '.got.plt' if ELF_utils.elf_32() else '.data.rel.ro'
    readelf = "readelf -SW " + self.file

    def rows(pattern):
        # Shell pipeline selecting the readelf rows that match the pattern
        return readelf + " | awk '/" + pattern + "/ {print $2,$4,$5,$6} '"

    # NOTE(review): the backslash-space after the second awk reaches the shell
    # verbatim -- presumably a leftover line continuation; confirm
    os.system(rows('data|bss|got')
              + " | awk \\ '$1 != \"" + badsec
              + "\" {print $1,$2,$3,$4}' > sections.info")
    os.system(rows('text') + " > text_sec.info")
    os.system(rows('init')
              + " | awk '$1 != \".init_array\" {print $1,$2,$3,$4}' > init_sec.info")

    # Start from a clean file because the dump below appends to it
    if os.path.isfile('init_array.info'):
        os.remove('init_array.info')
    os.system(config.objdump + " -s -j .init_array " + self.file
              + " >> init_array.info 2>&1")

    os.system(readelf + " | awk '$2==\".plt\" {print $2,$4,$5,$6}' > plt_sec.info")
def pp_print_instr(i):
    """
    Get instruction string in assembler syntax

    Instructions whose location is not visible are rendered as their
    location string only. Otherwise location and prefix are followed by the
    operands, printed according to the instruction type.
    :param i: instruction
    :return: instruction string
    """
    loc = get_loc(i)
    if not loc.loc_visible:
        return p_location(loc)

    head = p_location(loc) + p_prefix(i[-1])
    if isinstance(i, Types.SingleInstr):
        return head + p_single(i[0])
    if isinstance(i, Types.DoubleInstr):
        return head + p_double(i[0], i[1])
    if isinstance(i, Types.TripleInstr):
        return head + p_triple(i[0], i[1], i[2])
    if isinstance(i, Types.FourInstr):
        return head + p_four(i[0], i[1], i[2], i[3])
    if isinstance(i, Types.FiveInstr):
        return head + p_five(i[0], i[1], i[2], i[3], i[4])
    if ELF_utils.elf_arm() and isinstance(i, Types.CoproInstr):
        return head + p_copro(i)
    return head
def addxorcanary(self, i, func):
    """
    Apply return address encryption

    Functions listed in self.avoid are skipped. For a function without
    recorded indirect calls a header block is inserted at index i; then
    every return address recorded for the function is located and replaced
    by a footer block. self.instrs is modified in place.
    :param i: starting instruction index
    :param func: current funtion
    :return: instruction index after last inserted block
    """
    if func.func_begin_addr in self.avoid:
        return i + 1
    if len(self.indcalls[func.func_begin_addr]) == 0:
        # Replace the instruction at i with the header block and leave i on
        # the last element of that block
        header = inlining.get_returnenc(self.instrs[i])
        self.instrs[i:i + 1] = header
        i += len(header) - 1
        popcookie = False
    else:
        # No header is inserted here; the footer is asked to pop the cookie
        popcookie = True
    for t in self.rets[func.func_begin_addr]:
        # Advance to the instruction located at the return address t
        # (assumes self.rets is ordered by address -- TODO confirm)
        while get_loc(self.instrs[i]).loc_addr != t:
            i += 1
        if ELF_utils.elf_arm() and self.instrs[i][0][-2:] in Types.CondSuff:
            # Handle somehow IT blocks
            # Look backwards (at most 4 instructions) for the opening IT
            itlen = 0
            while not self.instrs[i - itlen][0].upper().startswith('IT') and itlen < 5:
                itlen += 1
            if itlen < 5:
                i -= itlen
                # NOTE(review): the slice length is the IT mnemonic length
                # plus one -- confirm it matches the size of the IT block
                j = len(self.instrs[i][0].strip()) + 1
                self.instrs[i:i + j] = inlining.translate_it_block(self.instrs[i:i + j])
                # The block was rewritten: find the return instruction again
                while get_loc(self.instrs[i]).loc_addr != t:
                    i += 1
        footer = inlining.get_returnenc(self.instrs[i], popcookie)
        self.instrs[i:i + 1] = footer
        i += len(footer)
    return i
def main_discover(filename):
    """
    Find main function address and store it to file

    The output of `file` is saved to 'elf.info'. For executables the .text
    section is disassembled to '<filename>.temp', the argument passed to
    __libc_start_main is extracted from the listing and written to
    'main.info' as 'S_0x<ADDRESS>'.
    :param filename: path to target executable
    :raises Exception: when config.arch is neither x86 nor ARM Thumb
    """
    # NOTE(review): elf.info is presumably what the ELF_utils checks read -- confirm
    os.system('file ' + filename + ' > elf.info')
    if ELF_utils.elf_exe():
        os.system(config.objdump + ' -Dr -j .text ' + filename + ' > ' + filename + '.temp')

        with open(filename + '.temp') as f:
            lines = f.readlines()

        ll = len(lines)
        main_symbol = ""

        if config.arch == config.ARCH_X86:
            for i in xrange(ll):
                l = lines[i]
                # when not using O2 to compile the original binary, we will remove all the _start code,
                # including the routine attached on the original program. In that case, we can not discover the
                # main function
                if "<__libc_start_main@plt>" in l:
                    # The operand of the instruction right before the call:
                    # the whole last field on 32 bit, its first comma
                    # separated part on 64 bit
                    main_symbol = lines[i-1].split()[-1] if ELF_utils.elf_32() \
                        else lines[i-1].split()[-1].split(',')[0]
                    if main_symbol == '%eax':
                        # to fit gcc-4.8 -m32, the address is mov to %eax, then push to stack
                        main_symbol = lines[i - 2].split()[-1].split(',')[0].split('0x')[1]
                    else:
                        # Keep only the hex digits following '0x'
                        main_symbol = main_symbol.split('0x')[1]
                    break
                    #lines[i-1] = lines[i-1].replace(main_symbol, "main")
                    #main_symbol = main_symbol[1:].strip()
                    #print main_symbol

            ## Some of the PIC code/module rely on typical pattern to locate
            ## such as:
            ##    804c460: push   %ebx
            ##    804c461: call   804c452 <__i686.get_pc_thunk.bx>
            ##    804c466: add    $0x2b8e,%ebx
            ##    804c46c: sub    $0x18,%esp
            ## What we can do this pattern match `<__i686.get_pc_thunk.bx>` and calculate
            ## the address by plusing 0x2b8e and 0x804c466, which equals to the begin address of GOT.PLT table
            ## symbols can be leveraged in re-assemble are
            ##    _GLOBAL_OFFSET_TABLE_   ==    ** .got.plt **
            ##    ....

        elif config.arch == config.ARCH_ARMT:
            ##  1035c:       4803            ldr     r0, [pc, #12]   ; (1036c <_start+0x28>)
            ##  1035e:       4b04            ldr     r3, [pc, #16]   ; (10370 <_start+0x2c>)
            ##  10360:       f7ff efde       blx     10320 <__libc_start_main@plt>
            ##  10364:       f7ff efe8       blx     10338 <abort@plt>
            ##  ...
            ##  1036c:       0001052d
            for i in xrange(ll):
                l = lines[i]
                if '<__libc_start_main@plt>' in l:
                    # Walk backwards to the 'ldr r0' and take the literal
                    # address from its trailing '; (addr <...>)' comment
                    # NOTE(review): pcraddr stays unbound when no such line
                    # is found, which would raise a NameError below -- confirm
                    j = i - 1
                    while j > 0:
                        if 'ldr' in lines[j] and 'r0' in lines[j]:
                            pcraddr = lines[j].split(';')[1].strip().split()[0][1:]
                            break
                        j -= 1
                    # Walk forward to the line holding the literal itself
                    j = i + 1
                    while j < ll:
                        if lines[j].strip().startswith(pcraddr):
                            main_symbol = lines[j].split()[1]
                            if len(main_symbol) < 8:
                                # Fewer than 8 hex digits: prepend the second
                                # field of the following line
                                main_symbol = lines[j + 1].split()[1] + main_symbol
                            # Clear the lowest bit (presumably the Thumb bit)
                            main_symbol = int(main_symbol.lstrip('0'), 16) & (-2)
                            main_symbol = '%X' % main_symbol
                            break
                        j += 1
                    break
        else:
            raise Exception('Unknown arch')

        with open("main.info", 'w') as f:
            f.write('S_0x' + main_symbol.upper() + '\n')
:param assist: assist operator :return: lowercase assist operator """ return str(assist).lower() def p_loc(loc): """ String of location address :param loc: location address :return: lowercase hexdecimal string """ return '0x%x' % loc if ELF_utils.elf_arm(): ## ARM def p_reg(reg): """ String of register :param reg: register :return: lowercase register string """ return str(reg).lower() def p_shift(shift): """ String of shift operand :param shift: shift value :return: shift operand string