def test_disasm_against_objdump(objdump_path, binary_path):
    """Compare this disassembler's output against objdump, word by word.

    Every executable segment of the ELF file is walked in 4-byte steps. Each
    word is disassembled with ``HexagonDisassembler`` (in objdump-compatible
    mode) and with ``ObjdumpWrapper``; mismatching outputs are printed, and
    the elapsed time and match percentage are reported at the end.

    :param objdump_path: path handed to ``ObjdumpWrapper``.
    :param binary_path: path of the ELF file to disassemble.
    :return: None; all results are printed.
    """
    # TODO: code repetition from test_disasm_standalone, encapsulate inner functionality.
    start_time = time.time()
    total_inst = 0
    match_inst = 0

    print(('Processing file:', binary_path))

    # Keep the file open for as long as segment data is read from it, and
    # make sure it is closed on every exit path.
    with open(binary_path, 'rb') as binary_file:
        elf_file = ELFFile(binary_file)

        if elf_file.num_segments() == 0:
            print('There are no program headers in this file.')
            return

        objdump = ObjdumpWrapper(objdump_path)
        disasm = HexagonDisassembler(objdump_compatible=True)

        for segment in elf_file.iter_segments():
            # Only executable segments contain code worth comparing.
            if not segment['p_flags'] & P_FLAGS.PF_X:
                continue

            print("Offset: {:x}".format(segment['p_offset']))
            print("VirtAddr: {:x}".format(segment['p_vaddr']))
            print("FileSiz: {:x}".format(segment['p_filesz']))

            segment_data = segment.data()
            data_pos = 0

            while data_pos + INST_SIZE <= len(segment_data):
                addr = segment['p_vaddr'] + data_pos

                inst_as_int = struct.unpack(
                    '<I', segment_data[data_pos:data_pos + 4])[0]
                disasm_output = disasm.disasm_one_inst(
                    inst_as_int, addr).text.strip()

                # objdump is handed up to four words (presumably one full
                # packet), clamped to the end of the segment. The clamp must
                # use the buffer *length*, not the buffer itself.
                packet_end = min(data_pos + 4 * 4, len(segment_data))
                objdump_output = objdump.disasm_packet_raw(
                    segment_data[data_pos:packet_end], addr).strip()

                if objdump_output != disasm_output:
                    print("[{:08x}] {:s}".format(addr, objdump_output))
                    print("[{:08x}] {:s}".format(addr, disasm_output))
                    print()
                else:
                    match_inst += 1

                data_pos += 4
                total_inst += 1

    elapsed_time = time.time() - start_time
    print("Elapsed time: {0:.2f}".format(elapsed_time))
    if total_inst:
        # 100.0 forces float arithmetic regardless of the division semantics.
        print('Match: {0:.2f}%'.format(100.0 * match_inst / total_inst))
    else:
        # No executable segment produced an instruction; avoid dividing by 0.
        print('Match: n/a (no instructions processed)')
def test_disasm_standalone(binary_path, timeout=None):
    """Disassemble every executable segment of an ELF file and profile the run.

    Each 4-byte word of every executable segment is disassembled with
    ``HexagonDisassembler`` and printed. When finished (or when the timeout
    expires) the 20 most expensive calls by cumulative time, the number of
    instructions processed and the elapsed time are printed.

    :param binary_path: path of the ELF file to disassemble.
    :param timeout: optional limit in seconds; once exceeded, disassembly
        stops for the whole file. ``None`` or ``0`` disables the limit.
    :return: None; all results are printed.
    """
    profile = cProfile.Profile()
    profile.enable()
    start_time = time.time()
    total_inst = 0

    try:
        print(('Processing file:', binary_path))

        # Keep the file open for as long as segment data is read from it, and
        # make sure it is closed on every exit path.
        with open(binary_path, 'rb') as binary_file:
            elf_file = ELFFile(binary_file)

            if elf_file.num_segments() == 0:
                print('There are no program headers in this file.')
                return

            disasm = HexagonDisassembler()
            timed_out = False

            for segment in elf_file.iter_segments():
                # Only executable segments contain code.
                if not segment['p_flags'] & P_FLAGS.PF_X:
                    continue

                print("Offset: {:x}".format(segment['p_offset']))
                print("VirtAddr: {:x}".format(segment['p_vaddr']))
                print("FileSiz: {:x}".format(segment['p_filesz']))

                segment_data = segment.data()
                data_pos = 0

                while data_pos + INST_SIZE <= len(segment_data):
                    addr = segment['p_vaddr'] + data_pos

                    inst_as_int = struct.unpack(
                        '<I', segment_data[data_pos:data_pos + 4])[0]
                    dis = disasm.disasm_one_inst(inst_as_int, addr)
                    print("[{:08x}] {:s}".format(addr, dis.text))

                    data_pos += 4
                    total_inst += 1

                    if timeout and (time.time() - start_time) > timeout:
                        timed_out = True
                        break

                # A plain ``break`` above only leaves the inner loop; stop
                # walking the remaining segments as well.
                if timed_out:
                    break
    finally:
        # Never leave the profiler running, not even on the early return.
        profile.disable()

    prof_stats = pstats.Stats(profile)
    prof_stats.strip_dirs().sort_stats('cumulative').print_stats(20)

    print("Total instructions: " + str(total_inst))
    elapsed_time = time.time() - start_time
    print("Elapsed time: " + str(elapsed_time))
def __init__(self):
    """Initialize the processor module state.

    Runs the ``idaapi.processor_t`` initializer, records whether the input
    file name ends in ``.o``, calls ``init_instructions`` and creates the
    disassembler, its result cache and a profiler (which is also published
    as ``hexagondisasm.profiler``).
    """
    idaapi.processor_t.__init__(self)

    # TODO: logging not working.
    # self.work_folder = ""
    # self.log_fn = self.work_folder + 'work.log'
    # logging.basicConfig(filename=self.log_fn, level=logging.DEBUG, filemode='w')
    # self.logger = open(self.log_fn, 'w')

    # Input files whose name ends in ".o" are treated as relocatable.
    self.relocatable_file = re.search(r'\.o$', GetInputFile()) is not None

    self.init_instructions()

    self.prev_addr_analyzed = -1
    self.current_hex_packet = None

    self.hd = HexagonDisassembler()
    # TODO: this should be instantiated on demand, because I think the init
    # is called every time IDA starts.

    self.disasm_cache = {}
    # TODO: use an OrderedDict to remove old entries.

    self.profiler = cProfile.Profile()
    hexagondisasm.profiler = self.profiler
def bin_func_find(base_file_name, target_file_name, function_name_adr,
                  max_results):
    """
    locates a function of a base file inside a target ELF file

    A search pattern (data plus mask) is generated from the function's code in
    the base file and searched for in the target file. The pattern length is
    adapted iteratively: it grows while the pattern still matches more than
    once and shrinks when it does not match at all, until a unique match is
    found or the length range is exhausted.

    :param base_file_name: name of the base input file (ELF or object file)
    :param target_file_name: name of the target ELF file
    :param function_name_adr: address OR name of the function of interest
    :param max_results: maximum number of results to return; if the search
        cannot narrow the candidates down to at most this many matches, an
        empty list is returned
    :return: -1 if one of the inputs is not an ELF file, -2 if the function
        could not be found in the base file, otherwise the matches converted
        by file_offset_to_elf (or an empty list, see max_results)
    """
    # Context managers guarantee both files are closed on every return path.
    with open(base_file_name, 'rb') as reference_file:
        with open(target_file_name, 'rb') as search_file:
            # check input files
            if not is_elf(reference_file) or not is_elf(search_file):
                print("error: at least one of the input files is not an ELF file")
                return -1

            ref_offset = get_offset_in_reference_file(base_file_name,
                                                      function_name_adr)
            if ref_offset == 0:
                print("error: could not find function in base file")
                return -2

            # determine target file length (rounded down to whole 32-bit
            # words), read it and parse its metadata
            search_file.seek(0, os.SEEK_END)
            search_file_length = search_file.tell()
            search_file_length -= search_file_length % 4
            search_file.seek(0)
            search_file_data = struct.unpack(
                "<%dI" % (search_file_length // 4),
                search_file.read(search_file_length))
            search_file.seek(0)
            search_file_metadata = parse_metadata(search_file)

        # determine base file length
        reference_file.seek(0, os.SEEK_END)
        max_size = reference_file.tell() - ref_offset
        min_size = 2
        cur_size = 2

        print("searching for function %s" % function_name_adr)

        disasm = HexagonDisassembler(objdump_compatible=True)

        # loop to search with different search string lengths in order to try
        # to find a unique match
        while True:
            print("current search string length: %d min length: %s max length: %s" % (
                cur_size, min_size, max_size))

            # generate search data and perform the search; two results are
            # enough to tell "unique" from "ambiguous"
            search_data, search_mask = generate_search_mask(
                reference_file, ref_offset, cur_size, disasm)
            res = find_function_in_target(search_file_data, search_data,
                                          search_mask, 2)
            num_res = len(res)

            if num_res == 1:
                # unique match: we are done!
                phys_adr = file_offset_to_elf(search_file_metadata, res)
                print("found function %s of base file at 0x%08X of target" % (
                    function_name_adr, phys_adr[0]))
                return phys_adr

            if num_res > 1:
                # multiple matches
                if cur_size >= max_size:
                    # stop if we reached the maximum size..we cannot get better
                    print("could not identify function, multiple matches for maximal size")
                    res = find_function_in_target(
                        search_file_data, search_data, search_mask,
                        max_results + 1)
                    if len(res) <= max_results:
                        return file_offset_to_elf(search_file_metadata, res)
                    return []

                # compute search data length for next iteration
                min_size = cur_size + 1
                cur_size = min(cur_size * 2, max_size)
            else:
                # no match
                if cur_size <= min_size:
                    # stop if we reached minimal size, we cannot find a
                    # unique match
                    print("could not identify function, no matches for minimal size")

                    # regenerate last result with multiple matches
                    # (cur_size - 1)
                    search_data, search_mask = generate_search_mask(
                        reference_file, ref_offset, cur_size - 1, disasm)
                    res = find_function_in_target(
                        search_file_data, search_data, search_mask,
                        max_results + 1)
                    if len(res) <= max_results:
                        return file_offset_to_elf(search_file_metadata, res)
                    return []

                # compute search data length for next iteration; floor
                # division keeps the length an integer on every Python version
                max_size = cur_size - 1
                cur_size = (max_size - min_size) // 2 + min_size
def generate_function(org_func_name, org_func, symtab, base_elf, metadata, func_symtab):
    """
    generates a helper function to call the original version of an overwritten firmware function

    The first instructions of the original function are read from the base
    ELF, disassembled and emitted as an inline ``asm`` block, followed by a
    jump to the remainder of the original function. Immediates that change
    with the instruction address are replaced by generated ``sym_0x...``
    symbols, which are recorded in func_symtab.

    Terminates the process with exit status 1 if the address of org_func
    resolves to "unknown".

    :param org_func_name: name of the new function
    :param org_func: original function name, the one we replaced
    :param symtab: symbol table
    :param base_elf: base firmware ELF file (used via seek()/read();
        presumably opened in binary mode - confirm against the caller)
    :param metadata: base firmware metadata
    :param func_symtab: table of new symbols needed by generated functions
        (modified in place and also returned)
    :return: tuple (function_decl, function_def, func_symtab): the C
        declaration text, the C definition text and the symbol table
    """
    addr_str, ret_type, param_str = resolve_symbol_all(org_func, symtab)
    if (addr_str == '"unknown"'):
        print "error: trying to generate fw_org function for a function with unknown location"
        exit(1)

    address = int(addr_str, 0)

    # read the first 5 instructions of the destination function
    # we have to read 5 instructions in case the first packet contains only 1 instruction,
    # then we have to relocate 2 packets (with up to 5 instructions in total)
    elf_pos, elf_blob = get_offset_in_elf(metadata, address)
    base_elf.seek(elf_pos)
    data = struct.unpack("<IIIII", base_elf.read(20))

    # two disassemblers: one is fed the real addresses, the other addresses
    # starting at 0 (see the comparison in the loop below)
    disasm = HexagonDisassembler(objdump_compatible=True)
    disasm0 = HexagonDisassembler(objdump_compatible=True)

    # text to put in front of the next emitted instruction
    next_prefix = ""

    # generate function start text
    function_decl = "%s %s(%s)" % (ret_type, org_func_name, param_str)
    function_def = "%s {\n\tasm(\n" % function_decl
    function_decl += ";"

    # iterate over the five instructions fetched
    # reloc_size counts every instruction word consumed, including skipped ones
    reloc_size = 0
    for pos in range(0, 5):
        reloc_size += 1

        # disassemble instruction with correct position
        hi = disasm.disasm_one_inst(data[pos], address + pos * 4)
        # disassemble instruction again with position 0 to check for PC relative immediates
        hi0 = disasm0.disasm_one_inst(data[pos], pos * 4)

        # NOTE(review): presumably this is a constant-extender word; it is not
        # emitted itself, but if it opens a packet the "{ " is carried over to
        # the next emitted instruction - confirm against HexagonDisassembler
        if (hi.immext is not None):
            if (hi.start_packet):
                next_prefix = "{ "
            continue

        disasm_output = hi.text.strip()
        disasm_output_hi0 = hi0.text.strip()

        # if we have a relative immediate
        if (disasm_output != disasm_output_hi0):
            # loop over all immediates
            for pos_imm, imm in enumerate(hi.imm_ops):
                imm0 = hi0.imm_ops[pos_imm]
                # check for difference -> PC relative immediate
                if (imm0 != imm):
                    # generate a dummy symbol at the destination address
                    # and write our target relative to this, by this we
                    # force the compiler to generate a relocation for
                    # the immediate for us
                    dest_address = address + imm0.value
                    symbol = "sym_0x%X" % dest_address
                    func_symtab[symbol] = dest_address
                    rel_adr = "%s" % symbol
                    # the immediate is searched for in lower-case hex form
                    disasm_output = disasm_output.replace(
                        ("0x%X" % imm.value).lower(), rel_adr)

        function_def += '\t\t"%s%s\\n\\t" \n' % (next_prefix, disasm_output)
        next_prefix = ""

        # stop at the first packet end after position 0; a packet ending at
        # position 0 is not enough (see the comment on the 5-instruction read)
        if ((pos != 0) & (hi.end_packet)):
            break

    # generate a jump instruction to the start of remaining original function
    # (reloc_size instruction words of 4 bytes each were relocated)
    org_func_start = (address + (reloc_size << 2))
    symbol = "sym_0x%X" % org_func_start
    func_symtab[symbol] = org_func_start
    function_def += '\t\t"{ jump %s }"\n' % symbol
    function_def += "\t);\n}"

    return function_decl, function_def, func_symtab