def show_asm(buff, mode, base):
    """
    Disassemble a byte sequence as x86 code and render it as text.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.
    :return: One formatted line per instruction, followed by a ``*/`` line.
    :rtype: str
    """
    engine = Cs(CS_ARCH_X86, mode)
    engine.detail = True

    rendered = []
    for insn in engine.disasm(buff, base):
        hex_digits = binascii.hexlify(insn.bytes).decode('utf-8')
        # Pair up the hex digits so every byte is separated by a space.
        byte_text = ' '.join(
            high + low for high, low in zip(hex_digits[::2], hex_digits[1::2]))
        if len(byte_text) > 18:
            # Long encodings are cut to 18 characters and flagged with '+'.
            byte_text = byte_text[:18] + '+'
        address_text = '%08x:' % insn.address
        rendered.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            address_text, byte_text, insn.mnemonic, insn.op_str))

    rendered.append('*/\n')
    return ''.join(rendered)
def _disassemble(self, data):
    """Generator yielding formatted output entries for ``data``.

    Entries are produced by ``self._format`` (instructions, ``db`` runs of
    zero bytes, ``db`` strings) and by ``self._bytepatch`` (spans that were
    skipped over).  Two positions are tracked: ``done`` is the offset up to
    which output has been emitted, ``cursor`` is the offset currently being
    probed for a decodable instruction.
    """
    capstone = Cs(*self.args.mode.value)
    strz = self._strings(data)
    # First string record, or None when the iterator is empty.
    # NOTE(review): `string` is not advanced anywhere in this block - confirm
    # whether only the first string is meant to be handled.
    string = next(strz, None)
    cursor, done = 0, 0
    while done < len(data):
        # The probe position never lags behind what was already emitted.
        cursor = max(cursor, done)
        patchsize = self._nullsize(data, cursor, self.args.width)
        if patchsize > 2:
            # Emit the run reported by _nullsize as a single 'db 0,0,...' entry.
            yield self._format(done, data[done:done + patchsize], 'db', ','.join('0' * patchsize))
            done += patchsize
            continue
        if cursor >= len(data):
            # Probing ran off the end: flush everything not yet emitted.
            yield self._bytepatch(data, done, len(data))
            done = cursor
        if string and cursor >= string.end:
            # The probe moved past the string: emit the bytes before it, then
            # the string itself as a 'db' entry.
            yield self._bytepatch(data, done, string.start)
            yield self._format(string.start, data[string.start:string.end], 'db', string.data)
            done = string.end
            continue
        try:
            # Decode exactly one instruction from a window of at most 15 bytes.
            ins = next(capstone.disasm(
                data[cursor:cursor + 15], cursor, count=1))
            end = ins.address + ins.size
            if self.args.str and string:
                if end > string.start and string.end > cursor:
                    # The instruction overlaps the string; skip past the string.
                    cursor = string.end
                    continue
        except StopIteration:
            # Nothing decodable here; try the next byte.
            cursor += 1
            continue
        else:
            yield self._format(ins.address, ins.bytes, ins.mnemonic, ins.op_str)
            done = end
def generate_rule(self):
    """Populate ``self.yr_rule`` from the collected chunks and return it.

    Code chunks are disassembled and turned into one hex string per chunk
    (one line per instruction); data chunks are hex-encoded as they are.
    """
    rule = self.yr_rule
    rule.rule_name = self.rule_name
    rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
    rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    rule.metas["version"] = "\"1.0\""

    md = Cs(self.instruction_set, self.instruction_mode)
    md.detail = True
    md.syntax = CS_OPT_SYNTAX_INTEL

    for chunk_nr, chunk in enumerate(self._chunks, 1):
        chunk_id = "$chunk_{}".format(chunk_nr)

        if chunk.is_data is not False:
            # Data chunk: no disassembly, just the hex-encoded bytes.
            data_part = self.format_hex(chunk.data.encode("hex"))
            rule.add_string(chunk_id, data_part, StringType.HEX)
            continue

        signature_lines = []
        comment_lines = []
        for ins in md.disasm(chunk.data, chunk.offset):
            rule_part, comment = self._process_instruction(ins)
            signature_lines.append(self.format_hex(rule_part) + "\n")
            comment_lines.append(comment + "\n")

        rule.add_string(chunk_id, "".join(signature_lines), StringType.HEX)
        if self.do_comment_sig:
            rule.comments.append("".join(comment_lines))

    rule.condition = "any of them"
    return self.yr_rule
def __init__(self, func, r2obj: dict):
    """Build a basic block from one radare2 block description.

    :param func: the owning function object, stored as ``self.parent``.
    :param r2obj: radare2 block dict; must provide ``'offset'`` and ``'ops'``
        (each op providing ``'offset'`` and ``'bytes'``); ``'jump'``,
        ``'fail'`` and ``'switchop'`` are optional.
    :raises CapstoneDecodeError: when an op does not decode to exactly one
        instruction.
    :raises UnhandledOutputError: when a required key is missing.
    """
    self.parent = func
    try:
        self.address = r2obj['offset']
        self.jump = r2obj.get('jump', 0)
        self.fail = r2obj.get('fail', 0)
        cases = r2obj.get('switchop', dict()).get('cases', dict())
        self.cases = {c['jump'] for c in cases}
        self.insns = []

        ops = r2obj['ops']
        # The decoder does not depend on the op, so build it once per block
        # instead of once per instruction.
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        for op in ops:
            _addr = op['offset']
            _insns = list(
                md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
            if len(_insns) != 1:
                raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')
            _insn: CsInsn = _insns[0]
            _reads, _ = _insn.regs_access()
            # A jmp that reads any register is treated as indirect.
            indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
            self.insns.append(Instruction(_addr, indirect))
    except KeyError as exc:
        # 'offset' itself may be the missing key, in which case self.address
        # was never assigned; do not let the handler fail on that.
        address = getattr(self, 'address', None)
        where = f'{address:#x}' if address is not None else '<unknown address>'
        err_msg = f'Unexpected radare2 output at Basic Block {where}'
        logging.error(err_msg)
        raise UnhandledOutputError(err_msg) from exc
def __init__(self, arch, mode):
    """Create the disassembler and the lookup tables used by this object.

    ``mode`` selects the prologue byte patterns and must be ``CS_MODE_32`` or
    ``CS_MODE_64``; any other value raises ``KeyError``.
    """
    self.arch = arch
    self.mode = mode
    self.capstone = Cs(self.arch, self.mode)

    # NOTE(review): the previous comment here said triple backslashes are
    # needed to escape bytes in the compiled regex; the patterns below are
    # plain byte strings - confirm against the code that builds the regex.
    prologues_by_mode = {
        CS_MODE_32: [
            b"\x55\x89\xE5",  # push ebp & mov ebp, esp
            b"\x55\x8B\xEC",  # push ebp & mov ebp, esp
            b"\x55\x8b\x6c\x24",  # push ebp & mov ebp, [esp+?]
        ],
        CS_MODE_64: [
            b"\x55\x48\x89\xE5",  # push rbp & mov rbp, rsp
        ],
    }
    self.prologues = prologues_by_mode[mode]

    self.conditional_jmp_mnemonics = {
        'ja', 'jae', 'jb', 'jbe',
        'jcxz', 'jecxz', 'jrcxz',
        'je', 'jne', 'jz', 'jnz',
        'jg', 'jge', 'jl', 'jle',
        'jo', 'jno', 'js', 'jns',
        'jp', 'jpe', 'jnp',
        'loop', 'loope', 'loopne',
    }
    self.x86_32_registers = {
        'eax', 'ebx', 'ecx', 'edx',
        'esi', 'edi', 'esp', 'ebp',
    }
    self.max_instruction_size = 16
def disassemble(self, code: bytes, address: int) -> List[DumpAssembly]:
    """Decode ``code`` as ARM (ARM mode) starting at ``address``.

    Returns one ``DumpAssembly`` per decoded instruction, holding the
    instruction address and the text ``"<mnemonic>\\t<operands>"``.
    """
    engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    return [
        DumpAssembly(insn.address, f'{insn.mnemonic}\t{insn.op_str}')
        for insn in engine.disasm(code, address)
    ]
def dissemble_code(self, code, baseaddr):
    """Print a 32-bit big-endian PowerPC disassembly of ``code``.

    One line is printed per instruction: address, mnemonic and operands
    separated by tabs.
    """
    ppc_mode = capstone.CS_MODE_32 | capstone.CS_MODE_BIG_ENDIAN
    engine = Cs(capstone.CS_ARCH_PPC, ppc_mode)
    engine.syntax = capstone.CS_OPT_SYNTAX_INTEL
    # disasm_lite yields (address, size, mnemonic, op_str) tuples.
    for insn in engine.disasm_lite(code, baseaddr):
        address, _size, mnemonic, op_str = insn
        # A single parenthesised argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print("0x%x:\t%s\t%s" % (address, mnemonic, op_str))
def __init__(self, binary: MachoBinary) -> None:
    """Set up an analyzer for ``binary``.

    Creates the ARM64 disassembler, the helper parsers, a temporary SQLite
    database for cross-reference data, builds the symbol and
    function-boundary indexes, and finally registers this instance in
    ``MachoAnalyzer._ANALYZER_CACHE``.
    """
    self.binary = binary
    self.cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
    self.cs.detail = True

    # Worker to parse dyld bytecode stream and extract dyld stub addresses to the DyldBoundSymbol they represent
    self.dyld_info_parser = DyldInfoParser(self.binary)

    # Each __stubs function calls a single dyld stub address, which has a corresponding DyldBoundSymbol.
    # Map of each __stub function to the associated name of the DyldBoundSymbol
    self._imported_symbol_addresses_to_names: Dict[VirtualMemoryPointer, str] = {}

    self.crossref_helper = MachoStringTableHelper(binary)
    self.imported_symbols = self.crossref_helper.imported_symbols
    # The stub parser is handed the disassembler created above.
    self.imp_stubs = MachoImpStubsParser(binary, self.cs).imp_stubs

    self._objc_helper: Optional[ObjcRuntimeDataParser] = None
    self._objc_method_list: List[ObjcMethodInfo] = []

    # Use a temporary database to store cross-referenced data. This provides constant-time lookups for things like
    # finding all the calls to a particular function.
    self._has_computed_xrefs = False
    self._db_tempdir = pathlib.Path(tempfile.mkdtemp())
    self._db_path = self._db_tempdir / "strongarm.db"
    self._db_handle = sqlite3.connect(self._db_path.as_posix())

    # Create the schema, then close the cursor inside the connection's
    # context manager.
    cursor = self._db_handle.executescript(ANALYZER_SQL_SCHEMA)
    with self._db_handle:
        cursor.close()

    self._build_callable_symbol_index()
    self._build_function_boundaries_index()

    # Done setting up, store this analyzer in class cache
    MachoAnalyzer._ANALYZER_CACHE[binary] = self
def getCapstone(self):
    """Return the cached x86 disassembler, creating it on first use.

    The engine is built in 64-bit mode when ``self.bitness`` is 64 and in
    32-bit mode otherwise, with instruction details enabled.
    """
    if self.capstone is not None:
        return self.capstone
    cs_mode = CS_MODE_64 if self.bitness == 64 else CS_MODE_32
    self.capstone = Cs(CS_ARCH_X86, cs_mode)
    self.capstone.detail = True
    return self.capstone
def main(fname):
    """
    Basic python version of the tools:
      - "objdump -d" (linux)
      - "dumpbin /disasm" (MSVC)

    The AR and COFF structures are parsed by ``read_lib_file``; the
    "capstone" library does the disassembly of each member's first section.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending = deque(coff.symbols)
        engine = Cs(CS_ARCH_X86, CS_MODE_32)
        engine.skipdata = True

        # iterate through "CsInsn"
        for insn in engine.disasm(coff.sections[0].data, 0x000):
            # Consume every symbol located at or before this instruction,
            # printing a label for those matching the type/section filter.
            while pending and insn.address >= pending[0].value:
                sym = pending.popleft()
                if sym.type == 32 and sym.section_number == 1:
                    print(sym.name.decode(errors="ignore") + ":")

            # At most six bytes go on the main line; the rest on a second one.
            head, tail = insn.bytes, b""
            if len(head) >= 6:
                head, tail = head[:6], head[6:]

            if insn.op_str:
                asm_part = "%-12s%s" % (insn.mnemonic, format_asm(insn.op_str))
            else:
                asm_part = insn.mnemonic

            print(" %08X: %-19s" % (insn.address, hex_with_spaces(head)) + asm_part)
            if tail:
                print(" %s" % (hex_with_spaces(tail)))
def __init__(self, args):
    """Load the binary, set up the emulator and start the emulation run.

    ``args.debug`` enables a per-instruction code hook (``self.DEBUG``) and
    creates ARM and Thumb disassemblers for it.  A ``UcError`` raised while
    emulating is passed to ``self.force_crash``.
    """
    self.args = args
    self.parse_binary()
    self.check_binary()

    # Initialize the engine
    # Bit 0 of the entry point selects Thumb mode.
    mode = UC_MODE_THUMB if self.mclf.entry & 1 else UC_MODE_ARM
    self.uc = Uc(UC_ARCH_ARM, mode)

    self.map_sections()
    self.map_shared_memory()
    self.map_tlapi_handler()

    # Add the debug hook if needed
    if args.debug:
        # capstone is imported here, so it is only needed when debugging.
        from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB
        self.cs_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
        self.cs_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
        self.uc.hook_add(UC_HOOK_CODE, self.DEBUG)

    self.start_forkserver()
    self.load_input()

    try:
        self.LOG("[+] Starting fuzzing")
        # NOTE(review): the start address always has bit 0 set, even when the
        # engine was created in ARM mode above - confirm this is intended.
        self.uc.emu_start(self.mclf.entry | 1, self.mclf.text_va + self.mclf.text_len, count=0, timeout=0)
    except UcError as e:
        self.force_crash(e)
def _initialize_zelos(self, binary=None):
    """Create the state, disassembler and first process for ``binary``.

    Raises ``ZelosLoadException`` when the state's endianness is neither
    "little" nor "big".  An architecture missing from the table below
    raises ``KeyError``.
    """
    self.state = State(self, binary, self.date)

    # Capstone (arch, mode) pair for each architecture name.
    cs_arch_mode_sm_dict = {
        "x86": (CS_ARCH_X86, CS_MODE_32),
        "x86_64": (CS_ARCH_X86, CS_MODE_64),
        "arm": (CS_ARCH_ARM, CS_MODE_ARM),
        "mips": (CS_ARCH_MIPS, CS_MODE_MIPS32),
    }

    arch = self.state.arch
    (cs_arch, cs_mode) = cs_arch_mode_sm_dict[arch]

    # The endianness flag is OR-ed into the base mode.
    endianness = self.state.endianness
    if endianness == "little":
        cs_mode |= CS_MODE_LITTLE_ENDIAN
    elif endianness == "big":
        cs_mode |= CS_MODE_BIG_ENDIAN
    else:
        raise ZelosLoadException(f"Unsupported endianness {endianness}")

    self.cs = Cs(cs_arch, cs_mode)
    self.cs.detail = True

    self.logger.debug(
        f"Initialized {arch} {self.state.bits} emulator/disassembler")

    self.triggers = Triggers(self)
    self.processes.set_architecture(self.state)

    self.network = Network(self.helpers, self.files, None)

    self.processes._create_first_process(self.main_module_name)
    # Copy the command line and virtual-path settings onto the new process.
    p = self.current_process
    p.cmdline_args = self.cmdline_args
    p.environment_variables = self.config.env_vars
    p.virtual_filename = self.config.virtual_filename
    p.virtual_path = self.config.virtual_path

    # The thread-swap interrupt handler is only registered when the unicorn
    # module exposes WITH_ZEROPOINT_PATCH.
    if hasattr(unicorn.unicorn, "WITH_ZEROPOINT_PATCH"):

        def process_switch_wrapper(*args, **kwargs):
            # Block count interrupt. Fires every 2^N blocks executed
            # Use this as an opportunity to swap threads.
            self.logger.info(">>> Tracing Thread Swap Opportunity")
            self.processes.schedule_next()

        self.interrupt_handler.register_interrupt_handler(
            0xF8F8F8F8, process_switch_wrapper)

    # Register the target file, under its virtual name when one is configured.
    if self.config.filename is not None and self.config.filename != "":
        if (self.config.virtual_filename is not None
                and self.config.virtual_filename != ""):
            self.files.add_file(self.config.filename,
                                self.config.virtual_filename)
        else:
            self.files.add_file(self.config.filename)

    # TODO: SharedSection needs to be removed
    self.processes.handles.new("section", "\\Windows\\SharedSection")
def _setup(self, user_arch=None, user_mode=None, cs_arch=None, cs_mode=None):
    """Create the Unicorn and Capstone engines and prime the emulator.

    When both ``user_arch`` and ``user_mode`` are given they (together with
    ``cs_arch``/``cs_mode``) are used directly; otherwise the engines are
    chosen from ``self.dwarf.arch``.  Afterwards the memory around the
    current pc is mapped, registers are copied from ``self.context`` and the
    code/memory hooks are installed.

    :return: 0 on success.
    :raises EmulatorSetupFailedError: for an unsupported architecture,
        missing engines, a non-native context or a failed mapping.
    """
    if user_arch is not None and user_mode is not None:
        try:
            self.uc = unicorn.Uc(user_arch, user_mode)
            self.cs = Cs(cs_arch, cs_mode)
            self.thumb = user_mode == unicorn.UC_MODE_THUMB
        except Exception:
            # Was a bare `except:`, which also turned KeyboardInterrupt and
            # SystemExit into a setup failure.
            raise self.EmulatorSetupFailedError('Unsupported arch')
    else:
        setup_by_arch = {
            'arm': self.setup_arm,
            'arm64': self.setup_arm64,
            'ia32': self.setup_x86,
            'x64': self.setup_x64,
        }
        setup = setup_by_arch.get(self.dwarf.arch)
        if setup is None:
            # unsupported arch
            raise self.EmulatorSetupFailedError('Unsupported arch')
        setup()

    if not self.uc or not self.cs:
        raise self.EmulatorSetupFailedError('Unicorn or Capstone missing')

    # enable capstone details (self.cs was checked just above)
    self.cs.detail = True

    if not self.context.is_native_context:
        raise self.EmulatorSetupFailedError(
            'Cannot run emulator on non-native context')

    err = self.map_range(self.context.pc.value)
    if err:
        raise self.EmulatorSetupFailedError('Mapping failed')

    self.current_context = EmulatorContext(self.dwarf)
    # Seed every known, non-blacklisted register from the captured context.
    unicorn_registers = self.current_context._unicorn_registers
    context_values = self.context.__dict__
    for reg in unicorn_registers:
        if reg in context_values and reg not in self._blacklist_regs:
            self.uc.reg_write(unicorn_registers[reg], context_values[reg].value)

    self.uc.hook_add(unicorn.UC_HOOK_CODE, self.hook_code)
    self.uc.hook_add(unicorn.UC_HOOK_MEM_WRITE | unicorn.UC_HOOK_MEM_READ,
                     self.hook_mem_access)
    self.uc.hook_add(
        unicorn.UC_HOOK_MEM_FETCH_UNMAPPED |
        unicorn.UC_HOOK_MEM_WRITE_UNMAPPED |
        unicorn.UC_HOOK_MEM_READ_UNMAPPED, self.hook_unmapped)
    self.current_context.set_context(self.uc)
    return 0
def disassemble(addr, data):
    """Print a big-endian PowerPC disassembly of ``data`` located at ``addr``.

    When not a single instruction decodes, a "Couldn't disassemble" line is
    printed instead.
    """
    engine = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN)
    decoded_any = False
    for insn in engine.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
def get_raw_disassembler(arch, detailed=True):
    """Return an x86 Capstone engine matching the binary type ``arch``.

    ``arch`` is compared against the values of
    ``BinaryType.SCS_32BIT_BINARY`` and ``BinaryType.SCS_64BIT_BINARY``; any
    other value raises ``Exception``.  ``detailed`` is assigned to the
    engine's ``detail`` option.
    """
    if arch == BinaryType.SCS_32BIT_BINARY.value:
        cs_mode = CS_MODE_32
    elif arch == BinaryType.SCS_64BIT_BINARY.value:
        cs_mode = CS_MODE_64
    else:
        raise Exception("No disassembler for this architecture")

    disassembler = Cs(CS_ARCH_X86, cs_mode)
    disassembler.detail = detailed
    return disassembler
def __init__(self, encoding, position):
    """Decode the 32-bit ``encoding`` with Capstone in ARM mode.

    ``self._cap`` holds the last instruction Capstone produced for the
    encoding, or ``None`` when nothing could be decoded.
    """
    super(CAPSInstruction, self).__init__(encoding, position)

    # Capstone works on raw bytes: serialise the word as 4 little-endian bytes.
    raw = self._encoding.to_bytes(4, byteorder='little')
    engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    engine.detail = True

    self._cap = None
    decoded = list(engine.disasm(raw, position))
    if decoded:
        self._cap = decoded[-1]
def __init__(self, win_emu):
    """Initialise the handler's empty tables and bind it to ``win_emu``.

    The architecture and pointer size are queried from the emulator; a
    32-bit x86 disassembler is created as ``self.disasm``.
    """
    super(ApiHandler, self).__init__()

    # Emulator binding and the properties derived from it.
    self.win_emu = win_emu
    self.arch = win_emu.get_arch()
    self.ptr_size = win_emu.get_ptr_size()

    # Empty registries.
    self.funcs = dict()
    self.data = dict()
    self.cpp_procedure = dict()
    self.mod_name = ''

    self.disasm = Cs(CS_ARCH_X86, CS_MODE_32)
def extract_call_destinations(elf):
    """Collect the distinct call targets found in an ELF's code.

    The code is taken from the section or segment returned by
    ``get_ep_section_or_segment``; a segment is disassembled from the entry
    point, a section from its start.  Which instructions count as calls
    depends on the machine architecture (x86/x64 ``call 0x...``, ARM
    ``bl* #0x...``, MIPS ``lw $t9, ...``).

    :param elf: the parsed ELF file.
    :return: list of target strings sliced from the operand text, in order
        of first appearance; empty when there is no code region or the
        architecture is not in ``CALL_LIST``.
    """
    # get the code section or segment (if there's no section)
    code_section_or_segment = get_ep_section_or_segment(elf)
    # This check used to come after the object had already been
    # dereferenced, so a missing code region crashed instead.
    if code_section_or_segment is None:
        return []

    # get the architecture of our ELF file.
    # the disassembly and the call opcode and mnemonic will be based on the
    # determined architecture, as defined by the CALL_LIST dict
    arch = elf.get_machine_arch()
    # in case we have not specified the opcode, mnemonic, and the
    # capstone arch and mode, skip
    if arch not in CALL_LIST:
        return []

    # if we only got the segment, start extracting calls from the EP
    if isinstance(code_section_or_segment, elftools.elf.segments.Segment):
        ofs = elf.header.e_entry
        code_data = code_section_or_segment.data(
        )[ofs - code_section_or_segment["p_vaddr"]:]
    # otherwise we use the code section
    else:
        ofs = elf_get_imagebase(elf) + code_section_or_segment["sh_offset"]
        code_data = code_section_or_segment.data()

    # TODO: automatically identify the architecture the binary was compiled to
    md = Cs(CALL_LIST[arch]["cs_arch"], CALL_LIST[arch]["cs_mode"])

    symbols_list = []
    seen = set()  # constant-time duplicate check; the list keeps the order
    # TODO: handle UPX-packed binaries as they have no sections so we should go straight to segment offset
    for insn in md.disasm(code_data, ofs):
        operands = insn.op_str
        address = None
        if arch in ("x86", "x64") and insn.mnemonic == "call":
            # Consider only call to absolute addresses
            if operands.startswith("0x"):
                address = operands[2:]  # cut off '0x' prefix
        elif arch == "ARM" and insn.mnemonic.startswith("bl"):
            if operands.startswith("#0x"):
                address = operands[3:]
        elif arch == "MIPS" and insn.mnemonic == "lw":
            if operands.startswith("$t9, "):
                address = operands[8:-5]

        if address is not None and address not in seen:
            seen.add(address)
            symbols_list.append(address)
    return symbols_list
def disassemble(addr, data, thumb=False):
    """Print an ARM disassembly of ``data`` located at ``addr``.

    Thumb mode is used only when ``thumb == True``; otherwise ARM mode.
    A "Couldn't disassemble" line is printed when nothing decodes.
    """
    mode = CS_MODE_THUMB if thumb == True else CS_MODE_ARM
    engine = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)

    decoded_any = False  # set once at least one instruction was printed
    for insn in engine.disasm(data, addr):
        decoded_any = True
        # A single parenthesised argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print("0x%x:\t%s %s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
def init(self, disassembly):
    """Bind this object to ``disassembly`` and prepare the work queue.

    Identifies the language, creates an x86 disassembler matching the
    binary's bitness (64-bit mode for 64, 32-bit mode otherwise), locates
    the candidates and builds the queue.
    """
    binary_info = disassembly.binary_info
    if binary_info.code_areas:
        self._code_areas = disassembly.binary_info.code_areas

    self.disassembly = disassembly
    self.lang_analyzer = LanguageAnalyzer(disassembly)
    self.disassembly.language = self.lang_analyzer.identify()

    self.bitness = disassembly.binary_info.bitness
    cs_mode = CS_MODE_64 if self.bitness == 64 else CS_MODE_32
    self.capstone = Cs(CS_ARCH_X86, cs_mode)

    self.locateCandidates()
    self.disassembly.identified_alignment = self.identified_alignment
    self._buildQueue()
def disassemble(addr, data, thumb=False):
    """Print an ARM disassembly of ``data`` located at ``addr``.

    Thumb mode is used only when ``thumb == True``; otherwise ARM mode.
    A "Couldn't disassemble" line is printed when nothing decodes.
    """
    if thumb == True:
        cs_mode = CS_MODE_THUMB
    else:
        cs_mode = CS_MODE_ARM
    engine = Cs(CS_ARCH_ARM, cs_mode + CS_MODE_LITTLE_ENDIAN)

    printed = 0  # number of instructions written out
    for insn in engine.disasm(data, addr):
        printed += 1
        # A single parenthesised argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print("0x%x:\t%s %s" % (insn.address, insn.mnemonic, insn.op_str))
    if printed == 0:
        print("Couldn't disassemble at 0x%x" % (addr))
def __gadgetsFinding(self, section, gadgets, arch, mode):
    """Search ``section`` for gadgets ending in one of the ``gadgets`` patterns.

    Each entry of ``gadgets`` is indexed as (pattern, size, alignment).
    ``section`` provides "opcodes" (the raw bytes) and "vaddr".  For every
    pattern match, up to ``self.__options.depth`` start positions before the
    match are tried.

    Returns a list of dicts with the keys "vaddr", "gadget", "decodes",
    "bytes" and "prev".
    """
    # Indexes into a gadget description tuple.
    C_OP = 0
    C_SIZE = 1
    C_ALIGN = 2
    PREV_BYTES = 9  # Number of bytes prior to the gadget to store.
    ret = []
    md = Cs(arch, mode)
    for gad in gadgets:
        # Offsets of every occurrence of the gadget-ending pattern.
        allRefRet = [
            m.start() for m in re.finditer(gad[C_OP], section["opcodes"])
        ]
        for ref in allRefRet:
            for i in range(self.__options.depth):
                # Only start positions that respect the alignment are decoded.
                if (section["vaddr"] + ref -
                        (i * gad[C_ALIGN])) % gad[C_ALIGN] == 0:
                    decodes = md.disasm(
                        section["opcodes"][ref - (i * gad[C_ALIGN]):ref +
                                           gad[C_SIZE]],
                        section["vaddr"] + ref)
                    gadget = ""
                    for decode in decodes:
                        gadget += (decode.mnemonic + " " + decode.op_str +
                                   " ; ").replace(" ", " ")
                    # NOTE(review): `decode` is whatever the loop above left
                    # behind; if nothing decoded it is stale or unbound -
                    # confirm this is handled by the caller's inputs.
                    if re.search(gad[C_OP], decode.bytes) is None:
                        continue
                    if len(gadget) > 0:
                        # Drop the trailing " ; " separator.
                        gadget = gadget[:-3]
                        off = self.__offset
                        vaddr = off + section["vaddr"] + ref - (
                            i * gad[C_ALIGN])
                        # Keep up to PREV_BYTES bytes preceding the gadget,
                        # clamped to the section start.
                        prevBytesAddr = max(section["vaddr"],
                                            vaddr - PREV_BYTES)
                        prevBytes = section["opcodes"][
                            prevBytesAddr - section["vaddr"]:vaddr -
                            section["vaddr"]]
                        # NOTE(review): if md.disasm returns a one-shot
                        # iterator, "decodes" is already consumed here -
                        # confirm against consumers of this field.
                        ret += [{
                            "vaddr": vaddr,
                            "gadget": gadget,
                            "decodes": decodes,
                            "bytes": section["opcodes"][ref - (i * gad[C_ALIGN]):ref + gad[C_SIZE]],
                            "prev": prevBytes
                        }]
    return ret
def __init__(self, firmware: Firmware = None, state: CpuState = None, verbose=0, init=True):
    """Create a Thumb-mode ARM emulator with a matching disassembler.

    ``firmware``, ``state`` and ``verbose`` are stored as given.  When
    ``init`` is true, ``self.init()`` is called at the end.
    """
    # Caller-supplied configuration.
    self.firmware = firmware
    self.state = state
    self.verbose = verbose

    # Emulation and disassembly engines, both in Thumb mode.
    self.uc = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
    disassembler = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
    disassembler.detail = True
    self.cs = disassembler

    # Run-time bookkeeping, all unset until the emulator is used.
    self.has_error = None
    self.last_addr = None
    self.context = None
    self.ready = False

    if init:
        self.init()
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    ``bytes`` is disassembled as x86-64 code located at ``offset``.  If the
    first instruction has a memory operand based on RIP, the resolved
    address (address of the following instruction plus the displacement)
    and the operand size are returned.

    :return: ``(address, size)``, or ``(None, None)`` when there is no such
        operand, nothing could be decoded, or Capstone raised an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        if not disassembled:
            # Nothing decodable: indexing [0] used to raise IndexError here.
            return None, None
        instruc = disassembled[0]
        # RIP points at the next instruction.  Computing that from the
        # instruction's own size avoids the IndexError the old
        # `disassembled[1].address` hit when only one instruction decoded.
        next_address = instruc.address + instruc.size
        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
def find_single((raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad, need_filter, ref)):
    """Collect the gadgets that end at offset ``ref`` of ``raw_data``.

    The single tuple argument (Python 2 tuple parameter) carries all
    inputs.  ``gad`` is indexed as (pattern, size, alignment).  Ten start
    positions are tried, each ``gad[C_ALIGN]`` bytes further back from
    ``ref``.  Returns the list of gadgets that survived the checks below.
    """
    # Indexes into the gadget description `gad`.
    C_OP = 0
    C_SIZE = 1
    C_ALIGN = 2
    allgadgets = []
    md = Cs(arch, mode)
    md.detail = True
    for i in range(10):
        back_bytes = i * gad[C_ALIGN]
        section_start = ref - back_bytes
        start_address = pvaddr + section_start
        if elftype == 'DYN':
            # For 'DYN' files the base address is added to the start address.
            start_address = elf_base_addr + start_address
        decodes = md.disasm(raw_data[section_start:ref + gad[C_SIZE]],
                            start_address)
        decodes = list(decodes)
        insns = []
        for decode in decodes:
            insns.append((decode.mnemonic + " " + decode.op_str).strip())
        if len(insns) > 0:
            if (start_address % gad[C_ALIGN]) == 0:
                address = start_address
                if mode == CS_MODE_THUMB:
                    # Thumb gadget addresses get bit 0 set.
                    address = address | 1
                bytes = raw_data[ref - (i * gad[C_ALIGN]):ref + gad[C_SIZE]]
                onegad = Gadget(address, insns, {}, 0, bytes)
                if not passClean(decodes):
                    continue
                # Architecture-specific filter; its result replaces the gadget.
                if arch == CS_ARCH_X86:
                    onegad = filter_for_x86_big_binary(onegad)
                elif arch == CS_ARCH_ARM:
                    onegad = filter_for_arm_big_binary(onegad)
                # Classification only runs when need_filter is false and the
                # gadget is still truthy after filtering.
                if (not need_filter) and onegad:
                    classifier = GadgetClassifier(arch, mode)
                    onegad = classifier.classify(onegad)
                if onegad:
                    allgadgets += [onegad]
    return allgadgets
def __setup_available_disassemblers(self):
    """Create one detail-enabled Capstone engine per supported ARM mode.

    The engines are stored in ``self._avaliable_disassemblers``, keyed by
    ``ARCH_ARM_MODE_ARM`` and ``ARCH_ARM_MODE_THUMB``.
    """
    arch_map = {
        ARCH_ARM_MODE_ARM: CS_MODE_ARM,
        ARCH_ARM_MODE_THUMB: CS_MODE_THUMB,
    }

    disassemblers = {}
    for arch_mode in (ARCH_ARM_MODE_ARM, ARCH_ARM_MODE_THUMB):
        disassemblers[arch_mode] = Cs(CS_ARCH_ARM, arch_map[arch_mode])
    self._avaliable_disassemblers = disassemblers

    for engine in (disassemblers[ARCH_ARM_MODE_ARM],
                   disassemblers[ARCH_ARM_MODE_THUMB]):
        engine.detail = True
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    ``bytes`` is disassembled as x86-64 code located at ``offset``.  If the
    first instruction has a memory operand based on RIP, the resolved
    address (address of the following instruction plus the displacement)
    and the operand size are returned.

    :return: ``(address, size)``, or ``(None, None)`` when there is no such
        operand, nothing could be decoded, or Capstone raised an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        if not disassembled:
            # Nothing decodable: indexing [0] used to raise IndexError here.
            return None, None
        instruc = disassembled[0]
        # RIP points at the next instruction.  Computing that from the
        # instruction's own size avoids the IndexError the old
        # `disassembled[1].address` hit when only one instruction decoded.
        next_address = instruc.address + instruc.size
        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
def __init__(self, sections: SectionFinder, arch, bits):
    '''Prepare a disassembler for the provided sections.

    Arguments:
    sections -- A section finder instance.
    arch -- The architecture, as defined by Capstone.
    bits -- The bit width, as defined by Capstone.
    '''
    self.sections = sections

    # Nothing is cached until the first lookup.
    self._last_data = None
    self._last_start = 0
    self._last_end = 0

    # Set up options for disassembly: skip over undecodable data and
    # collect instruction details.
    engine = Cs(arch, bits)
    engine.skipdata = True
    engine.detail = True
    self.md = engine
def setup_arm(self):
    """Create the Capstone and Unicorn engines for a 32-bit ARM target.

    Thumb or ARM mode is chosen from ``self.context.pc.thumb``.  In Thumb
    mode the ``VFP`` code blob is additionally run once from a temporary
    mapping before the emulator is used.
    """
    self.thumb = self.context.pc.thumb

    if not self.thumb:
        self.cs = Cs(CS_ARCH_ARM, CS_MODE_ARM)
        self.uc = unicorn.Uc(unicorn.UC_ARCH_ARM, unicorn.UC_MODE_ARM)
        self._current_cpu_mode = unicorn.UC_MODE_ARM
        return

    self._current_cpu_mode = unicorn.UC_MODE_THUMB
    self.cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
    self.uc = unicorn.Uc(unicorn.UC_ARCH_ARM, unicorn.UC_MODE_THUMB)

    # Enable VFP instr
    # The blob is mapped at a scratch address, executed (bit 0 set on the
    # start address), and the mapping removed again.
    scratch_base = 0x1000
    scratch_size = 1024
    emulator = self.uc
    emulator.mem_map(scratch_base, scratch_size)
    emulator.mem_write(scratch_base, binascii.unhexlify(VFP))
    emulator.emu_start(scratch_base | 1, scratch_base + len(VFP))
    emulator.mem_unmap(scratch_base, scratch_size)
def __init__(self, architecture, code):
    """Prepare a disassembly iterator over ``code``.

    When ``architecture`` is a key of ``arch_mapping`` a detail-enabled
    Capstone engine is created, ``self.iterator`` is set to its disassembly
    of ``code`` from address 0, and ``self.valid`` becomes True.  Otherwise
    ``self.md`` and ``self.iterator`` stay ``None`` and ``self.valid`` stays
    False.
    """
    self.architecture = architecture
    self.code = code
    self.data = []
    self.md = None
    self.iterator = None
    self.valid = False

    if architecture not in arch_mapping:
        return

    arch, mode = arch_mapping[architecture]
    self.md = Cs(arch, mode)
    self.md.detail = True
    self.iterator = self.md.disasm(self.code, 0)
    self.valid = True
def _cs_disassemble_one(self, data, address): """Disassemble the data into an instruction in string form. """ disasm = list(self._disassembler.disasm(data, address)) # TODO: Improve this check. if len(disasm) > 0: return disasm[0] else: cs_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM) disasm = list(cs_arm.disasm(data, address)) if len(disasm) > 0: return disasm[0] else: raise InvalidDisassemblerData("CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))
def trace(ql: Qiling, address: int, size: int, md: Cs):
    """Log one trace line for the instruction that is about to execute.

    Args:
        ql: the qiling instance
        address: the address of the instruction that is about to be executed
        size: size of the instruction (in bytes)
        md: initialized disassembler object
    """
    color_faded = '\033[2m'
    color_reset = '\033[0m'

    # fetch the raw bytes of the current instruction and decode them
    buf = ql.mem.read(address, size)
    insn = next(md.disasm(buf, address))

    nibbles = ql.arch.bits // 4

    # collect the current values of the registers this instruction reads.
    #
    # note: this runs before the instruction has been emulated, so 'rip' still
    # points at the current instruction while the instruction itself treats it
    # as pointing to the next one; 'rip' will therefore show an incorrect value
    reads = []
    for reg in insn.regs_access()[0]:
        reads.append(f'{md.reg_name(reg)} = {ql.arch.regs.read(CS_UC_REGS[reg]):#x}')

    # human-readable trace line
    trace_line = f'{insn.address:0{nibbles}x} | {insn.bytes.hex():24s} {insn.mnemonic:12} {insn.op_str:35s} | {", ".join(reads)}'

    # faded color makes trace info easy to tell apart from other log entries
    ql.log.info(f'{color_faded}{trace_line}{color_reset}')
def disassemble(self, size, thumb=True):
    """
    Display the bytes disassembled using Capstone at the current position.

    One line is printed per instruction: the address as eight hex digits,
    then the mnemonic and the operands.

    Args:
        size (:obj:`int`): the number of bytes to disassemble
        thumb (:obj:`bool`): True if Thumb, False otherwise
    """
    from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB
    cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB if thumb else CS_MODE_ARM)
    addr = self._ptr.value
    for insn in cs.disasm(self.read(size), addr):
        # The three values must be passed as separate arguments; passing
        # them as one tuple made str.format raise instead of printing.
        print("{:08x}:\t{} {}".format(insn.address, insn.mnemonic, insn.op_str))
def disasm(self, addr):
    """Disassemble the section containing ``addr`` into ``self.code``.

    Every decoded instruction is stored in ``self.code`` under its address,
    and the address is appended to ``self.code_idx``.  ``die`` is called
    when the section is not flagged executable.
    """
    data, virtual_addr, flags = self.binary.get_section(addr)

    if not flags["exec"]:
        die("the address 0x%x is not in an executable section" % addr)

    md = Cs(CS_ARCH_X86, CS_MODE_64 if self.bits == 64 else CS_MODE_32)
    md.detail = True

    for insn in md.disasm(data, virtual_addr):
        self.code[insn.address] = insn
        self.code_idx.append(insn.address)

    # Imported symbols for PE are loaded only now: resolving them needs the
    # disassembled code.
    if self.binary.get_type() == T_BIN_PE:
        self.binary.load_import_symbols(self.code)
def dumpASM(flo, mode, maxAddr=1e99):
    """Print an x86 disassembly of ``flo`` with implicit register usage.

    ``mode`` is 32 or 64 (any other value raises ``KeyError``).  For every
    instruction its address, mnemonic and operands are printed, followed by
    the implicitly read and implicitly written registers, each register on
    its own line.  Printing stops after the first instruction whose address
    is greater than ``maxAddr``.
    """
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}
    engine = Cs(CS_ARCH_X86, modeRef[mode])
    engine.detail = True

    register_sections = (
        ("\tImplicit registers read: ", "regs_read"),
        ("\tImplicit registers written: ", "regs_write"),
    )

    for insn in engine.disasm(flo, 0):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
        for header, attribute in register_sections:
            print(header, end="")
            for reg in getattr(insn, attribute):
                print("%s " % insn.reg_name(reg))
            print()
        if insn.address > maxAddr:
            break
def find_instr_addr(mod_name, bits):
    """Locate a call site inside the function reached from 'rtcInStrChar'.

    The export 'rtcInStrChar' is disassembled up to its first ``ret``; the
    last ``call`` seen there identifies the next function.  That function is
    then scanned for calls that reference the 'SysFreeString' import (or use
    a ``qword ptr`` operand); when the second such call is found, the
    position recorded for the most recent earlier ``call`` is returned.

    Returns ``None`` when a ``ret`` is reached first or the scanned window
    ends.
    """
    dll = pefile.PE(mod_name)
    # NOTE(review): exp_addr / imp_addr stay unbound if the names are not
    # found, and the name comparison is against str literals - confirm the
    # type pefile returns for entry.name on the targeted Python version.
    for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
        if entry.name == 'rtcInStrChar':
            exp_addr = entry.address
            break
    for imp in dll.DIRECTORY_ENTRY_IMPORT:
        for entry in imp.imports:
            if entry.name == 'SysFreeString':
                imp_addr = entry.address
                # Only leaves the inner loop; remaining import descriptors
                # are still scanned.
                break
    memory = dll.get_memory_mapped_image()
    if bits == 32:
        dsm = Cs(CS_ARCH_X86, CS_MODE_32)
    else:
        dsm = Cs(CS_ARCH_X86, CS_MODE_64)
    # First pass: the first 0xA0 bytes of the export, remembering the operand
    # text of the last call before the first ret.
    for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0],
                         (exp_addr + dll.OPTIONAL_HEADER.ImageBase)):
        if op.mnemonic == 'call':
            last_call = op.op_str
        if op.mnemonic == 'ret':
            break
    # The call operand is parsed as a hexadecimal address and rebased.
    # NOTE(review): last_call is unbound if no call was seen above.
    next_func = int(last_call, 16) - dll.OPTIONAL_HEADER.ImageBase
    calls = 0
    call_free = 0
    # Second pass: the first 0x200 bytes of the called function.
    for op in dsm.disasm(memory[next_func:next_func + 0x200],
                         (next_func + dll.OPTIONAL_HEADER.ImageBase)):
        if op.mnemonic == 'call' and ('0x%x' % imp_addr in op.op_str
                                      or 'qword ptr' in op.op_str):
            call_free += 1
            if call_free == 2:
                # NOTE(review): if the first matching call is also the first
                # call of this pass, last_call still holds the operand string
                # from the first pass rather than an offset.
                return last_call
        if op.mnemonic == 'call':
            # From here on last_call is an image-relative offset.
            last_call = op.address - dll.OPTIONAL_HEADER.ImageBase
        if op.mnemonic == 'ret':
            return
def __init__(self, target, log, start_clnum=0, end_clnum=0):
    """Load ``target`` and open the trace ``log`` for it.

    The target's OS, architecture and base are determined through
    ``get_os``/``get_arch``/``get_base``; a PE or ELF parser, an x86
    disassembler and a ``qiradb.Trace`` matching the architecture are
    created.  The constructor blocks until the trace reports an update.

    ``start_clnum`` and ``end_clnum`` are accepted but not used here.

    Raises ``Exception`` when the OS or architecture cannot be determined.
    """
    # The context manager closes the file even if read() raises.
    with open(target, 'rb') as f:
        self.data = f.read()

    self.target = target
    self.log = log

    self.os = self.get_os()
    if self.os is None:
        raise Exception('not supports os')

    self.arch = self.get_arch()
    if self.arch is None:
        raise Exception('not known arch')

    self.base = self.get_base()

    if self.os == 'windows':
        self.pe = PE(target)
    else:
        self.elf = Elf(target)

    # Disassembler and trace are chosen together from the architecture.
    if self.arch == 'i386':
        self.md = Cs(CS_ARCH_X86, CS_MODE_32)
        self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
    else:
        self.md = Cs(CS_ARCH_X86, CS_MODE_64)
        self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits

    # Poll until the trace has been read.
    while not self.t.did_update():
        # Single-argument form prints the same on Python 2 and Python 3.
        print("waiting...")
        time.sleep(0.1)

    self.disasms = {}
def createDisassembly(fileContent, offset):
    """Disassemble ``fileContent`` as 32-bit x86 code located at ``offset``.

    Returns the decoded instructions as a list.
    """
    engine = Cs(CS_ARCH_X86, CS_MODE_32)
    instructions = []
    instructions.extend(engine.disasm(fileContent, offset))
    return instructions
def disasm(bytes, offset=0):
    """Disassemble ``bytes`` as x86-64 and annotate every instruction.

    Each instruction is printed and then decorated with extra attributes:
    ``nop``; jump/call information (``internal_jump``, ``external_jump``,
    ``jump_*``); RIP-relative operand information (``rip``, ``rip_offset``,
    ``rip_resolved``, ``rip_value_hex``, ``rip_value_ascii``); register
    name lists; and documentation metadata (``docfile``, ``short_desc``).

    Returns the list of annotated instructions.  On ``CsError`` the error
    is printed and ``None`` is returned implicitly.
    """
    print "offset %i" % offset
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for i, instr in enumerate(disassembled):
            print "0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str)
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # We can only decode the destination if it's an immediate value
                if instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[len(disassembled)-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol = executable.ex.get_symbol_by_addr(dest_addr)
                        if symbol:
                            # The .text section's address and file offset
                            # translate the destination into a file offset.
                            text_sect = executable.ex.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']
                            instr.external_jump = True
                            instr.jump_address = dest_addr
                            instr.jump_function_name = demangle(symbol.name)
                            instr.jump_function_address = dest_addr
                            instr.jump_function_offset = dest_addr - sect_addr + sect_offset
                            instr.jump_function_size = symbol['st_size']
                            instr.comment = demangle(symbol.name)

            # Handle individual operands
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # Resolved relative to the address of the next instruction.
                    # NOTE(review): disassembled[i+1] raises IndexError when
                    # this is the last instruction - confirm inputs.
                    instr.rip_resolved = disassembled[i+1].address + instr.rip_offset
                    symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                    # NOTE: this rebinds the `bytes` parameter.
                    bytes = executable.ex.get_bytes(instr.rip_resolved, op.size)
                    instr.rip_value_hex = ""
                    space = ""
                    for char in bytes:
                        instr.rip_value_hex += space + hex(ord(char))
                        space = " "

                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with   (non-breaking space)
                    nbsp_str = []
                    if op.size == 16:
                        for char in bytes:
                            if char == ' ':
                                nbsp_str.append(' ')
                            else:
                                nbsp_str.append(char)
                        instr.rip_value_ascii = ''.join(nbsp_str)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."

            # what registers does this instruction read/write?
            instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write]
            instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.docfile = doc_file(instr)
            instr.short_desc = get_short_desc(instr)
            if instr.docfile is None:
                # Record mnemonics that have no documentation file.
                with open('missing_docs.log', 'a+') as f:
                    f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic))
        return disassembled
    except CsError as e:
        print("ERROR: %s" %e)
def do_POST(self):
    """Handle one POSTed message.

    The url-encoded body carries ``addr`` (optional), ``data`` (hex string,
    required), ``type`` (required) and ``extra`` (optional).  Supported
    types:

    - ``read``: print ``display_data`` of the decoded bytes.
    - ``dis``: disassemble the bytes (Thumb when ``extra`` is "thumb").
    - ``dis_res``: disassemble the bytes as ARM and build a
      ``resolve <pointer> <extra>`` command from the operands.
    - ``dump``: write the bytes through ``dump_data``.
    """
    length = int(self.headers.getheader('content-length'))
    if length:
        rdata = self.rfile.read(length)
        rdata = urlparse.parse_qs(rdata)
        addr = 0
        extra = ""
        # A missing address only produces a warning; addr stays 0.
        try:
            addr = int(rdata['addr'][0])
        except KeyError:
            print "[+] Warning: addr not received"
        # Data and message type are mandatory.
        try:
            data = rdata['data'][0]
        except KeyError:
            print "[+] Error: dump not received"
            return
        try:
            typ = rdata['type'][0]
        except KeyError:
            print "[+] Error: msg type not received"
            return
        try:
            extra = rdata['extra'][0]
        except KeyError:
            pass
        if(typ == 'read'):
            print display_data(addr, data.decode('hex'))
        if(typ == 'dis'):
            if(extra == "thumb"):
                disassemble(addr, data.decode('hex'), thumb=True)
            else:
                disassemble(addr, data.decode('hex'))
        if(typ == 'dis_res'):
            mode = CS_MODE_ARM
            md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
            disassed = md.disasm(data.decode('hex'), addr)
            ops = []
            ptrstr = ""
            print "Parsing: " + extra
            for i in disassed:
                print "0x%x:\t%s %s" %(i.address, i.mnemonic, i.op_str)
                if i.mnemonic == "SVC":
                    print "Could not resolve " + extra + " (syscall) "
                    return
                # Keep the operand text from its eighth character on.
                ops.append(i.op_str[7:])
            # The second operand supplies the leading digits of the pointer,
            # the first the trailing ones; each is left-padded to 4 chars.
            # NOTE(review): raises IndexError when fewer than two
            # instructions were decoded - confirm inputs.
            ptrstr = "0x"+ops[1].rjust(4,'0')+ops[0].rjust(4,'0')
            cmdstr = "resolve " + ptrstr + " " + extra
            print cmdstr
            if (int(ptrstr,16) > 0x40000000) and (int(ptrstr,16) < 0xE000000000):
                self.mods.append(cmdstr)
            else:
                print "Could not resolve " + extra + " (invalid address) "
            print "----"
        """
        if(typ == 'dump'):
            fname = extra
            dump_data(data.decode('hex'), fname)
        """
        if typ == 'dump':
            global CURRENT_DUMP_FILE_NAME
            if CURRENT_DUMP_FILE_NAME == "": #If this is the initial dump
                CURRENT_DUMP_FILE_NAME = extra
                #check if this file already exists
                self.dump_directory_initializer(extra)
            elif not extra.startswith(CURRENT_DUMP_FILE_NAME): #If this is a different dump
                self.dump_directory_initializer(extra)
                CURRENT_DUMP_FILE_NAME = extra
            dump_data(data.decode('hex'), CURRENT_DUMP_FILE_NAME)
def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 code and annotate each instruction in place.

    Every capstone instruction object gets extra attributes describing
    no-ops, jump/call targets, RIP-relative data, explicit and implicit
    register use, and documentation metadata.

    :param exe: Executable wrapper; this function calls
        ``get_symbol_by_addr``, ``get_section_from_offset``, ``get_bytes``
        and ``elff.get_section_by_name`` on it.
    :param bytes: Machine code to disassemble (name kept for callers even
        though it shadows the builtin).
    :param offset: Address assigned to the first instruction.
    :return: List of annotated instructions, or None when capstone raises
        ``CsError`` (the error is printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))

            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                target = instr.operands[0]
                # jump table
                if instr.group(x86.X86_GRP_JUMP) and target.type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(target.reg)
                # We can only decode the destination if it's an immediate value
                elif target.type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = target.imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, field_name = exe.get_symbol_by_addr(
                            dest_addr, instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']
                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(symbol.name)
                                instr.jump_function_address = symbol["st_value"]
                                instr.jump_function_offset = (
                                    symbol["st_value"] - sect_addr + sect_offset)
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # RIP-relative displacements are measured from the end of
                    # the current instruction.  Using address + size instead of
                    # disassembled[i + 1] also works for the last instruction.
                    instr.rip_resolved = (
                        instr.address + instr.size + instr.rip_offset)

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = (
                        instr.rip_resolved - section["sh_addr"] + section["sh_offset"])

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved, instr.address,
                        instr_size=op.size, get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name

                    # Kept in a local so the ``bytes`` parameter is not clobbered.
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(ch)) for ch in raw)

                    # HTML collapses consecutive spaces. For presentation
                    # purposes, replace spaces with the &nbsp; entity
                    # (non-breaking space).
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp;' if ch == ' ' else ch for ch in raw)
                    # TODO: there's a bug involving ASCII that cannot be
                    # jsonified. To get around it, we're temporarily pretending
                    # they don't exist. Those edge cases need to be handled.
                    else:
                        instr.rip_value_ascii = "under construction..."

                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    # using an array instead of object to guarantee ordering:
                    # [base register, index register, displacement]
                    ptr = ["", "", ""]
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)
                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [
                instr.reg_name(reg) for reg in instr.regs_write]
            rax_name = instr.reg_name(x86.X86_REG_RAX)
            # Calls are additionally recorded as writing rax.
            if instr.group(x86.X86_GRP_CALL) and rax_name not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(rax_name)
            instr.regs_read_implicit = [
                instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled
    except CsError as e:
        print("ERROR: %s" % e)
def do_POST(self):
    """Handle one POST request carrying a memory blob from the client.

    The request body is a URL-encoded form with the fields ``addr``
    (optional, parsed with ``int``, default 0), ``data`` (required,
    hex-encoded bytes), ``type`` (required) and ``extra`` (optional,
    default ``""``).

    Actions by ``type``:

    * ``read``    -- print ``display_data(addr, bytes)``.
    * ``dis``     -- call ``disassemble``; ``extra == "thumb"`` passes
      ``thumb=True``.
    * ``dis_res`` -- disassemble as little-endian ARM, build a pointer from
      the operand text of the first two instructions and, if it is above
      0x40000000, append a ``resolve`` command to ``self.mods``.
    * ``dump``    -- write the bytes with ``dump_data`` to the file named by
      ``extra``.

    Returns None in every case; problems are reported on stdout.
    """
    # getheader() yields None when the header is absent; treat that like an
    # empty body instead of letting int(None) raise.
    length = int(self.headers.getheader('content-length') or 0)
    if not length:
        return

    rdata = urlparse.parse_qs(self.rfile.read(length))
    addr = 0
    extra = ""
    try:
        addr = int(rdata['addr'][0])
    except KeyError:
        print("[+] Warning: addr not received")
    try:
        data = rdata['data'][0]
    except KeyError:
        print("[+] Error: dump not received")
        return
    try:
        typ = rdata['type'][0]
    except KeyError:
        print("[+] Error: msg type not received")
        return
    try:
        extra = rdata['extra'][0]
    except KeyError:
        pass

    if typ == 'read':
        print(display_data(addr, data.decode('hex')))

    if typ == 'dis':
        if extra == "thumb":
            disassemble(addr, data.decode('hex'), thumb=True)
        else:
            disassemble(addr, data.decode('hex'))

    if typ == 'dis_res':
        md = Cs(CS_ARCH_ARM, CS_MODE_ARM + CS_MODE_LITTLE_ENDIAN)
        ops = []
        for insn in md.disasm(data.decode('hex'), addr):
            # capstone reports mnemonics in lower case, so compare
            # case-insensitively or the syscall stub is never detected.
            if insn.mnemonic.lower() == "svc":
                print("Could not resolve " + extra + " (syscall) ")
                return
            # Drop the first 7 characters of the operand text; presumably a
            # "rN, #0x" prefix of a movw/movt pair -- TODO confirm.
            ops.append(insn.op_str[7:])

        ptr = None
        if len(ops) >= 2:
            # Second instruction supplies the high half, first the low half.
            ptrstr = ops[1].rjust(4, '0') + ops[0].rjust(4, '0')
            print(ptrstr)
            cmdstr = "resolve 0x" + ptrstr + " " + extra
            try:
                ptr = int(ptrstr, 16)
            except ValueError:
                # Operand text was not plain hex; nothing to resolve.
                ptr = None
        if ptr is not None and ptr > 0x40000000:
            self.mods.append(cmdstr)
        else:
            print("Could not resolve " + extra + " (invalid address) ")

    if typ == 'dump':
        fname = extra
        dump_data(data.decode('hex'), fname)
from __future__ import print_function
# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

# Demo: disassemble one 32-bit x86 instruction and list, per instruction,
# the implicitly read registers and the instruction groups it belongs to.
CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True  # required for regs_read / groups
for i in md.disasm(CODE, 0):
    # print(dir(i))
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
    if len(i.regs_read) > 0:
        # With print_function in effect a trailing comma does not suppress
        # the newline and a bare ``print`` does nothing; use end=""/print().
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()
    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups: ", end="")
        for g in i.groups:
            print("%u " % g, end="")
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    """Create a detail-enabled x86 disassembler for a 32- or 64-bit stream.

    :param flo: Not used by the visible part of this function; presumably a
        file-like object supplying the code -- TODO confirm.
    :param mode: 32 or 64; any other value raises KeyError.
    :param maxAddr: Not used by the visible part of this function.
    """
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}
    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True
def main():
    """Match entry-point mnemonics of PE / Mach-O files against signatures.

    Command-line options override the defaults below.  Signatures are read
    from an INI-style file: every section needs a comma-separated
    ``mnemonics`` option and may carry ``num_mnemonics``, ``major_linker``,
    ``minor_linker`` and ``numberofsections``.

    For each input file (or each entry of an input directory) the bytes at
    the entry point are disassembled and the mnemonic sequence is compared
    to every signature with ``tapered_levenshtein``.  Matches whose
    similarity exceeds the threshold are printed.  Exits with status 1 when
    the signature file yields no sections.
    """
    BYTES = 500
    NUM_MNEM = 30
    SIG_FILE = "./mpesm.sig"
    THRESHOLD = .85
    VERBOSE = False
    DIR_PROCESSING = False
    signatures = {}
    file_list = []
    nos = 0
    ep = 0
    ep_ava = 0

    parser = ArgumentParser(description="Mnemonic PE Signature Matching")
    parser.add_argument("-n", "--num-mnem", dest="num_mnem",
                        help="Use a lenght of 'n' mnemonics (default: " + str(NUM_MNEM) + ')')
    parser.add_argument("-s", "--signatures", dest="sig_file",
                        help="signature file to use (default: " + SIG_FILE + ')')
    parser.add_argument("-b", "--bytes", dest="bytes",
                        help="Grab and disassemble x bytes from EP, you should only need to change this if you give a super large number for -n (default: " + str(BYTES) + ')')
    parser.add_argument("-t", "--threshold", dest="threshold",
                        help="Display all matches greater than -t supplied similarity (default: " + str(THRESHOLD) + ')')
    parser.add_argument("-v", "--verbose", dest="verbose", help="Verbose output", action='store_true')
    parser.add_argument("file", nargs=1, help='File to analyze')
    args = parser.parse_args()

    if args.sig_file:
        SIG_FILE = args.sig_file
    if args.threshold:
        THRESHOLD = float(args.threshold)
    # argparse hands these over as strings; they are used as slice bounds
    # below, so convert them or slicing raises TypeError.
    if args.bytes:
        BYTES = int(args.bytes)
    if args.num_mnem:
        NUM_MNEM = int(args.num_mnem)
    if args.verbose:
        VERBOSE = True

    config = ConfigParser.RawConfigParser()
    config.read(SIG_FILE)
    if len(config.sections()) == 0:
        print("Error Reading from config file: %s, it's either empty or not present" % (SIG_FILE))
        sys.exit(1)
    for s in config.sections():
        signatures[s] = {}
        signatures[s]['mnemonics'] = config.get(s, 'mnemonics').split(',')
        for option in ('num_mnemonics', 'major_linker', 'minor_linker', 'numberofsections'):
            if config.has_option(s, option):
                signatures[s][option] = config.getint(s, option)

    if os.path.isdir(args.file[0]):
        file_list = glob.glob(args.file[0] + '/*')
        DIR_PROCESSING = True
    else:
        file_list.append(args.file[0])

    for f in file_list:
        file_type = None
        if VERBOSE:
            print('[*] Processing: ' + f)
        # Try PE first, then Mach-O.
        try:
            fe = pefile.PE(f)
            file_type = 'PE'
        except Exception as e:
            if VERBOSE:
                sys.stderr.write("[*] Error with %s - %s\n" % (f, str(e)))
        if not file_type:
            try:
                fe = macholib.MachO.MachO(f)
                file_type = 'MACHO'
            except Exception as e:
                if VERBOSE:
                    sys.stderr.write("[*] Error with %s - %s\n" % (f, str(e)))
        if not file_type:
            sys.stderr.write("[*] Error with %s - not a PE or Mach-O\n" % f)

        if file_type == 'PE':
            try:
                minor_linker = 0
                major_linker = 0
                try:
                    minor_linker = fe.OPTIONAL_HEADER.MinorLinkerVersion
                    major_linker = fe.OPTIONAL_HEADER.MajorLinkerVersion
                except Exception:
                    # Deliberate best effort: keep 0/0 when the header is missing.
                    pass
                if hasattr(fe, 'FILE_HEADER') and hasattr(fe.FILE_HEADER, 'NumberOfSections'):
                    nos = fe.FILE_HEADER.NumberOfSections
                if hasattr(fe, 'OPTIONAL_HEADER') and hasattr(fe.OPTIONAL_HEADER, 'AddressOfEntryPoint'):
                    ep = fe.OPTIONAL_HEADER.AddressOfEntryPoint
                if hasattr(fe, 'OPTIONAL_HEADER') and hasattr(fe.OPTIONAL_HEADER, 'ImageBase') and ep > 0:
                    ep_ava = ep + fe.OPTIONAL_HEADER.ImageBase
                data = fe.get_memory_mapped_image()[ep:ep + BYTES]

                # Determine if the file is 32bit or 64bit
                mode = CS_MODE_32
                if fe.OPTIONAL_HEADER.Magic == 0x20b:
                    mode = CS_MODE_64
                md = Cs(CS_ARCH_X86, mode)
                match = []
                for (address, size, mnemonic, op_str) in md.disasm_lite(data, 0x1000):
                    match.append(mnemonic.encode('utf-8').strip())

                # Header values that a signature may additionally state.
                observed = (
                    ('minor_linker', 'Minor Linker Version', minor_linker),
                    ('major_linker', 'Major Linker Version', major_linker),
                    ('numberofsections', 'Number Of Sections', nos),
                )
                for s in signatures:
                    m = match
                    sig = signatures[s]['mnemonics']
                    # Cheap pre-filter on the first mnemonic; skipped when
                    # the threshold is loose.
                    if (m and m[0] == sig[0]) or THRESHOLD < .7:
                        additional_info = []
                        for key, label, value in observed:
                            if key in signatures[s]:
                                additional_info.append(
                                    '%s Match: %s' % (label, value == signatures[s][key]))
                        if 'num_mnemonics' in signatures[s]:
                            nm = signatures[s]['num_mnemonics']
                            m = match[:nm]
                            sig = signatures[s]['mnemonics'][:nm]
                        else:
                            m = match[:NUM_MNEM]
                            sig = signatures[s]['mnemonics'][:NUM_MNEM]
                        longest = max(len(sig), len(m))
                        if not longest:
                            # Nothing to compare (length limit of 0); avoid 0-division.
                            continue
                        distance = tapered_levenshtein(sig, m)
                        similarity = 1.0 - distance / float(longest)
                        if similarity > THRESHOLD:
                            if DIR_PROCESSING:
                                print("[%s] [%s] (Edits: %s | Similarity: %0.3f) (%s)" % (f, s, distance, similarity, ' | '.join(additional_info)))
                            else:
                                print("[%s] (Edits: %s | Similarity: %0.3f) (%s)" % (s, distance, similarity, ' | '.join(additional_info)))
                            if VERBOSE:
                                print("%s\n%s\n" % (sig, m))
            except Exception as e:
                print(str(e))

        elif file_type == 'MACHO':
            with open(f, 'rb') as macho_file:
                macho_data = macho_file.read()
            for header in fe.headers:
                # Limit it to X86
                if header.header.cputype not in [7, 0x01000007]:
                    continue
                # Limit it to Object and Executable files
                if header.header.filetype not in [1, 2]:
                    continue
                magic = int(header.MH_MAGIC)
                offset = int(header.offset)
                all_sections = []
                entrypoint_type = ''
                entrypoint_address = 0
                for cmd in header.commands:
                    load_cmd = cmd[0]
                    cmd_info = cmd[1]
                    cmd_data = cmd[2]
                    cmd_name = load_cmd.get_cmd_name()
                    if cmd_name in ('LC_SEGMENT', 'LC_SEGMENT_64'):
                        for section_data in cmd_data:
                            sd = section_data.describe()
                            all_sections.append(sd)
                    elif cmd_name in ('LC_THREAD', 'LC_UNIXTHREAD'):
                        entrypoint_type = 'old'
                        flavor = int(struct.unpack(header.endian + 'I', cmd_data[0:4])[0])
                        count = int(struct.unpack(header.endian + 'I', cmd_data[4:8])[0])
                        # The entry address is read at a flavor-specific
                        # offset of the thread-state blob.
                        if flavor == 1:
                            entrypoint_address = int(struct.unpack(header.endian + 'I', cmd_data[48:52])[0])
                        elif flavor == 4:
                            entrypoint_address = int(struct.unpack(header.endian + 'Q', cmd_data[136:144])[0])
                    elif cmd_name == 'LC_MAIN':
                        entrypoint_type = 'new'
                        entrypoint_address = cmd_info.describe()['entryoff']

                entrypoint_data = ''
                if entrypoint_type == 'new':
                    entrypoint_offset = offset + entrypoint_address
                    # Honour -b here as well (the default is still 500 bytes).
                    entrypoint_data = macho_data[entrypoint_offset:entrypoint_offset + BYTES]
                elif entrypoint_type == 'old':
                    # Translate the virtual entry address to a file offset
                    # via the section that contains it.
                    found_section = False
                    for sec in all_sections:
                        if entrypoint_address >= sec['addr'] and entrypoint_address < (sec['addr'] + sec['size']):
                            found_section = True
                            entrypoint_address = (entrypoint_address - sec['addr']) + sec['offset']
                            break
                    if found_section:
                        entrypoint_offset = offset + entrypoint_address
                        entrypoint_data = macho_data[entrypoint_offset:entrypoint_offset + BYTES]

                mode = CS_MODE_32
                if magic == 0xcffaedfe:
                    mode = CS_MODE_64
                md = Cs(CS_ARCH_X86, mode)
                match = []
                if entrypoint_data:
                    try:
                        for (address, size, mnemonic, op_str) in md.disasm_lite(entrypoint_data, 0x1000):
                            match.append(mnemonic.encode('utf-8').strip())
                    except Exception as e:
                        print(str(e))

                for s in signatures:
                    m = match
                    sig = signatures[s]['mnemonics']
                    if (m and m[0] == sig[0]) or THRESHOLD < .7:
                        additional_info = []
                        if 'num_mnemonics' in signatures[s]:
                            nm = signatures[s]['num_mnemonics']
                            m = match[:nm]
                            sig = signatures[s]['mnemonics'][:nm]
                        else:
                            m = match[:NUM_MNEM]
                            sig = signatures[s]['mnemonics'][:NUM_MNEM]
                        longest = max(len(sig), len(m))
                        if not longest:
                            # Nothing to compare (length limit of 0); avoid 0-division.
                            continue
                        distance = tapered_levenshtein(sig, m)
                        similarity = 1.0 - distance / float(longest)
                        if similarity > THRESHOLD:
                            if DIR_PROCESSING:
                                print("[%s] [%s] (Edits: %s | Similarity: %0.3f) (%s)" % (f, s, distance, similarity, ' | '.join(additional_info)))
                            else:
                                print("[%s] (Edits: %s | Similarity: %0.3f) (%s)" % (s, distance, similarity, ' | '.join(additional_info)))
                            if VERBOSE:
                                print("%s\n%s\n" % (sig, m))
class Tracer():
    """Turn a recorded qiradb trace of an x86 / x86-64 binary into reports.

    The target file is read into memory and disassembled on demand with
    capstone; register and memory values come from ``qiradb.Trace``.
    ``generate_trace`` writes a per-instruction text log, ``generate_cfg``
    builds a graph of the instructions executed inside one function.
    """

    def __init__(self, target, log, start_clnum=0, end_clnum=0):
        """Load ``target`` and open the trace ``log``.

        Blocks (polling every 0.1 s) until the trace reports an update.

        :param target: Path of the traced executable (ELF or PE).
        :param log: Path of the trace; ``log + '_base'`` must hold the
            module base list read by ``get_base``.
        :param start_clnum: Unused here.
        :param end_clnum: Unused here.
        :raises Exception: If the file is neither ELF nor MZ, or the
            architecture is not recognised.
        """
        with open(target, 'rb') as f:
            self.data = f.read()
        self.target = target
        self.log = log
        self.os = self.get_os()
        if self.os is None:
            raise Exception('not supports os')
        self.arch = self.get_arch()
        if self.arch is None:
            raise Exception('not known arch')
        self.base = self.get_base()
        if self.os == 'windows':
            self.pe = PE(target)
        else:
            self.elf = Elf(target)
        if self.arch == 'i386':
            self.md = Cs(CS_ARCH_X86, CS_MODE_32)
            self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
        else:
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)
            self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits
        while not self.t.did_update():
            print("waiting...")
            time.sleep(0.1)
        # Cache: virtual address -> capstone instruction.
        self.disasms = {}
        # self.generate_trace(target, log, start_clnum, end_clnum, 4)

    def get_disasm(self, va):
        """Return ``"mnemonic operands"`` for the instruction at ``va``.

        Results are cached in ``self.disasms``.  Returns ``''`` when the
        address lies beyond the file or cannot be disassembled.
        """
        offset = self.get_offset_from_rva(va - self.base)
        if offset > len(self.data):
            return ''
        try:
            if va in self.disasms:
                insn = self.disasms[va]
                return insn.mnemonic + ' ' + insn.op_str
            for insn in self.md.disasm(self.data[offset:], va, count=1):
                disasm = insn.mnemonic + ' ' + insn.op_str
                self.disasms[va] = insn
                return disasm
        except Exception:
            # Best effort: an undecodable address yields an empty string.
            pass
        return ''

    def get_os(self):
        """Return 'linux' for ELF files, 'windows' for MZ files, else None."""
        if self.data[0:4] == '\x7fELF':
            return 'linux'
        elif self.data[0:2] == 'MZ':
            return 'windows'
        return None

    def get_arch(self):
        """Return 'i386' or 'x86_64', or None when it cannot be determined.

        For ELF the 16-bit field at offset 0x12 is inspected (3 -> i386,
        0x3e -> x86_64).  Windows targets are always reported as 'i386'.
        """
        if self.os == 'linux':
            value = l16(self.data[0x12:0x14])
            if value == 3:
                return 'i386'
            elif value == 0x3e:
                return 'x86_64'
        if self.os == 'windows':
            # to modify
            return 'i386'
        return None

    def get_base(self, module_name=None):
        """Return the load base of a module from the ``<log>_base`` file.

        Each line is expected to start with ``<hex base>-``.  Without
        ``module_name`` the first non-empty line that does not mention a
        shared library ('.so' on linux, '.dll' otherwise) is taken as the
        main module.  Otherwise the first line containing ``module_name``
        is used.  Returns None if no line matches.
        """
        # The path must come from the instance; a bare ``log`` is not defined
        # in this scope.
        with open(self.log + '_base', 'rb') as f:
            if module_name is None:
                # default is the main module
                pattern = r'\.so' if self.os == 'linux' else r'\.dll'
                for line in f:
                    line = line.strip()
                    if line == '':
                        continue
                    if not re.findall(pattern, line):
                        return long(line.split('-')[0], 16)
            else:
                for line in f:
                    if module_name in line:
                        return long(line.split('-')[0], 16)
        return None

    def get_reg_name(self, index):
        """Map a register byte offset from the trace to its register name."""
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi']
            # Explicit floor division: the offset is used as a list index.
            return reg_names[index // 4]
        else:
            reg_names = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                         'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                         'rip']
            return reg_names[index // 8]

    def get_reg_index(self, name):
        """Encode a register name as ``slot | width_flag``.

        The low byte is the register slot; the flag tells ``get_reg`` how to
        mask the value: 0x100 low byte, 0x200 high byte, 0x400 16 bit,
        0x800 32 bit, 0x1000 64 bit.  Returns None for unknown names.
        """
        reg_names2 = ['ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di']
        reg_names3 = ['ah', 'ch', 'dh', 'bh']
        reg_names4 = ['al', 'cl', 'dl', 'bl']
        if name in reg_names2:
            return reg_names2.index(name) | 0x400
        if name in reg_names3:
            return reg_names3.index(name) | 0x200
        if name in reg_names4:
            return reg_names4.index(name) | 0x100
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
                         'eip']
            if name in reg_names:
                return reg_names.index(name) | 0x800
        else:
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
                         'r8d', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d',
                         'r15d']
            if name in reg_names:
                return reg_names.index(name) | 0x800
            reg_names5 = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                          'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                          'rip']
            if name in reg_names5:
                return reg_names5.index(name) | 0x1000

    def get_offset_from_rva(self, rva):
        """Translate an RVA into a file offset using the ELF or PE helper."""
        if self.os == 'linux':
            # to modify
            return self.elf.vma2offset(rva + self.base)
        else:
            return self.pe.get_offset_from_rva(rva)

    def is_branch(self, ins):
        """Tell whether ``"<addr> <mnemonic> ..."`` is a ret, call or jump."""
        if ins == '':
            return False
        opcode = ins.split(' ')[1]
        return opcode == 'ret' or opcode == 'call' or opcode.startswith('j')

    def write_one_ins(self, out, clnum, ins, ops):
        """Write one aligned trace line; branches are followed by a blank line."""
        width = 24 if self.arch == 'i386' else 36
        result = (str(clnum) + ': ').ljust(8, ' ')
        result += ins.ljust(50, ' ')
        for op in ops:
            result += op.ljust(width, ' ')
        out.write(result + '\n')
        if self.is_branch(ins):
            out.write('\n')

    def byte_to_value(self, bytes):
        """Decode 1, 2, 4 or 8 trace bytes with l8 / l16 / l32 / l64.

        :raises Exception: For any other length.
        """
        result = ''.join(chr(byte & 0xff) for byte in bytes)
        if len(result) == 1:
            return l8(result)
        elif len(result) == 2:
            return l16(result)
        elif len(result) == 4:
            return l32(result)
        elif len(result) == 8:
            return l64(result)
        raise Exception('not known len:%d' % len(result))

    def generate_trace(self, start_addr=None, start_clnum=0, end_clnum=0, limit=1):
        """Write the instruction trace with operand values to ``<log>.out``.

        :param start_addr: If given, recording starts once this address is
            executed.
        :param start_clnum: First change number; 0 means the trace minimum.
        :param end_clnum: End change number (exclusive); 0 means the trace
            maximum.
        :param limit: Number of changes fetched per change number.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum()
        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum()
        print('start: %s' % start_clnum)
        print('end: %s' % end_clnum)
        ins = ''
        ops = []
        start_record = start_addr is None
        with open(self.log + '.out', 'wb') as out:
            for i in range(start_clnum, end_clnum):
                changes = self.t.fetch_changes_by_clnum(i, limit)
                if len(changes) < 1:
                    continue
                change = changes[0]
                # Only instruction ('I') changes are decoded.  For any other
                # first change the previous ins/ops are written again.
                if change['type'] == 'I':
                    if not start_record:
                        if change['address'] == start_addr:
                            start_record = True
                        else:
                            continue
                    self.md.detail = True
                    ins = '%x %s' % (change['address'],
                                     self.get_disasm(change['address']))
                    ops = []
                    if change['address'] not in self.disasms:
                        continue
                    insn = self.disasms[change['address']]
                    # Operand number j is looked up at change number i - j.
                    for j, op in enumerate(insn.operands):
                        if op.type == X86_OP_IMM:
                            continue
                        elif op.type == X86_OP_FP:
                            continue
                        elif op.type == X86_OP_REG:
                            reg_name = insn.reg_name(op.reg)
                            reg_value = self.get_reg(i - j, reg_name)
                            ops.append('%s:%x' % (reg_name, reg_value))
                        elif op.type == X86_OP_MEM:
                            if op.mem.base != 0:
                                base_name = insn.reg_name(op.mem.base)
                                base = self.get_reg(i - j, base_name)
                            else:
                                base = 0
                            if op.mem.index != 0:
                                index_name = insn.reg_name(op.mem.index)
                                index = self.get_reg(i - j, index_name)
                            else:
                                index = 0
                            scale = op.mem.scale
                            disp = op.mem.disp
                            mem_addr = base + scale * index + disp
                            mem_byte = self.t.fetch_memory(i - j, mem_addr, op.size)
                            mem_value = self.byte_to_value(mem_byte)
                            ops.append('[%x]:%x' % (mem_addr, mem_value))
                self.write_one_ins(out, i, ins, ops)

    def get_memory(self, clnum, addr, size):
        """Return ``size`` bytes of traced memory at ``addr`` as a string."""
        return ''.join(chr(byte & 0xff)
                       for byte in self.t.fetch_memory(clnum, addr, size))

    def get_reg(self, clnum, reg_name):
        """Return the value of ``reg_name`` at change ``clnum``.

        The full register is fetched and masked according to the width flag
        from ``get_reg_index``.  On 64-bit targets slot 16 (rip) is computed
        as instruction address + ``data`` of the 'I' change.
        """
        index = self.get_reg_index(reg_name)
        reg_value = self.t.fetch_registers(clnum)[index & 0xff]
        # 0x1000 (64 bit) needs no masking.
        if index & 0x800:
            reg_value = reg_value & 0xffffffff
        elif index & 0x400:
            reg_value = reg_value & 0xffff
        elif index & 0x200:
            reg_value = (reg_value & 0xff00) >> 8
        elif index & 0x100:
            reg_value &= 0xff
        if (self.arch != 'i386') & (index & 0xff == 16):
            changes = self.t.fetch_changes_by_clnum(clnum, 1)
            for change in changes:
                if change['type'] == 'I':
                    reg_value = change['address'] + change['data']  # rip
        return reg_value

    def get_ret_addr(self, clnum):
        """Return the pointer-sized value on top of the stack at ``clnum``."""
        if self.arch == 'i386':
            esp = self.get_reg(clnum, 'esp')
            retval = l32(self.get_memory(clnum, esp, 4))
        else:
            rsp = self.get_reg(clnum, 'rsp')
            retval = l64(self.get_memory(clnum, rsp, 8))
        return retval

    def get_pc(self, clnum):
        """Return the address of the 'I' change at ``clnum``, or 0 if none."""
        changes = self.t.fetch_changes_by_clnum(clnum, 1)
        for change in changes:
            if change['type'] == 'I':
                return change['address']
        return 0

    def generate_cfg(self, start_addr, ret_addr=None, start_clnum=0, end_clnum=0):
        """Build the executed CFG of the function at ``start_addr``.

        Each invocation of the function in the trace is collected as a list
        of ``(clnum, pc, asm)`` tuples; instructions executed inside nested
        calls are skipped.  The invocations are then merged into a ``Graph``
        which is rendered to 'tracer.png'.

        :param start_addr: Entry address of the function.
        :param ret_addr: Address that ends an invocation; by default it is
            read from the stack one change before the entry.
        :param start_clnum: First change number; 0 means trace minimum + 1.
        :param end_clnum: Last change number; 0 means trace maximum - 1.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum() + 1
        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum() - 1

        # Pass 1: cut the trace into one instruction list per invocation.
        traces = []
        enter_call = 0
        enter_sub_call = 0
        for i in range(start_clnum, end_clnum + 1):
            pc = self.get_pc(i)
            asm = self.get_disasm(pc)
            if enter_call == 0:
                if pc == start_addr:
                    if ret_addr is None:
                        end_addr = self.get_ret_addr(i - 1)
                        print(hex(end_addr))
                    else:
                        end_addr = ret_addr
                    enter_call = 1
                    trace = [(i, pc, asm)]
            else:
                if end_addr == pc:
                    print('exit call')
                    enter_call = 0
                    traces.append(trace)
                    trace = []
                if enter_sub_call == 0:
                    trace.append((i, pc, asm))
                    if asm.startswith('call'):
                        enter_sub_call = 1
                        sub_call_ret = self.get_ret_addr(i)
                else:
                    # Inside a nested call: resume at its return address.
                    if pc == sub_call_ret:
                        trace.append((i, pc, asm))
                        enter_sub_call = 0

        # Pass 2: merge the invocations into basic-block nodes.
        graph = Graph()
        pcs = []
        for trace in traces:
            print(trace)
        for trace in traces:
            exist_node = None   # already-known node currently being re-walked
            exist_index = 1     # position inside exist_node
            new_node = None     # node currently being extended with new code
            for ins in trace:
                if ins[1] not in pcs:
                    # First time this address is seen.
                    pcs.append(ins[1])
                    if exist_node is None:
                        if new_node is None:
                            new_node = Node([Assemble(ins[1], ins[2])])
                            graph.add_node(new_node)
                        else:
                            new_node.add_asm(Assemble(ins[1], ins[2]))
                    else:
                        # Leaving a known node into new code.
                        new_node = Node([Assemble(ins[1], ins[2])])
                        graph.add_node(new_node)
                        if len(exist_node.asm_seqs) == exist_index:
                            graph.add_edge(exist_node, new_node)
                        else:
                            node1, node2 = graph.split_node(
                                exist_node, exist_index,
                                count=exist_node.count - 1)
                            graph.add_edge(node1, new_node)
                        exist_node = None
                        exist_index = 0
                else:
                    # Address already belongs to some node.
                    if exist_node is None:
                        if new_node is None:
                            exist_node = graph.search_and_split(ins[1])
                            exist_node.add_count()
                            exist_index = 1
                        else:
                            node, index = graph.search_node(ins[1])
                            if index == 0:
                                graph.add_edge(new_node, node)
                                node2 = node
                            else:
                                node1, node2 = graph.split_node(node, index)
                                if node == new_node:
                                    graph.add_edge(node2, node2)
                                else:
                                    graph.add_edge(new_node, node2)
                            new_node = None
                            exist_node = node2
                            node2.add_count()
                            exist_index = 1
                    else:
                        if new_node is None:
                            if len(exist_node.asm_seqs) == exist_index:
                                node3 = graph.search_and_split(ins[1])
                                graph.add_edge(exist_node, node3)
                                exist_node = node3
                                node3.add_count()
                                exist_index = 1
                            else:
                                if exist_node.asm_seqs[exist_index].addr == ins[1]:
                                    exist_index += 1
                                else:
                                    node1, node2 = graph.split_node(
                                        exist_node, exist_index,
                                        count=exist_node.count - 1)
                                    node3 = graph.search_and_split(ins[1])
                                    graph.add_edge(node1, node3)
                                    exist_node = node3
                                    node3.add_count()
                                    exist_index = 1
                        else:
                            print('impossible2 %s' % (ins,))
        graph.print_graph('tracer.png')

    def test(self):
        """Debug helper: print registers and changes recorded at change 13."""
        changes = self.t.fetch_changes_by_clnum(13, 1000)
        print(self.t.fetch_registers(13))
        for change in changes:
            print(change)