def disasm(self, base, code):
    """Disassemble raw machine code for the configured x86 flavour.

    Args:
        base: address assigned to the first byte of ``code``
        code: raw bytes to decode

    Returns:
        list of the instructions Capstone decoded

    Raises:
        UnsupportedArchException: ``self.arch`` is neither ``ARCH.X86``
            nor ``ARCH.X86_64``
    """
    # Both supported architectures share CS_ARCH_X86; only the mode differs.
    if self.arch == ARCH.X86:
        cs_mode = CS_MODE_32
    elif self.arch == ARCH.X86_64:
        cs_mode = CS_MODE_64
    else:
        raise UnsupportedArchException(self.arch)

    decoder = Cs(CS_ARCH_X86, cs_mode)
    return list(decoder.disasm(code, base))
def show_asm(buff, mode, base):
    """
    Render a byte sequence as an x86 assembly listing.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.
    :return: One formatted line per instruction, followed by a ``*/`` line.
    :rtype: str
    """
    engine = Cs(CS_ARCH_X86, mode)
    engine.detail = True

    lines = []
    for insn in engine.disasm(buff, base):
        hex_digits = binascii.hexlify(insn.bytes).decode('utf-8')
        # Regroup the hex digits into space separated byte pairs.
        pairs = zip(hex_digits[::2], hex_digits[1::2])
        byte_text = ' '.join(high + low for high, low in pairs)
        # Long encodings are cut off and flagged with a trailing '+'.
        if len(byte_text) > 18:
            byte_text = byte_text[:18] + '+'
        address_text = '%08x:' % insn.address
        lines.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            address_text, byte_text, insn.mnemonic, insn.op_str))

    lines.append('*/\n')
    return ''.join(lines)
def __get_trace_records(ql: Qiling, address: int, size: int, md: Cs) -> Iterator[TraceRecord]:
    """[private] Yield trace records for the instruction at ``address``.

    A trace record pairs a parsed instruction with a tuple of
    ``(register, value)`` entries for the registers reported by
    ``insn.regs_access()[0]``. More than one record may be produced for a
    single call, because every instruction decoded from the buffer is yielded.
    """
    # unicorn flags unsupported instructions with this magic size. They are not
    # emulated, but capstone can still parse them, so retry with a 16 byte
    # window and keep only the first record.
    unsupported_marker = 0xf1f1f1f1
    if size == unsupported_marker:
        yield next(__get_trace_records(ql, address, 16, md))
        return

    # Hook addresses may not hold meaningful opcodes; present them as a nop.
    if address in ql._addr_hook:
        raw = b'\x90'
    else:
        raw = ql.mem.read(address, size)

    for insn in md.disasm(raw, address):
        # insn.regs_read doesn't work well, so regs_access()[0] is used instead
        accessed = insn.regs_access()[0]
        state = tuple([(reg, ql.reg.read(reg)) for reg in accessed])
        yield (insn, state)
def main(fname):
    """
    Basic python version of the tools:
      - "objdump -d" (linux)
      - "dumpbin /disasm" (MSVC)

    The AR and COFF structures are parsed by ``read_lib_file``; the
    "capstone" library does the actual disassembly of each member's first
    section.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending = deque(coff.symbols)
        decoder = Cs(CS_ARCH_X86, CS_MODE_32)
        decoder.skipdata = True

        # iterate through "CsInsn"
        for insn in decoder.disasm(coff.sections[0].data, 0x000):
            # Consume every symbol located at or before this instruction and
            # print a label for those with type 32 in section 1.
            while pending and insn.address >= pending[0].value:
                sym = pending.popleft()
                if sym.type == 32 and sym.section_number == 1:
                    print(sym.name.decode(errors="ignore") + ":")

            # Encodings of six bytes or more are split: the first six bytes go
            # on the instruction line, the rest on a continuation line.
            head, tail = insn.bytes, b""
            if len(head) >= 6:
                head, tail = head[:6], head[6:]

            if insn.op_str:
                asm_part = "%-12s%s" % (insn.mnemonic, format_asm(insn.op_str))
            else:
                asm_part = insn.mnemonic

            print(" %08X: %-19s" % (insn.address, hex_with_spaces(head)) +
                  asm_part)
            if tail:
                print(" %s" % (hex_with_spaces(tail)))
def trace(ql: Qiling, address: int, size: int, md: Cs):
    """Log one trace line for the instruction that is about to be executed.

    Args:
        ql: the qiling instance
        address: the address of the instruction that is about to be executed
        size: size of the instruction (in bytes)
        md: initialized disassembler object
    """
    faded = '\033[2m'
    reset = '\033[0m'

    # fetch the raw instruction bytes and decode the first instruction
    raw = ql.mem.read(address, size)
    insn = next(md.disasm(raw, address))

    # number of hex digits needed to print an address
    addr_width = ql.arch.bits // 4

    # Values of the registers referenced by this instruction.
    #
    # note: this runs before the instruction has been emulated, so 'rip' still
    # points at the current instruction while the instruction itself treats it
    # as pointing to the next one; 'rip' will therefore show an incorrect value
    reg_values = [
        f'{md.reg_name(reg)} = {ql.arch.regs.read(CS_UC_REGS[reg]):#x}'
        for reg in insn.regs_access()[0]
    ]

    # human-readable trace line
    line = f'{insn.address:0{addr_width}x} | {insn.bytes.hex():24s} {insn.mnemonic:12} {insn.op_str:35s} | {", ".join(reg_values)}'

    # faded colour makes trace output easy to tell apart from other log entries
    ql.log.info(f'{faded}{line}{reset}')
def __init__(self, func, r2obj: dict):
    """Build a basic block from a radare2 JSON block description.

    Args:
        func: the owning function object, stored as ``self.parent``
        r2obj: dictionary describing the block; ``'offset'`` and ``'ops'``
            are required, ``'jump'``, ``'fail'`` and ``'switchop'`` are
            optional

    Raises:
        CapstoneDecodeError: the bytes of an op do not decode to exactly one
            instruction
        UnhandledOutputError: a required key is missing from ``r2obj``
    """
    self.parent = func
    try:
        self.address = r2obj['offset']
        self.jump = r2obj.get('jump', 0)
        self.fail = r2obj.get('fail', 0)
        cases = r2obj.get('switchop', dict()).get('cases', dict())
        self.cases = {c['jump'] for c in cases}
        self.insns = []

        # A single decoder serves every op of the block.
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True

        for op in r2obj['ops']:
            _addr = op['offset']
            _insns = list(
                md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
            if len(_insns) != 1:
                raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')
            _insn: CsInsn = _insns[0]
            _reads, _ = _insn.regs_access()
            # A jmp that reads at least one register is flagged as indirect.
            indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
            self.insns.append(Instruction(_addr, indirect))
    except KeyError as err:
        # 'offset' itself may be the missing key, in which case self.address
        # was never assigned; do not rely on it for the message.
        address = r2obj.get('offset')
        where = f'{address:#x}' if isinstance(address, int) else '<unknown>'
        err_msg = f'Unexpected radare2 output at Basic Block {where}'
        logging.error(err_msg)
        raise UnhandledOutputError(err_msg) from err
class RAD:
    '''Provide a random access disassembler (RAD).'''

    def __init__(self, sections: SectionFinder, arch, bits):
        '''Set up a disassembler over the provided sections.

        Arguments:
            sections -- A section finder instance.
            arch -- The architecture, as defined by Capstone.
            bits -- The bit width, as defined by Capstone.
        '''
        # Set up options for disassembly.
        self.md = Cs(arch, bits)
        self.md.skipdata = True
        self.md.detail = True
        self.sections = sections
        # Cache of the most recently used section: its data and the
        # half-open address range [_last_start, _last_end) it covers.
        self._last_data = None
        self._last_start = 0
        self._last_end = 0

    def at(self, address):
        '''Try to disassemble and return the instruction starting at the
        given address.

        An AddressException is raised if the address is not present in any of
        the sections, and a NotExecutableException is raised if the address is
        in a section that is not both allocated and executable. StopIteration
        propagates if Capstone yields nothing for the address.
        '''
        if address >= self._last_end or address < self._last_start:
            # The cached section does not cover the address; look it up.
            section = self.sections.find(address)
            if section is None:
                # No section contains the address.
                raise AddressException(address)

            # Make sure the section is executable and allocated.
            flags = section.header.sh_flags
            allocated = flags & SH_FLAGS.SHF_ALLOC
            executable = flags & SH_FLAGS.SHF_EXECINSTR
            if not allocated or not executable:
                # This section is not allocated or does not contain code.
                raise NotExecutableException(section)

            # Cache the section so the lookup is skipped next time.
            self._last_data = section.data()
            self._last_start = section.header.sh_addr
            self._last_end = self._last_start + section.data_size

        # Index into the cached section's data for the given address. The
        # address is known to be inside the section at this point.
        index = address - self._last_start

        # The maximum length of an x86-64 instruction is 15 bytes. Longer
        # encodings (e.g. with redundant prefixes) raise a general protection
        # (GP) exception on the processor, so 15 bytes is enough to decode
        # one instruction.
        window = self._last_data[index:index + 15]
        return next(self.md.disasm(window, address, count=1))

    def in_range(self, address):
        '''Determine if an address is in range and executable.

        The result is falsy when no section contains the address or when the
        section lacks the allocated or executable flag, and truthy otherwise.
        '''
        section = self.sections.find(address)
        if section is None:
            # Must be checked before touching section.header.
            return False
        flags = section.header.sh_flags
        return ((flags & SH_FLAGS.SHF_ALLOC) and
                (flags & SH_FLAGS.SHF_EXECINSTR))
def generate_rule(self):
    """Populate ``self.yr_rule`` from the collected chunks and return it.

    Code chunks are disassembled and turned into one hex string per chunk;
    data chunks are hex-encoded as they are. The rule condition is always
    "any of them".
    """
    rule = self.yr_rule
    rule.rule_name = self.rule_name
    rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
    rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    rule.metas["version"] = "\"1.0\""

    md = Cs(self.instruction_set, self.instruction_mode)
    md.detail = True
    md.syntax = CS_OPT_SYNTAX_INTEL

    for chunk_nr, chunk in enumerate(self._chunks, 1):
        chunk_id = "$chunk_{}".format(chunk_nr)

        if chunk.is_data is not False:
            # NOTE(review): str.encode("hex") only exists on Python 2.
            rule.add_string(chunk_id, self.format_hex(chunk.data.encode("hex")),
                            StringType.HEX)
            continue

        signature_lines = []
        comment_lines = []
        for ins in md.disasm(chunk.data, chunk.offset):
            rule_part, comment = self._process_instruction(ins)
            signature_lines.append(self.format_hex(rule_part) + "\n")
            comment_lines.append(comment + "\n")

        rule.add_string(chunk_id, "".join(signature_lines), StringType.HEX)
        if self.do_comment_sig:
            rule.comments.append("".join(comment_lines))

    rule.condition = "any of them"
    return rule
def disassemble(self, code: bytes, address: int) -> List[DumpAssembly]:
    """Decode ``code`` as ARM (ARM mode) instructions based at ``address``.

    Returns one ``DumpAssembly`` per decoded instruction, holding its address
    and the tab separated mnemonic and operand string.
    """
    decoder = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    return [
        DumpAssembly(insn.address, f'{insn.mnemonic}\t{insn.op_str}')
        for insn in decoder.disasm(code, address)
    ]
def filter_asm_and_return_instruction_list(address,
                                           asm,
                                           symbols,
                                           arch,
                                           API,
                                           symbolic_call=True):
    """Disassemble hex-encoded code and return its hash and filtered form.

    ``asm`` is a hex string; it is decoded as ARM (ARM mode) when ``arch`` is
    ``capstone.CS_ARCH_ARM`` and as 64-bit x86 otherwise.

    Returns a tuple ``(hash, insns)`` where ``hash`` is the result of
    ``constantIndependt_hash`` over the raw Capstone instructions and
    ``insns`` holds the result of ``filter_memory_references`` for each one.
    """
    raw = binascii.unhexlify(asm)

    if arch == capstone.CS_ARCH_ARM:
        cs_arch, cs_mode = capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM
    else:
        cs_arch, cs_mode = capstone.CS_ARCH_X86, capstone.CS_MODE_64
    md = Cs(cs_arch, cs_mode)
    md.detail = True

    cap_insns = list(md.disasm(raw, address))
    insns = [
        filter_memory_references(insn, symbols, API,
                                 symbolic_call=symbolic_call)
        for insn in cap_insns
    ]
    del md
    return (constantIndependt_hash(cap_insns), insns)
def _disassemble(self, data):
    """Generate formatted output items for ``data``.

    Three kinds of items are yielded: ``db`` lines for runs of null bytes,
    ``db`` lines for strings reported by ``self._strings``, and one line per
    instruction Capstone decodes. Bytes that fit none of these are emitted
    through ``self._bytepatch``.

    ``done`` is the offset up to which output has been produced; ``cursor``
    is the offset at which decoding is currently attempted and may run ahead
    of ``done`` while undecodable bytes are skipped.
    """
    capstone = Cs(*self.args.mode.value)
    strz = self._strings(data)
    # NOTE(review): in the visible code `string` is fetched once and never
    # advanced to the next item of `strz` -- confirm this is intended.
    string = next(strz, None)
    cursor, done = 0, 0
    while done < len(data):
        cursor = max(cursor, done)
        # Length of the null-byte run at the cursor, as reported by the helper.
        patchsize = self._nullsize(data, cursor, self.args.width)
        if patchsize > 2:
            yield self._format(done, data[done:done + patchsize], 'db', ','.join('0' * patchsize))
            done += patchsize
            continue
        if cursor >= len(data):
            # Ran off the end while skipping: flush the remaining bytes.
            yield self._bytepatch(data, done, len(data))
            done = cursor
        if string and cursor >= string.end:
            # The cursor passed the string: emit the bytes before it, then
            # the string itself as a db line.
            yield self._bytepatch(data, done, string.start)
            yield self._format(string.start, data[string.start:string.end], 'db', string.data)
            done = string.end
            continue
        try:
            # Decode a single instruction from a window of at most 15 bytes.
            ins = next(capstone.disasm(
                data[cursor:cursor + 15], cursor, count=1))
            end = ins.address + ins.size
            if self.args.str and string:
                # An instruction overlapping the string is discarded and the
                # cursor jumps past the string instead.
                if end > string.start and string.end > cursor:
                    cursor = string.end
                    continue
        except StopIteration:
            # Nothing decodes here; try again one byte further.
            cursor += 1
            continue
        else:
            yield self._format(ins.address, ins.bytes, ins.mnemonic, ins.op_str)
            done = end
class Disasassembler:
    """Thin wrapper around a 32-bit x86 Capstone decoder with a base offset."""

    def __init__(self, offset=0):
        """Create the decoder; ``offset`` is the address used for decoding."""
        self._disas = Cs(CS_ARCH_X86, CS_MODE_32)
        self.offset = offset

    def disasm(self, opcodes):
        """Return Capstone's instruction iterator for ``opcodes``.

        Addresses start at the current value of ``self.offset``.
        """
        base = self.offset
        decoder = self._disas
        return decoder.disasm(opcodes, base)
def disassemble(addr, data):
    """Print a big-endian PowerPC disassembly of ``data`` based at ``addr``.

    A failure notice is printed when not a single instruction decodes.
    """
    decoder = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN)
    decoded_any = False
    for insn in decoder.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
def __init__(self, encoding, position):
    """Initialise the instruction and attach its Capstone decoding.

    ``self._encoding`` is serialised as four little-endian bytes and decoded
    in ARM mode at ``position``. ``self._cap`` ends up holding the last
    instruction Capstone produced, or ``None`` when nothing decoded.
    """
    super(CAPSInstruction, self).__init__(encoding, position)

    # CAPSTONE object
    raw = (self._encoding).to_bytes(4, byteorder='little')
    decoder = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    decoder.detail = True

    self._cap = None
    decoded = list(decoder.disasm(raw, position))
    if decoded:
        self._cap = decoded[-1]
def find_instr_addr(mod_name, bits):
    """Locate a call site reached from the ``rtcInStrChar`` export.

    The export is disassembled up to its first ``ret``; the operand of the
    last ``call`` seen is taken as the address of a second function. That
    function is scanned for calls that either mention the ``SysFreeString``
    import address or use a ``qword ptr`` operand. When the second such call
    is found, the image-relative offset of the call instruction preceding it
    is returned.

    Args:
        mod_name: path of the PE module to analyse
        bits: 32 selects 32-bit decoding, anything else 64-bit

    Returns:
        The image-relative offset described above, or ``None`` when the
        export, the import, or the call pattern cannot be found.
    """
    dll = pefile.PE(mod_name)
    image_base = dll.OPTIONAL_HEADER.ImageBase

    # pefile reports names as bytes on Python 3, so accept both spellings.
    exp_addr = None
    for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
        if entry.name in ('rtcInStrChar', b'rtcInStrChar'):
            exp_addr = entry.address
            break

    imp_addr = None
    for imp in dll.DIRECTORY_ENTRY_IMPORT:
        for entry in imp.imports:
            if entry.name in ('SysFreeString', b'SysFreeString'):
                imp_addr = entry.address
                break

    if exp_addr is None or imp_addr is None:
        return None

    memory = dll.get_memory_mapped_image()
    if bits == 32:
        dsm = Cs(CS_ARCH_X86, CS_MODE_32)
    else:
        dsm = Cs(CS_ARCH_X86, CS_MODE_64)

    # Pass 1: operand of the last call before the export's first ret.
    last_call = None
    for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0],
                         exp_addr + image_base):
        if op.mnemonic == 'call':
            last_call = op.op_str
        if op.mnemonic == 'ret':
            break
    if last_call is None:
        return None
    try:
        next_func = int(last_call, 16) - image_base
    except ValueError:
        # The operand is not a plain address (e.g. an indirect call).
        return None

    # Pass 2: find the second matching call inside the called function.
    import_ref = '0x%x' % imp_addr
    call_free = 0
    for op in dsm.disasm(memory[next_func:next_func + 0x200],
                         next_func + image_base):
        is_call = op.mnemonic == 'call'
        if is_call and (import_ref in op.op_str or 'qword ptr' in op.op_str):
            call_free += 1
            if call_free == 2:
                return last_call
        if is_call:
            # Remember every call so the one preceding the match is known.
            last_call = op.address - image_base
        if op.mnemonic == 'ret':
            return None
    return None
def disasm_bytes(bytes, addr):
    """Disassemble ``bytes`` as AArch64 code, one 4-byte word at a time.

    Words that do not decode are represented by a wrapped ``nop`` whose
    mnemonic is overwritten with a ``.quad`` data directive, so the result
    holds one entry for every word.
    """
    nop_encoding = b"\x1f\x20\x03\xd5"  # bytes for nop

    md = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
    md.syntax = CS_OPT_SYNTAX_ATT
    md.detail = True

    result = []
    for offset in range(0, len(bytes), 4):
        word = bytes[offset:offset + 4]
        here = addr + offset
        decoded = list(md.disasm(word, here))
        if decoded:
            result.extend(decoded)
            continue

        # The instruction is invalid: craft a fake "nop" (so the rest of the
        # code keeps working) and relabel it as data with a comment.
        placeholder = InstructionWrapper(list(md.disasm(nop_encoding, here))[0])
        # are we sure about 'little'?
        placeholder.mnemonic = ".quad 0x%x // invalid instruction" % int.from_bytes(
            word, byteorder="little")
        result.append(placeholder)
    return result
def extract_call_destinations(elf):
    """Collect the distinct call targets found in the ELF's entry code.

    The code comes from the section containing the entry point, or from the
    segment (starting at the entry point) when only a segment is available.
    Which instructions count as calls depends on the architecture:
    ``call 0x...`` on x86/x64, ``bl* #0x...`` on ARM and ``lw $t9, ...`` on
    MIPS.

    Returns:
        List of target strings (hex digits without the ``0x`` prefix for
        x86/ARM) in order of first appearance; empty when there is no code
        or the architecture is not in ``CALL_LIST``.
    """
    # get the code section or segment (if there's no section)
    code_section_or_segment = get_ep_section_or_segment(elf)
    if code_section_or_segment is None:
        # Nothing to disassemble; must be checked before it is dereferenced.
        return []

    # if we only got the segment, start extracting calls from the EP
    if isinstance(code_section_or_segment, elftools.elf.segments.Segment):
        ofs = elf.header.e_entry
        code_data = code_section_or_segment.data()[
            ofs - code_section_or_segment["p_vaddr"]:]
    # otherwise we use the code section
    else:
        ofs = elf_get_imagebase(elf) + code_section_or_segment["sh_offset"]
        code_data = code_section_or_segment.data()

    # The disassembly mode and the call mnemonic depend on the architecture,
    # as defined by the CALL_LIST dict.
    arch = elf.get_machine_arch()
    if arch not in CALL_LIST:
        # no capstone arch/mode configured for this architecture
        return []

    # TODO: automatically identify the architecture the binary was compiled to
    md = Cs(CALL_LIST[arch]["cs_arch"], CALL_LIST[arch]["cs_mode"])

    symbols_list = []
    seen = set()  # O(1) duplicate check; the list keeps first-seen order

    def _record(address):
        if address not in seen:
            seen.add(address)
            symbols_list.append(address)

    # TODO: handle UPX-packed binaries as they have no sections so we should
    # go straight to segment offset
    for i in md.disasm(code_data, ofs):
        if arch in ("x86", "x64") and i.mnemonic == "call":
            # Consider only call to absolute addresses
            if i.op_str.startswith("0x"):
                _record(i.op_str[2:])  # cut off '0x' prefix
        elif arch == "ARM" and i.mnemonic.startswith("bl"):
            if i.op_str.startswith("#0x"):
                _record(i.op_str[3:])
        elif arch == "MIPS" and i.mnemonic == "lw":
            if i.op_str.startswith("$t9, "):
                # keeps the text between the first 8 and the last 5 characters
                _record(i.op_str[8:-5])
    return symbols_list
def disassemble(addr, data, thumb=False):
    """Print a little-endian ARM disassembly of ``data`` based at ``addr``.

    Thumb mode is used when ``thumb`` compares equal to ``True``, ARM mode
    otherwise. A failure notice is printed when nothing decodes.
    """
    # `== True` is kept on purpose: only values equal to True select Thumb.
    mode = CS_MODE_THUMB if thumb == True else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    decoded_any = False
    for insn in md.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s %s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
def disassemble(addr, data, thumb=False):
    """Print a little-endian ARM disassembly of ``data`` based at ``addr``.

    Thumb mode is used when ``thumb`` compares equal to ``True``, ARM mode
    otherwise. A failure notice is printed when nothing decodes.
    """
    # `== True` is kept on purpose: only values equal to True select Thumb.
    mode = CS_MODE_THUMB if thumb == True else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)

    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    decoded_any = False
    for insn in md.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s %s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
def __gadgetsFinding(self, section, gadgets, arch, mode):
    """Search a section for gadgets ending in one of the given patterns.

    Args:
        section: mapping with the raw ``"opcodes"`` and the ``"vaddr"`` they
            are loaded at
        gadgets: sequence of ``(pattern, size, alignment)`` entries
        arch: Capstone architecture
        mode: Capstone mode

    Returns:
        List of dicts with the keys ``"vaddr"``, ``"gadget"`` (instructions
        joined by " ; "), ``"decodes"``, ``"bytes"`` and ``"prev"`` (up to
        ``PREV_BYTES`` bytes preceding the gadget).
    """
    C_OP = 0
    C_SIZE = 1
    C_ALIGN = 2
    PREV_BYTES = 9  # Number of bytes prior to the gadget to store.

    ret = []
    md = Cs(arch, mode)
    opcodes = section["opcodes"]
    base = section["vaddr"]

    for gad in gadgets:
        align = gad[C_ALIGN]
        allRefRet = [m.start() for m in re.finditer(gad[C_OP], opcodes)]
        for ref in allRefRet:
            for i in range(self.__options.depth):
                start = ref - (i * align)
                if (base + start) % align != 0:
                    continue

                window = opcodes[start:ref + gad[C_SIZE]]
                decodes = md.disasm(window, base + ref)

                gadget = ""
                last = None
                for decode in decodes:
                    # An empty op_str leaves a double space before the
                    # separator; collapse it.
                    gadget += (decode.mnemonic + " " + decode.op_str +
                               " ; ").replace("  ", " ")
                    last = decode

                # Skip windows that decode to nothing, and windows whose last
                # instruction is not the gadget terminator.
                if last is None or re.search(gad[C_OP], last.bytes) is None:
                    continue

                if len(gadget) > 0:
                    gadget = gadget[:-3]  # drop the trailing " ; "
                    off = self.__offset
                    vaddr = off + base + start
                    prevBytesAddr = max(base, vaddr - PREV_BYTES)
                    prevBytes = opcodes[prevBytesAddr - base:vaddr - base]
                    ret += [{
                        "vaddr": vaddr,
                        "gadget": gadget,
                        # NOTE: this is the iterator consumed by the loop
                        # above, stored unchanged.
                        "decodes": decodes,
                        "bytes": window,
                        "prev": prevBytes
                    }]
    return ret
def find_instr_addr(mod_name, bits):
    """Locate a call site reached from the ``rtcInStrChar`` export.

    The export is disassembled up to its first ``ret``; the operand of the
    last ``call`` seen is taken as the address of a second function. That
    function is scanned for calls that either mention the ``SysFreeString``
    import address or use a ``qword ptr`` operand. When the second such call
    is found, the image-relative offset of the call instruction preceding it
    is returned.

    Args:
        mod_name: path of the PE module to analyse
        bits: 32 selects 32-bit decoding, anything else 64-bit

    Returns:
        The image-relative offset described above, or ``None`` when the
        export, the import, or the call pattern cannot be found.
    """
    dll = pefile.PE(mod_name)
    image_base = dll.OPTIONAL_HEADER.ImageBase

    # pefile reports names as bytes on Python 3, so accept both spellings.
    exp_addr = None
    for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
        if entry.name in ('rtcInStrChar', b'rtcInStrChar'):
            exp_addr = entry.address
            break

    imp_addr = None
    for imp in dll.DIRECTORY_ENTRY_IMPORT:
        for entry in imp.imports:
            if entry.name in ('SysFreeString', b'SysFreeString'):
                imp_addr = entry.address
                break

    if exp_addr is None or imp_addr is None:
        return None

    memory = dll.get_memory_mapped_image()
    if bits == 32:
        dsm = Cs(CS_ARCH_X86, CS_MODE_32)
    else:
        dsm = Cs(CS_ARCH_X86, CS_MODE_64)

    # Pass 1: operand of the last call before the export's first ret.
    last_call = None
    for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0],
                         exp_addr + image_base):
        if op.mnemonic == 'call':
            last_call = op.op_str
        if op.mnemonic == 'ret':
            break
    if last_call is None:
        return None
    try:
        next_func = int(last_call, 16) - image_base
    except ValueError:
        # The operand is not a plain address (e.g. an indirect call).
        return None

    # Pass 2: find the second matching call inside the called function.
    import_ref = '0x%x' % imp_addr
    call_free = 0
    for op in dsm.disasm(memory[next_func:next_func + 0x200],
                         next_func + image_base):
        is_call = op.mnemonic == 'call'
        if is_call and (import_ref in op.op_str or 'qword ptr' in op.op_str):
            call_free += 1
            if call_free == 2:
                return last_call
        if is_call:
            # Remember every call so the one preceding the match is known.
            last_call = op.address - image_base
        if op.mnemonic == 'ret':
            return None
    return None
def find_single((raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad, need_filter, ref)):
    """Collect the gadgets that end at one terminator match.

    The single tuple argument (Python 2 tuple-parameter syntax) carries the
    raw section bytes, the section's address, the ELF type, the ELF base
    address, the Capstone arch and mode, the gadget descriptor ``gad``
    (indexed by C_OP / C_SIZE / C_ALIGN), the ``need_filter`` flag and the
    offset ``ref`` of the terminator inside ``raw_data``.

    Returns the list of gadgets accepted for this reference.
    """
    C_OP = 0
    C_SIZE = 1
    C_ALIGN = 2
    allgadgets = []
    md = Cs(arch, mode)
    md.detail = True
    # Try windows that start 0..9 alignment units before the terminator.
    for i in range(10):
        back_bytes = i * gad[C_ALIGN]
        section_start = ref - back_bytes
        start_address = pvaddr + section_start
        if elftype == 'DYN':
            # 'DYN' objects additionally get the supplied base address added.
            start_address = elf_base_addr + start_address
        decodes = md.disasm(raw_data[section_start:ref + gad[C_SIZE]],
                            start_address)
        decodes = list(decodes)
        insns = []
        for decode in decodes:
            insns.append((decode.mnemonic + " " + decode.op_str).strip())
        if len(insns) > 0:
            if (start_address % gad[C_ALIGN]) == 0:
                address = start_address
                if mode == CS_MODE_THUMB:
                    # Thumb gadget addresses get the low bit set.
                    address = address | 1
                bytes = raw_data[ref - (i * gad[C_ALIGN]):ref + gad[C_SIZE]]
                onegad = Gadget(address, insns, {}, 0, bytes)
                if not passClean(decodes):
                    continue
                # The architecture specific filters may return a falsy value,
                # which drops the gadget below.
                if arch == CS_ARCH_X86:
                    onegad = filter_for_x86_big_binary(onegad)
                elif arch == CS_ARCH_ARM:
                    onegad = filter_for_arm_big_binary(onegad)
                # NOTE(review): classification runs when need_filter is
                # false -- confirm the flag's polarity with the caller.
                if (not need_filter) and onegad:
                    classifier = GadgetClassifier(arch, mode)
                    onegad = classifier.classify(onegad)
                if onegad:
                    allgadgets += [onegad]
    return allgadgets
def disasm_plt(bytes, offset=0):
    """Resolve the rip-relative memory operand of the first instruction.

    Args:
        bytes: raw x86-64 code
        offset: address of the first byte

    Returns:
        ``(target, size)`` where ``target`` is the address following the
        first instruction plus the operand's displacement and ``size`` is the
        operand size; ``(None, None)`` when nothing decodes, when the first
        instruction has no rip-relative memory operand, or when Capstone
        reports an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected.
        instruc = next(md.disasm(bytes, offset, count=1), None)
        if instruc is None:
            return None, None

        # rip-relative addressing is relative to the next instruction, which
        # starts right after this one; no second decoded instruction needed.
        next_address = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Same shape as the other "not found" results.
        return None, None
def disasm_plt(bytes, offset=0):
    """Resolve the rip-relative memory operand of the first instruction.

    Args:
        bytes: raw x86-64 code
        offset: address of the first byte

    Returns:
        ``(target, size)`` where ``target`` is the address following the
        first instruction plus the operand's displacement and ``size`` is the
        operand size; ``(None, None)`` when nothing decodes, when the first
        instruction has no rip-relative memory operand, or when Capstone
        reports an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected.
        instruc = next(md.disasm(bytes, offset, count=1), None)
        if instruc is None:
            return None, None

        # rip-relative addressing is relative to the next instruction, which
        # starts right after this one; no second decoded instruction needed.
        next_address = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Same shape as the other "not found" results.
        return None, None
def disassemble(self, size, thumb=True):
    """
    Display the bytes disassembled using Capstone at the current position.

    Args:
        size (:obj:`int`): the number of bytes to disassemble
        thumb (:obj:`bool`): True if Thumb, False otherwise
    """
    from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB

    cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB if thumb else CS_MODE_ARM)
    addr = self._ptr.value
    for insn in cs.disasm(self.read(size), addr):
        # The three fields must be passed as separate arguments; handing the
        # tuple over as one argument makes "{:08x}" fail.
        print("{:08x}:\t{} {}".format(insn.address, insn.mnemonic,
                                      insn.op_str))
def _cs_disassemble_one(self, data, address):
    """Return the first instruction Capstone decodes from ``data``.

    The decoder configured on this object is tried first; when it yields
    nothing, a plain ARM-mode decoder is tried.

    Raises:
        InvalidDisassemblerData: neither decoder produced an instruction.
    """
    primary = list(self._disassembler.disasm(data, address))
    # TODO: Improve this check.
    if primary:
        return primary[0]

    cs_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    fallback = list(cs_arm.disasm(data, address))
    if not fallback:
        raise InvalidDisassemblerData(
            "CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))
    return fallback[0]
def generate_pic(buff, mode):
    """
    Build a position independent YARA hex string for a byte sequence.

    Instructions with exactly one immediate or exactly one memory operand
    contribute a wildcard span that runs from the end of their opcode
    encoding to the start of the next instruction (or the end of the buffer).

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :return: YARA compliant hex string sequence.
    :rtype: str
    """
    md = Cs(CS_ARCH_X86, mode)
    md.detail = True

    spans = []       # (offset, size) pairs to wildcard
    pending = False  # a span was opened by the previous instruction
    offset = 0

    for insn in md.disasm(buff, 0x0):
        if pending:
            # Close the span opened by the previous instruction.
            spans.append((offset, insn.address - offset))
            pending = False

        if insn.op_count(X86_OP_IMM) != 1 and insn.op_count(X86_OP_MEM) != 1:
            continue

        # Skip the opcode bytes plus the modrm, rex and sib bytes if present.
        offset = insn.address + _get_opcode_length(insn.opcode)
        pending = True
        for encoding_field in (insn.modrm, insn.rex, insn.sib):
            if encoding_field > 0:
                offset += 1
        # Prefix slots holding zero are unused; count only the used ones.
        offset += MAX_PREFIX_SIZE - insn.prefix.count(0x0)

    if pending:
        spans.append((offset, len(buff) - offset))

    return '{ ' + _to_yara_hex_string(buff, spans) + ' }'
def __parse_plt(self):
    """Fill the PLT <-> GOT lookup tables from the ``.plt`` section.

    Every third decoded instruction (skipping the very first one) is treated
    as a rip-relative reference; its address is mapped to the referenced
    address in ``self.plt_got_dic`` and the reverse in ``self.got_plt_dic``.

    Raises:
        ValueError: the ELF has no ``.plt`` section.
    """
    # parsing .plt section
    plt_sct = self.elf.get_section_by_name(".plt")
    if plt_sct is None:
        raise ValueError

    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True

    decoded = md.disasm(plt_sct.data(), plt_sct["sh_addr"])
    for index, insn in enumerate(decoded):
        if index == 0 or index % 3 != 0:
            continue
        # rip points at the following instruction when this one executes
        rip = insn.address + insn.size
        assert len(insn.operands) == 1
        target = rip + insn.operands[0].value.mem.disp
        self.plt_got_dic[insn.address] = target
        self.got_plt_dic[target] = insn.address
def disasm(self, addr):
    """Disassemble the whole section containing ``addr`` into ``self.code``.

    Each decoded instruction is stored in ``self.code`` under its address and
    the address is appended to ``self.code_idx``. ``die`` is called when the
    section is not executable.
    """
    (data, virtual_addr, flags) = self.binary.get_section(addr)

    if not flags["exec"]:
        die("the address 0x%x is not in an executable section" % addr)

    if self.bits == 64:
        mode = CS_MODE_64
    else:
        mode = CS_MODE_32
    md = Cs(CS_ARCH_X86, mode)
    md.detail = True

    for insn in md.disasm(data, virtual_addr):
        where = insn.address
        self.code[where] = insn
        self.code_idx.append(where)

    # Now load imported symbols for PE. This cannot be done before,
    # because we need the code for a better resolution.
    if self.binary.get_type() == T_BIN_PE:
        self.binary.load_import_symbols(self.code)
def _cs_disassemble_one(self, data, address):
    """Return the first instruction Capstone decodes from ``data``.

    The decoder configured on this object is tried first; when it yields
    nothing, a plain ARM-mode decoder is tried.

    Raises:
        InvalidDisassemblerData: neither decoder produced an instruction.
    """
    primary = list(self._disassembler.disasm(data, address))
    # TODO: Improve this check.
    if primary:
        return primary[0]

    cs_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    fallback = list(cs_arm.disasm(data, address))
    if not fallback:
        raise InvalidDisassemblerData(
            "CAPSTONE: Unknown instruction (Addr: {:s}).".format(
                hex(address)))
    return fallback[0]
def compute_eflags_setter(self):
    """Record which EFLAGS bits the gadgets of each trace may modify.

    For every flag object in ``self.traces`` the gadgets of its traces are
    disassembled (32-bit x86). When an instruction's ``eflags`` overlaps the
    flag object's monitored bits, the translated ``eflags`` value is passed
    to ``fl.set_eflag_bitmask``.
    """
    dis32 = Cs(CS_ARCH_X86, CS_MODE_32)
    dis32.detail = True
    # Set when a gadget produced a match; it makes the *next* iteration over
    # gadget addresses reset the marker and stop walking that trace.
    # NOTE(review): the marker is not reset between traces, so a match on the
    # last gadget of one trace skips the first gadget of the next trace --
    # confirm this is intended.
    flag_insn = False
    for fl, traces in self.traces.items():
        for trace in traces:
            for g_addr in trace:
                if flag_insn:
                    flag_insn = False
                    break
                gadget_bytes = self._emu.gadget_map[g_addr].rop_bytes
                for insn in dis32.disasm(gadget_bytes, g_addr):
                    # Check every instruction of the gadget to see if it can perform a modification of the
                    # monitored bits (doesn't mean that the bits have been actually modified)
                    if insn.eflags and insn.eflags & self.capstone_to_eflags_aux(fl.monitored_bits):
                        fl.set_eflag_bitmask(self.capstone_to_eflags_aux(insn.eflags))
                        flag_insn = True
                        break
def get_compiler_info(rom_bytes, entry_point, print_result=True):
    """Guess the compiler from the ratio of ``b`` to ``j`` instructions.

    The ROM is decoded as big-endian MIPS64 starting 0x1000 bytes in, with
    ``entry_point`` as the address of the first decoded byte. More ``b`` than
    ``j`` instructions yields "IDO", anything else "GCC".

    Returns:
        "IDO" or "GCC"
    """
    md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
    md.detail = True

    tally = {"j": 0, "b": 0}
    for insn in md.disasm(rom_bytes[0x1000:], entry_point):
        if insn.mnemonic in tally:
            tally[insn.mnemonic] += 1
    jumps = tally["j"]
    branches = tally["b"]

    if branches > jumps:
        compiler = "IDO"
    else:
        compiler = "GCC"

    if print_result:
        print(
            f"{branches} branches and {jumps} jumps detected in the first code segment. Compiler is most likely {compiler}"
        )
    return compiler
def disassemble_bytes(
    cap: cs.Cs, base_address: int, data: bytes
) -> List[Tuple[Optional[CsInsn], int, bytes]]:
    """Disassemble ``data`` without stopping at undecodable words.

    Each result entry is ``(insn, address, raw_bytes)``. ``insn`` is ``None``
    for instructions whose id is listed in ``MANUAL_MNEMONICS`` and for
    4-byte words Capstone could not decode; decoding resumes after such a
    word.
    """
    records: List[Tuple[Optional[CsInsn], int, bytes]] = []
    total = len(data)
    pos = 0
    while pos < total:
        # Decode as far as Capstone gets from the current position; every
        # decoded instruction advances the position by one 4-byte word.
        for insn in cap.disasm(data[pos:total], base_address + pos):
            decoded = None if insn.id in MANUAL_MNEMONICS else insn
            records.append((decoded, insn.address, insn.bytes))
            pos += 4

        # Capstone stopped early: record the offending word as raw data and
        # step over it.
        if pos < total:
            records.append((None, base_address + pos, data[pos:pos + 4]))
            pos += 4
    return records
def main(fname):
    """
    Basic python version of the tools:
      - "objdump -d" (linux)
      - "dumpbin /disasm" (MSVC)

    The AR and COFF structures are parsed by ``read_lib_file``; the
    "capstone" library disassembles the first section of every member.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending = deque(coff.symbols)
        decoder = Cs(CS_ARCH_X86, CS_MODE_32)
        decoder.skipdata = True

        for insn in decoder.disasm(coff.sections[0].data, 0x000):
            # At most one symbol is consumed per instruction; symbols of
            # type 32 are printed as a label.
            if pending and insn.address >= pending[0].value:
                sym = pending.popleft()
                if sym.type == 32:
                    print(sym.name.decode(errors="ignore"))
            print(" 0x%x:\t%s\t%s" % (insn.address, insn.mnemonic,
                                      insn.op_str))
def dumpASM(flo, mode, maxAddr=1e99):
    """Print an x86 disassembly of ``flo`` with implicit register usage.

    ``mode`` must be 32 or 64. Output stops after the first instruction whose
    address is greater than ``maxAddr``.
    """
    cs_modes = {32: CS_MODE_32, 64: CS_MODE_64}
    md = Cs(CS_ARCH_X86, cs_modes[mode])
    md.detail = True

    register_sections = (
        ("\tImplicit registers read: ", "regs_read"),
        ("\tImplicit registers written: ", "regs_write"),
    )

    for insn in md.disasm(flo, 0):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
        for heading, attribute in register_sections:
            print(heading, end="")
            for reg in getattr(insn, attribute):
                print("%s " % insn.reg_name(reg))
            print()
        if insn.address > maxAddr:
            break
def disas_function(self, name): if len(self.functions_name_dic) == 0: self.__parse_functions() all_txt = self.elf.get_section_by_name(".text") base_addr = all_txt["sh_addr"] sct = self.functions_name_dic[name] if sct == None: return offset = sct["st_value"] - base_addr func_txt = all_txt.data()[offset:offset + sct["st_size"]] md = Cs(CS_ARCH_X86, CS_MODE_64) md.detail = True for mnemonic in md.disasm(func_txt, sct["st_value"]): print( self.__disas_function_format.format(hex(mnemonic.address), mnemonic.mnemonic, mnemonic.op_str)) regs = mnemonic.regs_access() read_regs = regs[0] write_regs = regs[1] if len(read_regs) > 1: print("\tRead registers: {}".format( reduce( lambda r1, r2: mnemonic.reg_name(r1) + ", " + mnemonic. reg_name(r2), read_regs))) elif len(read_regs) == 1: print("\tRead registers: {}".format( mnemonic.reg_name(read_regs[0]))) if len(write_regs) > 1: print("\tWrite registers: {}".format( reduce( lambda r1, r2: mnemonic.reg_name(r1) + ", " + mnemonic. reg_name(r2), write_regs))) elif len(write_regs) == 1: print("\tWrite registers: {}".format( mnemonic.reg_name(write_regs[0]))) """
def disasm(bytes, offset=0):
    """Disassemble x86-64 code and annotate every instruction for display.

    Besides Capstone's own fields, instructions receive extra attributes:
    ``nop``; ``internal_jump`` / ``external_jump`` with the ``jump_*``
    details for jumps and calls with an immediate target; ``rip*`` for
    rip-relative memory operands; ``regs_read_names`` / ``regs_write_names``;
    and ``docfile`` / ``short_desc``. Mnemonics without documentation are
    appended to ``missing_docs.log``.

    Args:
        bytes: raw code of one function
        offset: address of the first byte

    Returns:
        List of annotated instructions, or ``None`` after a Capstone error
        (which is printed).
    """
    # Single-argument print(...) behaves like the Python 2 print statement.
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))

        # Bounds of this function, used to tell internal from external jumps.
        if disassembled:
            func_start_addr = disassembled[0].address
            func_end_addr = disassembled[-1].address

        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))

            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # We can only decode the destination if it's an immediate value
                if instr.operands and instr.operands[0].type == x86.X86_OP_IMM:
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        # Jump/call to an address within this function
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol = executable.ex.get_symbol_by_addr(dest_addr)
                        if symbol:
                            text_sect = executable.ex.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']
                            instr.external_jump = True
                            instr.jump_address = dest_addr
                            instr.jump_function_name = demangle(symbol.name)
                            instr.jump_function_address = dest_addr
                            instr.jump_function_offset = dest_addr - sect_addr + sect_offset
                            instr.jump_function_size = symbol['st_size']
                            instr.comment = demangle(symbol.name)

            # Handle individual operands
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # rip is the address of the next instruction, i.e. the end
                    # of this one; computing it this way also works for the
                    # last instruction of the buffer.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset
                    symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved)
                    if symbol:
                        instr.comment = demangle(symbol.name)

                    rip_bytes = executable.ex.get_bytes(instr.rip_resolved, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in rip_bytes)

                    # HTML collapses consecutive spaces. For presentation
                    # purposes, replace spaces with &nbsp; (non-breaking space)
                    if op.size == 16:
                        nbsp_str = []
                        for char in rip_bytes:
                            if char == ' ':
                                nbsp_str.append('&nbsp;')
                            else:
                                nbsp_str.append(char)
                        instr.rip_value_ascii = ''.join(nbsp_str)
                        # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                        # it, we're temporarily pretending they don't exist. Those edge cases need to be
                        # handled.
                        # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."

            # what registers does this instruction read/write?
            instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write]
            instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.docfile = doc_file(instr)
            instr.short_desc = get_short_desc(instr)
            if instr.docfile is None:
                with open('missing_docs.log', 'a+') as f:
                    f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic))

        return disassembled
    except CsError as e:
        print("ERROR: %s" % e)
from __future__ import print_function

# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
for i in md.disasm(CODE, 0):
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))

    if len(i.regs_read) > 0:
        # print_function is active, so staying on the same line needs end=""
        # (a trailing comma would only build a tuple) and the line break
        # needs a real call (a bare `print` does nothing).
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()

    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups:", end="")
        for g in i.groups:
            print("%u" % g)
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    """Create an x86 decoder for ``mode`` (32 or 64) with details enabled.

    NOTE(review): the visible body builds the decoder but never uses it, nor
    ``flo`` or ``maxAddr`` -- the function looks unfinished.
    """
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}
    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True
def do_POST(self):
    """Handle one message posted by the client.

    The form-encoded body carries ``addr``, ``data`` (hex string), ``type``
    and optionally ``extra``. Supported types:

    * ``read``    -- print a dump of the data
    * ``dis``     -- print a disassembly (Thumb when ``extra`` is "thumb")
    * ``dis_res`` -- combine operand text of the first two instructions into
      an address and queue a ``resolve`` command in ``self.mods``
    * ``dump``    -- write the data to the file named by ``extra``

    Written for Python 2 (``urlparse``, ``str.decode('hex')``); the
    single-argument ``print(...)`` calls behave like print statements there.
    """
    # A missing header counts as an empty body instead of raising.
    length = int(self.headers.getheader('content-length', 0))
    if not length:
        return

    rdata = urlparse.parse_qs(self.rfile.read(length))
    addr = 0
    extra = ""
    try:
        addr = int(rdata['addr'][0])
    except KeyError:
        print("[+] Warning: addr not received")
    try:
        data = rdata['data'][0]
    except KeyError:
        print("[+] Error: dump not received")
        return
    try:
        typ = rdata['type'][0]
    except KeyError:
        print("[+] Error: msg type not received")
        return
    try:
        extra = rdata['extra'][0]
    except KeyError:
        pass

    if typ == 'read':
        print(display_data(addr, data.decode('hex')))

    if typ == 'dis':
        if extra == "thumb":
            disassemble(addr, data.decode('hex'), thumb=True)
        else:
            disassemble(addr, data.decode('hex'))

    if typ == 'dis_res':
        mode = CS_MODE_ARM
        md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
        ops = []
        for i in md.disasm(data.decode('hex'), addr):
            # Capstone reports mnemonics in lower case.
            if i.mnemonic.lower() == "svc":
                print("Could not resolve " + extra + " (syscall) ")
                return
            # presumably skips a fixed "rN, #0x" prefix of the operand
            # string -- TODO confirm
            ops.append(i.op_str[7:])

        if len(ops) < 2:
            # Two instructions are needed to build the address.
            print("Could not resolve " + extra + " (invalid address) ")
        else:
            ptrstr = ops[1].rjust(4, '0') + ops[0].rjust(4, '0')
            print(ptrstr)
            cmdstr = "resolve 0x" + ptrstr + " " + extra
            if int(ptrstr, 16) > 0x40000000:
                self.mods.append(cmdstr)
            else:
                print("Could not resolve " + extra + " (invalid address) ")

    if typ == 'dump':
        fname = extra
        dump_data(data.decode('hex'), fname)
def do_POST(self):
    """Handle one message posted by the client.

    The form-encoded body carries ``addr``, ``data`` (hex string), ``type``
    and optionally ``extra``. Supported types:

    * ``read``    -- print a dump of the data
    * ``dis``     -- print a disassembly (Thumb when ``extra`` is "thumb")
    * ``dis_res`` -- combine operand text of the first two instructions into
      an address and queue a ``resolve`` command in ``self.mods``
    * ``dump``    -- write the data to the current dump file; a new dump
      directory is initialised when ``extra`` does not start with the
      current dump file name

    Written for Python 2 (``urlparse``, ``str.decode('hex')``); the
    single-argument ``print(...)`` calls behave like print statements there.
    """
    global CURRENT_DUMP_FILE_NAME

    # A missing header counts as an empty body instead of raising.
    length = int(self.headers.getheader('content-length', 0))
    if not length:
        return

    rdata = urlparse.parse_qs(self.rfile.read(length))
    addr = 0
    extra = ""
    try:
        addr = int(rdata['addr'][0])
    except KeyError:
        print("[+] Warning: addr not received")
    try:
        data = rdata['data'][0]
    except KeyError:
        print("[+] Error: dump not received")
        return
    try:
        typ = rdata['type'][0]
    except KeyError:
        print("[+] Error: msg type not received")
        return
    try:
        extra = rdata['extra'][0]
    except KeyError:
        pass

    if typ == 'read':
        print(display_data(addr, data.decode('hex')))

    if typ == 'dis':
        if extra == "thumb":
            disassemble(addr, data.decode('hex'), thumb=True)
        else:
            disassemble(addr, data.decode('hex'))

    if typ == 'dis_res':
        mode = CS_MODE_ARM
        md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
        ops = []
        print("Parsing: " + extra)
        for i in md.disasm(data.decode('hex'), addr):
            print("0x%x:\t%s %s" % (i.address, i.mnemonic, i.op_str))
            # Capstone reports mnemonics in lower case.
            if i.mnemonic.lower() == "svc":
                print("Could not resolve " + extra + " (syscall) ")
                return
            # presumably skips a fixed "rN, #0x" prefix of the operand
            # string -- TODO confirm
            ops.append(i.op_str[7:])

        if len(ops) < 2:
            # Two instructions are needed to build the address.
            print("Could not resolve " + extra + " (invalid address) ")
        else:
            ptrstr = "0x" + ops[1].rjust(4, '0') + ops[0].rjust(4, '0')
            cmdstr = "resolve " + ptrstr + " " + extra
            print(cmdstr)
            pointer = int(ptrstr, 16)
            if (pointer > 0x40000000) and (pointer < 0xE000000000):
                self.mods.append(cmdstr)
            else:
                print("Could not resolve " + extra + " (invalid address) ")
        print("----")

    if typ == 'dump':
        if CURRENT_DUMP_FILE_NAME == "":
            # If this is the initial dump
            CURRENT_DUMP_FILE_NAME = extra
            # check if this file already exists
            self.dump_directory_initializer(extra)
        elif not extra.startswith(CURRENT_DUMP_FILE_NAME):
            # If this is a different dump
            self.dump_directory_initializer(extra)
            CURRENT_DUMP_FILE_NAME = extra
        dump_data(data.decode('hex'), CURRENT_DUMP_FILE_NAME)
class Tracer():
    """Render a qiradb execution trace of an x86 / x86-64 binary.

    The target binary is read from disk so that instructions can be
    disassembled with Capstone, while register, memory and change records
    come from ``qiradb.Trace``.  ``generate_trace`` writes a per-instruction
    listing and ``generate_cfg`` builds a graph from repeated executions of
    one function.
    """

    def __init__(self, target, log, start_clnum=0, end_clnum=0):
        """Load ``target``, open the trace ``log`` and wait until it is ready.

        Args:
            target: path of the traced executable (ELF or PE).
            log: path of the trace log; ``log + '_base'`` must list the
                module load addresses.
            start_clnum: unused here (kept for interface compatibility).
            end_clnum: unused here (kept for interface compatibility).

        Raises:
            Exception: if the file format or CPU architecture is not
                recognised.
        """
        with open(target, 'rb') as f:
            self.data = f.read()
        self.target = target
        self.log = log
        self.os = self.get_os()
        if self.os is None:
            raise Exception('not supports os')
        self.arch = self.get_arch()
        if self.arch is None:
            raise Exception('not known arch')
        self.base = self.get_base()
        if self.os == 'windows':
            self.pe = PE(target)
        else:
            self.elf = Elf(target)
        if self.arch == 'i386':
            self.md = Cs(CS_ARCH_X86, CS_MODE_32)
            self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
        else:
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)
            self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits
        while not self.t.did_update():
            print("waiting...")
            time.sleep(0.1)
        # Cache of decoded instructions, keyed by virtual address.
        self.disasms = {}
        # self.generate_trace(target, log, start_clnum, end_clnum, 4)

    def get_disasm(self, va):
        """Return ``"mnemonic op_str"`` for the instruction at ``va``.

        Decoded instructions are cached in ``self.disasms``.  An empty
        string is returned when the address lies beyond the file data or
        when decoding fails.
        """
        cached = self.disasms.get(va)
        if cached is not None:
            return cached.mnemonic + ' ' + cached.op_str
        offset = self.get_offset_from_rva(va - self.base)
        if offset > len(self.data):
            return ''
        try:
            for insn in self.md.disasm(self.data[offset:], va, count=1):
                self.disasms[va] = insn
                return insn.mnemonic + ' ' + insn.op_str
        except Exception:
            # Best effort: an undecodable address yields an empty listing
            # entry rather than aborting the whole trace.
            pass
        return ''

    def get_os(self):
        """Return 'linux' for ELF data, 'windows' for MZ data, else None."""
        if self.data[0:4] == b'\x7fELF':
            return 'linux'
        elif self.data[0:2] == b'MZ':
            return 'windows'
        return None

    def get_arch(self):
        """Return 'i386' or 'x86_64', or None when it cannot be determined.

        For ELF the 16-bit value at file offset 0x12 is inspected (3 ->
        i386, 0x3e -> x86_64).  Windows targets are always reported as
        'i386' for now.
        """
        if self.os == 'linux':
            value = l16(self.data[0x12:0x14])
            if value == 3:
                return 'i386'
            elif value == 0x3e:
                return 'x86_64'
        if self.os == 'windows':
            # to modify
            return 'i386'
        return None

    def get_base(self, module_name=None):
        """Return a module's load address read from ``self.log + '_base'``.

        Each line is expected to start with ``<hex base>-``.  With
        ``module_name`` None the first non-blank line that does not mention
        a shared library ('.so' on linux, '.dll' otherwise) is taken to be
        the main module.  Otherwise the first line containing
        ``module_name`` is used.  Returns None when nothing matches.
        """
        # The path comes from the instance; the method previously relied on
        # an unrelated global named ``log``.
        with open(self.log + '_base', 'r') as f:
            if module_name is None:
                # default is the main module
                pattern = r'\.so' if self.os == 'linux' else r'\.dll'
                for line in f:
                    line = line.strip()
                    if line == '':
                        continue
                    if not re.findall(pattern, line):
                        return int(line.split('-')[0], 16)
            else:
                for line in f:
                    if module_name in line:
                        return int(line.split('-')[0], 16)
        return None

    def get_reg_name(self, index):
        """Map a register-file byte offset to the full-width register name."""
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi']
            return reg_names[index // 4]
        else:
            reg_names = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                         'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                         'rip']
            return reg_names[index // 8]

    def get_reg_index(self, name):
        """Encode a register name as ``slot | width_flag``.

        The low byte is the register's position in the register list and
        the flag gives the accessed part: 0x100 low byte, 0x200 high byte,
        0x400 16-bit, 0x800 32-bit, 0x1000 64-bit.  Returns None for a name
        that is not listed.
        """
        narrow_tables = (
            (['ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di'], 0x400),
            (['ah', 'ch', 'dh', 'bh'], 0x200),
            (['al', 'cl', 'dl', 'bl'], 0x100),
        )
        for names, flag in narrow_tables:
            if name in names:
                return names.index(name) | flag

        if self.arch == 'i386':
            dword_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi',
                           'edi', 'eip']
        else:
            dword_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi',
                           'edi', 'r8d', 'r9d', 'r10d', 'r11d', 'r12d',
                           'r13d', 'r14d', 'r15d']
        if name in dword_names:
            return dword_names.index(name) | 0x800

        qword_names = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                       'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                       'rip']
        if name in qword_names:
            return qword_names.index(name) | 0x1000
        return None

    def get_offset_from_rva(self, rva):
        """Translate an address relative to ``self.base`` to a file offset."""
        if self.os == 'linux':
            # to modify
            return self.elf.vma2offset(rva + self.base)
        else:
            return self.pe.get_offset_from_rva(rva)

    def is_branch(self, ins):
        """Tell whether a listing entry ``"<addr> <mnemonic> ..."`` branches.

        ret, call and every mnemonic starting with 'j' count as branches.
        """
        if ins == '':
            return False
        parts = ins.split(' ')
        if len(parts) < 2:
            # No mnemonic field present, so it cannot be a branch.
            return False
        opcode = parts[1]
        return opcode == 'ret' or opcode == 'call' or opcode.startswith('j')

    def write_one_ins(self, out, clnum, ins, ops):
        """Write one aligned listing line; add a blank line after branches."""
        op_width = 24 if self.arch == 'i386' else 36
        result = (str(clnum) + ': ').ljust(8, ' ')
        result += ins.ljust(50, ' ')
        result += ''.join(op.ljust(op_width, ' ') for op in ops)
        out.write(result + '\n')
        if self.is_branch(ins):
            out.write('\n')

    def byte_to_value(self, bytes):
        """Combine 1, 2, 4 or 8 byte values into an integer via l8..l64.

        Raises:
            Exception: for any other length.
        """
        result = ''.join(chr(byte & 0xff) for byte in bytes)
        if len(result) == 1:
            return l8(result)
        elif len(result) == 2:
            return l16(result)
        elif len(result) == 4:
            return l32(result)
        elif len(result) == 8:
            return l64(result)
        raise Exception('not known len:%d' % len(result))

    def _describe_operands(self, insn, clnum):
        """Return ``name:value`` / ``[addr]:value`` strings for an instruction.

        Operand number ``j`` is looked up at change number ``clnum - j``;
        immediate and floating-point operands are skipped.
        """
        ops = []
        for j, op in enumerate(insn.operands):
            if op.type == X86_OP_IMM or op.type == X86_OP_FP:
                continue
            if op.type == X86_OP_REG:
                reg_name = insn.reg_name(op.reg)
                reg_value = self.get_reg(clnum - j, reg_name)
                ops.append('%s:%x' % (reg_name, reg_value))
            elif op.type == X86_OP_MEM:
                if op.mem.base != 0:
                    base = self.get_reg(clnum - j, insn.reg_name(op.mem.base))
                else:
                    base = 0
                if op.mem.index != 0:
                    index = self.get_reg(clnum - j,
                                         insn.reg_name(op.mem.index))
                else:
                    index = 0
                mem_addr = base + op.mem.scale * index + op.mem.disp
                mem_byte = self.t.fetch_memory(clnum - j, mem_addr, op.size)
                mem_value = self.byte_to_value(mem_byte)
                ops.append('[%x]:%x' % (mem_addr, mem_value))
        return ops

    def generate_trace(self, start_addr=None, start_clnum=0, end_clnum=0,
                       limit=1):
        """Write an instruction listing to ``self.log + '.out'``.

        Args:
            start_addr: when given, output starts at the first instruction
                executed at this address.
            start_clnum: first change number (0 selects the trace minimum).
            end_clnum: end change number, exclusive (0 selects the trace
                maximum).
            limit: number of changes fetched per change number; only the
                first one is examined.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum()
        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum()
        print('start: %s' % (start_clnum,))
        print('end: %s' % (end_clnum,))

        ins = ''
        ops = []
        start_record = start_addr is None
        # Operand details are needed below, so enable them before decoding.
        self.md.detail = True
        with open(self.log + '.out', 'wb') as out:
            for i in range(start_clnum, end_clnum):
                changes = self.t.fetch_changes_by_clnum(i, limit)
                if len(changes) < 1:
                    continue
                change = changes[0]
                if change['type'] == 'I':
                    if not start_record:
                        if change['address'] == start_addr:
                            start_record = True
                        else:
                            continue
                    ins = '%x %s' % (change['address'],
                                     self.get_disasm(change['address']))
                    ops = []
                    if change['address'] not in self.disasms:
                        continue
                    insn = self.disasms[change['address']]
                    ops = self._describe_operands(insn, i)
                # Handling of the other change types ('R', 'W', 'L', 'S',
                # 's') is disabled; only instruction records produce
                # operand columns.
                self.write_one_ins(out, i, ins, ops)

    def get_memory(self, clnum, addr, size):
        """Return ``size`` bytes at ``addr`` as of ``clnum`` as a string."""
        return ''.join(chr(byte & 0xff)
                       for byte in self.t.fetch_memory(clnum, addr, size))

    def get_reg(self, clnum, reg_name):
        """Return the value of ``reg_name`` at change number ``clnum``.

        The width flag from ``get_reg_index`` selects which part of the
        stored register is returned.  Slot 16 on a non-i386 target is
        computed from the instruction record instead (address + data).
        An unknown register name makes this raise TypeError.
        """
        index = self.get_reg_index(reg_name)
        reg_value = self.t.fetch_registers(clnum)[index & 0xff]
        # 0x1000 (64-bit) needs no masking.
        if index & 0x800:
            reg_value = reg_value & 0xffffffff
        elif index & 0x400:
            reg_value = reg_value & 0xffff
        elif index & 0x200:
            reg_value = (reg_value & 0xff00) >> 8
        elif index & 0x100:
            reg_value &= 0xff
        if self.arch != 'i386' and (index & 0xff) == 16:
            for change in self.t.fetch_changes_by_clnum(clnum, 1):
                if change['type'] == 'I':
                    # rip
                    reg_value = change['address'] + change['data']
        return reg_value

    def get_ret_addr(self, clnum):
        """Return the pointer-sized value on top of the stack at ``clnum``."""
        if self.arch == 'i386':
            esp = self.get_reg(clnum, 'esp')
            retval = l32(self.get_memory(clnum, esp, 4))
        else:
            rsp = self.get_reg(clnum, 'rsp')
            retval = l64(self.get_memory(clnum, rsp, 8))
        return retval

    def get_pc(self, clnum):
        """Return the instruction address at ``clnum``, or 0 if none."""
        for change in self.t.fetch_changes_by_clnum(clnum, 1):
            if change['type'] == 'I':
                return change['address']
        return 0

    def generate_cfg(self, start_addr, ret_addr=None, start_clnum=0,
                     end_clnum=0):
        """Build a graph from every traced execution of one function.

        Each run from ``start_addr`` until its return address is collected
        as a list of ``(clnum, pc, asm)``; instructions executed inside
        nested calls are left out.  The runs are then merged into a
        ``Graph`` which is written to 'tracer.png'.

        Args:
            start_addr: entry address of the function.
            ret_addr: address that ends a run; when None it is read from
                the stack at the change just before the entry.
            start_clnum: first change number (0 -> trace minimum + 1).
            end_clnum: last change number (0 -> trace maximum - 1).
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum() + 1
        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum() - 1

        # Pass 1: cut the trace into one instruction list per invocation.
        traces = []
        enter_call = 0
        enter_sub_call = 0
        for i in range(start_clnum, end_clnum + 1):
            pc = self.get_pc(i)
            asm = self.get_disasm(pc)
            if enter_call == 0:
                if pc == start_addr:
                    if ret_addr is None:
                        end_addr = self.get_ret_addr(i - 1)
                        print(hex(end_addr))
                    else:
                        end_addr = ret_addr
                    enter_call = 1
                    trace = [(i, pc, asm)]
            else:
                if end_addr == pc:
                    print('exit call')
                    enter_call = 0
                    traces.append(trace)
                    trace = []
                if enter_sub_call == 0:
                    trace.append((i, pc, asm))
                    if asm.startswith('call'):
                        # Skip everything until the callee returns.
                        enter_sub_call = 1
                        sub_call_ret = self.get_ret_addr(i)
                else:
                    if pc == sub_call_ret:
                        trace.append((i, pc, asm))
                        enter_sub_call = 0

        for trace in traces:
            print(trace)

        # Pass 2: merge the runs, extending or splitting nodes as needed.
        graph = Graph()
        pcs = set()
        for trace in traces:
            exist_node = None   # already-known node currently being followed
            exist_index = 1     # position reached inside exist_node
            new_node = None     # node currently being filled with new code
            for ins in trace:
                if ins[1] not in pcs:
                    pcs.add(ins[1])
                    if exist_node is None:
                        if new_node is None:
                            new_node = Node([Assemble(ins[1], ins[2])])
                            graph.add_node(new_node)
                        else:
                            new_node.add_asm(Assemble(ins[1], ins[2]))
                    else:
                        new_node = Node([Assemble(ins[1], ins[2])])
                        graph.add_node(new_node)
                        if len(exist_node.asm_seqs) == exist_index:
                            graph.add_edge(exist_node, new_node)
                        else:
                            node1, node2 = graph.split_node(
                                exist_node, exist_index,
                                count=exist_node.count - 1)
                            graph.add_edge(node1, new_node)
                        exist_node = None
                        exist_index = 0
                else:
                    if exist_node is None:
                        if new_node is None:
                            exist_node = graph.search_and_split(ins[1])
                            exist_node.add_count()
                            exist_index = 1
                        else:
                            node, index = graph.search_node(ins[1])
                            if index == 0:
                                graph.add_edge(new_node, node)
                                node2 = node
                            else:
                                node1, node2 = graph.split_node(node, index)
                                if node == new_node:
                                    graph.add_edge(node2, node2)
                                else:
                                    graph.add_edge(new_node, node2)
                            new_node = None
                            exist_node = node2
                            node2.add_count()
                            exist_index = 1
                    else:
                        if new_node is None:
                            if len(exist_node.asm_seqs) == exist_index:
                                node3 = graph.search_and_split(ins[1])
                                graph.add_edge(exist_node, node3)
                                exist_node = node3
                                node3.add_count()
                                exist_index = 1
                            else:
                                if exist_node.asm_seqs[exist_index].addr == ins[1]:
                                    exist_index += 1
                                else:
                                    node1, node2 = graph.split_node(
                                        exist_node, exist_index,
                                        count=exist_node.count - 1)
                                    node3 = graph.search_and_split(ins[1])
                                    graph.add_edge(node1, node3)
                                    exist_node = node3
                                    node3.add_count()
                                    exist_index = 1
                        else:
                            print('impossible2 %s' % (ins,))
        graph.print_graph('tracer.png')

    def test(self):
        """Print the registers and up to 1000 changes of change number 13."""
        changes = self.t.fetch_changes_by_clnum(13, 1000)
        print(self.t.fetch_registers(13))
        for change in changes:
            print(change)
def createDisassembly(fileContent, offset):
    """Disassemble ``fileContent`` as 32-bit x86 code.

    Args:
        fileContent: raw machine code to decode.
        offset: address assigned to the first byte.

    Returns:
        A list with every instruction Capstone decoded, in order.
    """
    disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
    instructions = []
    instructions.extend(disassembler.disasm(fileContent, offset))
    return instructions
def _annotate_branch(exe, instr, func_start_addr, func_end_addr):
    """Attach jump/call target information to ``instr``."""
    target = instr.operands[0]
    # jump table
    if instr.group(x86.X86_GRP_JUMP) and target.type == x86.X86_OP_REG:
        instr.jump_table = instr.reg_name(target.reg)
        return
    # We can only decode the destination if it's an immediate value
    if target.type != x86.X86_OP_IMM:
        return

    dest_addr = target.imm
    # A destination inside this function is recorded as an internal jump.
    if func_start_addr <= dest_addr <= func_end_addr:
        instr.internal_jump = True
        instr.jump_address = dest_addr
        return

    symbol, field_name = exe.get_symbol_by_addr(dest_addr, instr.address)
    if not symbol:
        return
    text_sect = exe.elff.get_section_by_name('.text')
    sect_addr = text_sect['sh_addr']
    sect_offset = text_sect['sh_offset']
    instr.comment = demangle(symbol.name)
    # only follow call address if it is a known location
    if symbol['st_size'] > 0:
        instr.external_jump = True
        instr.jump_address = symbol["st_value"]
        instr.jump_function_name = demangle(symbol.name)
        instr.jump_function_address = symbol["st_value"]
        instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset
        instr.jump_function_size = symbol['st_size']


def _annotate_rip_operand(exe, instr, op):
    """Resolve a RIP-relative memory operand and attach the data it names."""
    instr.rip = True
    instr.rip_offset = op.mem.disp
    # RIP-relative addressing is based on the address of the *next*
    # instruction, i.e. this one's address plus its encoded size.  Using the
    # size also works when this is the last decoded instruction.
    instr.rip_resolved = instr.address + instr.size + instr.rip_offset

    # file offset depends on section
    section = exe.get_section_from_offset(instr.rip_resolved)
    file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"]

    # Read in and unpack the first byte at the offset
    val_8 = exe.get_bytes(file_offset, 1)
    instr.signed_8 = unpack('b', val_8)[0]
    instr.unsigned_8 = unpack('B', val_8)[0]
    instr.hex_8 = hex(instr.unsigned_8)

    # Read in and unpack the first two bytes at the offset
    val_16 = exe.get_bytes(file_offset, 2)
    instr.signed_16 = unpack('h', val_16)[0]
    instr.unsigned_16 = unpack('H', val_16)[0]
    instr.hex_16 = hex(instr.unsigned_16)

    # Read in and unpack the first four bytes at the offset
    val_32 = exe.get_bytes(file_offset, 4)
    instr.signed_32 = unpack('i', val_32)[0]
    instr.unsigned_32 = unpack('I', val_32)[0]
    instr.hex_32 = hex(instr.unsigned_32)
    instr.float = unpack('f', val_32)[0]

    # Read in and unpack the first eight bytes at the offset
    val_64 = exe.get_bytes(file_offset, 8)
    instr.signed_64 = unpack('q', val_64)[0]
    instr.unsigned_64 = unpack('Q', val_64)[0]
    instr.hex_64 = hex(instr.unsigned_64)
    instr.double = unpack('d', val_64)[0]

    symbol, field_name = exe.get_symbol_by_addr(
        instr.rip_resolved, instr.address,
        instr_size=op.size, get_sub_symbol=True)
    if symbol:
        instr.comment = demangle(symbol.name)
        if field_name:
            instr.comment += '.' + field_name

    # Kept in a local so the caller's ``bytes`` argument is not clobbered.
    raw = exe.get_bytes(file_offset, op.size)
    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)

    if op.size == 16:
        # HTML collapses consecutive spaces. For presentation purposes,
        # replace spaces with &nbsp; (non-breaking space).
        # NOTE(review): the entity is restored here because the literal had
        # degraded to whitespace, which made the replacement a no-op --
        # confirm the consumer renders this as HTML.
        instr.rip_value_ascii = ''.join(
            '&nbsp;' if char == ' ' else char for char in raw)
        # TODO: there's a bug involving ASCII that cannot be jsonified. To
        # get around it, we're temporarily pretending they don't exist.
        # Those edge cases need to be handled.
        # see typeName(
    else:
        instr.rip_value_ascii = "under construction..."


def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 code and annotate every instruction.

    Each decoded instruction is printed and then decorated with extra
    attributes: ``nop``; jump/call destination data; ``return_type``;
    RIP-relative operand resolution with the data found there; explicit
    and implicit register usage; and documentation metadata.  Instructions
    without documentation are appended to ``missing_docs.log`` under
    ``CUR_PATH``.

    Args:
        exe: executable wrapper providing symbol, section and byte lookups.
        bytes: machine code to decode.
        offset: address assigned to the first byte.

    Returns:
        The list of annotated instructions, or None when Capstone raises
        ``CsError`` (the error is printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))

        # The function's address range is the same for every instruction.
        func_start_addr = func_end_addr = None
        if disassembled:
            func_start_addr = disassembled[0].address
            func_end_addr = disassembled[-1].address

        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))

            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                _annotate_branch(exe, instr, func_start_addr, func_end_addr)

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    _annotate_rip_operand(exe, instr, op)

                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    # using an array instead of object to guarantee ordering
                    ptr = ["", "", ""]
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)
                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write]
            # Calls are additionally treated as writing rax.
            rax_name = instr.reg_name(x86.X86_REG_RAX)
            if instr.group(x86.X86_GRP_CALL) and rax_name not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(rax_name)
            instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled
    except CsError as e:
        print("ERROR: %s" % e)