def __init__(self, func, r2obj: dict):
    """Build a basic block from one radare2 basic-block JSON object.

    :param func: Owning function object; stored as ``self.parent``.
    :param r2obj: Dict describing the block. ``'offset'`` and ``'ops'`` are
        required (each op needs ``'offset'`` and ``'bytes'``); ``'jump'``,
        ``'fail'`` and ``'switchop'`` are optional.
    :raises CapstoneDecodeError: if the bytes of an op do not decode to
        exactly one instruction.
    :raises UnhandledOutputError: if a required key is missing.
    """
    self.parent = func
    # Tracked locally so the error handler can report the block address
    # even when 'offset' itself is the missing key.
    address = None
    try:
        address = self.address = r2obj['offset']
        self.jump = r2obj.get('jump', 0)
        self.fail = r2obj.get('fail', 0)
        cases = r2obj.get('switchop', dict()).get('cases', dict())
        self.cases = {c['jump'] for c in cases}
        self.insns = []

        # One decoder is enough for the whole block; it carries no per-op state.
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        for op in r2obj['ops']:
            _addr = op['offset']
            _insns = list(
                md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
            if len(_insns) != 1:
                raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')
            _insn: CsInsn = _insns[0]
            _reads, _ = _insn.regs_access()
            # A jmp that reads at least one register is treated as indirect.
            indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
            self.insns.append(Instruction(_addr, indirect))
    except KeyError as exc:
        where = '<unknown>' if address is None else f'{address:#x}'
        err_msg = f'Unexpected radare2 output at Basic Block {where}'
        logging.error(err_msg)
        raise UnhandledOutputError(err_msg) from exc
def filter_asm_and_return_instruction_list(address, asm, symbols, arch, API, symbolic_call=True):
    """Disassemble hex-encoded code and filter each instruction.

    ``asm`` is a hex string; it is decoded to raw bytes and disassembled at
    ``address`` as ARM when ``arch`` is ``capstone.CS_ARCH_ARM`` and as
    64-bit x86 otherwise.

    Returns a 2-tuple: the result of ``constantIndependt_hash`` over the
    decoded Capstone instructions, and the list produced by applying
    ``filter_memory_references`` to each instruction in order.
    """
    raw_code = binascii.unhexlify(asm)

    arch_and_mode = (
        (capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
        if arch == capstone.CS_ARCH_ARM
        else (capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    )
    engine = Cs(*arch_and_mode)
    engine.detail = True

    filtered = []
    decoded = []
    for insn in engine.disasm(raw_code, address):
        filtered.append(
            filter_memory_references(insn, symbols, API,
                                     symbolic_call=symbolic_call))
        decoded.append(insn)

    del engine
    return (constantIndependt_hash(decoded), filtered)
def generate_rule(self):
    """Populate and return ``self.yr_rule``.

    Sets the rule name and the ``generated_by`` / ``date`` / ``version``
    metas, then adds one HEX string per entry of ``self._chunks``: code
    chunks are disassembled and each instruction is turned into a signature
    line via ``_process_instruction``; data chunks are hex-encoded as-is.
    The condition is always ``any of them``.
    """
    self.yr_rule.rule_name = self.rule_name
    self.yr_rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
    self.yr_rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    self.yr_rule.metas["version"] = "\"1.0\""

    md = Cs(self.instruction_set, self.instruction_mode)
    md.detail = True
    md.syntax = CS_OPT_SYNTAX_INTEL

    for chunk_nr, chunk in enumerate(self._chunks, 1):
        chunk_id = "$chunk_{}".format(chunk_nr)

        if chunk.is_data is not False:
            # Data chunk: emitted verbatim as hex.
            rule_part = self.format_hex(chunk.data.encode("hex"))
            self.yr_rule.add_string(chunk_id, rule_part, StringType.HEX)
            continue

        # Code chunk: one signature line (and one comment line) per instruction.
        signature_lines = []
        comment_lines = []
        for ins in md.disasm(chunk.data, chunk.offset):
            rule_part, comment = self._process_instruction(ins)
            signature_lines.append(self.format_hex(rule_part) + "\n")
            comment_lines.append(comment + "\n")

        self.yr_rule.add_string(chunk_id, "".join(signature_lines), StringType.HEX)
        if self.do_comment_sig:
            self.yr_rule.comments.append("".join(comment_lines))

    self.yr_rule.condition = "any of them"
    return self.yr_rule
def show_asm(buff, mode, base):
    """
    Render a byte sequence as an x86 assembly listing.

    Each line holds the address, the instruction bytes as space-separated
    hex pairs (cut to 18 characters plus ``+`` when longer), the mnemonic
    and the operand string. The listing always ends with a ``*/`` line.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.
    :return: Assembly code representation.
    :rtype: str
    """
    md = Cs(CS_ARCH_X86, mode)
    md.detail = True

    pieces = []
    for insn in md.disasm(buff, base):
        hex_digits = binascii.hexlify(insn.bytes).decode('utf-8')
        byte_column = ' '.join(
            high + low for high, low in zip(hex_digits[::2], hex_digits[1::2]))
        if len(byte_column) > 18:
            byte_column = byte_column[:18] + '+'
        pieces.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            '%08x:' % insn.address, byte_column, insn.mnemonic, insn.op_str))

    pieces.append('*/\n')
    return ''.join(pieces)
def get_raw_disassembler(arch, detailed=True):
    """Return an x86 Capstone handle for the given binary type.

    ``arch`` is compared against the ``.value`` of
    ``BinaryType.SCS_32BIT_BINARY`` and ``BinaryType.SCS_64BIT_BINARY``;
    any other value raises ``Exception``. The handle's ``detail`` flag is
    set to ``detailed``.
    """
    if arch == BinaryType.SCS_32BIT_BINARY.value:
        mode = CS_MODE_32
    elif arch == BinaryType.SCS_64BIT_BINARY.value:
        mode = CS_MODE_64
    else:
        raise Exception("No disassembler for this architecture")

    handle = Cs(CS_ARCH_X86, mode)
    handle.detail = detailed
    return handle
def __init__(self, encoding, position):
    """Decode a 32-bit ARM encoding with Capstone.

    The parent initializer is called first; ``self._encoding`` is then
    serialized as 4 little-endian bytes and disassembled at ``position``.
    ``self._cap`` ends up holding the last decoded instruction, or ``None``
    when nothing decodes.
    """
    super(CAPSInstruction, self).__init__(encoding, position)

    # CAPSTONE object
    raw_word = self._encoding.to_bytes(4, byteorder='little')
    engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    engine.detail = True

    self._cap = None
    decoded = list(engine.disasm(raw_word, position))
    if decoded:
        self._cap = decoded[-1]
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    :param bytes: Raw machine code (x86-64).
    :param offset: Address assigned to the first byte.
    :return: ``(target_address, operand_size)`` for the first RIP-relative
        memory operand of the first instruction, or ``(None, None)`` when
        nothing decodes, no such operand exists, or Capstone reports an
        error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        if not disassembled:
            return None, None
        instruc = disassembled[0]

        # RIP-relative displacements are relative to the end of the current
        # instruction. Using address + size avoids depending on a second
        # instruction having been decoded.
        next_address = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
def find_single(args):
    """Search backwards from one reference point for gadgets.

    Takes a single tuple (unchanged calling convention) so it can be used
    with map-style dispatch. The former tuple-parameter signature was
    removed from the language in Python 3; unpacking in the body works on
    both Python 2 and 3.

    :param args: ``(raw_data, pvaddr, elftype, elf_base_addr, arch, mode,
        gad, need_filter, ref)``. ``gad`` is indexed with ``gad[1]`` (size of
        the gadget terminator) and ``gad[2]`` (alignment); ``ref`` is the
        offset of the terminator inside ``raw_data``.
    :return: List of gadgets that survived cleaning, filtering and, when
        ``need_filter`` is false, classification.
    """
    (raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad,
     need_filter, ref) = args

    C_SIZE = 1
    C_ALIGN = 2

    allgadgets = []
    md = Cs(arch, mode)
    md.detail = True

    for i in range(10):
        section_start = ref - i * gad[C_ALIGN]
        if section_start < 0:
            # A negative start would make the slice wrap around to the end
            # of the buffer and decode unrelated bytes.
            continue

        start_address = pvaddr + section_start
        if elftype == 'DYN':
            start_address = elf_base_addr + start_address

        gadget_bytes = raw_data[section_start:ref + gad[C_SIZE]]
        decodes = list(md.disasm(gadget_bytes, start_address))
        insns = [(decode.mnemonic + " " + decode.op_str).strip()
                 for decode in decodes]
        if not insns:
            continue
        if (start_address % gad[C_ALIGN]) != 0:
            continue

        address = start_address
        if mode == CS_MODE_THUMB:
            # Thumb code addresses carry the low bit set.
            address = address | 1

        onegad = Gadget(address, insns, {}, 0, gadget_bytes)
        if not passClean(decodes):
            continue

        if arch == CS_ARCH_X86:
            onegad = filter_for_x86_big_binary(onegad)
        elif arch == CS_ARCH_ARM:
            onegad = filter_for_arm_big_binary(onegad)

        if (not need_filter) and onegad:
            classifier = GadgetClassifier(arch, mode)
            onegad = classifier.classify(onegad)

        if onegad:
            allgadgets += [onegad]

    return allgadgets
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    :param bytes: Raw machine code (x86-64).
    :param offset: Address assigned to the first byte.
    :return: ``(target_address, operand_size)`` for the first RIP-relative
        memory operand of the first instruction, or ``(None, None)`` when
        nothing decodes, no such operand exists, or Capstone reports an
        error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        if not disassembled:
            return None, None
        instruc = disassembled[0]

        # RIP-relative displacements are relative to the end of the current
        # instruction. Using address + size avoids depending on a second
        # instruction having been decoded.
        next_address = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
def generate_pic(buff, mode):
    """
    Return a position independent result of the byte sequence.

    The buffer is disassembled from address 0. For every instruction that
    has exactly one immediate or exactly one memory operand, the span from
    the computed operand start up to the next instruction (or the end of
    the buffer) is recorded and handed to ``_to_yara_hex_string`` together
    with the buffer.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :return: YARA compliant hex string sequence.
    :rtype: str
    """
    md = Cs(CS_ARCH_X86, mode)
    md.detail = True
    # (offset, size) pairs describing the recorded spans.
    relative_tracker = []
    # True while a span opened by the previous instruction is still pending.
    relative = False
    offset = 0
    for insn in md.disasm(buff, 0x0):
        if relative:
            # The pending span ends where the current instruction starts.
            r_size = insn.address - offset
            relative_tracker.append((offset, r_size))
            relative = False

        if insn.op_count(X86_OP_IMM) == 1 or insn.op_count(X86_OP_MEM) == 1:
            # Start the span after the opcode bytes, then skip one byte for
            # each of ModRM / REX / SIB that is non-zero.
            offset = insn.address + _get_opcode_length(insn.opcode)
            relative = True
            if insn.modrm > 0:
                offset += 1
            if insn.rex > 0:
                offset += 1
            if insn.sib > 0:
                offset += 1
            # NOTE(review): presumably adds the number of prefix bytes in
            # use, assuming MAX_PREFIX_SIZE equals len(insn.prefix) - confirm.
            offset += MAX_PREFIX_SIZE - insn.prefix.count(0x0)
            continue

    if relative:
        # The last instruction opened a span: it runs to the end of the buffer.
        r_size = len(buff) - offset
        relative_tracker.append((offset, r_size))

    hex_bytes = '{ ' + _to_yara_hex_string(buff, relative_tracker) + ' }'
    return hex_bytes
def __parse_plt(self):
    """Fill ``plt_got_dic`` / ``got_plt_dic`` from the ``.plt`` section.

    The section is disassembled as x86-64. Every third instruction,
    skipping the very first one, is expected to have exactly one operand;
    its memory displacement is added to the address of the following
    instruction, and the two addresses are stored in both directions.

    Raises ``ValueError`` when the ELF has no ``.plt`` section.
    """
    # parsing .plt section
    plt_section = self.elf.get_section_by_name(".plt")
    if plt_section is None:
        raise ValueError

    engine = Cs(CS_ARCH_X86, CS_MODE_64)
    engine.detail = True
    decoded = engine.disasm(plt_section.data(), plt_section["sh_addr"])

    for index, insn in enumerate(decoded):
        if index == 0 or index % 3 != 0:
            continue
        next_rip = insn.address + insn.size
        assert len(insn.operands) == 1
        displacement = insn.operands[0].value.mem.disp
        self.plt_got_dic[insn.address] = next_rip + displacement
        self.got_plt_dic[next_rip + displacement] = insn.address
def _cs_disassemble_one(self, data, address):
    """Return the first Capstone instruction decoded from ``data``.

    ``self._disassembler`` is tried first; when it decodes nothing, a fresh
    ARM-mode handle is tried. ``InvalidDisassemblerData`` is raised when
    neither yields an instruction.
    """
    primary = list(self._disassembler.disasm(data, address))

    # TODO: Improve this check.
    if primary:
        return primary[0]

    fallback_engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    fallback_engine.detail = True
    fallback = list(fallback_engine.disasm(data, address))

    if not fallback:
        raise InvalidDisassemblerData("CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))
    return fallback[0]
def disasm(self, addr):
    """Disassemble the section containing ``addr`` into ``self.code``.

    The section is fetched from ``self.binary``; ``die`` is called when it
    is not flagged executable. Each decoded x86 instruction is stored in
    ``self.code`` keyed by address, and the address is appended to
    ``self.code_idx``. For PE binaries the import symbols are loaded
    afterwards.
    """
    (data, virtual_addr, flags) = self.binary.get_section(addr)

    if not flags["exec"]:
        die("the address 0x%x is not in an executable section" % addr)

    if self.bits == 64:
        mode = CS_MODE_64
    else:
        mode = CS_MODE_32

    engine = Cs(CS_ARCH_X86, mode)
    engine.detail = True

    for insn in engine.disasm(data, virtual_addr):
        self.code[insn.address] = insn
        self.code_idx.append(insn.address)

    # Now load imported symbols for PE. This cannot be done before,
    # because we need the code for a better resolution.
    if self.binary.get_type() == T_BIN_PE:
        self.binary.load_import_symbols(self.code)
def compute_eflags_setter(self):
    """Record, per monitored flag object, the EFLAGS bits a gadget may touch.

    Walks ``self.traces`` (mapping of flag object -> list of traces, each a
    sequence of gadget addresses), disassembles every gadget as 32-bit x86
    and, on the first instruction whose ``eflags`` overlap the flag's
    monitored bits, stores the translated bitmask on the flag object.
    """
    dis32 = Cs(CS_ARCH_X86, CS_MODE_32)
    dis32.detail = True
    # Set when a gadget instruction matched; consumed (and reset) at the top
    # of the next gadget iteration, where it ends the current trace.
    # NOTE(review): the flag is not reset between traces, so a match on the
    # last gadget of a trace ends the *next* trace at its first gadget - confirm
    # this is intended.
    flag_insn = False
    for fl, traces in self.traces.items():
        for trace in traces:
            for g_addr in trace:
                if flag_insn:
                    flag_insn = False
                    break
                gadget_bytes = self._emu.gadget_map[g_addr].rop_bytes
                for insn in dis32.disasm(gadget_bytes, g_addr):
                    # Check every instruction of the gadget to see if it can perform a modification of the
                    # monitored bits (doesn't mean that the bits have been actually modified)
                    if insn.eflags and insn.eflags & self.capstone_to_eflags_aux(fl.monitored_bits):
                        fl.set_eflag_bitmask(self.capstone_to_eflags_aux(insn.eflags))
                        flag_insn = True
                        break
def _cs_disassemble_one(self, data, address):
    """Return the first Capstone instruction decoded from ``data``.

    ``self._disassembler`` is tried first; when it decodes nothing, a fresh
    ARM-mode handle is tried. ``InvalidDisassemblerData`` is raised when
    neither yields an instruction.
    """
    primary = list(self._disassembler.disasm(data, address))

    # TODO: Improve this check.
    if primary:
        return primary[0]

    fallback_engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
    fallback_engine.detail = True
    fallback = list(fallback_engine.disasm(data, address))

    if not fallback:
        raise InvalidDisassemblerData(
            "CAPSTONE: Unknown instruction (Addr: {:s}).".format(
                hex(address)))
    return fallback[0]
def get_compiler_info(rom_bytes, entry_point, print_result=True):
    """Guess the compiler of a ROM from its jump/branch mnemonic mix.

    Everything from byte 0x1000 onward is disassembled as big-endian MIPS64
    starting at ``entry_point``. ``"IDO"`` is returned when ``b`` mnemonics
    outnumber ``j`` mnemonics, otherwise ``"GCC"``. The counts are printed
    when ``print_result`` is true.
    """
    engine = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
    engine.detail = True

    tally = {"j": 0, "b": 0}
    for insn in engine.disasm(rom_bytes[0x1000:], entry_point):
        if insn.mnemonic in tally:
            tally[insn.mnemonic] += 1

    jumps = tally["j"]
    branches = tally["b"]

    if branches > jumps:
        compiler = "IDO"
    else:
        compiler = "GCC"

    if print_result:
        print(
            f"{branches} branches and {jumps} jumps detected in the first code segment. Compiler is most likely {compiler}"
        )
    return compiler
def disasm_bytes(bytes, addr):
    """Disassemble AArch64 code one 4-byte word at a time.

    Words that decode are appended as Capstone instructions. A word that
    does not decode is replaced by an ``InstructionWrapper`` around a
    decoded ``nop`` whose mnemonic is overwritten with a ``.quad`` data
    directive holding the word's little-endian value.
    """
    engine = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
    engine.syntax = CS_OPT_SYNTAX_ATT
    engine.detail = True

    instructions = []
    for pos in range(0, len(bytes), 4):
        decoded = list(engine.disasm(bytes[pos:pos + 4], addr + pos))
        if len(decoded):
            instructions.extend(decoded)
            continue

        # the instruction is invalid, so we craft a fake "nop" (to make the rest of the code work)
        # and we just overwrite it as data with a comment
        nop_insn = list(engine.disasm(b"\x1f\x20\x03\xd5", addr + pos))[0]  # bytes for nop
        placeholder = InstructionWrapper(nop_insn)
        placeholder.mnemonic = ".quad 0x%x // invalid instruction" % int.from_bytes(
            bytes[pos:pos + 4], byteorder="little")  # are we sure about 'little'?
        instructions.append(placeholder)

    return instructions
def dumpASM(flo, mode, maxAddr=1e99):
    """Print an x86 listing with the implicit register reads and writes.

    ``mode`` must be 32 or 64. Disassembly starts at address 0 and stops
    after the first instruction whose address exceeds ``maxAddr``.
    """
    cs_mode = {32: CS_MODE_32, 64: CS_MODE_64}[mode]
    engine = Cs(CS_ARCH_X86, cs_mode)
    engine.detail = True

    sections = (
        ("\tImplicit registers read: ", "regs_read"),
        ("\tImplicit registers written: ", "regs_write"),
    )

    for insn in engine.disasm(flo, 0):
        # print(dir(insn))
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

        for heading, attribute in sections:
            print(heading, end="")
            # Looked up only after the heading is printed, as before.
            for reg in getattr(insn, attribute):
                print("%s " % insn.reg_name(reg))
            print()

        if insn.address > maxAddr:
            break
def disas_function(self, name): if len(self.functions_name_dic) == 0: self.__parse_functions() all_txt = self.elf.get_section_by_name(".text") base_addr = all_txt["sh_addr"] sct = self.functions_name_dic[name] if sct == None: return offset = sct["st_value"] - base_addr func_txt = all_txt.data()[offset:offset + sct["st_size"]] md = Cs(CS_ARCH_X86, CS_MODE_64) md.detail = True for mnemonic in md.disasm(func_txt, sct["st_value"]): print( self.__disas_function_format.format(hex(mnemonic.address), mnemonic.mnemonic, mnemonic.op_str)) regs = mnemonic.regs_access() read_regs = regs[0] write_regs = regs[1] if len(read_regs) > 1: print("\tRead registers: {}".format( reduce( lambda r1, r2: mnemonic.reg_name(r1) + ", " + mnemonic. reg_name(r2), read_regs))) elif len(read_regs) == 1: print("\tRead registers: {}".format( mnemonic.reg_name(read_regs[0]))) if len(write_regs) > 1: print("\tWrite registers: {}".format( reduce( lambda r1, r2: mnemonic.reg_name(r1) + ", " + mnemonic. reg_name(r2), write_regs))) elif len(write_regs) == 1: print("\tWrite registers: {}".format( mnemonic.reg_name(write_regs[0]))) """
def test_details():
    """Print every .text instruction of ``a.out`` with and without its immediate.

    For each instruction the address, mnemonic, operands and raw bytes are
    printed. Instructions with at least one immediate operand are printed a
    second time with the immediate bytes removed.
    """
    elffile = get_ELFFile('a.out')
    code = elffile.get_section_by_name('.text')
    ops = code.data()
    addr = code['sh_addr']

    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True
    for insn in md.disasm(ops, addr):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
        # Build a real list so the values print on Python 3 as well.
        print([to_x(int(x)) for x in insn.bytes])

        if insn.op_count(X86_OP_IMM) == 0:
            continue

        # Half-open range [imm_start, imm_stop): exactly imm_size bytes.
        # The former inclusive bound of start + size + 1 also removed the
        # two bytes following the immediate.
        imm_start = insn.imm_offset
        imm_stop = imm_start + insn.imm_size
        bytes_no_imm = [
            byte for index, byte in enumerate(insn.bytes)
            if not imm_start <= index < imm_stop
        ]
        print([to_x(int(x)) for x in bytes_no_imm])
def dis_assemble(self):
    # Disassemble the hex string in self.op at address self.ip using the
    # architecture/mode stored on the instance, and pass every decoded
    # instruction to self.extract_registers.
    #
    # Returns cnst.success() when disassembly finished without a Capstone
    # error, otherwise cnst.fail() (the error is logged).
    status = cnst.fail()
    try:
        """ Also it is possible to disassemble the whole code, but here only I get one. """
        machine = Cs(self.machineArch, self.machineMode)
        machine.detail = True
        # self.op is a hex string; it is converted to raw bytes for Capstone.
        for inst in machine.disasm(bytes(bytearray.fromhex(self.op)), self.ip):
            if self.saveDetail:
                # Keep the instruction and the handle that produced it.
                self.disAssembledInstruction = inst
                self.machine = machine
            self.extract_registers(inst)
        status = cnst.success()
    except CsError as e:
        self.log_handler.error("%s" % e)
    return status
def generate_mnemonic(buff, mode):
    """
    Return a mnemonic only result of the byte sequence.

    For every decoded instruction, the bytes that follow the opcode (as
    measured by ``_get_opcode_length``) are recorded as an
    ``(offset, size)`` span and handed to ``_to_yara_hex_string``.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :return: YARA compliant hex string sequence.
    :rtype: str
    """
    engine = Cs(CS_ARCH_X86, mode)
    engine.detail = True

    spans = []
    for insn in engine.disasm(buff, 0x0):
        opcode_size = _get_opcode_length(insn.opcode)
        span_start = insn.address + opcode_size
        span_size = len(insn.bytes) - opcode_size
        spans.append((span_start, span_size))

    return '{ ' + _to_yara_hex_string(buff, spans) + ' }'
from __future__ import print_function
# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

# Dump each instruction with its implicitly read registers and its groups.
for i in md.disasm(CODE, 0):
    # print(dir(i))
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))

    if len(i.regs_read) > 0:
        # With print_function in effect a trailing comma no longer
        # suppresses the newline and a bare `print` does nothing, so the
        # line is kept together with end="" and closed with print().
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()

    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups:", end="")
        for g in i.groups:
            print("%u" % g)
            # print("%u" % g, end="")
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}
    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM from capstone.arm_const import * #CODE = b"\xe1\x0b\x40\xe1\x20\x04\x81\xda\x20\x08\x02\x8b" CODE = b"\x04\xe0\x2d\xe5\x20\x04\x81\xda\x20\x08\x02\x8b" #e52de004 md = Cs(CS_ARCH_ARM, CS_MODE_ARM) md.detail = True ARM_INS_STR for insn in md.disasm(CODE, 0x38): print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str)) print("\tCode condition: %u" % insn.cc) if len(insn.operands) > 0: print("\tNumber of operands: %u" % len(insn.operands)) c = -1 for i in insn.operands: c += 1 if i.type == ARM_OP_REG: print("\t\toperands[%u].type: REG = %s" % (c, insn.reg_name(i.value.reg))) if i.type == ARM_OP_IMM: print("\t\toperands[%u].type: IMM = 0x%x" % (c, i.value.imm)) if i.type == ARM_OP_CIMM: print("\t\toperands[%u].type: C-IMM = %u" % (c, i.value.imm)) if i.type == ARM_OP_FP: print("\t\toperands[%u].type: FP = %f" % (c, i.value.fp)) if i.type == ARM_OP_MEM: print("\t\toperands[%u].type: MEM" % c) if i.value.mem.base != 0: print("\t\t\toperands[%u].mem.base: REG = %s" \
def disasm(bytes, offset=0): print "offset %i" % offset try: md = Cs(CS_ARCH_X86, CS_MODE_64) md.detail = True disassembled = list(md.disasm(bytes, offset)) for i, instr in enumerate(disassembled): print "0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str) # Handle no-op instructions if instr.id == x86.X86_INS_NOP: instr.nop = True # Handle jump/call instructions if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL): # We can only decode the destination if it's an immediate value if instr.operands[0].type == x86.X86_OP_IMM: # Ignore if it's a jump/call to an address within this function func_start_addr = disassembled[0].address func_end_addr = disassembled[len(disassembled)-1].address dest_addr = instr.operands[0].imm if func_start_addr <= dest_addr <= func_end_addr: instr.internal_jump = True instr.jump_address = dest_addr else: symbol = executable.ex.get_symbol_by_addr(dest_addr) if symbol: text_sect = executable.ex.elff.get_section_by_name('.text') sect_addr = text_sect['sh_addr'] sect_offset = text_sect['sh_offset'] instr.external_jump = True instr.jump_address = dest_addr instr.jump_function_name = demangle(symbol.name) instr.jump_function_address = dest_addr instr.jump_function_offset = dest_addr - sect_addr + sect_offset instr.jump_function_size = symbol['st_size'] instr.comment = demangle(symbol.name) # Handle individual operands for op in instr.operands: # Handle rip-relative operands if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP: instr.rip = True instr.rip_offset = op.mem.disp instr.rip_resolved = disassembled[i+1].address + instr.rip_offset symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved) if symbol: instr.comment = demangle(symbol.name) bytes = executable.ex.get_bytes(instr.rip_resolved, op.size) instr.rip_value_hex = "" space = "" for char in bytes: instr.rip_value_hex += space + hex(ord(char)) space = " " # HTML collapses consecutive spaces. 
For presentation purposes, replace spaces # with   (non-breaking space) nbsp_str = [] if op.size == 16: for char in bytes: if char == ' ': nbsp_str.append(' ') else: nbsp_str.append(char) instr.rip_value_ascii = ''.join(nbsp_str) # TODO: there's a bug involving ASCII that cannot be jsonified. To get around # it, we're temporarily pretending they don't exist. Those edge cases need to be # handled. # see typeName( else: instr.rip_value_ascii = "under construction..." # what registers does this instruction read/write? instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write] instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read] # Add in documentation meta-data instr.docfile = doc_file(instr) instr.short_desc = get_short_desc(instr) if instr.docfile is None: with open('missing_docs.log', 'a+') as f: f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic)) return disassembled except CsError as e: print("ERROR: %s" %e)
def disasm(code, arch, mode, start=0, detail: bool = True):
    """Lazily yield the Capstone instructions decoded from ``code``.

    A new handle is created for ``arch`` / ``mode`` with its ``detail``
    flag set to ``detail``; decoding starts at address ``start``.
    """
    engine = Cs(arch, mode)
    engine.detail = detail
    yield from engine.disasm(code, start)
def disasm(exe, bytes, offset=0): print "offset %i" % offset try: md = Cs(CS_ARCH_X86, CS_MODE_64) md.detail = True disassembled = list(md.disasm(bytes, offset)) for i, instr in enumerate(disassembled): print "0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str) # Handle no-op instructions if instr.id == x86.X86_INS_NOP: instr.nop = True # Handle jump/call instructions elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL): # jump table if instr.group(x86.X86_GRP_JUMP) and instr.operands[0].type == x86.X86_OP_REG: instr.jump_table = instr.reg_name(instr.operands[0].reg) # We can only decode the destination if it's an immediate value elif instr.operands[0].type == x86.X86_OP_IMM: # Ignore if it's a jump/call to an address within this function func_start_addr = disassembled[0].address func_end_addr = disassembled[len(disassembled)-1].address dest_addr = instr.operands[0].imm if func_start_addr <= dest_addr <= func_end_addr: instr.internal_jump = True instr.jump_address = dest_addr else: symbol, field_name = exe.get_symbol_by_addr( dest_addr, instr.address) if symbol: text_sect = exe.elff.get_section_by_name('.text') sect_addr = text_sect['sh_addr'] sect_offset = text_sect['sh_offset'] instr.comment = demangle(symbol.name) # only follow call address if it is a known location if symbol['st_size'] > 0: instr.external_jump = True instr.jump_address = symbol["st_value"] instr.jump_function_name = demangle(symbol.name) instr.jump_function_address = symbol["st_value"] instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset instr.jump_function_size = symbol['st_size'] if instr.group(x86.X86_GRP_RET): instr.return_type = True # Handle individual operands c = -1 instr.regs_explicit = [] for op in instr.operands: c += 1 # Handle rip-relative operands if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP: instr.rip = True instr.rip_offset = op.mem.disp instr.rip_resolved = disassembled[i+1].address + instr.rip_offset # file 
offset depends on section section = exe.get_section_from_offset(instr.rip_resolved) file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"] # Read in and unpack the first byte at the offset val_8 = exe.get_bytes(file_offset, 1) instr.signed_8 = unpack('b', val_8)[0] instr.unsigned_8 = unpack('B', val_8)[0] instr.hex_8 = hex(instr.unsigned_8) # Read in and unpack the first two bytes at the offset val_16 = exe.get_bytes(file_offset, 2) instr.signed_16 = unpack('h', val_16)[0] instr.unsigned_16 = unpack('H', val_16)[0] instr.hex_16 = hex(instr.unsigned_16) # Read in and unpack the first four bytes at the offset val_32 = exe.get_bytes(file_offset, 4) instr.signed_32 = unpack('i', val_32)[0] instr.unsigned_32 = unpack('I', val_32)[0] instr.hex_32 = hex(instr.unsigned_32) instr.float = unpack('f', val_32)[0] # Read in and unpack the first eight bytes at the offset val_64 = exe.get_bytes(file_offset, 8) instr.signed_64 = unpack('q', val_64)[0] instr.unsigned_64 = unpack('Q', val_64)[0] instr.hex_64 = hex(instr.unsigned_64) instr.double = unpack('d', val_64)[0] symbol, field_name = exe.get_symbol_by_addr( instr.rip_resolved, instr.address, instr_size=op.size, get_sub_symbol=True) if symbol: instr.comment = demangle(symbol.name) if field_name: instr.comment += '.' + field_name bytes = exe.get_bytes(file_offset, op.size) instr.rip_value_hex = "" space = "" for char in bytes: instr.rip_value_hex += space + hex(ord(char)) space = " " # HTML collapses consecutive spaces. For presentation purposes, replace spaces # with   (non-breaking space) nbsp_str = [] if op.size == 16: for char in bytes: if char == ' ': nbsp_str.append(' ') else: nbsp_str.append(char) instr.rip_value_ascii = ''.join(nbsp_str) # TODO: there's a bug involving ASCII that cannot be jsonified. To get around # it, we're temporarily pretending they don't exist. Those edge cases need to be # handled. # see typeName( else: instr.rip_value_ascii = "under construction..." 
# Handle explicitly read/written registers if op.type == x86.X86_OP_MEM: ptr = ["", "", ""] # using an array instead of object to guarantee ordering instr.regs_ptr_explicit = [] if op.value.mem.base != 0: regname = instr.reg_name(op.value.mem.base) ptr[0] = regname if regname != "rip": instr.regs_ptr_explicit.append(regname) if op.value.mem.index != 0: regname = instr.reg_name(op.value.mem.index) ptr[1] = regname if regname != "rip": instr.regs_ptr_explicit.append(regname) if op.value.mem.disp != 0: ptr[2] = hex(op.value.mem.disp) instr.ptr = ptr instr.ptr_size = op.size instr.regs_explicit.append(instr.ptr) elif op.type == x86.X86_OP_REG: instr.regs_explicit.append(instr.reg_name(op.value.reg)) else: instr.regs_explicit.append("") # what registers does this instruction read/write? instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write] if instr.group(x86.X86_GRP_CALL) and instr.reg_name(x86.X86_REG_RAX) not in instr.regs_write_implicit: instr.regs_write_implicit.append(instr.reg_name(x86.X86_REG_RAX)) instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read] # Add in documentation meta-data instr.short_desc, instr.docfile = get_documentation(instr) if instr.docfile is None or instr.short_desc is None: with open(CUR_PATH + 'missing_docs.log', 'a+') as f: f.write('[{}] : {} : {} : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic, instr.docfile, instr.short_desc)) return disassembled except CsError as e: print("ERROR: %s" %e)
def disasm(exe, bytes, offset=0): print "offset %i" % offset try: md = Cs(CS_ARCH_X86, CS_MODE_64) md.detail = True disassembled = list(md.disasm(bytes, offset)) for i, instr in enumerate(disassembled): print "0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str) # Handle no-op instructions if instr.id == x86.X86_INS_NOP: instr.nop = True # Handle jump/call instructions elif instr.group(x86.X86_GRP_JUMP) or instr.group( x86.X86_GRP_CALL): # jump table if instr.group(x86.X86_GRP_JUMP ) and instr.operands[0].type == x86.X86_OP_REG: instr.jump_table = instr.reg_name(instr.operands[0].reg) # We can only decode the destination if it's an immediate value elif instr.operands[0].type == x86.X86_OP_IMM: # Ignore if it's a jump/call to an address within this function func_start_addr = disassembled[0].address func_end_addr = disassembled[len(disassembled) - 1].address dest_addr = instr.operands[0].imm if func_start_addr <= dest_addr <= func_end_addr: instr.internal_jump = True instr.jump_address = dest_addr else: symbol, field_name = exe.get_symbol_by_addr( dest_addr, instr.address) if symbol: text_sect = exe.elff.get_section_by_name('.text') sect_addr = text_sect['sh_addr'] sect_offset = text_sect['sh_offset'] instr.comment = demangle(symbol.name) # only follow call address if it is a known location if symbol['st_size'] > 0: instr.external_jump = True instr.jump_address = symbol["st_value"] instr.jump_function_name = demangle( symbol.name) instr.jump_function_address = symbol[ "st_value"] instr.jump_function_offset = symbol[ "st_value"] - sect_addr + sect_offset instr.jump_function_size = symbol['st_size'] if instr.group(x86.X86_GRP_RET): instr.return_type = True # Handle individual operands c = -1 instr.regs_explicit = [] for op in instr.operands: c += 1 # Handle rip-relative operands if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP: instr.rip = True instr.rip_offset = op.mem.disp instr.rip_resolved = disassembled[ i + 1].address + instr.rip_offset # 
file offset depends on section section = exe.get_section_from_offset(instr.rip_resolved) file_offset = instr.rip_resolved - section[ "sh_addr"] + section["sh_offset"] # Read in and unpack the first byte at the offset val_8 = exe.get_bytes(file_offset, 1) instr.signed_8 = unpack('b', val_8)[0] instr.unsigned_8 = unpack('B', val_8)[0] instr.hex_8 = hex(instr.unsigned_8) # Read in and unpack the first two bytes at the offset val_16 = exe.get_bytes(file_offset, 2) instr.signed_16 = unpack('h', val_16)[0] instr.unsigned_16 = unpack('H', val_16)[0] instr.hex_16 = hex(instr.unsigned_16) # Read in and unpack the first four bytes at the offset val_32 = exe.get_bytes(file_offset, 4) instr.signed_32 = unpack('i', val_32)[0] instr.unsigned_32 = unpack('I', val_32)[0] instr.hex_32 = hex(instr.unsigned_32) instr.float = unpack('f', val_32)[0] # Read in and unpack the first eight bytes at the offset val_64 = exe.get_bytes(file_offset, 8) instr.signed_64 = unpack('q', val_64)[0] instr.unsigned_64 = unpack('Q', val_64)[0] instr.hex_64 = hex(instr.unsigned_64) instr.double = unpack('d', val_64)[0] symbol, field_name = exe.get_symbol_by_addr( instr.rip_resolved, instr.address, instr_size=op.size, get_sub_symbol=True) if symbol: instr.comment = demangle(symbol.name) if field_name: instr.comment += '.' + field_name bytes = exe.get_bytes(file_offset, op.size) instr.rip_value_hex = "" space = "" for char in bytes: instr.rip_value_hex += space + hex(ord(char)) space = " " # HTML collapses consecutive spaces. For presentation purposes, replace spaces # with   (non-breaking space) nbsp_str = [] if op.size == 16: for char in bytes: if char == ' ': nbsp_str.append(' ') else: nbsp_str.append(char) instr.rip_value_ascii = ''.join(nbsp_str) # TODO: there's a bug involving ASCII that cannot be jsonified. To get around # it, we're temporarily pretending they don't exist. Those edge cases need to be # handled. # see typeName( else: instr.rip_value_ascii = "under construction..." 
# Handle explicitly read/written registers if op.type == x86.X86_OP_MEM: ptr = [ "", "", "" ] # using an array instead of object to guarantee ordering instr.regs_ptr_explicit = [] if op.value.mem.base != 0: regname = instr.reg_name(op.value.mem.base) ptr[0] = regname if regname != "rip": instr.regs_ptr_explicit.append(regname) if op.value.mem.index != 0: regname = instr.reg_name(op.value.mem.index) ptr[1] = regname if regname != "rip": instr.regs_ptr_explicit.append(regname) if op.value.mem.disp != 0: ptr[2] = hex(op.value.mem.disp) instr.ptr = ptr instr.ptr_size = op.size instr.regs_explicit.append(instr.ptr) elif op.type == x86.X86_OP_REG: instr.regs_explicit.append(instr.reg_name(op.value.reg)) else: instr.regs_explicit.append("") # what registers does this instruction read/write? instr.regs_write_implicit = [ instr.reg_name(reg) for reg in instr.regs_write ] if instr.group(x86.X86_GRP_CALL) and instr.reg_name( x86.X86_REG_RAX) not in instr.regs_write_implicit: instr.regs_write_implicit.append( instr.reg_name(x86.X86_REG_RAX)) instr.regs_read_implicit = [ instr.reg_name(reg) for reg in instr.regs_read ] # Add in documentation meta-data instr.short_desc, instr.docfile = get_documentation(instr) if instr.docfile is None or instr.short_desc is None: with open(CUR_PATH + 'missing_docs.log', 'a+') as f: f.write('[{}] : {} : {} : {}\n'.format( str(datetime.datetime.now()), instr.mnemonic, instr.docfile, instr.short_desc)) return disassembled except CsError as e: print("ERROR: %s" % e)
ARM_INS_POP: "pop", ARM_INS_PUSH: "push", ARM_INS_NOP: "nop", ARM_INS_YIELD: "yield", ARM_INS_WFE: "wfe", ARM_INS_WFI: "wfi", ARM_INS_SEV: "sev", ARM_INS_SEVL: "sevl", ARM_INS_VPUSH: "vpush", ARM_INS_VPOP: "vpop", ARM_INS_ENDING: "ending", } # disassembler definitions THUMB_DISASSEMBLER = Cs(CS_ARCH_ARM, CS_MODE_THUMB + CS_MODE_LITTLE_ENDIAN) THUMB_DISASSEMBLER.detail = True ARM_DISASSEMBLER = Cs(CS_ARCH_ARM, CS_MODE_ARM + CS_MODE_LITTLE_ENDIAN) ARM_DISASSEMBLER.detail = True # constants BRANCH_IDS = (ARM_INS_BX, ARM_INS_B) COND_BRANCH_IDS = (ARM_INS_CBNZ, ARM_INS_CBZ) CALL_IDS = (ARM_INS_BL, ARM_INS_BLX) class Function: def __init__(self, block_tree: IntervalTree, name="sub"): if len(block_tree) == 0: raise ValueError("Cannot create function from empty block tree") self.name = name blocks = [iv.data for iv in block_tree]
from capstone import Cs, CS_ARCH_ARM64, CS_MODE_ARM, CS_OP_REG cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM) cs.detail = True # Clobbered registers (reserved by caller, cannot overwrite) clobbered_registers = ["x" + str(i) for i in range(19, 29) ] + ["w" + str(i) for i in range(19, 29)] # Non-clobbered registers (can be overwritten by a function) non_clobbered_registers = ["x" + str(i) for i in range(0, 19) ] + ["w" + str(i) for i in range(0, 19)] # Argument registers (used to pass function arguments) argument_registers = ["x" + str(i) for i in range(0, 8) ] + ["w" + str(i) for i in range(0, 8)] def get_reg_size_arm(regname): sizes = {"B": 1, "H": 2, "W": 4, "S": 4, "X": 8, "D": 8, "Q": 16} return sizes[regname.upper()[0]] def get_access_size_arm(instruction): bool_load = True if instruction.mnemonic.upper().startswith("L") else False # here we get the size from the last letter of the instruction # horrible hack I know, but capstone is a bad boy and is not reliable sizes = {"B": 1, "H": 2, "W": 4, "R": 8, "P": 16} acsz = sizes[instruction.mnemonic.upper()[-1]] if instruction.operands[0].type == CS_OP_REG: reg = instruction.reg_name(instruction.operands[0].reg) regsz = get_reg_size_arm(reg) if regsz < acsz or regsz == 16: