Ejemplo n.º 1
0
    def disasm(self, base, code):
        """Disassemble binary code for the architecture of this object.

        Args:
            base: base address of code
            code: binary code

        Returns:
            list of the instructions produced by the Capstone decoder

        Raises:
            UnsupportedArchException: if ``self.arch`` is neither
                ``ARCH.X86`` nor ``ARCH.X86_64``.
        """
        # Both supported targets use the x86 architecture constant and
        # differ only in the Capstone mode.
        if self.arch == ARCH.X86:
            cs_mode = CS_MODE_32
        elif self.arch == ARCH.X86_64:
            cs_mode = CS_MODE_64
        else:
            raise UnsupportedArchException(self.arch)

        decoder = Cs(CS_ARCH_X86, cs_mode)
        return list(decoder.disasm(code, base))
Ejemplo n.º 2
0
def show_asm(buff, mode, base):
    """
    Render a byte sequence as an x86 assembly listing.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.

    :return: One formatted line per decoded instruction (address, hex
        bytes, mnemonic, operands) followed by a closing ``*/`` line.
    :rtype: str
    """

    decoder = Cs(CS_ARCH_X86, mode)
    decoder.detail = True

    lines = []
    for insn in decoder.disasm(buff, base):
        digits = binascii.hexlify(insn.bytes).decode('utf-8')
        # Regroup the hex digits into space-separated byte pairs.
        hex_bytes = ' '.join(
            digits[pos:pos + 2] for pos in range(0, len(digits), 2))
        # Long encodings are truncated and marked with a trailing '+'.
        if len(hex_bytes) > 18:
            hex_bytes = hex_bytes[:18] + '+'
        address = '%08x:' % insn.address
        lines.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            address, hex_bytes, insn.mnemonic, insn.op_str))
    lines.append('*/\n')

    return ''.join(lines)
Ejemplo n.º 3
0
def __get_trace_records(ql: Qiling, address: int, size: int, md: Cs) -> Iterator[TraceRecord]:
	"""[private] Acquire trace info for the current instruction and yield as a trace record.
	A trace record is a parsed instruction paired to a tuple of (register, value) pairs for
	the registers the instruction reads.

	This method might yield more than one record for a single instruction, and yields
	nothing when the bytes at `address` cannot be decoded.

	Args:
		ql: the qiling instance
		address: address of the instruction
		size: instruction size in bytes, or the magic value 0xf1f1f1f1
		md: initialized disassembler object
	"""

	# unicorn denotes unsupported instructions by a magic size value. though these instructions
	# are not emulated, capstone can still parse them.
	if size == 0xf1f1f1f1:
		# retry with a 16-byte window and keep only the first record. a default is passed
		# to next() because a StopIteration escaping a generator body is turned into a
		# RuntimeError (PEP 479) when nothing could be decoded.
		record = next(__get_trace_records(ql, address, 16, md), None)

		if record is not None:
			yield record

		return

	# a trace line is generated even for hook addresses that do not contain meaningful opcodes.
	# in that case, make it look like a nop
	if address in ql._addr_hook:
		buf = b'\x90'
	else:
		buf = ql.mem.read(address, size)

	for insn in md.disasm(buf, address):
		# BUG: insn.regs_read doesn't work well, so we use insn.regs_access()[0]
		state = tuple((reg, ql.reg.read(reg)) for reg in insn.regs_access()[0])

		yield (insn, state)
Ejemplo n.º 4
0
def main(fname):
    """ Minimal Python counterpart of the tools:

            - "objdump -d" (linux)
            - "dumpbin /disasm" (MSVC)

        The AR and COFF structures are read through read_lib_file, while the
        "capstone" library performs the disassembly of each object's first
        section. The listing is printed to standard output.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending_syms = deque(coff.symbols)
        decoder = Cs(CS_ARCH_X86, CS_MODE_32)
        decoder.skipdata = True

        for insn in decoder.disasm(coff.sections[0].data, 0x000):
            # Consume every symbol located at or before this instruction;
            # only symbols of type 32 in section 1 are printed as labels.
            while pending_syms and insn.address >= pending_syms[0].value:
                sym = pending_syms.popleft()
                if sym.type == 32 and sym.section_number == 1:
                    print(sym.name.decode(errors="ignore") + ":")

            # Show at most six bytes on the main line; the rest goes on a
            # continuation line.
            shown, overflow = insn.bytes, b""
            if len(shown) >= 6:
                shown, overflow = shown[:6], shown[6:]

            if insn.op_str:
                asm_part = "%-12s%s" % (insn.mnemonic, format_asm(insn.op_str))
            else:
                asm_part = insn.mnemonic

            print("  %08X: %-19s" %
                  (insn.address, hex_with_spaces(shown)) + asm_part)
            if overflow:
                print("            %s" % (hex_with_spaces(overflow)))
Ejemplo n.º 5
0
def trace(ql: Qiling, address: int, size: int, md: Cs):
    """Log one trace line for the instruction that is about to be executed.

    The line holds the address, the raw bytes, the disassembly and the
    current values of the registers the instruction reads.

    Args:
        ql: the qiling instance
        address: the address of the instruction that is about to be executed
        size: size of the instruction (in bytes)
        md: initialized disassembler object
    """

    color_faded = '\033[2m'
    color_reset = '\033[0m'

    # fetch the bytes of the current instruction and decode them
    insn = next(md.disasm(ql.mem.read(address, size), address))

    # number of hex digits needed to print a full-width address
    nibbles = ql.arch.bits // 4

    # collect the values of the registers read by this instruction.
    #
    # note: this runs before the instruction is emulated, so 'rip' still points at the
    # current instruction while the instruction itself treats it as pointing to the next
    # one. 'rip' will therefore show an incorrect value
    regs_read = insn.regs_access()[0]
    reads = [f'{md.reg_name(reg)} = {ql.arch.regs.read(CS_UC_REGS[reg]):#x}' for reg in regs_read]

    # assemble the human-readable trace line
    trace_line = f'{insn.address:0{nibbles}x} | {insn.bytes.hex():24s} {insn.mnemonic:12} {insn.op_str:35s} | {", ".join(reads)}'

    # the faded color makes trace info easy to tell apart from other log entries
    ql.log.info(f'{color_faded}{trace_line}{color_reset}')
Ejemplo n.º 6
0
    def __init__(self, func, r2obj: dict):
        """Build a basic block from a radare2 basic-block dictionary.

        Args:
            func: owner of this block, stored as ``self.parent``.
            r2obj: radare2 output for one block. ``offset`` and ``ops`` are
                required (each op needs ``offset`` and ``bytes``); ``jump``,
                ``fail`` and ``switchop`` are optional.

        Raises:
            CapstoneDecodeError: if the bytes of an op do not decode to
                exactly one instruction.
            UnhandledOutputError: if a required key is missing.
        """
        self.parent = func

        # One decoder serves every op of the block; it does not depend on
        # the op being decoded.
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True

        try:
            self.address = r2obj['offset']
            self.jump = r2obj.get('jump', 0)
            self.fail = r2obj.get('fail', 0)
            cases = r2obj.get('switchop', {}).get('cases', {})
            self.cases = {c['jump'] for c in cases}

            self.insns = []
            for op in r2obj['ops']:
                _addr = op['offset']
                _insns = list(
                    md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
                if len(_insns) != 1:
                    raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')

                _insn: CsInsn = _insns[0]
                _reads, _ = _insn.regs_access()
                # A jmp that reads at least one register is treated as an
                # indirect jump.
                indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
                self.insns.append(Instruction(_addr, indirect))
        except KeyError as err:
            # 'offset' may itself be the missing key, in which case
            # self.address was never assigned; do not rely on it here.
            _offset = r2obj.get('offset')
            _where = f'{_offset:#x}' if isinstance(_offset, int) else '<unknown>'
            err_msg = f'Unexpected radare2 output at Basic Block {_where}'
            logging.error(err_msg)
            raise UnhandledOutputError(err_msg) from err
Ejemplo n.º 7
0
class RAD:
    '''Provide a random access disassembler (RAD).'''
    def __init__(self, sections: "SectionFinder", arch, bits):
        '''Start disassembly of the provided code blob.
        Arguments:
            sections -- A section finder instance.
            arch -- The architecture, as defined by Capstone.
            bits -- The bit width, as defined by Capstone.
        '''

        # Set up options for disassembly.
        self.md = Cs(arch, bits)
        self.md.skipdata = True
        self.md.detail = True
        self.sections = sections

        # Cache of the most recently used section: its data and the
        # half-open address range [start, end) it covers.
        self._last_data = None
        self._last_start = 0
        self._last_end = 0

    def at(self, address):
        '''Try to disassemble and return the instruction starting at
        the given address.  An AddressException is thrown if the address
        is not present in any of the sections, and a NotExecutableException
        is thrown if the address is in a section that is not executable.
        '''
        if address >= self._last_end or address < self._last_start:
            # The address is outside the cached section, so look up the
            # section that contains it and refresh the cache.
            section = self.sections.find(address)
            if section is None:
                # No section contains the address.
                raise AddressException(address)

            # Make sure the section is both allocated and executable.
            flags = section.header.sh_flags
            if not (flags & SH_FLAGS.SHF_ALLOC and flags & SH_FLAGS.SHF_EXECINSTR):
                # This section is not allocated or does not contain code.
                raise NotExecutableException(section)

            # Set the variables so we skip this next time.
            self._last_data = section.data()
            self._last_start = section.header.sh_addr
            self._last_end = self._last_start + section.data_size

        # Compute the index into the section's data for the given address.  We
        # already know the address is in the given section (we checked earlier).
        index = address - self._last_start

        # The maximum length of an x86-64 instruction is 15 bytes.  You can
        # exceed this with prefix bytes and the like, but you will get a
        # "general protection" (GP) exception on the processor.  So don't do
        # that.
        return next(self.md.disasm(self._last_data[index:index+15], address, count=1))

    def in_range(self, address):
        '''Determine if an address is in range and executable.

        Returns True when a section contains the address and that section
        is both allocated and executable, and False otherwise (including
        when no section contains the address).
        '''
        section = self.sections.find(address)
        if section is None:
            # Check before touching section.header: there is no header to
            # inspect when the lookup fails.
            return False
        flags = section.header.sh_flags
        return bool(flags & SH_FLAGS.SHF_ALLOC and flags & SH_FLAGS.SHF_EXECINSTR)
Ejemplo n.º 8
0
    def generate_rule(self):
        """ Generate the Yara rule from the collected chunks.

        Code chunks are disassembled and every instruction is converted by
        ``_process_instruction`` into a hex pattern plus a comment; data
        chunks are emitted as the plain hex encoding of their bytes. Each
        chunk becomes one ``$chunk_N`` hex string and the rule condition is
        ``any of them``.

        Returns:
            The populated ``self.yr_rule`` object.
        """
        # Imported locally so the block does not depend on the file's
        # import list.
        import binascii

        self.yr_rule.rule_name = self.rule_name
        self.yr_rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
        self.yr_rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
        self.yr_rule.metas["version"] = "\"1.0\""

        md = Cs(self.instruction_set, self.instruction_mode)
        md.detail = True
        md.syntax = CS_OPT_SYNTAX_INTEL

        for chunk_nr, chunk in enumerate(self._chunks, start=1):
            chunk_id = "$chunk_{}".format(chunk_nr)
            if chunk.is_data is False:
                signature_lines = []
                comment_lines = []
                for ins in md.disasm(chunk.data, chunk.offset):
                    rule_part, comment = self._process_instruction(ins)
                    signature_lines.append(self.format_hex(rule_part) + "\n")
                    comment_lines.append(comment + "\n")
                self.yr_rule.add_string(chunk_id, "".join(signature_lines), StringType.HEX)
                if self.do_comment_sig:
                    self.yr_rule.comments.append("".join(comment_lines))
            else:
                # bytes.encode("hex") only exists on Python 2; hexlify gives
                # the same hex digits on both Python 2 and Python 3.
                hex_digits = binascii.hexlify(chunk.data).decode("ascii")
                self.yr_rule.add_string(chunk_id, self.format_hex(hex_digits), StringType.HEX)

        self.yr_rule.condition = "any of them"
        return self.yr_rule
Ejemplo n.º 9
0
 def disassemble(self, code: bytes, address: int) -> List[DumpAssembly]:
     """Decode ``code`` as ARM-mode instructions starting at ``address``.

     Returns one ``DumpAssembly`` per decoded instruction, holding the
     instruction address and its text as "mnemonic<TAB>operands".
     """
     decoder = Cs(CS_ARCH_ARM, CS_MODE_ARM)
     return [
         DumpAssembly(i.address, f'{i.mnemonic}\t{i.op_str}')
         for i in decoder.disasm(code, address)
     ]
Ejemplo n.º 10
0
def filter_asm_and_return_instruction_list(address,
                                           asm,
                                           symbols,
                                           arch,
                                           API,
                                           symbolic_call=True):
    """Disassemble a hex-encoded function body and normalise its instructions.

    Args:
        address: address used as the base of the disassembly.
        asm: hex string holding the machine code.
        symbols: forwarded to ``filter_memory_references``.
        arch: Capstone architecture; ``CS_ARCH_ARM`` selects ARM mode, any
            other value selects 64-bit x86.
        API: forwarded to ``filter_memory_references``.
        symbolic_call: forwarded to ``filter_memory_references``.

    Returns:
        A pair ``(hash, instructions)``: the result of
        ``constantIndependt_hash`` over the raw Capstone instructions and
        the list of per-instruction ``filter_memory_references`` results.
    """
    machine_code = binascii.unhexlify(asm)

    if arch == capstone.CS_ARCH_ARM:
        decoder = Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
    else:
        decoder = Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    decoder.detail = True

    filtered = []
    raw_insns = []
    for insn in decoder.disasm(machine_code, address):
        normalised = filter_memory_references(insn,
                                              symbols,
                                              API,
                                              symbolic_call=symbolic_call)
        filtered.append(normalised)
        raw_insns.append(insn)

    return (constantIndependt_hash(raw_insns), filtered)
Ejemplo n.º 11
0
 def _disassemble(self, data):
     """Yield formatted listing entries for ``data``.

     The generator mixes three kinds of entries: ``db`` entries for runs of
     null bytes, ``db`` entries for a detected string, and one entry per
     instruction decoded by Capstone. Bytes that fall between entries are
     emitted through ``self._bytepatch``.

     Two positions are tracked: ``done`` is the offset up to which output
     has been produced, and ``cursor`` is the offset at which the next
     decoding attempt is made (it can run ahead of ``done``).
     """
     # The Capstone constructor arguments come from the selected mode value.
     capstone = Cs(*self.args.mode.value)
     strz = self._strings(data)
     # NOTE(review): only the first item of ``strz`` is ever fetched in this
     # block; ``string`` is not advanced afterwards -- confirm this is intended.
     string = next(strz, None)
     cursor, done = 0, 0
     while done < len(data):
         # The cursor never lags behind what has already been emitted.
         cursor = max(cursor, done)
         patchsize = self._nullsize(data, cursor, self.args.width)
         if patchsize > 2:
             # Emit the run as a single ``db 0,0,...`` entry starting at ``done``.
             yield self._format(done, data[done:done + patchsize], 'db', ','.join('0' * patchsize))
             done += patchsize
             continue
         if cursor >= len(data):
             # The cursor ran off the end: flush the remaining bytes.
             yield self._bytepatch(data, done, len(data))
             done = cursor
         if string and cursor >= string.end:
             # The cursor passed the string: emit the bytes before it,
             # then the string itself as a ``db`` entry.
             yield self._bytepatch(data, done, string.start)
             yield self._format(string.start, data[string.start:string.end], 'db', string.data)
             done = string.end
             continue
         try:
             # Decode a single instruction from a window of at most 15 bytes.
             ins = next(capstone.disasm(
                 data[cursor:cursor + 15], cursor, count=1))
             end = ins.address + ins.size
             if self.args.str and string:
                 # An instruction overlapping the string is discarded and
                 # the cursor jumps to the end of the string.
                 if end > string.start and string.end > cursor:
                     cursor = string.end
                     continue
         except StopIteration:
             # Nothing decodable at this offset: retry one byte further.
             cursor += 1
             continue
         else:
             yield self._format(ins.address, ins.bytes, ins.mnemonic, ins.op_str)
             done = end
Ejemplo n.º 12
0
class Disasassembler:
    """Small wrapper around a 32-bit x86 Capstone decoder with a fixed base."""

    def __init__(self, offset=0):
        """Create the decoder; ``offset`` is the base address used by disasm."""
        self.offset = offset
        self._disas = Cs(CS_ARCH_X86, CS_MODE_32)

    def disasm(self, opcodes):
        """Return the decoder's result for ``opcodes`` based at ``self.offset``."""
        base = self.offset
        return self._disas.disasm(opcodes, base)
Ejemplo n.º 13
0
def disassemble(addr, data):
    """Print the big-endian PowerPC disassembly of ``data`` based at ``addr``.

    One line is printed per decoded instruction; if nothing could be
    decoded, a single failure message naming ``addr`` is printed instead.
    """
    decoder = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN)
    decoded_any = False
    for insn in decoder.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
Ejemplo n.º 14
0
 def __init__(self, encoding, position):
     """Decode ``encoding`` with Capstone and keep the result in ``self._cap``.

     The integer stored in ``self._encoding`` is serialised as four
     little-endian bytes and decoded in ARM mode at address ``position``.
     ``self._cap`` ends up as the last decoded instruction, or ``None`` when
     nothing could be decoded.
     """
     super(CAPSInstruction, self).__init__(encoding, position)
     raw_word = self._encoding.to_bytes(4, byteorder='little')
     decoder = Cs(CS_ARCH_ARM, CS_MODE_ARM)
     decoder.detail = True
     self._cap = None
     decoded = list(decoder.disasm(raw_word, position))
     if decoded:
         self._cap = decoded[-1]
Ejemplo n.º 15
0
def find_instr_addr(mod_name, bits):
    """Locate a call site in the helper reached from the ``rtcInStrChar`` export.

    The export is disassembled up to its first ``ret`` and the operand of
    its last ``call`` is taken as the address of a helper function. The
    helper is then scanned for calls that reference the imported
    ``SysFreeString`` (or any ``qword ptr`` call).

    Args:
        mod_name: path of the PE module to inspect.
        bits: 32 selects 32-bit x86 decoding, anything else 64-bit.

    Returns:
        The RVA of the call instruction preceding the second matching
        call, or ``None`` when the export, the import, a call in the
        export, or the second matching call cannot be found.

    Raises:
        ValueError: if the last call operand of the export is not a plain
            hexadecimal address.
    """
    dll = pefile.PE(mod_name)
    image_base = dll.OPTIONAL_HEADER.ImageBase

    # pefile reports symbol names as bytes on Python 3 (str on Python 2),
    # so both spellings are accepted.
    export_names = (b'rtcInStrChar', 'rtcInStrChar')
    import_names = (b'SysFreeString', 'SysFreeString')

    exp_addr = None
    for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
        if entry.name in export_names:
            exp_addr = entry.address
            break

    imp_addr = None
    for imp in dll.DIRECTORY_ENTRY_IMPORT:
        for entry in imp.imports:
            if entry.name in import_names:
                imp_addr = entry.address
                break

    # Without both anchors there is nothing to search for.
    if exp_addr is None or imp_addr is None:
        return None

    memory = dll.get_memory_mapped_image()
    dsm = Cs(CS_ARCH_X86, CS_MODE_32 if bits == 32 else CS_MODE_64)

    # Pass 1: remember the operand of the last call before the first ret.
    last_call = None
    for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0],
                         exp_addr + image_base):
        if op.mnemonic == 'call':
            last_call = op.op_str
        if op.mnemonic == 'ret':
            break
    if last_call is None:
        return None

    next_func = int(last_call, 16) - image_base

    # Pass 2: count matching calls; last_call always trails by one call, so
    # on the second match it still names the call seen before it.
    call_free = 0
    for op in dsm.disasm(memory[next_func:next_func + 0x200],
                         next_func + image_base):
        if op.mnemonic == 'call' and ('0x%x' % imp_addr in op.op_str
                                      or 'qword ptr' in op.op_str):
            call_free += 1
        if call_free == 2:
            return last_call
        if op.mnemonic == 'call':
            last_call = op.address - image_base
        if op.mnemonic == 'ret':
            return None
    return None
Ejemplo n.º 16
0
def disasm_bytes(bytes, addr):
    """Decode ``bytes`` as AArch64 code, one 4-byte word at a time.

    Every word yields either the instructions Capstone decodes from it or,
    when it is not a valid instruction, a placeholder: a wrapped ``nop``
    whose mnemonic is replaced by a ``.quad`` directive carrying the raw
    word (read as little-endian).

    Returns the list of decoded instructions and placeholders in order.
    """
    decoder = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
    decoder.syntax = CS_OPT_SYNTAX_ATT
    decoder.detail = True

    decoded = []
    for offset in range(0, len(bytes), 4):
        word = bytes[offset:offset + 4]
        insns = list(decoder.disasm(word, addr + offset))
        if insns:
            decoded += insns
            continue

        # the word is invalid, so a fake "nop" is crafted (to make the rest
        # of the code work) and presented as data with a comment
        nop = list(decoder.disasm(b"\x1f\x20\x03\xd5", addr + offset))[0]  # bytes for nop
        placeholder = InstructionWrapper(nop)
        placeholder.mnemonic = ".quad 0x%x // invalid instruction" % int.from_bytes(
            word, byteorder="little")  # are we sure about 'little'?
        decoded.append(placeholder)
    return decoded
Ejemplo n.º 17
0
def extract_call_destinations(elf):
    """Collect the distinct call targets found in the code of an ELF file.

    The code is taken from the section or segment returned by
    ``get_ep_section_or_segment`` and disassembled according to the
    ``CALL_LIST`` entry of the file's architecture.

    Args:
        elf: the parsed ELF file object.

    Returns:
        A list of target strings (hex digits without the ``0x`` prefix for
        x86/x64 and ARM; a slice of the operand text for MIPS) in order of
        first appearance. The list is empty when no code region is found
        or the architecture has no ``CALL_LIST`` entry.
    """
    # get the code section or segment (if there's no section)
    code_section_or_segment = get_ep_section_or_segment(elf)

    # Bail out before the value is dereferenced below.
    # TODO: handle UPX-packed binaries as they have no sections so we should go straight to segment offset
    if code_section_or_segment is None:
        return []

    # if we only got the segment, start extracting calls from the EP
    if isinstance(code_section_or_segment, elftools.elf.segments.Segment):
        ofs = elf.header.e_entry
        code_data = code_section_or_segment.data(
        )[ofs - code_section_or_segment["p_vaddr"]:]
    # otherwise we use the code section
    else:
        ofs = elf_get_imagebase(elf) + code_section_or_segment["sh_offset"]
        code_data = code_section_or_segment.data()

    # get the architecture of our ELF file.
    # the disassembly and the call opcode and mnemonic will be based on the
    # determined architecture, as defined by the CALL_LIST dict above
    arch = elf.get_machine_arch()

    # in case we have not specified the opcode, mnemonic, and the
    # capstone arch and mode, skip
    if arch not in CALL_LIST:
        return []

    # TODO: automatically identify the architecture the binary was compiled to
    md = Cs(CALL_LIST[arch]["cs_arch"], CALL_LIST[arch]["cs_mode"])

    symbols_list = []
    # Set used for O(1) duplicate checks; the list keeps first-seen order.
    seen = set()

    for i in md.disasm(code_data, ofs):
        address = None

        if arch in ("x86", "x64") and i.mnemonic == "call":
            # Consider only call to absolute addresses
            if i.op_str.startswith("0x"):
                address = i.op_str[2:]  # cut off '0x' prefix

        elif arch == "ARM" and i.mnemonic.startswith("bl"):
            if i.op_str.startswith("#0x"):
                address = i.op_str[3:]  # cut off '#0x' prefix

        elif arch == "MIPS" and i.mnemonic == "lw":
            if i.op_str.startswith("$t9, "):
                address = i.op_str[8:-5]

        if address is not None and address not in seen:
            seen.add(address)
            symbols_list.append(address)

    return symbols_list
Ejemplo n.º 18
0
def disassemble(addr, data, thumb=False):
    """Print the little-endian ARM disassembly of ``data`` based at ``addr``.

    Args:
        addr: address of the first byte of ``data``.
        data: the machine code to decode.
        thumb: decode as Thumb when true, as ARM otherwise.

    One line is printed per decoded instruction; if nothing could be
    decoded, a single failure message naming ``addr`` is printed instead.
    """
    mode = CS_MODE_THUMB if thumb else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode | CS_MODE_LITTLE_ENDIAN)

    decoded_any = False
    for i in md.disasm(data, addr):
        decoded_any = True
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("0x%x:\t%s    %s" % (i.address, i.mnemonic, i.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
Ejemplo n.º 19
0
def disassemble(addr, data, thumb=False):
    """Print the little-endian ARM disassembly of ``data`` based at ``addr``.

    Args:
        addr: address of the first byte of ``data``.
        data: the machine code to decode.
        thumb: decode as Thumb when true, as ARM otherwise.

    One line is printed per decoded instruction; if nothing could be
    decoded, a single failure message naming ``addr`` is printed instead.
    """
    mode = CS_MODE_THUMB if thumb else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode | CS_MODE_LITTLE_ENDIAN)

    decoded_any = False
    for i in md.disasm(data, addr):
        decoded_any = True
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("0x%x:\t%s    %s" % (i.address, i.mnemonic, i.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
Ejemplo n.º 20
0
    def __gadgetsFinding(self, section, gadgets, arch, mode):
        """Search one section for gadgets ending in the given patterns.

        Args:
            section: mapping with the raw ``"opcodes"`` of the section and
                its ``"vaddr"``.
            gadgets: sequence of ``(pattern, size, alignment)`` entries; the
                pattern is a regular expression matched against the opcodes.
            arch: Capstone architecture constant.
            mode: Capstone mode constant.

        Returns:
            A list of dictionaries, one per gadget, with the keys
            ``vaddr``, ``gadget`` (text of the instructions joined by
            `` ; ``), ``decodes``, ``bytes`` and ``prev`` (up to
            ``PREV_BYTES`` bytes preceding the gadget).
        """
        C_OP = 0
        C_SIZE = 1
        C_ALIGN = 2
        PREV_BYTES = 9  # Number of bytes prior to the gadget to store.
        ret = []
        md = Cs(arch, mode)
        opcodes = section["opcodes"]
        sec_vaddr = section["vaddr"]
        for gad in gadgets:
            allRefRet = [m.start() for m in re.finditer(gad[C_OP], opcodes)]
            for ref in allRefRet:
                end = ref + gad[C_SIZE]
                # Walk backwards from the match, one alignment unit per step.
                for i in range(self.__options.depth):
                    start = ref - (i * gad[C_ALIGN])
                    if (sec_vaddr + start) % gad[C_ALIGN] != 0:
                        continue
                    code = opcodes[start:end]
                    # NOTE(review): the base address passed here is the
                    # address of the match, not of `start`; kept as in the
                    # original because it affects the printed operands.
                    decodes = md.disasm(code, sec_vaddr + ref)
                    gadget = ""
                    last = None
                    for last in decodes:
                        gadget += (last.mnemonic + " " + last.op_str +
                                   " ; ").replace("  ", " ")
                    # Nothing decoded: skip, instead of inspecting an
                    # instruction left over from a previous attempt (or an
                    # unbound name on the very first attempt).
                    if last is None:
                        continue
                    # The candidate must end with the gadget's terminator.
                    if re.search(gad[C_OP], last.bytes) is None:
                        continue
                    gadget = gadget[:-3]  # drop the trailing " ; "
                    vaddr = self.__offset + sec_vaddr + start
                    prevBytesAddr = max(sec_vaddr, vaddr - PREV_BYTES)
                    prevBytes = opcodes[prevBytesAddr - sec_vaddr:vaddr -
                                        sec_vaddr]
                    ret += [{
                        "vaddr": vaddr,
                        "gadget": gadget,
                        # The decoder iterator has been consumed by the loop
                        # above; it is stored as in the original.
                        "decodes": decodes,
                        "bytes": code,
                        "prev": prevBytes
                    }]
        return ret
Ejemplo n.º 21
0
def find_instr_addr(mod_name, bits):
	"""Locate a call site in the helper reached from the ``rtcInStrChar`` export.

	The export is disassembled up to its first ``ret`` and the operand of
	its last ``call`` is taken as the address of a helper function. The
	helper is then scanned for calls that reference the imported
	``SysFreeString`` (or any ``qword ptr`` call).

	Args:
		mod_name: path of the PE module to inspect.
		bits: 32 selects 32-bit x86 decoding, anything else 64-bit.

	Returns:
		The RVA of the call instruction preceding the second matching
		call, or ``None`` when the export, the import, a call in the
		export, or the second matching call cannot be found.

	Raises:
		ValueError: if the last call operand of the export is not a plain
			hexadecimal address.
	"""
	dll = pefile.PE(mod_name)
	image_base = dll.OPTIONAL_HEADER.ImageBase

	# pefile reports symbol names as bytes on Python 3 (str on Python 2),
	# so both spellings are accepted.
	export_names = (b'rtcInStrChar', 'rtcInStrChar')
	import_names = (b'SysFreeString', 'SysFreeString')

	exp_addr = None
	for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
		if entry.name in export_names:
			exp_addr = entry.address
			break

	imp_addr = None
	for imp in dll.DIRECTORY_ENTRY_IMPORT:
		for entry in imp.imports:
			if entry.name in import_names:
				imp_addr = entry.address
				break

	# Without both anchors there is nothing to search for.
	if exp_addr is None or imp_addr is None:
		return None

	memory = dll.get_memory_mapped_image()
	dsm = Cs(CS_ARCH_X86, CS_MODE_32 if bits == 32 else CS_MODE_64)

	# Pass 1: remember the operand of the last call before the first ret.
	last_call = None
	for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0], exp_addr + image_base):
		if op.mnemonic == 'call':
			last_call = op.op_str
		if op.mnemonic == 'ret':
			break
	if last_call is None:
		return None

	next_func = int(last_call, 16) - image_base

	# Pass 2: count matching calls; last_call always trails by one call, so
	# on the second match it still names the call seen before it.
	call_free = 0
	for op in dsm.disasm(memory[next_func:next_func + 0x200], next_func + image_base):
		if op.mnemonic == 'call' and ('0x%x' % imp_addr in op.op_str or 'qword ptr' in op.op_str):
			call_free += 1
		if call_free == 2:
			return last_call
		if op.mnemonic == 'call':
			last_call = op.address - image_base
		if op.mnemonic == 'ret':
			return None
	return None
Ejemplo n.º 22
0
def find_single(args):
    """Build the gadgets that end at one terminator match.

    Args:
        args: a single 9-tuple ``(raw_data, pvaddr, elftype, elf_base_addr,
            arch, mode, gad, need_filter, ref)``. ``gad`` is a
            ``(pattern, size, alignment)`` entry and ``ref`` the offset of
            the match inside ``raw_data``. The tuple is unpacked in the body
            because tuple parameters in a signature are a syntax error on
            Python 3; callers still pass exactly one positional tuple.

    Returns:
        The list of gadgets found by stepping back up to nine alignment
        units from ``ref``.
    """
    (raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad,
     need_filter, ref) = args

    C_OP = 0
    C_SIZE = 1
    C_ALIGN = 2

    allgadgets = []

    md = Cs(arch, mode)
    md.detail = True

    for i in range(10):
        back_bytes = i * gad[C_ALIGN]
        section_start = ref - back_bytes
        start_address = pvaddr + section_start
        if elftype == 'DYN':
            start_address = elf_base_addr + start_address

        decodes = list(md.disasm(raw_data[section_start:ref + gad[C_SIZE]],
                                 start_address))
        insns = [(decode.mnemonic + " " + decode.op_str).strip()
                 for decode in decodes]

        # Keep only candidates that decoded and start on an aligned address.
        if not insns or (start_address % gad[C_ALIGN]) != 0:
            continue

        address = start_address
        if mode == CS_MODE_THUMB:
            # Thumb code addresses carry the low bit set.
            address = address | 1

        gadget_bytes = raw_data[section_start:ref + gad[C_SIZE]]
        onegad = Gadget(address, insns, {}, 0, gadget_bytes)
        if not passClean(decodes):
            continue

        if arch == CS_ARCH_X86:
            onegad = filter_for_x86_big_binary(onegad)
        elif arch == CS_ARCH_ARM:
            onegad = filter_for_arm_big_binary(onegad)

        if (not need_filter) and onegad:
            classifier = GadgetClassifier(arch, mode)
            onegad = classifier.classify(onegad)

        if onegad:
            allgadgets += [onegad]

    return allgadgets
Ejemplo n.º 23
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first decoded instruction.

    Args:
        bytes: machine code to decode as 64-bit x86.
        offset: address of the first byte.

    Returns:
        ``(target, size)`` where ``target`` is the address of the following
        instruction plus the operand displacement and ``size`` is the
        operand size. ``(None, None)`` is returned when nothing decodes,
        when the first instruction has no RIP-relative memory operand, or
        when Capstone reports an error (which is also printed).
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected.
        instruc = next(md.disasm(bytes, offset, count=1), None)
        if instruc is None:
            return None, None

        # RIP-relative operands are based on the address of the next
        # instruction, i.e. the end of this one; computing it this way does
        # not require a second instruction to be present.
        next_ip = instruc.address + instruc.size

        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_ip + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
Ejemplo n.º 24
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first decoded instruction.

    Args:
        bytes: machine code to decode as 64-bit x86.
        offset: address of the first byte.

    Returns:
        ``(target, size)`` where ``target`` is the address of the following
        instruction plus the operand displacement and ``size`` is the
        operand size. ``(None, None)`` is returned when nothing decodes,
        when the first instruction has no RIP-relative memory operand, or
        when Capstone reports an error (which is also printed).
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected.
        instruc = next(md.disasm(bytes, offset, count=1), None)
        if instruc is None:
            return None, None

        # RIP-relative operands are based on the address of the next
        # instruction, i.e. the end of this one; computing it this way does
        # not require a second instruction to be present.
        next_ip = instruc.address + instruc.size

        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_ip + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
Ejemplo n.º 25
0
    def disassemble(self, size, thumb=True):
        """
        Display the bytes disassembled using Capstone at the current position.

        One line is printed per decoded instruction: its address as eight
        hex digits, then the mnemonic and the operands.

        Args:
            size (:obj:`int`): the number of bytes to disassemble
            thumb (:obj:`bool`): True if Thumb, False otherwise
        """
        from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB

        cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB if thumb else CS_MODE_ARM)

        addr = self._ptr.value
        for insn in cs.disasm(self.read(size), addr):
            # The three fields must be passed as separate arguments; a
            # single tuple argument makes str.format raise TypeError.
            print("{:08x}:\t{} {}".format(
                insn.address, insn.mnemonic, insn.op_str))
Ejemplo n.º 26
0
    def _cs_disassemble_one(self, data, address):
        """Return the first Capstone instruction decoded from ``data``.

        The object's own disassembler is tried first; if it decodes nothing,
        a fresh ARM-mode decoder is tried. ``InvalidDisassemblerData`` is
        raised when both fail.
        """
        # TODO: Improve this check.
        primary = list(self._disassembler.disasm(data, address))
        if len(primary) > 0:
            return primary[0]

        # Second attempt with a plain ARM-mode decoder.
        fallback = list(Cs(CS_ARCH_ARM, CS_MODE_ARM).disasm(data, address))
        if len(fallback) > 0:
            return fallback[0]

        raise InvalidDisassemblerData("CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))
Ejemplo n.º 27
0
def generate_pic(buff, mode):
    """
    Build a position independent YARA hex string for a byte sequence.

    The buffer is disassembled as x86 from address 0. For every instruction
    with exactly one immediate or exactly one memory operand, the span from
    the computed end of its opcode/encoding bytes up to the start of the
    next instruction (or the end of the buffer) is recorded and handed to
    ``_to_yara_hex_string`` together with the buffer.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.

    :return: YARA compliant hex string sequence.
    :rtype: str
    """

    decoder = Cs(CS_ARCH_X86, mode)
    decoder.detail = True

    spans = []
    span_open = False
    offset = 0

    for insn in decoder.disasm(buff, 0x0):

        # A span opened by the previous instruction ends where this one starts.
        if span_open:
            spans.append((offset, insn.address - offset))
            span_open = False

        single_imm = insn.op_count(X86_OP_IMM) == 1
        if not single_imm and insn.op_count(X86_OP_MEM) != 1:
            continue

        # Skip past the opcode and the optional encoding bytes.
        offset = insn.address + _get_opcode_length(insn.opcode)
        span_open = True

        for encoding_field in (insn.modrm, insn.rex, insn.sib):
            if encoding_field > 0:
                offset += 1

        # Unused prefix slots are reported as zero.
        offset += MAX_PREFIX_SIZE - insn.prefix.count(0x0)

    # A span still open after the last instruction runs to the buffer end.
    if span_open:
        spans.append((offset, len(buff) - offset))

    return '{ ' + _to_yara_hex_string(buff, spans) + ' }'
Ejemplo n.º 28
0
 def __parse_plt(self):
     """Fill ``plt_got_dic`` and ``got_plt_dic`` from the ``.plt`` section.

     The section is disassembled as 64-bit x86. Every third instruction
     after the first (indices 3, 6, 9, ...) must have a single operand;
     the address following it plus the operand's memory displacement is
     mapped to and from the instruction address.

     Raises ``ValueError`` when the ELF file has no ``.plt`` section.
     """
     # parsing .plt section
     plt_sct = self.elf.get_section_by_name(".plt")
     if plt_sct is None:
         raise ValueError

     md = Cs(CS_ARCH_X86, CS_MODE_64)
     md.detail = True

     decoded = md.disasm(plt_sct.data(), plt_sct["sh_addr"])
     for index, insn in enumerate(decoded):
         if index == 0 or index % 3 != 0:
             continue
         next_ip = insn.address + insn.size
         assert len(insn.operands) == 1
         target = next_ip + insn.operands[0].value.mem.disp
         self.plt_got_dic[insn.address] = target
         self.got_plt_dic[target] = insn.address
Ejemplo n.º 29
0
    def disasm(self, addr):
        """Disassemble the section containing ``addr`` into ``self.code``.

        Every decoded instruction is stored in ``self.code`` keyed by its
        address, and the addresses are appended to ``self.code_idx`` in
        decoding order. ``die`` is called when the section is not
        executable. For PE binaries the import symbols are loaded afterwards.
        """
        data, virtual_addr, flags = self.binary.get_section(addr)

        if not flags["exec"]:
            die("the address 0x%x is not in an executable section" % addr)

        md = Cs(CS_ARCH_X86, CS_MODE_64 if self.bits == 64 else CS_MODE_32)
        md.detail = True

        for insn in md.disasm(data, virtual_addr):
            where = insn.address
            self.code[where] = insn
            self.code_idx.append(where)

        # Import symbols of PE files are loaded only now, because the
        # decoded code is needed for a better resolution.
        if self.binary.get_type() == T_BIN_PE:
            self.binary.load_import_symbols(self.code)
Ejemplo n.º 30
0
    def _cs_disassemble_one(self, data, address):
        """Return the first Capstone instruction decoded from ``data``.

        The object's own disassembler is tried first; if it decodes nothing,
        a fresh ARM-mode decoder is tried. ``InvalidDisassemblerData`` is
        raised when both fail.
        """
        # TODO: Improve this check.
        primary = list(self._disassembler.disasm(data, address))
        if len(primary) > 0:
            return primary[0]

        # Second attempt with a plain ARM-mode decoder.
        fallback = list(Cs(CS_ARCH_ARM, CS_MODE_ARM).disasm(data, address))
        if len(fallback) > 0:
            return fallback[0]

        raise InvalidDisassemblerData(
            "CAPSTONE: Unknown instruction (Addr: {:s}).".format(
                hex(address)))
Ejemplo n.º 31
0
 def compute_eflags_setter(self):
     """Record, per monitored flag object, the EFLAGS touched by gadgets in its traces.

     For every key ``fl`` of ``self.traces`` the gadgets of each trace are
     disassembled as 32-bit x86. When an instruction's ``eflags`` value
     overlaps the converted ``fl.monitored_bits``, the converted ``eflags``
     of that instruction is passed to ``fl.set_eflag_bitmask``.
     """
     dis32 = Cs(CS_ARCH_X86, CS_MODE_32)
     dis32.detail = True
     # Set when a matching instruction was found; it is consumed at the
     # start of the next gadget iteration to leave the current trace.
     # NOTE(review): the flag is shared by all loops, so a match on the
     # last gadget of a trace is only consumed by the first gadget of the
     # following trace, which is then skipped entirely -- confirm intent.
     flag_insn = False
     for fl, traces in self.traces.items():
         for trace in traces:
             for g_addr in trace:
                 if flag_insn:
                     flag_insn = False
                     break
                 gadget_bytes = self._emu.gadget_map[g_addr].rop_bytes
                 for insn in dis32.disasm(gadget_bytes, g_addr):
                     # Check every instruction of the gadget to see if it can perform a modification of the
                     # monitored bits (doesn't mean that the bits have been actually modified)
                     if insn.eflags and insn.eflags & self.capstone_to_eflags_aux(fl.monitored_bits):
                         fl.set_eflag_bitmask(self.capstone_to_eflags_aux(insn.eflags))
                         flag_insn = True
                         # Stop scanning this gadget after the first match.
                         break
Ejemplo n.º 32
0
def get_compiler_info(rom_bytes, entry_point, print_result=True):
    """Guess the compiler of a ROM by comparing ``j`` and ``b`` counts.

    The bytes from offset 0x1000 onwards are disassembled as big-endian
    MIPS64 starting at ``entry_point``. More ``b`` than ``j`` instructions
    is reported as "IDO", anything else as "GCC".

    Args:
        rom_bytes: complete ROM image.
        entry_point: address assigned to the first disassembled instruction.
        print_result: when true, print the counts and the verdict.

    Returns:
        ``"IDO"`` or ``"GCC"``.
    """
    disassembler = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
    disassembler.detail = True

    # Only these two mnemonics take part in the heuristic.
    tally = {"j": 0, "b": 0}
    for instruction in disassembler.disasm(rom_bytes[0x1000:], entry_point):
        if instruction.mnemonic in tally:
            tally[instruction.mnemonic] += 1

    jumps = tally["j"]
    branches = tally["b"]

    if branches > jumps:
        compiler = "IDO"
    else:
        compiler = "GCC"

    if print_result:
        print(
            f"{branches} branches and {jumps} jumps detected in the first code segment. Compiler is most likely {compiler}"
        )
    return compiler
Ejemplo n.º 33
0
def disassemble_bytes(
    cap: cs.Cs, base_address: int, data: bytes
) -> List[Tuple[Optional[CsInsn], int, bytes]]:
    """Disassemble ``data`` in 4-byte steps, keeping undecodable words.

    Each result entry is ``(instruction, address, raw_bytes)``. The
    instruction slot is ``None`` for instructions whose id is listed in
    ``MANUAL_MNEMONICS`` and for 4-byte chunks the disassembler stopped on.
    After such a chunk, decoding resumes 4 bytes further on.
    """
    records: List[Tuple[Optional[CsInsn], int, bytes]] = []
    total = len(data)
    cursor = 0
    while cursor < total:
        for decoded in cap.disasm(data[cursor:total], base_address + cursor):
            kept = None if decoded.id in MANUAL_MNEMONICS else decoded
            records.append((kept, decoded.address, decoded.bytes))
            cursor += 4
        if cursor >= total:
            break
        # The disassembler gave up here: record the raw chunk and step over it.
        records.append((None, base_address + cursor, data[cursor : cursor + 4]))
        cursor += 4
    return records
Ejemplo n.º 34
0
def main(fname):
    """Print a disassembly listing for every COFF member of a library file.

    A minimal Python take on ``objdump -d`` (Linux) and ``dumpbin /disasm``
    (MSVC): the AR and COFF structures are parsed by ``read_lib_file`` and
    the first section of each member is disassembled as 32-bit x86 with
    Capstone. At most one pending symbol is consumed per instruction, and
    its name is printed when its type is 32.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending = deque(coff.symbols)
        md = Cs(CS_ARCH_X86, CS_MODE_32)
        md.skipdata = True
        for insn in md.disasm(coff.sections[0].data, 0x000):
            if pending and insn.address >= pending[0].value:
                symbol = pending.popleft()
                if symbol.type == 32:
                    print(symbol.name.decode(errors="ignore"))
            print("    0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
Ejemplo n.º 35
0
def dumpASM(flo, mode, maxAddr=1e99):
    """Print an x86 disassembly of ``flo`` with implicit register usage.

    Args:
        flo: machine code to disassemble, starting at address 0.
        mode: 32 or 64; any other value raises ``KeyError``.
        maxAddr: stop after the first instruction located past this address.
    """
    cs_modes = {32: CS_MODE_32, 64: CS_MODE_64}

    disassembler = Cs(CS_ARCH_X86, cs_modes[mode])
    disassembler.detail = True

    sections = (
        ("\tImplicit registers read: ", "regs_read"),
        ("\tImplicit registers written: ", "regs_write"),
    )

    for insn in disassembler.disasm(flo, 0):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

        for header, attribute in sections:
            print(header, end="")
            for reg in getattr(insn, attribute):
                print("%s " % insn.reg_name(reg))
            print()

        if insn.address > maxAddr:
            break
Ejemplo n.º 36
0
    def disas_function(self, name):
        """Print the disassembly of function ``name`` with its register usage.

        The function's bytes are sliced out of the ``.text`` section using the
        symbol's ``st_value`` and ``st_size`` and disassembled as x86-64. For
        each instruction the formatted line is printed, followed by the
        registers it reads and writes (when there are any).

        Args:
            name: key into ``self.functions_name_dic``; a missing key raises
                ``KeyError``.

        Returns:
            ``None``. Nothing is printed when the symbol entry is ``None``.
        """
        if len(self.functions_name_dic) == 0:
            self.__parse_functions()
        text_section = self.elf.get_section_by_name(".text")
        text_base = text_section["sh_addr"]
        symbol = self.functions_name_dic[name]
        if symbol is None:
            return
        start = symbol["st_value"] - text_base
        code = text_section.data()[start:start + symbol["st_size"]]
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        for insn in md.disasm(code, symbol["st_value"]):
            print(
                self.__disas_function_format.format(hex(insn.address),
                                                    insn.mnemonic,
                                                    insn.op_str))
            read_regs, write_regs = insn.regs_access()

            # Join the names directly: folding with reduce() fed the already
            # formatted string back into reg_name() from the third register on.
            if read_regs:
                print("\tRead registers: {}".format(
                    ", ".join(insn.reg_name(reg) for reg in read_regs)))
            if write_regs:
                print("\tWrite registers: {}".format(
                    ", ".join(insn.reg_name(reg) for reg in write_regs)))
            """
Ejemplo n.º 37
0
def disasm(bytes, offset=0):
    """Disassemble x86-64 machine code and annotate every instruction.

    Each instruction is printed and then decorated with extra attributes:

    * ``nop`` -- set on NOP instructions.
    * ``internal_jump`` / ``jump_address`` -- jump or call whose immediate
      target lies between the first and the last disassembled instruction.
    * ``external_jump`` / ``jump_address`` / ``jump_function_*`` /
      ``comment`` -- jump or call whose immediate target lies outside that
      range and for which ``executable.ex.get_symbol_by_addr`` finds a symbol.
    * ``rip`` / ``rip_offset`` / ``rip_resolved`` / ``rip_value_hex`` /
      ``rip_value_ascii`` -- set for RIP-relative memory operands.
    * ``regs_write_names`` / ``regs_read_names`` / ``docfile`` /
      ``short_desc`` -- always set.

    Mnemonics without a documentation file are appended to
    ``missing_docs.log``.

    Args:
        bytes: machine code to disassemble.
        offset: address assigned to the first instruction.

    Returns:
        The list of annotated instructions, or ``None`` when Capstone raises
        ``CsError`` (the error is printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # We can only decode the destination if it's an immediate value
                if instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol = executable.ex.get_symbol_by_addr(dest_addr)
                        if symbol:
                            text_sect = executable.ex.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.external_jump = True
                            instr.jump_address = dest_addr
                            instr.jump_function_name = demangle(symbol.name)
                            instr.jump_function_address = dest_addr
                            instr.jump_function_offset = dest_addr - sect_addr + sect_offset
                            instr.jump_function_size = symbol['st_size']
                            instr.comment = demangle(symbol.name)
            # Handle individual operands
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # The displacement is relative to the address of the next
                    # instruction. Derive it from this instruction's own size
                    # so the last instruction of the buffer works as well
                    # (indexing disassembled[i + 1] raised IndexError there).
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset
                    symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                    # Kept in its own name so the `bytes` argument is not rebound.
                    rip_bytes = executable.ex.get_bytes(instr.rip_resolved, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in rip_bytes)
                    if op.size == 16:
                        # HTML collapses consecutive spaces. For presentation
                        # purposes, replace spaces with &nbsp (non-breaking space)
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in rip_bytes)
                    else:
                        # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                        # it, we're temporarily pretending they don't exist. Those edge cases need to be
                        # handled.
                        # see typeName(
                        instr.rip_value_ascii = "under construction..."
            # what registers does this instruction read/write?
            instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write]
            instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read]
            # Add in documentation meta-data
            instr.docfile = doc_file(instr)
            instr.short_desc = get_short_desc(instr)
            if instr.docfile is None:
                with open('missing_docs.log', 'a+') as f:
                    f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic))
        return disassembled

    except CsError as e:
        print("ERROR: %s" %e)
Ejemplo n.º 38
0
from __future__ import print_function

# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

# Single instruction used as the demo input: 8d 44 38 02.
CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

for i in md.disasm(CODE, 0):
    # print(dir(i))
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
    if len(i.regs_read) > 0:
        # print_function is in effect for this file: a trailing comma no
        # longer suppresses the newline and a bare `print` prints nothing,
        # so keep the register list on one line explicitly.
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()
    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups:", end="")
        for g in i.groups:
            print("%u" % g)
            # print("%u" % g, end="")
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    """Create an x86 Capstone disassembler for a 32- or 64-bit ``mode``.

    NOTE(review): the body ends after configuring the disassembler and never
    uses ``flo`` or ``maxAddr``; it looks truncated -- confirm against the
    complete source before relying on it.
    """
    # Translate the caller's bit width into the Capstone mode constant; any
    # value other than 32 or 64 raises KeyError on lookup.
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}

    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True
Ejemplo n.º 39
0
    def do_POST(self):
        """Handle a POST request carrying a form-encoded debugger message.

        The body is parsed with ``urlparse.parse_qs`` and may contain:

        * ``addr``  -- integer address (defaults to 0, with a warning).
        * ``data``  -- hex-encoded payload (required).
        * ``type``  -- message type (required): ``read``, ``dis``,
          ``dis_res`` or ``dump``.
        * ``extra`` -- optional type-specific string.

        ``read`` prints ``display_data``; ``dis`` calls ``disassemble`` (in
        Thumb mode when ``extra`` is ``"thumb"``); ``dump`` calls
        ``dump_data`` with ``extra`` as file name. ``dis_res`` disassembles
        the payload as little-endian ARM, builds an address from the operand
        text of the first two instructions and, when it is above 0x40000000,
        queues a ``resolve`` command in ``self.mods``.
        """
        length = int(self.headers.getheader('content-length'))
        if length:
            rdata = self.rfile.read(length)
            rdata = urlparse.parse_qs(rdata)
            addr = 0
            extra = ""

            try:
                addr = int(rdata['addr'][0])
            except KeyError:
                print("[+] Warning: addr not received")
            try:
                data = rdata['data'][0]
            except KeyError:
                print("[+] Error: dump not received")
                return
            try:
                typ = rdata['type'][0]
            except KeyError:
                print("[+] Error: msg type not received")
                return

            try:
                extra = rdata['extra'][0]
            except KeyError:
                pass

            if typ == 'read':
                print(display_data(addr, data.decode('hex')))
            if typ == 'dis':
                if extra == "thumb":
                    disassemble(addr, data.decode('hex'), thumb=True)
                else:
                    disassemble(addr, data.decode('hex'))

            if typ == 'dis_res':
                mode = CS_MODE_ARM
                md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
                disassed = md.disasm(data.decode('hex'), addr)
                ops = []
                for i in disassed:
                    # Capstone reports mnemonics in lower case, so compare
                    # case-insensitively or the syscall stub is never seen.
                    if i.mnemonic.lower() == "svc":
                        print("Could not resolve " + extra + " (syscall)")
                        return
                    # Keep the operand text from the 8th character on.
                    # NOTE(review): presumably strips "rN, #0x" from a
                    # movw/movt pair -- confirm against the sender.
                    ops.append(i.op_str[7:])

                if len(ops) < 2:
                    # Two halves are needed to build the pointer below.
                    print("Could not resolve " + extra + " (invalid address)")
                    return

                # Second instruction supplies the high half, first the low half.
                ptrstr = ops[1].rjust(4, '0') + ops[0].rjust(4, '0')
                print(ptrstr)
                cmdstr = "resolve 0x" + ptrstr + " " + extra
                if int(ptrstr, 16) > 0x40000000:
                    self.mods.append(cmdstr)
                else:
                    print("Could not resolve " + extra + " (invalid address)")

            if typ == 'dump':
                fname = extra
                dump_data(data.decode('hex'), fname)
Ejemplo n.º 40
0
    def do_POST(self):
        """Handle a POST request carrying a form-encoded debugger message.

        The body is parsed with ``urlparse.parse_qs`` and may contain:

        * ``addr``  -- integer address (defaults to 0, with a warning).
        * ``data``  -- hex-encoded payload (required).
        * ``type``  -- message type (required): ``read``, ``dis``,
          ``dis_res`` or ``dump``.
        * ``extra`` -- optional type-specific string.

        ``read`` prints ``display_data``; ``dis`` calls ``disassemble`` (in
        Thumb mode when ``extra`` is ``"thumb"``). ``dis_res`` disassembles
        the payload as little-endian ARM, builds an address from the operand
        text of the first two instructions and queues a ``resolve`` command
        in ``self.mods`` when the address is inside the accepted range.
        ``dump`` writes the payload through ``dump_data`` under the module
        level ``CURRENT_DUMP_FILE_NAME``, re-initialising the dump directory
        whenever ``extra`` does not start with the current name.
        """
        # Declared up front: the 'dump' branch below rebinds the module global.
        global CURRENT_DUMP_FILE_NAME

        length = int(self.headers.getheader('content-length'))
        if length:
            rdata = self.rfile.read(length)
            rdata = urlparse.parse_qs(rdata)
            addr = 0
            extra = ""

            try:
                addr = int(rdata['addr'][0])
            except KeyError:
                print("[+] Warning: addr not received")
            try:
                data = rdata['data'][0]
            except KeyError:
                print("[+] Error: dump not received")
                return
            try:
                typ = rdata['type'][0]
            except KeyError:
                print("[+] Error: msg type not received")
                return

            try:
                extra = rdata['extra'][0]
            except KeyError:
                pass

            if typ == 'read':
                print(display_data(addr, data.decode('hex')))
            if typ == 'dis':
                if extra == "thumb":
                    disassemble(addr, data.decode('hex'), thumb=True)
                else:
                    disassemble(addr, data.decode('hex'))

            if typ == 'dis_res':
                mode = CS_MODE_ARM
                md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
                disassed = md.disasm(data.decode('hex'), addr)
                ops = []
                print("Parsing: " + extra)
                for i in disassed:
                    print("0x%x:\t%s    %s" % (i.address, i.mnemonic, i.op_str))
                    # Capstone reports mnemonics in lower case, so compare
                    # case-insensitively or the syscall stub is never seen.
                    if i.mnemonic.lower() == "svc":
                        print("Could not resolve " + extra + " (syscall) ")
                        return
                    # Keep the operand text from the 8th character on.
                    # NOTE(review): presumably strips "rN, #0x" from a
                    # movw/movt pair -- confirm against the sender.
                    ops.append(i.op_str[7:])

                if len(ops) < 2:
                    # Two halves are needed to build the pointer below.
                    print("Could not resolve " + extra + " (invalid address) ")
                else:
                    # Second instruction supplies the high half, first the low half.
                    ptrstr = "0x" + ops[1].rjust(4, '0') + ops[0].rjust(4, '0')
                    cmdstr = "resolve " + ptrstr + " " + extra
                    print(cmdstr)
                    address = int(ptrstr, 16)
                    # NOTE(review): the upper bound has ten hex digits
                    # (0xE000000000); 0xE0000000 may have been intended --
                    # confirm before changing.
                    if 0x40000000 < address < 0xE000000000:
                        self.mods.append(cmdstr)
                    else:
                        print("Could not resolve " + extra + " (invalid address) ")
                print("----")

            if typ == 'dump':
                if CURRENT_DUMP_FILE_NAME == "":
                    #If this is the initial dump
                    CURRENT_DUMP_FILE_NAME = extra
                    #check if this file already exists
                    self.dump_directory_initializer(extra)
                elif not extra.startswith(CURRENT_DUMP_FILE_NAME):
                    #If this is a different dump
                    self.dump_directory_initializer(extra)
                    CURRENT_DUMP_FILE_NAME = extra

                dump_data(data.decode('hex'), CURRENT_DUMP_FILE_NAME)
Ejemplo n.º 41
0
class Tracer():
    """Pair a qiradb execution trace with Capstone disassembly of the target.

    The target binary is read from disk, identified as ELF ("linux") or
    MZ ("windows"), and opened with ``Elf`` or ``PE`` accordingly. The trace
    stored under ``log`` is opened with ``qiradb.Trace``. Disassembled
    instructions are cached in ``self.disasms`` keyed by virtual address.
    """

    def __init__(self, target, log, start_clnum=0, end_clnum=0):
        """Load ``target``, open the trace ``log`` and wait until it is ready.

        Args:
            target: path of the traced binary.
            log: path prefix of the trace; ``log + '_base'`` must exist.
            start_clnum: accepted for compatibility; not used here.
            end_clnum: accepted for compatibility; not used here.

        Raises:
            Exception: when the file is neither ELF nor MZ, or the
                architecture is not recognised.
        """
        with open(target, 'rb') as f:
            self.data = f.read()
        self.target = target
        self.log = log

        self.os = self.get_os()
        if self.os is None:
            raise Exception('not supports os')

        self.arch = self.get_arch()
        if self.arch is None:
            raise Exception('not known arch')

        self.base = self.get_base()

        if self.os == 'windows':
            self.pe = PE(target)
        else:
            self.elf = Elf(target)

        if self.arch == 'i386':
            self.md = Cs(CS_ARCH_X86, CS_MODE_32)
            self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
        else:
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)
            self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits

        while not self.t.did_update():
            print("waiting...")
            time.sleep(0.1)

        self.disasms = {}
        # self.generate_trace(target, log, start_clnum, end_clnum, 4)

    def get_disasm(self, va):
        """Return ``"mnemonic operands"`` for the instruction at ``va``.

        The decoded instruction is cached in ``self.disasms``. An empty
        string is returned when the file offset lies past the end of the
        file, when nothing can be decoded, or when decoding raises.
        """
        offset = self.get_offset_from_rva(va - self.base)
        #print hex(offset)
        if offset > len(self.data):
            return ''
        try:
            if va in self.disasms:
                insn = self.disasms[va]
                return insn.mnemonic + ' ' + insn.op_str
            for insn in self.md.disasm(self.data[offset:], va, count=1):
                self.disasms[va] = insn
                return insn.mnemonic + ' ' + insn.op_str
        except Exception:
            # Best effort: an undecodable address is reported as ''. Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit through.
            pass
        return ''

    def get_os(self):
        """Return 'linux' for ELF files, 'windows' for MZ files, else None."""
        if self.data[0:4] == b'\x7fELF':
            return 'linux'
        elif self.data[0:2] == b'MZ':
            return 'windows'
        return None

    def get_arch(self):
        """Return 'i386' or 'x86_64', or None when it cannot be determined.

        For ELF the 16-bit field at offset 0x12 is checked (3 -> i386,
        0x3e -> x86_64). Windows targets are always reported as 'i386'.
        """
        if self.os == 'linux':
            value = l16(self.data[0x12:0x14])
            if value == 3:
                return 'i386'
            elif value == 0x3e:
                return 'x86_64'
        if self.os == 'windows':  # to modify
            return 'i386'
        return None

    def get_base(self, module_name=None):
        """Return a module's base address from the ``<log>_base`` file.

        Every line is expected to start with ``<hex base>-``.

        Args:
            module_name: substring identifying the module. When omitted, the
                first non-empty line that does not mention a shared library
                ('.so' on linux, '.dll' otherwise) is taken as the main
                module.

        Returns:
            The base address, or ``None`` when no line qualifies.
        """
        # self.log, not a bare `log`: the latter is not defined in this scope.
        with open(self.log + '_base', 'r') as f:
            if module_name is not None:
                for line in f:
                    if module_name in line:
                        return int(line.split('-')[0], 16)
                return None

            # default is the main module
            pattern = r'\.so' if self.os == 'linux' else r'\.dll'
            for line in f:
                line = line.strip()
                if line == '':
                    continue
                if not re.search(pattern, line):
                    return int(line.split('-')[0], 16)
        return None

    def get_reg_name(self, index):
        """Map a register-file byte offset to the full-width register name."""
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi']
            # Floor division keeps the list index an integer.
            return reg_names[index // 4]
        else:
            reg_names = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
                         'r14', 'r15', 'rip']
            return reg_names[index // 8]

    def get_reg_index(self, name):
        """Encode a register name as ``slot | width_flag``.

        The low byte is the register's slot; the flag tells which part of
        the register is addressed: 0x100 low byte, 0x200 high byte,
        0x400 16-bit, 0x800 32-bit, 0x1000 64-bit.

        Returns:
            The encoded index, or ``None`` for a name that is not listed.
        """
        tables = [
            (['ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di'], 0x400),
            (['ah', 'ch', 'dh', 'bh'], 0x200),
            (['al', 'cl', 'dl', 'bl'], 0x100),
        ]
        if self.arch == 'i386':
            tables.append(
                (['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi', 'eip'], 0x800))
        else:
            tables.append(
                (['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi', 'r8d', 'r9d', 'r10d', 'r11d', 'r12d',
                  'r13d', 'r14d', 'r15d'], 0x800))
            tables.append(
                (['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11', 'r12',
                  'r13', 'r14', 'r15', 'rip'], 0x1000))

        for names, width_flag in tables:
            if name in names:
                return names.index(name) | width_flag
        return None

    def get_offset_from_rva(self, rva):
        """Translate an address relative to ``self.base`` into a file offset."""
        if self.os == 'linux':  # to modify
            return self.elf.vma2offset(rva + self.base)
        else:
            return self.pe.get_offset_from_rva(rva)

    def is_branch(self, ins):
        """Tell whether a ``"<addr> <mnemonic> ..."`` line is ret/call/j*.

        Text without a second space-separated field (including the empty
        string) is not a branch.
        """
        fields = ins.split(' ')
        if len(fields) < 2:
            return False
        opcode = fields[1]
        return opcode in ('ret', 'call') or opcode.startswith('j')

    def write_one_ins(self, out, clnum, ins, ops):
        """Write one padded trace line to ``out``.

        The line holds the change number, the instruction text and the
        operand values in fixed-width columns. A blank line follows every
        branch instruction.
        """
        op_width = 24 if self.arch == 'i386' else 36
        line = (str(clnum) + ': ').ljust(8, ' ') + ins.ljust(50, ' ')
        line += ''.join(op.ljust(op_width, ' ') for op in ops)
        out.write(line + '\n')

        if self.is_branch(ins):
            out.write('\n')

    def byte_to_value(self, bytes):
        """Decode 1, 2, 4 or 8 byte values with l8/l16/l32/l64.

        Raises:
            Exception: for any other length.
        """
        result = ''.join(chr(byte & 0xff) for byte in bytes)
        if len(result) == 1:
            return l8(result)
        elif len(result) == 2:
            return l16(result)
        elif len(result) == 4:
            return l32(result)
        elif len(result) == 8:
            return l64(result)
        raise Exception('not known len:%d' % len(result))

    def _operand_values(self, clnum, insn):
        """Format the traced value of each register/memory operand of ``insn``."""
        values = []
        for position, op in enumerate(insn.operands):
            # NOTE(review): the operand position is subtracted from the
            # change number before querying the trace -- confirm intent.
            op_clnum = clnum - position
            if op.type == X86_OP_REG:
                reg_name = insn.reg_name(op.reg)
                reg_value = self.get_reg(op_clnum, reg_name)
                values.append('%s:%x' % (reg_name, reg_value))
            elif op.type == X86_OP_MEM:
                base = 0
                index = 0
                if op.mem.base != 0:
                    base = self.get_reg(op_clnum, insn.reg_name(op.mem.base))
                if op.mem.index != 0:
                    index = self.get_reg(op_clnum, insn.reg_name(op.mem.index))
                mem_addr = base + op.mem.scale * index + op.mem.disp
                mem_byte = self.t.fetch_memory(op_clnum, mem_addr, op.size)
                mem_value = self.byte_to_value(mem_byte)
                values.append('[%x]:%x' % (mem_addr, mem_value))
            # Other operand kinds (immediates, floating point) add nothing.
        return values

    def generate_trace(self, start_addr=None, start_clnum=0, end_clnum=0, limit=1):
        """Write an annotated instruction trace to ``<log>.out``.

        Args:
            start_addr: when given, output starts at the first instruction
                change located at this address.
            start_clnum: first change number; 0 means the trace minimum.
            end_clnum: change number to stop before; 0 means the trace
                maximum.
            limit: number of changes fetched per change number; only the
                first one is inspected.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum()

        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum()
        print('start: %s' % start_clnum)
        print('end: %s' % end_clnum)
        ins = ''
        ops = []

        start_record = start_addr is None
        with open(self.log + '.out', 'wb') as out:
            for i in range(start_clnum, end_clnum):
                changes = self.t.fetch_changes_by_clnum(i, limit)
                if len(changes) < 1:
                    continue
                change = changes[0]
                #print change
                if change['type'] == 'I':
                    if not start_record:
                        if change['address'] != start_addr:
                            continue
                        start_record = True
                    # Operand details are only available in detail mode.
                    self.md.detail = True
                    ins = '%x %s' % (change['address'], self.get_disasm(change['address']))
                    ops = []
                    if change['address'] not in self.disasms:
                        continue
                    ops = self._operand_values(i, self.disasms[change['address']])
                self.write_one_ins(out, i, ins, ops)

    def get_memory(self, clnum, addr, size):
        """Return ``size`` bytes of traced memory at ``addr`` as a string."""
        return ''.join(chr(byte & 0xff) for byte in self.t.fetch_memory(clnum, addr, size))

    def get_reg(self, clnum, reg_name):
        """Return the value of ``reg_name`` at change number ``clnum``.

        Sub-registers are masked out of the full register according to the
        width flag from ``get_reg_index``. On 64-bit targets ``rip`` is
        computed as address + data of the instruction change at ``clnum``.

        NOTE(review): a name unknown to ``get_reg_index`` yields ``None``
        and fails on the masking below with ``TypeError``.
        """
        index = self.get_reg_index(reg_name)
        reg_value = self.t.fetch_registers(clnum)[index & 0xff]
        if index & 0x800:
            reg_value = reg_value & 0xffffffff
        elif index & 0x400:
            reg_value = reg_value & 0xffff
        elif index & 0x200:
            reg_value = (reg_value & 0xff00) >> 8
        elif index & 0x100:
            reg_value &= 0xff

        if self.arch != 'i386' and (index & 0xff) == 16:
            changes = self.t.fetch_changes_by_clnum(clnum, 1)
            for change in changes:
                if change['type'] == 'I':
                    reg_value = change['address'] + change['data'] #rip
        return reg_value

    def get_ret_addr(self, clnum):
        """Return the pointer-sized value on top of the stack at ``clnum``."""
        if self.arch == 'i386':
            esp = self.get_reg(clnum, 'esp')
            retval = l32(self.get_memory(clnum, esp, 4))
        else:
            rsp = self.get_reg(clnum, 'rsp')
            retval = l64(self.get_memory(clnum, rsp, 8))
        return retval

    def get_pc(self, clnum):
        """Return the address of the instruction change at ``clnum``, or 0."""
        changes = self.t.fetch_changes_by_clnum(clnum, 1)
        for change in changes:
            # print change
            if change['type'] == 'I':
                return change['address']
        return 0

    def generate_cfg(self, start_addr, ret_addr=None, start_clnum=0, end_clnum=0):
        """Build a control-flow graph of a function from the trace.

        Every execution of the code between ``start_addr`` and its return
        address is collected as one trace, with the bodies of nested calls
        skipped. The traces are then merged into a ``Graph`` that is
        rendered to ``tracer.png``.

        Args:
            start_addr: address of the function's first instruction.
            ret_addr: address that ends an execution; when omitted it is
                read from the stack one change before entry.
            start_clnum: first change number; 0 means trace minimum + 1.
            end_clnum: last change number; 0 means trace maximum - 1.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum() + 1

        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum() - 1

        traces = []
        enter_call = 0
        enter_sub_call = 0

        for i in range(start_clnum, end_clnum + 1):
            pc = self.get_pc(i)
            asm = self.get_disasm(pc)
            if enter_call == 0:
                if pc == start_addr:
                    if ret_addr is None:
                        end_addr = self.get_ret_addr(i - 1)
                        print(hex(end_addr))
                    else:
                        end_addr = ret_addr
                    enter_call = 1
                    trace = [(i, pc, asm)]
            else:
                if end_addr == pc:
                    print('exit call')
                    enter_call = 0
                    traces.append(trace)
                    trace = []
                if enter_sub_call == 0:
                    trace.append((i, pc, asm))
                    if asm.startswith('call'):
                        # Skip everything until the nested call returns.
                        enter_sub_call = 1
                        sub_call_ret = self.get_ret_addr(i)
                else:
                    if pc == sub_call_ret:
                        trace.append((i, pc, asm))
                        enter_sub_call = 0

        graph = Graph()

        # Addresses already placed in the graph; only membership is needed.
        pcs = set()
        for trace in traces:
            print(trace)

        for trace in traces:
            exist_node = None
            exist_index = 1
            new_node = None
            for ins in trace:
                if ins[1] not in pcs:
                    pcs.add(ins[1])
                    if exist_node is None:
                        if new_node is None:
                            new_node = Node([Assemble(ins[1], ins[2])])
                            graph.add_node(new_node)
                        else:
                            new_node.add_asm(Assemble(ins[1], ins[2]))
                    else:
                        new_node = Node([Assemble(ins[1], ins[2])])
                        graph.add_node(new_node)
                        if len(exist_node.asm_seqs) == exist_index:
                            graph.add_edge(exist_node, new_node)
                        else:
                            node1, node2 = graph.split_node(exist_node, exist_index, count=exist_node.count - 1)
                            graph.add_edge(node1, new_node)
                        exist_node = None
                        exist_index = 0
                else:
                    if exist_node is None:
                        if new_node is None:
                            exist_node = graph.search_and_split(ins[1])
                            exist_node.add_count()
                            exist_index = 1
                        else:
                            node, index = graph.search_node(ins[1])
                            if index == 0:
                                graph.add_edge(new_node, node)
                                node2 = node
                            else:
                                node1, node2 = graph.split_node(node, index)
                                if node == new_node:
                                    graph.add_edge(node2, node2)
                                else:
                                    graph.add_edge(new_node, node2)
                            new_node = None
                            exist_node = node2
                            node2.add_count()
                            exist_index = 1
                    else:
                        if new_node is None:
                            if len(exist_node.asm_seqs) == exist_index:
                                node3 = graph.search_and_split(ins[1])
                                graph.add_edge(exist_node, node3)
                                exist_node = node3
                                node3.add_count()
                                exist_index = 1
                            else:
                                if exist_node.asm_seqs[exist_index].addr == ins[1]:
                                    exist_index += 1
                                else:
                                    node1, node2 = graph.split_node(exist_node, exist_index, count=exist_node.count-1)
                                    node3 = graph.search_and_split(ins[1])
                                    graph.add_edge(node1, node3)
                                    exist_node = node3
                                    node3.add_count()
                                    exist_index = 1
                        else:
                            print('impossible2 %s' % (ins,))
        graph.print_graph('tracer.png')

    def test(self):
        """Debug helper: print registers and changes of change number 13."""
        changes = self.t.fetch_changes_by_clnum(13, 1000)
        print(self.t.fetch_registers(13))
        for change in changes:
            print(change)
Ejemplo n.º 42
0
def createDisassembly(fileContent, offset):
  capStone = Cs(CS_ARCH_X86, CS_MODE_32)
  return list(capStone.disasm(fileContent, offset))
Ejemplo n.º 43
0
def _annotate_branch(exe, instr, func_start_addr, func_end_addr):
    """Attach jump/call target metadata to ``instr``.

    Sets ``jump_table`` for a jump through a register, ``internal_jump`` for an
    immediate target inside ``[func_start_addr, func_end_addr]``, and the
    ``external_jump`` / ``jump_function_*`` attributes when the immediate
    target resolves to a symbol with a non-zero size.
    """
    target_op = instr.operands[0]

    # A jump through a register is recorded as a jump table.
    if instr.group(x86.X86_GRP_JUMP) and target_op.type == x86.X86_OP_REG:
        instr.jump_table = instr.reg_name(target_op.reg)
        return

    # We can only decode the destination if it's an immediate value.
    if target_op.type != x86.X86_OP_IMM:
        return

    dest_addr = target_op.imm
    # A jump/call to an address within this function needs no symbol lookup.
    if func_start_addr <= dest_addr <= func_end_addr:
        instr.internal_jump = True
        instr.jump_address = dest_addr
        return

    symbol, _ = exe.get_symbol_by_addr(dest_addr, instr.address)
    if not symbol:
        return

    text_sect = exe.elff.get_section_by_name('.text')
    sect_addr = text_sect['sh_addr']
    sect_offset = text_sect['sh_offset']

    instr.comment = demangle(symbol.name)
    # Only follow the call address if it is a known location.
    if symbol['st_size'] > 0:
        instr.external_jump = True
        instr.jump_address = symbol["st_value"]
        instr.jump_function_name = demangle(symbol.name)
        instr.jump_function_address = symbol["st_value"]
        instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset
        instr.jump_function_size = symbol['st_size']


def _annotate_rip_operand(exe, instr, op):
    """Resolve a rip-relative memory operand and attach the data found there.

    Stores the resolved address, the value at that address decoded as
    8/16/32/64-bit integers (plus float/double), an optional symbol comment and
    hex/ASCII renderings of the ``op.size`` bytes being referenced.
    """
    instr.rip = True
    instr.rip_offset = op.mem.disp
    # The displacement is applied to the address of the *next* instruction.
    # Computing that as address + size (instead of indexing the following list
    # entry) also works when this is the last decoded instruction.
    instr.rip_resolved = instr.address + instr.size + instr.rip_offset

    # File offset depends on the section the address falls in.
    section = exe.get_section_from_offset(instr.rip_resolved)
    file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"]

    # Read in and unpack the first byte at the offset.
    val_8 = exe.get_bytes(file_offset, 1)
    instr.signed_8 = unpack('b', val_8)[0]
    instr.unsigned_8 = unpack('B', val_8)[0]
    instr.hex_8 = hex(instr.unsigned_8)

    # Read in and unpack the first two bytes at the offset.
    val_16 = exe.get_bytes(file_offset, 2)
    instr.signed_16 = unpack('h', val_16)[0]
    instr.unsigned_16 = unpack('H', val_16)[0]
    instr.hex_16 = hex(instr.unsigned_16)

    # Read in and unpack the first four bytes at the offset.
    val_32 = exe.get_bytes(file_offset, 4)
    instr.signed_32 = unpack('i', val_32)[0]
    instr.unsigned_32 = unpack('I', val_32)[0]
    instr.hex_32 = hex(instr.unsigned_32)
    instr.float = unpack('f', val_32)[0]

    # Read in and unpack the first eight bytes at the offset.
    val_64 = exe.get_bytes(file_offset, 8)
    instr.signed_64 = unpack('q', val_64)[0]
    instr.unsigned_64 = unpack('Q', val_64)[0]
    instr.hex_64 = hex(instr.unsigned_64)
    instr.double = unpack('d', val_64)[0]

    symbol, field_name = exe.get_symbol_by_addr(
        instr.rip_resolved,
        instr.address,
        instr_size=op.size,
        get_sub_symbol=True)
    if symbol:
        instr.comment = demangle(symbol.name)
        if field_name:
            instr.comment += '.' + field_name

    # Kept under its own name so the caller's ``bytes`` argument (and the
    # builtin it shadows) is left alone.
    raw = exe.get_bytes(file_offset, op.size)
    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)

    # HTML collapses consecutive spaces. For presentation purposes, replace
    # spaces with &nbsp (non-breaking space).
    if op.size == 16:
        instr.rip_value_ascii = ''.join(
            '&nbsp' if char == ' ' else char for char in raw)
    # TODO: there's a bug involving ASCII that cannot be jsonified. To get
    # around it, we're temporarily pretending they don't exist. Those edge
    # cases need to be handled.
    # see typeName(
    else:
        instr.rip_value_ascii = "under construction..."


def _record_explicit_operand(instr, op):
    """Append the register information of ``op`` to ``instr.regs_explicit``.

    Memory operands contribute a ``[base, index, disp]`` list, register
    operands their register name, and anything else an empty string.
    """
    if op.type == x86.X86_OP_MEM:
        ptr = ["", "", ""]  # a list instead of an object to guarantee ordering
        instr.regs_ptr_explicit = []
        if op.value.mem.base != 0:
            regname = instr.reg_name(op.value.mem.base)
            ptr[0] = regname
            if regname != "rip":
                instr.regs_ptr_explicit.append(regname)
        if op.value.mem.index != 0:
            regname = instr.reg_name(op.value.mem.index)
            ptr[1] = regname
            if regname != "rip":
                instr.regs_ptr_explicit.append(regname)
        if op.value.mem.disp != 0:
            ptr[2] = hex(op.value.mem.disp)

        instr.ptr = ptr
        instr.ptr_size = op.size
        instr.regs_explicit.append(instr.ptr)
    elif op.type == x86.X86_OP_REG:
        instr.regs_explicit.append(instr.reg_name(op.value.reg))
    else:
        instr.regs_explicit.append("")


def disasm(exe, bytes, offset=0):
    """Disassemble 64-bit x86 code and annotate every instruction.

    Each decoded instruction is printed and then decorated with extra
    attributes: ``nop``, jump/call target details, ``return_type``, resolved
    rip-relative data, explicit and implicit register usage, and documentation
    metadata.  Instructions without documentation are appended to
    ``missing_docs.log`` under ``CUR_PATH``.

    Args:
        exe: object used for lookups; this function calls its
            ``get_symbol_by_addr``, ``get_section_from_offset`` and
            ``get_bytes`` methods and ``elff.get_section_by_name``.
        bytes: the machine code handed to Capstone (the name shadows the
            builtin but is kept because it is part of the signature).
        offset: second argument to Capstone's ``disasm`` (presumably the
            address of the first instruction); defaults to 0.

    Returns:
        The list of annotated instructions, or ``None`` when a ``CsError`` was
        raised (the error is printed instead of propagated).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        if not disassembled:
            return disassembled

        # Address range covered by the decoded code; used to tell jumps that
        # stay inside it from jumps that leave it.
        func_start_addr = disassembled[0].address
        func_end_addr = disassembled[-1].address

        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                _annotate_branch(exe, instr, func_start_addr, func_end_addr)

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    _annotate_rip_operand(exe, instr, op)
                # Handle explicitly read/written registers
                _record_explicit_operand(instr, op)

            # What registers does this instruction read/write?
            instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write]
            # Every call is additionally recorded as writing rax.
            rax_name = instr.reg_name(x86.X86_REG_RAX)
            if instr.group(x86.X86_GRP_CALL) and rax_name not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(rax_name)
            instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)