Ejemplo n.º 1
0
def show_asm(buff, mode, base):
    """
    Disassemble a byte sequence as x86 code and render it as text.

    Every decoded instruction becomes one line made of four padded columns:
    address, space-separated hex bytes (cut to 18 characters and marked with
    a trailing ``+`` when longer), mnemonic and operand string. A final
    ``*/`` line is appended after the listing.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.

    :return: Assembly code representation.
    :rtype: str
    """
    engine = Cs(CS_ARCH_X86, mode)
    engine.detail = True

    lines = []
    for insn in engine.disasm(buff, base):
        digits = binascii.hexlify(insn.bytes).decode('utf-8')
        # Group the hex digits two by two, i.e. one group per byte.
        pairs = [digits[pos:pos + 2] for pos in range(0, len(digits), 2)]
        hex_col = ' '.join(pairs)
        if len(hex_col) > 18:
            hex_col = hex_col[:18] + '+'
        address_col = '%08x:' % insn.address
        lines.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            address_col, hex_col, insn.mnemonic, insn.op_str))
    lines.append('*/\n')

    return ''.join(lines)
Ejemplo n.º 2
0
 def _disassemble(self, data):
     """
     Generator that walks ``data`` and yields formatted output items.

     The items are whatever ``self._format`` and ``self._bytepatch`` return.
     Two offsets drive the loop: ``done`` is the offset up to which output
     has already been produced, ``cursor`` is the offset at which the next
     decode is attempted (it may run ahead of ``done`` while undecodable
     bytes are being skipped).
     """
     # The Capstone constructor arguments come from the selected mode value.
     capstone = Cs(*self.args.mode.value)
     strz = self._strings(data)
     # NOTE(review): only the first item of ``strz`` is ever fetched in this
     # block; ``string`` is not advanced afterwards - confirm this is intended.
     string = next(strz, None)
     cursor, done = 0, 0
     while done < len(data):
         # The cursor never lags behind what has already been emitted.
         cursor = max(cursor, done)
         patchsize = self._nullsize(data, cursor, self.args.width)
         if patchsize > 2:
             # Emit the run as a single 'db' item listing ``patchsize`` zeros.
             yield self._format(done, data[done:done + patchsize], 'db', ','.join('0' * patchsize))
             done += patchsize
             continue
         if cursor >= len(data):
             # The cursor ran off the end: flush everything not yet emitted.
             yield self._bytepatch(data, done, len(data))
             done = cursor
         if string and cursor >= string.end:
             # The cursor is past the string: emit the bytes before it, then
             # the string itself as a 'db' item.
             yield self._bytepatch(data, done, string.start)
             yield self._format(string.start, data[string.start:string.end], 'db', string.data)
             done = string.end
             continue
         try:
             # Decode exactly one instruction from a window of at most 15 bytes.
             ins = next(capstone.disasm(
                 data[cursor:cursor + 15], cursor, count=1))
             end = ins.address + ins.size
             if self.args.str and string:
                 # The instruction overlaps the string: skip the cursor past
                 # the string instead of emitting the instruction.
                 if end > string.start and string.end > cursor:
                     cursor = string.end
                     continue
         except StopIteration:
             # Nothing decodable here; retry one byte further without
             # advancing ``done``.
             cursor += 1
             continue
         else:
             yield self._format(ins.address, ins.bytes, ins.mnemonic, ins.op_str)
             done = end
Ejemplo n.º 3
0
    def generate_rule(self):
        """Generate the Yara rule and return the populated YaraRule object.

        Fills in the rule name and the ``generated_by`` / ``date`` /
        ``version`` metas, then adds one HEX string per chunk:

        * code chunks (``chunk.is_data is False``) are disassembled and each
          instruction is converted with ``self._process_instruction``; the
          per-instruction comments are stored on the rule when
          ``self.do_comment_sig`` is set;
        * all other chunks are added as the plain hex dump of their data.

        The condition is always ``any of them``.
        """
        # Local import keeps this method self-contained; hexlify works on both
        # Python 2 and 3, unlike ``bytes.encode("hex")`` which is Python 2 only.
        import binascii

        self.yr_rule.rule_name = self.rule_name
        self.yr_rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
        self.yr_rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
        self.yr_rule.metas["version"] = "\"1.0\""

        md = Cs(self.instruction_set, self.instruction_mode)
        md.detail = True
        md.syntax = CS_OPT_SYNTAX_INTEL

        for chunk_nr, chunk in enumerate(self._chunks, start=1):
            chunk_id = "$chunk_{}".format(chunk_nr)
            if chunk.is_data is False:
                signature_lines = []
                comment_lines = []
                for ins in md.disasm(chunk.data, chunk.offset):
                    rule_part, comment = self._process_instruction(ins)
                    signature_lines.append(self.format_hex(rule_part) + "\n")
                    comment_lines.append(comment + "\n")
                self.yr_rule.add_string(chunk_id, "".join(signature_lines), StringType.HEX)
                if self.do_comment_sig:
                    self.yr_rule.comments.append("".join(comment_lines))
            else:
                hex_data = binascii.hexlify(chunk.data).decode("ascii")
                rule_part = self.format_hex(hex_data)
                self.yr_rule.add_string(chunk_id, rule_part, StringType.HEX)

        self.yr_rule.condition = "any of them"
        return self.yr_rule
Ejemplo n.º 4
0
    def __init__(self, func, r2obj: dict):
        """Build a basic block from one radare2 block description.

        :param func: owning function object, stored as ``self.parent``.
        :param r2obj: radare2 block dictionary; ``offset`` and ``ops`` are
            required, ``jump``, ``fail`` and ``switchop`` are optional.
        :raises CapstoneDecodeError: when the bytes of an op do not decode to
            exactly one instruction.
        :raises UnhandledOutputError: when a required key is missing.
        """
        self.parent = func
        # Bound before the try block so the error handler can always refer to
        # it, even when 'offset' itself is the missing key.
        self.address = None
        try:
            self.address = r2obj['offset']
            self.jump = r2obj.get('jump', 0)
            self.fail = r2obj.get('fail', 0)
            cases = r2obj.get('switchop', dict()).get('cases', dict())
            self.cases = {c['jump'] for c in cases}

            # One decoder is enough for every op of the block.
            md = Cs(CS_ARCH_X86, CS_MODE_64)
            md.detail = True

            self.insns = []
            for op in r2obj['ops']:
                _addr = op['offset']
                _insns = list(
                    md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
                if len(_insns) != 1:
                    raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')
                _insn: CsInsn = _insns[0]
                _reads, _ = _insn.regs_access()
                # A jmp that reads at least one register is flagged indirect.
                indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
                self.insns.append(Instruction(_addr, indirect))
        except KeyError as exc:
            if self.address is None:
                where = 'unknown address'
            else:
                where = f'{self.address:#x}'
            err_msg = f'Unexpected radare2 output at Basic Block {where}'
            logging.error(err_msg)
            raise UnhandledOutputError(err_msg) from exc
Ejemplo n.º 5
0
    def __init__(self, arch, mode):
        """Store the architecture/mode pair and build the lookup tables.

        Creates the Capstone handle, selects the prologue byte patterns that
        belong to ``mode`` (a ``KeyError`` is raised for any mode other than
        ``CS_MODE_32`` / ``CS_MODE_64``) and initialises the mnemonic and
        register sets.
        """
        self.arch = arch
        self.mode = mode
        self.capstone = Cs(self.arch, self.mode)

        prologues_32 = [
            b"\x55\x89\xE5",  # push ebp & mov ebp, esp
            b"\x55\x8B\xEC",  # push ebp & mov ebp, esp
            b"\x55\x8b\x6c\x24",  # push ebp & mov ebp, [esp+?]
        ]
        prologues_64 = [
            b"\x55\x48\x89\xE5",  # push rbp & mov rbp, rsp
        ]
        prologues_by_mode = {
            CS_MODE_32: prologues_32,
            CS_MODE_64: prologues_64,
        }
        self.prologues = prologues_by_mode[mode]

        self.conditional_jmp_mnemonics = {
            # zero / equality
            'jz', 'je', 'jnz', 'jne',
            # counter register tests and loops
            'jcxz', 'jecxz', 'jrcxz', 'loop', 'loopne', 'loope',
            # parity, sign, overflow
            'jp', 'jpe', 'jnp', 'js', 'jns', 'jo', 'jno',
            # unsigned comparisons
            'ja', 'jae', 'jb', 'jbe',
            # signed comparisons
            'jg', 'jge', 'jl', 'jle',
        }
        self.x86_32_registers = {
            'eax', 'ebx', 'ecx', 'edx', 'esi', 'edi', 'esp', 'ebp'
        }
        self.max_instruction_size = 16
Ejemplo n.º 6
0
 def disassemble(self, code: bytes, address: int) -> List[DumpAssembly]:
     """Disassemble ``code`` as ARM (ARM mode) starting at ``address``.

     Returns one ``DumpAssembly`` per decoded instruction, built from the
     instruction address and the tab-separated mnemonic and operands.
     """
     engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
     return [
         DumpAssembly(insn.address, f'{insn.mnemonic}\t{insn.op_str}')
         for insn in engine.disasm(code, address)
     ]
Ejemplo n.º 7
0
 def dissemble_code(self, code, baseaddr):
     """Print a 32-bit big-endian PowerPC disassembly of ``code``.

     One line is printed per decoded instruction: address, mnemonic and
     operands separated by tabs. Addresses start at ``baseaddr``.
     """
     md = Cs(capstone.CS_ARCH_PPC,
             capstone.CS_MODE_32 | capstone.CS_MODE_BIG_ENDIAN)
     md.syntax = capstone.CS_OPT_SYNTAX_INTEL
     # disasm_lite yields plain (address, size, mnemonic, op_str) tuples.
     for address, _size, mnemonic, op_str in md.disasm_lite(code, baseaddr):
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print("0x%x:\t%s\t%s" % (address, mnemonic, op_str))
Ejemplo n.º 8
0
    def __init__(self, binary: MachoBinary) -> None:
        """Create the analyzer for ``binary`` and register it in the class cache.

        Sets up an ARM64 Capstone handle with details enabled, the dyld /
        string-table / stub helpers, a temporary SQLite database initialised
        with ``ANALYZER_SQL_SCHEMA``, and then builds the callable-symbol and
        function-boundary indexes.
        """
        self.binary = binary
        self.cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
        self.cs.detail = True

        # Worker to parse dyld bytecode stream and extract dyld stub addresses to the DyldBoundSymbol they represent
        self.dyld_info_parser = DyldInfoParser(self.binary)
        # Each __stubs function calls a single dyld stub address, which has a corresponding DyldBoundSymbol.
        # Map of each __stub function to the associated name of the DyldBoundSymbol
        self._imported_symbol_addresses_to_names: Dict[VirtualMemoryPointer,
                                                       str] = {}

        self.crossref_helper = MachoStringTableHelper(binary)
        self.imported_symbols = self.crossref_helper.imported_symbols

        # The stub parser is handed the Capstone handle created above.
        self.imp_stubs = MachoImpStubsParser(binary, self.cs).imp_stubs
        # Objective-C data starts out empty; nothing in this method fills it.
        self._objc_helper: Optional[ObjcRuntimeDataParser] = None
        self._objc_method_list: List[ObjcMethodInfo] = []

        # Use a temporary database to store cross-referenced data. This provides constant-time lookups for things like
        # finding all the calls to a particular function.
        self._has_computed_xrefs = False
        self._db_tempdir = pathlib.Path(tempfile.mkdtemp())
        self._db_path = self._db_tempdir / "strongarm.db"
        self._db_handle = sqlite3.connect(self._db_path.as_posix())
        cursor = self._db_handle.executescript(ANALYZER_SQL_SCHEMA)
        # Using the connection as a context manager commits on successful exit.
        with self._db_handle:
            cursor.close()

        self._build_callable_symbol_index()
        self._build_function_boundaries_index()

        # Done setting up, store this analyzer in class cache
        MachoAnalyzer._ANALYZER_CACHE[binary] = self
Ejemplo n.º 9
0
 def getCapstone(self):
     """Return the Capstone handle, creating and caching it on first use.

     The handle is an x86 disassembler in 64-bit mode when ``self.bitness``
     is 64 and in 32-bit mode otherwise, with details enabled.
     """
     if self.capstone is not None:
         return self.capstone
     cs_mode = CS_MODE_64 if self.bitness == 64 else CS_MODE_32
     self.capstone = Cs(CS_ARCH_X86, cs_mode)
     self.capstone.detail = True
     return self.capstone
Ejemplo n.º 10
0
def main(fname):
    """ Basic python version of the tools:

            - "objdump -d" (linux)
            - "dumpbin /disasm" (MSVC)

        It parses the AR and COFF structures, but uses the "capstone" library to disassemble.

        For every COFF member of ``fname`` the first section is disassembled as
        32-bit x86 (with skipdata enabled) and printed; symbol names are printed
        as labels once the listing reaches their value.
    """
    for coff in read_lib_file(fname):
        if not coff:
            continue

        pending = deque(coff.symbols)
        engine = Cs(CS_ARCH_X86, CS_MODE_32)
        engine.skipdata = True

        for insn in engine.disasm(coff.sections[0].data, 0x000):
            # Consume every symbol located at or before this instruction; only
            # those with type 32 in section number 1 are printed as labels.
            while pending and insn.address >= pending[0].value:
                sym = pending.popleft()
                if sym.type == 32 and sym.section_number == 1:
                    print(sym.name.decode(errors="ignore") + ":")

            # At most six bytes go on the main line; the rest wraps below it.
            head, tail = insn.bytes, b""
            if len(head) >= 6:
                head, tail = head[:6], head[6:]

            if insn.op_str:
                text = "%-12s%s" % (insn.mnemonic, format_asm(insn.op_str))
            else:
                text = insn.mnemonic

            print("  %08X: %-19s" %
                  (insn.address, hex_with_spaces(head)) + text)
            if tail:
                print("            %s" % (hex_with_spaces(tail)))
Ejemplo n.º 11
0
    def __init__(self, args):
        """Set up the emulator from ``args`` and immediately start emulation.

        The binary is parsed and checked, a Unicorn ARM engine is created,
        memory is mapped, the optional debug hook is installed, the
        forkserver is started and the input loaded; finally ``emu_start`` is
        called. A ``UcError`` raised during emulation is handed to
        ``self.force_crash``.
        """
        self.args = args
        self.parse_binary()
        self.check_binary()

        # Initialize the engine
        # Bit 0 of the entry point selects Thumb mode.
        mode = UC_MODE_THUMB if self.mclf.entry & 1 else UC_MODE_ARM
        self.uc = Uc(UC_ARCH_ARM, mode)

        self.map_sections()
        self.map_shared_memory()
        self.map_tlapi_handler()

        # Add the debug hook if needed
        if args.debug:
            # Capstone is imported only when debugging is requested.
            from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB
            self.cs_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
            self.cs_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
            self.uc.hook_add(UC_HOOK_CODE, self.DEBUG)

        self.start_forkserver()
        self.load_input()

        try:
            self.LOG("[+] Starting fuzzing")
            # NOTE(review): the start address always has bit 0 set, even when
            # the engine was created in ARM mode above - confirm intended.
            # The end address is the end of the text region.
            self.uc.emu_start(self.mclf.entry | 1,
                              self.mclf.text_va + self.mclf.text_len,
                              count=0,
                              timeout=0)
        except UcError as e:
            self.force_crash(e)
Ejemplo n.º 12
0
    def _initialize_zelos(self, binary=None):
        """Create the emulator state and the components that depend on it.

        Builds ``self.state`` for ``binary``, creates the Capstone handle
        matching the state's architecture and endianness, then sets up
        triggers, processes, networking, the first process and the initial
        file/handle entries.

        Raises ``ZelosLoadException`` when the state's endianness is neither
        "little" nor "big"; an architecture missing from the mapping below
        raises ``KeyError``.
        """
        self.state = State(self, binary, self.date)

        # Architecture name -> (Capstone arch, base Capstone mode).
        cs_arch_mode_sm_dict = {
            "x86": (CS_ARCH_X86, CS_MODE_32),
            "x86_64": (CS_ARCH_X86, CS_MODE_64),
            "arm": (CS_ARCH_ARM, CS_MODE_ARM),
            "mips": (CS_ARCH_MIPS, CS_MODE_MIPS32),
        }

        arch = self.state.arch
        (cs_arch, cs_mode) = cs_arch_mode_sm_dict[arch]

        # The endianness flag is OR-ed into the base mode.
        endianness = self.state.endianness
        if endianness == "little":
            cs_mode |= CS_MODE_LITTLE_ENDIAN
        elif endianness == "big":
            cs_mode |= CS_MODE_BIG_ENDIAN
        else:
            raise ZelosLoadException(f"Unsupported endianness {endianness}")
        self.cs = Cs(cs_arch, cs_mode)
        self.cs.detail = True

        self.logger.debug(
            f"Initialized {arch} {self.state.bits} emulator/disassembler")

        self.triggers = Triggers(self)
        self.processes.set_architecture(self.state)

        self.network = Network(self.helpers, self.files, None)

        # Create the first process and copy the configured command line,
        # environment and virtual file location onto it.
        self.processes._create_first_process(self.main_module_name)
        p = self.current_process
        p.cmdline_args = self.cmdline_args
        p.environment_variables = self.config.env_vars
        p.virtual_filename = self.config.virtual_filename
        p.virtual_path = self.config.virtual_path

        # The scheduling interrupt is only registered when the unicorn module
        # exposes the WITH_ZEROPOINT_PATCH attribute.
        if hasattr(unicorn.unicorn, "WITH_ZEROPOINT_PATCH"):

            def process_switch_wrapper(*args, **kwargs):
                # Block count interrupt. Fires every 2^N blocks executed
                # Use this as an opportunity to swap threads.
                self.logger.info(">>> Tracing Thread Swap Opportunity")
                self.processes.schedule_next()

            self.interrupt_handler.register_interrupt_handler(
                0xF8F8F8F8, process_switch_wrapper)

        # Register the target file, under its virtual name when one is set.
        if self.config.filename is not None and self.config.filename != "":
            if (self.config.virtual_filename is not None
                    and self.config.virtual_filename != ""):
                self.files.add_file(self.config.filename,
                                    self.config.virtual_filename)
            else:
                self.files.add_file(self.config.filename)

        # TODO: SharedSection needs to be removed
        self.processes.handles.new("section", "\\Windows\\SharedSection")
Ejemplo n.º 13
0
    def _setup(self,
               user_arch=None,
               user_mode=None,
               cs_arch=None,
               cs_mode=None):
        """Create the Unicorn/Capstone pair and prepare the emulator.

        When both ``user_arch`` and ``user_mode`` are given they are used for
        Unicorn and ``cs_arch`` / ``cs_mode`` for Capstone; otherwise the
        engines are created by the ``setup_*`` method matching
        ``self.dwarf.arch``. Afterwards the memory around the current pc is
        mapped, registers are copied from the current context into Unicorn
        and the code / memory hooks are installed.

        :return: 0 on success.
        :raises EmulatorSetupFailedError: for an unsupported architecture,
            missing engines, a non-native context or a failed mapping.
        """
        if user_arch is not None and user_mode is not None:
            try:
                self.uc = unicorn.Uc(user_arch, user_mode)
                self.cs = Cs(cs_arch, cs_mode)

                self.thumb = user_mode == unicorn.UC_MODE_THUMB
            except Exception:
                # A bare ``except:`` would also swallow KeyboardInterrupt and
                # SystemExit; only real errors are turned into a setup failure.
                raise self.EmulatorSetupFailedError('Unsupported arch')
        else:
            if self.dwarf.arch == 'arm':
                self.setup_arm()
            elif self.dwarf.arch == 'arm64':
                self.setup_arm64()
            elif self.dwarf.arch == 'ia32':
                self.setup_x86()
            elif self.dwarf.arch == 'x64':
                self.setup_x64()
            else:
                # unsupported arch
                raise self.EmulatorSetupFailedError('Unsupported arch')

        if not self.uc or not self.cs:
            raise self.EmulatorSetupFailedError('Unicorn or Capstone missing')

        # enable capstone details (self.cs is known to be set at this point)
        self.cs.detail = True

        if not self.context.is_native_context:
            raise self.EmulatorSetupFailedError(
                'Cannot run emulator on non-native context')

        err = self.map_range(self.context.pc.value)
        if err:
            raise self.EmulatorSetupFailedError('Mapping failed')

        self.current_context = EmulatorContext(self.dwarf)
        # Seed Unicorn with every register the current context knows about,
        # except the blacklisted ones.
        registers = self.current_context._unicorn_registers
        for reg in registers:
            if reg in self.context.__dict__ and reg not in self._blacklist_regs:
                self.uc.reg_write(registers[reg],
                                  self.context.__dict__[reg].value)

        self.uc.hook_add(unicorn.UC_HOOK_CODE, self.hook_code)
        self.uc.hook_add(unicorn.UC_HOOK_MEM_WRITE | unicorn.UC_HOOK_MEM_READ,
                         self.hook_mem_access)
        self.uc.hook_add(
            unicorn.UC_HOOK_MEM_FETCH_UNMAPPED
            | unicorn.UC_HOOK_MEM_WRITE_UNMAPPED
            | unicorn.UC_HOOK_MEM_READ_UNMAPPED, self.hook_unmapped)
        self.current_context.set_context(self.uc)
        return 0
Ejemplo n.º 14
0
def disassemble(addr, data):
    """Print a big-endian PowerPC disassembly of ``data`` located at ``addr``.

    One line is printed per decoded instruction; when nothing could be
    decoded a single failure message naming ``addr`` is printed instead.
    """
    engine = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN)
    decoded_any = False
    for insn in engine.disasm(data, addr):
        decoded_any = True
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr))
Ejemplo n.º 15
0
def get_raw_disassembler(arch, detailed=True):
    """Return an x86 Capstone handle for the given binary type value.

    ``arch`` is compared against the values of
    ``BinaryType.SCS_32BIT_BINARY`` and ``BinaryType.SCS_64BIT_BINARY``; any
    other value raises ``Exception``. ``detailed`` is assigned to the
    handle's ``detail`` attribute.
    """
    if arch == BinaryType.SCS_32BIT_BINARY.value:
        cs_mode = CS_MODE_32
    elif arch == BinaryType.SCS_64BIT_BINARY.value:
        cs_mode = CS_MODE_64
    else:
        raise Exception("No disassembler for this architecture")

    engine = Cs(CS_ARCH_X86, cs_mode)
    engine.detail = detailed
    return engine
Ejemplo n.º 16
0
 def __init__(self, encoding, position):
     """Decode ``encoding`` with Capstone and keep the resulting instruction.

     The integer stored in ``self._encoding`` is serialised as four
     little-endian bytes and disassembled in ARM mode at ``position``.
     ``self._cap`` ends up as the last decoded instruction, or ``None`` when
     nothing decodes.
     """
     super(CAPSInstruction, self).__init__(encoding, position)
     raw = self._encoding.to_bytes(4, byteorder='little')
     engine = Cs(CS_ARCH_ARM, CS_MODE_ARM)
     engine.detail = True
     self._cap = None
     for decoded in engine.disasm(raw, position):
         self._cap = decoded
Ejemplo n.º 17
0
    def __init__(self, win_emu):
        """Initialise the handler's empty tables and bind it to ``win_emu``.

        The architecture and pointer size are queried from the emulator once
        and cached; a 32-bit x86 Capstone handle is stored in ``self.disasm``.
        """
        super(ApiHandler, self).__init__()
        self.funcs = dict()
        self.data = dict()
        self.mod_name = ''
        self.win_emu = win_emu
        self.arch = win_emu.get_arch()
        self.ptr_size = win_emu.get_ptr_size()
        self.cpp_procedure = dict()
        self.disasm = Cs(CS_ARCH_X86, CS_MODE_32)
Ejemplo n.º 18
0
def extract_call_destinations(elf):
    """Collect the distinct call targets found in the code of ``elf``.

    The code comes from ``get_ep_section_or_segment``: for a segment,
    disassembly starts at the entry point; for a section, at the image base
    plus the section's file offset. Targets are returned as hexadecimal
    strings without prefix, in order of first appearance:

    * x86 / x64: operand of ``call`` instructions starting with ``0x``;
    * ARM: operand of ``bl*`` instructions starting with ``#0x``;
    * MIPS: a slice of the operand of ``lw $t9, ...`` instructions.

    :return: list of target strings; empty when there is no code
        section/segment or the architecture is not present in ``CALL_LIST``.
    """
    # get the code section or segment (if there's no section)
    code_section_or_segment = get_ep_section_or_segment(elf)
    if code_section_or_segment is None:
        # Checked up front: the object is subscripted right below.
        return []

    # if we only got the segment, start extracting calls from the EP
    if isinstance(code_section_or_segment, elftools.elf.segments.Segment):
        ofs = elf.header.e_entry
        start = ofs - code_section_or_segment["p_vaddr"]
        code_data = code_section_or_segment.data()[start:]
    # otherwise we use the code section
    else:
        ofs = elf_get_imagebase(elf) + code_section_or_segment["sh_offset"]
        code_data = code_section_or_segment.data()

    # the disassembly and the call mnemonic are based on the architecture of
    # the ELF file, as defined by the CALL_LIST dict
    arch = elf.get_machine_arch()

    # in case we have not specified the capstone arch and mode, skip
    if arch not in CALL_LIST:
        return []

    # TODO: automatically identify the architecture the binary was compiled to
    md = Cs(CALL_LIST[arch]["cs_arch"], CALL_LIST[arch]["cs_mode"])

    symbols_list = []
    seen = set()  # constant-time duplicate check; the list keeps the order

    # TODO: handle UPX-packed binaries as they have no sections so we should go straight to segment offset
    for i in md.disasm(code_data, ofs):
        address = None
        if arch in ("x86", "x64") and i.mnemonic == "call":
            # Consider only call to absolute addresses
            if i.op_str.startswith("0x"):
                address = i.op_str[2:]  # cut off '0x' prefix
        elif arch == "ARM" and i.mnemonic.startswith("bl"):
            if i.op_str.startswith("#0x"):
                address = i.op_str[3:]  # cut off '#0x' prefix
        elif arch == "MIPS" and i.mnemonic == "lw":
            if i.op_str.startswith("$t9, "):
                address = i.op_str[8:-5]

        if address is not None and address not in seen:
            seen.add(address)
            symbols_list.append(address)

    return symbols_list
Ejemplo n.º 19
0
def disassemble(addr, data, thumb=False):
    """Print a little-endian ARM disassembly of ``data`` located at ``addr``.

    :param addr: address of the first byte of ``data``.
    :param data: raw bytes to decode.
    :param thumb: decode as Thumb when true, as ARM otherwise.

    One line is printed per decoded instruction; when nothing could be
    decoded a single failure message naming ``addr`` is printed instead.
    """
    mode = CS_MODE_THUMB if thumb else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode | CS_MODE_LITTLE_ENDIAN)
    decoded_any = False
    for insn in md.disasm(data, addr):
        decoded_any = True
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("0x%x:\t%s    %s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr,))
 def init(self, disassembly):
     """Bind this object to ``disassembly`` and prepare the candidate queue.

     Copies the code areas (when the binary info provides any), identifies
     the language, creates the x86 Capstone handle matching the bitness
     (64-bit mode for 64, 32-bit mode otherwise), locates candidates and
     builds the queue.
     """
     binary_info = disassembly.binary_info
     if binary_info.code_areas:
         self._code_areas = binary_info.code_areas
     self.disassembly = disassembly
     self.lang_analyzer = LanguageAnalyzer(disassembly)
     self.disassembly.language = self.lang_analyzer.identify()
     self.bitness = disassembly.binary_info.bitness
     cs_mode = CS_MODE_64 if self.bitness == 64 else CS_MODE_32
     self.capstone = Cs(CS_ARCH_X86, cs_mode)
     self.locateCandidates()
     self.disassembly.identified_alignment = self.identified_alignment
     self._buildQueue()
Ejemplo n.º 21
0
def disassemble(addr, data, thumb=False):
    """Print a little-endian ARM disassembly of ``data`` located at ``addr``.

    :param addr: address of the first byte of ``data``.
    :param data: raw bytes to decode.
    :param thumb: decode as Thumb when true, as ARM otherwise.

    One line is printed per decoded instruction; when nothing could be
    decoded a single failure message naming ``addr`` is printed instead.
    """
    mode = CS_MODE_THUMB if thumb else CS_MODE_ARM
    md = Cs(CS_ARCH_ARM, mode | CS_MODE_LITTLE_ENDIAN)
    decoded_any = False
    for insn in md.disasm(data, addr):
        decoded_any = True
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("0x%x:\t%s    %s" % (insn.address, insn.mnemonic, insn.op_str))
    if not decoded_any:
        print("Couldn't disassemble at 0x%x" % (addr,))
Ejemplo n.º 22
0
    def __gadgetsFinding(self, section, gadgets, arch, mode):
        """Search ``section`` for gadgets ending in one of ``gadgets``.

        Each entry of ``gadgets`` is indexed as (pattern, size, alignment).
        For every match of the pattern in ``section["opcodes"]`` the code
        steps back ``i * alignment`` bytes (``i`` below
        ``self.__options.depth``), disassembles from there up to the end of
        the match and keeps the result when the last decoded instruction
        still matches the pattern.

        :return: list of dicts with the keys ``vaddr``, ``gadget``,
            ``decodes``, ``bytes`` and ``prev``.
        """
        C_OP = 0
        C_SIZE = 1
        C_ALIGN = 2
        PREV_BYTES = 9  # Number of bytes prior to the gadget to store.
        ret = []
        md = Cs(arch, mode)
        opcodes = section["opcodes"]
        base_vaddr = section["vaddr"]
        for gad in gadgets:
            allRefRet = [m.start() for m in re.finditer(gad[C_OP], opcodes)]
            for ref in allRefRet:
                for i in range(self.__options.depth):
                    back = i * gad[C_ALIGN]
                    if (base_vaddr + ref - back) % gad[C_ALIGN] != 0:
                        continue
                    start = ref - back
                    if start < 0:
                        # A negative start would wrap around to the end of
                        # the section and produce a bogus gadget.
                        continue
                    gadget_bytes = opcodes[start:ref + gad[C_SIZE]]
                    decodes = md.disasm(gadget_bytes, base_vaddr + ref)
                    gadget = ""
                    last_decode = None
                    for decode in decodes:
                        gadget += (decode.mnemonic + " " + decode.op_str +
                                   " ; ").replace("  ", " ")
                        last_decode = decode
                    if last_decode is None:
                        # Nothing decoded: there is no final instruction to
                        # validate (previously an unbound or stale variable).
                        continue
                    if re.search(gad[C_OP], last_decode.bytes) is None:
                        continue
                    if len(gadget) > 0:
                        gadget = gadget[:-3]  # drop the trailing " ; "
                        off = self.__offset
                        vaddr = off + base_vaddr + ref - back
                        prevBytesAddr = max(base_vaddr, vaddr - PREV_BYTES)
                        prevBytes = opcodes[prevBytesAddr - base_vaddr:
                                            vaddr - base_vaddr]
                        ret += [{
                            "vaddr": vaddr,
                            "gadget": gadget,
                            "decodes": decodes,
                            "bytes": gadget_bytes,
                            "prev": prevBytes
                        }]
        return ret
Ejemplo n.º 23
0
    def __init__(self, firmware: Firmware = None, state: CpuState = None, verbose=0, init=True):
        """Create the Thumb-mode Unicorn/Capstone pair and reset bookkeeping.

        ``firmware``, ``state`` and ``verbose`` are stored as given. When
        ``init`` is true, ``self.init()`` is called at the end.
        """
        self.firmware = firmware

        # Emulator and disassembler are both created in ARM Thumb mode.
        self.uc = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
        self.cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
        self.cs.detail = True

        self.state = state
        self.has_error = None
        self.last_addr = None
        self.ready = False
        self.context = None
        self.verbose = verbose

        if not init:
            return
        self.init()
Ejemplo n.º 24
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    ``bytes`` is decoded as 64-bit x86 code located at ``offset``. When the
    first instruction has a memory operand based on RIP, the referenced
    address (address of the following instruction plus the displacement)
    and the operand size are returned.

    :return: ``(address, size)``, or ``(None, None)`` when nothing decodes,
        no such operand exists, or Capstone reports an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        instruc = next(md.disasm(bytes, offset), None)
        if instruc is None:
            return None, None

        # RIP-relative displacements are relative to the next instruction;
        # computing its address from the size avoids requiring that a second
        # instruction was decoded.
        next_address = instruc.address + instruc.size

        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
Ejemplo n.º 25
0
def find_single(args):
    """Build the gadgets that end at one pattern match.

    ``args`` is a single 9-tuple (the signature takes one argument so the
    function stays usable as a map-style worker):
    ``(raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad,
    need_filter, ref)``. ``gad`` is indexed as (pattern, size, alignment)
    and ``ref`` is the offset of the match inside ``raw_data``.

    For ten backward steps of ``alignment`` bytes the bytes up to the end of
    the match are disassembled; aligned, non-empty decodes that pass
    ``passClean`` and the architecture filter are kept, and classified
    unless ``need_filter`` is set.

    :return: list of the resulting gadget objects.
    """
    # Unpacked here because tuple parameters in a signature are Python 2 only.
    (raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad,
     need_filter, ref) = args

    C_SIZE = 1
    C_ALIGN = 2

    allgadgets = []

    md = Cs(arch, mode)
    md.detail = True

    for i in range(10):
        back_bytes = i * gad[C_ALIGN]
        section_start = ref - back_bytes
        start_address = pvaddr + section_start
        if elftype == 'DYN':
            start_address = elf_base_addr + start_address

        gadget_bytes = raw_data[section_start:ref + gad[C_SIZE]]
        decodes = list(md.disasm(gadget_bytes, start_address))
        insns = [(decode.mnemonic + " " + decode.op_str).strip()
                 for decode in decodes]

        if not insns:
            continue
        if (start_address % gad[C_ALIGN]) != 0:
            continue

        address = start_address
        if mode == CS_MODE_THUMB:
            # Thumb code addresses carry bit 0 set.
            address = address | 1

        onegad = Gadget(address, insns, {}, 0, gadget_bytes)
        if not passClean(decodes):
            continue

        if arch == CS_ARCH_X86:
            onegad = filter_for_x86_big_binary(onegad)
        elif arch == CS_ARCH_ARM:
            onegad = filter_for_arm_big_binary(onegad)

        if (not need_filter) and onegad:
            classifier = GadgetClassifier(arch, mode)
            onegad = classifier.classify(onegad)

        if onegad:
            allgadgets += [onegad]

    return allgadgets
Ejemplo n.º 26
0
    def __setup_available_disassemblers(self):
        """Create one detail-enabled Capstone handle per ARM execution mode.

        The handles are stored in ``self._avaliable_disassemblers`` keyed by
        ``ARCH_ARM_MODE_ARM`` and ``ARCH_ARM_MODE_THUMB``.
        """
        mode_pairs = (
            (ARCH_ARM_MODE_ARM, CS_MODE_ARM),
            (ARCH_ARM_MODE_THUMB, CS_MODE_THUMB),
        )

        handles = {}
        for arch_mode, cs_mode in mode_pairs:
            handles[arch_mode] = Cs(CS_ARCH_ARM, cs_mode)
        self._avaliable_disassemblers = handles

        for arch_mode, _ in mode_pairs:
            self._avaliable_disassemblers[arch_mode].detail = True
Ejemplo n.º 27
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    ``bytes`` is decoded as 64-bit x86 code located at ``offset``. When the
    first instruction has a memory operand based on RIP, the referenced
    address (address of the following instruction plus the displacement)
    and the operand size are returned.

    :return: ``(address, size)``, or ``(None, None)`` when nothing decodes,
        no such operand exists, or Capstone reports an error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        instruc = next(md.disasm(bytes, offset), None)
        if instruc is None:
            return None, None

        # RIP-relative displacements are relative to the next instruction;
        # computing its address from the size avoids requiring that a second
        # instruction was decoded.
        next_address = instruc.address + instruc.size

        # get rip relative address
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_address + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
Ejemplo n.º 28
0
 def __init__(self, sections: SectionFinder, arch, bits):
     '''Prepare the disassembler used for the provided sections.
     Arguments:
         sections -- A section finder instance.
         arch -- The architecture, as defined by Capstone.
         bits -- The bit width, as defined by Capstone.
     '''
     self.sections = sections

     # Capstone handle with skipdata and details both switched on.
     engine = Cs(arch, bits)
     engine.skipdata = True
     engine.detail = True
     self.md = engine

     # Bookkeeping for the most recent data range; nothing is loaded yet.
     self._last_data = None
     self._last_start = 0
     self._last_end = 0
Ejemplo n.º 29
0
 def setup_arm(self):
     """Create the Capstone/Unicorn pair for 32-bit ARM.

     The mode (ARM or Thumb) follows the ``thumb`` flag of the context's pc.
     In Thumb mode the ``VFP`` code blob is additionally executed once from
     a temporary mapping at 0x1000.
     """
     self.thumb = self.context.pc.thumb
     if not self.thumb:
         self.cs = Cs(CS_ARCH_ARM, CS_MODE_ARM)
         self.uc = unicorn.Uc(unicorn.UC_ARCH_ARM, unicorn.UC_MODE_ARM)
         self._current_cpu_mode = unicorn.UC_MODE_ARM
         return

     self._current_cpu_mode = unicorn.UC_MODE_THUMB
     self.cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
     self.uc = unicorn.Uc(unicorn.UC_ARCH_ARM, unicorn.UC_MODE_THUMB)

     # Enable VFP instr: map a scratch page, run the blob, unmap again.
     scratch_base = 0x1000
     scratch_size = 1024
     self.uc.mem_map(scratch_base, scratch_size)
     self.uc.mem_write(scratch_base, binascii.unhexlify(VFP))
     self.uc.emu_start(scratch_base | 1, scratch_base + len(VFP))
     self.uc.mem_unmap(scratch_base, scratch_size)
Ejemplo n.º 30
0
    def __init__(self, architecture, code):
        """Prepare a disassembly iterator over ``code``.

        When ``architecture`` is a key of ``arch_mapping`` a detail-enabled
        Capstone handle is created, ``self.iterator`` is set to the
        disassembly of ``code`` from address 0 and ``self.valid`` becomes
        true. Otherwise ``self.md`` and ``self.iterator`` stay ``None`` and
        ``self.valid`` stays false.
        """
        self.md = None
        self.data = []
        self.code = code
        self.iterator = None
        self.architecture = architecture

        self.valid = False

        if architecture not in arch_mapping:
            return

        cs_arch, cs_mode = arch_mapping[architecture]
        self.md = Cs(cs_arch, cs_mode)
        self.md.detail = True
        self.iterator = self.md.disasm(self.code, 0)
        self.valid = True
Ejemplo n.º 31
0
    def _cs_disassemble_one(self, data, address):
        """Return the first instruction decoded from ``data`` at ``address``.

        The configured disassembler is tried first; when it decodes nothing,
        a fresh ARM-mode Capstone handle is tried as a fallback.
        ``InvalidDisassemblerData`` is raised when both decode nothing.
        """
        decoded = list(self._disassembler.disasm(data, address))

        # TODO: Improve this check.
        if not decoded:
            fallback = Cs(CS_ARCH_ARM, CS_MODE_ARM)
            decoded = list(fallback.disasm(data, address))

        if not decoded:
            raise InvalidDisassemblerData("CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))

        return decoded[0]
Ejemplo n.º 32
0
def trace(ql: Qiling, address: int, size: int, md: Cs):
    """Log one trace line for the instruction that is about to be executed.

    Args:
        ql: the qiling instance
        address: the address of the instruction that is about to be executed
        size: size of the instruction (in bytes)
        md: initialized disassembler object
    """

    # fetch the bytes of the current instruction and decode them
    raw = ql.mem.read(address, size)
    insn = next(md.disasm(raw, address))

    # number of hex digits needed to print an address
    addr_width = ql.arch.bits // 4

    # current values of the registers this instruction reads.
    #
    # note: this runs before the instruction is emulated, so 'rip' still
    # points at the current instruction while the instruction itself treats
    # it as pointing at the next one; the value shown for 'rip' is therefore
    # incorrect
    reg_values = ", ".join(
        f'{md.reg_name(reg)} = {ql.arch.regs.read(CS_UC_REGS[reg]):#x}'
        for reg in insn.regs_access()[0])

    # human-readable trace line: address | bytes mnemonic operands | registers
    trace_line = f'{insn.address:0{addr_width}x} | {insn.bytes.hex():24s} {insn.mnemonic:12} {insn.op_str:35s} | {reg_values}'

    # wrap the line in the faded / reset escape sequences so trace output is
    # easy to tell apart from other log entries
    ql.log.info(f'\033[2m{trace_line}\033[0m')
Ejemplo n.º 33
0
    def disassemble(self, size, thumb=True):
        """
        Display the bytes disassembled using Capstone at the current position.

        One line is printed per decoded instruction: the address as eight hex
        digits, then the mnemonic and the operands.

        Args:
            size (:obj:`int`): the number of bytes to disassemble
            thumb (:obj:`bool`): True if Thumb, False otherwise
        """
        from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM, CS_MODE_THUMB

        cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB if thumb else CS_MODE_ARM)

        addr = self._ptr.value
        for insn in cs.disasm(self.read(size), addr):
            # The three fields are passed individually: passing them as one
            # tuple makes ``{:08x}`` fail with a TypeError.
            print("{:08x}:\t{} {}".format(
                insn.address, insn.mnemonic, insn.op_str))
Ejemplo n.º 34
0
    def disasm(self, addr):
        """Disassemble the section containing ``addr`` into ``self.code``.

        ``die`` is called when the section is not flagged executable. Every
        decoded instruction is stored in ``self.code`` under its address and
        the address is appended to ``self.code_idx``. For PE binaries the
        import symbols are loaded afterwards.
        """
        data, virtual_addr, flags = self.binary.get_section(addr)

        if not flags["exec"]:
            die("the address 0x%x is not in an executable section" % addr)

        engine = Cs(CS_ARCH_X86, CS_MODE_64 if self.bits == 64 else CS_MODE_32)
        engine.detail = True

        for insn in engine.disasm(data, virtual_addr):
            where = insn.address
            self.code[where] = insn
            self.code_idx.append(where)

        # Import symbols of a PE can only be loaded now: resolving them
        # needs the disassembled code.
        if self.binary.get_type() == T_BIN_PE:
            self.binary.load_import_symbols(self.code)
Ejemplo n.º 35
0
def dumpASM(flo, mode, maxAddr=1e99):
    """Disassemble ``flo`` as x86 code and print every instruction.

    For each instruction the address, mnemonic and operand string are
    printed, followed by one line listing the implicitly read registers and
    one line listing the implicitly written registers.

    :param flo: Machine code to disassemble; decoding starts at address 0.
    :param int mode: Bitness of the code, either 32 or 64.
    :param maxAddr: Printing stops after the first instruction whose address
        is greater than this value (that instruction is still printed).
    :raises KeyError: If ``mode`` is neither 32 nor 64.
    """
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}

    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True

    for i in md.disasm(flo, 0):
        print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))

        # end="" keeps the register names on the same line as their label;
        # the bare print() afterwards terminates that line.
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()

        print("\tImplicit registers written: ", end="")
        for r in i.regs_write:
            print("%s " % i.reg_name(r), end="")
        print()

        if i.address > maxAddr:
            break
Ejemplo n.º 36
0
def find_instr_addr(mod_name, bits):
	"""
	Scan the PE module ``mod_name`` for a call site reached from its
	``rtcInStrChar`` export.

	The export is disassembled up to its first ``ret`` to find the operand of
	its last ``call``; that target is then disassembled and scanned for calls
	that reference the ``SysFreeString`` import (or any ``qword ptr`` operand).

	:param mod_name: Path of the PE file, passed to ``pefile.PE``.
	:param bits: 32 selects 32-bit x86 decoding; any other value selects 64-bit.
	:return: When the second matching call is reached, the value recorded for
		the most recent earlier ``call`` (its address minus the image base).
		``None`` if a ``ret`` is reached first or the scanned window ends.
	"""

	dll = pefile.PE(mod_name)

	# Locate the exported function that serves as the starting point.
	# NOTE(review): exp_addr stays unbound if no export has this name, so the
	# slice below would raise UnboundLocalError.
	# NOTE(review): presumably written for Python 2; under Python 3 pefile may
	# return names as bytes, in which case these comparisons never match - confirm.
	for entry in dll.DIRECTORY_ENTRY_EXPORT.symbols:
		if entry.name == 'rtcInStrChar':
			exp_addr = entry.address
			break

	# Locate the imported SysFreeString entry. The break only leaves the inner
	# loop, so the remaining import descriptors are still visited.
	# NOTE(review): imp_addr stays unbound if the import is not present.
	for imp in dll.DIRECTORY_ENTRY_IMPORT:
		for entry in imp.imports:
			if entry.name == 'SysFreeString':
				imp_addr = entry.address
				break

	memory = dll.get_memory_mapped_image()
	if bits == 32:
		dsm = Cs(CS_ARCH_X86, CS_MODE_32)
	else:
		dsm = Cs(CS_ARCH_X86, CS_MODE_64)

	# First pass: decode 0xA0 bytes of the export and remember the operand
	# string of the last call seen before the first ret.
	# NOTE(review): last_call stays unbound if no call is decoded here.
	for op in dsm.disasm(memory[exp_addr:exp_addr + 0xA0], (exp_addr + dll.OPTIONAL_HEADER.ImageBase)):
		if op.mnemonic == 'call':
			last_call = op.op_str
		if op.mnemonic == 'ret':
			break
	# The operand string is parsed as a hexadecimal address and turned into an
	# offset relative to the image base.
	next_func = int(last_call, 16) - dll.OPTIONAL_HEADER.ImageBase
	calls = 0
	call_free = 0
	# Second pass: decode 0x200 bytes of the called function.
	for op in dsm.disasm(memory[next_func:next_func + 0x200], (next_func + dll.OPTIONAL_HEADER.ImageBase)):
		# Count calls whose operand contains the import address in hex or
		# goes through a 'qword ptr' memory operand.
		if op.mnemonic == 'call' and ('0x%x' % imp_addr in op.op_str or 'qword ptr' in op.op_str):
			call_free += 1
		# The check runs before last_call is updated for the current
		# instruction, so the returned value belongs to the previous call.
		if call_free == 2:
			return last_call
		# From here on last_call holds an integer offset, not an operand string.
		if op.mnemonic == 'call':
			last_call = op.address - dll.OPTIONAL_HEADER.ImageBase
		if op.mnemonic == 'ret':
			return
Ejemplo n.º 37
0
    def __init__(self, target, log, start_clnum=0, end_clnum=0):
        """Load the target binary, pick a disassembler and open the trace log.

        :param target: Path of the executable; its raw bytes are kept in
            ``self.data``.
        :param log: Trace log handed to ``qiradb.Trace``.
        :param start_clnum: Accepted but not used by this constructor.
        :param end_clnum: Accepted but not used by this constructor.
        :raises Exception: If ``get_os()`` or ``get_arch()`` returns ``None``.
        """
        # The context manager closes the file even when the read fails.
        with open(target, 'rb') as f:
            self.data = f.read()
        self.target = target
        self.log = log

        self.os = self.get_os()
        if self.os is None:
            raise Exception('not supports os')

        self.arch = self.get_arch()
        if self.arch is None:
            raise Exception('not known arch')

        self.base = self.get_base()

        if self.os == 'windows':
            self.pe = PE(target)
        else:
            self.elf = Elf(target)

        # Every architecture other than 'i386' is handled as 64-bit.
        if self.arch == 'i386':
            self.md = Cs(CS_ARCH_X86, CS_MODE_32)
            self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
        else:
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)
            self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits

        # Poll until the trace reports an update.
        while not self.t.did_update():
            print("waiting...")
            time.sleep(0.1)

        # Cache of decoded instructions.
        self.disasms = {}
Ejemplo n.º 38
0
def createDisassembly(fileContent, offset):
  """Return the list of instructions decoded from ``fileContent``.

  The content is decoded as 32-bit x86 code, and ``offset`` is used as the
  address of the first instruction.
  """
  disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
  instructions = disassembler.disasm(fileContent, offset)
  return list(instructions)
Ejemplo n.º 39
0
def disasm(bytes, offset=0):
    """Disassemble x86-64 machine code and annotate every instruction.

    Each decoded instruction is printed and then decorated with extra
    attributes: ``nop``, jump/call information, RIP-relative operand details,
    the names of implicitly read/written registers, and documentation
    metadata. Mnemonics without documentation are appended to
    ``missing_docs.log``.

    :param bytes: Machine code to decode.
    :param offset: Address assigned to the first instruction.
    :return: List of annotated instructions, or ``None`` when Capstone raises
        ``CsError`` (the error is printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # We can only decode the destination if it's an immediate value
                if instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol = executable.ex.get_symbol_by_addr(dest_addr)
                        if symbol:
                            text_sect = executable.ex.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.external_jump = True
                            instr.jump_address = dest_addr
                            instr.jump_function_name = demangle(symbol.name)
                            instr.jump_function_address = dest_addr
                            instr.jump_function_offset = dest_addr - sect_addr + sect_offset
                            instr.jump_function_size = symbol['st_size']
                            instr.comment = demangle(symbol.name)
            # Handle individual operands
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # The displacement is relative to the next instruction,
                    # i.e. this instruction's address plus its size. Using
                    # the size instead of disassembled[i+1] also works for
                    # the last instruction, where no successor exists.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset
                    symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                    # Kept in its own name so the ``bytes`` parameter is not rebound.
                    raw = executable.ex.get_bytes(instr.rip_resolved, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."
            # what registers does this instruction read/write?
            instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write]
            instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read]
            # Add in documentation meta-data
            instr.docfile = doc_file(instr)
            instr.short_desc = get_short_desc(instr)
            if instr.docfile is None:
                with open('missing_docs.log', 'a+') as f:
                    f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
Ejemplo n.º 40
0
    def do_POST(self):
        """Handle a POST request carrying a memory read, disassembly or dump.

        The body is parsed as a URL-encoded query string. Fields read from it:
        ``addr`` (optional, defaults to 0), ``data`` (hex-encoded, required),
        ``type`` (required) and ``extra`` (optional, defaults to "").
        ``type`` selects the action: ``read``, ``dis``, ``dis_res`` or ``dump``.
        Nothing is done when the content length is zero.
        """
        length = int(self.headers.getheader('content-length'))
        if length:
            rdata = self.rfile.read(length)
            # parse_qs maps every field name to a list of values; only the
            # first value of each field is used below.
            rdata = urlparse.parse_qs(rdata)
            addr = 0
            extra = ""

            # A missing address is only a warning: addr keeps its default of 0.
            try:
                addr = int(rdata['addr'][0])
            except KeyError:
                print "[+] Warning: addr not received"
            # 'data' and 'type' are mandatory; the request is dropped without them.
            try:
                data = rdata['data'][0]
            except KeyError:
                print "[+] Error: dump not received"
                return
            try:
                typ = rdata['type'][0]
            except KeyError:
                print "[+] Error: msg type not received"
                return

            try:
                extra = rdata['extra'][0]
            except KeyError:
                pass

            # 'read': print the decoded bytes through display_data.
            if(typ == 'read'):
                print display_data(addr, data.decode('hex'))
            # 'dis': disassemble the decoded bytes, as Thumb when extra says so.
            if(typ == 'dis'):
                if(extra == "thumb"):
                    disassemble(addr, data.decode('hex'), thumb=True)
                else:
                    disassemble(addr, data.decode('hex'))

            # 'dis_res': decode the bytes as little-endian ARM and rebuild a
            # pointer from the operand strings of the first two instructions.
            if(typ == 'dis_res'):
                mode = CS_MODE_ARM
                md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
                disassed = md.disasm(data.decode('hex'), addr)
                ops = []
                ptrstr = ""
                print "Parsing: " + extra
                for i in disassed:
                    print "0x%x:\t%s    %s" %(i.address, i.mnemonic, i.op_str)
                    # NOTE(review): Capstone normally reports mnemonics in
                    # lower case, so this comparison may never match - confirm.
                    if i.mnemonic == "SVC":
                        print "Could not resolve " + extra + " (syscall) "
                        return
                    # Keep the operand text after its first seven characters.
                    ops.append(i.op_str[7:])
                    


                # The second operand becomes the high part and the first the
                # low part, each left-padded with zeros to four characters.
                # NOTE(review): raises IndexError when fewer than two
                # instructions were decoded.
                ptrstr = "0x"+ops[1].rjust(4,'0')+ops[0].rjust(4,'0')
                cmdstr = "resolve " + ptrstr + " " + extra
                print cmdstr
                # NOTE(review): the upper bound has ten hex digits while the
                # pointer is built from two four-character halves; possibly
                # 0xE0000000 was intended - confirm.
                if (int(ptrstr,16) > 0x40000000) and (int(ptrstr,16) < 0xE000000000):
                    self.mods.append(cmdstr)
                else:
                    print "Could not resolve " + extra + " (invalid address) "
                print "----"
            # The string literal below is a disabled earlier version of the
            # 'dump' handling; it is evaluated as an expression and discarded.
            """    
            if(typ == 'dump'):
                fname = extra
                dump_data(data.decode('hex'), fname)
            """
            # 'dump': write the decoded bytes under the current dump name,
            # (re)initialising the dump directory when a new dump starts.
            if typ == 'dump':
                global CURRENT_DUMP_FILE_NAME
                if CURRENT_DUMP_FILE_NAME == "":
                    #If this is the initial dump
                    CURRENT_DUMP_FILE_NAME = extra
                    #check if this file already exists
                    self.dump_directory_initializer(extra)
                elif not extra.startswith(CURRENT_DUMP_FILE_NAME):
                    #If this is a different dump
                    self.dump_directory_initializer(extra)
                    CURRENT_DUMP_FILE_NAME = extra

                dump_data(data.decode('hex'), CURRENT_DUMP_FILE_NAME)
Ejemplo n.º 41
0
def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 machine code and annotate every instruction.

    Each decoded instruction is printed and then decorated with extra
    attributes: ``nop``, jump-table / jump / call information,
    ``return_type``, RIP-relative operand details (including the referenced
    data unpacked at several widths), explicit and implicit register usage,
    and documentation metadata. Instructions lacking documentation are
    appended to ``missing_docs.log`` under ``CUR_PATH``.

    :param exe: Executable wrapper used for symbol, section and byte lookups.
    :param bytes: Machine code to decode.
    :param offset: Address assigned to the first instruction.
    :return: List of annotated instructions, or ``None`` when Capstone raises
        ``CsError`` (the error is printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # jump table
                if instr.group(x86.X86_GRP_JUMP) and instr.operands[0].type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(instr.operands[0].reg)

                # We can only decode the destination if it's an immediate value
                elif instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, field_name = exe.get_symbol_by_addr(
                            dest_addr,
                            instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(symbol.name)
                                instr.jump_function_address = symbol["st_value"]
                                instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True
            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # The displacement is relative to the next instruction,
                    # i.e. this instruction's address plus its size. Using
                    # the size instead of disassembled[i+1] also works for
                    # the last instruction, where no successor exists.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"]

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved,
                        instr.address,
                        instr_size=op.size,
                        get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name
                    # Kept in its own name so the ``bytes`` parameter is not rebound.
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."
                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    ptr = ["", "", ""]  # using an array instead of object to guarantee ordering
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)

                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    # Keeps regs_explicit aligned with the operand positions.
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write]
            # Calls are additionally recorded as writing rax.
            if instr.group(x86.X86_GRP_CALL) and instr.reg_name(x86.X86_REG_RAX) not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(instr.reg_name(x86.X86_REG_RAX))
            instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read]
            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic, instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
Ejemplo n.º 42
0
    def do_POST(self):
        """Handle a POST request carrying a memory read, disassembly or dump.

        The body is parsed as a URL-encoded query string. Fields read from it:
        ``addr`` (optional, defaults to 0), ``data`` (hex-encoded, required),
        ``type`` (required) and ``extra`` (optional, defaults to "").
        ``type`` selects the action: ``read``, ``dis``, ``dis_res`` or ``dump``.
        Nothing is done when the content length is zero.
        """
        length = int(self.headers.getheader('content-length'))
        if length:
            rdata = self.rfile.read(length)
            # parse_qs maps every field name to a list of values; only the
            # first value of each field is used below.
            rdata = urlparse.parse_qs(rdata)
            addr = 0
            extra = ""

            # A missing address is only a warning: addr keeps its default of 0.
            try:
                addr = int(rdata['addr'][0])
            except KeyError:
                print("[+] Warning: addr not received")
            # 'data' and 'type' are mandatory; the request is dropped without them.
            try:
                data = rdata['data'][0]
            except KeyError:
                print("[+] Error: dump not received")
                return
            try:
                typ = rdata['type'][0]
            except KeyError:
                print("[+] Error: msg type not received")
                return

            try:
                extra = rdata['extra'][0]
            except KeyError:
                pass

            if(typ == 'read'):
                print(display_data(addr, data.decode('hex')))
            if(typ == 'dis'):
                if(extra == "thumb"):
                    disassemble(addr, data.decode('hex'), thumb=True)
                else:
                    disassemble(addr, data.decode('hex'))

            # 'dis_res': decode the bytes as little-endian ARM and rebuild a
            # pointer from the operand strings of the first two instructions.
            if(typ == 'dis_res'):
                mode = CS_MODE_ARM
                md = Cs(CS_ARCH_ARM, mode + CS_MODE_LITTLE_ENDIAN)
                disassed = md.disasm(data.decode('hex'), addr)
                ops = []
                ptrstr = ""
                for i in disassed:
                    if i.mnemonic == "SVC":
                        # The closing quote was missing here, which made the
                        # whole method a syntax error.
                        print("Could not resolve " + extra + " (syscall)")
                        return
                    # Keep the operand text after its first seven characters.
                    ops.append(i.op_str[7:])

                # The second operand becomes the high part and the first the
                # low part, each left-padded with zeros to four characters.
                ptrstr = ops[1].rjust(4,'0')+ops[0].rjust(4,'0')
                print(ptrstr)
                cmdstr = "resolve 0x" + ptrstr + " " + extra
                if int(ptrstr,16) > 0x40000000:
                    self.mods.append(cmdstr)
                else:
                    # Same missing closing quote as above.
                    print("Could not resolve " + extra + " (invalid address)")

            if(typ == 'dump'):
                fname = extra
                dump_data(data.decode('hex'), fname)
Ejemplo n.º 43
0
from __future__ import print_function

# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

# Machine code sample that is decoded below as 32-bit x86.
CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

for i in md.disasm(CODE, 0):
    # print(dir(i))
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
    if len(i.regs_read) > 0:
        # With print_function in effect, a trailing comma after print(...)
        # only builds a tuple and a bare `print` prints nothing; end="" and
        # print() give the intended single-line output.
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()
    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups:", end="")
        for g in i.groups:
            print("%u" % g)
            # print("%u" % g, end="")
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    """Create an x86 disassembler for the requested bitness.

    ``mode`` must be 32 or 64; any other value raises ``KeyError``. In the
    part of the function visible here, ``flo`` and ``maxAddr`` are not used.
    """
    cs_mode = {32: CS_MODE_32, 64: CS_MODE_64}[mode]

    md = Cs(CS_ARCH_X86, cs_mode)
    md.detail = True
Ejemplo n.º 44
0
def main():
    """Match executables against mnemonic signatures from a signature file.

    Command-line options select the signature file, the number of mnemonics
    compared, the number of bytes disassembled at the entry point, the
    similarity threshold and verbosity. The positional argument is a file or
    a directory; for a directory every entry directly inside it is processed.

    Each file is opened as a PE and, failing that, as a Mach-O. The code at
    the entry point is disassembled, its mnemonic sequence is compared with
    every signature using ``tapered_levenshtein``, and matches whose
    similarity exceeds the threshold are printed.
    """
    BYTES = 500
    NUM_MNEM = 30
    SIG_FILE = "./mpesm.sig"
    THRESHOLD = .85
    VERBOSE = False
    DIR_PROCESSING = False
    signatures = {}
    file_list = []

    parser = ArgumentParser(description="Mnemonic PE Signature Matching")
    parser.add_argument("-n", "--num-mnem",
                        dest="num_mnem", help="Use a lenght of 'n' mnemonics (default: " + str(NUM_MNEM) + ')')
    parser.add_argument("-s", "--signatures",
                        dest="sig_file", help="signature file to use (default: " + SIG_FILE + ')')
    parser.add_argument("-b", "--bytes",
                        dest="bytes", help="Grab and disassemble x bytes from EP, you should only need to change this if you give a super large number for -n (default: " + str(BYTES) + ')')
    parser.add_argument("-t", "--threshold",
                        dest="threshold", help="Display all matches greater than -t supplied similarity (default: " + str(THRESHOLD) + ')')
    parser.add_argument("-v", "--verbose",
                        dest="verbose", help="Verbose output", action='store_true')
    parser.add_argument("file", nargs=1, help='File to analyze')
    args = parser.parse_args()

    if args.sig_file:
        SIG_FILE = args.sig_file
    if args.threshold:
        THRESHOLD = float(args.threshold)
    # argparse delivers option values as strings; both values are used in
    # slice arithmetic below, so they must be converted to integers.
    if args.bytes:
        BYTES = int(args.bytes)
    if args.num_mnem:
        NUM_MNEM = int(args.num_mnem)
    if args.verbose:
        VERBOSE = True

    config = ConfigParser.RawConfigParser()
    config.read(SIG_FILE)

    if len(config.sections()) == 0:
        print("Error Reading from config file: %s, it's either empty or not present" % (SIG_FILE))
        sys.exit(1)
    # Every section of the signature file is one signature; 'mnemonics' is
    # mandatory, the remaining options are optional.
    for s in config.sections():
        signatures[s] = {}
        signatures[s]['mnemonics'] = config.get(s, 'mnemonics').split(',')
        if config.has_option(s, 'num_mnemonics'):
            signatures[s]['num_mnemonics'] = config.getint(s, 'num_mnemonics')
        if config.has_option(s, 'major_linker'):
            signatures[s]['major_linker'] = config.getint(s, 'major_linker')
        if config.has_option(s, 'minor_linker'):
            signatures[s]['minor_linker'] = config.getint(s, 'minor_linker')
        if config.has_option(s, 'numberofsections'):
            signatures[s]['numberofsections'] = config.getint(s, 'numberofsections')

    if os.path.isdir(args.file[0]):
        file_list = glob.glob(args.file[0]+'/*')
        DIR_PROCESSING = True
    else:
        file_list.append(args.file[0])

    for f in file_list:
        file_type = None
        if VERBOSE:
            print('[*] Processing: ' + f)
        try:
            fe = pefile.PE(f)
            file_type = 'PE'
        except Exception as e:
            if VERBOSE:
                sys.stderr.write("[*] Error with %s - %s\n" % (f, str(e)))

        if not file_type:
            try:
                fe = macholib.MachO.MachO(f)
                file_type = 'MACHO'

            except Exception as e:
                if VERBOSE:
                    sys.stderr.write("[*] Error with %s - %s\n" % (f, str(e)))

        if not file_type:
            sys.stderr.write("[*] Error with %s - not a PE or Mach-O\n" % f)

        if file_type == 'PE':
            try:
                # Reset per file so values from a previously processed file
                # are never reused when a header field is missing.
                nos = 0
                ep = 0
                minor_linker = 0
                major_linker = 0
                try:
                    minor_linker = fe.OPTIONAL_HEADER.MinorLinkerVersion
                    major_linker = fe.OPTIONAL_HEADER.MajorLinkerVersion
                except Exception:
                    # Best effort: the linker versions keep their default of 0.
                    pass
                if hasattr(fe, 'FILE_HEADER') and hasattr(fe.FILE_HEADER, 'NumberOfSections'):
                    nos = fe.FILE_HEADER.NumberOfSections
                if hasattr(fe, 'OPTIONAL_HEADER') and hasattr(fe.OPTIONAL_HEADER, 'AddressOfEntryPoint'):
                    ep = fe.OPTIONAL_HEADER.AddressOfEntryPoint
                if hasattr(fe, 'OPTIONAL_HEADER') and hasattr(fe.OPTIONAL_HEADER, 'ImageBase') and ep > 0:
                    data = fe.get_memory_mapped_image()[ep:ep+BYTES]
                    #
                    # Determine if the file is 32bit or 64bit
                    #
                    mode = CS_MODE_32
                    if fe.OPTIONAL_HEADER.Magic == 0x20b:
                        mode = CS_MODE_64

                    md = Cs(CS_ARCH_X86, mode)
                    match = []
                    for (address, size, mnemonic, op_str) in md.disasm_lite(data, 0x1000):
                        match.append(mnemonic.encode('utf-8').strip())

                    for s in signatures:
                        m = match
                        sig = signatures[s]['mnemonics']
                        # Only compare in full when the first mnemonic agrees,
                        # unless a low threshold asks for every signature.
                        if m and m[0] == sig[0] or THRESHOLD < .7:
                            additional_info = []
                            if 'minor_linker' in signatures[s]:
                                if minor_linker == signatures[s]['minor_linker']:
                                    additional_info.append('Minor Linker Version Match: True')
                                else:
                                    additional_info.append('Minor Linker Version Match: False')
                            if 'major_linker' in signatures[s]:
                                if major_linker == signatures[s]['major_linker']:
                                    additional_info.append('Major Linker Version Match: True')
                                else:
                                    additional_info.append('Major Linker Version Match: False')
                            if 'numberofsections' in signatures[s]:
                                if nos == signatures[s]['numberofsections']:
                                    additional_info.append('Number Of Sections Match: True')
                                else:
                                    additional_info.append('Number Of Sections Match: False')

                            # A per-signature length overrides the global one.
                            if 'num_mnemonics' in signatures[s]:
                                nm = signatures[s]['num_mnemonics']
                                m = match[:nm]
                                sig = signatures[s]['mnemonics'][:nm]
                            else:
                                m = match[:NUM_MNEM]
                                sig = signatures[s]['mnemonics'][:NUM_MNEM]
                            distance = tapered_levenshtein(sig, m)
                            similarity = 1.0 - distance/float(max(len(sig), len(m)))
                            if similarity > THRESHOLD:
                                if DIR_PROCESSING:
                                    print("[%s] [%s] (Edits: %s | Similarity: %0.3f) (%s)" % (f, s, distance, similarity, ' | '.join(additional_info)))
                                else:
                                    print("[%s] (Edits: %s | Similarity: %0.3f) (%s)" % (s, distance, similarity, ' | '.join(additional_info)))
                                if VERBOSE:
                                    print("%s\n%s\n" % (sig, m))
            except Exception as e:
                print(str(e))
        elif file_type == 'MACHO':
            # The raw file is needed to slice out the entry-point bytes.
            with open(f, 'rb') as macho_file:
                macho_data = macho_file.read()
            for header in fe.headers:
                # Limit it to X86
                if header.header.cputype not in [7, 0x01000007]:
                    continue

                # Limit it to Object and Executable files
                if header.header.filetype not in [1, 2]:
                    continue

                magic = int(header.MH_MAGIC)
                offset = int(header.offset)

                all_sections = []
                entrypoint_type = ''
                entrypoint_address = 0
                for cmd in header.commands:
                    load_cmd = cmd[0]
                    cmd_info = cmd[1]
                    cmd_data = cmd[2]
                    cmd_name = load_cmd.get_cmd_name()
                    if cmd_name in ('LC_SEGMENT', 'LC_SEGMENT_64'):
                        for section_data in cmd_data:
                            sd = section_data.describe()
                            all_sections.append(sd)

                    elif cmd_name in ('LC_THREAD', 'LC_UNIXTHREAD'):
                        entrypoint_type = 'old'
                        flavor = int(struct.unpack(header.endian + 'I', cmd_data[0:4])[0])
                        count = int(struct.unpack(header.endian + 'I', cmd_data[4:8])[0])
                        if flavor == 1:
                            entrypoint_address = int(struct.unpack(header.endian + 'I', cmd_data[48:52])[0])
                        elif flavor == 4:
                            entrypoint_address = int(struct.unpack(header.endian + 'Q', cmd_data[136:144])[0])

                    elif cmd_name == 'LC_MAIN':
                        entrypoint_type = 'new'
                        entrypoint_address = cmd_info.describe()['entryoff']

                entrypoint_data = ''
                if entrypoint_type == 'new':
                    # The 'new' style entry point is used directly as an
                    # offset from the start of this header's slice.
                    entrypoint_offset = offset + entrypoint_address
                    entrypoint_data = macho_data[entrypoint_offset:entrypoint_offset+BYTES]
                elif entrypoint_type == 'old':
                    # The 'old' style entry point is an address; translate it
                    # to a file offset through the section that contains it.
                    found_section = False
                    for sec in all_sections:
                        if entrypoint_address >= sec['addr'] and entrypoint_address < (sec['addr'] + sec['size']):
                            found_section = True
                            entrypoint_address = (entrypoint_address - sec['addr']) + sec['offset']
                            break

                    if found_section:
                        entrypoint_offset = offset + entrypoint_address
                        entrypoint_data = macho_data[entrypoint_offset:entrypoint_offset+BYTES]

                mode = CS_MODE_32
                if magic == 0xcffaedfe:
                    mode = CS_MODE_64

                md = Cs(CS_ARCH_X86, mode)
                match = []
                if entrypoint_data:
                    try:
                        for (address, size, mnemonic, op_str) in md.disasm_lite(entrypoint_data, 0x1000):
                            match.append(mnemonic.encode('utf-8').strip())
                    except Exception as e:
                        print(str(e))

                    for s in signatures:
                        m = match
                        sig = signatures[s]['mnemonics']
                        if m and m[0] == sig[0] or THRESHOLD < .7:
                            additional_info = []
                            if 'num_mnemonics' in signatures[s]:
                                nm = signatures[s]['num_mnemonics']
                                m = match[:nm]
                                sig = signatures[s]['mnemonics'][:nm]
                            else:
                                m = match[:NUM_MNEM]
                                sig = signatures[s]['mnemonics'][:NUM_MNEM]

                            distance = tapered_levenshtein(sig, m)
                            similarity = 1.0 - distance/float(max(len(sig), len(m)))
                            if similarity > THRESHOLD:
                                if DIR_PROCESSING:
                                    print("[%s] [%s] (Edits: %s | Similarity: %0.3f) (%s)" % (f, s, distance, similarity, ' | '.join(additional_info)))
                                else:
                                    print("[%s] (Edits: %s | Similarity: %0.3f) (%s)" % (s, distance, similarity, ' | '.join(additional_info)))
                                if VERBOSE:
                                    print("%s\n%s\n" % (sig, m))
Ejemplo n.º 45
0
class Tracer():
    def __init__(self, target, log, start_clnum=0, end_clnum=0):
        f = open(target, 'rb')
        self.data = f.read()
        f.close()
        self.target = target
        self.log = log

        self.os = self.get_os()
        if self.os is None:
            raise Exception('not supports os')

        self.arch = self.get_arch()
        if self.arch is None:
            raise Exception('not known arch')

        self.base = self.get_base()

        if self.os == 'windows':
            self.pe = PE(target)
        else:
            self.elf = Elf(target)

        if self.arch == 'i386':
            self.md = Cs(CS_ARCH_X86, CS_MODE_32)
        else:
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)

        if self.arch == 'i386':
            self.t = qiradb.Trace(log, 0, 4, 9, False)  # 32 bits
        else:
            self.t = qiradb.Trace(log, 0, 8, 17, False)  # 64 bits

        while not self.t.did_update():
            print "waiting..."
            time.sleep(0.1)

        self.disasms = {}
        # self.generate_trace(target, log, start_clnum, end_clnum, 4)

    def get_disasm(self, va):
        offset = self.get_offset_from_rva(va - self.base)
        #print hex(offset)
        if offset > len(self.data):
            return ''
        try:
            if self.disasms.has_key(va):
                insn = self.disasms[va]
                return insn.mnemonic + ' ' + insn.op_str
            for insn in self.md.disasm(self.data[offset:], va, count=1):
                disasm = insn.mnemonic + ' ' + insn.op_str
                self.disasms[va] = insn
                return disasm
        except:
            pass
        return ''

    def get_os(self):
        if self.data[0:4] == '\x7fELF':
            return 'linux'
        elif self.data[0:2] == 'MZ':
            return 'windows'
        return None

    def get_arch(self):
        if self.os == 'linux':
            value = l16(self.data[0x12:0x14])
            if value == 3:
                return 'i386'
            elif value == 0x3e:
                return 'x86_64'
        if self.os == 'windows':  # to modify
            return 'i386'
        return None

    def get_base(self, module_name=None):
        # default is the main module
        if module_name is None:
            f = open(log + '_base', 'rb')
            for line in f:
                line = line.strip()
                if line == '':
                    continue
                if self.os == 'linux':
                    pattern = '\.so'
                else:
                    pattern = '\.dll'
                matches = re.findall(pattern, line)
                if not matches:
                    f.close()
                    return long(line.split('-')[0], 16)
            f.close()
        else:
            f = open(log + '_base', 'rb')
            for line in f:
                if module_name in line:
                    f.close()
                    return long(line.split('-')[0], 16)
            f.close()
        return None

    def get_reg_name(self, index):
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi']
            return reg_names[index / 4]
        else:
            reg_names = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
                         'r14', 'r15', 'rip']
            return reg_names[index / 8]

    def get_reg_index(self, name):
        reg_names2 = ['ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di']
        reg_names3 = ['ah', 'ch', 'dh', 'bh']
        reg_names4 = ['al', 'cl', 'dl', 'bl']
        for i in range(len(reg_names2)):
            if name == reg_names2[i]:
                return i | 0x400
        for i in range(len(reg_names3)):
            if name == reg_names3[i]:
                return i | 0x200
        for i in range(len(reg_names4)):
            if name == reg_names4[i]:
                return i | 0x100
        if self.arch == 'i386':
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi', 'eip']
            for i in range(len(reg_names)):
                if name == reg_names[i]:
                    return i | 0x800
        else:
            reg_names = ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi', 'r8d', 'r9d', 'r10d', 'r11d', 'r12d',
                         'r13d', 'r14d', 'r15d']
            for i in range(len(reg_names)):
                if name == reg_names[i]:
                    return i | 0x800
            reg_names5 = ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11', 'r12',
                          'r13','r14', 'r15', 'rip']
            for i in range(len(reg_names5)):
                if name == reg_names5[i]:
                    return i | 0x1000

    def get_offset_from_rva(self, rva):
        if self.os == 'linux':  # to modify
            return self.elf.vma2offset(rva + self.base)
        else:
            return self.pe.get_offset_from_rva(rva)

    def is_branch(self, ins):
        """
        Tell whether a trace line holds a control-transfer instruction.

        ``ins`` has the form ``"<address> <mnemonic> <operands>"``; the second
        space-separated field is taken as the mnemonic.

        :param str ins: Trace line, possibly empty.

        :return: ``True`` for ``ret``, ``call`` and any mnemonic starting
            with ``j``; ``False`` otherwise, including lines that carry no
            mnemonic field.
        :rtype: bool
        """
        if ins == '':
            return False
        fields = ins.split(' ')
        # A line without a second field used to raise IndexError.
        if len(fields) < 2:
            return False
        opcode = fields[1]
        return opcode in ('ret', 'call') or opcode.startswith('j')

    def write_one_ins(self, out, clnum, ins, ops):
        result = str(clnum) + ': '
        result = result.ljust(8, ' ')
        result += ins.ljust(50, ' ')
        for op in ops:
            if self.arch == 'i386':
                result += op.ljust(24, ' ')
            else:
                result += op.ljust(36, ' ')
        out.write(result + '\n')

        if self.is_branch(ins):
            out.write('\n')

    def byte_to_value(self, bytes):
        """
        Pack a sequence of byte values and decode it as one integer.

        Each element is masked to 8 bits and joined into a string, which is
        then handed to ``l8``/``l16``/``l32``/``l64`` according to its length.

        :param bytes: Iterable of integers.

        :return: The value returned by the matching ``lN`` helper.
        :raises Exception: If the length is not 1, 2, 4 or 8.
        """
        packed = ''.join(chr(item & 0xff) for item in bytes)
        size = len(packed)
        if size == 1:
            return l8(packed)
        if size == 2:
            return l16(packed)
        if size == 4:
            return l32(packed)
        if size == 8:
            return l64(packed)
        raise Exception('not known len:%d' % size)

    def generate_trace(self, start_addr=None, start_clnum=0, end_clnum=0, limit=1):
        out = open(self.log + '.out', 'wb')
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum()

        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum()
        print 'start:', start_clnum
        print 'end:', end_clnum
        ins = ''
        ops = []

        start_record = True
        if start_addr is not None:
            start_record = False
        for i in range(start_clnum, end_clnum):
            changes = self.t.fetch_changes_by_clnum(i, limit)
            if len(changes) < 1:
                continue
            change = changes[0]
            #print change
            if change['type'] == 'I':
                if not start_record:
                    pc = change['address']
                    if pc == start_addr:
                        start_record = True
                    else:
                        continue
                self.md.detail = True
                ins = '%x %s' % (change['address'], self.get_disasm(change['address']))
                ops = []
                if not self.disasms.has_key(change['address']):
                    continue
                insn = self.disasms[change['address']]
                operands = insn.operands
                if len(operands) > 0:
                    j = -1
                    for op in operands:
                        j += 1
                        if op.type == X86_OP_IMM:
                            continue
                        elif op.type == X86_OP_FP:
                            continue
                        elif op.type == X86_OP_REG:
                            reg_name = insn.reg_name(op.reg)
                            reg_value = self.get_reg(i - j, reg_name)
                            ops.append('%s:%x' % (reg_name, reg_value))

                        elif op.type == X86_OP_MEM:
                            if op.mem.base != 0:
                                base_name = insn.reg_name(op.mem.base)  # reg
                                base = self.get_reg(i - j, base_name)
                            else:
                                base = 0

                            if op.mem.index != 0:
                                index_name = insn.reg_name(op.mem.index)  # reg
                                index = self.get_reg(i - j, index_name)
                            else:
                                index = 0
                            scale = op.mem.scale
                            disp = op.mem.disp
                            mem_addr = base + scale * index + disp
                            mem_byte = self.t.fetch_memory(i - j, mem_addr, op.size)
                            mem_value = self.byte_to_value(mem_byte)
                            ops.append('[%x]:%x' % (mem_addr, mem_value))
                '''
                elif change['type'] == 'R':
                    op = '%s => %x' % (self.get_reg_name(change['address']), change['data'])
                    ops.append(op)
                    # change['size']
                elif change['type'] == 'W':
                    op = '%s <= %x' % (self.get_reg_name(change['address']), change['data'])
                    ops.append(op)
                elif change['type'] == 'L':
                    op = '[%x] => %x' % (change['address'], change['data'])
                    ops.append(op)
                elif change['type'] == 'S':
                    op = '[%x] <= %x' % (change['address'], change['data'])
                    ops.append(op)
                elif change['type'] == 's':
                    pass
                    # if self.os == 'linux':
                    # 'sys_' + self.get_sys_call_name(change['address'])
                else:
                    print change
                '''
            self.write_one_ins(out, i, ins, ops)
        out.close()

    def get_memory(self, clnum, addr, size):
        result = ''
        for byte in self.t.fetch_memory(clnum, addr, size):
            result += chr(byte & 0xff)
        return result

    def get_reg(self, clnum, reg_name):
        index = self.get_reg_index(reg_name)
        reg_value = self.t.fetch_registers(clnum)[index & 0xff]
        if index & 0x1000:
            reg_value = reg_value
        if index & 0x800:
            reg_value = reg_value & 0xffffffff
        elif index & 0x400:
            reg_value = reg_value & 0xffff
        elif index & 0x200:
            reg_value = (reg_value & 0xff00) >> 8
        elif index & 0x100:
            reg_value &= 0xff

        if (self.arch != 'i386') & (index&0xff == 16):
            changes = self.t.fetch_changes_by_clnum(clnum, 1)
            for change in changes:
                if change['type'] == 'I':
                    reg_value = change['address'] + change['data'] #rip
        return reg_value

    def get_ret_addr(self, clnum):
        """
        Read the value on top of the stack at a change-list number.

        The stack pointer is ``esp`` with a 4-byte slot on ``'i386'`` and
        ``rsp`` with an 8-byte slot otherwise; the slot is decoded with
        ``l32``/``l64`` respectively.

        :param clnum: Change-list number.

        :return: The decoded stack-top value.
        """
        if self.arch == 'i386':
            sp_name, slot_size, decode = 'esp', 4, l32
        else:
            sp_name, slot_size, decode = 'rsp', 8, l64
        stack_top = self.get_reg(clnum, sp_name)
        return decode(self.get_memory(clnum, stack_top, slot_size))

    def get_pc(self, clnum):
        changes = self.t.fetch_changes_by_clnum(clnum, 1)
        for change in changes:
            # print change
            if change['type'] == 'I':
                return change['address']
        return 0

    def generate_cfg(self, start_addr, ret_addr=None, start_clnum=0, end_clnum=0):
        """
        Build a graph from the traced executions of the code at ``start_addr``
        and hand it to ``graph.print_graph('tracer.png')``.

        The method works in two phases: it first collects one instruction
        list per traced call of ``start_addr``, then merges those lists into
        a ``Graph`` of ``Node`` objects, splitting existing nodes where an
        execution path joins or leaves them.

        :param start_addr: Address whose execution starts a trace.
        :param ret_addr: Address that ends a trace. When ``None`` it is read
            with ``get_ret_addr`` at the change-list number preceding the
            entry.
        :param int start_clnum: First change-list number; 0 means the trace
            minimum plus one.
        :param int end_clnum: Last change-list number (inclusive); 0 means
            the trace maximum minus one.

        :return: ``None``; the result is the rendered graph.
        """
        if start_clnum == 0:
            start_clnum = self.t.get_minclnum() + 1

        if end_clnum == 0:
            end_clnum = self.t.get_maxclnum() - 1

        # Phase 1: collect one list of (clnum, pc, asm) tuples per call.
        traces = []
        enter_call = 0      # 1 while inside the traced function
        enter_sub_call = 0  # 1 while inside a call made by the traced function

        for i in range(start_clnum, end_clnum + 1):
            pc = self.get_pc(i)
            asm = self.get_disasm(pc)
            if enter_call == 0:
                if pc == start_addr:
                    if ret_addr is None:
                        # Stack top one change-list earlier; presumably the
                        # return address pushed by the calling instruction.
                        end_addr = self.get_ret_addr(i - 1)
                        print hex(end_addr)
                    else:
                        end_addr = ret_addr
                    enter_call = 1
                    trace = [(i, pc, asm)]
            else:
                if end_addr == pc:
                    print 'exit call'
                    enter_call = 0
                    traces.append(trace)
                    trace = []
                    # NOTE(review): a trace still open when the loop ends is
                    # never appended to traces.
                if enter_sub_call == 0:
                    trace.append((i, pc, asm))
                    if asm.startswith('call'):
                        # Skip the callee: stop recording until execution
                        # reaches the value currently on top of the stack.
                        enter_sub_call = 1
                        sub_call_ret = self.get_ret_addr(i)
                else:
                    if pc == sub_call_ret:
                        trace.append((i, pc, asm))
                        enter_sub_call = 0

        # Phase 2: merge the traces into a graph.
        graph = Graph()

        pcs = []  # addresses that already belong to some node
        for trace in traces:
            print trace

        for trace in traces:
            # exist_node/exist_index: the already-known node currently being
            # followed and the position of the next expected instruction in
            # it. new_node: the node currently being filled with addresses
            # not seen before.
            exist_node = None
            exist_index = 1
            new_node = None
            for ins in trace:
                if ins[1] not in pcs:
                    # First time this address is seen.
                    pcs.append(ins[1])
                    if exist_node is None:
                        if new_node is None:
                            new_node = Node([Assemble(ins[1], ins[2])])
                            graph.add_node(new_node)
                        else:
                            new_node.add_asm(Assemble(ins[1], ins[2]))
                    else:
                        # Leaving a known node for new code: start a fresh
                        # node and link it, splitting the known node when it
                        # was left before its last instruction.
                        new_node = Node([Assemble(ins[1], ins[2])])
                        graph.add_node(new_node)
                        if len(exist_node.asm_seqs) == exist_index:
                            graph.add_edge(exist_node, new_node)
                        else:
                            node1, node2 = graph.split_node(exist_node, exist_index, count=exist_node.count - 1)
                            graph.add_edge(node1, new_node)
                        exist_node = None
                        exist_index = 0
                else:
                    # Address already belongs to a node.
                    if exist_node is None:
                        if new_node is None:
                            exist_node = graph.search_and_split(ins[1])
                            exist_node.add_count()
                            exist_index = 1
                        else:
                            # New code runs into known code: link new_node to
                            # the node that starts at this address, splitting
                            # the found node when the address is not its
                            # first instruction.
                            node, index = graph.search_node(ins[1])
                            if index == 0:
                                graph.add_edge(new_node, node)
                                node2 = node
                            else:
                                node1, node2 = graph.split_node(node, index)
                                if node == new_node:
                                    graph.add_edge(node2, node2)
                                else:
                                    graph.add_edge(new_node, node2)
                            new_node = None
                            exist_node = node2
                            node2.add_count()
                            exist_index = 1
                    else:
                        if new_node is None:
                            if len(exist_node.asm_seqs) == exist_index:
                                # The followed node is exhausted: move on to
                                # the node holding this address.
                                node3 = graph.search_and_split(ins[1])
                                graph.add_edge(exist_node, node3)
                                exist_node = node3
                                node3.add_count()
                                exist_index = 1
                            else:
                                if exist_node.asm_seqs[exist_index].addr == ins[1]:
                                    # Still walking along the followed node.
                                    exist_index += 1
                                else:
                                    # Diverged in the middle of the followed
                                    # node: split it and link to the target.
                                    node1, node2 = graph.split_node(exist_node, exist_index, count=exist_node.count-1)
                                    node3 = graph.search_and_split(ins[1])
                                    graph.add_edge(node1, node3)
                                    exist_node = node3
                                    node3.add_count()
                                    exist_index = 1
                        else:
                            print 'impossible2', ins
        graph.print_graph('tracer.png')

    def test(self):
        """
        Debug helper: print the registers at change-list number 13 and each
        change fetched for it (with a limit of 1000).
        """
        changes = self.t.fetch_changes_by_clnum(13, 1000)
        print self.t.fetch_registers(13)
        for change in changes:
            print change