コード例 #1
0
class MiasmSEOracle:
    """Symbolically execute a code buffer, one disassembled block at a time.

    The IR analysis class is chosen from the requested architecture by
    determine_ira().
    """

    def __init__(self, code, architecture):
        """Build the disassembler, the IR analysis and the symbolic engine.

        code: buffer handed to the disassembly engine; its length bounds
            the loop in execute().
        architecture: miasm machine name. Only "x86_64" and "x86_32" are
            handled by determine_ira().

        Raises NotImplementedError for any other architecture.
        """
        self.code = code
        self.machine = Machine(architecture)
        self.mdis = self.machine.dis_engine(code)
        # The IR analysis used to be hard-coded to SubIRA64, which left
        # determine_ira() unused and applied the 64-bit analysis to 32-bit
        # code as well.
        self.ira = self.determine_ira(architecture)
        self.se_engine = SymbolicExecutionEngine(
            self.ira, self.machine.mn.regs.regs_init)

    def determine_ira(self, architecture):
        """Return the IR analysis instance matching `architecture`.

        Requires self.mdis to be set, since its symbol pool is shared with
        the returned object.

        Raises NotImplementedError when the architecture is neither
        "x86_64" nor "x86_32".
        """
        if architecture == "x86_64":
            return SubIRA64(self.mdis.symbol_pool)
        elif architecture == "x86_32":
            return SubIRA32(self.mdis.symbol_pool)
        else:
            raise NotImplementedError(
                "Architecture {} not supported".format(architecture))

    def execute(self):
        """Disassemble and symbolically execute blocks from offset 0.

        Blocks are processed linearly: each iteration resumes at the upper
        bound of the range of the block just disassembled, until that
        offset reaches len(self.code).
        """
        addr = 0
        while addr < len(self.code):
            basic_block = self.mdis.dis_block(addr)
            self.ira.add_block(basic_block)
            ira_block = self.ira.get_block(addr)
            self.se_engine.emulbloc(ira_block)
            # Continue right after the block that was just executed
            addr = basic_block.get_range()[1]
コード例 #2
0
ファイル: ethre.py プロジェクト: woods1060/ethRE
class ethRE:
    """Disassemble EVM bytecode with miasm and render the resulting blocks."""

    # Blocks produced by from_bytecode()/from_asm(). Defaults to None so that
    # graph() can report the missing parsing step instead of failing with an
    # AttributeError on an attribute that was never set.
    blks = None

    def __init__(self):
        """Create the miasm machine for the "evm" architecture."""
        self.machine = Machine("evm")
        self.mn = self.machine.mn

    def get_bytecode(self, account_addr):
        """Return the code stored at `account_addr` as raw bytes.

        account_addr: string whose first two characters are skipped before
            the rest is parsed as a base-16 integer (presumably a "0x"
            prefix).
        """
        code = evm_env.code(int(account_addr[2:], 16))
        code = code[2:]  # To remove '0x'..
        if len(code) % 2 == 1:
            # unhexlify needs an even number of hex digits
            code = "0" + code
        code = binascii.unhexlify(code)
        return code

    def from_bytecode(self, bytecode):
        """Disassemble `bytecode` from offset 0 and keep the blocks in self.blks."""
        container = Container.from_string(bytecode)

        mdis = self.machine.dis_engine(container.bin_stream)
        self.blks = mdis.dis_multibloc(0)

    def from_asm(self, asm_text):
        """Parse assembly text into self.blks.

        Always raises Exception("Not correctly implemented") after storing
        the parsed blocks: the feature is flagged as unfinished.
        """
        all_bloc, symbol_pool = parse_asm.parse_txt(self.mn, 0, asm_text)
        self.blks = all_bloc
        raise Exception("Not correctly implemented")

    def graph(self):
        """Return the dot representation of the parsed blocks.

        Raises Exception when nothing has been parsed yet.
        """
        if not self.blks:
            raise Exception("Need to parse bytecode before")
        return self.blks.dot()
コード例 #3
0
ファイル: ethre.py プロジェクト: jbcayrou/ethRE
class ethRE:
    """EVM bytecode helper: fetch, disassemble and graph a contract's code."""

    # No blocks are available until from_bytecode() (or from_asm()) has run.
    # Declaring the default here lets graph() raise its own explicit error
    # rather than an AttributeError for a missing attribute.
    blks = None

    def __init__(self):
        """Instantiate the miasm "evm" machine and keep its mnemonic class."""
        self.machine = Machine("evm")
        self.mn = self.machine.mn

    def get_bytecode(self, account_addr):
        """Fetch the code of `account_addr` and return it as bytes.

        The address is given as a string; its first two characters are
        dropped before hexadecimal parsing (presumably "0x").
        """
        code = evm_env.code(int(account_addr[2:], 16))
        code = code[2:]  # To remove '0x'..
        if len(code) % 2 == 1:
            # Pad to an even number of hex digits for unhexlify
            code = "0" + code
        code = binascii.unhexlify(code)
        return code

    def from_bytecode(self, bytecode):
        """Disassemble `bytecode` starting at offset 0 into self.blks."""
        container = Container.from_string(bytecode)

        mdis = self.machine.dis_engine(container.bin_stream)
        self.blks = mdis.dis_multibloc(0)

    def from_asm(self, asm_text):
        """Parse `asm_text` into self.blks, then signal it is unfinished.

        The method stores the parsed blocks and unconditionally raises
        Exception("Not correctly implemented").
        """
        all_bloc, symbol_pool = parse_asm.parse_txt(self.mn, 0, asm_text)
        self.blks = all_bloc
        raise Exception("Not correctly implemented")

    def graph(self):
        """Return self.blks rendered as dot.

        Raises Exception if no bytecode has been parsed beforehand.
        """
        if not self.blks:
            raise Exception("Need to parse bytecode before")
        return self.blks.dot()
コード例 #4
0
def symexec(handler):
    """Symbolically execute a handler's bytes as x86_32 code.

    The bytes in handler.bytes_without_jmp are disassembled as a single
    block at offset 0 (disassembly is told not to go past their end), then
    emulated block after block until the engine reports the end offset as
    the next address. The loop gives up, with a message mentioning
    handler.name, once more than 1000 extra steps were needed.

    Returns the symbolic execution engine in its final state.
    """
    code = handler.bytes_without_jmp
    arch = Machine("x86_32")
    container = Container.from_string(code)
    disassembler = arch.dis_engine(container.bin_stream,
                                   symbol_pool=container.symbol_pool)

    # Do not disassemble beyond the handler's own bytes
    stop_offset = len(code)
    disassembler.dont_dis = [stop_offset]

    first_block = disassembler.dis_block(0)
    ir_arch = arch.ira(disassembler.symbol_pool)
    ir_arch.add_block(first_block)

    engine = SymbolicExecutionEngine(ir_arch, symbols_init)

    next_addr = engine.emul_ir_block(0)
    steps = 0
    while next_addr != ExprInt(stop_offset, 32):  # execute to end
        next_addr = engine.emul_ir_block(next_addr)
        steps += 1
        if steps <= 1000:
            continue
        # Safety net against handlers that never reach the end offset
        print('[!] to many loop at %s' % handler.name)
        break

    return engine
コード例 #5
0
def main():
    """Dump the static CFG of 300.bin, run it in a sandbox, dump the VM CFG.

    Steps:
    - disassemble '300.bin' from 0x401550 and write the graph to
      cfg_before.dot;
    - parse the command line (one positional `filename`) and run that file
      in a Linux x86_64 sandbox, with `stop` registered as a breakpoint on
      the entry point;
    - call interpret() and write the global `cfg` to vm_graph.dot
      (presumably interpret() is what fills `cfg` - confirm).

    Every file is opened through a context manager so that handles are
    closed (and the .dot output flushed) deterministically.
    """
    global cfg
    global block
    global data

    # Paint the cfg_before image from disassembly
    with open('300.bin') as binary_stream:
        cont = Container.from_stream(binary_stream)
    bin_stream = cont.bin_stream
    adr = 0x401550
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(adr)
    with open("cfg_before.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Get filename
    parser = Sandbox_Linux_x86_64.parser(description="300.bin")
    parser.add_argument("filename", help="filename")
    options = parser.parse_args()
    options.mimic_env = True

    # Start Sandbox
    # (the local `machine` used to be re-created here without being read
    # afterwards; that dead assignment has been dropped)
    sb = Sandbox_Linux_x86_64(options.filename, options, globals())
    sb.jitter.init_run(sb.entry_point)
    sb.jitter.add_breakpoint(sb.entry_point, stop)
    sb.run()

    # Get bytecode
    interpret()

    # Paint cfg
    with open("vm_graph.dot", "w") as dot_file:
        dot_file.write(cfg.dot())
コード例 #6
0
ファイル: r2bindings-r2m2_ad.py プロジェクト: cobrce/r2m2
def r2m2_dis(opcode):
    """Decode one x86_64 instruction from `opcode` with miasm.

    Returns a two-item list: the `l` attribute of the decoded instruction,
    followed by its string form.
    """
    x86_64 = Machine("x86_64")
    # The attribute of a default disassembly engine is reused as decode mode
    attrib = x86_64.dis_engine().attrib
    decoded = x86_64.mn().dis(opcode, attrib)
    return [decoded.l, str(decoded)]
コード例 #7
0
def main():
    """Recover and print a flag from the CFG of the binary given as argument.

    The binary is disassembled from 0x614 and its graph is written to
    cfg.dot. Blocks matching a template (first line contains LDR, second to
    last contains CMP, offset is not 0xdf8) are then scanned: the second
    argument of their 4th line is read as a position and the second argument
    of their CMP as a character code. Non-zero characters are assembled in
    position order and printed.

    Both files are handled through context managers so they are closed (and
    cfg.dot is flushed) deterministically.
    """
    # Setup Machine for arm, get filename
    machine = Machine('armtl')
    parser = ArgumentParser("Description")
    parser.add_argument('filename', help='filename')
    args = parser.parse_args()

    # Setup disassembly stream in container, get blocks and draw the graph
    with open(args.filename) as binary_stream:
        cont = Container.from_stream(binary_stream)
    bin_stream = cont.bin_stream
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(0x614)
    with open("cfg.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Create a template for matching blocks in the control flow graph
    # Requirement 1) Don't get block 0xdf8, it can't disassemble
    # Requirement 2) Get ones that start with LDR
    # Requirement 3) Get ones where the second to last instruction is CMP
    # No restrictions for incoming and outgoing edges
    mblock = MatchGraphJoker(
        name='mblock',
        restrict_in=False,
        restrict_out=False,
        filt=lambda block: (block.label.offset != 0xdf8
                            and "LDR" in block.lines[0].name
                            and "CMP" in block.lines[-2].name))

    # Basic block matcher
    nblock = MatchGraphJoker(name="next",
                             restrict_in=False,
                             restrict_out=False)

    # Now it should match the blocks we want with the checks
    matcher = nblock >> mblock

    # position in the flag -> character code
    flag_storage = {}
    # Loop through matching template blocks
    for sol in matcher.match(blocks):
        try:
            # Grab position line
            pline = sol[mblock].lines[3]
            # Grab character check line
            cline = sol[mblock].lines[-2]
            # Transform character and position to integer
            pos = int(pline.arg2str(pline.args[1]), 16)
            c = int(cline.arg2str(cline.args[1]), 16)
            # If its NULL, ignore
            if c != 0:
                flag_storage[pos] = c
        except ValueError:
            # Deliberately skipped: operands that are not hexadecimal
            # literals do not describe a flag character.
            # The F at the beginning is a NULL check
            pass

    # Print Flag
    flag = "".join(
        chr(flag_storage[pos]) for pos in sorted(flag_storage)
    ).replace("F", "I")
    print("F" + flag)
コード例 #8
0
ファイル: MiasmPatcher.py プロジェクト: jeffp507/cgrex
 def execc(self, code):
     """Symbolically execute `code` as x86_32 from offset 0.

     Every block disassembled from offset 0 is lifted to IR, then the IR
     is emulated starting at address 0 with the architecture's initial
     register values. Returns the symbolic execution engine.
     """
     arch = Machine('x86_32')
     asm_blocs = arch.dis_engine(code).dis_multibloc(0)
     ir_arch = arch.ira()
     for asm_bloc in asm_blocs:
         ir_arch.add_bloc(asm_bloc)
     engine = symbexec(ir_arch, arch.mn.regs.regs_init)
     engine.emul_ir_blocs(ir_arch, 0)
     return engine
コード例 #9
0
ファイル: binaries_helpers.py プロジェクト: markbirss/flashre
class ReverseFlashairBinary(object):
    """
    Helper bundling the usual operations on the flashair binary
    """

    def __init__(self, filename, offset=0, r2_options=None):
        """
        Open the r2pipe handle and prepare the miasm machine
        """
        # File offset, kept for later use
        self.offset = offset
        # r2pipe handle on the binary
        self.r2p = get_r2pipe(filename, offset, r2_options)
        # miasm machine ("mepl") and its mnemonic object
        self.machine = Machine("mepl")
        self.mn = self.machine.mn()

    def strings(self):
        """
        Thin wrapper around get_strings for this binary
        """
        return get_strings(self.r2p)

    def prologues(self):
        """
        Thin wrapper around get_prologues for this binary
        """
        return get_prologues(self.r2p)

    def nearest_prologue(self, address):
        """
        Return the closest function prologue located strictly below address

        When no prologue lies below address, an empty list is returned.
        """
        # The closest prologue below `address` is the highest one
        below = [start for start in self.prologues() if start < address]
        if below:
            return max(below)
        return below

    def assemble(self, instruction):
        """
        Assemble a single instruction with miasm2
        """
        # The attribute of a default disassembly engine serves as mode
        attrib = self.machine.dis_engine().attrib
        parsed = self.mn.fromstring(instruction, attrib)
        parsed.mode = attrib
        return self.mn.asm(parsed, attrib)
コード例 #10
0
ファイル: r2bindings-r2m2_ad.py プロジェクト: cobrce/r2m2
def r2m2_asm(mn_str):
    """Assemble an instruction using miasm.

    mn_str: textual x86_64 instruction; it is upper-cased before parsing.

    Returns the first encoding proposed by miasm as a list of integer byte
    values.
    """
    import re  # local import: keeps the block self-contained

    # miasm2 only parses upper case mnemonics
    mn_str = mn_str.upper()
    # Restore the lowercase hexadecimal prefix. Only "0X" is rewritten:
    # replacing every "X" also altered operand names such as RAX or XMM0.
    mn_str = re.sub(r"\b0X", "0x", mn_str)

    machine = Machine("x86_64")
    mode = machine.dis_engine().attrib
    mn = machine.mn()
    instr = mn.fromstring(mn_str, mode)
    # Keep only the first candidate encoding
    asm_instr = list(mn.asm(instr))[0]

    return [struct.unpack("!B", byte)[0] for byte in asm_instr]
コード例 #11
0
class MiasmSEOracle:
    """Run miasm symbolic execution over a code buffer, block after block."""

    def __init__(self, code, architecture):
        """Create the disassembler, the IR analysis and the symbolic engine.

        code: buffer given to the disassembly engine.
        architecture: name passed to miasm's Machine.
        """
        machine = Machine(architecture)
        disassembler = machine.dis_engine(code)
        ir_analysis = SubIRA64(disassembler.symbol_pool)

        self.code = code
        self.machine = machine
        self.mdis = disassembler
        self.ira = ir_analysis
        self.se_engine = SymbolicExecutionEngine(
            ir_analysis, machine.mn.regs.regs_init)

    def execute(self):
        """Symbolically execute the buffer linearly from offset 0.

        Each iteration disassembles the block at the current offset, lifts
        it, emulates it, and moves to the upper bound of the block's range.
        """
        position = 0
        while position < len(self.code):
            asm_block = self.mdis.dis_block(position)
            self.ira.add_block(asm_block)
            self.se_engine.emulbloc(self.ira.get_block(position))
            position = asm_block.get_range()[1]
コード例 #12
0
ファイル: findref.py プロジェクト: commial/Sibyl
class ExtractRef(object):
    '''
    Class used to concolic run a snapshot and extract references to input
    '''

    def __init__(self, testcreator, replayed_snapshot):
        '''
        @testcreator: TestCreator instance with associated information
        @replayed_snapshot: snapshot to be used
        '''
        self.isFuncFound = False
        self.filename = testcreator.program
        self.learned_addr = testcreator.address
        self.snapshot = replayed_snapshot
        # Human readable reasons collected whenever the replay diverges
        self.replayexception = []
        self.abicls = testcreator.abicls
        self.machine = Machine(testcreator.machine)
        self.ira = self.machine.ira()
        # Explicit floor division (presumably a bits -> bytes conversion)
        self.ptr_size = self.ira.sizeof_pointer() // 8
        self.types = testcreator.types
        self.prototype = testcreator.prototype
        self.logger = testcreator.logger

    def use_snapshot(self, jitter):
        '''Initialize the VM with the snapshot information
        @jitter: jitter whose cpu registers and vm memory are set
        '''
        for reg, value in self.snapshot.input_reg.iteritems():
            setattr(jitter.cpu, reg, value)

        # Set values for input memory
        for addr, mem in self.snapshot.in_memory.iteritems():
            assert mem.access != 0
            if not jitter.vm.is_mapped(addr, mem.size):
                jitter.vm.add_memory_page(addr, mem.access, mem.data)
            else:
                if jitter.vm.get_mem_access(addr) & 0b11 == mem.access & 0b11:
                    jitter.vm.set_mem(addr, mem.data)
                else:
                    # TODO memory page is already set but have not the
                    # same access right. However delete page does not
                    # exist
                    jitter.vm.set_mem(addr, mem.data)

    def compare_snapshot(self, jitter):
        '''Compare the expected result with the real one to determine if the
        function is recognized or not.

        Every mismatch is described in self.replayexception.
        @jitter: jitter whose cpu registers and vm memory are inspected
        Return True when all output registers and memory areas match
        '''
        func_found = True

        for reg, value in self.snapshot.output_reg.iteritems():
            found = getattr(jitter.cpu, reg)
            if value != found:
                self.replayexception += [
                    "output register %s wrong : %i expected, %i found" %
                    (reg, value, found)
                ]
                func_found = False

        for addr, mem in self.snapshot.out_memory.iteritems():
            self.logger.debug("Check @%s, %s bytes: %r", hex(addr),
                              hex(mem.size), mem.data[:0x10])
            # Read once; the report used to reference an undefined `offset`
            # name, turning every memory mismatch into a NameError
            found = jitter.vm.get_mem(addr, mem.size)
            if mem.data != found:
                self.replayexception += [
                    "output memory wrong at 0x%x: %s expected, %s found" %
                    (addr, repr(mem.data), repr(found))
                ]
                func_found = False

        return func_found

    def end_func(self, jitter):
        '''Breakpoint handler for the return address (registered in run):
        compare the final state with the snapshot and stop the jitter.
        Always return False.
        '''
        if jitter.vm.is_mapped(getattr(jitter.cpu, self.ira.ret_reg.name), 1):
            self.replayexception += ["return value might be a pointer"]

        self.isFuncFound = self.compare_snapshot(jitter)

        jitter.run = False
        return False

    def is_pointer(self, expr):
        """Return True if expr may be a pointer"""
        target_types = expr_to_types(self.c_handler, expr)

        return any(
            objc_is_dereferenceable(target_type)
            for target_type in target_types)

    def callback(self, jitter):
        '''Callback installed as jitter.exec_cb by prepare_symbexec.

        Records the symbolic memory accesses performed by the last step,
        replaces non-pointer memory reads with their real value, then
        symbolically executes the block at the current address.
        Always return True.
        '''

        # Check previous state
        is_symbolic = lambda expr: (isinstance(expr, m2_expr.ExprMem) and
                                    not isinstance(expr.arg, m2_expr.ExprInt))

        # When it is possible, consider only elements modified in the last run
        # -> speed up to avoid browsing the whole memory
        to_consider = self.symb.modified_exprs

        for symbol in to_consider:
            # Do not consider PC
            if symbol == self.ira.pc:
                continue

            # Write to @NN[... argX ...]
            if is_symbolic(symbol):
                self.memories_write.add(symbol)

            # Read from ... @NN[... argX ...] ...
            symb_value = self.symb.eval_expr(symbol)
            to_replace = {}
            for expr in m2_expr.ExprAff(symbol,
                                        symb_value).get_r(mem_read=True):
                if is_symbolic(expr):
                    if isinstance(expr, m2_expr.ExprMem):
                        # Consider each byte individually
                        # Case: @32[X] with only @8[X+1] to replace
                        addr_expr = expr.arg
                        new_expr = []
                        consider = False
                        for offset in xrange(expr.size // 8):
                            sub_expr = m2_expr.ExprMem(
                                self.symb.expr_simp(
                                    addr_expr + m2_expr.ExprInt(
                                        offset, size=addr_expr.size)), 8)
                            if not self.is_pointer(sub_expr):
                                # Not a PTR, we have to replace with the real value
                                original_sub_expr = sub_expr.replace_expr(
                                    self.init_values)
                                new_expr.append(
                                    self.symb.eval_expr(original_sub_expr))
                                consider = True
                            else:
                                new_expr.append(sub_expr)

                        # Rebuild the corresponding expression
                        if consider:
                            assert len(new_expr) == expr.size // 8
                            to_replace[expr] = m2_expr.ExprCompose(*new_expr)

                    if expr not in self.memories_write:
                        # Do not consider memory already written during the run
                        self.memories_read.add(expr)

            # Replace with real value for non-pointer symbols
            if to_replace:
                symb_value = self.symb.expr_simp(
                    symb_value.replace_expr(to_replace))
                if isinstance(symbol, m2_expr.ExprMem):
                    # Replace only in ptr (case to_replace: @[arg] = 8, expr:
                    # @[arg] = @[arg])
                    symbol = m2_expr.ExprMem(
                        self.symb.expr_simp(
                            symbol.arg.replace_expr(to_replace)), symbol.size)
                self.symb.apply_change(symbol, symb_value)

            # Check computed values against real ones
            # TODO idem memory
            if (isinstance(symbol, m2_expr.ExprId)
                    and isinstance(symb_value, m2_expr.ExprInt)):
                if hasattr(jitter.cpu, symbol.name):
                    value = m2_expr.ExprInt(getattr(jitter.cpu, symbol.name),
                                            symbol.size)
                    assert value == self.symb.symbols[symbol]

        cur_addr = jitter.pc
        self.logger.debug("Current address: %s", hex(cur_addr))
        if cur_addr == 0x1337BEEF or cur_addr == self.return_addr:
            # End reached
            if self.logger.isEnabledFor(logging.DEBUG):
                print("In:")
                for x in self.memories_read:
                    print("\t%s (%s)" % (
                        x,
                        self.c_handler.expr_to_c(x),
                    ))
                print("Out:")
                for x in self.memories_write:
                    print("\t%s (%s)" % (
                        x,
                        self.c_handler.expr_to_c(x),
                    ))
            return True

        # Update state
        ## Reset cache structures
        self.mdis.job_done.clear()
        self.symb_ir.blocs.clear()

        ## Update current state
        asm_block = self.mdis.dis_bloc(cur_addr)
        self.symb_ir.add_bloc(asm_block)

        self.symb.emul_ir_blocks(cur_addr)

        return True

    def prepare_symbexec(self, jitter, return_addr):
        '''Set up the symbolic execution shadowing the jitter.
        @jitter: jitter already initialized with the snapshot
        @return_addr: address at which the tracked function returns
        '''
        # Activate callback on each instr
        jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        #jitter.jit.log_mn = True
        #jitter.jit.log_regs = True
        jitter.exec_cb = self.callback

        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare the symbexec engine
        self.symb_ir = self.machine.ir()
        self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm,
                                              self.symb_ir, {})
        self.symb.enable_emulated_simplifications()

        ## Update registers value
        self.symb.reset_regs()
        self.symb.update_engine_from_cpu()

        ## Load the memory as ExprMem
        self.symb.func_read = None
        self.symb.func_write = None
        for base_addr, mem_segment in jitter.vm.get_all_memory().iteritems():
            # Split into 8 bytes chunk for get_mem_overlapping
            for start in xrange(0, mem_segment["size"], 8):
                expr_mem = m2_expr.ExprMem(m2_expr.ExprInt(base_addr + start,
                                                           size=64),
                                           size=8 *
                                           min(8, mem_segment["size"] - start))
                # Its initialisation, self.symb.apply_change is not necessary
                self.symb.symbols[expr_mem] = self.symb._func_read(expr_mem)

        ## Save the initial state
        self.symbols_init = self.symb.symbols.copy()

        ## Save the returning address
        self.return_addr = return_addr

        # Inject argument
        # TODO
        # TODO: use abicls
        abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
        self.init_values = {}
        struct_expr_types = {}
        self.args_symbols = []
        for i, param_name in enumerate(self.prototype.args_order):
            cur_arg_abi = getattr(self.ira.arch.regs, abi_order[i])
            cur_arg = m2_expr.ExprId("arg%d_%s" % (i, param_name),
                                     size=cur_arg_abi.size)
            arg_type = self.prototype.args[param_name]
            if objc_is_dereferenceable(arg_type):
                # Convert the argument to symbol to track access based on it
                self.init_values[cur_arg] = self.symb.symbols[cur_arg_abi]
                self.symb.apply_change(cur_arg_abi, cur_arg)
            struct_expr_types[cur_arg.name] = arg_type
            self.args_symbols.append(cur_arg)

        # Init Expr <-> C conversion
        # Strict access is deliberately not enforced (example: memcpy(struct))
        self.c_handler = CHandler(self.types,
                                  struct_expr_types,
                                  enforce_strict_access=False)

        # Init output structures
        self.memories_read = set()
        self.memories_write = set()

    def build_references(self):
        """At the end of the execution,
        - Fill memories accesses
        - Prepare output structures

        Enrich the snapshot with outputs
        """

        memory_in = {}
        memory_out = {}

        # Get the resulting symbolic value
        # TODO use abi
        output_value = self.symb.symbols[self.symb.ir_arch.arch.regs.RAX]

        # Fill memory *out* (written)
        for expr in self.memories_write:
            # Eval the expression with the *output* state
            value = self.symb.eval_expr(expr)
            assert isinstance(value, m2_expr.ExprInt)
            memory_out[expr] = value

        # Fill memory *in* (read)
        saved_symbols = self.symb.symbols
        self.symb.symbols = self.symbols_init
        try:
            for expr in self.memories_read:
                # Eval the expression with the *input* state
                original_expr = expr.replace_expr(self.init_values)
                value = self.symb.eval_expr(original_expr)
                assert isinstance(value, m2_expr.ExprInt)
                memory_in[expr] = value
        finally:
            # Put the output state back even when an evaluation fails
            self.symb.symbols = saved_symbols

        if self.logger.isEnabledFor(logging.DEBUG):
            print("In:")
            print(memory_in)
            print("Out:")
            print(memory_out)
            print("Final value:")
            print(output_value)

        self.snapshot.memory_in = AssignBlock(memory_in)
        self.snapshot.memory_out = AssignBlock(memory_out)
        self.snapshot.output_value = output_value
        self.snapshot.c_handler = self.c_handler
        self.snapshot.arguments_symbols = self.args_symbols

    def run(self):
        '''Main function that is in charge of running the test and return the result:
        true if the snapshot has recognized the function, false else.'''

        # TODO inherit from Replay
        jitter = self.machine.jitter(config.miasm_engine)

        # Load the program; the file is closed as soon as it has been read
        with open(self.filename, "rb") as elf_file:
            vm_load_elf(jitter.vm, elf_file.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(FS_0_ADDR + 0x28, PAGE_READ,
                                  "\x42\x42\x42\x42\x42\x42\x42\x42",
                                  "Stack canary FS[0x28]")

        # Init the jitter with the snapshot
        self.use_snapshot(jitter)

        # Get the return address for our breakpoint
        return_addr = struct.unpack("P",
                                    jitter.vm.get_mem(jitter.cpu.RSP, 0x8))[0]
        jitter.add_breakpoint(return_addr, self.end_func)

        # Prepare the execution
        jitter.init_run(self.learned_addr)
        self.prepare_symbexec(jitter, return_addr)

        # Run the execution
        # NOTE: the except clause also catches AssertionError raised from
        # within the callbacks, not only the check on jitter.run below
        try:
            jitter.continue_run()
            assert jitter.run == False
        except AssertionError:
            if jitter.vm.get_exception() & EXCEPT_ACCESS_VIOL:
                self.replayexception += ["access violation"]
            elif jitter.vm.get_exception() & EXCEPT_DIV_BY_ZERO:
                self.replayexception += ["division by zero"]
            elif jitter.vm.get_exception() & EXCEPT_PRIV_INSN:
                self.replayexception += ["execution of private instruction"]
            elif jitter.vm.get_exception():
                self.replayexception += [
                    "exception no %i" % (jitter.vm.get_exception())
                ]
            else:
                raise
            self.isFuncFound = False

        # Rebuild references
        self.build_references()

        return self.isFuncFound
コード例 #13
0
# Minimalist Symbol Exec example
from miasm2.core.bin_stream import bin_stream_str
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine

# Walk-through: assemble one instruction, disassemble it back, lift it to IR
# and symbolically execute the resulting block.

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream)
asm_block = mdis.dis_bloc(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.symbol_pool)
ira.add_bloc(asm_block)

# Instantiate a Symbolic Execution engine with default value for registers
## EAX = EAX_init, ...
symbols_init = ira.arch.regs.regs_init
symb = SymbolicExecutionEngine(ira, symbols_init)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
# The value returned by the emulation is kept as the next address
cur_addr = symb.emul_ir_block(START_ADDR)
コード例 #14
0
    p = log.progress("emulating step%d" % x)
    for i in range((len(bytecode) * 8) - 1):
        if i % 100 == 0:
            p.status("%d/%d" % (i, (len(bytecode) * 8) - 1))
        sb.call(0x5DD9, i)  # VMDecodeInstruction
    # with open("all_steps", "w") as f:
    #     import json
    #     json.dump(lt, f)
    print(lt)


# Build the sandbox from the command line; the ELF to emulate is the only
# positional argument.
parser = Sandbox_Linux_x86_64.parser(description="ELF sandboxer")
parser.add_argument("filename", help="ELF Filename")
options = parser.parse_args()
sb = Sandbox_Linux_x86_64(options.filename, options, globals())
machine = Machine('x86_64')
# Close the ELF once the container is built instead of leaking the handle
with open(options.filename) as elf_stream:
    cont = Container.from_stream(elf_stream)
mdis = machine.dis_engine(cont.bin_stream)

# break on return of VMDecodeInstruction
sb.jitter.add_breakpoint(0x2C87F, GetMem)
# Break on call to _malloc because for some reason MIASM can't find the symbol
sb.jitter.add_breakpoint(0x21B0, Malloc)
# break on return of VMDecodeBytecode to return the good value that could not
# be loaded because of global variable not set
sb.jitter.add_breakpoint(0x4A9C, GetBytecode)

# Emulate every step with its own bytecode and lookup table; the last run
# processes step1 and step2 concatenated.
emulate(sb, 0, step0, lookup_table_step0)
emulate(sb, 1, step1, lookup_table_step1)
emulate(sb, 2, step2, lookup_table_step2)
emulate(sb, -1, step1 + step2, lookup_table_stepX)
コード例 #15
0
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine
from miasm2.core.locationdb import LocationDB

# Walk-through: assemble one instruction, disassemble it back, lift it to IR
# and symbolically execute the resulting block, sharing one LocationDB.
# NOTE(review): bin_stream_str is used below but is not imported in the
# visible part of this snippet - confirm the import exists.

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, loc_db=loc_db)
# Presumably limits the disassembled block to a single line - confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ira.add_block(asm_block)

# Instantiate a Symbolic Execution engine; an empty dict is given as state
symb = SymbolicExecutionEngine(ira, {})

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
# The value returned by run_at is kept as the next address
cur_addr = symb.run_at(START_ADDR)

# Modified elements
コード例 #16
0
ファイル: single_instr.py プロジェクト: litestar/miasm
from miasm2.core.asmblock import AsmSymbolPool

# Walk-through: assemble one instruction, disassemble it back, lift it to IR
# and symbolically execute the resulting block, sharing one AsmSymbolPool.
# NOTE(review): Machine, bin_stream_str and SymbolicExecutionEngine are not
# imported in the visible part of this snippet - confirm the imports exist.

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")

symbol_pool = AsmSymbolPool()


# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool)
# Presumably limits the disassembled block to a single line - confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.symbol_pool)
ira.add_block(asm_block)

# Instantiate a Symbolic Execution engine; an empty dict is given as state
symb = SymbolicExecutionEngine(ira, {})

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
# The value returned by run_at is kept as the next address
cur_addr = symb.run_at(START_ADDR)

# Modified elements
コード例 #17
0
ファイル: single_instr.py プロジェクト: commial/miasm
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine
from miasm2.core.locationdb import LocationDB

# Walk-through: assemble one instruction, disassemble it back, lift it into
# an IR control flow graph and symbolically execute it from START_ADDR.
# NOTE(review): bin_stream_str is used below but is not imported in the
# visible part of this snippet - confirm the import exists.

# Address at which the block is disassembled and emulated
START_ADDR = 0
machine = Machine("x86_32")
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Keep the first encoding returned by the assembler
asm = machine.mn.asm(line)[0]

# Get back block
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream, loc_db=loc_db)
# Presumably limits the disassembled block to a single line - confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
# The IR blocks are stored in a dedicated IR CFG object
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
# (no explicit initial state is passed here)
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
# The value returned by run_at is kept as the next address
cur_addr = symb.run_at(ircfg, START_ADDR)
コード例 #18
0
ファイル: solve_condition_stp.py プロジェクト: vbrandl/miasm
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")


if __name__ == '__main__':
    # Script entry point: load the binary named by the first positional
    # argument, prepare disassembly / IR / symbolic execution objects, then
    # parse a small stub pushing argv, argc and ret_addr.
    # `options`, `args` and `machine` come from code outside this section.

    translator_smt2 = Translator.to_language("smt2")

    # Start address, given in hexadecimal on the command line
    addr = int(options.address, 16)

    # Close the binary once the container is built instead of leaking the
    # file handle
    with open(args[0]) as binary_stream:
        cont = Container.from_stream(binary_stream)
    mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
    ir_arch = machine.ir(mdis.loc_db)
    ircfg = ir_arch.new_ircfg()
    symbexec = SymbolicExecutionEngine(ir_arch)

    # The stub shares the disassembler's location database
    asmcfg, loc_db = parse_asm.parse_txt(machine.mn,
                                         32,
                                         '''
    init:
    PUSH argv
    PUSH argc
    PUSH ret_addr
    ''',
                                         loc_db=mdis.loc_db)

    # Location registered under the name 'argc'
    argc_lbl = loc_db.get_name_location('argc')
コード例 #19
0
ファイル: depgraph.py プロジェクト: Junraa/miasm
# Load the binary; the stream is only needed while the container is built
with open(args.filename) as fstream:
    cont = Container.from_stream(fstream)

# An architecture given on the command line takes precedence over the one
# reported by the container
arch = args.architecture if args.architecture else cont.arch
machine = Machine(arch)

# Check elements
# Each requested element name is resolved through the architecture's
# name -> register table; unknown names are rejected
elements = set()
regs = machine.mn.regs.all_regs_ids_byname
for element in args.element:
    try:
        elements.add(regs[element])
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.symbol_pool)

# Common argument forms
# Optional initial context mapping memory expressions to named arguments
init_ctx = {}
if args.rename_args:
    if arch == "x86_32":
        # StdCall example
        # The 32-bit memory at ESP_init + 4 * (i + 1) is named arg<i>
        for i in xrange(4):
            e_mem = ExprMem(ExprId("ESP_init") + ExprInt32(4 * (i + 1)), 32)
            init_ctx[e_mem] = ExprId("arg%d" % i)

# Disassemble the targeted function
# Base 0 lets int() accept a prefixed literal such as 0x... for the address
blocks = mdis.dis_multibloc(int(args.func_addr, 0))

# Generate IR
コード例 #20
0
ファイル: findref.py プロジェクト: serpilliere/Sibyl
class ExtractRef(object):
    '''
    Class used to concolic run a snapshot and extract references to input
    '''

    def __init__(self, testcreator, replayed_snapshot):
        '''
        Set up the replay context from a test creator and a snapshot
        @testcreator: TestCreator instance with associated information
        @replayed_snapshot: snapshot to be used
        '''
        # Replay state
        self.snapshot = replayed_snapshot
        self.isFuncFound = False
        self.replayexception = []

        # Information forwarded from the test creator
        self.filename = testcreator.program
        self.learned_addr = testcreator.address
        self.abicls = testcreator.abicls
        self.types = testcreator.types
        self.prototype = testcreator.prototype
        self.logger = testcreator.logger

        # Architecture dependent helpers
        machine = Machine(testcreator.machine)
        ira = machine.ira()
        self.machine = machine
        self.ira = ira
        # sizeof_pointer() divided by 8
        self.ptr_size = ira.sizeof_pointer()/8

    def use_snapshot(self, jitter):
        '''Load the snapshot inputs (registers, then memory) into the VM'''
        # Input registers
        for reg_name, reg_value in self.snapshot.input_reg.iteritems():
            setattr(jitter.cpu, reg_name, reg_value)

        # Input memory
        for address, page in self.snapshot.in_memory.iteritems():
            assert page.access != 0

            if not jitter.vm.is_mapped(address, page.size):
                # Unknown area: create the page with the snapshot content
                jitter.vm.add_memory_page(address, page.access, page.data)
                continue

            # The area already exists: compare the two low access bits
            current_rights = jitter.vm.get_mem_access(address) & 0b11
            wanted_rights = page.access & 0b11
            if current_rights != wanted_rights:
                # TODO the page is already set but does not have the same
                # access rights. However deleting a page is not possible, so
                # the content is written in place in both cases
                pass
            jitter.vm.set_mem(address, page.data)

    def compare_snapshot(self, jitter):
        '''Compare the state reached by @jitter with the expected one

        Every mismatch found on the output registers or on the output memory
        is described by a message appended to self.replayexception.
        @jitter: jitter instance holding the state to check
        Return True if no mismatch is found (function recognized), else False
        '''
        func_found = True

        for reg, value in self.snapshot.output_reg.iteritems():
            found = getattr(jitter.cpu, reg)
            if value != found:
                self.replayexception += ["output register %s wrong : %i expected, %i found" % (reg, value, found)]
                func_found = False

        for addr, mem in self.snapshot.out_memory.iteritems():
            self.logger.debug("Check @%s, %s bytes: %r", hex(addr), hex(mem.size), mem.data[:0x10])
            found = jitter.vm.get_mem(addr, mem.size)
            if mem.data != found:
                # Report the address of the compared chunk. No per-byte offset
                # exists in this method: referencing one raised a NameError
                # instead of recording the mismatch
                self.replayexception += ["output memory wrong at 0x%x: %s expected, %s found" % (addr, repr(mem.data), repr(found))]
                func_found = False

        return func_found

    def end_func(self, jitter):
        '''Record the verdict of the replay, then clear jitter.run

        Always return False
        '''
        returned = getattr(jitter.cpu, self.ira.ret_reg.name)
        if jitter.vm.is_mapped(returned, 1):
            # The value of the return register is a mapped address
            self.replayexception.append("return value might be a pointer")

        self.isFuncFound = self.compare_snapshot(jitter)

        jitter.run = False
        return False

    def is_pointer(self, expr):
        """Tell whether @expr may be a pointer, that is whether at least one
        of the types found for it by the C handler is dereferenceable"""
        for candidate in expr_to_types(self.c_handler, expr):
            if objc_is_dereferenceable(candidate):
                return True
        return False

    def is_symbolic(self, expr):
        """Tell whether @expr is a memory access whose address (expr.arg) is
        not an integer"""
        is_memory = expr.is_mem()
        if not is_memory:
            return is_memory
        return not expr.arg.is_int()

    def get_arg_n(self, arg_number):
        """Build the Expression holding the argument number @arg_number:
        one of the six listed registers for the first arguments, a 64-bit
        memory slot relative to RSP for the following ones"""
        # TODO use abicls
        reg_size = 64
        arg_regs = ("RDI", "RSI", "RDX", "RCX", "R8", "R9")

        if arg_number < len(arg_regs):
            return m2_expr.ExprId(arg_regs[arg_number], reg_size)

        # Argument passed on the stack: the first one is one slot above RSP
        stack_ptr = m2_expr.ExprId("RSP", 64)
        slot = arg_number - len(arg_regs) + 1
        slot_offset = m2_expr.ExprInt(slot * reg_size / 8, reg_size)
        return m2_expr.ExprMem(stack_ptr + slot_offset, reg_size)

    def callback(self, jitter):
        """Execution callback, installed as jitter.exec_cb by prepare_symbexec

        In a first step, every expression modified by the last symbolic run
        (PC excepted) is inspected:
        - symbolic memory reads are recorded in self.memories_read, unless
          they have already been written during the run
        - symbolic memory writes are recorded in self.memories_write
        - bytes read through a non-pointer expression are replaced by their
          concrete value
        - registers evaluated to an integer are checked against jitter.cpu

        In a second step, unless the end of the function is reached, the
        block at the current address is disassembled, translated to IR and
        symbolically executed.

        @jitter: jitter instance driving the concrete execution
        Always return True
        """

        # Check previous state

        # When it is possible, consider only elements modified in the last run
        # -> speed up to avoid browsing the whole memory
        to_consider = self.symb.modified_exprs

        for symbol in to_consider:
            # Do not consider PC
            if symbol == self.ira.pc:
                continue

            # Read from ... @NN[... argX ...] ...
            symb_value = self.symb.eval_expr(symbol)
            # Expressions to substitute with concrete values, filled below
            to_replace = {}
            for expr in m2_expr.ExprAff(symbol,
                                        symb_value).get_r(mem_read=True):
                if self.is_symbolic(expr):
                    # NOTE(review): is_symbolic() already requires
                    # expr.is_mem(); this isinstance check looks redundant -
                    # confirm
                    if isinstance(expr, m2_expr.ExprMem):
                        # Consider each byte individually
                        # Case: @32[X] with only @8[X+1] to replace
                        addr_expr = expr.arg
                        new_expr = []
                        # Becomes True once at least one byte is replaced
                        consider = False
                        for offset in xrange(expr.size/8):
                            sub_expr = m2_expr.ExprMem(self.symb.expr_simp(addr_expr + m2_expr.ExprInt(offset, size=addr_expr.size)),
                                                       8)
                            if not self.is_pointer(sub_expr):
                                # Not a PTR, we have to replace with the real value
                                original_sub_expr = sub_expr.replace_expr(self.init_values)
                                new_expr.append(self.symb.eval_expr(original_sub_expr))
                                consider = True
                            else:
                                new_expr.append(sub_expr)

                        # Rebuild the corresponding expression
                        if consider:
                            assert len(new_expr) == expr.size / 8
                            to_replace[expr] = m2_expr.ExprCompose(*new_expr)

                    if expr not in self.memories_write:
                        # Do not consider memory already written during the run
                        self.memories_read.add(expr)

            # Write to @NN[... argX ...]
            # Must be after Read, case: @[X] = f(@[X])
            if self.is_symbolic(symbol):
                self.memories_write.add(symbol)


            # Replace with real value for non-pointer symbols
            if to_replace:
                symb_value = self.symb.expr_simp(symb_value.replace_expr(to_replace))
                if isinstance(symbol, m2_expr.ExprMem):
                    # Replace only in ptr (case to_replace: @[arg] = 8, expr:
                    # @[arg] = @[arg])
                    symbol = m2_expr.ExprMem(self.symb.expr_simp(symbol.arg.replace_expr(to_replace)),
                                      symbol.size)
                self.symb.apply_change(symbol, symb_value)

            # Check computed values against real ones
            # TODO idem memory
            if (isinstance(symbol, m2_expr.ExprId) and
                isinstance(symb_value, m2_expr.ExprInt)):
                if hasattr(jitter.cpu, symbol.name):
                    # The register value stored by the symbolic engine must
                    # be the one held by the jitter CPU
                    value = m2_expr.ExprInt(getattr(jitter.cpu, symbol.name),
                                            symbol.size)
                    assert value == self.symb.symbols[symbol]

        cur_addr = jitter.pc
        self.logger.debug("Current address: %s", hex(cur_addr))
        # NOTE(review): 0x1337BEEF is presumably a fake return address set by
        # the code preparing the run - confirm
        if cur_addr == 0x1337BEEF or cur_addr == self.return_addr:
            # End reached
            if self.logger.isEnabledFor(logging.DEBUG):
                # Dump the recorded accesses, with their C form
                print "In:"
                for x in self.memories_read:
                    print "\t%s (%s)" % (x,
                                         self.c_handler.expr_to_c(x),
                    )
                print "Out:"
                for x in self.memories_write:
                    print "\t%s (%s)" % (x,
                                         self.c_handler.expr_to_c(x),
                    )
            return True

        # Update state
        ## Reset cache structures
        self.mdis.job_done.clear()
        self.symb_ir.blocks.clear()

        ## Update current state
        asm_block = self.mdis.dis_bloc(cur_addr)
        # NOTE(review): irblocks is not used afterwards in this method
        irblocks = self.symb_ir.add_bloc(asm_block)

        self.symb.emul_ir_blocks(cur_addr)

        return True

    def prepare_symbexec(self, jitter, return_addr):
        '''Set up the symbolic execution running alongside @jitter
        @jitter: jitter instance used for the concrete execution
        @return_addr: address the replayed function returns to
        '''
        # Call back after each instruction
        jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        #jitter.jit.log_mn = True
        #jitter.jit.log_regs = True
        jitter.exec_cb = self.callback

        # Disassembler, reading the VM memory one line at a time
        vm_stream = bin_stream_vm(jitter.vm)
        self.mdis = self.machine.dis_engine(vm_stream, lines_wd=1)

        # Symbolic execution engine
        self.symb_ir = self.machine.ir()
        engine = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm, self.symb_ir, {})
        self.symb = engine
        engine.enable_emulated_simplifications()

        ## Bring the registers in line with the CPU
        engine.reset_regs()
        engine.update_engine_from_cpu()

        ## Mirror the whole memory as ExprMem
        engine.func_read = None
        engine.func_write = None
        for segment_base, segment in jitter.vm.get_all_memory().iteritems():
            segment_size = segment["size"]
            # Chunks of 8 bytes (the last one may be shorter), as needed by
            # get_mem_overlapping
            for chunk_start in xrange(0, segment_size, 8):
                chunk_bytes = min(8, segment_size - chunk_start)
                chunk_addr = m2_expr.ExprInt(segment_base + chunk_start,
                                             size=64)
                chunk = m2_expr.ExprMem(chunk_addr, size=8*chunk_bytes)
                # Initialisation step: apply_change is not necessary here
                engine.symbols[chunk] = engine._func_read(chunk)

        ## Keep a copy of the initial state
        self.symbols_init = engine.symbols.copy()

        ## Remember the returning address
        self.return_addr = return_addr

        # Argument injection
        self.init_values = {}
        arg_types = {}
        self.args_symbols = []
        for index, name in enumerate(self.prototype.args_order):
            abi_expr = self.get_arg_n(index)
            arg_symbol = m2_expr.ExprId("arg%d_%s" % (index, name),
                                        size=abi_expr.size)
            # Concrete value of the argument before the run
            self.init_values[arg_symbol] = engine.eval_expr(abi_expr)
            c_type = self.prototype.args[name]
            if objc_is_dereferenceable(c_type):
                # Dereferenceable argument: make it a symbol, so that the
                # accesses based on it can be tracked
                engine.apply_change(abi_expr, arg_symbol)
            arg_types[arg_symbol.name] = c_type
            self.args_symbols.append(arg_symbol)

        # Expr <-> C conversion
        # Strict access is deliberately not enforced (example: memcpy(struct))
        self.c_handler = CHandler(self.types, arg_types,
                                  enforce_strict_access=False)

        # Output structures
        self.memories_read = set()
        self.memories_write = set()

    def build_references(self):
        """At the end of the execution,
        - Fill memories accesses
        - Prepare output structures

        Enrich the snapshot with outputs: memory_in, memory_out,
        output_value, c_handler, arguments_symbols and init_values
        """

        # Get the resulting symbolic value
        # TODO use abi
        output_value = self.symb.symbols[self.symb.ir_arch.arch.regs.RAX]

        # Fill memory *out* (written)
        # Each expression is evaluated with the *output* state
        memory_out = {expr: self.symb.eval_expr(expr)
                      for expr in self.memories_write}

        # Fill memory *in* (read)
        memory_in = {}
        saved_symbols = self.symb.symbols
        self.symb.symbols = self.symbols_init
        try:
            for expr in self.memories_read:
                # Eval the expression with the *input* state
                original_expr = expr.replace_expr(self.init_values)
                value = self.symb.eval_expr(original_expr)
                assert isinstance(value, m2_expr.ExprInt)
                memory_in[expr] = value
        finally:
            # Always switch back to the output state, even if the evaluation
            # or the assertion above fails; otherwise the engine would be
            # left on the initial symbols
            self.symb.symbols = saved_symbols

        if self.logger.isEnabledFor(logging.DEBUG):
            # Single-argument print calls behave the same with the print
            # statement and with the print function
            print("In:")
            print(memory_in)
            print("Out:")
            print(memory_out)
            print("Final value:")
            print(output_value)

        self.snapshot.memory_in = AssignBlock(memory_in)
        self.snapshot.memory_out = AssignBlock(memory_out)
        self.snapshot.output_value = output_value
        self.snapshot.c_handler = self.c_handler
        self.snapshot.arguments_symbols = self.args_symbols
        self.snapshot.init_values = self.init_values

    def run(self):
        '''Main function that is in charge of running the test and return the result:
        True if the snapshot has recognized the function, False otherwise.

        The binary is loaded in a new jitter, the snapshot is applied, then
        the function is executed up to its return address while the symbolic
        engine follows the execution. VM exceptions met during the run are
        described in self.replayexception. The snapshot is finally enriched
        by build_references().
        '''

        # TODO inherit from Replay
        jitter = self.machine.jitter(config.miasm_engine)

        # Read the binary through a context manager, so that the file is
        # closed as soon as its content has been loaded
        with open(self.filename, "rb") as fdesc:
            vm_load_elf(jitter.vm, fdesc.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(
            FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42", "Stack canary FS[0x28]")

        # Init the jitter with the snapshot
        self.use_snapshot(jitter)

        # Get the return address for our breakpoint
        # The 8 bytes at RSP belong to the emulated 64-bit little-endian
        # target: decode them with an explicit "<Q" rather than "P", whose
        # size and byte order are those of the host
        return_addr = struct.unpack("<Q", jitter.vm.get_mem(jitter.cpu.RSP,
                                                            0x8))[0]
        jitter.add_breakpoint(return_addr, self.end_func)

        # Prepare the execution
        jitter.init_run(self.learned_addr)
        self.prepare_symbexec(jitter, return_addr)

        # Run the execution
        try:
            jitter.continue_run()
            # end_func clears jitter.run when the return address is reached
            assert jitter.run == False
        except AssertionError:
            # Translate a pending VM exception into a replay error; an
            # assertion failure without VM exception is propagated
            exception_flags = jitter.vm.get_exception()
            if exception_flags & EXCEPT_ACCESS_VIOL:
                self.replayexception += ["access violation"]
            elif exception_flags & EXCEPT_DIV_BY_ZERO:
                self.replayexception += ["division by zero"]
            elif exception_flags & EXCEPT_PRIV_INSN:
                self.replayexception += ["execution of private instruction"]
            elif exception_flags:
                self.replayexception += ["exception no %i" % exception_flags]
            else:
                raise
            self.isFuncFound = False

        # Rebuild references
        self.build_references()

        return self.isFuncFound
コード例 #21
0
# Parse the input file into a container; the file is closed when the
# "with" block exits
with open(args.filename) as fstream:
    cont = Container.from_stream(fstream)

# An architecture given on the command line takes precedence over the one
# reported by the container
arch = args.architecture if args.architecture else cont.arch
machine = Machine(arch)

# Check elements
# Names are upper-cased before the lookup in the architecture's
# all_regs_ids_byname mapping; an unknown name aborts with a ValueError
elements = set()
regs = machine.mn.regs.all_regs_ids_byname
for element in args.element:
    try:
        elements.add(regs[element.upper()])
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.symbol_pool)

# Disassemble the targeted function
# The function address is parsed as a hexadecimal string
blocks = mdis.dis_multibloc(int(args.func_addr, 16))

# Generate IR
for block in blocks:
    ir_arch.add_bloc(block)

# Build the IRA Graph
ir_arch.gen_graph()

# Get the instance
dg = DependencyGraph(ir_arch,
                     implicit=args.implicit,
コード例 #22
0
class PEAnalysis(object):
    """A basic static analysis for binary files that utilises Miasm
    to recover functions contained in the file.
    """
    def _set_logging(self, verbose):
        """Setup up an internal logger.

        Args:
            verbose (bool): affects log verbosity
        """
        self._logger = getLogger(hex(hash(self)))
        if verbose:
            self._logger.setLevel(DEBUG)
        else:
            self._logger.setLevel(INFO)
        log_handler = StreamHandler(stdout)
        log_handler.setFormatter(Formatter('[%(levelname)s]\t%(message)s'))
        self._logger.addHandler(log_handler)

    def __init__(self, filename, verbose=False):
        """Load binary file.

        The file is parsed into a Miasm container, from which the binary
        stream, the entry point and the architecture are taken. The analysis
        state (discovered functions, analyzed interval) starts empty.

        Args:
            filename (str): path to a file to be analyzed
            verbose (bool): affects log verbosity
        """
        # Binary mode keeps the bytes untouched on every platform, and the
        # context manager closes the file once the container is built
        with open(filename, "rb") as fstream:
            self._container = Container.from_stream(fstream)
        self.bin_stream = self._container.bin_stream
        self.entry_point = self._container.entry_point
        self.machine = Machine(self._container.arch)

        # Discovered functions, indexed by their starting offset
        self.fn = {}
        # Offsets covered by the lines disassembled so far
        self.interval = interval()
        self.deep = 0
        self.offset = 0

        self._set_logging(verbose)
        self._logger.info("PE loaded")

    def _update_interval(self, block):
        """Update analyzed interval.

        Args:
            block (AsmBlock): extend interval describing used code
                by offsets from this block
        """
        for line in block.lines:
            self.interval += interval([(line.offset, line.offset + line.l)])

    def process_fn(self, offset):
        """Use Miasm to explore reachable code and add discovered functions.

        Args:
            offset (int): a starting offset, preferably entry point
        """
        if offset in self.fn:
            return

        mdis = self.machine.dis_engine(self.bin_stream)
        self.fn[offset] = mdis.dis_multiblock(offset)
        self._logger.debug("sub_" + hex(offset)[2:] + " added")

        map(self._update_interval, self.fn[offset])

        for block in self.fn[offset]:
            instr = block.get_subcall_instr()
            if not instr:
                continue
            for dest in instr.getdstflow(mdis.symbol_pool):
                if not (isinstance(dest, ExprId)
                        and isinstance(dest.name, AsmLabel)):
                    continue
                self.process_fn(dest.name.offset)

    def process_rest(self):
        """Try to naively explore bytes lying in regions that are not
        covered by interval.
        """
        for _, right in self.interval.intervals:
            if right in self.fn:
                continue
            self.process_fn(right)

    def analyze(self, analyze_unreachable=False):
        """Explore the binary file, try to find code. The search
        starts at the binary file's declared entry point.

        Args:
            analyze_unreachable (bool): analyze code that could not
                be located during reachable code exploration.
        """
        self.process_fn(self.entry_point)
        self._logger.info("reachable code analysis done")
        self.deep = 1
        if analyze_unreachable:
            self.deep = 2
            self.process_rest()
            self._logger.info("unreachable code analysis done")