def execute(
            self,
            start: int,
            end: int = None,
            *,
            context: ProcessorContext = None) -> ProcessorContext:
        """
        Runs emulation beginning at the start instruction and halting just before the end instruction.
        (When no end is supplied, only the start instruction is emulated.)

        Emulation halts at whichever happens first: the end instruction is reached, or the
        emulator's maximum instruction count (self.max_instructions) has been emulated.

        Loops are followed as they would be during real execution. To instead force emulation
        down the path leading to the end instruction, use iter_context_at() or context_at().

        :param start: Address of the first instruction to emulate.
        :param end: Address at which emulation stops. (non-inclusive)
        :param context: An existing context to emulate on top of.
            When given, this call simply delegates to execute() on that context.

        :returns: The ProcessorContext after emulation.
            This is the very same object that was passed in, if one was provided.
        """
        # Fall back to a brand new context when the caller didn't hand one in.
        active = context or self.new_context()
        active.execute(start=start, end=end, max_instructions=self.max_instructions)
        return active
Example #2
0
def test_issue_7():
    """Verifies read_data() with the WIDE_STRING data type returns exactly what was written."""
    from kordesii.utils import function_tracing
    from kordesii.utils.function_tracing.cpu_context import ProcessorContext

    address = 0x123000
    context = ProcessorContext.from_arch()

    # Same text twice: first with the null byte after each character, then before it.
    samples = (
        b'/\x00f\x00a\x00v\x00.\x00i\x00c\x00o\x00',
        b'\x00/\x00f\x00a\x00v\x00.\x00i\x00c\x00o',
    )
    for expected in samples:
        # Each sample overwrites the previous one at the same address.
        context.memory.write(address, expected)
        actual = context.read_data(address, data_type=function_tracing.WIDE_STRING)
        assert actual == expected
        def emulated_function(*args, context: ProcessorContext = None):
            """
            Emulates a function and returns the result found in the return register (context.ret).

            :param *args: Arguments to pass into function before emulation.
                Each argument must be an int (used directly as the value) or bytes
                (written into freshly allocated memory, with the pointer used as the value).
                If enforce_args is not enabled, the number of arguments provided can be less or more
                than the number of arguments required by the function.
                Any arguments not provided will default to whatever is set in the context.
            :param context: CPU context to use. If not provided an empty context will be used.
                If you would like to examine the context after emulation, you must provide your own.

            :returns: The value of context.ret, or the data it points to when a
                return_type and/or return_size was requested.

            :raises ValueError: If the supplied context was created from a different emulator.
            :raises TypeError: If enforce_args is enabled and incorrect number of positional args have been provided,
                or if an argument is neither an int nor bytes.
            """
            if context and context.emulator != self:
                raise ValueError(
                    "Supplied context must be created from same emulator.")
            context = context or self.new_context()
            # (ip must be set in order to get correct function arguments in signature.)
            context.ip = func_obj.start_ea

            # Temporarily turn off branch tracking since it is unneeded and will just waste time.
            orig_branch_tracking = self.branch_tracking
            self.branch_tracking = False
            try:
                # Fill in context with argument values.
                func_sig = context.get_function_signature(func_obj.start_ea)
                if enforce_args and len(func_sig.args) != len(args):
                    raise TypeError(
                        f"Function takes {len(func_sig.args)} positional arguments, but {len(args)} were given."
                    )

                logger.debug(f'Emulating {func_sig.name}')
                for arg, func_arg in zip(args, func_sig.args):
                    if isinstance(arg, int):
                        logger.debug(
                            f'Setting argument {func_arg.name} = {repr(arg)}')
                        func_arg.value = arg
                    elif isinstance(arg, bytes):
                        # Bytes are passed by reference: stash them in emulated memory and pass the pointer.
                        ptr = context.mem_alloc(len(arg))
                        context.mem_write(ptr, arg)
                        logger.debug(
                            f'Setting argument {func_arg.name} = {hex(ptr)} ({repr(arg)})'
                        )
                        func_arg.value = ptr
                    else:
                        raise TypeError(f'Invalid arg type {type(arg)}')

                context.execute(func_obj.start_ea,
                                end=func_obj.end_ea,
                                max_instructions=self.max_instructions)

                # Dereference the return value only if the caller asked for a type and/or size.
                if return_type is not None or return_size is not None:
                    result = context.read_data(context.ret,
                                               size=return_size,
                                               data_type=return_type)
                else:
                    result = context.ret

                logger.debug(f'Returned: {repr(result)}')
                return result
            finally:
                # Always restore the emulator's setting, even when argument validation
                # or emulation raised, so a failed call can't leave tracking disabled.
                self.branch_tracking = orig_branch_tracking
    def _execute_to(
            self,
            ea,
            *,
            context: ProcessorContext = None) -> ProcessorContext:
        """
        Emulates from the start of the function containing ea up to, but not including, ea,
        either on a freshly created cpu_context or on top of the one supplied.

        This is a hybrid of the non-loop-following mode: branches are followed as emulated,
        unless the branch taken lands in a block which cannot lead to the desired end address,
        in which case the other branch is forced instead.

        Internal helper used by iter_context_at() when following loops.

        :param int ea: ea of interest
        :param context: ProcessorContext to emulate on. A new one is created when omitted.

        :raises RuntimeError: If maximum number of instructions have been hit.
        """
        context = context or self.new_context()

        flowchart = Flowchart.from_cache(ea)
        func_obj = utils.Function(ea)

        entry_block = flowchart.find_block(func_obj.start_ea)
        target_block = flowchart.find_block(ea)

        # Only the target block and its ancestors are acceptable stops along the way.
        reachable = target_block.ancestors()
        reachable.add(target_block)

        remaining = self.max_instructions

        # Walk block by block from the function entry, emulating every instruction, until
        # the target block is entered. Whenever emulation would branch into a block that
        # cannot reach the target, the branch is overridden.
        block = entry_block
        while block != target_block:
            # Instructions are run one at a time because a block's end_ea lies outside
            # the block, so execute() with a start/end range can't be used here.
            for head in block.heads():
                context.execute(head)
                remaining -= 1

            if remaining <= 0:
                raise RuntimeError("Hit maximum number of instructions.")

            candidates = [succ for succ in block.succs() if succ in reachable]
            assert candidates, "Expected there to be at least 1 valid successor block."

            # Prefer the candidate emulation actually branched to...
            chosen = next(
                (succ for succ in candidates if context.ip == succ.start_ea), None)
            if chosen is None:
                # ...otherwise force the branch into the first acceptable candidate.
                chosen = candidates[0]
                context.ip = chosen.start_ea
                context.prep_for_branch(chosen.start_ea)

            block = chosen

        # Finish by emulating the target block up to (not including) ea.
        context.execute(start=block.start_ea, end=ea)

        return context
def test_builtin_funcs():
    """
    Exercises the emulated string builtins: strcat, strncat, strcpy, strncpy, strdup, strndup
    and strlen, together with their wide-character counterparts where tested.
    """
    from kordesii.utils import function_tracing
    from kordesii.utils.function_tracing.cpu_context import ProcessorContext
    from kordesii.utils.function_tracing import builtin_funcs

    src, dst = 0x123000, 0x124000
    wide = function_tracing.WIDE_STRING
    encodings = ('utf-16-le', 'utf-16-be')

    def seeded(src_data, dst_data=None):
        """Returns a fresh context with src (and optionally dst) pre-populated."""
        fresh = ProcessorContext.from_arch()
        fresh.memory.write(src, src_data)
        if dst_data is not None:
            fresh.memory.write(dst, dst_data)
        return fresh

    # strcat / wcscat
    context = seeded(b'world', b'hello')
    assert builtin_funcs.strcat(context, 'strcat', [dst, src]) == dst
    assert context.read_data(dst) == b'helloworld'
    for encoding in encodings:
        context = seeded('world'.encode(encoding), 'hello'.encode(encoding))
        assert builtin_funcs.strcat(context, 'wcscat', [dst, src]) == dst
        assert context.read_data(dst, data_type=wide) == 'helloworld'.encode(encoding)

    # strncat / wcsncat
    context = seeded(b'world', b'hello')
    assert builtin_funcs.strncat(context, 'strncat', [dst, src, 10]) == dst
    assert context.read_data(dst) == b'helloworld'
    assert builtin_funcs.strncat(context, 'strncat', [dst, src, 2]) == dst
    assert context.read_data(dst) == b'helloworldwo'
    for encoding in encodings:
        context = seeded('world'.encode(encoding), 'hello'.encode(encoding))
        assert builtin_funcs.strncat(context, 'wcsncat', [dst, src, 10]) == dst
        assert context.read_data(dst, data_type=wide) == 'helloworld'.encode(encoding)
        assert builtin_funcs.strncat(context, 'wcsncat', [dst, src, 2]) == dst
        assert context.read_data(dst, data_type=wide) == 'helloworldwo'.encode(encoding)

    # strcpy / wcscpy
    context = seeded(b'world', b'hello!!!')
    assert builtin_funcs.strcpy(context, 'strcpy', [dst, src]) == dst
    assert context.read_data(dst) == b'world'
    for encoding in encodings:
        context = seeded('world'.encode(encoding), 'hello!!!'.encode(encoding))
        assert builtin_funcs.strcpy(context, 'wcscpy', [dst, src]) == dst
        assert context.read_data(dst, data_type=wide) == 'world'.encode(encoding)

    # strncpy / wcsncpy
    context = seeded(b'world', b'hello!!!')
    assert builtin_funcs.strncpy(context, 'strncpy', [dst, src, 2]) == dst
    # Only 2 characters are copied, so no terminating null comes along and the tail of the
    # original destination string is still there.
    assert context.read_data(dst) == b'wollo!!!'
    for encoding in encodings:
        context = seeded('world'.encode(encoding), 'hello!!!'.encode(encoding))
        assert builtin_funcs.strncpy(context, 'wcsncpy', [dst, src, 2]) == dst
        assert context.read_data(dst, data_type=wide) == 'wollo!!!'.encode(encoding)

    # strdup / strndup
    # (HEAP_BASE is read off the context left over from the previous section.)
    heap_ptr = context.memory.HEAP_BASE
    context = seeded(b'hello')
    # The duplicate is expected at the base of the heap of the fresh context.
    assert builtin_funcs.strdup(context, 'strdup', [src]) == heap_ptr
    assert context.read_data(heap_ptr) == b'hello'
    context = seeded(b'hello')
    assert builtin_funcs.strndup(context, 'strndup', [src, 2]) == heap_ptr
    assert context.read_data(heap_ptr) == b'he'

    # strlen / wcslen
    context = seeded(b'hello')
    assert builtin_funcs.strlen(context, 'strlen', [src]) == 5
    for encoding in encodings:
        context = seeded('hello'.encode(encoding))
        assert builtin_funcs.strlen(context, 'wcslen', [src]) == 5
def test_builtin_funcs():
    """
    Checks each emulated string builtin against known inputs, first with narrow strings and
    then (where tested) with wide strings in both UTF-16 byte orders.
    """
    from kordesii.utils import function_tracing
    from kordesii.utils.function_tracing.cpu_context import ProcessorContext
    from kordesii.utils.function_tracing import builtin_funcs

    src = 0x123000
    dst = 0x124000
    wide_encodings = ("utf-16-le", "utf-16-be")

    def read_wide(cpu, address):
        """Reads a wide string out of the given context."""
        return cpu.read_data(address, data_type=function_tracing.WIDE_STRING)

    # ---- strcat / wcscat ----
    ctx = ProcessorContext.from_arch()
    assert ctx.ARCH_NAME == "metapc"
    ctx.memory.write(src, b"world")
    ctx.memory.write(dst, b"hello")
    assert builtin_funcs.strcat(ctx, "strcat", [dst, src]) == dst
    assert ctx.read_data(dst) == b"helloworld"
    for codec in wide_encodings:
        ctx = ProcessorContext.from_arch()
        ctx.memory.write(src, "world".encode(codec))
        ctx.memory.write(dst, "hello".encode(codec))
        assert builtin_funcs.strcat(ctx, "wcscat", [dst, src]) == dst
        assert read_wide(ctx, dst) == "helloworld".encode(codec)

    # ---- strncat / wcsncat ----
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"world")
    ctx.memory.write(dst, b"hello")
    assert builtin_funcs.strncat(ctx, "strncat", [dst, src, 10]) == dst
    assert ctx.read_data(dst) == b"helloworld"
    assert builtin_funcs.strncat(ctx, "strncat", [dst, src, 2]) == dst
    assert ctx.read_data(dst) == b"helloworldwo"
    for codec in wide_encodings:
        ctx = ProcessorContext.from_arch()
        ctx.memory.write(src, "world".encode(codec))
        ctx.memory.write(dst, "hello".encode(codec))
        assert builtin_funcs.strncat(ctx, "wcsncat", [dst, src, 10]) == dst
        assert read_wide(ctx, dst) == "helloworld".encode(codec)
        assert builtin_funcs.strncat(ctx, "wcsncat", [dst, src, 2]) == dst
        assert read_wide(ctx, dst) == "helloworldwo".encode(codec)

    # ---- strcpy / wcscpy ----
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"world")
    ctx.memory.write(dst, b"hello!!!")
    assert builtin_funcs.strcpy(ctx, "strcpy", [dst, src]) == dst
    assert ctx.read_data(dst) == b"world"
    for codec in wide_encodings:
        ctx = ProcessorContext.from_arch()
        ctx.memory.write(src, "world".encode(codec))
        ctx.memory.write(dst, "hello!!!".encode(codec))
        assert builtin_funcs.strcpy(ctx, "wcscpy", [dst, src]) == dst
        assert read_wide(ctx, dst) == "world".encode(codec)

    # ---- strncpy / wcsncpy ----
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"world")
    ctx.memory.write(dst, b"hello!!!")
    assert builtin_funcs.strncpy(ctx, "strncpy", [dst, src, 2]) == dst
    # With just 2 characters copied the terminating null is never transferred, which leaves
    # the remainder of the original destination string in place.
    assert ctx.read_data(dst) == b"wollo!!!"
    for codec in wide_encodings:
        ctx = ProcessorContext.from_arch()
        ctx.memory.write(src, "world".encode(codec))
        ctx.memory.write(dst, "hello!!!".encode(codec))
        assert builtin_funcs.strncpy(ctx, "wcsncpy", [dst, src, 2]) == dst
        assert read_wide(ctx, dst) == "wollo!!!".encode(codec)

    # ---- strdup / strndup ----
    # (HEAP_BASE is taken from the context left over from the section above.)
    expected_ptr = ctx.memory.HEAP_BASE
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"hello")
    # The copy is expected to be allocated at the heap base of the fresh context.
    assert builtin_funcs.strdup(ctx, "strdup", [src]) == expected_ptr
    assert ctx.read_data(expected_ptr) == b"hello"
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"hello")
    assert builtin_funcs.strndup(ctx, "strndup", [src, 2]) == expected_ptr
    assert ctx.read_data(expected_ptr) == b"he"

    # ---- strlen / wcslen ----
    ctx = ProcessorContext.from_arch()
    ctx.memory.write(src, b"hello")
    assert builtin_funcs.strlen(ctx, "strlen", [src]) == 5
    for codec in wide_encodings:
        ctx = ProcessorContext.from_arch()
        ctx.memory.write(src, "hello".encode(codec))
        assert builtin_funcs.strlen(ctx, "wcslen", [src]) == 5
def test_registers():
    """
    Checks the Register class (aliased sub-register views over one value), the register names
    exposed by a context, and push/pop behaviour of the FPU register stack.
    """
    from kordesii.utils.function_tracing.cpu_context import ProcessorContext
    from kordesii.utils.function_tracing.registers import Register

    # A single 8 byte register with a mask for each of its named views.
    rax_family = Register(
        8,
        rax=0xFFFFFFFFFFFFFFFF,
        eax=0xFFFFFFFF,
        ax=0xFFFF,
        al=0xFF,
        ah=0xFF00,
    )
    assert sorted(rax_family.names) == ["ah", "al", "ax", "eax", "rax"]

    # Starts out zeroed, by attribute as well as by item access.
    assert rax_family.rax == 0
    assert rax_family.ax == 0
    assert rax_family["rax"] == 0
    assert rax_family["ax"] == 0

    # Writing the high byte shows up in every wider view.
    rax_family.ah = 0x23
    assert rax_family.ah == 0x23
    assert rax_family.al == 0x00
    assert rax_family.ax == 0x2300
    assert rax_family.eax == 0x00002300

    # Writing a wider view is visible through the narrower ones.
    rax_family.eax = 0x123
    assert rax_family.ah == 0x01
    assert rax_family.al == 0x23
    assert rax_family.rax == 0x0000000000000123

    context = ProcessorContext.from_arch()
    regs = context.registers

    # fmt: off
    # Every register name known to the context.
    assert sorted(regs.names) == [
        'ac', 'af', 'ah', 'al', 'ax', 'b', 'bh', 'bl', 'bp', 'bpl', 'bx',
        'c0', 'c1', 'c2', 'c3', 'cf', 'ch', 'cl', 'cs', 'cx', 'd', 'df',
        'dh', 'di', 'dil', 'dl', 'dm', 'ds', 'dx', 'eax', 'ebp', 'ebx',
        'ecx', 'edi', 'edx', 'eflags', 'es', 'esi', 'esp', 'flags', 'fs', 'gs', 'i', 'ic',
        'id', 'iem', 'if', 'im', 'iopl', 'ir', 'nt', 'o', 'of', 'om', 'p',
        'pc', 'pf', 'pm', 'r10', 'r10b', 'r10d', 'r10w', 'r11', 'r11b',
        'r11d', 'r11w', 'r12', 'r12b', 'r12d', 'r12w', 'r13', 'r13b', 'r13d',
        'r13w', 'r14', 'r14b', 'r14d', 'r14w', 'r15', 'r15b', 'r15d', 'r15w',
        'r8', 'r8b', 'r8d', 'r8w', 'r9', 'r9b', 'r9d', 'r9w', 'rax', 'rbp',
        'rbx', 'rc', 'rcx', 'rdi', 'rdx', 'rf', 'rip', 'rsi', 'rsp', 'sf',
        'sf', 'si', 'sil', 'sp', 'spl', 'ss',
        'st', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7',
        'tag0', 'tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7',
        'tf', 'top', 'u', 'um', 'vif', 'vip', 'vm',
        'xmm0', 'xmm1', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15',
        'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9',
        'z', 'zf', 'zm',
    ]
    # Only the names belonging to the FPU.
    assert sorted(regs.fpu.names) == [
        "b", "c0", "c1", "c2", "c3", "d", "dm", "i", "ic", "iem", "im", "ir",
        "o", "om", "p", "pc", "pm", "rc", "sf",
        "st", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
        "tag0", "tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7",
        "top", "u", "um", "z", "zm",
    ]
    # fmt: on

    # FPU stack behaviour.
    # TODO: Add tests for flags
    # st0 starts out empty through every access path.
    assert regs.st0 is None
    assert regs["st0"] is None
    assert regs.fpu.st0 is None
    assert regs.fpu["st0"] is None

    # A pushed value lands in st0...
    regs.fpu.push(-12.3)
    assert regs.st0 == -12.3
    assert regs.st1 is None

    # ...and is moved down to st1 by the next push.
    regs.fpu.push(34)
    assert regs.st0 == 34
    assert regs.st1 == -12.3

    # Popping brings the earlier value back up to st0.
    regs.fpu.pop()
    assert regs.st0 == -12.3
    assert regs.st1 is None

    regs.fpu.push(regs.fpu.INFINITY)
    assert regs.st0 == regs.fpu.INFINITY
    assert regs.st1 == -12.3
Example #8
0
 def new_context(self) -> ProcessorContext:
     """Builds and returns a ProcessorContext for this emulator via ProcessorContext.from_arch()."""
     fresh_context = ProcessorContext.from_arch(self)
     return fresh_context