Example #1
0
    def _init_emulator(self, path=None, data=None, is_raw_code=False) -> None:
        """
        Instantiate the emulator that matches the emulation target and store
        it in self.emu.

        args:
            path: Path of the PE file to inspect (ignored for raw code)
            data: Raw bytes of the PE to inspect (ignored for raw code)
            is_raw_code: True when the target is a raw code blob (shellcode)
                         rather than a PE; a user mode emulator is used then
        return:
            None
        """
        # Raw code never goes through PE parsing and always runs in user mode
        kernel_mode = False

        if not is_raw_code:
            pe = PeFile(path=path, data=data)

            # The architecture is the last '_'-separated token of the machine
            # type name; only x86/x64 are supported at the moment
            machine_name = MACHINE_TYPE[pe.FILE_HEADER.Machine]
            mach = machine_name.split('_')[-1].lower()
            if mach not in ('amd64', 'i386'):
                raise SpeakeasyError('Unsupported architecture: %s' % mach)

            if pe.is_dotnet():
                raise NotSupportedError(
                    '.NET assemblies are not currently supported')

            kernel_mode = pe.is_driver()

        if kernel_mode:
            # Drivers get the kernel mode emulator (it takes no argv)
            self.emu = WinKernelEmulator(config=self.config,
                                         logger=self.logger,
                                         debug=self.debug,
                                         exit_event=self.exit_event)
        else:
            self.emu = Win32Emulator(config=self.config,
                                     logger=self.logger,
                                     argv=self.argv,
                                     debug=self.debug,
                                     exit_event=self.exit_event)
Example #2
0
class Speakeasy(object):
    """
    Wrapper class for invoking the speakeasy emulators.

    The underlying emulator (self.emu) is only created once a module or a
    shellcode blob is loaded. Hooks registered before that point are queued
    on the wrapper and replayed onto the emulator right before emulation
    starts (see _init_hooks).
    """
    def check_init(func):
        """Wrapper to make sure the emulator is initialized"""
        # Imported here so the decorator does not rely on a module level import
        from functools import wraps

        # wraps() keeps the name and docstring of the decorated API method
        @wraps(func)
        def wrap(self, *args, **kwargs):
            if not self.emu:
                raise SpeakeasyError('Emulator not initialized')
            return func(self, *args, **kwargs)

        return wrap

    def __init__(self,
                 config=None,
                 logger=None,
                 argv=None,
                 debug=False,
                 exit_event=None):
        """
        Set up the wrapper; no emulator is created yet.

        args:
            config: Emulator configuration; a default config is loaded if
                    none is supplied
            logger: Optional logger used to report configuration errors and
                    handed to the emulator
            argv: Argument list handed to the user mode emulator. Defaults
                  to a new empty list for each instance.
            debug: Debug flag handed to the emulator
            exit_event: Exit event object handed to the emulator
        """
        # The logger must be set first, _init_config reports through it
        self.logger = logger
        self._init_config(config)
        self.emu = None
        self.api_hooks = []
        self.code_hooks = []
        self.mem_read_hooks = []
        # A fresh list per instance; a literal [] default would be shared by
        # every Speakeasy object created without an explicit argv
        self.argv = [] if argv is None else argv
        self.exit_event = exit_event
        self.debug = debug
        self.loaded_bins = []
        self.mem_write_hooks = []
        self.mem_invalid_hooks = []
        self.interrupt_hooks = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing is released here: the object (and its report) stays usable
        # after the with block. Returning False lets exceptions propagate.
        return False

    def _init_config(self, config: dict) -> None:
        """
        Init the emulator config
        args:
            config: The configuration data to set up the emulator environment
                    If none is supplied, a default config is used.
        return:
            None
        raises:
            ConfigError: the config (or its schema) failed validation
        """
        if not config:
            config_path = os.path.join(os.path.dirname(speakeasy.__file__),
                                       'configs', 'default.json')
            with open(config_path, 'r') as f:
                self.config = json.load(f)
        else:
            self.config = config

        try:
            validate_config(self.config)
        except jsonschema.exceptions.SchemaError as err:
            if self.logger:
                self.logger.exception('Invalid config schema: %s', str(err))
            # Chain the cause so the jsonschema details stay in the traceback
            raise ConfigError('Invalid config schema') from err
        except jsonschema.exceptions.ValidationError as err:
            if self.logger:
                self.logger.exception('Invalid config: %s', str(err))
            raise ConfigError('Invalid config') from err

    def _init_emulator(self, path=None, data=None, is_raw_code=False) -> None:
        """
        Based on the PE metadata, use the appropriate emulator. That is,
        a user mode emulator vs a kernel mode emulator

        args:
            path: Path of the PE file to inspect (ignored for raw code)
            data: Raw bytes of the PE to inspect (ignored for raw code)
            is_raw_code: True when the target is a raw code blob (shellcode)
                         rather than a PE; a user mode emulator is used then
        return:
            None
        """
        # Raw code never goes through PE parsing and always runs in user mode
        kernel_mode = False

        if not is_raw_code:
            pe = PeFile(path=path, data=data)
            # Get the machine type we only support x86/x64 atm
            mach = MACHINE_TYPE[pe.FILE_HEADER.Machine].split('_')[-1].lower()
            if mach not in ('amd64', 'i386'):
                raise SpeakeasyError('Unsupported architecture: %s' % mach)

            if pe.is_dotnet():
                raise NotSupportedError(
                    '.NET assemblies are not currently supported')

            kernel_mode = pe.is_driver()

        if kernel_mode:
            # Drivers get the kernel mode emulator (it takes no argv)
            self.emu = WinKernelEmulator(config=self.config,
                                         logger=self.logger,
                                         debug=self.debug,
                                         exit_event=self.exit_event)
        else:
            self.emu = Win32Emulator(config=self.config,
                                     logger=self.logger,
                                     argv=self.argv,
                                     debug=self.debug,
                                     exit_event=self.exit_event)

    def _init_hooks(self) -> None:
        """
        Lazily add hooks if users added them early before emulator engine was instantiated
        """
        # Each queue is detached before it is replayed. Should an add_* call
        # queue the hook again (no emulator yet), it lands in the new list
        # instead of the one being iterated, so the replay always terminates.
        pending, self.api_hooks = self.api_hooks, []
        for cb, mod, func, argc, cconv in pending:
            self.add_api_hook(cb, mod, func, argc, cconv)

        pending, self.code_hooks = self.code_hooks, []
        for cb, begin, end, ctx in pending:
            self.add_code_hook(cb, begin, end, ctx)

        pending, self.mem_read_hooks = self.mem_read_hooks, []
        for cb, begin, end in pending:
            self.add_mem_read_hook(cb, begin, end)

        pending, self.mem_write_hooks = self.mem_write_hooks, []
        for cb, begin, end in pending:
            self.add_mem_write_hook(cb, begin, end)

        pending, self.mem_invalid_hooks = self.mem_invalid_hooks, []
        for cb, in pending:
            self.add_mem_invalid_hook(cb)

        pending, self.interrupt_hooks = self.interrupt_hooks, []
        for cb, ctx in pending:
            self.add_interrupt_hook(cb, ctx)

    def disasm(self, addr: int, size: int):
        """
        Get the disassembly from an address

        args:
            addr: address to begin disassembly at
            size: number of bytes to include
        return:
            A tuple of: (mnemonic, operands, and the full instruction)
        """
        return self.emu.get_disasm(addr, size)

    def is_pe(self, data: bytes) -> bool:
        """
        Test data to see if it looks like a PE

        args:
            data: Bytes to be tested for a PE
        return:
            True if data appears to be a PE
        """
        # Only the 'MZ' magic at the very start is checked
        return data[:2] == b'MZ'

    def load_module(self, path=None, data=None) -> PeFile:
        """
        Load a module into the speakeasy emulator

        args:
            path: Path to file to load into the emulation space
            data: Raw data to load as a module into the emulation space
        return:
            A PeFile object representing the newly loaded module
        raises:
            SpeakeasyError: no target was supplied, the path does not exist
                            or the target does not look like a PE
        """
        if not path and not data:
            raise SpeakeasyError('No emulation target supplied')

        if path and not os.path.exists(path):
            raise SpeakeasyError('Target file not found: %s' % (path))

        if data:
            test = data
        else:
            # The magic check only needs the first bytes of the file
            with open(path, 'rb') as f:
                test = f.read(4)

        if not self.is_pe(test):
            raise SpeakeasyError('Target file is not a PE')

        # Only record real paths of accepted targets. A data-only load has no
        # path, and a None entry would break create_memdump_archive.
        if path:
            self.loaded_bins.append(path)

        self._init_emulator(path=path, data=data)

        return self.emu.load_module(path=path, data=data)

    @check_init
    def run_module(self, module, all_entrypoints=False) -> None:
        """
        Run a previously loaded module through the configured emulator

        args:
            module: The module whose entry point to be run
            all_entrypoints: If true, all exports will be emulated, otherwise
            just the main PE entry point is emulated.
        return:
            None
        """
        # Replay hooks that were registered before the emulator existed
        self._init_hooks()
        return self.emu.run_module(module=module,
                                   all_entrypoints=all_entrypoints)

    def load_shellcode(self, fpath, arch, data=None) -> int:
        """
        Load a shellcode blob into emulation space

        args:
            fpath: file path containing shellcode blob
            arch: Architecture (x86 | amd64) to load shellcode as
            data: bytes object containing shellcode blob
        return:
            Address of the loaded shellcode in the emulation space
        """
        self._init_emulator(is_raw_code=True)
        # fpath may be empty when the blob is passed through data; only real
        # paths are recorded so create_memdump_archive can rely on them
        if fpath:
            self.loaded_bins.append(fpath)

        return self.emu.load_shellcode(fpath, arch, data=data)

    @check_init
    def run_shellcode(self, sc_addr: int, offset=0) -> None:
        """
        Run a previously loaded shellcode blob by address

        args:
            sc_addr: address of the previously loaded shellcode blob to emulate
            offset: offset within the blob to begin emulation
        return:
            None
        """
        # Replay hooks that were registered before the emulator existed
        self._init_hooks()
        return self.emu.run_shellcode(sc_addr, offset=offset)

    @check_init
    def get_report(self) -> dict:
        """
        Get the emulation report from the emulator

        return:
            Get the raw emulation report as a python dictionary
        """
        return self.emu.get_report()

    @check_init
    def get_json_report(self) -> str:
        """
        Get the emulation report from the emulator formatted as a JSON string
        return:
            Get the emulation report as a JSON object
        """
        return self.emu.get_json_report()

    def add_api_hook(self,
                     cb: Callable,
                     module='',
                     api_name='',
                     argc=0,
                     call_conv=None):
        """
        Set a callback to fire when a specified API is called during emulation

        args:
            cb: Callable python function to execute
            module: name of the module containing the target API
            api_name: Name of the API to hook. Wild cards (e.g. *) are supported.
            argc: force the emulator to account for this amount of arguments (for stack cleanup)
            call_conv: force the emulator to use the supplied calling convention for this hook
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        if not self.emu:
            self.api_hooks.append((cb, module, api_name, argc, call_conv))
            return
        return self.emu.add_api_hook(cb,
                                     module=module,
                                     api_name=api_name,
                                     argc=argc,
                                     call_conv=call_conv,
                                     emu=self)

    def stop(self) -> None:
        """
        Stops emulation
        """
        return self.emu.stop()

    def call(self, addr: int, params=None) -> None:
        """
        Start emulating at the specified address

        args:
            addr: Address to begin emulation at
            params: list of arguments to push onto the stack for the call.
                    Defaults to a new empty list.
        return:
            None
        """
        # A fresh list per call instead of one shared default list
        if params is None:
            params = []
        return self.emu.call(addr, params=params)

    def add_code_hook(self, cb: Callable, begin=1, end=0, ctx=None):
        """
        Set a callback to fire for every CPU instruction that is emulated

        args:
            cb: Callable python function to execute
            begin: beginning of the address range to hook
            end: end of the address range to hook
            ctx: Optional context to pass back and forth between the hook function.
                 Defaults to a new empty dict for each hook.
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        # A fresh dict per hook; a literal {} default would make unrelated
        # hooks share (and overwrite) the same context
        if ctx is None:
            ctx = {}
        if not self.emu:
            self.code_hooks.append((cb, begin, end, ctx))
            return
        return self.emu.add_code_hook(cb,
                                      begin=begin,
                                      end=end,
                                      ctx=ctx,
                                      emu=self)

    def add_mem_read_hook(self, cb: Callable, begin=1, end=0):
        """
        Set a callback to fire when a memory address is read from

        args:
            cb: Callable python function to execute
            begin: beginning of the address range to hook
            end: end of the address range to hook
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        if not self.emu:
            self.mem_read_hooks.append((cb, begin, end))
            return
        return self.emu.add_mem_read_hook(cb, begin=begin, end=end, emu=self)

    def add_mem_write_hook(self, cb: Callable, begin=1, end=0):
        """
        Set a callback to fire when a memory address is written to

        args:
            cb: Callable python function to execute
            begin: beginning of the address range to hook
            end: end of the address range to hook
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        if not self.emu:
            self.mem_write_hooks.append((cb, begin, end))
            return
        return self.emu.add_mem_write_hook(cb, begin=begin, end=end, emu=self)

    def add_mem_invalid_hook(self, cb: Callable):
        """
        Get a callback for when a memory access violation occurs

        args:
            cb: Callable python function to execute
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        if not self.emu:
            self.mem_invalid_hooks.append((cb, ))
            return
        return self.emu.add_mem_invalid_hook(cb, emu=self)

    def add_interrupt_hook(self, cb: Callable, ctx=None):
        """
        Get a callback for software interrupts

        args:
            cb: Callable python function to execute
            ctx: Optional context to pass back and forth between the hook function.
                 Defaults to a new empty dict for each hook.
        return:
            Hook object for newly registered hooks, or None when the hook was
            queued because no emulator exists yet
        """
        # A fresh dict per hook instead of one shared default dict
        if ctx is None:
            ctx = {}
        if not self.emu:
            # Queue the context together with the callback: _init_hooks
            # unpacks (cb, ctx) when it replays this entry
            self.interrupt_hooks.append((cb, ctx))
            return
        return self.emu.add_interrupt_hook(cb, ctx=ctx, emu=self)

    def get_registry_key(self, handle=0, path=''):
        """
        Get registry key by path or handle

        args:
            handle: handle corresponding for a registry key
            path: Path to a registry key
        return:
            If valid, a registry key object
        """
        return self.emu.reg_get_key(handle=handle, path=path)

    def get_address_map(self, addr: int):
        """
        Get the address mapping object associated with the specified address

        args:
            addr: Address in the emulation space
        return:
            A memory map object that holds the specified address
        """
        return self.emu.get_address_map(addr)

    def mem_alloc(self, size, base=None, tag='speakeasy.None') -> int:
        """
        Allocate a block of memory in the emulation space

        args:
            size: Size of requested memory block
            base: Optionally request a base address. If in use, the next nearest
                  address will be returned
            tag: Tag to assign the new memory mapping
        return:
            Address of the newly allocated memory block
        """
        return self.emu.mem_map(size, base=base, tag=tag)

    def mem_free(self, base: int) -> None:
        """
        Free a block of memory in the emulation space
        args:
            base: Address to free
        return:
            None
        """
        return self.emu.mem_free(base)

    def mem_read(self, addr: int, size: int) -> bytes:
        """
        Read bytes from a memory address

        args:
            addr: address to read bytes from
            size: number of bytes to read
        return:
            Python bytes object contained the data read
        """
        return self.emu.mem_read(addr, size)

    def mem_write(self, addr: int, data: bytes) -> None:
        """
        Write bytes to a memory address

        args:
            addr: address to write bytes to
            data: data to write
        return:
            None
        """
        return self.emu.mem_write(addr, data)

    def mem_cast(self, obj, addr: int):
        """
        Cast an address as an object for easy access

        args:
            obj: object to cast into
            addr: address containing the data to cast into type "obj"
        return:
            Python object based on the data located at addr
        """
        return self.emu.mem_cast(obj, addr)

    def reg_read(self, reg: str) -> int:
        """
        Read value from a register

        args:
            reg: name of the register to read from
        return:
            value contained in the requested register
        """
        return self.emu.reg_read(reg)

    def get_dyn_imports(self) -> list:
        """
        Returns the imports dynamically resolved at runtime

        return:
            List of functions that were resolved at runtime (e.g. GetProcAddress,
                                                                  MmGetSystemRoutineAddress)
        """
        return self.emu.get_dyn_imports()

    def reg_write(self, reg: str, val: int) -> None:
        """
        Write value to a register

        args:
            reg: name of the register to write to
            val: value to store in the register
        return:
            None
        """
        return self.emu.reg_write(reg, val)

    def get_dropped_files(self) -> list:
        """
        Get files that were written to disk during emulation

        return:
            Returns a list of files that were written by the sample
        """
        return self.emu.get_dropped_files()

    def create_file_archive(self) -> bytes:
        """
        Creates a file archive package.
        The archive contains a manifest that can be used to match dropped files
        metadata with the acquired files.

        return:
            A Bytes object containing a zip archive of dropped files, or an
            empty bytes object when no files were dropped
        """
        files = self.get_dropped_files()
        if not files:
            return b''

        manifest = []
        _zip = BytesIO()

        with zipfile.ZipFile(_zip, mode="w",
                             compression=zipfile.ZIP_DEFLATED) as zf:

            for f in files:
                path = f.get_path()
                # Paths are split with ntpath: only the base name is used as
                # the member name inside the archive
                file_name = ntpath.basename(path)
                manifest.append({
                    'path': path,
                    'file_name': file_name,
                    'size': f.get_size(),
                    'sha256': f.get_hash()
                })
                zf.writestr(file_name, f.get_data())

            manifest = json.dumps(manifest, indent=4, sort_keys=False)
            zf.writestr('speakeasy_manifest.json', manifest)

        return _zip.getvalue()

    def get_mem_maps(self) -> list:
        """
        Get all memory maps in the emulation space

        return:
            A list of all valid memory maps from the emulator
        """
        return self.emu.get_mem_maps()

    def get_memory_dumps(self) -> tuple:
        """
        Returns all memory contents along with context information

        return:
            A generator of tuples of all valid memory with context. Each
            tuple is (tag, base, size, is_free, process, data). Note that
            the annotation describes the yielded items; the call itself
            returns a generator.
        """
        for mm in self.emu.get_mem_maps():
            base = mm.get_base()
            size = mm.get_size()
            tag = mm.get_tag()
            proc = mm.get_process()
            is_free = mm.is_free()
            try:
                data = self.emu.mem_read(base, size)
            except Exception:
                # Best effort: maps that cannot be read are left out
                continue
            yield (tag, base, size, is_free, proc, data)

    def read_mem_string(self, address: int, width=1, max_chars=0) -> str:
        """
        Read a string from emulated memory

        args:
            address: address of the string to read
            width: character width
            max_chars: maximum characters to read, 0 reads until null terminator

        return:
            decoded string
        """
        return self.emu.read_mem_string(address, width, max_chars)

    def get_symbols(self) -> dict:
        """
        Returns a dictionary of symbol information

        return:
            a dictionary of symbol information
        """
        return self.emu.symbols

    def get_ret_address(self) -> int:
        """
        Returns the value stored at the top of the stack

        return:
            value stored at the top of the stack
        """
        return self.emu.get_ret_address()

    def push_stack(self, val: int) -> None:
        """
        Put a value on the stack and adjust the stack pointer

        args:
            val: value to push to the stack
        return:
            None
        """
        self.emu.push_stack(val)

    def pop_stack(self) -> int:
        """
        Get value from the stack and adjust the stack pointer

        return:
            value stored at the top of the stack
        """
        return self.emu.pop_stack()

    def get_stack_ptr(self) -> int:
        """
        Get the current address of the stack pointer

        return:
            address of stack pointer
        """
        return self.emu.get_stack_ptr()

    def set_stack_ptr(self, addr: int) -> None:
        """
        Set the current address of the stack pointer

        args:
            addr: address to set the stack pointer to
        return:
            None
        """
        self.emu.set_stack_ptr(addr)

    def set_pc(self, addr: int) -> None:
        """
        Set the value of the current program counter

        args:
            addr: address to set the program counter to
        return:
            None
        """
        self.emu.set_pc(addr)

    def reset_stack(self, base: int) -> tuple:
        """
        Reset stack to the supplied base address

        args:
            base: stack base address
        return:
            base, ptr
        """
        return self.emu.reset_stack(base)

    def get_stack_base(self) -> int:
        """
        Get the base address of the stack

        return:
            base address of stack
        """
        return self.emu.stack_base

    def get_arch(self) -> int:
        """
        Get the architecture of the emulator

        return:
            emulator architecture constant value
        """
        return self.emu.get_arch()

    def get_ptr_size(self) -> int:
        """
        Get the size of a pointer

        return:
            pointer size
        """
        return self.emu.ptr_size

    def get_symbol_from_address(self, address: int) -> str:
        """
        If the supplied address is related to a known symbol, look it up here

        args:
            address: address to lookup

        return:
            symbol name
        """
        return self.emu.get_symbol_from_address(address)

    def is_address_valid(self, address: int) -> bool:
        """
        Was this address previously reserved or mapped?

        args:
            address: address to check

        return:
            True if address is valid, false otherwise
        """
        return self.emu.is_address_valid(address)

    def create_memdump_archive(self) -> bytes:
        """
        Creates a memory dump archive package of the emulated sample.
        The archive contains a manifest that can be used to match memory chunk
        metadata with the dumped binary memory files.

        return:
            Bytes object containing a zip of all memory
        """
        manifest = []
        _zip = BytesIO()

        # Base names (no directory, no extension) of everything that was
        # loaded; empty entries are skipped since they carry no name
        loaded_bins = [
            os.path.splitext(os.path.basename(b))[0]
            for b in self.loaded_bins if b
        ]

        with zipfile.ZipFile(_zip, mode="w",
                             compression=zipfile.ZIP_DEFLATED) as zf:
            # Distinct processes, in the order their memory is first seen
            procs = []
            for block in self.get_memory_dumps():
                if block[4] not in procs:
                    procs.append(block[4])

            for process in procs:
                # Memory that is not tied to a process is not dumped
                if not process:
                    continue

                if self.emu.get_arch() == _arch.ARCH_X86:
                    arch = 'x86'
                else:
                    arch = 'amd64'

                # The manifest entry shares this list, it is filled in below
                memory_blocks = []
                manifest.append({
                    'pid': process.get_pid(),
                    'process_name': process.get_process_path(),
                    'arch': arch,
                    'memory_blocks': memory_blocks
                })

                for block in self.get_memory_dumps():
                    tag, base, size, is_free, _proc, data = block

                    if not tag:
                        continue
                    if _proc != process:
                        continue
                    # Ignore emulator noise such as structures created by the emulator, or
                    # modules that were loaded
                    if tag.startswith(
                            'emu') and not tag.startswith('emu.shellcode.'):
                        # 'emu' tagged memory is only kept when the tag names
                        # one of the loaded binaries
                        if not any(b in tag for b in loaded_bins):
                            continue

                    _hash = hashlib.sha256(data).hexdigest()
                    file_name = '%s.mem' % (tag)

                    memory_blocks.append({
                        'tag': tag,
                        'base': hex(base),
                        'size': hex(size),
                        'is_free': is_free,
                        'sha256': _hash,
                        'file_name': file_name
                    })
                    zf.writestr(file_name, data)

            manifest = json.dumps(manifest, indent=4, sort_keys=False)
            zf.writestr('speakeasy_manifest.json', manifest)

        return _zip.getvalue()