Esempio n. 1
0
 def process(self, data):
     """
     Compute the import hash of the PE image given in `data`.

     Only the import directory is parsed. The hash is returned as hex text encoded with
     `self.codec` when `self.args.text` is set, and as the decoded raw bytes otherwise.
     A ValueError is raised when no import hash could be computed.
     """
     image = PE(data=data, fast_load=True)
     image.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
     digest = image.get_imphash()
     if digest:
         if self.args.text:
             return digest.encode(self.codec)
         return bytes.fromhex(digest)
     raise ValueError('no import directory.')
Esempio n. 2
0
def pdb_guid(file):
    """
    Read the CodeView record of a PE file and return its PDB identity.

    Args:
        file: the PE file; passed to ``PE`` as its first positional argument.

    Returns:
        A dict with the keys ``"filename"`` (the ``Filename`` field of the parsed
        ``CV_RSDS_HEADER``) and ``"GUID"`` (the GUID fields followed by the age,
        rendered as lowercase hex without separators).

    Raises:
        RuntimeError: when the file has no debug directory, or when none of its
            debug entries is of type ``IMAGE_DEBUG_TYPE_CODEVIEW``.
    """
    pe = PE(file, fast_load=True)
    pe.parse_data_directories()

    codeview_type = DEBUG_TYPE["IMAGE_DEBUG_TYPE_CODEVIEW"]
    # DIRECTORY_ENTRY_DEBUG may be absent from the parsed object; treat that like
    # an empty list so the caller always gets the documented RuntimeError rather
    # than an AttributeError.
    debug_entries = getattr(pe, "DIRECTORY_ENTRY_DEBUG", ())
    codeview = next(
        (entry for entry in debug_entries
         if entry.struct.Type == codeview_type),
        None,
    )
    if codeview is None:
        print("Failed to find CodeView in pdb")
        raise RuntimeError("Failed to find GUID age")

    # The CodeView record is addressed by raw file offset, so it is sliced from
    # the file contents directly.
    offset = codeview.struct.PointerToRawData
    size = codeview.struct.SizeOfData
    tmp = CV_RSDS_HEADER.parse(pe.__data__[offset:offset + size])
    guidstr = "%08x%04x%04x%s%x" % (
        tmp.GUID.Data1,
        tmp.GUID.Data2,
        tmp.GUID.Data3,
        hexlify(tmp.GUID.Data4).decode("ascii"),
        tmp.Age,
    )
    return {"filename": tmp.Filename, "GUID": guidstr}
Esempio n. 3
0
 def parse_file_info(cls, pe: PE) -> dict:
     """
     Build a JSON-serializable, human readable dictionary from the version resource of
     the given PE file.

     Only the resource directory is parsed. The first string table that can be read is
     converted with `cls._parse_pedict` and returned; when a language identifier is
     available, the keys `LangID`, `Charset` and `Language` are added to it. The result
     is None when the file exposes no `FileInfo` or no string table could be processed.
     """
     try:
         pe.parse_data_directories(
             directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
         info_groups = pe.FileInfo
     except AttributeError:
         return None
     for group in info_groups:
         for item in group:
             # entries without a StringTable are skipped via the AttributeError
             with suppress(AttributeError):
                 for table in item.StringTable:
                     result = cls._parse_pedict(table.entries)
                     # language details are optional: any AttributeError raised while
                     # resolving them leaves the parsed table as it is
                     with suppress(AttributeError):
                         lang_id = table.entries.get('LangID', None) or table.LangID
                         if not isinstance(lang_id, int):
                             lang_id = int(lang_id, 0x10)
                         # upper 16 bits select the language, lower 16 bits the charset
                         language = cls._LCID.get(
                             lang_id >> 0x10, 'Language Neutral')
                         charset = cls._CHARSET.get(
                             lang_id & 0xFFFF, 'Unknown Charset')
                         result.update(
                             LangID=F'{lang_id:08X}',
                             Charset=charset,
                             Language=language)
                     return result
Esempio n. 4
0
 def _algorithm(self, data):
     """
     Return the import hash of the PE image in `data` as raw bytes. Only the import
     directory is parsed; a ValueError is raised when no import hash is available.
     """
     image = PE(data=data, fast_load=True)
     image.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
     digest = image.get_imphash()
     if digest:
         return bytes.fromhex(digest)
     raise ValueError('no import directory.')
Esempio n. 5
0
    def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
        """
        Extracts time stamps from the PE header (link time), as well as from the imports,
        exports, debug, and resource directory. The resource time stamp is also parsed as
        a DOS time stamp and returned as the "Delphi" time stamp.

        When `raw_time_stamps` is true, the values are passed through unchanged; otherwise
        they are converted to naive datetime objects interpreted as UTC.

        The returned dictionary may contain the keys Linker, Import, DbgDir, Export,
        Delphi, and RsrcTS; a key is only present when the corresponding value could be
        read. Integer values are returned as they are, all other values are converted
        with str().
        """
        # dt converts one time stamp value into the representation requested by the caller.
        if raw_time_stamps:

            def dt(ts):
                return ts
        else:

            def dt(ts):
                # parse as UTC but then forget time zone information
                return datetime.fromtimestamp(
                    ts, tz=timezone.utc).replace(tzinfo=None)

        # Only the four directories that are inspected below are parsed.
        pe.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
        ])

        info = {}

        # Each section below is best-effort: an AttributeError (for example a directory
        # attribute that does not exist on pe) skips that section only.
        with suppress(AttributeError):
            info.update(Linker=dt(pe.FILE_HEADER.TimeDateStamp))

        with suppress(AttributeError):
            # Every iteration overwrites the Import key, so the last entry wins.
            # NOTE(review): TimeDateStamp is called on the entry itself here, whereas the
            # debug loop below reads entry.struct.TimeDateStamp - confirm this is intended,
            # because an AttributeError raised here would be suppressed silently.
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                info.update(Import=dt(entry.TimeDateStamp()))

        with suppress(AttributeError):
            # Every iteration overwrites the DbgDir key, so the last entry wins.
            for entry in pe.DIRECTORY_ENTRY_DEBUG:
                info.update(DbgDir=dt(entry.struct.TimeDateStamp))

        with suppress(AttributeError):
            Export = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
            # a zero export time stamp is not reported
            if Export: info.update(Export=dt(Export))

        with suppress(AttributeError):
            res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
            if res_timestamp:
                # NOTE(review): RsrcTS is recorded inside the ValueError guard, so a
                # ValueError raised while computing the DOS time drops both Delphi and
                # RsrcTS - confirm that this is intended.
                with suppress(ValueError):
                    from ...misc.datefix import datefix
                    dos = datefix.dostime(res_timestamp)
                    info.update(Delphi=dos)
                    info.update(RsrcTS=dt(res_timestamp))

        def norm(value):
            # integers (raw time stamps) are kept, everything else becomes a string
            if isinstance(value, int):
                return value
            return str(value)

        return {key: norm(value) for key, value in info.items()}
Esempio n. 6
0
def parse_pe_fetch_pdb(symbol_server, file_path):
    '''
        Attempt to fetch the symbol file that relates to a PE file. The file must have
        a valid IMAGE_DEBUG_DIRECTORY as well as an IMAGE_DEBUG_TYPE_CODEVIEW directory
        entry.

        Both the "RSDS" and the "NB10" CodeView record formats are handled. The
        identifier built from the record is passed to __fetch__ together with the
        symbol server, the PE path and the PDB file name.

        Returns a tuple (symbol, pdb file name). Every failure is logged and reported
        as (None, None); no exception is propagated to the caller.
    '''
    from binascii import hexlify

    try:
        guid = None
        pdb_filename = None
        pe = PE(file_path, fast_load=True)
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])

        code_view_entry = None
        # The debug directory attribute may be missing; that is the same situation as
        # having no CodeView entry and is reported with the warning below.
        for debug_entry in getattr(pe, 'DIRECTORY_ENTRY_DEBUG', ()):
            # .get() keeps an unknown debug type from aborting the whole search.
            if DEBUG_TYPE.get(debug_entry.struct.Type) == "IMAGE_DEBUG_TYPE_CODEVIEW":
                code_view_entry = debug_entry
                break

        if code_view_entry is None:
            logger.warning("%s doesn't have symbol information", basename(file_path))
            return None, None

        symbol_type_offset = code_view_entry.struct.PointerToRawData
        symbol_type_size = code_view_entry.struct.SizeOfData
        symbol_type_data = pe.__data__[symbol_type_offset:symbol_type_offset+symbol_type_size]

        # The record is raw file content, so its magic is compared as bytes. A str
        # literal never equals bytes on Python 3.
        magic = symbol_type_data[:4]
        if magic == b"RSDS":
            rsds = CV_RSDS_HEADER.parse(symbol_type_data)
            guid = "%08x%04x%04x%s%x" % (
                rsds.GUID.Data1,
                rsds.GUID.Data2,
                rsds.GUID.Data3,
                hexlify(rsds.GUID.Data4).decode('ascii'),
                rsds.Age,
            )
            pdb_filename = ntbasename(rsds.Filename)
        elif magic == b"NB10":
            nb10 = CV_NB10_HEADER.parse(symbol_type_data)
            guid = "%x%x" % (nb10.Timestamp, nb10.Age)
            pdb_filename = ntbasename(nb10.Filename)
        else:
            logger.error("%s unsupported symbol type", magic)
            return None, None

        # Explicit check instead of assert, which is removed when running with -O.
        if not guid or not pdb_filename:
            logger.error("%s has an incomplete CodeView record", basename(file_path))
            return None, None

        symbol = __fetch__(symbol_server, guid, file_path, pdb_filename)

        # NOTE(review): the return type of __fetch__ is not visible here, so the cabinet
        # magic is accepted both as bytes and as text - narrow this once confirmed.
        if symbol[:4] not in (b'MSCF', 'MSCF'):
            logger.error("Excpected symbol server to return a cabinet file")
            return None, None

        # TODO, unpack cabinet
        return symbol, basename(pdb_filename)
    except Exception:
        # Top-level boundary of the fetch: log the traceback and report failure.
        logger.error(format_exc())
        return None, None
Esempio n. 7
0
    def map_and_load(self, path, execute_now=False):
        """Map a PE module into emulated memory and either run or enqueue it.

        The candidate base address starts at the larger of the image's preferred
        ImageBase and `self.next_image_base`, and is advanced in steps of 0x10000
        until a base is found that `ql.mem.is_mapped` reports as free. The search
        stops once the image would no longer end below `self.heap_base_address`.

        Args:
            path        : path of the module binary to load
            execute_now : call the module entry point right away instead of
                          appending the module to `self.modules`

        Returns:
            True when the module was mapped, False when no base address was found.
        """
        ql = self.ql
        pe = PE(path, fast_load=True)

        # Make sure no module will occupy the NULL page
        if self.next_image_base > pe.OPTIONAL_HEADER.ImageBase:
            IMAGE_BASE = self.next_image_base
            pe.relocate_image(IMAGE_BASE)
        else:
            IMAGE_BASE = pe.OPTIONAL_HEADER.ImageBase
        # the mapping size is SizeOfImage rounded with an alignment of 0x1000
        IMAGE_SIZE = ql.mem.align(pe.OPTIONAL_HEADER.SizeOfImage, 0x1000)

        while IMAGE_BASE + IMAGE_SIZE < self.heap_base_address:
            # NOTE(review): only a length of 1 at IMAGE_BASE is probed, not the whole
            # IMAGE_SIZE range - confirm that ql.mem.map rejects overlapping ranges.
            if not ql.mem.is_mapped(IMAGE_BASE, 1):
                # the next module search starts 0x10000 above this image's base
                self.next_image_base = IMAGE_BASE + 0x10000
                ql.mem.map(IMAGE_BASE, IMAGE_SIZE)
                pe.parse_data_directories()
                data = bytearray(pe.get_memory_mapped_image())
                ql.mem.write(IMAGE_BASE, bytes(data))
                logging.info("[+] Loading %s to 0x%x" % (path, IMAGE_BASE))
                entry_point = IMAGE_BASE + pe.OPTIONAL_HEADER.AddressOfEntryPoint
                if self.entry_point == 0:
                    # Setting entry point to the first loaded module entry point, so the debugger can break.
                    self.entry_point = entry_point
                logging.info("[+] PE entry point at 0x%x" % entry_point)
                self.install_loaded_image_protocol(IMAGE_BASE, IMAGE_SIZE)
                # NOTE(review): the coverage range ends at the unaligned SizeOfImage,
                # while the mapping above uses the aligned IMAGE_SIZE.
                self.images.append(
                    self.coverage_image(
                        IMAGE_BASE,
                        IMAGE_BASE + pe.NT_HEADERS.OPTIONAL_HEADER.SizeOfImage,
                        path))
                if execute_now:
                    logging.info(
                        f'[+] Running from 0x{entry_point:x} of {path}')
                    # Assemble a small stub that loads IMAGE_BASE into rcx and self.gST
                    # into rdx and then calls the entry point; the stub is written to
                    # freshly allocated heap memory and executed from there.
                    assembler = self.ql.create_assembler()
                    code = f"""
                        mov rcx, {IMAGE_BASE}
                        mov rdx, {self.gST}
                        mov rax, {entry_point}
                        call rax
                    """
                    runcode, _ = assembler.asm(code)
                    ptr = ql.os.heap.alloc(len(runcode))
                    ql.mem.write(ptr, bytes(runcode))
                    ql.os.exec_arbitrary(ptr, ptr + len(runcode))

                else:
                    # modules that are not executed immediately are only recorded here
                    self.modules.append((path, IMAGE_BASE, entry_point, pe))
                return True
            else:
                # the base is taken: try the next 0x10000 slot and relocate the image to it
                IMAGE_BASE += 0x10000
                pe.relocate_image(IMAGE_BASE)
        return False
Esempio n. 8
0
class PEDebugData(object):
    """
    Access to the debug (CodeView) information of a PE file.

    The file is opened with fast_load=True; the debug directory is parsed on demand
    the first time debug_data() is called.
    """

    def __init__(self, path, filename=None):
        """
        Args:
            path: location of the PE file, passed on to PE.
            filename: name used in executable_id; defaults to the base name of path.
        """
        self.pe = PE(path, fast_load=True)
        self.path = path
        self.filename = os.path.basename(path) if filename is None else filename

    @property
    def symbol_id(self):
        """The symbol_id of the parsed CodeView record (see codeview_info)."""
        return self.codeview_info().symbol_id

    @property
    def executable_id(self):
        """
        '<lowercase file name>/<TimeDateStamp><SizeOfImage>' with both numbers in
        uppercase hex, or None when no file name is set.
        """
        if self.filename is None:
            return None
        return '%s/%X%X' % (self.filename.lower(),
                            self.pe.FILE_HEADER.TimeDateStamp,
                            self.pe.OPTIONAL_HEADER.SizeOfImage)

    def codeview_info(self):
        """
        Parse the debug data into a CodeViewRSDS or CodeViewNB10 object.

        Raises:
            PEMissingDebugDataError: when debug_data() finds nothing.
            PEUnknownDebugDataError: when the data starts with an unknown magic.
        """
        data = self.debug_data()
        if data is None:
            raise PEMissingDebugDataError()
        # The debug data is sliced from the raw file contents, so the magic has to be
        # compared as bytes; a str literal never equals bytes on Python 3.
        magic = data[:4]
        if magic == b'RSDS':
            return CodeViewRSDS(data)
        if magic == b'NB10':
            return CodeViewNB10(data)
        raise PEUnknownDebugDataError('Unknown CodeView type: %s' % magic)

    def debug_data(self):
        """
        Return the raw bytes of the first debug directory entry whose type is
        CODEVIEW or MISC, or None when there is no such entry.
        """
        if not hasattr(self.pe, 'DIRECTORY_ENTRY_DEBUG'):
            self.pe.parse_data_directories(
                directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']]
            )
        # The attribute can still be missing after parsing, hence the default.
        for entry in getattr(self.pe, 'DIRECTORY_ENTRY_DEBUG', ()):
            if entry.struct.Type in (
                DEBUG_TYPE['IMAGE_DEBUG_TYPE_CODEVIEW'],
                DEBUG_TYPE['IMAGE_DEBUG_TYPE_MISC'],
            ):
                off = entry.struct.PointerToRawData
                return self.pe.__data__[off:off + entry.struct.SizeOfData]
        return None
Esempio n. 9
0
    def process(self, data: bytearray) -> bytearray:
        """
        Extract the certificate data referenced by the security directory of a PE file.

        The directory entry selected by `self._SECDIRID` provides the offset and size of
        the certificate record in `data`. The record starts with an 8-byte header that is
        unpacked as `<IHH` (length, revision, certificate type); the bytes following the
        header are returned.

        Raises:
            ValueError: when the security directory entry has a zero offset or size,
                i.e. the file carries no certificate record.
            RefineryPartialResult: when the number of bytes found does not match the
                length stored in the header; the bytes found are attached as the
                partial result.
        """
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[self._SECDIRID])
        security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[self._SECDIRID]

        # Without this guard an empty directory entry (offset 0) would cause the first
        # bytes of the file to be interpreted as the certificate header.
        if security.VirtualAddress == 0 or security.Size == 0:
            raise ValueError(
                F'IMAGE_DIRECTORY_ENTRY_SECURITY ({self._SECDIRID}) is corrupt.'
            )

        sgnoff = security.VirtualAddress + 8
        sgnend = sgnoff + security.Size
        # revision and certificate type are part of the header but not used here
        length, _revision, _certtype = unpack('<IHH', data[sgnoff - 8:sgnoff])
        signature = data[sgnoff:sgnend]

        if len(signature) + 8 != length:
            raise RefineryPartialResult(
                F'Found {len(signature) + 8} bytes of signature, but length should be {length}.',
                partial=signature)

        return signature
Esempio n. 10
0
def _locate_extra_dependencies(dll_filepath, search_path_dll_map,
                               extra_dependencies, package_dir):
    dll_filepath = Path(dll_filepath)

    header = ModuleHeader(dll_filepath)
    architecture = header.getArchitecture()

    pe = PE(dll_filepath)
    pe.parse_data_directories(import_dllnames_only=True)

    if dll_filepath.name in extra_dependencies:
        return

    extra_dependencies[dll_filepath.name] = dll_filepath
    for dependency in pe.DIRECTORY_ENTRY_IMPORT:
        dependency_dll_name = dependency.dll.decode('utf-8')
        try:
            result = WindowsApi.loadModule(dependency_dll_name,
                                           cwd=dll_filepath.parent,
                                           architecture=architecture)
            assert result in (0, 126)
        except AssertionError:
            print(
                f"Encountered error {result} for dependency {dependency_dll_name}. Aborting ..."
            )
            exit(1)

        if result == 126:
            try:
                search_path = search_path_dll_map[dependency_dll_name]
                if dependency_dll_name not in extra_dependencies:
                    dependency_dll_filepath = Path(search_path,
                                                   dependency_dll_name)
                    dependency_dll_copy_filepath = Path(
                        package_dir, dependency_dll_filepath.name)
                    copyfile(dependency_dll_filepath,
                             dependency_dll_copy_filepath)
                    _locate_extra_dependencies(dependency_dll_copy_filepath,
                                               search_path_dll_map,
                                               extra_dependencies, package_dir)
            except KeyError:
                print(
                    f"Dependency {dependency_dll_name} was not found in any search path. Aborting ..."
                )
                exit(1)
Esempio n. 11
0
def get_import_table_string(file_path):
    """
    Collect the names of all functions imported by a PE file.

    The names are written, one per line, to the text file `file_path + ".log"` and are
    also returned concatenated as bytes. Imports without a name are skipped; a name
    that is not valid UTF-8 is part of the returned bytes but not of the log.

    Reading the PE file is best-effort: any error is printed and the names gathered up
    to that point are still written and returned.
    """
    name_chunks = []
    text_lines = []
    try:
        pe = PE(file_path, fast_load=True)
        pe.parse_data_directories()
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            for imp in entry.imports:
                name = getattr(imp, 'name', None)
                # imports without a byte-string name cannot be recorded
                if not isinstance(name, bytes):
                    continue
                name_chunks.append(name)
                try:
                    text_lines.append(name.decode("utf-8") + '\n')
                except UnicodeDecodeError:
                    # keep the raw bytes, leave the undecodable name out of the log
                    continue
    except Exception as e:
        print(e)

    # the context manager closes the log file, which was previously left open
    with open(file_path + ".log", "w+") as f:
        f.write(''.join(text_lines))
    return b''.join(name_chunks)
Esempio n. 12
0
    def process(self, data: bytearray) -> bytearray:
        """
        Return the certificate data that follows the 8-byte header of the record
        referenced by the security directory (`self._SECDIRID`) of the PE file in `data`.

        The offset and length of the directory entry are logged. A ValueError is raised
        when either of them is zero, and a RefineryPartialResult carrying the bytes found
        is raised when their number does not match the length stored in the header.
        """
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[self._SECDIRID])
        security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[self._SECDIRID]
        self.log_info(F'signature offset: 0x{security.VirtualAddress:08X}')
        self.log_info(F'signature length: 0x{security.Size:08X}')
        if not (security.VirtualAddress and security.Size):
            raise ValueError(
                F'IMAGE_DIRECTORY_ENTRY_SECURITY ({self._SECDIRID}) is corrupt.'
            )
        # header layout: length (4 bytes), revision (2 bytes), certificate type (2 bytes)
        header_start = security.VirtualAddress
        body_start = header_start + 8
        length, _revision, _certtype = unpack('<IHH', data[header_start:body_start])
        signature = data[body_start:body_start + security.Size]
        found = len(signature) + 8
        if found == length:
            return signature
        raise RefineryPartialResult(
            F'Found {found} bytes of signature, but length should be {length}.',
            partial=signature)
Esempio n. 13
0
    def map_and_load(self, path: str, exec_now: bool = False):
        """Map and load a module into memory.

        The specified module would be mapped and loaded into the address set
        in the `next_image_base` member. It is the caller's responsibility to
        make sure that the memory is available.

        NOTE(review): the code below uses the image's own ImageBase whenever it
        is non-zero and only falls back to `next_image_base` otherwise - confirm
        which of the two behaviors is intended.

        On success, `next_image_base` will be updated accordingly.

        Args:
            path     : path of the module binary to load
            exec_now : execute module right away; will be enqueued if not

        Raises:
            QlMemoryMappedError : when `next_image_base` is not available
        """

        ql = self.ql
        pe = PE(path, fast_load=True)

        # use image base only if it does not point to NULL
        image_base = pe.OPTIONAL_HEADER.ImageBase or self.next_image_base
        image_size = ql.mem.align(pe.OPTIONAL_HEADER.SizeOfImage, 0x1000)

        # NOTE(review): this check is an assert and is therefore skipped under -O
        assert (image_base %
                0x1000) == 0, 'image base is expected to be page-aligned'

        # relocation is only needed when the fallback address was chosen above
        if image_base != pe.OPTIONAL_HEADER.ImageBase:
            pe.relocate_image(image_base)

        pe.parse_data_directories()
        data = bytes(pe.get_memory_mapped_image())

        ql.mem.map(image_base, image_size, info="[module]")
        ql.mem.write(image_base, data)
        ql.log.info(f'Module {path} loaded to {image_base:#x}')

        entry_point = image_base + pe.OPTIONAL_HEADER.AddressOfEntryPoint
        ql.log.info(f'Module entry point at {entry_point:#x}')

        # the 'entry_point' member is used by the debugger. if not set, set it
        # to the first loaded module entry point so the debugger can break
        if self.entry_point == 0:
            self.entry_point = entry_point

        self.install_loaded_image_protocol(image_base, image_size)

        # this would be used later by os.find_containing_image
        self.images.append(
            self.coverage_image(image_base, image_base + image_size, path))

        # update next memory slot to allow sequential loading. its availability
        # is unknown though
        self.next_image_base = image_base + image_size

        module_info = (path, image_base, entry_point)

        # execute the module right away or enqueue it
        if exec_now:
            # call entry point while retaining the current return address
            self.execute_module(*module_info, eoe_trap=None)
        else:
            self.modules.append(module_info)
Esempio n. 14
0
class PEInfo:
    """
    Summary of a PE file: image base, entry point, sections, imports, exports and
    the printable strings found in the raw data of its sections.

    After construction, `strings` is a list of (hex address, text) tuples and
    `stringAddrs` holds the addresses of the strings found outside of data sections.
    """

    # Pattern for runs of at least two accepted characters. It is kept exactly as it
    # was written inline before and is compiled once for both string scans.
    _STRING_PATTERN = re.compile(b"([a-zA-Z0-9` \n~!@#$%^&*()-_=+|';\":.,?><*-]{2,})")

    def __init__(self, path):
        self.path = path
        self.type = 'PE'
        self.parser = PE(path, fast_load=True)
        self.parser.parse_data_directories()
        self.imageBase = self.parser.OPTIONAL_HEADER.ImageBase
        self.entryPoint = self.parser.OPTIONAL_HEADER.AddressOfEntryPoint
        self.sections = []
        self.stringAddrs = []
        # From here on the instance attribute `strings` (a list) shadows the method
        # of the same name.
        self.strings = self.strings()
        for section in self.parser.sections:
            s = Section(section.Name.decode().replace('\x00', ''), section.VirtualAddress + self.imageBase,
                        section.Misc_VirtualSize)
            self.sections.append(s)
        self.imports = []
        # The import directory may be missing, just like the export directory below.
        for entry in getattr(self.parser, 'DIRECTORY_ENTRY_IMPORT', ()):
            dllName = entry.dll.decode()
            for imp in entry.imports:
                importFunc = ImportFunction(self._symbolName(imp), imp.address, dllName)
                self.imports.append(importFunc)
        self.exports = []
        if hasattr(self.parser, "DIRECTORY_ENTRY_EXPORT"):
            for exp in self.parser.DIRECTORY_ENTRY_EXPORT.symbols:
                exportFunc = ExportFunction(self.imageBase + exp.address, self._symbolName(exp))
                self.exports.append(exportFunc)

    @staticmethod
    def _symbolName(symbol):
        """Return the decoded name of an import/export, or 'ord<N>' when it has no name."""
        if symbol.name is None:
            return 'ord%s' % symbol.ordinal
        return symbol.name.decode()

    def _sectionStrings(self, section):
        """Yield (virtual address, text) for every pattern match in the raw data of a section."""
        base = self.imageBase + section.VirtualAddress
        data = self.getData(section.PointerToRawData, section.SizeOfRawData)
        for match in self._STRING_PATTERN.finditer(data):
            # str() of a bytes object yields its repr; stripping b'...' keeps the
            # escaped text form that was produced before
            yield base + match.start(0), str(match.group(0))[2:-1]

    def getOffsetAtAddress(self, address):
        """Return the file offset for an RVA, or None when no section contains it."""
        for section in self.parser.sections:
            if section.contains_rva(address):
                return section.get_offset_from_rva(address)
        return None

    @property
    def codeRange(self):
        """Address interval of the first section whose name contains '.text', or None."""
        for section in self.parser.sections:
            if '.text' in section.Name.decode():
                start = section.VirtualAddress + self.imageBase
                return interval([(start, start + section.Misc_VirtualSize)])
        return None

    @property
    def dataRange(self):
        """List of (start, end) addresses of all sections whose name contains 'data'."""
        return [
            (section.VirtualAddress + self.imageBase,
             section.VirtualAddress + self.imageBase + section.Misc_VirtualSize)
            for section in self.parser.sections
            if 'data' in section.Name.decode()
        ]

    def inDataSection(self, address):
        """Return True when the address lies in one of the ranges of dataRange."""
        return any(start <= address < end for start, end in self.dataRange)

    def getData(self, start, size):
        """Read `size` bytes at file offset `start` from the file at self.path."""
        with open(self.path, 'rb') as f:
            f.seek(start)
            return f.read(size)

    def findStrings(self):
        """Return a dict mapping address to text for the strings found in data sections."""
        strings = {}
        for section in self.parser.sections:
            if 'data' in section.Name.decode():
                strings.update(self._sectionStrings(section))
        return strings

    def strings(self):
        """
        Return (hex address, text) tuples for all strings: first those from the data
        sections, then those from every other section. The addresses of the latter
        are also appended to self.stringAddrs.
        """
        strings = [(hex(address), string) for address, string in self.findStrings().items()]
        for section in self.parser.sections:
            # The section name is the `Name` field, as in every other method here;
            # `section.name` is not the section's name.
            if 'data' in section.Name.decode():
                continue
            for address, string in self._sectionStrings(section):
                strings.append((hex(address), string))
                self.stringAddrs.append(address)
        return strings

    def info(self):
        """Return an HTML snippet with file name, type, image base and entry point."""
        text = 'File name: <b>' + os.path.basename(self.path) + '</b><br/>'
        text += 'Type: <b>' + self.type + '</b><br/>'
        text += 'Imagebase: <b>' + hex(self.imageBase) + '</b><br/>'
        text += 'Entrypoint: <b>' + hex(self.entryPoint) +'</b>'
        return text
Esempio n. 15
0
def parse_resources(pe: pefile.PE):
    """Parse only the resource directory of `pe` and return the result of `has_resources(pe)`."""
    resource_index = pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
    pe.parse_data_directories(directories=[resource_index])
    return has_resources(pe)