def __init__(self, path, fn):
        """
        Load a 32-bit PE file and set up the state used by the analysis.

        @param path: path of the PE file, handed to pefile.PE
        @param fn: file name; stored on the instance and printed when the
                   binary is rejected

        @raise pefile.PEFormatError: if the optional header magic is 0x20b
                                     (the binary is reported as 'x64 binary')
        """
        self.fn = fn
        # fast_load defers directory parsing; only the headers are needed
        # for the magic check below.
        self.pe = pefile.PE(path, fast_load=True)

        if self.pe.OPTIONAL_HEADER.Magic == 0x20b:
            print(fn)
            raise pefile.PEFormatError('x64 binary')

        self.neg_history = list()
        self.ep = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
        self.imageBase = self.pe.OPTIONAL_HEADER.ImageBase
        # NOTE(review): Cs / CS_ARCH_X86 / CS_MODE_32 are presumably the
        # Capstone disassembler API -- confirm against the file's imports.
        self.md = Cs(CS_ARCH_X86, CS_MODE_32)
        self.data = self.pe.get_memory_mapped_image()
        self.offset = 100
        self.importTable = list()
        self.pe.full_load()

        # Operand-shape sequences matched elsewhere in the class; each entry
        # is an ordered list of instruction patterns.
        self.main_func_patterns = [
            ['push r', 'push i', 'push r', 'push r', 'call', 'push r', 'call'],
            ['push r', 'push r', 'push r', 'push r', 'call', 'push r', 'call'],
            ['push r', 'push r', 'push n', 'push n', 'call', 'push r', 'call'],
            ['push r', 'push r', 'push n', 'push n', 'call'],
            ['push r', 'push r', 'push r', 'push n', 'call'],
            ['push i', 'push i', 'push i', 'call'],
            ['push r', 'push r', 'push i', 'call']
        ]

        self.exit_proc_funcs = [
            'abort', 'exitprocess', 'exit', 'thunrtmain', 'afxwinmain',
            'terminateprocess'
        ]
        self.not_exit_proc_funcs = ['msg', 'exitwindow']

        # NOTE(review): full_load() above presumably already parsed the data
        # directories, which would make this call redundant -- confirm
        # before removing.
        self.pe.parse_data_directories()
        try:
            for entry in self.pe.DIRECTORY_ENTRY_IMPORT:
                for imp in entry.imports:
                    self.importTable.append((imp.address, imp.name))
        except AttributeError:
            # The PE object has no DIRECTORY_ENTRY_IMPORT attribute when the
            # binary has no import directory; anything else should surface.
            print('[*] no IMP')
    def process_pe_section(self, pe, section):
        """
        Retrieve desired PE section

        @param pe: PE object
        @param section: name of the section to search; append ':header'
                        (e.g. '.text:header') to get the packed section
                        header instead of the section data

        @return a dict with the requested 'section' string and its 'data'
        @raise pefile.PEFormatError: if no section matches
        """

        search_header = re.search(r'^(.+)(:header)$', section)

        # Iterate through all existing PE sections
        for sec in pe.sections:
            # Section names are NUL-padded. On Python 3 they are bytes, so
            # the Python 2 only `translate(None, '\x00')` call would raise;
            # normalise to a text string first, then drop the padding.
            sec_name = sec.Name
            if not isinstance(sec_name, str):
                sec_name = sec_name.decode('utf-8', 'replace')
            sec_name = sec_name.replace('\x00', '')

            if search_header and search_header.group(1) == sec_name:
                # Get section header
                return {'section': section, 'data': sec.__pack__()}
            elif section == sec_name:
                # Get section data
                return {'section': section, 'data': sec.get_data()}

        header = search_header.group(1) if search_header else section
        raise pefile.PEFormatError('Section {0} not found'.format(header))
Example #3
0
    def process_pe_section(self, pe, section):
        """
        Retrieve a PE section together with its offset and size

        @param pe: PE object
        @param section: name of the section to search; append ':header'
                        (e.g. '.text:header') to get the packed section
                        header instead of the section data

        @return a dict with keys 'section', 'data', 'offset' and 'size'
        @raise pefile.PEFormatError: if no section matches
        """

        search_header = re.search(r'^(.+)(:header)$', section)

        # Iterate through all existing PE sections
        for sec in pe.sections:
            # Section names are NUL-padded. On Python 3 they are bytes, so
            # the Python 2 only `translate(None, '\x00')` call would raise;
            # normalise to a text string first, then drop the padding.
            sec_name = sec.Name
            if not isinstance(sec_name, str):
                sec_name = sec_name.decode('utf-8', 'replace')
            sec_name = sec_name.replace('\x00', '')

            if search_header and search_header.group(1) == sec_name:
                # Get section header
                packed_header = sec.__pack__()
                return {
                    'section': section,
                    'data': packed_header,
                    'offset': 0,
                    'size': len(packed_header)
                }

            elif section == sec_name:
                # Get section data; 'section' carries the raw, still padded
                # name exactly as stored in the section header.
                return {
                    'section': sec.Name,
                    'data': sec.data,
                    'offset': sec.VirtualAddress,
                    'size': sec.SizeOfRawData
                }

        header = search_header.group(1) if search_header else section
        raise pefile.PEFormatError('Section {0} not found'.format(header))
Example #4
0
    def __init__(self, file_name):
        """
        Remember the file path and parse it with pefile.

        @param file_name: path of the PE file to load

        @raise pefile.PEFormatError: with '__EmptyFile__' when the parsed
                                     PE object evaluates as falsy
        """
        self.filepath = file_name
        parsed = pefile.PE(self.filepath)
        self.pe = parsed
        if parsed:
            return
        raise pefile.PEFormatError('__EmptyFile__')
Example #5
0
    def run(self):
        """
        Parse the file at self.filepath with pefile and collect PE metadata.

        Collected keys: imphash, warnings, type (only when the file is a
        DLL, driver or EXE), sections, machine, architecture, os,
        entrypoint, imagebase, compilation_timestamp, import_table,
        export_table (first 100 names) and flags.

        A pefile.PEFormatError is not propagated: it is logged, appended to
        self.report.errors, the report is marked FAILED and saved, and
        whatever was collected up to that point is returned.

        :return: dict with the collected fields
        """
        results = {}
        try:
            pe = pefile.PE(self.filepath)
            if not pe:
                raise pefile.PEFormatError("Empty file?")
            # The full dump is only consulted for exported symbols and flags.
            full_dump = pe.dump_dict()

            results["imphash"] = pe.get_imphash()

            results["warnings"] = pe.get_warnings()

            if pe.is_dll():
                results["type"] = "DLL"
            elif pe.is_driver():
                results["type"] = "DRIVER"
            elif pe.is_exe():
                results["type"] = "EXE"

            sections = []
            for section in pe.sections:
                try:
                    name = section.Name.decode()
                except UnicodeDecodeError as e:
                    name = "UnableToDecode"
                    logger.warning(
                        f"Unable to decode section {section.Name} exception {e}"
                    )
                section_item = {
                    "name": name,
                    "address": hex(section.VirtualAddress),
                    "virtual_size": hex(section.Misc_VirtualSize),
                    "size": section.SizeOfRawData,
                    "entropy": section.get_entropy(),
                }
                sections.append(section_item)

            results["sections"] = sections

            machine_value = pe.FILE_HEADER.Machine
            results["machine"] = machine_value
            # Keyed by the integer machine code. The previous string keys
            # were compared against hex(), which never yields a
            # zero-padded "0x0200", so Itanium could not be recognised.
            mt = {0x14C: "x86", 0x200: "Itanium", 0x8664: "x64"}
            architecture = ""
            if isinstance(machine_value, int):
                architecture = mt.get(machine_value, "")
            if not architecture:
                architecture = str(machine_value) + " => Not x86/64 or Itanium"
            results["architecture"] = architecture

            results["os"] = "{}.{}".format(
                pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,
                pe.OPTIONAL_HEADER.MinorOperatingSystemVersion,
            )

            results["entrypoint"] = hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint)

            results["imagebase"] = hex(pe.OPTIONAL_HEADER.ImageBase)

            timestamp = pe.FILE_HEADER.TimeDateStamp
            results["compilation_timestamp"] = datetime.utcfromtimestamp(
                timestamp).strftime("%Y-%m-%d %H:%M:%S")

            import_table = []
            # The attribute is absent when pefile found no import directory.
            directory_entry_import = getattr(pe, "DIRECTORY_ENTRY_IMPORT", [])
            for entry in directory_entry_import:
                imp = {
                    "entryname": entry.dll.decode() if entry.dll else None,
                    "symbols": [],
                }
                for symbol in entry.imports:
                    # Imports without a name are skipped.
                    if symbol.name:
                        imp["symbols"].append(symbol.name.decode())
                import_table.append(imp)
            results["import_table"] = import_table

            export_table = []
            for entry in full_dump.get("Exported symbols", []):
                symbol_name = entry.get("Name", None)
                # in case it is a dictionary, we do not mind it
                try:
                    export_table.append(symbol_name.decode())
                except (UnicodeDecodeError, AttributeError) as e:
                    logger.debug(
                        f"PE info error while decoding export table symbols: {e}"
                    )
            # this is to reduce the output
            export_table = export_table[:100]
            results["export_table"] = export_table

            results["flags"] = full_dump.get("Flags", [])

        except pefile.PEFormatError as e:
            warning_message = (
                "job_id:{} analyzer:{} md5:{} filename: {} PEFormatError {}"
                "".format(self.job_id, self.analyzer_name, self.md5,
                          self.filename, e))
            logger.warning(warning_message)
            self.report.errors.append(warning_message)
            self.report.status = self.report.Status.FAILED
            self.report.save()

        return results
Example #6
0
def collect(filename):
    """
    Collects the data from the PE format of the specified filename. It assumes that the file already exists in the
    working directory.
    :param filename: String representing the file's name
    :return: Dictionary containing all collected data, with the keys 'Section Count', 'Page Count', 'Time',
        'Machine', 'Entry Point', 'Subsystem' and 'Sections'
    :raises pefile.PEFormatError: if the DOS header does not carry the 'MZ' magic value
    """
    data = {}
    pe = pefile.PE(filename, fast_load=False)

    # 0x5A4D (23117) is the little-endian integer formed by the bytes 'MZ'.
    # The previous check re-opened the file under the same variable name as
    # the PE object (leaving a closed file handle behind) and raised only
    # when the signature *was* 'MZ'; the header field is checked directly.
    if pe.DOS_HEADER.e_magic != 0x5A4D:
        raise pefile.PEFormatError(
            "File: %s doesn't contain 'MZ' magic value" % filename)

    data['Section Count'] = pe.FILE_HEADER.NumberOfSections
    data['Page Count'] = pe.DOS_HEADER.e_cp
    data['Time'] = str(
        datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp))

    # TODO: Add more machines (MIPS, RISC-V, Hitachi)
    # Sets the machine by comparing returned integer from FILE_HEADER to a predetermined dictionary of machines
    machines_dict = {
        34404: "x86-64",
        332: "Intel i386",
        448: "ARM",
        43620: "ARM64",
        3772: "EFI"
    }
    data['Machine'] = machines_dict.get(pe.FILE_HEADER.Machine, "Unknown")

    data['Entry Point'] = pe.OPTIONAL_HEADER.AddressOfEntryPoint

    # Sets the subsystem by comparing returned integer from OPTIONAL_HEADER to a predetermined dictionary of subsystems
    subsystems_dict = {
        1: "Native",
        2: "Windows GUI",
        3: "Windows CUI",
        5: "OS/2 CUI",
        7: "POSIX CUI",
        8: "Native Windows",
        9: "Windows CE",
        10: "EFI Application",
        16: "Windows Boot Application"
    }
    data['Subsystem'] = subsystems_dict.get(pe.OPTIONAL_HEADER.Subsystem,
                                            "Unknown")

    # Gets information on each section including name, entropy, address, virtual size, and raw size
    sections_dict = {}
    for section in pe.sections:
        # Names are NUL-padded; undecodable bytes are replaced rather than
        # aborting the whole collection.
        section_name = section.Name.decode("utf-8",
                                           errors="replace").rstrip('\x00')
        sections_dict[section_name] = {
            'name': section_name,
            'entropy': round(get_entropy(section), 5),
            'address': hex(section.VirtualAddress),
            'Virtual Size': hex(section.Misc_VirtualSize),
            'Raw Size': hex(section.SizeOfRawData)
        }
    # Updates the data dictionary with a sections dictionary
    data['Sections'] = sections_dict

    return data