Example #1
class FMG:

    PRE_HEADER_STRUCT = (  # Always little-endian
        "x",
        ("big_endian", "?"),  # Only DeS is big-endian
        ("version", "b"),  # 0 for DeS, 1 for DS1/DSR/DS2, 2 for BB/DS3
        "x",
    )

    HEADER_STRUCTS = {
        0: (
            ("file_size", "i"),
            ("one", "b", 1),
            ("version_magic", "b", -1),
            "2x",
            ("range_count", "i"),
            ("string_count", "i"),
            ("string_offsets_offset", "i"),
            ("zero", "i", 0),
        ),
        1: (
            ("file_size", "i"),
            ("one", "b", 1),
            ("version_magic", "b", 0),
            "2x",
            ("range_count", "i"),
            ("string_count", "i"),
            ("string_offsets_offset", "i"),
            ("zero", "i", 0),
        ),
        2: (
            ("file_size", "i"),
            ("one", "b", 1),
            ("version_magic", "b", 0),
            "2x",
            ("range_count", "i"),
            ("string_count", "i"),
            ("version_3_extra", "i", 255),
            ("string_offsets_offset", "q"),
            ("zero", "q", 0),
        ),
    }

    RANGE_STRUCTS = {
        0: (("first_index", "i"), ("first_id", "i"), ("last_id", "i"))
    }
    RANGE_STRUCTS[1] = RANGE_STRUCTS[0]
    RANGE_STRUCTS[2] = RANGE_STRUCTS[0] + ("4x", )

    STRING_OFFSET_STRUCTS = {
        0: (("offset", "i"), ),
        1: (("offset", "i"), ),
        2: (("offset", "q"), )
    }

    LINE_LIMIT = {
        "ds1": 11,
        "ds2": 11,  # TODO
        "bb": 11,  # TODO
        "ds3": 11,  # TODO
    }

    def __init__(self, fmg_source, remove_empty_entries=True, version=None):

        self.pre_header_struct = BinaryStruct(*self.PRE_HEADER_STRUCT)
        self.version = None
        self.big_endian = False

        self.header_struct = BinaryStruct()
        self.range_struct = BinaryStruct()
        self.string_offset_struct = BinaryStruct()

        self.fmg_path = None
        self.entries = {}

        if fmg_source is None:
            return

        if isinstance(fmg_source, dict):
            self.entries = fmg_source
            self._set_version(version)
            return

        if version is not None:
            raise ValueError(
                "You cannot specify 'version' when loading an FMG from file content. The version will\n"
                "be automatically detected.")

        if isinstance(fmg_source, bytes):
            self.unpack(io.BytesIO(fmg_source), remove_empty_entries)

        elif isinstance(fmg_source, str):
            self.fmg_path = fmg_source
            with open(fmg_source, "rb") as file:
                self.unpack(file, remove_empty_entries)

        elif isinstance(fmg_source, BNDEntry):
            self.unpack(io.BytesIO(fmg_source.data), remove_empty_entries)

        else:
            raise TypeError(f"Invalid `fmg_source` type: {type(fmg_source)}")

    def _set_version(self, version):
        if str(version).lower() in {"des", "0"}:
            self.version = v = 0
            self.big_endian = True
        elif str(version).lower() in {
                "ds1", "ptd", "ptde", "dsr", "ds2", "bb", "1"
        }:
            self.version = v = 1
            self.big_endian = False
        elif str(version).lower() in {"ds3", "2"}:
            self.version = v = 2
            self.big_endian = False
        else:
            raise ValueError(
                f"Unrecognized FMG version: {version}. Try one of: ('des', 'ds1', 'ds2', 'bb', 'ds3')."
            )

        byte_order = ">" if self.big_endian else "<"
        self.header_struct = BinaryStruct(*self.HEADER_STRUCTS[v],
                                          byte_order=byte_order)
        self.range_struct = BinaryStruct(*self.RANGE_STRUCTS[v],
                                         byte_order=byte_order)
        self.string_offset_struct = BinaryStruct(
            *self.STRING_OFFSET_STRUCTS[v], byte_order=byte_order)

    def unpack(self, fmg_buffer, remove_empty_entries=True):
        try:
            pre_header = self.pre_header_struct.unpack(fmg_buffer)
        except ValueError:
            raise ValueError(
                "Could not read FMG header. Is the file/data correct?")
        try:
            self._set_version(pre_header["version"])
        except ValueError:
            raise ValueError(
                f"Unrecognized FMG version in file content: {pre_header['version']}."
            )
        header = self.header_struct.unpack(fmg_buffer)

        # Groups of contiguous text string IDs are defined by ranges (first ID, last ID) to save space.
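        # e.g. a range with first_index=3, first_id=10, last_id=12 maps IDs 10, 11, 12 to string offsets 3, 4, 5.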
        ranges = self.range_struct.unpack_count(fmg_buffer,
                                                count=header["range_count"])
        if fmg_buffer.tell() != header["string_offsets_offset"]:
            _LOGGER.warning(
                "Range data did not end at string data offset given in FMG header."
            )
        string_offsets = self.string_offset_struct.unpack_count(
            fmg_buffer, count=header["string_count"])

        # Text pointer table corresponds to all the IDs (joined together) of the above ranges, in order.
        for string_range in ranges:
            i = string_range["first_index"]
            for string_id in range(string_range["first_id"],
                                   string_range["last_id"] + 1):
                if string_id in self.entries:
                    raise ValueError(
                        f"Malformed FMG: Entry index {string_id} appeared more than once."
                    )
                string_offset = string_offsets[i]["offset"]
                if string_offset == 0:
                    if not remove_empty_entries:
                        # Empty text string. These will trigger in-game error messages, like ?PlaceName?.
                        # Distinct from ' ', which is intentionally blank text data (e.g. the unused area subtitles).
                        self.entries[string_id] = ""
                else:
                    string = read_chars_from_buffer(fmg_buffer,
                                                    offset=string_offset,
                                                    encoding="utf-16le")
                    if string or not remove_empty_entries:
                        self.entries[string_id] = string
                i += 1

    def pack(self,
             remove_empty_entries=True,
             pipe_to_newline=True,
             word_wrap_limit=None,
             max_lines="ds1"):
        """Pack text dictionary to binary FMG file.

        Args:
            remove_empty_entries: Ignore empty entries ('') when writing. This will remove many entries from the vanilla
                FMG files, and likely make some of them larger (as the ranges used to define them will be more broken
                up), but will make the dictionary much easier to read through. (Default: True)
            pipe_to_newline: Convert pipes ('|') to newlines ('\n'), which allows for nicer strings. Newline characters
                will still be treated normally. (Default: True)
            word_wrap_limit: Specify a horizontal character limit for automatic word wrapping. If None, no wrapping will
                be applied. (Default: None)
            max_lines: Maximum number of lines that should appear in each text entry. A warning will be logged if any
                text exceeds this after wrapping. This is most useful for item descriptions when automatic word
                wrapping is used. You can also specify a game key in {'ds1', 'ds2', 'bb', 'ds3'} to use the line limit
                I have set for item descriptions in that game. (Default: 'ds1')

        Note that none of these arguments will modify the entries in this FMG instance.
        """
        if self.version not in {0, 1, 2}:
            raise AttributeError(
                "FMG version must be 0, 1, or 2. Set it manually with FMG.version."
            )

        # Convert to sorted list (sorted by ID).
        if remove_empty_entries:
            fmg_entries = sorted([(k, v)
                                  for k, v in self.entries.items() if v != ""],
                                 key=lambda x: x[0])
        else:
            fmg_entries = sorted([(k, v) for k, v in self.entries.items()],
                                 key=lambda x: x[0])

        for i in range(len(fmg_entries)):
            # Optional: convert pipes to newlines.
            index, string = fmg_entries[i]
            if pipe_to_newline:
                string = string.replace("|", "\n")
                fmg_entries[i] = (index, string)
            # Optional: insert new lines to wrap automatically.
            if word_wrap_limit is not None:
                lines = string.split("\n\n")
                if lines != [" "]:
                    # Wrap lines, and re-add manual newlines.
                    wrapped_lines = []
                    for line in lines:
                        if "\n" in line:
                            # Don't touch lines with newlines already in them.
                            wrapped_lines.append(line)
                        else:
                            wrapped_lines.append("\n".join(
                                wrap(line, word_wrap_limit)))
                    wrapped_string = "\n\n".join(wrapped_lines).rstrip("\n")
                    if isinstance(max_lines, str):
                        try:
                            max_lines = self.LINE_LIMIT[max_lines]
                        except KeyError:
                            raise ValueError(
                                f"Line limit for descriptions could not be "
                                f"determined from key {repr(max_lines)}.")
                    if wrapped_string.count("\n") > max_lines - 1:
                        line_count = wrapped_string.count("\n") + 1
                        _LOGGER.warning(
                            f"FMG index {index} has {line_count} lines (max is {max_lines}):\n"
                            f"{wrapped_string}")
                    fmg_entries[i] = (index, wrapped_string)

        # Encode all text entries and pack them, and record the offsets (will be globally offset later).
        relative_string_offset = 0
        packed_strings = b""
        string_offset_list = []

        for string_id, string in fmg_entries:
            if string == "":
                string_offset_list.append(-1)  # changed to zero when offsets become absolute
                continue
            null_terminated_text = string.encode("utf-16le") + b"\0\0"
            packed_strings += null_terminated_text
            string_offset_list.append(relative_string_offset)
            relative_string_offset += len(null_terminated_text)

        # Next, the ranges. We just make these as efficient as possible, but unlike FROM, we value the lack of clutter
        # from empty entries more highly than defining a handful fewer ranges.
        ranges = []
        range_start_index = None
        range_start = None
        range_stop = None
        for string_index, (string_id, _) in enumerate(fmg_entries):
            if range_start_index is None:
                range_start_index = string_index
                range_start = range_stop = string_id
            elif string_id == range_stop + 1:
                # Expand current range to include this string.
                range_stop += 1
            else:
                # Terminate last range...
                ranges.append(
                    self.range_struct.pack(first_index=range_start_index,
                                           first_id=range_start,
                                           last_id=range_stop))
                # ... then start new one at this string.
                range_start_index = string_index
                range_start = range_stop = string_id

        if range_start is not None:
            # Terminate last range.
            ranges.append(
                self.range_struct.pack(first_index=range_start_index,
                                       first_id=range_start,
                                       last_id=range_stop))

        packed_ranges = b"".join(ranges)

        # Compute table offsets.
        ranges_offset = self.pre_header_struct.size + self.header_struct.size
        string_offsets_offset = ranges_offset + len(packed_ranges)
        packed_strings_offset = string_offsets_offset + self.string_offset_struct.size * len(
            string_offset_list)
        file_size = packed_strings_offset + len(packed_strings)
        packed_string_offsets = b""
        for string_offset in string_offset_list:
            if string_offset == -1:
                packed_string_offsets += self.string_offset_struct.pack(
                    offset=0)
            else:
                packed_string_offsets += self.string_offset_struct.pack(
                    offset=packed_strings_offset + string_offset)

        packed_pre_header = self.pre_header_struct.pack(
            big_endian=self.big_endian,
            version=self.version,
        )

        packed_header = self.header_struct.pack(
            file_size=file_size,
            range_count=len(ranges),
            string_count=len(fmg_entries),
            string_offsets_offset=string_offsets_offset,
        )

        return packed_pre_header + packed_header + packed_ranges + packed_string_offsets + packed_strings

    def write_packed(self,
                     fmg_path=None,
                     remove_empty_entries=True,
                     pipe_to_newline=True,
                     word_wrap_limit=None,
                     max_lines="ds1"):
        """Write binary FMG to given path.

        See `pack` for descriptions of the other arguments.
        """
        if fmg_path is None:
            if self.fmg_path:
                fmg_path = self.fmg_path
            else:
                raise ValueError(
                    "FMG path could not be determined automatically (must be specified)."
                )
        with open(fmg_path, "wb") as output:
            output.write(
                self.pack(
                    remove_empty_entries=remove_empty_entries,
                    pipe_to_newline=pipe_to_newline,
                    word_wrap_limit=word_wrap_limit,
                    max_lines=max_lines,
                ))

    def __getitem__(self, index: int):
        return self.entries[index]

    def __setitem__(self, index: int, text: str):
        self.entries[index] = text

    def update(self, entries):
        if isinstance(entries, dict):
            return self.entries.update(entries)
        elif isinstance(entries, FMG):
            return self.entries.update(entries.entries)
        raise TypeError(
            f"Can only call `FMG.update()` with a dictionary or another FMG, not {type(entries)}."
        )

    def find(self, search_string, replace_with=None):
        """Search for the given text in this FMG.

        Args:
            search_string: Text to find. The text can appear anywhere inside an entry to return a result.
            replace_with: String to replace the given text with in any results. (Default: None)
        """
        found_something = False
        for index, text in self.entries.items():
            if search_string in text:
                if not found_something:
                    print(
                        f"\n~~~ FMG: {str(self.fmg_path) if self.fmg_path is not None else '<None>'}"
                    )
                    found_something = True
                print(f"\n  [{index}]:\n{text}")
                if replace_with is not None:
                    self.entries[index] = text.replace(search_string,
                                                       replace_with)
                    print(f"  -> {self.entries[index]}")
        if not found_something:
            print(
                f"Could not find any occurrences of string {repr(search_string)}."
            )

    def __iter__(self):
        return iter(self.entries.items())

    def __eq__(self, other):
        if isinstance(other, dict):
            return self.entries == other
        elif isinstance(other, FMG):
            return self.entries == other.entries
        raise TypeError(
            "Can only test FMG equality with a dictionary or other FMG.")

    def __repr__(self):
        s = f"FMG Path: {str(self.fmg_path) if self.fmg_path is not None else '<None>'}"
        for index, text in self.entries.items():
            s += f"\n    {index}: {text}"
        return s
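
The range table built in `pack` above is the format's main space-saving trick: sorted entry IDs are collapsed
into (first_index, first_id, last_id) triples. A minimal standalone sketch of that encoding (plain Python;
`encode_ranges` is an illustrative helper, not part of FMG):

def encode_ranges(sorted_ids):
    """Collapse sorted entry IDs into (first_index, first_id, last_id) triples, as FMG.pack does."""
    ranges = []
    for index, entry_id in enumerate(sorted_ids):
        if ranges and entry_id == ranges[-1][2] + 1:
            first_index, first_id, _ = ranges[-1]
            ranges[-1] = (first_index, first_id, entry_id)  # extend the current contiguous range
        else:
            ranges.append((index, entry_id, entry_id))  # start a new range at this ID
    return ranges

assert encode_ranges([10, 11, 12, 20, 21]) == [(0, 10, 12), (3, 20, 21)]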
Example #2
class BND4(BaseBND):

    HEADER_STRUCT_START = (
        ('bnd_version', '4s', b'BND4'), ('flag_1', '?'), ('flag_2', '?'), '2x',
        ('big_endian', 'i'))  # 0x00010000 (False) or 0x00000100 (True)
    HEADER_STRUCT_ENDIAN = (
        ('entry_count', 'i'),
        ('header_size', 'q', 64),
        ('bnd_signature',
         '8s'),  # Real signature may be shorter, but packing will pad it out.
        ('entry_header_size', 'q'),
        ('data_offset', 'q'),
        ('utf16_paths', '?'),
        ('bnd_magic', 'b'),
        ('hash_table_type', 'B'),  # 0, 1, 4, or 128
        '5x',
        ('hash_table_offset', 'q'),  # only non-zero if hash_table_type == 4
    )

    BND_ENTRY_HEADER = (('entry_magic', 'B'), '3x', ('minus_one', 'i', -1),
                        ('compressed_data_size', 'q'))
    UNCOMPRESSED_DATA_SIZE = ('uncompressed_data_size', 'q')
    DATA_OFFSET = ('data_offset', 'I')
    ENTRY_ID = ('entry_id', 'i')
    NAME_OFFSET = ('path_offset', 'i')

    HASH_TABLE_HEADER = BinaryStruct('8x', ('path_hashes_offset', 'q'),
                                     ('hash_group_count', 'I'),
                                     ('unknown', 'i', 0x00080810))
    PATH_HASH_STRUCT = BinaryStruct(
        ('hashed_value', 'I'),
        ('entry_index', 'i'),
    )
    HASH_GROUP_STRUCT = BinaryStruct(
        ('length', 'i'),
        ('index', 'i'),
    )

    def __init__(self, bnd_source=None, entry_class=None):
        self.bnd_flags = (False, False)  # Two unknown bools.
        self.utf16_paths = False  # If False, paths are written in Shift-JIS.
        self.hash_table_type = 0
        self.hash_table_offset = 0
        super().__init__(bnd_source, entry_class)

    def unpack(self, bnd_buffer):
        if isinstance(bnd_buffer, bytes):
            bnd_buffer = BytesIO(bnd_buffer)

        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order='<')
        header = self.header_struct.unpack(bnd_buffer)
        self.bnd_flags = (header.flag_1, header.flag_2)
        self.bnd_version = header.bnd_version
        self.big_endian = header.big_endian == 0x00000100  # Magic not used to infer endianness here.
        byte_order = '>' if self.big_endian else '<'
        header.update(
            self.header_struct.unpack(bnd_buffer,
                                      *self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order))
        self.bnd_signature = header.bnd_signature
        self.bnd_magic = header.bnd_magic
        self.utf16_paths = header.utf16_paths
        self.hash_table_type = header.hash_table_type
        self.hash_table_offset = header.hash_table_offset
        path_encoding = ('utf-16be' if self.big_endian else
                         'utf-16le') if self.utf16_paths else 'shift-jis'

        if header.entry_header_size != header_size(self.bnd_magic):
            raise ValueError(
                f"Expected BND entry header size {header_size(self.bnd_magic)} based on magic\n"
                f"{hex(self.bnd_magic)}, but BND header says {header.entry_header_size}."
            )
        if self.hash_table_type != 4 and self.hash_table_offset != 0:
            _LOGGER.warning(
                f"Found non-zero hash table offset {self.hash_table_offset}, but header says this BND has no hash "
                f"table.")
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.bnd_magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)
        self.entry_header_struct.add_fields(self.DATA_OFFSET,
                                            byte_order=byte_order)
        if has_id(self.bnd_magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.bnd_magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if self.bnd_magic == 0x20:
            # Extra pad.
            self.entry_header_struct.add_fields('8x')
        if header.entry_header_size != self.entry_header_struct.size:
            _LOGGER.warning(
                f"Entry header size given in BND header ({header.entry_header_size}) does not match actual entry "
                f"header size ({self.entry_header_struct.size}).")
        for entry in BNDEntry.unpack(bnd_buffer,
                                     self.entry_header_struct,
                                     path_encoding=path_encoding,
                                     count=header.entry_count):
            self.add_entry(entry)

        # Read hash table.
        if self.hash_table_type == 4:
            bnd_buffer.seek(self.hash_table_offset)
            self._most_recent_hash_table = bnd_buffer.read(
                header.data_offset - self.hash_table_offset)
        self._most_recent_entry_count = len(self.binary_entries)
        self._most_recent_paths = [entry.path for entry in self.binary_entries]

    def load_unpacked_dir(self, directory):
        directory = Path(directory)
        if not directory.is_dir():
            raise ValueError(
                f"Could not find unpacked BND directory {repr(directory)}.")
        with (directory / 'bnd_manifest.txt').open('rb') as f:
            self.bnd_version = self.read_bnd_setting(f.readline(),
                                                     'version',
                                                     assert_values=[b'BND4'])
            self.bnd_signature = self.read_bnd_setting(f.readline(),
                                                       'bnd_signature')
            self.bnd_magic = self.read_bnd_setting(f.readline(),
                                                   'bnd_magic',
                                                   assert_type=int)
            self.big_endian = self.read_bnd_setting(f.readline(),
                                                    'big_endian',
                                                    assert_type=bool)
            self.utf16_paths = self.read_bnd_setting(f.readline(),
                                                     'utf16_paths',
                                                     assert_type=bool)
            self.hash_table_type = self.read_bnd_setting(f.readline(),
                                                         'hash_table_type',
                                                         assert_type=int)
            self.bnd_flags = self.read_bnd_setting(f.readline(),
                                                   'unknown_flags',
                                                   assert_type=tuple)
            self.dcx = self.read_bnd_setting(f.readline(),
                                             'dcx',
                                             assert_type=tuple)

            self.add_entries_from_manifest_paths(f, directory)

            self._most_recent_hash_table = b''  # Hash table will need to be built on first pack.
            self._most_recent_entry_count = len(self.binary_entries)
            self._most_recent_paths = [
                entry.path for entry in self.binary_entries
            ]

    def pack(self):
        entry_header_dicts = []
        packed_entry_headers = b''
        packed_entry_paths = b''
        relative_entry_path_offsets = []
        packed_entry_data = b''
        relative_entry_data_offsets = []
        rebuild_hash_table = not self._most_recent_hash_table
        path_encoding = ('utf-16be' if self.big_endian else
                         'utf-16le') if self.utf16_paths else 'shift-jis'

        if len(self.binary_entries) != len(self._entries):
            raise ValueError(
                "Number of classed entries does not match number of binary entries.\n"
                "You must use the add_entry() method to add new BND entries.")

        if len(self.binary_entries) != self._most_recent_entry_count:
            rebuild_hash_table = True
        for i, entry in enumerate(self._entries):
            if not isinstance(entry, BNDEntry):
                if not hasattr(entry, 'pack'):
                    raise AttributeError(
                        f"Cannot pack BND: entry class {self._entry_class} has no pack() method."
                    )
                self.binary_entries[i].data = entry.pack()
                entry = self.binary_entries[i]
            if not rebuild_hash_table and entry.path != self._most_recent_paths[i]:
                rebuild_hash_table = True

        self._most_recent_entry_count = len(self.binary_entries)
        self._most_recent_paths = [entry.path for entry in self.binary_entries]

        for entry in self.binary_entries:

            packed_entry_data += b'\0' * 10  # Each entry is separated by ten pad bytes. (Probably not necessary.)

            entry_header_dict = {
                'entry_magic': entry.magic,
                'compressed_data_size': entry.data_size,
                'data_offset': len(packed_entry_data),
            }
            if has_id(self.bnd_magic):
                entry_header_dict['entry_id'] = entry.id
            if has_path(self.bnd_magic):
                entry_header_dict['path_offset'] = len(packed_entry_paths)
                relative_entry_path_offsets.append(len(packed_entry_paths))  # Relative to start of packed entry paths.
                packed_entry_paths += entry.get_packed_path(path_encoding)
            if has_uncompressed_size(self.bnd_magic):
                entry_header_dict['uncompressed_data_size'] = entry.data_size

            relative_entry_data_offsets.append(len(packed_entry_data))
            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict['compressed_data_size'] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(
            self._entries)
        if self.hash_table_type == 4:
            hash_table_offset = entry_path_table_offset + len(
                packed_entry_paths)
            if rebuild_hash_table:
                packed_hash_table = self.build_hash_table()
            else:
                packed_hash_table = self._most_recent_hash_table
            entry_packed_data_offset = hash_table_offset + len(
                packed_hash_table)
        else:
            hash_table_offset = 0
            packed_hash_table = b''
            entry_packed_data_offset = entry_path_table_offset + len(
                packed_entry_paths)
        # BND file size not needed.

        packed_header = self.header_struct.pack(
            flag_1=self.bnd_flags[0],
            flag_2=self.bnd_flags[1],
            big_endian=self.big_endian,
            entry_count=len(self._entries),
            bnd_signature=self.bnd_signature,
            entry_header_size=self.entry_header_struct.size,
            data_offset=entry_packed_data_offset,
            utf16_paths=self.utf16_paths,
            bnd_magic=self.bnd_magic,
            hash_table_type=self.hash_table_type,
            hash_table_offset=hash_table_offset,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict['data_offset'] += entry_packed_data_offset
            if has_path(self.bnd_magic):
                entry_header_dict['path_offset'] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(
                entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_hash_table + packed_entry_data

    @property
    def bnd_manifest_header(self):
        bnd_signature = self.bnd_signature.rstrip(b'\0').decode()
        return (f"version = BND4\n"
                f"bnd_signature = {bnd_signature}\n"
                f"bnd_magic = {repr(self.bnd_magic)}\n"
                f"big_endian = {self.big_endian}\n"
                f"utf16_paths = {self.utf16_paths}\n"
                f"hash_table_type = {self.hash_table_type}\n"
                f"unknown_flags = {repr(self.bnd_flags)}\n"
                f"dcx = {repr(self.dcx)}\n"
                f"\n")

    @staticmethod
    def is_prime(p):
        if p < 2:
            return False
        if p == 2:
            return True
        if (p % 2) == 0:
            return False
        for i in range(3, p // 2, 2):
            if (p % i) == 0:
                return False
            if i**2 > p:
                return True
        return True

    def build_hash_table(self):
        """ Some BND4 resources include tables of hashed entry paths, which aren't needed to read file contents, but
        need to be re-hashed to properly pack the file in case any paths have changed (or the number of entries). """

        # Group count set to first prime number greater than or equal to the number of entries divided by 7.
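        # e.g. 100 entries -> 100 // 7 = 14 -> group_count = 17 (first prime >= 14).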
        for p in range(len(self._entries) // 7, 100000):
            if self.is_prime(p):
                group_count = p
                break
        else:
            raise ValueError("Hash group count could not be determined.")

        hashes = []
        hash_lists = [[] for _ in range(group_count)]

        for entry_index, entry in enumerate(self.binary_entries):
            hashes.append(self.path_hash(entry.path))
            list_index = hashes[-1] % group_count
            hash_lists[list_index].append((hashes[-1], entry_index))

        for hash_list in hash_lists:
            hash_list.sort()  # Sort by hash value.

        hash_groups = []
        path_hashes = []

        total_hash_count = 0
        for hash_list in hash_lists:
            first_hash_index = total_hash_count
            for path_hash in hash_list:
                path_hashes.append({
                    'hashed_value': path_hash[0],
                    'entry_index': path_hash[1]
                })
                total_hash_count += 1
            hash_groups.append({
                'index': first_hash_index,
                'length': total_hash_count - first_hash_index
            })

        packed_hash_groups = self.HASH_GROUP_STRUCT.pack(hash_groups)
        packed_hash_table_header = self.HASH_TABLE_HEADER.pack(
            path_hashes_offset=self.HASH_TABLE_HEADER.size +
            len(packed_hash_groups),
            hash_group_count=group_count,
        )
        packed_path_hashes = self.PATH_HASH_STRUCT.pack(path_hashes)

        return packed_hash_table_header + packed_hash_groups + packed_path_hashes

    @staticmethod
    def path_hash(path_string):
        """ Simple string-hashing algorithm used by FROM. Strings use forward-slash path separators and always start
        with a forward slash. """
        hashable = path_string.replace('\\', '/')
        if not hashable.startswith('/'):
            hashable = '/' + hashable
        h = 0
        for s in hashable:
            h = (37 * h + ord(s)) & 0xFFFFFFFF  # standard FROM "x37" hash, masked to 32 bits for 'I' packing
        return h
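
The hash table packed by `build_hash_table` above distributes entries into `hash % group_count` buckets, with
each bucket sorted by hash value (matching the sort in that method). A standalone sketch of the bucketing
(plain Python; `bucket_paths` is an illustrative helper that mirrors the `path_hash` method above):

def path_hash(path_string):
    # Same scheme as BND4.path_hash: forward slashes, leading slash, 32-bit "x37" hash.
    hashable = path_string.replace("\\", "/")
    if not hashable.startswith("/"):
        hashable = "/" + hashable
    h = 0
    for s in hashable:
        h = (37 * h + ord(s)) & 0xFFFFFFFF
    return h

def bucket_paths(paths, group_count):
    """Group (hash, entry_index) pairs into `hash % group_count` buckets, each sorted by hash."""
    buckets = [[] for _ in range(group_count)]
    for entry_index, path in enumerate(paths):
        h = path_hash(path)
        buckets[h % group_count].append((h, entry_index))
    for bucket in buckets:
        bucket.sort()
    return buckets

buckets = bucket_paths(["chr/c0000.anibnd", "chr/c1000.anibnd"], 5)  # two entries spread over 5 groups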
Example #3
class BND3(BaseBND):

    HEADER_STRUCT_START = (
        ("bnd_version", "4s", b"BND3"),
        ("bnd_signature", "8s"),  # Real signature may be shorter, but packing will pad it out.
        ("bnd_magic", "b"),
        ("big_endian", "?"),
    )
    HEADER_STRUCT_ENDIAN = (
        ("unknown", "?"),  # usually zero
        ("zero", "B", 0),
        ("entry_count", "i"),
        ("file_size", "i"),
        "8x",
    )

    BND_ENTRY_HEADER = (("entry_magic", "B"), "3x", ("compressed_data_size", "i"), ("data_offset", "i"))
    ENTRY_ID = ("entry_id", "i")
    NAME_OFFSET = ("path_offset", "i")
    UNCOMPRESSED_DATA_SIZE = ("uncompressed_data_size", "i")

    def __init__(self, bnd_source=None, entry_class=None):
        self.unknown = False
        super().__init__(bnd_source, entry_class)

    def unpack(self, bnd_buffer):
        if isinstance(bnd_buffer, bytes):
            bnd_buffer = BytesIO(bnd_buffer)

        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        header = self.header_struct.unpack(bnd_buffer)
        self.bnd_version = header["bnd_version"]
        self.bnd_signature = header["bnd_signature"]
        self.bnd_magic = header["bnd_magic"]
        self.big_endian = header["big_endian"] or is_big_endian(self.bnd_magic)
        byte_order = ">" if self.big_endian else "<"
        header.update(self.header_struct.unpack(bnd_buffer, *self.HEADER_STRUCT_ENDIAN, byte_order=byte_order))
        self.unknown = header["unknown"]

        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_id(self.bnd_magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.bnd_magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if has_uncompressed_size(self.bnd_magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)

        for entry in BNDEntry.unpack(
            bnd_buffer, self.entry_header_struct, path_encoding="shift-jis", count=header["entry_count"]
        ):
            self.add_entry(entry)

    def load_unpacked_dir(self, directory):
        directory = Path(directory)
        if not directory.is_dir():
            raise ValueError(f"Could not find unpacked BND directory {repr(directory)}.")
        with (directory / "bnd_manifest.txt").open("rb") as f:
            self.bnd_version = self.read_bnd_setting(f.readline(), "version")
            self.bnd_signature = self.read_bnd_setting(f.readline(), "bnd_signature")
            self.bnd_magic = self.read_bnd_setting(f.readline(), "bnd_magic", assert_type=int)
            self.big_endian = self.read_bnd_setting(f.readline(), "big_endian", assert_type=bool)
            self.unknown = self.read_bnd_setting(f.readline(), "unknown", assert_type=bool)
            self.dcx = self.read_bnd_setting(f.readline(), "dcx", assert_type=tuple)

            self.add_entries_from_manifest_paths(f, directory)

        # Create header structs.
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        byte_order = ">" if self.big_endian else "<"
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN, byte_order=byte_order)
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_id(self.bnd_magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.bnd_magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if has_uncompressed_size(self.bnd_magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)

    def pack(self):
        entry_header_dicts = []
        packed_entry_headers = b""
        packed_entry_paths = b""
        relative_entry_path_offsets = []
        packed_entry_data = b""
        relative_entry_data_offsets = []

        if len(self.binary_entries) != len(self._entries):
            raise ValueError(
                "Number of classed entries does not match number of binary entries.\n"
                "Make sure you use the add_entry() method to add new BND entries."
            )

        for i, entry in enumerate(self._entries):
            if not isinstance(entry, BNDEntry):
                if not hasattr(entry, "pack"):
                    raise AttributeError(f"Cannot pack BND: entry class {self._entry_class} has no pack() method.")
                self.binary_entries[i].data = entry.pack()

        for entry in sorted(self.binary_entries, key=lambda e: e.id):
            entry_header_dict = {
                "entry_magic": entry.magic,
                "compressed_data_size": entry.data_size,
                "data_offset": len(packed_entry_data),
            }
            if has_id(self.bnd_magic):
                entry_header_dict["entry_id"] = entry.id
            if has_path(self.bnd_magic):
                entry_header_dict["path_offset"] = len(packed_entry_paths)
                relative_entry_path_offsets.append(len(packed_entry_paths))  # Relative to start of packed entry paths.
                packed_entry_paths += entry.get_packed_path("shift-jis")
            if has_uncompressed_size(self.bnd_magic):
                entry_header_dict["uncompressed_data_size"] = entry.data_size

            relative_entry_data_offsets.append(len(packed_entry_data))
            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict["compressed_data_size"] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(self._entries)
        entry_packed_data_offset = entry_path_table_offset + len(packed_entry_paths)
        bnd_file_size = entry_packed_data_offset + len(packed_entry_data)

        # Pack BND header.
        packed_header = self.header_struct.pack(
            bnd_signature=self.bnd_signature,
            bnd_magic=self.bnd_magic,
            big_endian=self.big_endian,
            unknown=self.unknown,
            entry_count=len(self._entries),
            file_size=bnd_file_size,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict["data_offset"] += entry_packed_data_offset
            if has_path(self.bnd_magic):
                entry_header_dict["path_offset"] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_entry_data

    @property
    def bnd_manifest_header(self):
        bnd_signature = self.bnd_signature.rstrip(b"\0").decode()
        return (
            f"version = BND3\n"
            f"bnd_signature = {bnd_signature}\n"
            f"bnd_magic = {self.bnd_magic}\n"
            f"big_endian = {self.big_endian}\n"
            f"unknown = {self.unknown}\n"
            f"dcx = {repr(self.dcx)}\n"
            f"\n"
        )
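
In both BND3 and BND4, the entry header layout is not fixed: fields are appended according to flag tests on
`bnd_magic` (`has_id`, `has_path`, `has_uncompressed_size`). A standalone sketch of the same idea using plain
`struct` format strings; the bit masks below are purely illustrative stand-ins, not the real bits tested by
those helpers:

import struct

HAS_ID, HAS_PATH, HAS_UNCOMPRESSED_SIZE = 0x01, 0x02, 0x04  # hypothetical masks, for illustration only

def entry_header_format(bnd_magic, byte_order="<"):
    """Assemble a struct format the way BND3.unpack conditionally adds fields."""
    fmt = byte_order + "B3xii"  # entry_magic, 3 pad bytes, compressed_data_size, data_offset
    if bnd_magic & HAS_ID:
        fmt += "i"  # entry_id
    if bnd_magic & HAS_PATH:
        fmt += "i"  # path_offset
    if bnd_magic & HAS_UNCOMPRESSED_SIZE:
        fmt += "i"  # uncompressed_data_size
    return fmt

assert struct.calcsize(entry_header_format(HAS_ID | HAS_PATH)) == 20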
Example #4
class BND3(BaseBND):

    HEADER_STRUCT_START = (
        ('bnd_version', '4s', b'BND3'),
        ('bnd_signature',
         '8s'),  # Real signature may be shorter, but packing will pad it out.
        ('bnd_magic', 'b'),
        ('big_endian', '?'))
    HEADER_STRUCT_ENDIAN = (
        ('unknown', '?'),  # usually zero
        ('zero', 'B', 0),
        ('entry_count', 'i'),
        ('file_size', 'i'),
        '8x')

    BND_ENTRY_HEADER = (('entry_magic', 'B'), '3x',
                        ('compressed_data_size', 'i'), ('data_offset', 'i'))
    ENTRY_ID = ('entry_id', 'i')
    NAME_OFFSET = ('path_offset', 'i')
    UNCOMPRESSED_DATA_SIZE = ('uncompressed_data_size', 'i')

    def __init__(self, bnd_source=None, entry_class=None):
        self.unknown = False
        super().__init__(bnd_source, entry_class)

    def unpack(self, bnd_buffer):
        if isinstance(bnd_buffer, bytes):
            bnd_buffer = BytesIO(bnd_buffer)

        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order='<')
        header = self.header_struct.unpack(bnd_buffer)
        self.bnd_version = header.bnd_version
        self.bnd_signature = header.bnd_signature
        self.bnd_magic = header.bnd_magic
        self.big_endian = header.big_endian or is_big_endian(self.bnd_magic)
        byte_order = '>' if self.big_endian else '<'
        header.update(
            self.header_struct.unpack(bnd_buffer,
                                      *self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order))
        self.unknown = header.unknown

        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_id(self.bnd_magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.bnd_magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.bnd_magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)

        for entry in BNDEntry.unpack(bnd_buffer,
                                     self.entry_header_struct,
                                     path_encoding='shift-jis',
                                     count=header.entry_count):
            self.add_entry(entry)

    def load_unpacked_dir(self, directory):
        directory = Path(directory)
        if not directory.is_dir():
            raise ValueError(
                f"Could not find unpacked BND directory {repr(directory)}.")
        with (directory / 'bnd_manifest.txt').open('rb') as f:
            self.bnd_version = self.read_bnd_setting(f.readline(), 'version')
            self.bnd_signature = self.read_bnd_setting(f.readline(),
                                                       'bnd_signature')
            self.bnd_magic = self.read_bnd_setting(f.readline(),
                                                   'bnd_magic',
                                                   assert_type=int)
            self.big_endian = self.read_bnd_setting(f.readline(),
                                                    'big_endian',
                                                    assert_type=bool)
            self.unknown = self.read_bnd_setting(f.readline(),
                                                 'unknown',
                                                 assert_type=bool)
            self.dcx = self.read_bnd_setting(f.readline(),
                                             'dcx',
                                             assert_type=tuple)

            self.add_entries_from_manifest_paths(f, directory)

        # Create header structs.
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order='<')
        byte_order = '>' if self.big_endian else '<'
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order)
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_id(self.bnd_magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.bnd_magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.bnd_magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)

    def pack(self):
        entry_header_dicts = []
        packed_entry_headers = b''
        packed_entry_paths = b''
        relative_entry_path_offsets = []
        packed_entry_data = b''
        relative_entry_data_offsets = []

        if len(self.binary_entries) != len(self._entries):
            raise ValueError(
                "Number of classed entries does not match number of binary entries.\n"
                "Make sure you use the add_entry() method to add new BND entries."
            )

        for i, entry in enumerate(self._entries):
            if not isinstance(entry, BNDEntry):
                if not hasattr(entry, 'pack'):
                    raise AttributeError(
                        f"Cannot pack BND: entry class {self._entry_class} has no pack() method."
                    )
                self.binary_entries[i].data = entry.pack()

        for entry in self.binary_entries:

            entry_header_dict = {
                'entry_magic': entry.magic,
                'compressed_data_size': entry.data_size,
                'data_offset': len(packed_entry_data),
            }
            if has_id(self.bnd_magic):
                entry_header_dict['entry_id'] = entry.id
            if has_path(self.bnd_magic):
                entry_header_dict['path_offset'] = len(packed_entry_paths)
                relative_entry_path_offsets.append(len(packed_entry_paths))  # Relative to start of packed entry paths.
                packed_entry_paths += entry.get_packed_path('shift-jis')
            if has_uncompressed_size(self.bnd_magic):
                entry_header_dict['uncompressed_data_size'] = entry.data_size

            relative_entry_data_offsets.append(len(packed_entry_data))
            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict['compressed_data_size'] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(
            self._entries)
        entry_packed_data_offset = entry_path_table_offset + len(
            packed_entry_paths)
        bnd_file_size = entry_packed_data_offset + len(packed_entry_data)

        # Pack BND header.
        packed_header = self.header_struct.pack(
            bnd_signature=self.bnd_signature,
            bnd_magic=self.bnd_magic,
            big_endian=self.big_endian,
            unknown=self.unknown,
            entry_count=len(self._entries),
            file_size=bnd_file_size,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict['data_offset'] += entry_packed_data_offset
            if has_path(self.bnd_magic):
                entry_header_dict['path_offset'] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(
                entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_entry_data

    @property
    def bnd_manifest_header(self):
        bnd_signature = self.bnd_signature.rstrip(b'\0').decode()
        return (f"version = BND3\n"
                f"bnd_signature = {bnd_signature}\n"
                f"bnd_magic = {self.bnd_magic}\n"
                f"big_endian = {self.big_endian}\n"
                f"unknown = {self.unknown}\n"
                f"dcx = {repr(self.dcx)}\n"
                f"\n")
Example #5
class LuaInfo(object):
    """Describes the lua scripts contained inside this `luabnd` archive.

    Registration in this file is necessary for the scripts to be loaded with the corresponding map. Individual Lua
    function names should also be registered in `luagnl`, though it's unclear exactly when this is necessary. If a
    script doesn't appear to be working (the enemy isn't moving at all), adding the function names to `luagnl` should
    be the first solution you try. Another reason for non-functional scripts that I've experienced is a clash between
    maps (the same script is being loaded by multiple maps at the same time). If your script is used in multiple maps,
    it's better to put it in `aiCommon.luabnd`.
    """

    HEADER_STRUCT = (
        ('lua_version', '4s', b'LUAI'),
        ('endian_one', 'i', 1),  # checked manually to guess endianness
        ('goal_count', 'i'),
        '4x',
    )

    GOAL_STRUCT_32 = (
        ('goal_id', 'i'),
        ('name_offset', 'I'),
        ('logic_interrupt_name_offset', 'I'),
        ('has_battle_interrupt', '?'),
        ('has_logic_interrupt', '?'),
        '2x',
    )

    GOAL_STRUCT_64 = (
        ('goal_id', 'i'),
        ('has_battle_interrupt', '?'),
        ('has_logic_interrupt', '?'),
        '2x',
        ('name_offset', 'q'),
        ('logic_interrupt_name_offset', 'q'),
    )

    def __init__(self, luainfo_source=None, big_endian=False, use_struct_64=False):
        self.big_endian = big_endian
        self.use_struct_64 = use_struct_64
        self.luainfo_path = None
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT, byte_order=">" if self.big_endian else "<")

        self.goals = []  # type: List[LuaGoal]

        if luainfo_source is None:
            return
        if isinstance(luainfo_source, (list, tuple)):
            self.goals = luainfo_source  # type: List[LuaGoal]
            return
        if isinstance(luainfo_source, (str, Path)):
            self.luainfo_path = Path(luainfo_source)
            with self.luainfo_path.open("rb") as f:
                self.unpack(f)
            return
        if hasattr(luainfo_source, 'data'):
            luainfo_source = luainfo_source.data
        if isinstance(luainfo_source, bytes):
            luainfo_source = BytesIO(luainfo_source)
        if isinstance(luainfo_source, BufferedIOBase):
            self.unpack(luainfo_source)
            return
        raise TypeError(f"Invalid `luainfo_source` type: {type(luainfo_source)}")

    def unpack(self, info_buffer):
        self.big_endian = self._check_big_endian(info_buffer)
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT, byte_order=">" if self.big_endian else "<")
        header = self.header_struct.unpack(info_buffer)
        # TODO: auto-detect `use_struct_64` for 64-bit offsets (PTDE and DSR both use 32-bit).
        goal_struct = BinaryStruct(*(self.GOAL_STRUCT_64 if self.use_struct_64 else self.GOAL_STRUCT_32),
                                   byte_order=">" if self.big_endian else "<")
        self.goals = []
        for _ in range(header.goal_count):
            goal = self.unpack_goal(info_buffer, goal_struct)
            if goal.script_name in [g.script_name for g in self.goals]:
                _LOGGER.warning(
                    f"Goal '{goal.goal_id}' is referenced multiple times in LuaInfo (same ID and type). Each goal ID "
                    f"should have (at most) one 'battle' goal and one 'logic' goal. All goal entries after the first "
                    f"will be ignored.")
            else:
                self.goals.append(goal)

    def pack(self):
        header = self.header_struct.pack(goal_count=len(self.goals))
        packed_goals = b''
        packed_strings = b''
        goal_struct = BinaryStruct(*(self.GOAL_STRUCT_64 if self.use_struct_64 else self.GOAL_STRUCT_32),
                                   byte_order=">" if self.big_endian else "<")
        packed_strings_offset = len(header) + len(self.goals) * goal_struct.size
        for goal in self.goals:
            name_offset = packed_strings_offset + len(packed_strings)
            packed_strings += goal.goal_name.encode(encoding="shift-jis") + b'\0'
            goal_kwargs = goal.get_interrupt_details()
            logic_interrupt_name = goal_kwargs.pop("logic_interrupt_name")
            if logic_interrupt_name:
                logic_interrupt_name_offset = packed_strings_offset + len(packed_strings)
                packed_strings += logic_interrupt_name.encode(encoding="shift-jis") + b'\0'
            else:
                logic_interrupt_name_offset = 0
            packed_goals += goal_struct.pack(
                goal_id=goal.goal_id, name_offset=name_offset, logic_interrupt_name_offset=logic_interrupt_name_offset,
                **goal_kwargs)

        return header + packed_goals + packed_strings

    def write(self, luainfo_path=None):
        if luainfo_path is None:
            if self.luainfo_path is None:
                raise ValueError("LuaInfo path could not be determined automatically (must be specified).")
            luainfo_path = self.luainfo_path
        else:
            luainfo_path = Path(luainfo_path)
        create_bak(luainfo_path)
        with luainfo_path.open("wb") as f:
            f.write(self.pack())

    @staticmethod
    def unpack_goal(info_buffer, goal_struct):
        goal = goal_struct.unpack(info_buffer)
        name = read_chars_from_buffer(info_buffer, offset=goal.name_offset, encoding='shift-jis')
        if goal.logic_interrupt_name_offset > 0:
            logic_interrupt_name = read_chars_from_buffer(
                info_buffer, offset=goal.logic_interrupt_name_offset, encoding='shift-jis')
        else:
            logic_interrupt_name = ""
        return LuaGoal(goal_id=goal.goal_id, goal_name=name, has_battle_interrupt=goal.has_battle_interrupt,
                       has_logic_interrupt=goal.has_logic_interrupt, logic_interrupt_name=logic_interrupt_name)

    @staticmethod
    def _check_big_endian(info_buffer):
        info_buffer.seek(4)
        endian, = struct.unpack('i', info_buffer.read(4))
        info_buffer.seek(0)
        if endian == 0x1000000:
            return True
        elif endian == 0x1:
            return False
        raise ValueError(f"Invalid marker for LuaInfo byte order: {hex(endian)}")