# Example 1
class BND4(BaseBND):
    """BND archive format used since Dark Souls 2 (2014).

    Compared to BND3: 64-bit sizes/offsets, optional UTF-16 entry paths, and an
    optional hash table (present when ``hash_table_type == 4``) that maps hashed
    entry paths to entry indices for fast lookup.
    """

    # Header fields that precede the endianness flag; always unpacked little-endian.
    HEADER_STRUCT_START = (
        ("version", "4s", b"BND4"),
        ("flag1", "?"),
        ("flag2", "?"),
        "2x",
        ("big_endian", "i"),  # 0x00010000 (False) or 0x00000100 (True)
    )
    # Remaining header fields; unpacked with the byte order detected above.
    HEADER_STRUCT_ENDIAN = (
        ("entry_count", "i"),
        ("header_size", "q", 64),
        ("signature", "8s"),  # Real signature may be shorter, but packing will pad it out.
        ("entry_header_size", "q"),
        ("data_offset", "q"),
        ("utf16_paths", "?"),
        ("magic", "b"),
        ("hash_table_type", "B"),  # 0, 1, 4, or 128
        "5x",
        ("hash_table_offset", "q"),  # only non-zero if hash_table_type == 4
    )

    # Base per-entry header; extended at runtime with the optional fields below,
    # depending on `magic` (see `unpack` / `create_header_structs`).
    BND_ENTRY_HEADER = (
        ("entry_magic", "B"),
        "3x",
        ("minus_one", "i", -1),
        ("compressed_data_size", "q"),
    )
    UNCOMPRESSED_DATA_SIZE = ("uncompressed_data_size", "q")
    DATA_OFFSET = ("data_offset", "I")
    ENTRY_ID = ("entry_id", "i")
    NAME_OFFSET = ("path_offset", "i")

    # Structs for the optional path-hash table (hash_table_type == 4).
    HASH_TABLE_HEADER = BinaryStruct(
        "8x",
        ("path_hashes_offset", "q"),
        ("hash_group_count", "I"),
        ("unknown", "i", 0x00080810),
    )
    PATH_HASH_STRUCT = BinaryStruct(
        ("hashed_value", "I"),
        ("entry_index", "i"),
    )
    HASH_GROUP_STRUCT = BinaryStruct(
        ("length", "i"),
        ("index", "i"),
    )

    VERSION = "BND4"
    MANIFEST_FIELDS = ("signature", "magic", "big_endian", "utf16_paths",
                       "hash_table_type", "flag1", "flag2")

    flag1: bool  # unknown purpose
    flag2: bool  # unknown purpose
    utf16_paths: bool  # if False, paths are written in Shift-JIS
    hash_table_type: int  # 0, 1, 4 (hash table present), or 128
    hash_table_offset: int  # absolute offset of hash table; 0 unless type is 4

    def __init__(self, bnd_source=None, dcx_magic=()):
        """Initialize BND4 defaults, then defer to `BaseBND` to load `bnd_source`."""
        self.flag1 = False  # unknown
        self.flag2 = False  # unknown
        self.utf16_paths = False  # If False, paths are written in Shift-JIS.
        self.hash_table_type = 0
        self.hash_table_offset = 0
        super().__init__(bnd_source=bnd_source, dcx_magic=dcx_magic)

    def unpack(self, bnd_buffer, **kwargs):
        """Read the BND4 header, all entry headers/data, and any hash table from `bnd_buffer`.

        Raises:
            ValueError: if the entry header size in the BND header does not match
                the size implied by `magic`.
        """
        # First part of the header is always little-endian and contains the
        # endianness flag for the rest of the file.
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order="<")
        header = self.header_struct.unpack(bnd_buffer)
        self._check_version(header["version"].decode())
        self.flag1 = header["flag1"]
        self.flag2 = header["flag2"]
        # Magic not used to infer endianness here (unlike BND3).
        self.big_endian = header["big_endian"] == 0x00000100
        byte_order = ">" if self.big_endian else "<"
        header.update(
            self.header_struct.unpack(bnd_buffer,
                                      *self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order))
        self.signature = header["signature"].rstrip(b"\0").decode()
        self.magic = header["magic"]
        self.utf16_paths = header["utf16_paths"]
        self.hash_table_type = header["hash_table_type"]
        self.hash_table_offset = header["hash_table_offset"]
        path_encoding = ("utf-16be" if self.big_endian else
                         "utf-16le") if self.utf16_paths else "shift-jis"

        if header["entry_header_size"] != header_size(self.magic):
            raise ValueError(
                f"Expected BND entry header size {header_size(self.magic)} based on magic\n"
                f"{hex(self.magic)}, but BND header says {header['entry_header_size']}."
            )
        if self.hash_table_type != 4 and self.hash_table_offset != 0:
            _LOGGER.warning(
                f"Found non-zero hash table offset {self.hash_table_offset}, but header says this BND has no hash "
                f"table.")

        # Build the entry header struct from the optional fields implied by `magic`.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)
        self.entry_header_struct.add_fields(self.DATA_OFFSET,
                                            byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if self.magic == 0x20:
            # Extra pad.
            self.entry_header_struct.add_fields("8x")
        if header["entry_header_size"] != self.entry_header_struct.size:
            _LOGGER.warning(
                f"Entry header size given in BND header ({header['entry_header_size']}) does not match actual entry "
                f"header size ({self.entry_header_struct.size}).")
        for entry in BNDEntry.unpack(bnd_buffer,
                                     self.entry_header_struct,
                                     path_encoding=path_encoding,
                                     count=header["entry_count"]):
            self.add_entry(entry)

        # Read hash table (kept verbatim so it can be re-used on pack if the
        # entries have not changed).
        if self.hash_table_type == 4:
            bnd_buffer.seek(self.hash_table_offset)
            self._most_recent_hash_table = bnd_buffer.read(
                header["data_offset"] - self.hash_table_offset)
        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]

    def create_header_structs(self):
        """Build header and entry-header structs from current instance state.

        Used when the BND is created from a manifest rather than unpacked from
        a buffer; any required hash table will be built on first pack.
        """
        self._most_recent_hash_table = b""  # Hash table will need to be built on first pack.
        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]

        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order="<")
        byte_order = ">" if self.big_endian else "<"
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order)
        # Entry header layout must mirror `unpack` exactly.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)
        self.entry_header_struct.add_fields(self.DATA_OFFSET,
                                            byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if self.magic == 0x20:
            # Extra pad.
            self.entry_header_struct.add_fields("8x")

    def pack(self):
        """Pack the full BND4 file and return it as `bytes`.

        Layout: header | entry headers | entry paths | hash table (optional) |
        entry data. The cached hash table is re-used unless the entry count or
        any entry path has changed since it was read/built.
        """
        entry_header_dicts = []
        packed_entry_headers = b""
        packed_entry_paths = b""
        packed_entry_data = b""
        rebuild_hash_table = not self._most_recent_hash_table
        path_encoding = ("utf-16be" if self.big_endian else
                         "utf-16le") if self.utf16_paths else "shift-jis"

        # Detect any change in entry count or paths that invalidates the cached
        # hash table.
        if len(self._entries) != self._most_recent_entry_count:
            rebuild_hash_table = True
        for i, entry in enumerate(self._entries):
            if not rebuild_hash_table and entry.path != self._most_recent_paths[
                    i]:
                rebuild_hash_table = True

        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]

        for entry in self._entries:

            packed_entry_data += b"\0" * 10  # Each entry is separated by ten pad bytes. (Probably not necessary.)

            # Offsets recorded here are relative to their tables; converted to
            # absolute below once the table offsets are known.
            entry_header_dict = {
                "entry_magic": entry.magic,
                "compressed_data_size": entry.data_size,
                "data_offset": len(packed_entry_data),
            }
            if has_id(self.magic):
                entry_header_dict["entry_id"] = entry.id
            if has_path(self.magic):
                entry_header_dict["path_offset"] = len(packed_entry_paths)
                packed_entry_paths += entry.get_packed_path(path_encoding)
            if has_uncompressed_size(self.magic):
                entry_header_dict["uncompressed_data_size"] = entry.data_size

            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict["compressed_data_size"] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(
            self._entries)
        if self.hash_table_type == 4:
            hash_table_offset = entry_path_table_offset + len(
                packed_entry_paths)
            if rebuild_hash_table:
                packed_hash_table = self.build_hash_table()
            else:
                packed_hash_table = self._most_recent_hash_table
            entry_packed_data_offset = hash_table_offset + len(
                packed_hash_table)
        else:
            hash_table_offset = 0
            packed_hash_table = b""
            entry_packed_data_offset = entry_path_table_offset + len(
                packed_entry_paths)
        # BND file size not needed.

        packed_header = self.header_struct.pack(
            flag1=self.flag1,
            flag2=self.flag2,
            big_endian=self.big_endian,
            entry_count=len(self._entries),
            signature=self.signature,
            entry_header_size=self.entry_header_struct.size,
            data_offset=entry_packed_data_offset,
            utf16_paths=self.utf16_paths,
            magic=self.magic,
            hash_table_type=self.hash_table_type,
            hash_table_offset=hash_table_offset,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict["data_offset"] += entry_packed_data_offset
            if has_path(self.magic):
                entry_header_dict["path_offset"] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(
                entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_hash_table + packed_entry_data

    def get_json_header(self):
        """Return a JSON-serializable dict describing this BND's header state."""
        return {
            "version": "BND4",
            "signature": self.signature,
            "magic": self.magic,
            "big_endian": self.big_endian,
            "utf16_paths": self.utf16_paths,
            "hash_table_type": self.hash_table_type,
            "flag1": self.flag1,
            "flag2": self.flag2,
            "use_id_prefix": self.has_repeated_entry_names,
            "dcx_magic": self.dcx_magic,
        }

    @staticmethod
    def is_prime(p):
        """Return True if integer `p` is prime (trial division by odd numbers)."""
        if p < 2:
            return False
        if p == 2:
            return True
        if (p % 2) == 0:
            return False
        for i in range(3, p // 2, 2):
            if (p % i) == 0:
                return False
            if i**2 > p:
                # No divisor can exist beyond sqrt(p).
                return True
        return True

    def build_hash_table(self):
        """ Some BND4 resources include tables of hashed entry paths, which aren't needed to read file contents, but
        need to be re-hashed to properly pack the file in case any paths have changed (or the number of entries). """

        # Group count set to first prime number greater than or equal to the number of entries divided by 7.
        for p in range(len(self._entries) // 7, 100000):
            if self.is_prime(p):
                group_count = p
                break
        else:
            raise ValueError("Hash group count could not be determined.")

        hashes = []
        # One bucket of (hash, entry_index) pairs per group; bucket chosen by
        # hash modulo group count.
        hash_lists = [[] for _ in range(group_count)]  # type: list[list[tuple[int, int]]]

        for entry_index, entry in enumerate(self._entries):
            hashes.append(self.path_hash(entry.path))
            list_index = hashes[-1] % group_count
            hash_lists[list_index].append((hashes[-1], entry_index))

        for hash_list in hash_lists:
            hash_list.sort()  # Sort by hash value.

        hash_groups = []
        path_hashes = []

        # Flatten buckets into one hash array; each group records its slice
        # (start index and length) of that array.
        total_hash_count = 0
        for hash_list in hash_lists:
            first_hash_index = total_hash_count
            for path_hash in hash_list:
                path_hashes.append({
                    "hashed_value": path_hash[0],
                    "entry_index": path_hash[1]
                })
                total_hash_count += 1
            hash_groups.append({
                "index": first_hash_index,
                "length": total_hash_count - first_hash_index
            })

        packed_hash_groups = self.HASH_GROUP_STRUCT.pack_multiple(hash_groups)
        packed_hash_table_header = self.HASH_TABLE_HEADER.pack(
            path_hashes_offset=self.HASH_TABLE_HEADER.size +
            len(packed_hash_groups),
            hash_group_count=group_count,
        )
        packed_path_hashes = self.PATH_HASH_STRUCT.pack_multiple(path_hashes)

        return packed_hash_table_header + packed_hash_groups + packed_path_hashes

    @staticmethod
    def path_hash(path_string):
        """ Simple string-hashing algorithm used by FROM. Strings use forward-slash path separators and always start
        with a forward slash.

        Implements the 32-bit multiply-accumulate hash from SoulsFormats's
        `SFUtil.FromPathHash`: `h = h * 37 + c` for each character, modulo 2**32
        (the table stores hashes as unsigned 32-bit ints). The previous version
        (`h += i * 37 + ord(s)`) misread the C# `Aggregate` accumulator as an
        index and produced hashes the game would not match.

        NOTE(review): SoulsFormats also lower-cases the path before hashing —
        confirm whether entry paths are guaranteed lower-case upstream.
        """
        hashable = path_string.replace("\\", "/")
        if not hashable.startswith("/"):
            hashable = "/" + hashable
        h = 0
        for s in hashable:
            h = (h * 37 + ord(s)) & 0xFFFFFFFF
        return h
# Example 2
class BND3(BaseBND):
    """BND archive format used before Dark Souls 2 (2014).

    Simpler than BND4: 32-bit sizes/offsets, Shift-JIS entry paths only, and no
    path-hash table.
    """

    # Header fields that precede the endianness flag; always unpacked little-endian.
    HEADER_STRUCT_START = (
        ("version", "4s", b"BND3"),
        ("signature", "8s"),  # Real signature may be shorter, but packing will pad it out.
        ("magic", "b"),
        ("big_endian", "?"),
    )
    # Remaining header fields; unpacked with the byte order detected above.
    HEADER_STRUCT_ENDIAN = (
        ("unknown", "?"),  # usually False
        ("zero", "B", 0),
        ("entry_count", "i"),
        ("file_size", "i"),
        "8x",
    )

    # Base per-entry header; extended at runtime with the optional fields below,
    # depending on `magic` (see `unpack` / `create_header_structs`).
    BND_ENTRY_HEADER = (
        ("entry_magic", "B"),
        "3x",
        ("compressed_data_size", "i"),
        ("data_offset", "i"),
    )
    ENTRY_ID = ("entry_id", "i")
    NAME_OFFSET = ("path_offset", "i")
    UNCOMPRESSED_DATA_SIZE = ("uncompressed_data_size", "i")

    VERSION = "BND3"
    MANIFEST_FIELDS = ("signature", "magic", "big_endian", "unknown")

    unknown: bool  # header flag of unknown purpose; usually False

    def __init__(self, bnd_source=None, dcx_magic=()):
        """Initialize BND3 defaults, then defer to `BaseBND` to load `bnd_source`."""
        self.unknown = False
        super().__init__(bnd_source=bnd_source, dcx_magic=dcx_magic)

    def unpack(self, buffer, **kwargs):
        """Read the BND3 header and all entry headers/data from `buffer`."""
        # First part of the header is always little-endian and contains the
        # endianness flag (also inferable from `magic`) for the rest.
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order="<")
        header = self.header_struct.unpack(buffer)
        self._check_version(header["version"].decode())
        self.signature = header["signature"].rstrip(b"\0").decode()
        self.magic = header["magic"]
        self.big_endian = header["big_endian"] or is_big_endian(self.magic)
        byte_order = ">" if self.big_endian else "<"
        header.update(
            self.header_struct.unpack(buffer,
                                      *self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order))
        self.unknown = header["unknown"]

        # Build the entry header struct from the optional fields implied by `magic`.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)

        # NOTE: BND paths are *not* encoded in `shift_jis_2004`, unlike most other strings! They are `shift-jis`.
        #  The main annoyance here is that escaped backslashes are encoded as the yen symbol in `shift_jis_2004`.
        for entry in BNDEntry.unpack(buffer,
                                     self.entry_header_struct,
                                     path_encoding="shift-jis",
                                     count=header["entry_count"]):
            self.add_entry(entry)

    def create_header_structs(self):
        """Build header and entry-header structs from current instance state.

        Used when the BND is created from a manifest rather than unpacked from
        a buffer.
        """
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START,
                                          byte_order="<")
        byte_order = ">" if self.big_endian else "<"
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN,
                                      byte_order=byte_order)
        # Entry header layout must mirror `unpack` exactly.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER,
                                                byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID,
                                                byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET,
                                                byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE,
                                                byte_order=byte_order)

    def pack(self):
        """Pack the full BND3 file and return it as `bytes`.

        Layout: header | entry headers | entry paths | entry data. Entries are
        written in ascending ID order.
        """
        entry_header_dicts = []
        packed_entry_headers = b""
        packed_entry_paths = b""
        packed_entry_data = b""

        for entry in sorted(self._entries, key=lambda e: e.id):
            # Offsets recorded here are relative to their tables; converted to
            # absolute below once the table offsets are known.
            entry_header_dict = {
                "entry_magic": entry.magic,
                "compressed_data_size": entry.data_size,
                "data_offset": len(packed_entry_data),
            }
            if has_id(self.magic):
                entry_header_dict["entry_id"] = entry.id
            if has_path(self.magic):
                entry_header_dict["path_offset"] = len(packed_entry_paths)
                packed_entry_paths += entry.get_packed_path("shift-jis")
            if has_uncompressed_size(self.magic):
                entry_header_dict["uncompressed_data_size"] = entry.data_size

            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict["compressed_data_size"] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(
            self._entries)
        entry_packed_data_offset = entry_path_table_offset + len(
            packed_entry_paths)
        bnd_file_size = entry_packed_data_offset + len(packed_entry_data)

        # Pack BND header.
        packed_header = self.header_struct.pack(
            signature=self.signature,
            magic=self.magic,
            big_endian=self.big_endian,
            unknown=self.unknown,
            entry_count=len(self._entries),
            file_size=bnd_file_size,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict["data_offset"] += entry_packed_data_offset
            if has_path(self.magic):
                entry_header_dict["path_offset"] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(
                entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_entry_data

    def get_json_header(self):
        """Return a JSON-serializable dict describing this BND's header state."""
        return {
            "version": "BND3",
            "signature": self.signature,
            "magic": self.magic,
            "big_endian": self.big_endian,
            "unknown": self.unknown,
            "use_id_prefix": self.has_repeated_entry_names,
            "dcx_magic": self.dcx_magic,
        }