class BND4(BaseBND):
    """BND used since Dark Souls 2 (2014).

    Layout: a small fixed-endianness header prefix, then an endian-dependent header
    remainder, then per-entry headers, the packed path strings, an optional hash
    table (``hash_table_type == 4`` only), and finally the entry data.
    """

    # Always read little-endian; contains the flag that tells us the true byte order.
    HEADER_STRUCT_START = (
        ("version", "4s", b"BND4"),
        ("flag1", "?"),
        ("flag2", "?"),
        "2x",
        ("big_endian", "i"),  # 0x00010000 (False) or 0x00000100 (True)
    )
    # Remainder of the header, read with the byte order inferred above.
    HEADER_STRUCT_ENDIAN = (
        ("entry_count", "i"),
        ("header_size", "q", 64),
        ("signature", "8s"),  # Real signature may be shorter, but packing will pad it out.
        ("entry_header_size", "q"),
        ("data_offset", "q"),
        ("utf16_paths", "?"),
        ("magic", "b"),
        ("hash_table_type", "B"),  # 0, 1, 4, or 128
        "5x",
        ("hash_table_offset", "q"),  # only non-zero if hash_table_type == 4
    )
    BND_ENTRY_HEADER = (
        ("entry_magic", "B"),
        "3x",
        ("minus_one", "i", -1),
        ("compressed_data_size", "q"),
    )
    # Optional entry-header fields, appended depending on `magic` (see unpack/create_header_structs).
    UNCOMPRESSED_DATA_SIZE = ("uncompressed_data_size", "q")
    DATA_OFFSET = ("data_offset", "I")
    ENTRY_ID = ("entry_id", "i")
    NAME_OFFSET = ("path_offset", "i")

    HASH_TABLE_HEADER = BinaryStruct(
        "8x",
        ("path_hashes_offset", "q"),
        ("hash_group_count", "I"),
        ("unknown", "i", 0x00080810),
    )
    PATH_HASH_STRUCT = BinaryStruct(
        ("hashed_value", "I"),
        ("entry_index", "i"),
    )
    HASH_GROUP_STRUCT = BinaryStruct(
        ("length", "i"),
        ("index", "i"),
    )

    VERSION = "BND4"
    MANIFEST_FIELDS = ("signature", "magic", "big_endian", "utf16_paths", "hash_table_type", "flag1", "flag2")

    flag1: bool  # unknown purpose
    flag2: bool  # unknown purpose
    utf16_paths: bool  # if False, paths are Shift-JIS
    hash_table_type: int  # 0, 1, 4, or 128; 4 means a path hash table is present
    hash_table_offset: int  # absolute offset of the hash table (only meaningful for type 4)

    def __init__(self, bnd_source=None, dcx_magic=()):
        """Initialize BND4 defaults, then defer loading to the base class.

        Args:
            bnd_source: optional source (file path, buffer, etc.) handled by `BaseBND`.
            dcx_magic: optional DCX compression magic handled by `BaseBND`.
        """
        self.flag1 = False  # unknown
        self.flag2 = False  # unknown
        self.utf16_paths = False  # If False, paths are written in Shift-JIS.
        self.hash_table_type = 0
        self.hash_table_offset = 0
        super().__init__(bnd_source=bnd_source, dcx_magic=dcx_magic)

    def unpack(self, bnd_buffer, **kwargs):
        """Read a BND4 archive from `bnd_buffer` into this instance.

        Raises:
            ValueError: if the entry header size in the BND header contradicts `magic`.
        """
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        header = self.header_struct.unpack(bnd_buffer)
        self._check_version(header["version"].decode())
        self.flag1 = header["flag1"]
        self.flag2 = header["flag2"]
        self.big_endian = header["big_endian"] == 0x00000100  # Magic not used to infer endianness here.
        byte_order = ">" if self.big_endian else "<"
        header.update(self.header_struct.unpack(bnd_buffer, *self.HEADER_STRUCT_ENDIAN, byte_order=byte_order))
        self.signature = header["signature"].rstrip(b"\0").decode()
        self.magic = header["magic"]
        self.utf16_paths = header["utf16_paths"]
        self.hash_table_type = header["hash_table_type"]
        self.hash_table_offset = header["hash_table_offset"]
        path_encoding = ("utf-16be" if self.big_endian else "utf-16le") if self.utf16_paths else "shift-jis"

        expected_entry_header_size = header_size(self.magic)
        if header["entry_header_size"] != expected_entry_header_size:
            raise ValueError(
                f"Expected BND entry header size {expected_entry_header_size} based on magic\n"
                f"{hex(self.magic)}, but BND header says {header['entry_header_size']}."
            )
        if self.hash_table_type != 4 and self.hash_table_offset != 0:
            _LOGGER.warning(
                f"Found non-zero hash table offset {self.hash_table_offset}, but header says this BND has no hash "
                f"table.")

        # Entry header layout depends on `magic` flags.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)
        self.entry_header_struct.add_fields(self.DATA_OFFSET, byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if self.magic == 0x20:
            # Extra pad.
            self.entry_header_struct.add_fields("8x")
        if header["entry_header_size"] != self.entry_header_struct.size:
            _LOGGER.warning(
                f"Entry header size given in BND header ({header['entry_header_size']}) does not match actual entry "
                f"header size ({self.entry_header_struct.size}).")

        for entry in BNDEntry.unpack(
                bnd_buffer, self.entry_header_struct, path_encoding=path_encoding, count=header["entry_count"]):
            self.add_entry(entry)

        # Read hash table (raw bytes kept so re-packing can skip a rebuild if nothing changed).
        if self.hash_table_type == 4:
            bnd_buffer.seek(self.hash_table_offset)
            self._most_recent_hash_table = bnd_buffer.read(header["data_offset"] - self.hash_table_offset)
        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]

    def create_header_structs(self):
        """Build header and entry-header structs from current instance state (for packing
        a BND that was not unpacked from a buffer)."""
        self._most_recent_hash_table = b""  # Hash table will need to be built on first pack.
        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        byte_order = ">" if self.big_endian else "<"
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN, byte_order=byte_order)
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)
        self.entry_header_struct.add_fields(self.DATA_OFFSET, byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if self.magic == 0x20:
            # Extra pad.
            self.entry_header_struct.add_fields("8x")

    def pack(self):
        """Pack this BND4 into bytes: header, entry headers, paths, optional hash
        table, then entry data.

        The cached hash table from unpacking is reused unless entries were added,
        removed, or renamed since then, in which case it is rebuilt.
        """
        entry_header_dicts = []
        packed_entry_headers = b""
        packed_entry_paths = b""
        packed_entry_data = b""
        path_encoding = ("utf-16be" if self.big_endian else "utf-16le") if self.utf16_paths else "shift-jis"

        # Decide whether the cached hash table is still valid.
        rebuild_hash_table = not self._most_recent_hash_table
        if len(self._entries) != self._most_recent_entry_count:
            rebuild_hash_table = True
        elif any(entry.path != old_path for entry, old_path in zip(self._entries, self._most_recent_paths)):
            rebuild_hash_table = True
        self._most_recent_entry_count = len(self._entries)
        self._most_recent_paths = [entry.path for entry in self._entries]

        for entry in self._entries:
            packed_entry_data += b"\0" * 10  # Each entry is separated by ten pad bytes. (Probably not necessary.)
            entry_header_dict = {
                "entry_magic": entry.magic,
                "compressed_data_size": entry.data_size,
                "data_offset": len(packed_entry_data),  # relative; made absolute below
            }
            if has_id(self.magic):
                entry_header_dict["entry_id"] = entry.id
            if has_path(self.magic):
                entry_header_dict["path_offset"] = len(packed_entry_paths)  # relative; made absolute below
                packed_entry_paths += entry.get_packed_path(path_encoding)
            if has_uncompressed_size(self.magic):
                entry_header_dict["uncompressed_data_size"] = entry.data_size
            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict["compressed_data_size"] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(self._entries)
        if self.hash_table_type == 4:
            hash_table_offset = entry_path_table_offset + len(packed_entry_paths)
            if rebuild_hash_table:
                packed_hash_table = self.build_hash_table()
            else:
                packed_hash_table = self._most_recent_hash_table
            entry_packed_data_offset = hash_table_offset + len(packed_hash_table)
        else:
            hash_table_offset = 0
            packed_hash_table = b""
            entry_packed_data_offset = entry_path_table_offset + len(packed_entry_paths)
        # BND file size not needed.

        packed_header = self.header_struct.pack(
            flag1=self.flag1,
            flag2=self.flag2,
            big_endian=self.big_endian,
            entry_count=len(self._entries),
            signature=self.signature,
            entry_header_size=self.entry_header_struct.size,
            data_offset=entry_packed_data_offset,
            utf16_paths=self.utf16_paths,
            magic=self.magic,
            hash_table_type=self.hash_table_type,
            hash_table_offset=hash_table_offset,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict["data_offset"] += entry_packed_data_offset
            if has_path(self.magic):
                entry_header_dict["path_offset"] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_hash_table + packed_entry_data

    def get_json_header(self):
        """Return a JSON-serializable dict of this BND4's header/manifest fields."""
        return {
            "version": "BND4",
            "signature": self.signature,
            "magic": self.magic,
            "big_endian": self.big_endian,
            "utf16_paths": self.utf16_paths,
            "hash_table_type": self.hash_table_type,
            "flag1": self.flag1,
            "flag2": self.flag2,
            "use_id_prefix": self.has_repeated_entry_names,
            "dcx_magic": self.dcx_magic,
        }

    @staticmethod
    def is_prime(p):
        """Return True if `p` is prime (simple trial division up to sqrt(p))."""
        if p < 2:
            return False
        if p == 2:
            return True
        if p % 2 == 0:
            return False
        i = 3
        while i * i <= p:
            if p % i == 0:
                return False
            i += 2
        return True

    def build_hash_table(self):
        """Build the packed path-hash table for `hash_table_type == 4` BNDs.

        These tables aren't needed to read file contents, but must be re-hashed to
        properly pack the file in case any paths (or the number of entries) changed.
        """
        # Group count set to first prime number greater than or equal to the number of entries divided by 7.
        for p in range(len(self._entries) // 7, 100000):
            if self.is_prime(p):
                group_count = p
                break
        else:
            raise ValueError("Hash group count could not be determined.")

        # Bucket (hash, entry_index) pairs by hash modulo group count.
        hash_lists = [[] for _ in range(group_count)]  # type: list[list[tuple[int, int]]]
        for entry_index, entry in enumerate(self._entries):
            hashed = self.path_hash(entry.path)
            hash_lists[hashed % group_count].append((hashed, entry_index))
        for hash_list in hash_lists:
            hash_list.sort()  # Sort by hash value.

        # Flatten buckets into a hash list plus (index, length) group records.
        hash_groups = []
        path_hashes = []
        total_hash_count = 0
        for hash_list in hash_lists:
            first_hash_index = total_hash_count
            for hashed, entry_index in hash_list:
                path_hashes.append({"hashed_value": hashed, "entry_index": entry_index})
                total_hash_count += 1
            hash_groups.append({"index": first_hash_index, "length": total_hash_count - first_hash_index})

        packed_hash_groups = self.HASH_GROUP_STRUCT.pack_multiple(hash_groups)
        packed_hash_table_header = self.HASH_TABLE_HEADER.pack(
            path_hashes_offset=self.HASH_TABLE_HEADER.size + len(packed_hash_groups),
            hash_group_count=group_count,
        )
        packed_path_hashes = self.PATH_HASH_STRUCT.pack_multiple(path_hashes)

        return packed_hash_table_header + packed_hash_groups + packed_path_hashes

    @staticmethod
    def path_hash(path_string):
        """Simple string-hashing algorithm used by FROM. Strings use forward-slash path
        separators and always start with a forward slash.

        FIX: the hash is an accumulator fold `h = h * 37 + ord(c)` with 32-bit unsigned
        overflow (as in SoulsFormats' BinderHashTable), not a sum over `enumerate`
        indices as previously written. The mask also keeps the value within the
        unsigned 32-bit "I" field of PATH_HASH_STRUCT.
        """
        hashable = path_string.replace("\\", "/")
        if not hashable.startswith("/"):
            hashable = "/" + hashable
        h = 0
        for s in hashable:
            h = (h * 37 + ord(s)) & 0xFFFFFFFF
        return h
class BND3(BaseBND):
    """BND used before Dark Souls 2 (2014).

    Layout: a fixed-endianness header prefix (which reveals the true byte order),
    an endian-dependent header remainder, per-entry headers, packed path strings,
    then entry data. BND3 has no hash table.
    """

    # Always read little-endian; `big_endian`/`magic` tell us the true byte order.
    HEADER_STRUCT_START = (
        ("version", "4s", b"BND3"),
        ("signature", "8s"),  # Real signature may be shorter, but packing will pad it out.
        ("magic", "b"),
        ("big_endian", "?"),
    )
    # Remainder of the header, read with the inferred byte order.
    HEADER_STRUCT_ENDIAN = (
        ("unknown", "?"),  # usually False
        ("zero", "B", 0),
        ("entry_count", "i"),
        ("file_size", "i"),
        "8x",
    )
    BND_ENTRY_HEADER = (
        ("entry_magic", "B"),
        "3x",
        ("compressed_data_size", "i"),
        ("data_offset", "i"),
    )
    # Optional entry-header fields, appended depending on `magic`.
    ENTRY_ID = ("entry_id", "i")
    NAME_OFFSET = ("path_offset", "i")
    UNCOMPRESSED_DATA_SIZE = ("uncompressed_data_size", "i")

    VERSION = "BND3"
    MANIFEST_FIELDS = ("signature", "magic", "big_endian", "unknown")

    unknown: bool  # unknown header flag, usually False

    def __init__(self, bnd_source=None, dcx_magic=()):
        """Initialize BND3 defaults, then defer loading to the base class.

        Args:
            bnd_source: optional source (file path, buffer, etc.) handled by `BaseBND`.
            dcx_magic: optional DCX compression magic handled by `BaseBND`.
        """
        self.unknown = False
        super().__init__(bnd_source=bnd_source, dcx_magic=dcx_magic)

    def unpack(self, buffer, **kwargs):
        """Read a BND3 archive from `buffer` into this instance."""
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        header = self.header_struct.unpack(buffer)
        self._check_version(header["version"].decode())
        self.signature = header["signature"].rstrip(b"\0").decode()
        self.magic = header["magic"]
        self.big_endian = header["big_endian"] or is_big_endian(self.magic)
        byte_order = ">" if self.big_endian else "<"
        header.update(self.header_struct.unpack(buffer, *self.HEADER_STRUCT_ENDIAN, byte_order=byte_order))
        self.unknown = header["unknown"]

        # Entry header layout depends on `magic` flags.
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)

        # NOTE: BND paths are *not* encoded in `shift_jis_2004`, unlike most other strings! They are `shift-jis`.
        # The main annoyance here is that escaped backslashes are encoded as the yen symbol in `shift_jis_2004`.
        for entry in BNDEntry.unpack(
                buffer, self.entry_header_struct, path_encoding="shift-jis", count=header["entry_count"]):
            self.add_entry(entry)

    def create_header_structs(self):
        """Build header and entry-header structs from current instance state (for packing
        a BND that was not unpacked from a buffer)."""
        self.header_struct = BinaryStruct(*self.HEADER_STRUCT_START, byte_order="<")
        byte_order = ">" if self.big_endian else "<"
        self.header_struct.add_fields(*self.HEADER_STRUCT_ENDIAN, byte_order=byte_order)
        self.entry_header_struct = BinaryStruct(*self.BND_ENTRY_HEADER, byte_order=byte_order)
        if has_id(self.magic):
            self.entry_header_struct.add_fields(self.ENTRY_ID, byte_order=byte_order)
        if has_path(self.magic):
            self.entry_header_struct.add_fields(self.NAME_OFFSET, byte_order=byte_order)
        if has_uncompressed_size(self.magic):
            self.entry_header_struct.add_fields(self.UNCOMPRESSED_DATA_SIZE, byte_order=byte_order)

    def pack(self):
        """Pack this BND3 into bytes: header, entry headers, paths, then entry data.

        Entries are written in ascending ID order (BND3 only; BND4 preserves order).
        """
        entry_header_dicts = []
        packed_entry_headers = b""
        packed_entry_paths = b""
        packed_entry_data = b""

        for entry in sorted(self._entries, key=lambda e: e.id):
            entry_header_dict = {
                "entry_magic": entry.magic,
                "compressed_data_size": entry.data_size,
                "data_offset": len(packed_entry_data),  # relative; made absolute below
            }
            if has_id(self.magic):
                entry_header_dict["entry_id"] = entry.id
            if has_path(self.magic):
                entry_header_dict["path_offset"] = len(packed_entry_paths)  # relative; made absolute below
                packed_entry_paths += entry.get_packed_path("shift-jis")
            if has_uncompressed_size(self.magic):
                entry_header_dict["uncompressed_data_size"] = entry.data_size
            entry_data, is_compressed = entry.get_data_for_pack()
            if is_compressed:
                entry_header_dict["compressed_data_size"] = len(entry_data)
            packed_entry_data += entry_data
            entry_header_dicts.append(entry_header_dict)

        # Compute table offsets.
        entry_header_table_offset = self.header_struct.size
        entry_path_table_offset = entry_header_table_offset + self.entry_header_struct.size * len(self._entries)
        entry_packed_data_offset = entry_path_table_offset + len(packed_entry_paths)
        bnd_file_size = entry_packed_data_offset + len(packed_entry_data)

        # Pack BND header.
        packed_header = self.header_struct.pack(
            signature=self.signature,
            magic=self.magic,
            big_endian=self.big_endian,
            unknown=self.unknown,
            entry_count=len(self._entries),
            file_size=bnd_file_size,
        )

        # Convert relative offsets to absolute and pack entry headers.
        for entry_header_dict in entry_header_dicts:
            entry_header_dict["data_offset"] += entry_packed_data_offset
            if has_path(self.magic):
                entry_header_dict["path_offset"] += entry_path_table_offset
            packed_entry_headers += self.entry_header_struct.pack(entry_header_dict)

        return packed_header + packed_entry_headers + packed_entry_paths + packed_entry_data

    def get_json_header(self):
        """Return a JSON-serializable dict of this BND3's header/manifest fields."""
        return {
            "version": "BND3",
            "signature": self.signature,
            "magic": self.magic,
            "big_endian": self.big_endian,
            "unknown": self.unknown,
            "use_id_prefix": self.has_repeated_entry_names,
            "dcx_magic": self.dcx_magic,
        }