import json
import os
import re

# Project-local modules; the exact module paths are assumptions and may need
# to be adapted to the actual package layout.
from .bdt import Bdt
from .bhd import Bhd, BhdDataEntry, BhdHeader, BhdRecord
from .dcx import Dcx
from .log import LOG
from .utils import time_it

class ExternalArchive(object):
    """ Combination of BHD and BDT. Contains methods to export files to the
    filesystem and to generate archives from a directory tree.

    An ExternalArchive object is empty and unusable when instantiated; use the
    method 'reset' to return it to that state, and the methods 'load' or
    'import_files' to populate it.

    Attributes:
    - bhd: Bhd object
    - bdt: Bdt object
    - filelist: dict which maps hashes to the original string
    - records_map: dict which maps record indices to the entry relative path
        they contain; the hashes are those found in the BHD file
    - decompressed_list: list of files that were decompressed during the
        archive export; entries use the decompressed name, i.e. without the
        .dcx extension
    """

    # Do not handle these files when crafting an archive.
    SPECIAL_FILE_TYPES     = (".json",)
    RECORDS_MAP_NAME       = "records.json"
    DECOMPRESSED_LIST_NAME = "decompressed.json"

    # Files without a known name are named by their uppercase hex hash.
    UNNAMED_FILE_RE = re.compile(r"[0-9A-F]{8}")

    def __init__(self):
        self.bhd = Bhd()
        self.bdt = Bdt()
        self.filelist = {}
        self.records_map = {}
        self.decompressed_list = []

    def reset(self):
        """ Return the object to its initial, empty state. """
        self.__init__()

    def load(self, bhd_name):
        """ Load the BHD file and prepare the BDT file for reading, return True
        on success. """
        self.reset()

        bhd_load_success = self.bhd.load(bhd_name)
        if not bhd_load_success:
            return False

        bdt_name = os.path.splitext(bhd_name)[0] + ".bdt"
        self.bdt.open(bdt_name)
        if not self.bdt.opened:
            return False

        return True

    def load_filelist(self, hashmap_path):
        """ Load the JSON map of hashes (hex strings) to original paths. """
        with open(hashmap_path, "r") as hashmap_file:
            hashmap = json.load(hashmap_file)
        self.filelist = { int(k, 16): v for k, v in hashmap.items() }
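
    # Illustrative filelist JSON shape; the hash/path pairing below is
    # hypothetical (the values reuse examples found elsewhere in this class):
    #
    #   { "192E66A4": "/chr/c5352.anibnd.dcx" }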

    def load_records_map(self, input_dir):
        """ Load the archive's records map that will be used to generate an
        archive with original record-to-entries map, return True on success. """
        map_path = os.path.join(input_dir, self.RECORDS_MAP_NAME)
        if not os.path.isfile(map_path):
            LOG.error("Records map file can't be found.")
            return False
        else:
            with open(map_path, "r") as records_map_file:
                self.records_map = json.load(records_map_file)
            return True

    def load_decompressed_list(self, input_dir):
        """ Load the list of files in that input dir that should be compressed
        before being imported in the archive. """
        list_path = os.path.join(input_dir, self.DECOMPRESSED_LIST_NAME)
        if not os.path.isfile(list_path):
            LOG.info("No decompressed file list found in the input dir.")
            return False
        else:
            with open(list_path, "r") as list_file:
                self.decompressed_list = json.load(list_file)
            LOG.info("Loaded decompressed file list.")
            return True

    def save_records_map(self, output_dir):
        """ Write the JSON map of records to their entries. """
        records_map_path = os.path.join(output_dir, self.RECORDS_MAP_NAME)
        with open(records_map_path, "w") as records_map_file:
            json.dump(self.records_map, records_map_file)
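
    # Illustrative records.json shape: keys are record indices (JSON object
    # keys are always strings), values are the relative paths of the record's
    # entries; the paths below are hypothetical.
    #
    #   { "0": ["/chr/c5352.anibnd.dcx"], "1": ["192E66A4"] }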

    def save_decompressed_list(self, output_dir):
        """ Write the JSON list of files (relative path) decompressed. """
        list_path = os.path.join(output_dir, self.DECOMPRESSED_LIST_NAME)
        with open(list_path, "w") as list_file:
            json.dump(self.decompressed_list, list_file)

    #------------------------------
    # Extraction
    #------------------------------

    @time_it(LOG)
    def export_all_files(self, output_dir, decompress=True):
        """ Export all files from the archive to a directory tree in output_dir
        and decompress DCX files if decompress is True (the default). """
        self.records_map = {}
        self.decompressed_list = []
        for index, record in enumerate(self.bhd.records):
            self._export_record(index, record, output_dir, decompress)
        self.save_records_map(output_dir)
        self.save_decompressed_list(output_dir)

    def _export_record(self, index, record, output_dir, decompress):
        """ Export the data entries of that record. """
        record_files = []

        for entry in record.entries:
            rel_path = self.export_file(entry, output_dir)
            if not rel_path:
                continue
            record_files.append(rel_path)

            if decompress:
                base_rel_path, extension = os.path.splitext(rel_path)
                if extension == ".dcx":
                    self._try_decompress(rel_path, base_rel_path, output_dir)

        self.records_map[index] = record_files

    def export_file(self, entry, output_dir):
        """ Export the file corresponding to that BHD data entry, return the
        relative file path on success, None on failure. """
        if not self.is_entry_valid(entry):
            LOG.error("Tried to extract a file not from this archive.")
            return None

        rel_path = self.filelist.get(entry.hash) or "{:08X}".format(entry.hash)
        LOG.info("Extracting {}".format(rel_path))

        file_content = self.bdt.read_entry(entry.offset, entry.size)
        content_len = len(file_content)
        if content_len != entry.size:
            LOG.error( "Tried to read {} bytes but only {} were available "
                       "(file '{}').".format(
                entry.size, content_len, rel_path
            ))
            return None

        output_path = os.path.join(output_dir, rel_path.lstrip("/"))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        with open(output_path, "wb") as output_file:
            output_file.write(file_content)
        return rel_path

    def is_entry_valid(self, entry):
        """ Return True if that BhdDataEntry is part of this archive. """
        return any(entry in record.entries for record in self.bhd.records)

    def _try_decompress(self, rel_path, base_rel_path, output_dir):
        """ Try to decompress the DCX at rel_path to base_rel_path, in the
        output_dir; fails if a file is already expected at base_rel_path. """
        if base_rel_path in self.filelist.values():
            LOG.info("Won't decompress {} because it conflicts with {}".format(
                rel_path, base_rel_path
            ))
            return
        joinable_rel_path = os.path.normpath(rel_path.lstrip("/"))
        file_path = os.path.join(output_dir, joinable_rel_path)
        success = ExternalArchive._decompress(file_path)
        if success:
            self.decompressed_list.append(base_rel_path)

    @staticmethod
    def _decompress(file_path, remove_dcx=True):
        """ Decompress that file and remove the compressed original (DCX) if
        remove_dcx is True. Return True on success. """
        dcx = Dcx()
        import_success = dcx.load(file_path)
        if not import_success:
            return False

        decompressed_path = os.path.splitext(file_path)[0]  # remove the .dcx
        export_success = dcx.save_decompressed(decompressed_path)
        if not export_success:
            return False

        if remove_dcx:
            os.remove(file_path)
        return True

    #------------------------------
    # Import
    #------------------------------

    @time_it(LOG)
    def import_files(self, data_dir, bhd_path):
        """ Create an external archive from the data in data_dir, return True on
        success. """
        self.reset()
        import_is_ready = self._prepare_import(data_dir, bhd_path)
        if not import_is_ready:
            return False

        for root, _, files in os.walk(data_dir):
            for file_name in files:
                ext = os.path.splitext(file_name)[1]
                if ext in self.SPECIAL_FILE_TYPES:
                    continue
                self.import_file(data_dir, root, file_name)

        self._update_header()
        self._save_files(bhd_path)
        return True
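
    # A minimal import sketch (paths are hypothetical); data_dir must contain
    # the records.json file produced by a previous export:
    #
    #   archive = ExternalArchive()
    #   archive.import_files("archive_tree", "archive.bhd")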

    def _prepare_import(self, data_dir, bhd_path):
        """ Prepare and load some files used in the import process, return True
        if everything is ready or False if the import should be aborted. """
        # Open the BDT file in write mode and write basic data.
        bdt_path = os.path.splitext(bhd_path)[0] + ".bdt"
        self.bdt.open(bdt_path, "wb")
        self.bdt.make_header()

        # Load the records to entries map and prepare the BHD record list.
        records_map_is_loaded = self.load_records_map(data_dir)
        if not records_map_is_loaded:
            return False
        num_records = len(self.records_map)
        self.bhd.records = [BhdRecord() for _ in range(num_records)]

        # Load the list of files to compress; this list is optional.
        self.load_decompressed_list(data_dir)
        return True

    def import_file(self, data_dir, file_dir, file_name):
        """ Try to import the file file_name in file_dir, with data_dir as the
        archive root; create a data entry in the appropriate record, and write
        the file data in the BDT file. Return True on success. """
        file_path = os.path.join(file_dir, file_name)

        # Find rel_path: either a hashable name like "/chr/c5352.anibnd.dcx"
        # or directly a hash name like "192E66A4".
        is_unnamed = self.UNNAMED_FILE_RE.match(file_name) is not None
        if is_unnamed:
            rel_path = file_name
        else:
            rel_path = ExternalArchive._get_rel_path(data_dir, file_path)
            rel_path = "/" + rel_path
        LOG.info("Importing {}".format(rel_path))

        # If the file is in the decompressed list, it has to be compressed first
        # and that means we have to create its DCX file, then we update the
        # paths we use afterwards.
        if rel_path in self.decompressed_list:
            joinable_rel_path = os.path.normpath(rel_path.lstrip("/"))
            decompressed_path = os.path.join(data_dir, joinable_rel_path)
            success = ExternalArchive._compress(decompressed_path)
            if not success:
                return False
            rel_path = rel_path + ".dcx"
            file_path = file_path + ".dcx"

        # Import the file
        import_results = self.bdt.import_file(file_path)
        if import_results[1] == -1:  # written bytes
            return False

        # Unnamed files are named by their hash, so the hash is already known.
        # Named files may have been recompressed above, so their final relative
        # path (and therefore their hash) is only known at this point.
        if is_unnamed:
            entry_hash = int(file_name, 16)
        else:
            entry_hash = BhdDataEntry.hash_name(rel_path)

        data_entry = BhdDataEntry()
        data_entry.hash = entry_hash
        data_entry.offset, data_entry.size = import_results

        record_is_updated = self._update_record(rel_path, data_entry)
        return record_is_updated

    @staticmethod
    def _get_rel_path(data_dir, file_path):
        """ Return the sanitized relative path of file_path. """
        relative_path = os.path.relpath(file_path, data_dir)
        # Convert Windows backslash separators to forward slashes.
        relative_path = relative_path.replace(os.path.sep, "/")
        return relative_path
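
    # Illustrative: with data_dir "/data" and file_path
    # "/data/chr/c5352.anibnd.dcx", _get_rel_path returns
    # "chr/c5352.anibnd.dcx" (the leading "/" is added back by import_file).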

    @staticmethod
    def _compress(file_path, remove_original=True):
        """ Compress the file into a DCX file and remove the original if
        remove_original is True. Return True on success. """
        dcx = Dcx()
        import_success = dcx.load_decompressed(file_path)
        if not import_success:
            return False

        dcx_path = file_path + ".dcx"
        export_success = dcx.save(dcx_path)
        if not export_success:
            return False

        if remove_original:
            os.remove(file_path)
        return True

    def _update_record(self, rel_path, data_entry):
        """ Add the data entry to the record associated with that relative path,
        return True on success. """
        try:
            index = next(( int(index)
                           for index, files in self.records_map.items()
                           if rel_path in files ))
        except StopIteration:
            LOG.error("File {} not in any record.".format(rel_path))
            return False
        record = self.bhd.records[index]
        record.entries.append(data_entry)
        return True

    def _update_header(self):
        """ Update the BHD header with values corresponding to the amount of
        records and entries stored; the header is then ready to write. """
        self.bhd.header = BhdHeader()

        num_records = len(self.bhd.records)
        num_data_entries = sum(len(record.entries)
                               for record in self.bhd.records)
        file_size = (
            BhdHeader.HEADER_BIN.size +
            BhdRecord.RECORD_BIN.size * num_records +
            BhdDataEntry.DATA_ENTRY_BIN.size * num_data_entries
        )
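        # Illustrative arithmetic with hypothetical struct sizes: a 24-byte
        # header plus 2 records of 8 bytes and 3 data entries of 32 bytes
        # gives file_size = 24 + 2*8 + 3*32 = 136 bytes.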

        self.bhd.header.file_size = file_size
        self.bhd.header.num_records = num_records

    def _save_files(self, bhd_path):
        """ Write both BHD and BDT files to disk. """
        LOG.info("Saving files to disk...")
        self.bhd.save(bhd_path)
        self.bdt.close()