def get_archive_info(path):
    """Return a tuple (mime, num_pages, size) with info about the archive
    at <path>, or None if <path> doesn't point to a supported archive.
    """
    image_re = constants.SUPPORTED_IMAGE_REGEX
    # XXX: Deferred import to avoid circular dependency
    from mcomix import archive_extractor
    extractor = archive_extractor.Extractor()
    try:
        extractor.setup(path, None)
    except archive_extractor.ArchiveException:
        return None

    mime = extractor.get_mime_type()
    if mime is None:
        return None

    files = extractor.get_files()
    extractor.close()
    num_pages = len(filter(image_re.search, files))
    size = os.stat(path).st_size

    return (mime, num_pages, size)
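# A minimal usage sketch for get_archive_info(). The helper name and the
# printed format are illustrative assumptions, not part of the module; only
# the (mime, num_pages, size) / None contract comes from the function above.
def _print_archive_summary(path):
    """Print a one-line summary of the archive at <path>, if supported."""
    info = get_archive_info(path)
    if info is None:
        print 'Not a supported archive: %s' % path
    else:
        mime, num_pages, size = info
        print '%s: %s, %d image(s), %d bytes' % (path, mime, num_pages, size)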
def _create_thumbnail_pixbuf(self, filepath):
    """ Creates a thumbnail pixbuf from <filepath>, and returns it as a
    tuple along with a file metadata dictionary: (pixbuf, tEXt_data). """
    if archive_tools.archive_mime_type(filepath) is not None:
        extractor = archive_extractor.Extractor()
        tmpdir = tempfile.mkdtemp(prefix=u'mcomix_archive_thumb.')
        condition = extractor.setup(filepath, tmpdir)
        files = extractor.get_files()
        wanted = self._guess_cover(files)

        if wanted:
            extractor.set_files([wanted])
            extractor.extract()
            image_path = os.path.join(tmpdir, wanted)

            condition.acquire()
            while not extractor.is_ready(wanted):
                condition.wait()
            condition.release()

            if not os.path.isfile(image_path):
                # Extraction failed; clean up before bailing out.
                shutil.rmtree(tmpdir, True)
                return None, None

            pixbuf = image_tools.load_pixbuf_size(image_path,
                self.width, self.height)
            tEXt_data = self._get_text_data(image_path)
            # Use the archive's mtime instead of the extracted file's mtime.
            tEXt_data['tEXt::Thumb::MTime'] = \
                str(long(os.stat(filepath).st_mtime))

            shutil.rmtree(tmpdir, True)
            return pixbuf, tEXt_data

        else:
            # No cover was found; check for sub-archives by file extension
            # and extract only the first one.
            subs = filter(constants.SUPPORTED_ARCHIVE_REGEX.search, files)
            if subs:
                extractor.set_files([subs[0]])
                extractor.extract()

                condition.acquire()
                while not extractor.is_ready(subs[0]):
                    condition.wait()
                condition.release()

                subpath = os.path.join(tmpdir, subs[0])
                # Recursively try to find an image to use as cover,
                # then clean up this level's temporary directory.
                pixbuf, tEXt_data = self._create_thumbnail_pixbuf(subpath)
                shutil.rmtree(tmpdir, True)
                return pixbuf, tEXt_data

            shutil.rmtree(tmpdir, True)
            return None, None

    elif image_tools.is_image_file(filepath):
        pixbuf = image_tools.load_pixbuf_size(filepath,
            self.width, self.height)
        tEXt_data = self._get_text_data(filepath)
        return pixbuf, tEXt_data

    else:
        return None, None
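# Hedged usage sketch for _create_thumbnail_pixbuf(), kept in comment form.
# The _save_thumbnail() call and thumbpath are hypothetical; only the
# (pixbuf, tEXt_data) return shape comes from the method itself.
#
#     pixbuf, tEXt_data = self._create_thumbnail_pixbuf(filepath)
#     if pixbuf is not None:
#         # tEXt_data carries freedesktop.org thumbnail metadata,
#         # e.g. 'tEXt::Thumb::MTime'.
#         self._save_thumbnail(pixbuf, thumbpath, tEXt_data)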
def __init__(self, window):
    #: Indicates if files/archives are currently loaded/loading.
    self.file_loaded = False
    self.file_loading = False
    #: Indicates if the file/archive load has failed.
    self.file_load_failed = False
    #: None if the current file is not an archive, or is an unrecognized format.
    self.archive_type = None
    #: Either the path to the current archive, or the first file in the
    #: image list. This is B{not} the path to the currently open page.
    self._current_file = None
    #: Reference to L{MainWindow}.
    self._window = window
    #: Path to the opened archive file, or the directory containing the
    #: current images.
    self._base_path = None
    #: Temporary directory used for extracting archives.
    self._tmp_dir = tempfile.mkdtemp(prefix=u'mcomix.', suffix=os.sep)
    #: If C{True}, no longer wait for files to get extracted.
    self._stop_waiting = False
    #: List of comment files inside the currently opened archive.
    self._comment_files = []
    #: Mapping of absolute paths to archive path names.
    self._name_table = {}
    #: Archive extractor.
    self._extractor = archive_extractor.Extractor()
    self._extractor.file_extracted += self._extracted_file
    self._extractor.contents_listed += self._listed_contents
    #: Condition to wait on when extracting archives and waiting on files.
    self._condition = None
    #: Provides a list of available files/archives in the open directory.
    self._file_provider = None
    #: Keeps track of the last read page in archives.
    self.last_read_page = last_read_page.LastReadPage(
        backend.LibraryBackend())
    #: Regexp used for determining which archive files are images.
    self._image_re = constants.SUPPORTED_IMAGE_REGEX
    #: Regexp used for determining which archive files are comment files.
    self._comment_re = None
    self.update_comment_extensions()
    #: Forces a call to window.draw_image (if loading is delayed by user
    #: interaction).
    self._must_call_draw = False

    self.last_read_page.set_enabled(bool(prefs['store recent file info']))
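# Hedged sketch of the event wiring used in the constructor above:
# file_extracted and contents_listed behave like multicast callbacks that
# listeners subscribe to with '+='. The handler names below are hypothetical;
# only the '+=' subscription pattern comes from the constructor itself.
#
#     extractor = archive_extractor.Extractor()
#     extractor.file_extracted += on_file_extracted
#     extractor.contents_listed += on_contents_listed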
def _open_archive(self, path, start_page):
    """ Opens the archive passed in C{path}.

    Creates an L{archive_extractor.Extractor} and extracts all images
    found within the archive.

    @return: A tuple containing C{(image_files, image_index)}. """

    self._base_path = path
    try:
        self._condition = self._extractor.setup(self._base_path,
            self._tmp_dir, self.archive_type)
    except Exception:
        self._condition = None
        raise

    if self._condition is not None:
        files = self._extractor.get_files()
        archive_images = [image for image in files
            if self._image_re.search(image)
            # Remove MacOS meta files from the image list.
            and u'__MACOSX' not in os.path.normpath(image).split(os.sep)]

        tools.alphanumeric_sort(archive_images)
        image_files = [os.path.join(self._tmp_dir, f)
                       for f in archive_images]

        comment_files = filter(self._comment_re.search, files)
        self._comment_files = [os.path.join(self._tmp_dir, f)
                               for f in comment_files]

        # Allow managing sub-archives by keeping archive entries,
        # identified by extension.
        archive_files = filter(constants.SUPPORTED_ARCHIVE_REGEX.search,
                               files)
        archive_files_paths = [os.path.join(self._tmp_dir, f)
                               for f in archive_files]

        for name, full_path in zip(archive_images, image_files):
            self._name_table[full_path] = name

        for name, full_path in zip(comment_files, self._comment_files):
            self._name_table[full_path] = name

        for name, full_path in zip(archive_files, archive_files_paths):
            self._name_table[full_path] = name

        # Determine the current archive image index.
        current_image_index = self._get_index_for_page(start_page,
            len(image_files), path)

        # Sort files to determine the extraction order.
        self._sort_archive_files(archive_images, current_image_index)

        self._extractor.set_files(archive_images + comment_files
            + archive_files)
        self._extractor.file_extracted += self._extracted_file
        self._extractor.extract()

        # Handle sub-archives through recursion.
        if archive_files:
            has_subarchive = False

            # For each potential sub-archive, swap in a fresh extractor,
            # extract recursively, then restore the internal extractor.
            for f in archive_files_paths:
                if not self._extractor.is_ready(f):
                    self._wait_on_file(f)

                if archive_tools.archive_mime_type(f) is not None:
                    # Save the handler's current state.
                    state = self._save_state()

                    # Set up temporary extraction data.
                    self._extractor = archive_extractor.Extractor()
                    self._tmp_dir = os.path.join(self._tmp_dir,
                        os.path.basename(f) + u'.dir')
                    if not os.path.exists(self._tmp_dir):
                        os.mkdir(self._tmp_dir)
                    self._condition = self._extractor.setup(self._base_path,
                        self._tmp_dir, self.archive_type)
                    self._extractor.file_extracted += self._extracted_file

                    # Recurse into the sub-archive.
                    add_images, dummy_idx = self._open_archive(f, 1)

                    # Since extraction is recursive, we still need a way to
                    # ensure each file was extracted, even if waiting makes
                    # this a little slower.
                    for image in add_images:
                        self._wait_on_file(image)
                    image_files.extend(add_images)

                    self._extractor.stop()
                    self._extractor.close()

                    # Restore the handler's saved state.
                    self._restore_state(state)
                    has_subarchive = True

            # Avoid any behaviour change if there was no sub-archive.
            if has_subarchive:
                # Mark the additional files as extracted.
                self._comment_files = \
                    filter(self._comment_re.search, image_files)
                tmp_image_files = \
                    filter(self._image_re.search, image_files)
                self._name_table.clear()
                for full_path in tmp_image_files + self._comment_files:
                    self._name_table[full_path] = \
                        os.path.basename(full_path)
                    # Mark the sub-extracted files as ready, to avoid
                    # waiting on them indefinitely.
                    self._extractor._extracted[
                        os.path.basename(full_path)] = True
                # Return these files instead of the original image_files.
                image_files = tmp_image_files

                # The image index may have changed after the additional
                # files were extracted.
                current_image_index = self._get_index_for_page(start_page,
                    len(image_files), path)

        return image_files, current_image_index

    else:
        # No condition was returned from the Extractor,
        # i.e. an invalid archive.
        return [], 0
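# Hedged usage sketch for _open_archive(), kept in comment form. The caller
# is assumed to have determined self.archive_type beforehand (e.g. via
# archive_tools.archive_mime_type()); only the (image_files, image_index)
# return contract comes from the method itself.
#
#     self.archive_type = archive_tools.archive_mime_type(path)
#     image_files, image_index = self._open_archive(path, start_page)
#     if image_files:
#         pass  # Hand image_files to the image list; show image_index first.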