def list_files(self, mode=FileProvider.IMAGES):
    """ Lists all files in the current directory. Returns a list of
    absolute paths, already sorted.

    @param mode: C{FileProvider.IMAGES} to accept only images,
        C{FileProvider.ARCHIVES} to accept only supported archives,
        anything else to accept every file.
    @return: Sorted list of absolute paths, or an empty list if the
        directory could not be read. """
    if mode == FileProvider.IMAGES:
        should_accept = lambda file: image_tools.is_image_file(file)
    elif mode == FileProvider.ARCHIVES:
        # BUG FIX: the old code called search(file, re.I), which passed
        # re.I (== 2) as the <pos> start-offset argument, silently
        # skipping the first two characters of every filename (breaking
        # e.g. u'a.7z'). The flag itself was a no-op anyway: a compiled
        # pattern's flags are fixed at compile time.
        # NOTE(review): assumes get_supported_archive_regex() already
        # compiles with re.I -- confirm in archive_tools.
        archive_re = archive_tools.get_supported_archive_regex()
        should_accept = lambda file: archive_re.search(file) is not None
    else:
        should_accept = lambda file: True

    try:
        files = [os.path.join(self.base_dir, filename) for filename in
                 # Explicitly convert all files to Unicode, even when
                 # os.listdir returns a mixture of byte/unicode strings.
                 # (MComix bug #3424405)
                 [i18n.to_unicode(fn) for fn in os.listdir(self.base_dir)]
                 if should_accept(os.path.join(self.base_dir, filename))]
        FileProvider.sort_files(files)
        return files
    except OSError:
        log.warning(u'! ' + _('Could not open %s: Permission denied.'), self.base_dir)
        return []
def _iter_contents(self, archive, root=None):
    """ Generator: yields the root-relative name of every entry in
    <archive>, extracting and recursing into any supported sub-archive.

    Registers <archive> in self._archive_list/self._archive_root and
    records each plain entry's (archive, name) origin in
    self._entry_mapping. """
    self._archive_list.append(archive)
    self._archive_root[archive] = root
    archive_regexp = archive_tools.get_supported_archive_regex()
    for entry in archive.iter_contents():
        if not archive_regexp.search(entry):
            # Plain entry: remember where it came from and yield it.
            full_name = entry if root is None else os.path.join(root, entry)
            self._entry_mapping[full_name] = (archive, entry)
            yield full_name
            continue
        # Looks like a sub-archive: extract it first...
        extract_dir = os.path.join(self._destination_dir, 'sub-archives')
        if root is not None:
            extract_dir = os.path.join(extract_dir, root)
        archive.extract(entry, extract_dir)
        # ...then open it and recurse into its contents.
        sub_path = os.path.join(extract_dir, entry)
        sub_archive = archive_tools.get_archive_handler(sub_path)
        if sub_archive is None:
            log.warning('Non-supported archive format: %s' %
                        os.path.basename(sub_path))
            continue
        sub_root = entry if root is None else os.path.join(root, entry)
        for sub_name in self._iter_contents(sub_archive, sub_root):
            yield sub_name
def list_files(self, mode=FileProvider.IMAGES):
    """ Lists all files in the current directory. Returns a list of
    absolute paths, already sorted.

    @param mode: C{FileProvider.IMAGES}, C{FileProvider.ARCHIVES}, or
        anything else to accept all files.
    @return: Sorted list of absolute paths; empty list on OSError. """
    if mode == FileProvider.IMAGES:
        should_accept = lambda file: image_tools.is_image_file(file)
    elif mode == FileProvider.ARCHIVES:
        # BUG FIX: search(file, re.I) treated re.I (== 2) as the <pos>
        # argument, starting the match at index 2 of every filename;
        # flags cannot be passed to a compiled pattern's search() at all.
        # NOTE(review): get_supported_archive_regex() is presumed to be
        # compiled case-insensitively -- verify in archive_tools.
        archive_re = archive_tools.get_supported_archive_regex()
        should_accept = lambda file: archive_re.search(file) is not None
    else:
        should_accept = lambda file: True

    try:
        files = [
            os.path.join(self.base_dir, filename) for filename in
            # Explicitly convert all files to Unicode, even when
            # os.listdir returns a mixture of byte/unicode strings.
            # (MComix bug #3424405)
            [i18n.to_unicode(fn) for fn in os.listdir(self.base_dir)]
            if should_accept(os.path.join(self.base_dir, filename))
        ]
        FileProvider.sort_files(files)
        return files
    except OSError:
        log.warning(u'! ' + _('Could not open %s: Permission denied.'), self.base_dir)
        return []
def _iter_contents(self, archive, root=None):
    """ Generator: yields the full (root-relative) name of every entry
    in <archive>, recursively descending into any supported sub-archive.

    Side effects: appends <archive> to self._archive_list, records its
    root in self._archive_root, and maps every plain entry yielded to
    its (archive, entry-name) origin in self._entry_mapping.

    @param archive: An opened archive handler.
    @param root: Path prefix of this archive relative to the outermost
        one, or None for the outermost archive itself. """
    self._archive_list.append(archive)
    self._archive_root[archive] = root
    supported_archive_regexp = archive_tools.get_supported_archive_regex()
    for f in archive.iter_contents():
        if supported_archive_regexp.search(f):
            # Extract sub-archive.
            destination_dir = os.path.join(self._destination_dir, 'sub-archives')
            if root is not None:
                destination_dir = os.path.join(destination_dir, root)
            archive.extract(f, destination_dir)
            # And open it and list its contents.
            sub_archive_path = os.path.join(destination_dir, f)
            sub_archive = archive_tools.get_archive_handler(sub_archive_path)
            if sub_archive is None:
                # Extension matched but no handler could open it:
                # skip it, its entries are never yielded.
                log.warning('Non-supported archive format: %s' %
                            os.path.basename(sub_archive_path))
                continue
            # Sub-archive entries are exposed under "<root>/<f>/...".
            sub_root = f
            if root is not None:
                sub_root = os.path.join(root, sub_root)
            for name in self._iter_contents(sub_archive, sub_root):
                yield name
        else:
            # Plain entry: record its origin and yield its full name.
            name = f
            if root is not None:
                name = os.path.join(root, name)
            self._entry_mapping[name] = (archive, f)
            yield name
def __get_file_filter(self, files):
    """ Determines what kind of files should be filtered in the given
    list of <files>. Returns either a filter accepting only images, or
    only archives, depending on what type of file is found first in the
    list.

    @return: A one-argument predicate suitable for filter(). """
    # BUG FIX: the old code called pattern.search(file, re.I), passing
    # re.I (== 2) as the <pos> start-offset argument, so matching began
    # at the third character; flags on a compiled pattern are fixed at
    # compile time and cannot be supplied to search().
    # Also fetch the compiled pattern once instead of re-fetching it on
    # every invocation of the returned filter.
    # NOTE(review): assumes get_supported_archive_regex() compiles with
    # re.I itself -- confirm in archive_tools.
    archive_re = archive_tools.get_supported_archive_regex()
    for file in files:
        if os.path.isfile(file) and image_tools.is_image_file(file):
            return lambda file: image_tools.is_image_file(file)
        elif (os.path.isfile(file)
              and archive_re.search(file) is not None):
            return lambda file: archive_re.search(file) is not None
    # Default filter only accepts images.
    return lambda file: image_tools.is_image_file(file)
def __get_file_filter(self, files):
    """ Determines what kind of files should be filtered in the given
    list of <files>. Returns either a filter accepting only images, or
    only archives, depending on what type of file is found first in the
    list.

    @return: A one-argument predicate suitable for filter(). """
    # BUG FIX: pattern.search(file, re.I) passed re.I (== 2) as the
    # <pos> argument, skipping the first two characters of every name;
    # a compiled pattern's flags cannot be changed at search time.
    # NOTE(review): get_supported_archive_regex() is presumed compiled
    # with re.I -- verify in archive_tools.
    archive_re = archive_tools.get_supported_archive_regex()
    for file in files:
        if not os.path.isfile(file):
            continue
        if image_tools.is_image_file(file):
            return lambda file: image_tools.is_image_file(file)
        if archive_re.search(file) is not None:
            # Bind the compiled pattern once for the returned filter.
            return lambda file: archive_re.search(file) is not None
    # Default filter only accepts images.
    return lambda file: image_tools.is_image_file(file)
def _create_thumbnail_pixbuf(self, filepath):
    """ Creates a thumbnail pixbuf from <filepath>, and returns it as a
    tuple along with a file metadata dictionary: (pixbuf, tEXt_data).

    Returns (None, None) when no thumbnail could be produced. """
    if archive_tools.archive_mime_type(filepath) is not None:
        extractor = archive_extractor.Extractor()
        tmpdir = tempfile.mkdtemp(prefix=u'mcomix_archive_thumb.')
        # BUG FIX: the temporary directory used to leak on two paths
        # (cover extraction failing, and the sub-archive recursion
        # return); clean it up on every exit with try/finally.
        try:
            condition = extractor.setup(filepath, tmpdir)
            files = extractor.get_files()
            wanted = self._guess_cover(files)

            if wanted:
                extractor.set_files([wanted])
                extractor.extract()
                image_path = os.path.join(tmpdir, wanted)

                # Wait until the extractor has produced the cover file.
                condition.acquire()
                while not extractor.is_ready(wanted):
                    condition.wait()
                condition.release()

                if not os.path.isfile(image_path):
                    return None, None

                pixbuf = image_tools.load_pixbuf_size(image_path, self.width, self.height)
                tEXt_data = self._get_text_data(image_path)
                # Use the archive's mTime instead of the extracted file's mtime
                tEXt_data['tEXt::Thumb::MTime'] = str(long(os.stat(filepath).st_mtime))
                return pixbuf, tEXt_data
            else:
                # Then check for subarchives by file extension and
                # extract only the first...
                subs = filter(archive_tools.get_supported_archive_regex().search, files)
                if subs:
                    extractor.set_files([subs[0]])
                    extractor.extract()
                    condition.acquire()
                    while not extractor.is_ready(subs[0]):
                        condition.wait()
                    condition.release()
                    subpath = os.path.join(tmpdir, subs[0])
                    # Recursively try to find an image to use as cover
                    # (the recursion creates and cleans its own tmpdir).
                    return self._create_thumbnail_pixbuf(subpath)
                return None, None
        finally:
            # ignore_errors=True: best-effort cleanup, as before.
            shutil.rmtree(tmpdir, True)
    elif image_tools.is_image_file(filepath):
        pixbuf = image_tools.load_pixbuf_size(filepath, self.width, self.height)
        tEXt_data = self._get_text_data(filepath)
        return pixbuf, tEXt_data
    else:
        return None, None
def get_new_files(self, filelist):
    """ Returns a list of files that are present in the watched directory,
    but not in the list of files passed in C{filelist}.

    @param filelist: Iterable of already-known paths (made absolute
        before comparison).
    @return: List of new archive paths; empty if the watcher is not
        valid. """
    if not self.is_valid():
        return []

    old_files = frozenset([os.path.abspath(path) for path in filelist])
    # PERF: fetch the compiled pattern once; the old code re-created it
    # for every filename (flat case) or every directory (recursive case).
    archive_re = archive_tools.get_supported_archive_regex()
    if not self.recursive:
        available_files = frozenset([os.path.join(self.directory, filename)
                                     for filename in os.listdir(self.directory)
                                     if archive_re.search(filename)])
    else:
        available_files = []
        for dirpath, dirnames, filenames in os.walk(self.directory):
            for filename in filter(archive_re.search, filenames):
                path = os.path.join(dirpath, filename)
                available_files.append(path)
        available_files = frozenset(available_files)

    return list(available_files.difference(old_files))
def _open_archive(self, path, start_page):
    """ Opens the archive passed in C{path}.

    Creates an L{archive_extractor.Extractor} and extracts all images
    found within the archive, recursing into any supported sub-archives.

    @param path: Path of the archive to open.
    @param start_page: Page number the book should open at.
    @return: A tuple containing C{(image_files, image_index)}. """
    self._base_path = path
    try:
        self._condition = self._extractor.setup(self._base_path,
                                                self._tmp_dir,
                                                self.archive_type)
    except Exception:
        # Remember that setup failed before propagating the error.
        self._condition = None
        raise

    if self._condition != None:
        files = self._extractor.get_files()
        archive_images = [image for image in files
                          if self._image_re.search(image)
                          # Remove MacOS meta files from image list
                          and not u'__MACOSX' in os.path.normpath(image).split(os.sep)]

        archive_images = self._sort_archive_images(archive_images)
        image_files = [os.path.join(self._tmp_dir, f) for f in archive_images]

        # NOTE: relies on Python 2 filter() returning a list; the result
        # is iterated more than once below.
        comment_files = filter(self._comment_re.search, files)
        self._comment_files = [os.path.join(self._tmp_dir, f) for f in comment_files]

        # Allow managing sub-archives by keeping archives based on extension
        archive_files = filter(archive_tools.get_supported_archive_regex().search, files)
        archive_files_paths = [os.path.join(self._tmp_dir, f) for f in archive_files]

        # Map every extracted path back to its in-archive name.
        for name, full_path in zip(archive_images, image_files):
            self._name_table[full_path] = name
        for name, full_path in zip(comment_files, self._comment_files):
            self._name_table[full_path] = name
        for name, full_path in zip(archive_files, archive_files_paths):
            self._name_table[full_path] = name

        # Determine current archive image index.
        current_image_index = self._get_index_for_page(start_page,
                                                       len(image_files),
                                                       path)
        # Sort files to determine extraction order.
        self._sort_archive_files(archive_images, current_image_index)

        self._extractor.set_files(archive_images + comment_files + archive_files)
        self._extractor.file_extracted += self._extracted_file
        self._extractor.extract()

        # Manage subarchive through recursion
        if archive_files:
            has_subarchive = False
            # For each potential archive, change the current extractor,
            # extract recursively, and restore the internal extractor.
            for f in archive_files_paths:
                if not self._extractor.is_ready(f):
                    self._wait_on_file(f)

                if archive_tools.archive_mime_type(f) is not None:
                    # save self data
                    state = self._save_state()
                    # Setup temporary data
                    self._extractor = archive_extractor.Extractor()
                    self._tmp_dir = os.path.join(self._tmp_dir,
                                                 os.path.basename(f) + u'.dir')
                    if not os.path.exists(self._tmp_dir):
                        os.mkdir(self._tmp_dir)
                    self._condition = self._extractor.setup(self._base_path,
                                                            self._tmp_dir,
                                                            self.archive_type)
                    self._extractor.file_extracted += self._extracted_file
                    add_images, dummy_idx = self._open_archive(f, 1)  # recursion here
                    # Since it's recursive, we do not want to loose the way to ensure
                    # that the file was extracted, so too bad but it will be a lil' slower.
                    for image in add_images:
                        self._wait_on_file(image)
                    image_files.extend(add_images)
                    self._extractor.stop()
                    self._extractor.close()
                    # restore self data
                    self._restore_state(state)
                    has_subarchive = True

            # Allows to avoid any behaviour changes if there was no subarchive..
            if has_subarchive:
                # Mark additional files as extracted
                self._comment_files = \
                    filter(self._comment_re.search, image_files)
                tmp_image_files = \
                    filter(self._image_re.search, image_files)
                self._name_table.clear()
                for full_path in tmp_image_files + self._comment_files:
                    self._name_table[full_path] = os.path.basename(full_path)
                    # This trick here allows to avoid indefinite waiting on
                    # the sub-extracted files.
                    self._extractor._extracted[os.path.basename(full_path)] = True

                # set those files instead of image_files for the return
                image_files = tmp_image_files

            # Image index may have changed after additional files were extracted.
            current_image_index = self._get_index_for_page(start_page,
                                                           len(image_files),
                                                           path, confirm=True)

        return image_files, current_image_index
    else:
        # No condition was returned from the Extractor, i.e. invalid archive.
        return [], 0
def _open_archive(self, path, start_page):
    """ Opens the archive passed in C{path}.

    Creates an L{archive_extractor.Extractor} and extracts all images
    found within the archive, recursing into any supported sub-archives.

    @param path: Path of the archive to open.
    @param start_page: Page number the book should open at.
    @return: A tuple containing C{(image_files, image_index)}. """
    self._base_path = path
    try:
        self._condition = self._extractor.setup(self._base_path,
                                                self._tmp_dir,
                                                self.archive_type)
    except Exception:
        # Remember that setup failed before propagating the error.
        self._condition = None
        raise

    # FIX: identity comparison with None (PEP 8) instead of "!= None".
    if self._condition is not None:
        files = self._extractor.get_files()
        archive_images = [image for image in files
                          if self._image_re.search(image)
                          # Remove MacOS meta files from image list
                          and u'__MACOSX' not in os.path.normpath(image).split(os.sep)]

        archive_images = self._sort_archive_images(archive_images)
        image_files = [os.path.join(self._tmp_dir, f) for f in archive_images]

        # NOTE: relies on Python 2 filter() returning a list; the result
        # is iterated more than once below.
        comment_files = filter(self._comment_re.search, files)
        self._comment_files = [os.path.join(self._tmp_dir, f) for f in comment_files]

        # Allow managing sub-archives by keeping archives based on extension
        archive_files = filter(archive_tools.get_supported_archive_regex().search, files)
        archive_files_paths = [os.path.join(self._tmp_dir, f) for f in archive_files]

        # Map every extracted path back to its in-archive name.
        for name, full_path in zip(archive_images, image_files):
            self._name_table[full_path] = name
        for name, full_path in zip(comment_files, self._comment_files):
            self._name_table[full_path] = name
        for name, full_path in zip(archive_files, archive_files_paths):
            self._name_table[full_path] = name

        # Determine current archive image index.
        current_image_index = self._get_index_for_page(start_page,
                                                       len(image_files),
                                                       path)
        # Sort files to determine extraction order.
        self._sort_archive_files(archive_images, current_image_index)

        self._extractor.set_files(archive_images + comment_files + archive_files)
        self._extractor.file_extracted += self._extracted_file
        self._extractor.extract()

        # Manage subarchive through recursion
        if archive_files:
            has_subarchive = False
            # For each potential archive, change the current extractor,
            # extract recursively, and restore the internal extractor.
            for f in archive_files_paths:
                if not self._extractor.is_ready(f):
                    self._wait_on_file(f)

                if archive_tools.archive_mime_type(f) is not None:
                    # save self data
                    state = self._save_state()
                    # Setup temporary data
                    self._extractor = archive_extractor.Extractor()
                    self._tmp_dir = os.path.join(self._tmp_dir,
                                                 os.path.basename(f) + u'.dir')
                    if not os.path.exists(self._tmp_dir):
                        os.mkdir(self._tmp_dir)
                    self._condition = self._extractor.setup(self._base_path,
                                                            self._tmp_dir,
                                                            self.archive_type)
                    self._extractor.file_extracted += self._extracted_file
                    add_images, dummy_idx = self._open_archive(f, 1)  # recursion here
                    # Since it's recursive, we do not want to loose the way to ensure
                    # that the file was extracted, so too bad but it will be a lil' slower.
                    for image in add_images:
                        self._wait_on_file(image)
                    image_files.extend(add_images)
                    self._extractor.stop()
                    self._extractor.close()
                    # restore self data
                    self._restore_state(state)
                    has_subarchive = True

            # Allows to avoid any behaviour changes if there was no subarchive..
            if has_subarchive:
                # Mark additional files as extracted
                self._comment_files = \
                    filter(self._comment_re.search, image_files)
                tmp_image_files = \
                    filter(self._image_re.search, image_files)
                self._name_table.clear()
                for full_path in tmp_image_files + self._comment_files:
                    self._name_table[full_path] = os.path.basename(full_path)
                    # This trick here allows to avoid indefinite waiting on
                    # the sub-extracted files.
                    self._extractor._extracted[os.path.basename(full_path)] = True

                # set those files instead of image_files for the return
                image_files = tmp_image_files

            # Image index may have changed after additional files were extracted.
            current_image_index = self._get_index_for_page(start_page,
                                                           len(image_files),
                                                           path, confirm=True)

        return image_files, current_image_index
    else:
        # No condition was returned from the Extractor, i.e. invalid archive.
        return [], 0