def get_archive_info(path):
    """Return a tuple (mime, num_pages, size) with info about the archive
    at <path>, or None if <path> doesn't point to a supported archive.
    """
    image_re = constants.SUPPORTED_IMAGE_REGEX
    # XXX: Deferred import to avoid circular dependency
    from mcomix import archive_extractor
    extractor = archive_extractor.Extractor()
    try:
        extractor.setup(path, None)
    except archive_extractor.ArchiveException:
        return None

    mime = extractor.get_mime_type()
    if mime is None:
        return None

    files = extractor.get_files()
    extractor.close()
    num_pages = len(filter(image_re.search, files))
    size = os.stat(path).st_size

    return (mime, num_pages, size)
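# A minimal usage sketch for get_archive_info(). The helper name and the
# printed format are illustrative assumptions, not part of the module; only
# the (mime, num_pages, size) / None contract comes from the function above.
def _print_archive_summary(path):
    """Print a one-line summary of the archive at <path>, if supported."""
    info = get_archive_info(path)
    if info is None:
        print 'Not a supported archive: %s' % path
    else:
        mime, num_pages, size = info
        print '%s: %s, %d image(s), %d bytes' % (path, mime, num_pages, size)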
def _create_thumbnail_pixbuf(self, filepath):
    """ Creates a thumbnail pixbuf from <filepath>, and returns it as a
    tuple along with a file metadata dictionary: (pixbuf, tEXt_data). """
    if archive_tools.archive_mime_type(filepath) is not None:
        extractor = archive_extractor.Extractor()
        tmpdir = tempfile.mkdtemp(prefix=u'mcomix_archive_thumb.')
        condition = extractor.setup(filepath, tmpdir)
        files = extractor.get_files()
        wanted = self._guess_cover(files)

        if wanted:
            extractor.set_files([wanted])
            extractor.extract()
            image_path = os.path.join(tmpdir, wanted)

            condition.acquire()
            while not extractor.is_ready(wanted):
                condition.wait()
            condition.release()

            if not os.path.isfile(image_path):
                # Extraction failed; clean up before bailing out.
                shutil.rmtree(tmpdir, True)
                return None, None

            pixbuf = image_tools.load_pixbuf_size(image_path,
                self.width, self.height)
            tEXt_data = self._get_text_data(image_path)
            # Use the archive's mtime instead of the extracted file's mtime.
            tEXt_data['tEXt::Thumb::MTime'] = \
                str(long(os.stat(filepath).st_mtime))

            shutil.rmtree(tmpdir, True)
            return pixbuf, tEXt_data

        else:
            # No cover was found; check for sub-archives by file extension
            # and extract only the first one.
            subs = filter(constants.SUPPORTED_ARCHIVE_REGEX.search, files)
            if subs:
                extractor.set_files([subs[0]])
                extractor.extract()

                condition.acquire()
                while not extractor.is_ready(subs[0]):
                    condition.wait()
                condition.release()

                subpath = os.path.join(tmpdir, subs[0])
                # Recursively try to find an image to use as cover,
                # then clean up this level's temporary directory.
                pixbuf, tEXt_data = self._create_thumbnail_pixbuf(subpath)
                shutil.rmtree(tmpdir, True)
                return pixbuf, tEXt_data

            shutil.rmtree(tmpdir, True)
            return None, None

    elif image_tools.is_image_file(filepath):
        pixbuf = image_tools.load_pixbuf_size(filepath,
            self.width, self.height)
        tEXt_data = self._get_text_data(filepath)
        return pixbuf, tEXt_data

    else:
        return None, None
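# Hedged usage sketch for _create_thumbnail_pixbuf(), kept in comment form.
# The _save_thumbnail() call and thumbpath are hypothetical; only the
# (pixbuf, tEXt_data) return shape comes from the method itself.
#
#     pixbuf, tEXt_data = self._create_thumbnail_pixbuf(filepath)
#     if pixbuf is not None:
#         # tEXt_data carries freedesktop.org thumbnail metadata,
#         # e.g. 'tEXt::Thumb::MTime'.
#         self._save_thumbnail(pixbuf, thumbpath, tEXt_data)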
def __init__(self, window):
    #: Indicates if files/archives are currently loaded/loading.
    self.file_loaded = False
    self.file_loading = False
    #: Indicates if the file/archive load has failed.
    self.file_load_failed = False
    #: None if the current file is not an archive, or is an unrecognized format.
    self.archive_type = None
    #: Either the path to the current archive, or the first file in the
    #: image list. This is B{not} the path to the currently open page.
    self._current_file = None
    #: Reference to L{MainWindow}.
    self._window = window
    #: Path to the opened archive file, or the directory containing the
    #: current images.
    self._base_path = None
    #: Temporary directory used for extracting archives.
    self._tmp_dir = tempfile.mkdtemp(prefix=u'mcomix.', suffix=os.sep)
    #: If C{True}, no longer wait for files to get extracted.
    self._stop_waiting = False
    #: List of comment files inside the currently opened archive.
    self._comment_files = []
    #: Mapping of absolute paths to archive path names.
    self._name_table = {}
    #: Archive extractor.
    self._extractor = archive_extractor.Extractor()
    self._extractor.file_extracted += self._extracted_file
    self._extractor.contents_listed += self._listed_contents
    #: Condition to wait on when extracting archives and waiting on files.
    self._condition = None
    #: Provides a list of available files/archives in the open directory.
    self._file_provider = None
    #: Keeps track of the last read page in archives.
    self.last_read_page = last_read_page.LastReadPage(
        backend.LibraryBackend())
    #: Regexp used for determining which archive files are images.
    self._image_re = constants.SUPPORTED_IMAGE_REGEX
    #: Regexp used for determining which archive files are comment files.
    self._comment_re = None
    self.update_comment_extensions()
    #: Forces a call to window.draw_image (if loading is delayed by user
    #: interaction).
    self._must_call_draw = False

    self.last_read_page.set_enabled(bool(prefs['store recent file info']))
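# Hedged sketch of the event wiring used in the constructor above:
# file_extracted and contents_listed behave like multicast callbacks that
# listeners subscribe to with '+='. The handler names below are hypothetical;
# only the '+=' subscription pattern comes from the constructor itself.
#
#     extractor = archive_extractor.Extractor()
#     extractor.file_extracted += on_file_extracted
#     extractor.contents_listed += on_contents_listed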
def _open_archive(self, path, start_page):
    """ Opens the archive passed in C{path}.

    Creates an L{archive_extractor.Extractor} and extracts all images
    found within the archive.

    @return: A tuple containing C{(image_files, image_index)}. """

    self._base_path = path
    try:
        self._condition = self._extractor.setup(self._base_path,
            self._tmp_dir, self.archive_type)
    except Exception:
        self._condition = None
        raise

    if self._condition is not None:
        files = self._extractor.get_files()
        archive_images = [image for image in files
            if self._image_re.search(image)
            # Remove MacOS meta files from the image list.
            and u'__MACOSX' not in os.path.normpath(image).split(os.sep)]

        tools.alphanumeric_sort(archive_images)
        image_files = [os.path.join(self._tmp_dir, f)
                       for f in archive_images]

        comment_files = filter(self._comment_re.search, files)
        self._comment_files = [os.path.join(self._tmp_dir, f)
                               for f in comment_files]

        # Allow managing sub-archives by keeping archive entries,
        # identified by extension.
        archive_files = filter(constants.SUPPORTED_ARCHIVE_REGEX.search,
                               files)
        archive_files_paths = [os.path.join(self._tmp_dir, f)
                               for f in archive_files]

        for name, full_path in zip(archive_images, image_files):
            self._name_table[full_path] = name

        for name, full_path in zip(comment_files, self._comment_files):
            self._name_table[full_path] = name

        for name, full_path in zip(archive_files, archive_files_paths):
            self._name_table[full_path] = name

        # Determine the current archive image index.
        current_image_index = self._get_index_for_page(start_page,
            len(image_files), path)

        # Sort files to determine the extraction order.
        self._sort_archive_files(archive_images, current_image_index)

        self._extractor.set_files(archive_images + comment_files
            + archive_files)
        self._extractor.file_extracted += self._extracted_file
        self._extractor.extract()

        # Handle sub-archives through recursion.
        if archive_files:
            has_subarchive = False

            # For each potential sub-archive, swap in a fresh extractor,
            # extract recursively, then restore the internal extractor.
            for f in archive_files_paths:
                if not self._extractor.is_ready(f):
                    self._wait_on_file(f)

                if archive_tools.archive_mime_type(f) is not None:
                    # Save the handler's current state.
                    state = self._save_state()

                    # Set up temporary extraction data.
                    self._extractor = archive_extractor.Extractor()
                    self._tmp_dir = os.path.join(self._tmp_dir,
                        os.path.basename(f) + u'.dir')
                    if not os.path.exists(self._tmp_dir):
                        os.mkdir(self._tmp_dir)
                    self._condition = self._extractor.setup(self._base_path,
                        self._tmp_dir, self.archive_type)
                    self._extractor.file_extracted += self._extracted_file

                    # Recurse into the sub-archive.
                    add_images, dummy_idx = self._open_archive(f, 1)

                    # Since extraction is recursive, we still need a way to
                    # ensure each file was extracted, even if waiting makes
                    # this a little slower.
                    for image in add_images:
                        self._wait_on_file(image)
                    image_files.extend(add_images)

                    self._extractor.stop()
                    self._extractor.close()

                    # Restore the handler's saved state.
                    self._restore_state(state)
                    has_subarchive = True

            # Avoid any behaviour change if there was no sub-archive.
            if has_subarchive:
                # Mark the additional files as extracted.
                self._comment_files = \
                    filter(self._comment_re.search, image_files)
                tmp_image_files = \
                    filter(self._image_re.search, image_files)
                self._name_table.clear()
                for full_path in tmp_image_files + self._comment_files:
                    self._name_table[full_path] = \
                        os.path.basename(full_path)
                    # Mark the sub-extracted files as ready, to avoid
                    # waiting on them indefinitely.
                    self._extractor._extracted[
                        os.path.basename(full_path)] = True
                # Return these files instead of the original image_files.
                image_files = tmp_image_files

                # The image index may have changed after the additional
                # files were extracted.
                current_image_index = self._get_index_for_page(start_page,
                    len(image_files), path)

        return image_files, current_image_index

    else:
        # No condition was returned from the Extractor,
        # i.e. an invalid archive.
        return [], 0
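# Hedged usage sketch for _open_archive(), kept in comment form. The caller
# is assumed to have determined self.archive_type beforehand (e.g. via
# archive_tools.archive_mime_type()); only the (image_files, image_index)
# return contract comes from the method itself.
#
#     self.archive_type = archive_tools.archive_mime_type(path)
#     image_files, image_index = self._open_archive(path, start_page)
#     if image_files:
#         pass  # Hand image_files to the image list; show image_index first.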