Example #1
0
def unzip(input: File, output_chapter: Queue, output_page: Queue):
    """
    Open an uploaded archive and split it into chapters and pages.

    The file at ``input.location`` is opened from ``root_filestore`` and
    wrapped in a read-only zip filesystem. Every known unzip strategy is then
    tried in order: a strategy is only run when its ``match`` accepts the
    archive, and the first one whose ``process`` returns a truthy value wins.
    Both queues are handed to the strategy, which emits onto them.

    The outcome is recorded through ``backend.file.update``:

    * ``ignore=True`` when the file is not a valid zip archive,
    * ``state='done'`` as soon as one strategy succeeds,
    * ``state='error'`` when no strategy matched or every attempt failed.

    Both the zip filesystem and the underlying file handle are closed on
    every exit path, including when a strategy or backend call raises.

    :param input: file record; ``location`` and ``file_id`` are read here.
    :param output_chapter: queue passed to the strategy for chapter objects.
    :param output_page: queue passed to the strategy for page objects.
    """
    logger.debug('Entering unzip')

    unzip_strategies = [
        unzippers.chapters_in_subdirectories, unzippers.zip_containing_zips,
        unzippers.single_chapter,
        unzippers.chapters_in_subdirectories_with_credits
    ]

    success = False

    zip_f = root_filestore.open(input.location, 'rb')
    try:
        try:
            archive = zipfs.ReadZipFS(zip_f)
        except zipfile.BadZipFile:
            logger.error(
                'Error -- {file} appears to not be a zipfile, marking for '
                'ignore'.format(file=input.location))
            backend.file.update(file_id=input.file_id, ignore=True)
            return

        try:
            for strategy in unzip_strategies:
                if not strategy.match(archive):
                    continue

                logger.info(
                    'Trying to unzip file {file} with strategy {strategy}'.
                    format(
                        file=input.location,
                        strategy=strategy.__name__,
                    ))
                try:
                    success = strategy.process(input, archive, output_chapter,
                                               output_page)
                except Exception as e:
                    # Deliberately broad: a failing strategy must not abort
                    # the run, the next matching strategy still gets a try.
                    success = False
                    logger.error('Unzip failed: {err}'.format(err=str(e), ))

                if success:
                    logger.info('Success')
                    backend.file.update(file_id=input.file_id, state='done')
                    break
        finally:
            archive.close()
    finally:
        # The zip filesystem was handed an already-open handle; do not rely
        # on its close() to release a file it did not open itself.
        zip_f.close()

    if not success:
        logger.warning('Failed unzipping {file} -- no working strategy'.format(
            file=input.location, ))
        backend.file.update(file_id=input.file_id, state='error')
Example #2
0
 def test_implied(self):
     """Test zipfs creates intermediate directories.

     Only the file ``foo/bar/baz/egg`` is written to the archive, so every
     parent directory has to be reported by the zip filesystem without an
     explicit entry of its own.
     """
     fh, path = tempfile.mkstemp('testzip.zip')
     try:
         # mkstemp hands back an open descriptor; release it before the
         # path is reopened by name below.
         os.close(fh)
         with zipfile.ZipFile(path, mode='w') as z:
             z.writestr('foo/bar/baz/egg', b'hello')
         with zipfs.ReadZipFS(path) as zip_fs:
             foo = zip_fs.getinfo('foo', ['details'])
             bar = zip_fs.getinfo('foo/bar')
             baz = zip_fs.getinfo('foo/bar/baz')
             # Every level of the implied hierarchy must be a directory,
             # not just the top-most one.
             self.assertTrue(foo.is_dir)
             self.assertTrue(bar.is_dir)
             self.assertTrue(baz.is_dir)
             self.assertTrue(zip_fs.isfile('foo/bar/baz/egg'))
     finally:
         os.remove(path)
Example #3
0
 def test_implied(self):
     """Test zipfs creates intermediate directories.

     Only the file ``foo/bar/baz/egg`` is written to the archive, so every
     parent directory has to be reported by the zip filesystem without an
     explicit entry of its own.
     """
     fh, path = tempfile.mkstemp("testzip.zip")
     try:
         # mkstemp hands back an open descriptor; release it before the
         # path is reopened by name below.
         os.close(fh)
         with zipfile.ZipFile(path, mode="w") as z:
             z.writestr("foo/bar/baz/egg", b"hello")
         with zipfs.ReadZipFS(path) as zip_fs:
             foo = zip_fs.getinfo("foo", ["details"])
             bar = zip_fs.getinfo("foo/bar")
             baz = zip_fs.getinfo("foo/bar/baz")
             # Every level of the implied hierarchy must be a directory,
             # not just the top-most one.
             self.assertTrue(foo.is_dir)
             self.assertTrue(bar.is_dir)
             self.assertTrue(baz.is_dir)
             self.assertTrue(zip_fs.isfile("foo/bar/baz/egg"))
     finally:
         os.remove(path)
def _list_pages(subdir):
    """Return the sorted page paths of one chapter archive.

    If the archive root holds exactly one entry and that entry is a
    directory, the pages are taken from inside that directory instead.
    """
    pages = subdir.listdir('/')

    # Some archives wrap all pages in a single top-level directory; descend
    # into it so the page list is the images, not the wrapper.
    if len(pages) == 1 and subdir.isdir(pages[0]):
        logger.info(
            'Subzip was of a directory containing files -- changing dir')
        pages = [
            fs.path.join(pages[0], x) for x in subdir.listdir(pages[0])
        ]

    pages.sort()
    return pages


def process(input: File, zipfile: zipfs.ReadZipFS, output_chapter: Queue,
            output_page: Queue):
    """
    Unpack an archive whose root entries are themselves zip files.

    Every entry in the root of ``zipfile`` is treated as one chapter. A
    ``Chapter`` is built from the number guessed from the entry name and put
    on ``output_chapter``. The entry is then copied into a temporary
    filesystem, reopened as a zip filesystem, and each of its pages (sorted
    by path) is read fully and put on ``output_page`` as a ``Page``.

    The temporary filesystem, the extracted file handle and the inner zip
    filesystem are released even when reading a chapter raises.

    :param input: file record; ``manga_id`` and ``file_id`` are read here.
    :param zipfile: the opened outer archive (note: shadows the stdlib
        module name inside this function).
    :param output_chapter: queue receiving one ``Chapter`` per root entry.
    :param output_page: queue receiving one ``Page`` per page file.
    :return: ``True`` once every root entry has been processed; failures
        surface as exceptions rather than a falsy return.
    """
    logger.debug('Entering zip_containing_zips processing stage')

    with fs.tempfs.TempFS() as unzip_tmpfs:
        # NOTE(review): every root entry is assumed to be a zip file;
        # presumably the strategy's match() guarantees that -- confirm.
        subzips = zipfile.listdir('/')
        logger.info('Unzipping found ({num_chapters}) chapters'.format(
            num_chapters=len(subzips), ))

        for subzip in subzips:
            chapter_number = guess_chapter(subzip)
            chapter = Chapter(
                manga_id=input.manga_id,
                name=str(chapter_number),
                sort_key=chapter_number,
            )

            output_chapter.put(chapter)

            logger.info('Starting to extract subzip to tmpfs')

            # The inner archive is copied out first and reopened from the
            # temporary filesystem rather than read in place.
            with zipfile.open(subzip, 'rb') as opened_zip:
                with unzip_tmpfs.open(subzip, 'wb') as tmpfs_f:
                    tmpfs_f.write(opened_zip.read())

            logger.info('Tmpfs extraction successful')
            logger.info('Starting to read inner zip')

            with unzip_tmpfs.open(subzip, 'rb') as tmpfs_f:
                with zipfs.ReadZipFS(tmpfs_f) as subdir:
                    pages = _list_pages(subdir)
                    logger.info('Chapter has ({num_pages}) pages'.format(
                        num_pages=len(pages), ))

                    for idx, pagename in enumerate(pages):
                        page_path = fs.path.join('/', pagename)
                        with subdir.open(page_path, 'rb') as page_f:
                            # noinspection PyUnresolvedReferences
                            data = page_f.read()

                        page = Page(
                            chapter=chapter,
                            sort_key=idx,
                            file_id=input.file_id,
                            data=data,
                        )

                        # NOTE(review): the chapter was already put on
                        # output_chapter above; whether its consumer sees
                        # this increment depends on the queue type --
                        # confirm.
                        chapter.num_pages += 1

                        output_page.put(page)

    return True