def unzip(input: File, output_chapter: Queue, output_page: Queue):
    """
    Open the archive at ``input.location`` and split it into chapters/pages.

    The archive is offered to each known unzip strategy in turn. The first
    strategy whose ``match`` accepts it and whose ``process`` returns a truthy
    value wins; that strategy is handed ``output_chapter`` and ``output_page``
    to emit its results on. A strategy that raises is logged and treated as
    a failure, and the next strategy is tried.

    Backend bookkeeping performed here:
      * not a zip file at all  -> file is updated with ``ignore=True``
      * a strategy succeeded   -> file is updated with ``state='done'``
      * no strategy succeeded  -> file is updated with ``state='error'``

    :param input: file record; ``location`` and ``file_id`` are read here.
    :param output_chapter: queue passed through to the strategy.
    :param output_page: queue passed through to the strategy.
    :return: None
    """
    logger.debug('Entering unzip')
    zip_f = root_filestore.open(input.location, 'rb')
    try:
        try:
            archive = zipfs.ReadZipFS(zip_f)
        except zipfile.BadZipFile:
            logger.error(
                'Error -- {file} appears to not be a zipfile, marking for ignore'.
                format(file=input.location))
            backend.file.update(file_id=input.file_id, ignore=True)
            return

        unzip_strategies = [
            unzippers.chapters_in_subdirectories,
            unzippers.zip_containing_zips,
            unzippers.single_chapter,
            unzippers.chapters_in_subdirectories_with_credits,
        ]

        success = False
        try:
            for strategy in unzip_strategies:
                if not strategy.match(archive):
                    continue
                logger.info(
                    'Trying to unzip file {file} with strategy {strategy}'.format(
                        file=input.location,
                        strategy=strategy.__name__,
                    ))
                try:
                    success = strategy.process(input, archive, output_chapter,
                                               output_page)
                except Exception as e:
                    # Deliberate best-effort: a failing strategy must not
                    # prevent the remaining strategies from being tried.
                    success = False
                    logger.error('Unzip failed: {err}'.format(err=str(e), ))
                if success:
                    logger.info('Success')
                    backend.file.update(file_id=input.file_id, state='done')
                    break
        finally:
            # Close the zip filesystem even if a strategy's match() raised.
            archive.close()
    finally:
        # The underlying file handle was opened here, so it is released here
        # on every exit path (including the bad-zip early return).
        zip_f.close()

    if not success:
        logger.warning('Failed unzipping {file} -- no working strategy'.format(
            file=input.location, ))
        backend.file.update(file_id=input.file_id, state='error')
def test_implied(self):
    """Test zipfs creates intermediate directories.

    The archive contains a single member, ``foo/bar/baz/egg``, with no
    explicit directory entries. Every implied parent directory must be
    reported as a directory and the member itself as a file.
    """
    fh, path = tempfile.mkstemp('testzip.zip')
    try:
        # mkstemp returns an open descriptor; close it so ZipFile can reopen
        # the path for writing.
        os.close(fh)
        with zipfile.ZipFile(path, mode='w') as z:
            z.writestr('foo/bar/baz/egg', b'hello')
        with zipfs.ReadZipFS(path) as zip_fs:
            foo = zip_fs.getinfo('foo', ['details'])
            bar = zip_fs.getinfo('foo/bar')
            baz = zip_fs.getinfo('foo/bar/baz')
            self.assertTrue(foo.is_dir)
            # Previously fetched but never checked: every level of the
            # implied hierarchy should be a directory, not just the top.
            self.assertTrue(bar.is_dir)
            self.assertTrue(baz.is_dir)
            self.assertTrue(zip_fs.isfile('foo/bar/baz/egg'))
    finally:
        os.remove(path)
def test_implied(self):
    """Test zipfs creates intermediate directories.

    The archive contains a single member, ``foo/bar/baz/egg``, with no
    explicit directory entries. Every implied parent directory must be
    reported as a directory and the member itself as a file.
    """
    fh, path = tempfile.mkstemp("testzip.zip")
    try:
        # mkstemp returns an open descriptor; close it so ZipFile can reopen
        # the path for writing.
        os.close(fh)
        with zipfile.ZipFile(path, mode="w") as z:
            z.writestr("foo/bar/baz/egg", b"hello")
        with zipfs.ReadZipFS(path) as zip_fs:
            foo = zip_fs.getinfo("foo", ["details"])
            bar = zip_fs.getinfo("foo/bar")
            baz = zip_fs.getinfo("foo/bar/baz")
            self.assertTrue(foo.is_dir)
            # Previously fetched but never checked: every level of the
            # implied hierarchy should be a directory, not just the top.
            self.assertTrue(bar.is_dir)
            self.assertTrue(baz.is_dir)
            self.assertTrue(zip_fs.isfile("foo/bar/baz/egg"))
    finally:
        os.remove(path)
def process(input: File, zipfile: zipfs.ReadZipFS, output_chapter: Queue,
            output_page: Queue):
    """
    Process an archive whose top-level entries are themselves zip files.

    Every entry listed at the root of ``zipfile`` is treated as one chapter:
    a ``Chapter`` is put on ``output_chapter``, the inner zip is copied into a
    temporary filesystem, reopened as a zip filesystem, and each of its
    entries (in sorted order) is put on ``output_page`` as a ``Page``.
    ``chapter.num_pages`` is incremented once per emitted page.

    All temporary resources are released even if reading a chapter fails;
    the exception itself is left to propagate to the caller.

    :param input: file record; ``manga_id`` and ``file_id`` are read here.
    :param zipfile: the already-opened outer archive.
    :param output_chapter: queue receiving one ``Chapter`` per inner zip.
    :param output_page: queue receiving one ``Page`` per page file.
    :return: ``True`` once every inner zip has been processed.
    """
    logger.debug('Entering zip_containing_zips processing stage')
    unzip_tmpfs = fs.tempfs.TempFS()
    try:
        subzips = zipfile.listdir('/')
        logger.info('Unzipping found ({num_chapters}) chapters'.format(
            num_chapters=len(subzips), ))

        for subzip in subzips:
            chapter_number = guess_chapter(subzip)
            chapter = Chapter(
                manga_id=input.manga_id,
                name=str(chapter_number),
                sort_key=chapter_number,
            )
            output_chapter.put(chapter)

            # Copy the inner zip out of the outer archive into the tmpfs
            # before reopening it as a zip filesystem.
            logger.info('Starting to extract subzip to tmpfs')
            with zipfile.open(subzip, 'rb') as opened_zip:
                with unzip_tmpfs.open(subzip, 'wb') as tmpfs_f:
                    tmpfs_f.write(opened_zip.read())
            logger.info('Tmpfs extraction successful')

            logger.info('Starting to read inner zip')
            with unzip_tmpfs.open(subzip, 'rb') as tmpfs_f, \
                    zipfs.ReadZipFS(tmpfs_f) as subdir:
                pages = subdir.listdir('/')

                # Some archives wrap all pages in a single top-level
                # directory; descend into it so the pages are found.
                if len(pages) == 1 and subdir.isdir(pages[0]):
                    logger.info(
                        'Subzip was of a directory containing files -- changing dir')
                    pages = [
                        fs.path.join(pages[0], x)
                        for x in subdir.listdir(pages[0])
                    ]

                # The page index below relies on the sorted name order.
                pages.sort()
                logger.info('Chapter has ({num_pages}) pages'.format(
                    num_pages=len(pages), ))

                for idx, pagename in enumerate(pages):
                    with subdir.open(fs.path.join('/', pagename),
                                     'rb') as page_f:
                        # noinspection PyUnresolvedReferences
                        data = page_f.read()
                    page = Page(
                        chapter=chapter,
                        sort_key=idx,
                        file_id=input.file_id,
                        data=data,
                    )
                    chapter.num_pages += 1
                    output_page.put(page)
    finally:
        # Always dispose of the temporary filesystem, success or failure.
        unzip_tmpfs.close()
    return True