async def load_guesses(filename, comics_data):
    """Resolve the ``guess_comics`` entries and start downloading the successful ones.

    One ``load_guess_for`` call is queued per ``name -> url`` pair found in
    ``comics_data['guess_comics']``. Each result is consumed as a
    ``(name, data, comics)`` triple: when ``data`` or ``comics`` is empty the
    entry is recorded as failed; otherwise a downloader is built from
    ``data``, the guessed comics are merged into its site (without replacing
    ids it already has), the site is saved, and a full ``load_comics`` run is
    queued.

    Args:
        filename: Not used by this function; kept so existing callers keep
            working.
        comics_data: Mapping that must contain ``'guess_comics'`` and, when
            that is non-empty, ``'comics'``. ``'presets'`` and ``'mixins'``
            are optional. Updated in place: ``'comics'`` is replaced by a
            copy extended with the successfully guessed entries, and
            ``'failed_comics'`` is set to the entries that could not be
            guessed.

    Returns:
        None. Returns early, leaving ``comics_data`` untouched, when there is
        nothing to guess.
    """
    comic_presets = comics_data.get('presets', {})
    comic_mixins = comics_data.get('mixins', {})
    guess_comics = comics_data['guess_comics']
    if not guess_comics:
        return

    loaded_comic_data = {}
    failed_comics = {}
    comic_guessers = FutureList()
    comic_loaders = FutureList()

    for name, url in guess_comics.items():
        comic_guessers.add(load_guess_for(name, url, comic_presets, comic_mixins))

    async for name, data, comics in comic_guessers:
        print(name, data)
        if not data or not comics:
            print()
            print()
            failed_comics[name] = guess_comics[name]
            continue

        loaded_comic_data[name] = data
        parser = ComicParser.load_parser(data, comic_presets, comic_mixins)
        downloader = ComicDownloader(parser, data['meta'])
        await downloader.load_existing_comics()
        # Only fill in ids the site does not know yet; existing entries win.
        for comic_id, comic in comics.items():
            if comic_id not in downloader.comic_site.comics:
                downloader.comic_site.set_comic(comic_id, comic)
        await downloader.comic_site.save()
        comic_loaders.add(downloader.load_comics())

    # dict.update() mutates in place and returns None, so the merged mapping
    # has to be built first and assigned afterwards; assigning the result of
    # .update() directly would store None under 'comics'.
    merged_comics = dict(comics_data['comics'])
    merged_comics.update(loaded_comic_data)
    comics_data['comics'] = merged_comics
    comics_data['failed_comics'] = failed_comics
    print(comics_data)

    await comic_loaders
def cancel_all_tasks():
    """Request cancellation of every task on the current event loop.

    Each task is cancelled and added to a ``FutureList``, which is then
    scheduled on the loop with ``asyncio.ensure_future``.
    NOTE(review): this assumes ``FutureList`` is awaitable, as it is awaited
    elsewhere in this file.
    """
    loop = asyncio.get_event_loop()
    # asyncio.Task.all_tasks() was deprecated in 3.7 and removed in 3.9 in
    # favour of asyncio.all_tasks(); fall back to the old spelling only on
    # interpreters that lack the new one.
    all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks

    pending = FutureList()
    for task in all_tasks(loop):
        task.cancel()
        pending.add(task)

    # `asyncio.async` cannot even be parsed on Python 3.7+ (`async` is a
    # reserved keyword); ensure_future is its documented replacement.
    asyncio.ensure_future(pending)
async def async_main():
    """Read the comics configuration from ``FILE`` and run both loaders.

    The parsed configuration supplies the ``'comics'`` mapping plus optional
    ``'presets'`` and ``'mixins'``. A ``load_comics`` run for the configured
    comics and a ``load_guesses`` run for the whole configuration are queued
    on one ``FutureList``, which is then awaited.
    """
    jobs = FutureList()

    with open(FILE) as config_file:
        config = safe_load(config_file)

    presets = config.get('presets', {})
    mixins = config.get('mixins', {})
    configured_comics = config['comics']

    jobs.add(load_comics(configured_comics, presets, mixins))
    jobs.add(load_guesses(FILE, config))

    await jobs
async def load_comics(comics, comic_presets, comic_mixins):
    """Build a downloader for every configured comic and run them all.

    For each ``name -> comic`` entry the metadata dict is taken from
    ``comic['meta']`` (or a new dict when absent) and normalised:

    * ``name``, ``layout``, ``folder`` and ``initialurl`` given at the top
      level of ``comic`` are copied in unless the metadata already has them;
    * ``name`` defaults to the mapping key;
    * ``layout`` falls back to ``'horizontal'`` unless it is one of
      ``'horizontal'``, ``'vertical'`` or ``'pane'``;
    * ``folder`` defaults to ``to_folder_name(name)``.

    Args:
        comics: Mapping of comic name to its configuration mapping. An
            existing ``'meta'`` dict inside an entry is modified in place.
        comic_presets: Passed through to ``ComicParser.load_parser``.
        comic_mixins: Passed through to ``ComicParser.load_parser``.

    Returns:
        None, once every queued ``ComicDownloader.load_comics`` call has been
        awaited.
    """
    comic_parsers = FutureList()

    for name, comic in comics.items():
        metadata = comic.get('meta', {})
        for meta_key in ('name', 'layout', 'folder', 'initialurl'):
            if meta_key in comic and meta_key not in metadata:
                metadata[meta_key] = comic[meta_key]

        metadata.setdefault('name', name)

        if metadata.get('layout') not in ('horizontal', 'vertical', 'pane'):
            metadata['layout'] = 'horizontal'

        # Check the metadata, not the raw comic entry: a top-level folder has
        # already been copied in above, and a folder given only under 'meta'
        # must not be replaced by the generated default.
        if 'folder' not in metadata:
            metadata['folder'] = to_folder_name(name)

        parser = ComicParser.load_parser(comic, comic_presets, comic_mixins)
        comic_parsers.add(ComicDownloader(parser, metadata).load_comics())

    await comic_parsers
async def check_existing_comics(self, client):
    """Check every comic already known to the site and re-download failures.

    Each ``(comic_id, comic)`` in ``self.comic_site.comics`` is passed to
    ``self.check_comic``; a falsy result queues ``self.download_comic`` for
    it. Once all checks are done the queued downloads are awaited.

    The site is saved whether or not the checks or downloads succeed.
    ``index.html`` under ``self.base_folder`` is regenerated only when
    everything succeeded.

    Args:
        client: Client object forwarded to ``check_comic`` and
            ``download_comic``.

    Raises:
        Whatever ``check_comic`` or the queued downloads raise; the error is
        re-raised after the site has been saved.
    """
    image_downloads = FutureList()
    try:
        # Iterate over a snapshot: every check awaits, and if this coroutine
        # runs concurrently with load_comics (which queues it and then calls
        # set_comic), a live view could change size mid-iteration and raise
        # RuntimeError.
        for comic_id, comic in list(self.comic_site.comics.items()):
            if not await self.check_comic(client, comic_id, comic):
                image_downloads.add(self.download_comic(client, comic_id, comic))
    except BaseException:
        # Deliberately as broad as the original bare `except:` so the site is
        # still saved on cancellation; the exception is always re-raised.
        log.exception("check_existing_comics failed.")
        await self.comic_site.save()
        raise

    try:
        await image_downloads
    finally:
        await self.comic_site.save()
    await self.comic_site.save_html(os.path.join(self.base_folder, "index.html"))
async def load_comics(self):
    """Walk the comic chain from the current comic and download each page.

    Existing comics are loaded and ``index.html`` is written first. Then,
    using one ``Client2`` session:

    1. if ``self.comic_site`` is truthy, ``check_existing_comics`` is queued;
    2. starting from ``get_current_comic``, each page is loaded, its image
       download is queued, it is registered on the site and the site is saved;
    3. a page raising ``SkipComicError`` is skipped by following
       ``skip.comic.next`` without advancing the id;
    4. the walk stops when there is no next page or the id has advanced more
       than 1000 past the starting id;
    5. all queued work is awaited.

    On any failure the error is logged, the site is saved and the error is
    re-raised. On success the site is saved and ``index.html`` is rewritten.
    """
    in_flight = FutureList()
    await self.load_existing_comics()
    await self.comic_site.save_html(os.path.join(self.base_folder, "index.html"))

    try:
        with Client2(self.comic_site.comic_info["name"], skip_auto_headers=["User-Agent"]) as client:
            if self.comic_site:
                in_flight.add(self.check_existing_comics(client))

            current_id, current_url = await self.get_current_comic(client)
            # Id the walk started from; bounds how far one run may go.
            first_id = current_id

            while current_url is not None:
                try:
                    comic = await self.load_comic(client, current_url)
                except SkipComicError as skip:
                    current_url = skip.comic.next
                    continue

                print(current_id, comic)
                in_flight.add(self.download_comic(client, current_id, comic))
                self.comic_site.set_comic(current_id, comic)
                current_url = comic.next
                current_id += 1
                await self.comic_site.save()

                # Stop at the end of the chain, or once more than 1000 ids
                # past the starting point have been walked in this run.
                chain_ended = not comic.next
                if chain_ended or current_id > first_id + 1000:
                    break

            log.info("Done loading information. Waiting on images.")
            await in_flight
    except BaseException:
        log.exception("load_comics failed.")
        await self.comic_site.save()
        raise

    await self.comic_site.save()
    await self.comic_site.save_html(os.path.join(self.base_folder, "index.html"))