async def main():
    get_urls_titles()
    global start
    global end
    global counter
    global err
    start, end = intro()
    if end == length:
        min_urls_titles = urls_titles[start:]
    else:
        min_urls_titles = urls_titles[start:end + 1]
    counter = start - 1
    xs = stream.iterate(min_urls_titles)
    ys = stream.starmap(xs, fetch, task_limit=100)
    zs = stream.starmap(ys, process, task_limit=100)
    await zs
    if counter < 0:
        counter = 0

    # About Novel
    about.find('img').decompose()  # Remove IMG tags
    for a in about.select("a"):  # Remove anchor tags
        a['href'] = '#'
    html_gen("hr", soup, "", about)
    html_gen("h3", soup, "Description", about)
    syn = synopsis.text.replace("Description", "")
    html_gen("p", soup, syn, about)
    html_gen("hr", soup, "", about)
    html_gen("h3", soup, "About This Download : ", about)
    html_gen("p", soup, "Total Chapters = " + str(counter), about)
    html_gen("p", soup, "No. Of Chapters That Raised Exceptions = " + str(len(err)), about)
    if len(err) != 0:
        html_gen("p", soup, "And They Are : ", about)
        for i in err:
            html_gen("li", soup, str(i), about)
    html_gen("hr", soup, "", about)

    # Create About Novel Page
    c1 = epub.EpubHtml(title="About Novel", file_name='About_novel' + '.xhtml', lang='hr')
    c1.content = about.encode('utf-8')
    book.add_item(c1)
    book.toc.insert(0, c1)
    book.spine.insert(1, c1)
    print("Created \"About Novel\" Page")
    save()
async def async_fetch_urlset(urls, download_dir, pbar=None, verbose=False):
    async with httpx.AsyncClient(http2=True) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=10)
        process_download = partial(process, download_dir=download_dir, pbar=pbar, verbose=verbose)
        zs = stream.map(ys, process_download)
        return await zs
async def async_fetch_urlset(urls, schedules, pbar=None, verbose=False, use_http2=True):
    async with httpx.AsyncClient(http2=use_http2) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=20)  # 30 is similar IDK
        process = partial(process_soup, schedules=schedules, pbar=pbar, verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
async def async_fetch_episodes(listings, pbar=None, verbose=False, use_http2=False):
    jsons = dict(zip(listings.broadcasts_urlset, listings.all_broadcasts))
    limits = httpx.Limits(max_keepalive_connections=20)
    async with httpx.AsyncClient(http2=use_http2, limits=limits) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(listings.broadcasts_urlset))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=20)  # 20 is optimal
        process = partial(process_json, jsons=jsons, pbar=pbar, verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
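The two async_fetch_urlset variants and async_fetch_episodes above share one aiostream pipeline: repeat the httpx session, zip it with the URL set, starmap an async fetch under a task limit, then map a post-processing step and await the stream. The following is a minimal, self-contained sketch of that pattern under assumed imports of aiostream and httpx; fetch, process, and fetch_urlset here are hypothetical stand-ins for illustration, not the functions used in the snippets.

import asyncio

import httpx
from aiostream import stream


async def fetch(session, url):
    # Hypothetical fetch step: request the URL and return it with the body.
    response = await session.get(url)
    response.raise_for_status()
    return url, response.text


async def process(item):
    # Hypothetical post-processing step; the snippets parse soup/JSON or write files here.
    url, body = item
    print(f"{url}: {len(body)} bytes")
    return url


async def fetch_urlset(urls):
    async with httpx.AsyncClient() as session:
        ws = stream.repeat(session)                    # reuse a single client for every URL
        xs = stream.zip(ws, stream.iterate(urls))      # yields (session, url) pairs
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=10)
        zs = stream.map(ys, process)
        return await zs                                # awaiting a stream returns its last item


if __name__ == "__main__":
    asyncio.run(fetch_urlset(["https://example.com"]))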
async def main():
    db = AsyncIOMotorClient().db

    async def persist(updates):
        updates and await db[AGGREGATED_COLLECTION].bulk_write(updates)
        print('simulating long sleep')
        await asyncio.sleep(2)
        return 'done'

    batcher = Batcher(persist, interval=PERSIST_INTERVAL)

    def key(doc):
        return doc[ID_KEY]

    async def function(acc, document):
        return acc + 1

    async def initializer(doc: dict):
        value = await db[AGGREGATED_COLLECTION].find_one(
            {AGGREGATED_ID_KEY: key(doc)})
        value = value and value.get(AGGREGATED_KEY)
        return value or 0

    # initializer = 0  # TODO rm
    xs = events(collection=db[EVENTS_COLLECTION])
    xs = accumulate_by_key(xs, function, key=key, initializer=initializer)
    xs = stream.starmap(xs, make_db_operation)
    xs = stream.map(xs, batcher.push)  # task_limit=1)
    # xs = window(xs, PERSIST_INTERVAL)
    # xs = stream.map(xs, take_last)
    # xs = stream.map(xs, list)
    # xs = stream.map(xs, lambda x: [z[1] for z in x])
    # xs = stream.map(xs, persist, task_limit=1)
    xs = stream.map(xs, pretty)
    await asyncio.gather(
        store_some(db),
        xs,
    )
async def runner(self):
    while True:
        tasks = []
        with open(self.config_file) as fp:
            config = load(fp)
        urls = config['urls']
        animes_path = ((anime, join(self.root_path, anime))
                       for anime in config['urls'].keys())
        self.log.debug("Paths found %s", animes_path)
        for anime, anime_path in animes_path:
            downloadeds = set(int(split(x)[1][:-4])
                              for x in iglob(join(anime_path, '*.mp4')))
            last = max(downloadeds)
            not_found = sorted(set(range(1, last)) - downloadeds)
            apped_not_found = not_found.append
            next_ok = True
            while next_ok:
                last += 1
                if await self.check_ok(urls[anime] % last):
                    apped_not_found(last)
                else:
                    self.log.info("Episode %d not found for %s", last, anime)
                    next_ok = False
            self.log.debug("Not founds %s", not_found)
            tasks += [(anime, num, urls[anime], anime_path) for num in not_found]
            if not not_found:
                self.log.info("No new episode for %s", anime)
            else:
                await self.send_message_async(
                    f"New episodes for {anime}:\n {', '.join(str(x) for x in not_found)}"
                )
        async with starmap(iterate(tasks), self.task_job,
                           task_limit=self.task_limit).stream() as stream:
            async for _ in stream:
                pass
        await sleep(self.sleep_time)