async def main():
    get_urls_titles()
    global start
    global end
    global counter
    global err

    start, end = intro()

    if end == length:
        min_urls_titles = urls_titles[start:]
    else:
        min_urls_titles = urls_titles[start:end + 1]

    counter = start - 1

    xs = stream.iterate(min_urls_titles)
    ys = stream.starmap(xs, fetch, task_limit=100)
    zs = stream.starmap(ys, process, task_limit=100)
    await zs

    if counter < 0: counter = 0

    # About Novel
    for img in about.find_all('img'):  # Remove all <img> tags
        img.decompose()
    for a in about.select("a"):  # Neutralize anchor links instead of removing the tags
        a['href'] = '#'
    html_gen("hr", soup, "", about)
    html_gen("h3", soup, "Description", about)
    syn = synopsis.text.replace("Description", "")
    html_gen("p", soup, syn, about)
    html_gen("hr", soup, "", about)
    html_gen("h3", soup, "About This Download : ", about)
    html_gen("p", soup, "Total Chapters = " + str(counter), about)
    html_gen("p", soup,
             "No. Of Chapters That Raised Exceptions = " + str(len(err)),
             about)
    if len(err) != 0:
        html_gen("p", soup, "And They Are : ", about)
        for i in err:
            html_gen("li", soup, str(i), about)
    html_gen("hr", soup, "", about)

    # Create About Novel Page
    c1 = epub.EpubHtml(title="About Novel",
                       file_name='About_novel.xhtml',
                       lang='hr')
    c1.content = about.encode('utf-8')
    book.add_item(c1)
    book.toc.insert(0, c1)
    book.spine.insert(1, c1)
    print("Created \"About Novel\" Page")

    save()
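
The pipeline in main() leans on module-level globals (urls_titles, about, soup, fetch, process, html_gen, save, ...), so it cannot run on its own. A minimal, self-contained sketch of the same iterate → starmap → await shape, with placeholder fetch and process coroutines (both are hypothetical stand-ins, not the scraper's real functions), might look like this:

import asyncio
from aiostream import stream

# Placeholder coroutines standing in for the scraper's real fetch/process.
async def fetch(url, title):
    await asyncio.sleep(0.1)                        # simulate network latency
    return url, title, f"<html>{title}</html>"

async def process(url, title, body):
    print(f"processed {title} from {url}")

async def main():
    pairs = [(f"https://example.com/{i}", f"Chapter {i}") for i in range(5)]
    xs = stream.iterate(pairs)                      # source of (url, title) tuples
    ys = stream.starmap(xs, fetch, task_limit=100)  # at most 100 concurrent fetches
    zs = stream.starmap(ys, process, task_limit=100)
    await zs                                        # run the whole pipeline to completion

asyncio.run(main())

Awaiting the final stream drives every stage; task_limit is what bounds how many fetch/process calls run at once.
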
Example #2
async def async_fetch_urlset(urls, download_dir, pbar=None, verbose=False):
    async with httpx.AsyncClient(http2=True) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=10)
        process_download = partial(process,
                                   download_dir=download_dir,
                                   pbar=pbar,
                                   verbose=verbose)
        zs = stream.map(ys, process_download)
        return await zs
Example #3
async def async_fetch_urlset(urls,
                             schedules,
                             pbar=None,
                             verbose=False,
                             use_http2=True):
    async with httpx.AsyncClient(http2=use_http2) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False,
                            task_limit=20)  # a limit of 30 seems to perform about the same
        process = partial(process_soup,
                          schedules=schedules,
                          pbar=pbar,
                          verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
Example #4
async def async_fetch_episodes(listings,
                               pbar=None,
                               verbose=False,
                               use_http2=False):
    jsons = dict(zip(listings.broadcasts_urlset, listings.all_broadcasts))
    limits = httpx.Limits(max_keepalive_connections=20)
    async with httpx.AsyncClient(http2=use_http2, limits=limits) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(listings.broadcasts_urlset))
        ys = stream.starmap(xs, fetch, ordered=False,
                            task_limit=20)  # 20 is optimal
        process = partial(process_json,
                          jsons=jsons,
                          pbar=pbar,
                          verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
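
Examples #2-#4 all use the same idiom: stream.repeat(session) zipped with the URL iterator, so every (session, url) pair handed to fetch reuses one shared httpx client while task_limit caps concurrency. A stripped-down sketch of that idiom, assuming a hypothetical fetch coroutine rather than the ones above:

import asyncio
import httpx
from aiostream import stream

# Hypothetical stand-in for the fetch coroutines used in the examples.
async def fetch(session, url):
    resp = await session.get(url)
    return url, resp.status_code

async def fetch_all(urls):
    async with httpx.AsyncClient() as session:
        ws = stream.repeat(session)                  # the same client, repeated indefinitely
        xs = stream.zip(ws, stream.iterate(urls))    # (session, url) pairs; ends when urls is exhausted
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=20)
        return await stream.list(ys)                 # collect every (url, status) result

print(asyncio.run(fetch_all(["https://example.com/", "https://www.python.org/"])))
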
Example #5
async def main():
    db = AsyncIOMotorClient().db

    async def persist(updates):
        if updates:
            await db[AGGREGATED_COLLECTION].bulk_write(updates)
        print('simulating long sleep')
        await asyncio.sleep(2)
        return 'done'

    batcher = Batcher(persist, interval=PERSIST_INTERVAL)

    def key(doc):
        return doc[ID_KEY]

    async def function(acc, document):
        return acc + 1

    async def initializer(doc: dict):
        value = await db[AGGREGATED_COLLECTION].find_one(
            {AGGREGATED_ID_KEY: key(doc)})
        value = value and value.get(AGGREGATED_KEY)
        return value or 0

    # initializer = 0  # TODO rm

    xs = events(collection=db[EVENTS_COLLECTION])
    xs = accumulate_by_key(xs, function, key=key, initializer=initializer)
    xs = stream.starmap(xs, make_db_operation)
    xs = stream.map(xs, batcher.push)  # task_limit=1
    # xs = window(xs, PERSIST_INTERVAL)
    # xs = stream.map(xs, take_last)
    # xs = stream.map(xs, list)
    # xs = stream.map(xs, lambda x: [z[1] for z in x])
    # xs = stream.map(xs, persist, task_limit=1)
    xs = stream.map(xs, pretty)
    await asyncio.gather(
        store_some(db),
        xs,
    )
Example #6
async def runner(self):
    while True:
        tasks = []
        with open(self.config_file) as fp:
            config = load(fp)
        urls = config['urls']
        animes_path = [(anime, join(self.root_path, anime))
                       for anime in urls]
        self.log.debug("Paths found %s", animes_path)
        for anime, anime_path in animes_path:
            # Episode numbers already on disk, parsed from the .mp4 filenames.
            downloadeds = set(int(split(x)[1][:-4])
                              for x in iglob(join(anime_path, '*.mp4')))
            last = max(downloadeds, default=0)  # default avoids ValueError on an empty folder
            not_found = sorted(set(range(1, last)) - downloadeds)
            append_not_found = not_found.append
            next_ok = True
            # Probe the remote site for episodes newer than the last one on disk.
            while next_ok:
                last += 1
                if await self.check_ok(urls[anime] % last):
                    append_not_found(last)
                else:
                    self.log.info("Episode %d not found for %s", last,
                                  anime)
                    next_ok = False
            self.log.debug("Episodes to download: %s", not_found)
            tasks += [(anime, num, urls[anime], anime_path)
                      for num in not_found]
            if not not_found:
                self.log.info("No new episode for %s", anime)
            else:
                await self.send_message_async(
                    f"New episodes for {anime}:\n    {', '.join(str(x) for x in not_found)}"
                )
        # Download every missing episode, at most task_limit at a time.
        async with starmap(iterate(tasks),
                           self.task_job,
                           task_limit=self.task_limit).stream() as stream:
            async for _ in stream:
                pass
        await sleep(self.sleep_time)
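
The examples consume their pipelines in two different ways: awaiting the stream object (Examples #1-#5) runs it and returns the last emitted item, while opening it with .stream() and iterating (Example #6) yields every item. A small sketch contrasting both, using only aiostream and a trivial source:

import asyncio
from aiostream import stream

def pipeline():
    # Trivial pipeline: 0, 1, 2 mapped to 0, 10, 20.
    return stream.map(stream.iterate(range(3)), lambda x: x * 10)

async def demo():
    print(await pipeline())                 # runs the pipeline, returns the last item: 20

    async with pipeline().stream() as streamer:
        async for item in streamer:         # yields every item: 0, 10, 20
            print(item)

asyncio.run(demo())
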