def _make_api_request(url, session=None, extra_headers={}):
    while True:
        if debug:
            output.warning("Mangadex API: requesting -> " + url)
        try:
            if session:
                r = session.get('https://api.mangadex.org/' + url.strip('/'),
                                headers={**MangadexV5Series.headers,
                                         **extra_headers})
            else:
                r = requests.get('https://api.mangadex.org/' + url.strip('/'),
                                 headers={**MangadexV5Series.headers,
                                          **extra_headers})
        except requests.exceptions.ConnectionError:
            output.error(
                "Mangadex API: request to endpoint failed: {}".format(url))
            raise exceptions.ScrapingError
        if r.status_code == 200:
            return r
        elif r.status_code == 429:
            retry_delay = int(r.headers["retry-after"])
            output.warning(
                "Mangadex API: wait {} seconds...".format(retry_delay))
            time.sleep(retry_delay)
        else:
            output.error("Mangadex API: got bad status code {}".format(
                r.status_code))
            return r
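# A minimal, self-contained sketch of the 429/Retry-After backoff loop used by
# _make_api_request above, for reference outside the scraper. The function
# name and attempt cap are assumptions for illustration; the real helper
# reuses the scraper's session and MangadexV5Series.headers.
import time
import requests

def _fetch_with_backoff(url, max_attempts=5):
    for _ in range(max_attempts):
        r = requests.get(url)
        if r.status_code == 429:
            # Mangadex signals rate limiting with a Retry-After header.
            time.sleep(int(r.headers.get("retry-after", "1")))
            continue
        return r
    return r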
def test_database():
    """Runs a database sanity test."""
    sanity_tester = sanity.DatabaseSanity(Base, engine)
    sanity_tester.test()
    if sanity_tester.errors:
        for error in sanity_tester.errors:
            err_target, err_msg = str(error).split(' ', 1)
            message = ' '.join([click.style(err_target, bold=True), err_msg])
            output.warning(message)
        output.error('Database has failed sanity check; '
                     'run `cu2 repair-db` to repair database')
        exit(1)
def get(self, use_db=True):
    """Downloads the chapter if it is available.

    If `use_db` is set to False, the chapter is neither removed from the
    database nor marked as downloaded.
    """
    if self.available():
        self.download()
        if use_db:
            self.mark_downloaded()
    elif use_db:
        output.warning('Removing {} {}: missing from remote'.format(
            self.name, self.chapter))
        self.db_remove()
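# Usage sketch for the chapter class's get() (assumed call sites, mirroring
# the follow/update and `cu2 get` flows elsewhere in this file set):
#
#   chapter.get()             # download and mark as downloaded in the database
#   chapter.get(use_db=False) # download only; database state is left untouched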
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = utility.series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)
        del series
    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning('Could not download {c.alias} {c.chapter}: {e}'
                               .format(c=chapter, e=e.message))
            del chapter
def page_download_task(page_num, r, page_url=None):
    """Saves the response body of a single request, returning the file
    handle and the passed-through page number to allow for non-sequential
    downloads in parallel.
    """
    ext = BaseChapter.guess_extension(r.headers.get('content-type'))
    f = NamedTemporaryFile(suffix=ext, delete=False)
    retries = 20
    while retries > 0:
        try:
            for chunk in r.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)
            retries = 0
        # basically ignores this exception that requests throws. my
        # understanding is that it is raised when you attempt to
        # iter_content() over the same content twice. don't understand how
        # that situation arises with the current code but it did somehow.
        # https://stackoverflow.com/questions/45379903/
        except requests.exceptions.StreamConsumedError:
            pass
        # when under heavy load, Mangadex will often kill the connection in
        # the middle of an image download. in the original architecture,
        # the requests are all opened in the scrapers in stream mode, then
        # the actual image payloads are downloaded in the asynchronous
        # callbacks. when this occurs we have no choice but to re-request
        # the image from the beginning (easier than playing around with
        # range headers). this means each thread may issue multiple new
        # requests. I have found the performance overhead to be mostly
        # negligible.
        except requests.exceptions.ChunkedEncodingError:
            if not page_url:
                output.error(
                    "Connection killed on page {} but scraper does not support retries"
                    .format(str(page_num)))
                raise exceptions.ScrapingError
            output.warning(
                "Connection killed on page {}, {} retries remaining"
                .format(str(page_num), str(retries)))
            retries = retries - 1
            if retries <= 0:
                output.error(
                    "Connection killed on page {}, no retries remaining - aborting chapter"
                    .format(str(page_num)))
                raise exceptions.ScrapingError
            r = self.req_session.get(page_url, stream=True)
    f.flush()
    f.close()
    r.close()
    return (page_num, f)
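# A self-contained sketch of the fan-out pattern the closure above is written
# for: page requests are opened in stream mode, their bodies are consumed by a
# thread pool, and the results are re-sorted by page number afterwards. The
# function names, worker count, and ".png" suffix are assumptions for
# illustration, not the scraper's real wiring.
import concurrent.futures
from tempfile import NamedTemporaryFile
import requests

def _fetch_page(page_num, url, session):
    r = session.get(url, stream=True)
    f = NamedTemporaryFile(suffix=".png", delete=False)
    for chunk in r.iter_content(chunk_size=4096):
        if chunk:
            f.write(chunk)
    f.close()
    r.close()
    return page_num, f.name

def _fetch_pages(page_urls, workers=4):
    session = requests.Session()
    with concurrent.futures.ThreadPoolExecutor(workers) as pool:
        futures = [pool.submit(_fetch_page, n, u, session)
                   for n, u in enumerate(page_urls, start=1)]
        results = [fut.result()
                   for fut in concurrent.futures.as_completed(futures)]
    # Re-sort so the archive is assembled in page order.
    return [path for _, path in sorted(results)]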
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the URL of the chapter, the alias
    of a followed series, or the alias:chapter combination (e.g.
    'bakuon:11') if the chapter is already found in the database through a
    follow. The command will not enter the downloads in the database in
    the case of URLs and ignores downloaded status in the case of
    alias:chapter, so it can be used to download one-shots that don't
    require follows or to redownload already downloaded chapters.
    """
    chapter_list = []
    for item in input:
        series = None
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            pass
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        chapter = None
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            pass
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
        if series:
            del series
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
        del chapter
def _translate_chapter_id(chapter_id):
    try:
        legacy_chapter_id = int(chapter_id)
        if debug:
            output.warning(
                "Mangadex API: querying legacy chapter {} -> /legacy/mapping"
                .format(str(legacy_chapter_id)))
        r = requests.post("https://api.mangadex.org/legacy/mapping", json={
            "type": "chapter",
            "ids": [legacy_chapter_id]
        })
        try:
            return r.json()["data"][0]["attributes"]["newId"]
        except KeyError:
            return "invalid"
    except ValueError:
        return chapter_id
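# Usage sketch for _translate_chapter_id (the ids below are made up): a purely
# numeric id is treated as a legacy id and resolved through /legacy/mapping,
# while a v5 UUID fails int() and is returned unchanged, so old and new
# chapter URLs share one code path.
#
#   _translate_chapter_id("123456")                                # legacy -> new UUID, or "invalid"
#   _translate_chapter_id("0f4ef2b1-6a5e-4f6c-8f3e-9d2c1b0a7e4d")  # returned as-is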
def download(aliases):
    """Download all available chapters.

    If an optional alias is specified, the command will only download new
    chapters for that alias.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
        except exceptions.ScrapingError:
            pass
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'
                      .format(len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'
                                .format(aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
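# A minimal, self-contained sketch of the fan-out used by update() above: one
# job per followed series is submitted to a thread pool and a progress bar is
# driven from as_completed() so slow scrapes don't stall the display. The
# function name, inputs, and worker count are placeholders, not cu2's real
# scraper calls.
import concurrent.futures
import click

def _refresh_all(urls, fetch, threads=4):
    with concurrent.futures.ThreadPoolExecutor(threads) as pool:
        futures = [pool.submit(fetch, url) for url in urls]
        with click.progressbar(length=len(futures), show_pos=True,
                               fill_char='>', empty_char=' ') as bar:
            for future in concurrent.futures.as_completed(futures):
                future.result()
                bar.update(1)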
def page_download_task(page_num, r, page_url=None):
    """Saves the response body of a single page request and reports the
    outcome (success or failure) to the Mangadex network report endpoint.
    """
    ext = BaseChapter.guess_extension(r.headers.get("content-type"))
    f = NamedTemporaryFile(suffix=ext, delete=False)
    download_start_time = int(time.time())
    try:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    except ConnectionError:
        f.flush()
        # page failed to download, send failure report
        if debug:
            output.warning("Mangadex API: send failure report")
        requests.post("https://api.mangadex.network/report", data={
            "url": page_url,
            "success": False,
            "bytes": f.tell(),
            "duration": int(time.time()) - download_start_time,
            "cached": True if r.headers.get("X-Cache") else False
        })
        raise exceptions.ScrapingError
    f.flush()
    # page download successful, send success report
    if debug:
        output.warning("Mangadex API: send success report")
    requests.post("https://api.mangadex.network/report", data={
        "url": page_url,
        "success": True,
        "bytes": f.tell(),
        "duration": int(time.time()) - download_start_time,
        "cached": True if r.headers.get("X-Cache") else False
    })
    f.close()
    r.close()
    return (page_num, f)
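# A small helper sketch showing how the duplicated report payload above could
# be built in one place. The field names mirror the report body already posted
# by page_download_task; the helper name and signature are assumptions of this
# sketch.
import time
import requests

def _send_network_report(page_url, success, byte_count, start_time, cached):
    requests.post("https://api.mangadex.network/report", data={
        "url": page_url,
        "success": success,
        "bytes": byte_count,
        "duration": int(time.time()) - start_time,
        "cached": cached,
    })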
def follow(self, ignore=False):
    """Adds the series details and all current chapters to the database."""
    try:
        s = db.session.query(db.Series).filter_by(url=self.url).one()
    except NoResultFound:
        s = db.Series(self)
        s.check_alias_uniqueness()
        output.series('Adding follow for {s.name} ({s.alias})'.format(s=s))
        db.session.add(s)
        db.session.commit()
    else:
        if s.following:
            output.warning(
                'You are already following {s.name} ({s.alias})'.format(s=s))
        else:
            s.directory = self.directory
            s.following = True
            db.session.commit()
    for chapter in self.chapters:
        chapter.save(s, ignore=ignore)