def update(): """Gather new chapters from followed series.""" pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads) futures = [] warnings = [] aliases = {} query = db.session.query(db.Series).filter_by(following=True).all() output.series('Updating {} series'.format(len(query))) for follow in query: fut = pool.submit(series_by_url, follow.url) futures.append(fut) aliases[fut] = follow.alias with click.progressbar(length=len(futures), show_pos=True, fill_char='>', empty_char=' ') as bar: for future in concurrent.futures.as_completed(futures): try: series = future.result() except requests.exceptions.ConnectionError as e: warnings.append('Unable to update {} (connection error)' .format(aliases[future])) except exceptions.ScrapingError: warnings.append('Unable to update {} (scraping error)' .format(follow.alias)) else: series.update() bar.update(1) for w in warnings: output.warning(w) list_new()
def test_database(): """Runs a database sanity test.""" sanity_tester = sanity.DatabaseSanity(Base, engine) sanity_tester.test() if sanity_tester.errors: for error in sanity_tester.errors: err_target, err_msg = str(error).split(' ', 1) message = ' '.join([click.style(err_target, bold=True), err_msg]) output.warning(message) output.error('Database has failed sanity check; ' 'run `cum repair-db` to repair database') exit(1)
def get(self, use_db=True):
    """Downloads the chapter if it is available.

    Optionally does not attempt to remove the chapter from the database or
    mark the chapter as downloaded if `use_db` is set to False.
    """
    if self.available():
        self.download()
        if use_db:
            self.mark_downloaded()
    elif use_db:
        output.warning('Removing {} {}: missing from remote'
                       .format(self.name, self.chapter))
        self.db_remove()
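# A minimal usage sketch of the two call paths above; `chapter` stands for
# any chapter instance exposing this method and is hypothetical here.
chapter.get()              # download and record the result in the database
chapter.get(use_db=False)  # download without touching the database, as the
                           # `get` command further below does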
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = utility.series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)
    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning(
                    'Could not download {c.alias} {c.chapter}: {e}'.format(
                        c=chapter, e=e.message))
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)
    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning('Could not download {c.alias} {c.chapter}: {e}'
                               .format(c=chapter, e=e.message))
                continue
def download(aliases):
    """Download all available chapters.

    If an optional alias is specified, the command will only download new
    chapters for that alias.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'.format(
            len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append(
                    'Unable to update {} (connection error)'.format(
                        aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'.format(
                    aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'.format(
                    aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
def download(aliases):
    """Download all available chapters.

    If an optional alias is specified, the command will only download new
    chapters for that alias.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning(
                'Could not download {c.alias} {c.chapter}: {e}'.format(
                    c=chapter, e=e.message))
def follow(self, ignore=False):
    """Adds the series details and all current chapters to the database."""
    output.series('Adding follow for {s.name} ({s.alias})'.format(s=self))
    try:
        s = db.session.query(db.Series).filter_by(url=self.url).one()
    except NoResultFound:
        s = db.Series(self)
        db.session.add(s)
        db.session.commit()
    else:
        if s.following:
            output.warning('You are already following this series')
        else:
            s.directory = self.directory
            s.following = True
            db.session.commit()
    for chapter in self.chapters:
        chapter.save(s, ignore=ignore)
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'
                      .format(len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'
                                .format(aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
def follow(self, ignore=False):
    """Adds the series details and all current chapters to the database."""
    try:
        s = db.session.query(db.Series).filter_by(url=self.url).one()
    except NoResultFound:
        s = db.Series(self)
        s.check_alias_uniqueness()
        output.series('Adding follow for {s.name} ({s.alias})'.format(s=s))
        db.session.add(s)
        db.session.commit()
    else:
        if s.following:
            output.warning('You are already following {s.name} ({s.alias})'
                           .format(s=s))
        else:
            s.directory = self.directory
            s.following = True
            db.session.commit()
    for chapter in self.chapters:
        chapter.save(s, ignore=ignore)
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the URL of the chapter or the
    alias:chapter combination (e.g. 'bakuon:11'), if the chapter is already
    found in the database through a follow. The command will not enter the
    downloads in the database in case of URLs and ignores downloaded status
    in case of alias:chapter, so it can be used to download one-shots that
    don't require follows or for redownloading already downloaded chapters.
    """
    chapter_list = []
    for i in input:
        try:
            series = series_by_url(i)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(i))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = chapter_by_url(i)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(i))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            try:
                a, c = i.split(':')
            except ValueError:
                output.warning('Invalid selection "{}"'.format(i))
            else:
                chapters = (db.session.query(db.Chapter)
                            .join(db.Series)
                            .filter(db.Series.alias == a,
                                    db.Chapter.chapter == c)
                            .all())
                for chapter in chapters:
                    chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        chapter.get(use_db=False)
def download(self):
    if getattr(self, 'r', None):
        r = self.r
    else:
        r = self.reader_get(1)
    soup = BeautifulSoup(r.text, config.get().html_parser)
    if soup.find('a', href='#{}_1_t'.format(self.batoto_hash)):
        # The chapter uses webtoon layout, meaning all of the images are on
        # the same page.
        pages = [''.join(i) for i in re.findall(self.img_path_re, r.text)]
    else:
        pages = [x.get('value') for x in
                 soup.find('select', id='page_select').find_all('option')]
    if not pages:
        output.warning('Skipping {s.name} {s.chapter}: '
                       'chapter has no pages'.format(s=self))
        return
    # Replace the first URL with the first image URL to avoid scraping the
    # first page twice.
    pages[0] = soup.find('img', id='comic_page').get('src')
    next_match = re.search(self.next_img_path_re, r.text)
    if next_match:
        pages[1] = next_match.group(1)
    files = [None] * len(pages)
    futures = []
    last_image = None
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            try:
                if guess_type(page)[0]:
                    image = page
                else:
                    # Predict the next URL based on the last URL
                    for reg in [self.img_path_re, self.ch_img_path_re]:
                        m = re.match(reg, last_image)
                        if m:
                            break
                    else:
                        raise ValueError
                    mg = list(m.groups())
                    num_digits = len(mg[1])
                    mg[1] = "{0:0>{digits}}".format(int(mg[1]) + 1,
                                                    digits=num_digits)
                    image = "".join(mg)
                r = requests.get(image, stream=True)
                if r.status_code == 404:
                    r.close()
                    raise ValueError
            except ValueError:
                # If we fail to do prediction, scrape
                r = self.reader_get(i + 1)
                soup = BeautifulSoup(r.text, config.get().html_parser)
                image = soup.find('img', id='comic_page').get('src')
                image2_match = re.search(self.next_img_path_re, r.text)
                if image2_match:
                    pages[i + 1] = image2_match.group(1)
                r = requests.get(image, stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
            last_image = image
        concurrent.futures.wait(futures)
        self.create_zip(files)
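# Sketch of the zero-padded increment used in the URL-prediction branch
# above; the sample value is hypothetical, not taken from a real image URL.
num = '007'
nxt = "{0:0>{digits}}".format(int(num) + 1, digits=len(num))
assert nxt == '008'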
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the URL of the chapter, the alias of
    a followed series, or the alias:chapter combination (e.g. 'bakuon:11'),
    if the chapter is already found in the database through a follow. The
    command will not enter the downloads in the database in case of URLs and
    ignores downloaded status in case of alias:chapter, so it can be used to
    download one-shots that don't require follows or for redownloading
    already downloaded chapters.
    """
    chapter_list = []
    for item in input:
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the URL of the chapter, the alias of
    a followed series, or the alias:chapter combination (e.g. 'bakuon:11'),
    if the chapter is already found in the database through a follow. The
    command will not enter the downloads in the database in case of URLs and
    ignores downloaded status in case of alias:chapter, so it can be used to
    download one-shots that don't require follows or for redownloading
    already downloaded chapters.
    """
    chapter_list = []
    for item in input:
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning(
                'Could not download {c.alias} {c.chapter}: {e}'.format(
                    c=chapter, e=e.message))
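# Sketch of the alias:chapter fallback above; 'bakuon' mirrors the docstring
# example and the chapter number is hypothetical.
alias, chapter = 'bakuon:11'.split(':')  # ('bakuon', '11'): filter on both
# 'bakuon'.split(':') yields ['bakuon']; unpacking it into two names raises
# ValueError, so the query falls back to filtering on the alias alone.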