def test_config_set_cbz_false(self):
    """Setting cbz to 'False' via the CLI clears a previously-true flag."""
    # Persist cbz=True first so the command has something to flip off.
    config.get().cbz = True
    config.get().write()
    outcome = self.invoke('config', 'set', 'cbz', 'False')
    self.assertEqual(outcome.exit_code, 0)
    self.assertFalse(config.get().cbz)
def setUp(self):
    """Create an isolated temp config/download directory and a CLI runner."""
    self.runner = CliRunner()
    self.directory = tempfile.TemporaryDirectory()
    config.initialize(self.directory.name)
    # Point downloads at the temp dir and install test credentials.
    config.get().download_directory = self.directory.name
    config.get().madokami.username = self.madokami_username
    config.get().madokami.password = self.madokami_password
    config.get().write()
def __init__(self, url, **kwargs):
    """Fetch the Madokami series page with HTTP basic auth and parse chapters.

    Raises:
        exceptions.LoginError: if the server rejects the configured
            credentials (HTTP 401).
    """
    super().__init__(url, **kwargs)
    credentials = config.get().madokami.login
    self.req_session.auth = requests.auth.HTTPBasicAuth(*credentials)
    response = self.req_session.get(url)
    # Madokami answers 401 when the configured login is rejected.
    if response.status_code == 401:
        raise exceptions.LoginError('Madokami login error')
    self.soup = BeautifulSoup(response.text, config.get().html_parser)
    self.chapters = self.get_chapters()
def test_config_get(self):
    """`config get` with no setting lists every configured value.

    NOTE(review): this line was corrupted in the source — the credential
    string expressions were redacted to '******', fusing adjacent string
    literals and swallowing the `self.invoke('config', 'get')` call.
    Reconstructed to match the pattern of the sibling tests
    (test_config_get_download_directory); confirm against upstream.
    """
    MESSAGES = [
        'download_directory = ' + config.get().download_directory,
        'madokami.password = ' + config.get().madokami.password,
        'madokami.username = ' + config.get().madokami.username,
    ]
    result = self.invoke('config', 'get')
    self.assertEqual(result.exit_code, 0)
    for message in MESSAGES:
        self.assertIn(message, result.output)
def __init__(self, url, **kwargs):
    """Load the series page and collect its chapter list.

    The desktop host ("www.") is requested in place of the mobile one
    ("m.") with the isAdult cookie preset, which skips the adult-content
    interstitial the site normally enforces with javascript.
    """
    super().__init__(url, **kwargs)
    desktop_url = url.replace("m.", "www.")
    page = self.req_session.get(desktop_url, cookies={"isAdult": "1"})
    if page.status_code == 404:
        raise exceptions.ScrapingError
    self.soup = BeautifulSoup(page.text, config.get().html_parser)
    self.chapters = self.get_chapters()
def get_five_latest_releases(self):
    """Scrape the MangaHere front page and return the five newest chapter links."""
    response = requests.get(self.MANGAHERE_URL)
    soup = BeautifulSoup(response.text, config.get().html_parser)
    listing = soup.find("ul", class_="manga-list-1-list")
    links = []
    for item in listing.find_all("li"):
        subtitle = item.find("p", class_="manga-list-1-item-subtitle")
        href = subtitle.find("a").get("href")
        # hrefs are site-relative; resolve against the front-page URL.
        links.append(urljoin(self.MANGAHERE_URL, href))
    return links[:5]
def test_chapter_information_tomochan(self):
    """Check metadata, filename and cbz download for a known Mangadex chapter."""
    URL = 'https://mangadex.org/chapter/28082'
    config.get().cbz = True
    chapter = mangadex_v5.MangadexV5Chapter.from_url(URL)
    self.assertEqual(chapter.alias, 'tomo-chan-wa-onna-no-ko')
    self.assertEqual(chapter.chapter, '1')
    self.assertEqual(chapter.groups, ['M@STER Scans'])
    self.assertEqual(chapter.name, 'Tomo-chan wa Onna no ko!')
    self.assertEqual(chapter.title, 'Once In A Life Time Misfire')
    # The '@' is stripped from the group name in the filename.
    expected = os.path.join(
        self.directory.name, 'Tomo-chan wa Onna no ko',
        'Tomo-chan wa Onna no ko - c001 [MSTER Scans].cbz')
    self.assertEqual(chapter.filename, expected)
    chapter.download()
    self.assertTrue(os.path.isfile(expected))
    with zipfile.ZipFile(expected) as archive:
        self.assertEqual(len(archive.infolist()), 1)
def download(self):
    """Stream the chapter archive from Madokami into self.filename.

    Installs the configured basic-auth credentials if the session has
    none yet, and drives a progress bar when the server reports a
    content length.

    Raises:
        exceptions.LoginError: on HTTP 401 from the server.
    """
    if not self.req_session.auth:
        self.req_session.auth = requests.auth.HTTPBasicAuth(
            *config.get().madokami.login)
    with closing(self.req_session.get(self.url, stream=True)) as response:
        if response.status_code == 401:
            raise exceptions.LoginError('Madokami login error')
        length = response.headers.get('content-length')
        with open(self.filename, 'wb') as out:
            if length is None:
                # No length header: single unmetered write, no progress bar.
                out.write(response.content)
            else:
                with self.progress_bar(int(length)) as bar:
                    for chunk in response.iter_content(chunk_size=4096):
                        if not chunk:
                            continue
                        bar.update(len(chunk))
                        out.write(chunk)
                        out.flush()
def update(fast):
    """Gather new chapters from followed series."""
    # NOTE: the docstring above is the click help text — kept verbatim.
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        # Fast mode only refreshes series that report themselves stale.
        due = [series for series in query if series.needs_update]
        skipped = len(query) - len(due)
        query = due
        output.series('Updating {} series ({} skipped)'
                      .format(len(query), skipped))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        future = pool.submit(utility.series_by_url, follow.url)
        futures.append(future)
        aliases[future] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'
                                .format(aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    # Report failures only after the progress bar has closed.
    for warning in warnings:
        output.warning(warning)
    utility.list_new()
def test_chapter_no_series(self):
    """A chapter with no parent series still resolves metadata and downloads."""
    URL = 'https://dynasty-scans.com/chapters/youre_cute'
    NAME = 'Umekichi'
    CHAPTER = "You're Cute"
    config.get().cbz = True
    chapter = dynastyscans.DynastyScansChapter.from_url(URL)
    self.assertEqual(chapter.alias, NAME.lower())
    self.assertTrue(chapter.available())
    self.assertEqual(chapter.chapter, CHAPTER)
    self.assertIs(chapter.directory, None)
    self.assertEqual(chapter.groups, ['/u/ Scanlations'])
    self.assertEqual(chapter.name, NAME)
    self.assertEqual(chapter.url, URL)
    expected = os.path.join(
        self.directory.name, NAME,
        "Umekichi - c000 [You're Cute] [u Scanlations].cbz")
    self.assertEqual(chapter.filename, expected)
    chapter.get(use_db=False)
    self.assertTrue(os.path.isfile(expected))
    with zipfile.ZipFile(expected) as archive:
        self.assertEqual(len(archive.infolist()), 23)
def download(self):
    """Download every page of this chapter and pack them into an archive.

    Page URLs are obtained either through the site's progressive page
    API (when the comic/chapter ids can be scraped from an inline
    script) or, as a fallback, directly from a script variable on the
    chapter page.
    """
    # Re-use a previously fetched chapter page/soup when present; the
    # desktop host ("www.") is requested in place of the mobile one.
    if not getattr(self, "cpage", None):
        self.cpage = self.req_session.get(self.url.replace("m.", "www."),
                                          headers=chrome_headers)
    if self.cpage.status_code == 404:
        raise exceptions.ScrapingError
    if not getattr(self, "soup", None):
        self.soup = BeautifulSoup(self.cpage.text, config.get().html_parser)
    pages = []
    (mid, cid) = (None, None)
    # index of script with ids may vary
    # it may also change as ads are added/removed from the site
    for f in range(0, len(self.soup.find_all("script"))):
        try:
            if len(self.soup.find_all("script")[f].contents):
                mid = re.search(
                    "var comicid = ([0-9]+)", self.soup.find_all("script")
                    [f].contents[0]).groups()[0]
                cid = re.search(
                    "var chapterid =([0-9]+)", self.soup.find_all("script")
                    [f].contents[0]).groups()[0]
        except AttributeError:
            # Script without the expected variables (re.search returned
            # None); keep scanning the rest.
            pass
    if mid and cid:
        # Progressively request batches of pages until a request stops
        # adding new ones.
        old_num_pages = -1
        while old_num_pages != len(pages):
            old_num_pages = len(pages)
            pages = self._request_pages(mid, cid, pages)
    else:
        # some titles (seems to be ones with low page counts like
        # webtoons) don't use progressively-loaded pages. for these, the
        # image list can be extracted directly off the main page
        for g in range(0, len(self.soup.find_all("script"))):
            try:
                pages = loads(
                    re.search(
                        "var newImgs = (.+);var newImginfos",
                        beautify(
                            self.soup.find_all("script")[g].text).replace(
                            "\\", "").replace("'", "\"")).groups()[0])
            except AttributeError:
                pass
    if not len(pages):
        raise exceptions.ScrapingError
    # Page URLs are protocol-relative; prepend a scheme.
    for i, page in enumerate(pages):
        pages[i] = "https:" + page
    futures = []
    files = [None] * len(pages)
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            # Retry transient connection failures up to 10 times.
            # NOTE(review): if all 10 attempts fail, `r` below is the
            # previous page's response (or unbound for the first page) —
            # confirm this is intended.
            retries = 0
            while retries < 10:
                try:
                    r = self.req_session.get(page, stream=True)
                    break
                except requests.exceptions.ConnectionError:
                    retries += 1
            # end of chapter detection in the web ui is done by issuing
            # requests for nonexistent pages which return 404s (who
            # comes up with this)
            if r.status_code != 404:
                if r.status_code != 200:
                    r.close()
                    output.error("Page download got status code {}".format(
                        str(r.status_code)))
                    raise exceptions.ScrapingError
                fut = download_pool.submit(self.page_download_task, i, r)
                fut.add_done_callback(
                    partial(self.page_download_finish, bar, files))
                futures.append(fut)
            else:
                # Drop the placeholder slot for a nonexistent page. The
                # IndexError guard covers indices already shifted by
                # earlier deletions.
                try:
                    del files[i]
                except IndexError:
                    raise exceptions.ScrapingError
    concurrent.futures.wait(futures)
    self.create_zip(files)
def test_config_set_cbz(self):
    """`config set cbz True` succeeds and the flag reads back truthy."""
    outcome = self.invoke('config', 'set', 'cbz', 'True')
    self.assertEqual(outcome.exit_code, 0)
    self.assertTrue(config.get().cbz)
def test_config_get_download_directory(self):
    """`config get download_directory` prints the configured path."""
    expected = 'download_directory = ' + config.get().download_directory
    outcome = self.invoke('config', 'get', 'download_directory')
    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def config_command(mode, setting, value):
    """Get or set configuration options.

    Mode can be either "get" or "set", depending on whether you want to
    read or write configuration values. If mode is "get", you can
    specify a setting to read that particular setting or omit it to
    list out all the settings. If mode is "set", you must specify the
    setting to change and assign it a new value.
    """
    if mode == 'get':
        if setting:
            # Walk dotted paths (e.g. 'madokami.username') attribute by
            # attribute down from the config root.
            parameters = setting.split('.')
            value = config.get()
            for parameter in parameters:
                try:
                    value = getattr(value, parameter)
                except AttributeError:
                    output.error('Setting not found')
                    exit(1)
            output.configuration({setting: value})
        else:
            configuration = config.get().serialize()
            output.configuration(configuration)
    elif mode == 'set':
        if setting is None:
            output.error('You must specify a setting')
            exit(1)
        if value is None:
            output.error('You must specify a value')
            exit(1)
        parameters = setting.split('.')
        preference = config.get()
        # Resolve the parent object of the leaf attribute being set.
        for parameter in parameters[0:-1]:
            try:
                preference = getattr(preference, parameter)
            except AttributeError:
                output.error('Setting not found')
                exit(1)
        try:
            current_value = getattr(preference, parameters[-1])
        except AttributeError:
            output.error('Setting not found')
            exit(1)
        # Coerce the CLI string to the current value's type so the
        # config keeps consistent types on disk.
        if current_value is not None:
            if isinstance(current_value, bool):
                # BUG FIX: previously `value == 0` compared the CLI
                # string against the int 0, which is never true; compare
                # against the string '0' so `config set cbz 0` disables
                # the flag as intended. Any other value means True.
                if value.lower() == 'false' or value == '0':
                    value = False
                else:
                    value = True
            else:
                try:
                    value = type(current_value)(value)
                except ValueError:
                    output.error('Type mismatch: value should be {}'
                                 .format(type(current_value).__name__))
                    exit(1)
        setattr(preference, parameters[-1], value)
        config.get().write()
    else:
        output.error('Mode must be either get or set')
        exit(1)
from abc import ABCMeta, abstractmethod from concurrent.futures import ThreadPoolExecutor from cu2 import config, db, exceptions, output from mimetypes import guess_extension from re import match, sub from sqlalchemy.exc import IntegrityError, SQLAlchemyError from sqlalchemy.orm.exc import NoResultFound from tempfile import NamedTemporaryFile import click import os import requests import sys import zipfile download_pool = ThreadPoolExecutor(config.get().download_threads) class BaseSeries(metaclass=ABCMeta): """Class that is used to represent an individual series on a site.""" def __init__(self, url, **kwargs): self.url = url self.directory = kwargs.get('directory', None) self.req_session = requests.Session() def __del__(self): self.req_session.close() @property def alias(self): """Returns an alias version of the series name, which only allows a certain command-line friendly set of characters.
def filename(self): name = self.name.replace('/', '') # Individually numbered chapter or a chapter range (e.g. '35', # '001-007'). if match(r'[0-9\-]*$', self.chapter): chapter = ('c' + '-'.join(x.zfill(3) for x in self.chapter.split('-'))) # Individually numbered chapter with letter following the number # (e.g. '35v2', '9a'). elif match(r'[0-9]*[A-Za-z][0-9]*?$', self.chapter): number = match(r'([0-9]*)[A-Za-z]', self.chapter).group(1) chapter = 'c{:0>3}'.format(number) # Individually numbered chapter with decimal (e.g. '1.5'). elif match(r'[0-9]*\.[0-9]*$', self.chapter): number, decimal = self.chapter.split('.') chapter = 'c{:0>3} x{}'.format(number, decimal) # Individually numbered chapter with double-decimal (e.g. '2.164.5'). # Used by titles with multiple volumes/seasons and special chapters. elif match(r'[0-9]*(\.[0-9]*){2}$', self.chapter): volume, number, decimal = self.chapter.split('.') chapter = 'c{:0>3} x{:0>3}.{}'.format(volume, number, decimal) # Failing all else, e.g. 'Special'. Becomes 'c000 [Special]'. else: chapter = 'c000 [{}]'.format(self.chapter) if self.groups: group = ''.join('[{}]'.format(g) for g in self.groups) else: group = '[Unknown]' if config.get().cbz: ext = 'cbz' else: ext = 'zip' directory_set = False if self.directory: directory = os.path.expanduser(self.directory) directory_set = True else: directory = name download_dir = os.path.expanduser(config.get().download_directory) download_dir = os.path.join(download_dir, directory) # only sanitize download_dir if the user did not explicitly set it # assume that if it is set, the user wanted it exactly as set # if they include bad characters and it breaks things, that's their # fault. if not directory_set: download_dir = self._strip_unwanted_characters(download_dir) download_dir = self.create_directory(download_dir) # Format the filename somewhat based on Daiz's manga naming scheme. 
# Remove any '/' characters to prevent the name of the manga splitting # the files into an unwanted sub-directory. filename = '{} - {} {}'.format( name, chapter, group, ).replace('/', '') filename = self._strip_unwanted_characters(filename) # Ensure that the filename is unique to avoid overwrites. i = 1 target = os.path.join(download_dir, '.'.join([filename, ext])) while os.path.isfile(target): i += 1 new_filename = '-'.join([filename, str(i)]) target = os.path.join(download_dir, '.'.join([new_filename, ext])) return target
def edit_defaults():
    """Edits the Click command default values after initializing the config.

    Specifically, the 'latest' command's 'relative' option picks up the
    configured relative_latest value as its default.
    """
    for option in cli.get_command(cli, 'latest').params:
        if option.human_readable_name == 'relative':
            option.default = config.get().relative_latest