def test_config_set_cbz_false(self):
    # `config set cbz False` must leave the persisted cbz flag falsy.
    # Begin from the opposite state so the command has something to change.
    config.get().cbz = True
    config.get().write()

    outcome = self.invoke('config', 'set', 'cbz', 'False')

    self.assertEqual(outcome.exit_code, 0)
    self.assertFalse(config.get().cbz)
def test_download_invalid_login(self):
    # A followed series with one pending chapter is downloaded using bad
    # Madokami credentials; the command still exits 0 and reports the error.
    chapter_fields = dict(
        url=('https://manga.madokami.al/Manga/Oneshots/100%20'
             'Dollar%20wa%20Yasu%20Sugiru/100%24%20is%20Too%20'
             'Cheap%20%5BYAMAMOTO%20Kazune%5D%20-%20000%20%5B'
             'Oneshot%5D%20%5BPeebs%5D.zip'),
        chapter='000 [Oneshot]',
    )
    series_fields = dict(
        url=('https://manga.madokami.al/Manga/Oneshots/100%20'
             'Dollar%20wa%20Yasu%20Sugiru'),
        name='100 Dollar wa Yasu Sugiru',
        alias='100-dollar-wa-yasu-sugiru',
    )
    expected = ('Could not download 100-dollar-wa-yasu-sugiru 000 '
                '[Oneshot]: Madokami login error')

    mock_series = self.create_mock_series(**series_fields)
    mock_chapter = self.create_mock_chapter(**chapter_fields)
    mock_series.chapters.append(mock_chapter)
    mock_series.follow()

    # Overwrite the stored credentials after the series has been followed.
    config.get().madokami.password = '******'
    config.get().madokami.username = '******'
    config.get().write()

    outcome = self.invoke('download')
    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def test_new_compact_empty(self):
    # With compact mode on and nothing followed, `new` prints nothing.
    config.get().compact_new = True
    config.get().write()

    outcome = self.invoke('new')

    self.assertEqual(outcome.exit_code, 0)
    self.assertFalse(outcome.output)
def test_update(self):
    # Follow two mock series with no stored chapters, run `update`, and
    # expect the compact listing plus 16 chapters in the database.
    follows = (
        dict(url="http://bato.to/comic/_/comics/femme-fatale-r468",
             alias="femme-fatale", name="Femme Fatale"),
        dict(url="http://bato.to/comic/_/comics/houkago-r9187",
             alias="houkago", name="Houkago"),
    )
    expected_lines = (
        "Updating 2 series",
        "femme-fatale 1 2 3 4 4.5 5 6 7 8 8.5 9 10 11 12",
        "houkago 1 2",
    )

    config.get().compact_new = True
    config.get().write()

    for fields in follows:
        self.create_mock_series(**fields).follow()

    stored = self.db.session.query(self.db.Chapter).all()
    self.assertEqual(len(stored), 0)

    outcome = self.invoke("update")
    stored = self.db.session.query(self.db.Chapter).all()

    self.assertEqual(outcome.exit_code, 0)
    for line in expected_lines:
        self.assertIn(line, outcome.output)
    self.assertEqual(len(stored), 16)
def test_update(self):
    # Follow two Batoto series, wipe the chapters that following stored,
    # then check that `update` finds all 16 of them again.
    series_urls = ('http://bato.to/comic/_/comics/femme-fatale-r468',
                   'http://bato.to/comic/_/comics/houkago-r9187')
    expected_lines = ('Updating 2 series',
                      'femme-fatale 1 2 3 4 4.5 5 6 7 8 8.5 9 10 11 12',
                      'houkago 1 2')

    config.get().compact_new = True
    config.get().write()

    for series_url in series_urls:
        scrapers.BatotoSeries(series_url).follow()

    # Remove every stored chapter so the update starts from a clean slate.
    for stale in db.session.query(db.Chapter).all():
        db.session.delete(stale)
    db.session.commit()

    stored = db.session.query(db.Chapter).all()
    assert len(stored) == 0

    outcome = self.invoke('update')
    stored = db.session.query(db.Chapter).all()

    assert outcome.exit_code == 0
    for line in expected_lines:
        assert line in outcome.output
    assert len(stored) == 16
def test_outdated_session(self):
    # Store the same placeholder value in all three session fields, then
    # construct the series; the test passes if construction does not raise.
    series_url = 'http://bato.to/comic/_/comics/femme-fatale-r468'

    for field in ('cookie', 'member_id', 'pass_hash'):
        setattr(config.get().batoto, field, '0da7ed')
    config.get().write()

    batoto.BatotoSeries(url=series_url)
def get_chapters(self):
    """Build a BatotoChapter for every English chapter row on the page.

    Returns:
        A list of BatotoChapter objects in page order.
    """
    # Keep logging in and refetching for as long as the registration notice
    # is present on the series page. The loop ends when the notice is gone
    # or when the login attempt limit is hit, at which point LoginError is
    # raised by config.
    while self.soup.find('div', id='register_notice'):
        config.get().batoto.login()
        self._get_page()

    found = []
    for row in self.soup.find_all('tr',
                                  class_="row lang_English chapter_row"):
        cells = row.find_all('td')
        label = cells[0].img.next_sibling
        if not label:
            # Fallback behavior when BeautifulSoup interprets "<img />TEXT"
            # as "<img>TEXT</img>".
            label = cells[0].img.string
        label = label.strip()

        parsed = re.search(self.name_re, label)
        number = parsed.group(1)
        heading = parsed.group(2)
        link = cells[0].find('a').get('href')
        scanlators = [anchor.string for anchor in cells[2].find_all('a')]

        found.append(BatotoChapter(name=self.name, alias=self.alias,
                                   chapter=number, url=link,
                                   groups=scanlators, title=heading))
    return found
def test_update(self):
    # Two mock series start with no chapters; `update` should report both
    # in compact form and leave 16 chapters in the database.
    follows = (
        dict(url='http://bato.to/comic/_/comics/femme-fatale-r468',
             alias='femme-fatale', name='Femme Fatale'),
        dict(url='http://bato.to/comic/_/comics/houkago-r9187',
             alias='houkago', name='Houkago'),
    )
    expected_lines = (
        'Updating 2 series',
        'femme-fatale 1 2 3 4 4.5 5 6 7 8 8.5 9 10 11 12',
        'houkago 1 2',
    )

    config.get().compact_new = True
    config.get().write()

    for fields in follows:
        self.create_mock_series(**fields).follow()

    before = self.db.session.query(self.db.Chapter).all()
    self.assertEqual(len(before), 0)

    outcome = self.invoke('update')
    after = self.db.session.query(self.db.Chapter).all()

    self.assertEqual(outcome.exit_code, 0)
    for line in expected_lines:
        self.assertIn(line, outcome.output)
    self.assertEqual(len(after), 16)
def get_chapters(self):
    """Build a BatotoChapter for every English chapter row on the page.

    Returns:
        A list of BatotoChapter objects in page order.
    """
    # Loops through as many times necessary until either registration
    # notice is no longer found on the series page or login attempt limit
    # is reached, at which point LoginError is raised by config.
    while self.soup.find('div', id='register_notice'):
        config.get().batoto.login()
        self._get_page()

    rows = self.soup.find_all('tr', class_="row lang_English chapter_row")
    chapters = []
    for row in rows:
        columns = row.find_all('td')
        name = columns[0].img.next_sibling
        if not name:
            # Some parsers read "<img />TEXT" as "<img>TEXT</img>", which
            # leaves next_sibling empty; the text is then the image tag's
            # own string. Without this fallback .strip() raises
            # AttributeError on None.
            name = columns[0].img.string
        name = name.strip()
        name_parts = re.search(self.name_re, name)
        chapter = name_parts.group(1)
        title = name_parts.group(2)
        url = columns[0].find('a').get('href')
        groups = [g.string for g in columns[2].find_all('a')]
        c = BatotoChapter(name=self.name, alias=self.alias,
                          chapter=chapter, url=url,
                          groups=groups, title=title)
        chapters.append(c)
    return chapters
def test_update(self):
    # Follow two mock series, run `update`, and expect the compact listing
    # for himegoto plus 7 chapters in the database.
    follows = (
        dict(url='https://dynasty-scans.com/series/himegoto_1',
             alias='himegoto', name='Himegoto+'),
        dict(url=('https://manga.madokami.al/Manga/N/NU/NUSA/'
                  'Nusantara%20Droid%20War'),
             alias='nusantara-droid-war', name='Nusantara Droid War'),
    )
    expected_lines = ('Updating 2 series', 'himegoto 1 2 3 4 5 6 7')

    config.get().compact_new = True
    config.get().write()

    for fields in follows:
        self.create_mock_series(**fields).follow()

    before = self.db.session.query(self.db.Chapter).all()
    self.assertEqual(len(before), 0)

    outcome = self.invoke('update')
    after = self.db.session.query(self.db.Chapter).all()

    self.assertEqual(outcome.exit_code, 0)
    for line in expected_lines:
        self.assertIn(line, outcome.output)
    self.assertEqual(len(after), 7)
def filename(self):
    """Return the sanitized target path for this chapter's archive.

    The directory part of the returned path is created when it does not
    exist yet.
    """
    # Characters that survive sanitizing besides letters and digits.
    allowed_extra = " .-_[]/'"
    series_name = self.name.replace('/', '')
    raw = self.chapter

    if match(r'[0-9\-]*$', raw):
        # Individually numbered chapter or a chapter range (e.g. '35',
        # '001-007').
        padded = [part.zfill(3) for part in raw.split('-')]
        chapter_tag = 'c' + '-'.join(padded)
    elif match(r'[0-9]*[A-Za-z][0-9]*?$', raw):
        # Individually numbered chapter with letter following the number
        # (e.g. '35v2', '9a').
        leading = match(r'([0-9]*)[A-Za-z]', raw).group(1)
        chapter_tag = 'c{:0>3}'.format(leading)
    elif match(r'[0-9]*\.[0-9]*$', raw):
        # Individually numbered chapter with decimal (e.g. '1.5').
        whole, fraction = raw.split('.')
        chapter_tag = 'c{:0>3} x{}'.format(whole, fraction)
    else:
        # Failing all else, e.g. 'Special'. Becomes 'c000 [Special]'.
        chapter_tag = 'c000 [{}]'.format(raw)

    group_tag = (''.join('[{}]'.format(g) for g in self.groups)
                 if self.groups else '[Unknown]')
    extension = 'cbz' if config.get().cbz else 'zip'
    subdirectory = (os.path.expanduser(self.directory)
                    if self.directory else series_name)
    base_dir = os.path.join(
        os.path.expanduser(config.get().download_directory), subdirectory)

    # Format the filename somewhat based on Daiz's manga naming scheme.
    # Remove any '/' characters to prevent the name of the manga splitting
    # the files into an unwanted sub-directory.
    leaf = '{} - {} {}.{}'.format(
        series_name, chapter_tag, group_tag, extension).replace('/', '')

    # Join the path parts and sanitize any unwanted characters that might
    # cause issues with filesystems. Remove repeating whitespaces.
    joined = os.path.join(base_dir, leaf)
    kept = [ch for ch in joined
            if ch.isalpha() or ch.isdigit() or ch in allowed_extra]
    target = sub(' +', ' ', ''.join(kept).rstrip())

    # Make sure that the path exists before the filename is returned.
    parent = os.path.dirname(target)
    if not os.path.exists(parent):
        os.makedirs(parent)
    return target
def setUp(self):
    # Point the config and the download directory at a fresh temporary
    # directory, store the Madokami credentials, and create the CLI runner.
    self.directory = tempfile.TemporaryDirectory()
    workdir = self.directory.name

    config.initialize(workdir)
    config.get().download_directory = workdir
    config.get().madokami.password = self.madokami_password
    config.get().madokami.username = self.madokami_username
    config.get().write()

    self.runner = CliRunner()
def setUp(self):
    # Initialise the config in a temporary directory and load the Batoto
    # credentials from the environment. The scraper module is imported
    # last and published as a module-level global for the tests.
    global batoto

    self.directory = tempfile.TemporaryDirectory()
    workdir = self.directory.name
    config.initialize(directory=workdir)

    config.get().batoto.password = os.environ['BATOTO_PASSWORD']
    config.get().batoto.username = os.environ['BATOTO_USERNAME']
    config.get().download_directory = workdir

    from cum.scrapers import batoto
def setUp(self):
    # Initialise the config in a temporary directory and load the Madokami
    # credentials from the environment. The scraper module is imported
    # last and published as a module-level global for the tests.
    global madokami

    self.directory = tempfile.TemporaryDirectory()
    workdir = self.directory.name
    config.initialize(directory=workdir)

    config.get().madokami.password = os.environ['MADOKAMI_PASSWORD']
    config.get().madokami.username = os.environ['MADOKAMI_USERNAME']
    config.get().download_directory = workdir

    from cum.scrapers import madokami
def test_chapter_invalid_login(self):
    # Building a chapter from its URL with bad credentials must raise
    # LoginError.
    chapter_url = (
        'https://manga.madokami.al/Manga/Oneshots/100%20Dollar%20wa%20'
        'Yasu%20Sugiru/100%24%20is%20Too%20Cheap%20%5BYAMAMOTO%20Kazune'
        '%5D%20-%20000%20%5BOneshot%5D%20%5BPeebs%5D.zip'
    )

    config.get().madokami.password = '******'
    config.get().madokami.username = '******'

    with self.assertRaises(exceptions.LoginError):
        madokami.MadokamiChapter.from_url(chapter_url)
def test_config_set_batoto_password(self):
    # Clear the stored password, set it through the CLI, and confirm the
    # new value is what the config reports afterwards.
    new_password = '******'

    config.get().batoto.password = None
    config.get().write()

    outcome = self.invoke('config', 'set', 'batoto.password', new_password)

    self.assertEqual(outcome.exit_code, 0)
    self.assertEqual(config.get().batoto.password, new_password)
def test_config_set_batoto_password(self):
    # Clear the stored password, set it through the CLI, and confirm the
    # new value is what the config reports afterwards.
    new_password = '******'

    config.get().batoto.password = None
    config.get().write()

    outcome = self.invoke('config', 'set', 'batoto.password', new_password)

    assert outcome.exit_code == 0
    assert config.get().batoto.password == new_password
def test_series_invalid_login_2(self):
    # With bad credentials and bad stored session values, constructing the
    # series must raise LoginError.
    series_url = 'https://bato.to/comic/_/comics/stretch-r11259'
    overrides = (
        ('password', '******'),
        ('username', '******'),
        ('member_id', 'Invalid'),
        ('pass_hash', 'Invalid'),
        ('cookie', 'Invalid'),
    )
    for field, bogus in overrides:
        setattr(config.get().batoto, field, bogus)

    with self.assertRaises(exceptions.LoginError):
        batoto.BatotoSeries(url=series_url)
# Checks that the invoked command exits with code 0 and that every entry of
# MESSAGES appears in its output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals and the start of the `result = ...` statement is missing, so
# this line is not valid Python as written. It looks like a credential-masking
# artifact. The code is left untouched; restore the original list elements and
# the invoke call from version control before relying on this test.
def test_config_get(self): MESSAGES = ['download_directory = ' + config.get().download_directory, 'madokami.password = '******'madokami.username = '******'config', 'get') self.assertEqual(result.exit_code, 0) for message in MESSAGES: self.assertIn(message, result.output)
def test_new_compact(self):
    # In compact mode `new` lists the chapters of a series on one line.
    expected = 'blood 1-4 5-8 9-12 13-16 17-20'

    config.get().compact_new = True
    config.get().write()
    self.create_test_data()

    outcome = self.invoke('new')

    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def test_new_compact(self):
    # In compact mode `new` lists the chapters of a series on one line.
    expected = 'nijipuri 1 2 3 4 5 6'

    config.get().compact_new = True
    config.get().write()
    self.create_test_data()

    outcome = self.invoke('new')

    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def __init__(self, url, **kwargs):
    """Fetch the series page over HTTP basic auth and load its chapters.

    Raises:
        exceptions.LoginError: if the server answers with status 401.
    """
    super().__init__(url, **kwargs)

    credentials = config.get().madokami.login
    self.session = requests.Session()
    self.session.auth = requests.auth.HTTPBasicAuth(*credentials)

    response = self.session.get(url)
    if response.status_code == 401:
        raise exceptions.LoginError('Madokami login error')

    self.soup = BeautifulSoup(response.text, config.get().html_parser)
    self.chapters = self.get_chapters()
def filename(self):
    """Return a target path for this chapter that is not an existing file.

    The download directory is passed through `_strip_unwanted_characters`
    and `create_directory`. When the plain name is already taken, a numeric
    suffix starting at 2 is appended until a free name is found.
    """
    series_name = self.name.replace('/', '')
    raw = self.chapter

    if match(r'[0-9\-]*$', raw):
        # Individually numbered chapter or a chapter range (e.g. '35',
        # '001-007').
        padded = [part.zfill(3) for part in raw.split('-')]
        chapter_tag = 'c' + '-'.join(padded)
    elif match(r'[0-9]*[A-Za-z][0-9]*?$', raw):
        # Individually numbered chapter with letter following the number
        # (e.g. '35v2', '9a').
        leading = match(r'([0-9]*)[A-Za-z]', raw).group(1)
        chapter_tag = 'c{:0>3}'.format(leading)
    elif match(r'[0-9]*\.[0-9]*$', raw):
        # Individually numbered chapter with decimal (e.g. '1.5').
        whole, fraction = raw.split('.')
        chapter_tag = 'c{:0>3} x{}'.format(whole, fraction)
    else:
        # Failing all else, e.g. 'Special'. Becomes 'c000 [Special]'.
        chapter_tag = 'c000 [{}]'.format(raw)

    group_tag = (''.join('[{}]'.format(g) for g in self.groups)
                 if self.groups else '[Unknown]')
    extension = 'cbz' if config.get().cbz else 'zip'
    subdirectory = (os.path.expanduser(self.directory)
                    if self.directory else series_name)

    base_dir = os.path.join(
        os.path.expanduser(config.get().download_directory), subdirectory)
    base_dir = self._strip_unwanted_characters(base_dir)
    base_dir = self.create_directory(base_dir)

    # Format the filename somewhat based on Daiz's manga naming scheme.
    # Remove any '/' characters to prevent the name of the manga splitting
    # the files into an unwanted sub-directory.
    stem = '{} - {} {}'.format(
        series_name, chapter_tag, group_tag).replace('/', '')
    stem = self._strip_unwanted_characters(stem)

    # Ensure that the filename is unique to avoid overwrites.
    attempt = 1
    candidate = os.path.join(base_dir, '.'.join([stem, extension]))
    while os.path.isfile(candidate):
        attempt += 1
        numbered = '-'.join([stem, str(attempt)])
        candidate = os.path.join(base_dir, '.'.join([numbered, extension]))
    return candidate
def config_command(mode, setting, value):
    """Get or set configuration options.

    Mode can be either "get" or "set", depending on whether you want to read or
    write configuration values. If mode is "get", you can specify a setting to
    read that particular setting or omit it to list out all the settings. If
    mode is "set", you must specify the setting to change and assign it a new
    value.
    """
    if mode == 'get':
        if setting:
            # Walk the dotted path (e.g. "batoto.password") one attribute
            # at a time, starting from the root configuration object.
            parameters = setting.split('.')
            value = config.get()
            for parameter in parameters:
                try:
                    value = getattr(value, parameter)
                except AttributeError:
                    output.error('Setting not found')
                    exit(1)
            output.configuration({setting: value})
        else:
            configuration = config.get().serialize()
            output.configuration(configuration)
    elif mode == 'set':
        if setting is None:
            output.error('You must specify a setting')
            exit(1)
        if value is None:
            output.error('You must specify a value')
            exit(1)

        # Resolve every path component but the last to find the object
        # that owns the setting.
        parameters = setting.split('.')
        preference = config.get()
        for parameter in parameters[0:-1]:
            try:
                preference = getattr(preference, parameter)
            except AttributeError:
                output.error('Setting not found')
                exit(1)
        try:
            current_value = getattr(preference, parameters[-1])
        except AttributeError:
            output.error('Setting not found')
            exit(1)

        if isinstance(current_value, bool):
            # bool('False') is True because any non-empty string is truthy,
            # so boolean settings are parsed from their text instead of
            # being passed through the type constructor.
            text = str(value).strip().lower()
            if text in ('true', '1', 'yes', 'on'):
                value = True
            elif text in ('false', '0', 'no', 'off'):
                value = False
            else:
                output.error('Type mismatch: value should be {}'
                             .format(type(current_value).__name__))
                exit(1)
        elif current_value is not None:
            # Coerce the new value to the type of the current one; a
            # current value of None accepts the raw value unchanged.
            try:
                value = type(current_value)(value)
            except ValueError:
                output.error('Type mismatch: value should be {}'
                             .format(type(current_value).__name__))
                exit(1)
        setattr(preference, parameters[-1], value)
        config.get().write()
    else:
        output.error('Mode must be either get or set')
        exit(1)
# Checks that the invoked command exits with code 0 and that every entry of
# MESSAGES appears in its output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals and the start of the `result = ...` statement is missing, so
# this line is not valid Python as written. It looks like a credential-masking
# artifact. The code is left untouched; restore the original list elements and
# the invoke call from version control before relying on this test.
def test_config_get(self): MESSAGES = ['batoto.password = '******'batoto.username = '******'download_directory = ' + config.get().download_directory, 'madokami.password = '******'madokami.username = '******'config', 'get') self.assertEqual(result.exit_code, 0) for message in MESSAGES: self.assertIn(message, result.output)
def test_follow_madokami_download_invalid_login(self):
    # `follow --download` with bad Madokami credentials exits 0 and prints
    # the login error together with the URL.
    series_url = ('https://manga.madokami.al/Manga/A/AK/AKUM/'
                  'Akuma%20no%20Riddle')
    expected = '==> Madokami login error ({})'.format(series_url)

    config.get().madokami.password = '******'
    config.get().madokami.username = '******'
    config.get().write()

    outcome = self.invoke('follow', series_url, '--download')

    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def test_follow_batoto_invalid_login(self):
    # `follow` with bad Batoto credentials exits 0 and prints the login
    # error together with the URL.
    series_url = 'http://bato.to/comic/_/comics/hot-road-r2243'
    expected = 'Batoto login error (' + series_url + ')'

    config.get().batoto.password = '******'
    config.get().batoto.username = '******'
    config.get().write()

    outcome = self.invoke('follow', series_url)

    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
def test_follow_batoto_invalid_login(self):
    # `follow` with bad Batoto credentials exits 0 and prints the login
    # error together with the URL.
    series_url = 'http://bato.to/comic/_/comics/hot-road-r2243'
    expected = 'Batoto login error (' + series_url + ')'

    config.get().batoto.password = '******'
    config.get().batoto.username = '******'
    config.get().write()

    outcome = self.invoke('follow', series_url)

    assert outcome.exit_code == 0
    assert expected in outcome.output
def test_get_series_batoto_invalid_login(self):
    # `get` on a series URL with bad Batoto credentials exits 0 and prints
    # the login error together with the URL.
    series_url = 'http://bato.to/comic/_/comics/gekkou-spice-r2863'
    expected = 'Batoto login error (' + series_url + ')'

    config.get().batoto.password = '******'
    config.get().batoto.username = '******'
    config.get().write()

    outcome = self.invoke('get', series_url)

    assert outcome.exit_code == 0
    assert expected in outcome.output
def test_get_series_batoto_invalid_login(self):
    # `get` on a series URL with bad Batoto credentials exits 0 and prints
    # the login error together with the URL.
    series_url = 'http://bato.to/comic/_/comics/gekkou-spice-r2863'
    expected = 'Batoto login error (' + series_url + ')'

    config.get().batoto.password = '******'
    config.get().batoto.username = '******'
    config.get().write()

    outcome = self.invoke('get', series_url)

    self.assertEqual(outcome.exit_code, 0)
    self.assertIn(expected, outcome.output)
# Checks that the invoked command exits with code 0 and that every entry of
# MESSAGES appears in its output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals and the start of the `result = ...` statement is missing, so
# this line is not valid Python as written. It looks like a credential-masking
# artifact. The code is left untouched; restore the original list elements and
# the invoke call from version control before relying on this test.
def test_config_get(self): MESSAGES = ['batoto.password = '******'batoto.username = '******'download_directory = ' + config.get().download_directory, 'madokami.password = '******'madokami.username = '******'config', 'get') assert result.exit_code == 0 for message in MESSAGES: assert message in result.output
def test_follow_madokami_download_invalid_login(self):
    # `follow --download` with bad Madokami credentials exits 0 and reports
    # that the chapter could not be downloaded.
    series_url = ('https://manga.madokami.com/Manga/A/AK/AKUM/'
                  'Akuma%20no%20Riddle')
    expected = ('Could not download akuma-no-riddle 00-08: '
                'Madokami login error')

    config.get().madokami.password = '******'
    config.get().madokami.username = '******'
    config.get().write()

    outcome = self.invoke('follow', series_url, '--download')

    assert outcome.exit_code == 0
    assert expected in outcome.output
# Clears the stored Batoto username and password, invokes `get` on a chapter
# URL with 'a\na' as input, and checks that every entry of MESSAGES appears in
# the output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals, so this line is not valid Python as written. It looks like
# a credential-masking artifact that replaced the separators between the list
# elements. The code is left untouched; restore the original list before
# relying on this test.
def test_get_chapter_batoto_invalid_login(self): URL = 'http://bato.to/reader#f0fbe77dbcc60780' MESSAGES = ['Batoto username:'******'Batoto password:'******'Batoto login error ({})'.format(URL)] config.get().batoto.username = None config.get().batoto.password = None config.get().write() result = self.invoke('get', URL, input='a\na') for message in MESSAGES: assert message in result.output
# Clears the stored Batoto username and password, invokes `get` on a chapter
# URL with 'a\na' as input, and checks that every entry of MESSAGES appears in
# the output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals, so this line is not valid Python as written. It looks like
# a credential-masking artifact that replaced the separators between the list
# elements. The code is left untouched; restore the original list before
# relying on this test.
def test_get_chapter_batoto_invalid_login(self): URL = 'http://bato.to/reader#f0fbe77dbcc60780' MESSAGES = [ 'Batoto username:'******'Batoto password:'******'Batoto login error ({})'.format(URL) ] config.get().batoto.username = None config.get().batoto.password = None config.get().write() result = self.invoke('get', URL, input='a\na') for message in MESSAGES: self.assertIn(message, result.output)
def get_groups(self):
    """Return the group names linked from the chapter page.

    Each link inside the 'scanlators' span is fetched and the name is read
    from that page's tag title. Returns an empty list when the span is
    absent.
    """
    page = requests.get(self.url)
    document = BeautifulSoup(page.text, config.get().html_parser)

    container = document.find('span', class_='scanlators')
    anchors = container.find_all('a') if container else []

    names = []
    for anchor in anchors:
        group_page = requests.get(urljoin(self.url, anchor.get('href')))
        group_doc = BeautifulSoup(group_page.text,
                                  config.get().html_parser)
        names.append(group_doc.find('h2', class_='tag-title').b.string)
    return names
def test_download_invalid_login(self):
    # Follow a Madokami series, then swap in bad credentials; `download`
    # should exit 0 and report that the chapter could not be fetched.
    series_url = (
        'https://manga.madokami.com/Manga/Oneshots/100%20Dollar%20wa%20'
        'Yasu%20Sugiru'
    )
    expected = ('Could not download 100-dollar-wa-yasu-sugiru 000 '
                '[Oneshot]: Madokami login error')

    scrapers.MadokamiSeries(series_url).follow()

    config.get().madokami.password = '******'
    config.get().madokami.username = '******'
    config.get().write()

    outcome = self.invoke('download')

    assert outcome.exit_code == 0
    assert expected in outcome.output
def download(self):
    """Download every page of the chapter and pack the pages into a zip.

    Uses the cached reader response in ``self.r`` when it is set and truthy,
    otherwise requests reader page 1.

    Raises:
        ValueError: if a page name has no guessable MIME type, or if an
            image request answers with status 404.
    """
    reader = getattr(self, 'r', None) or self.reader_get(1)

    # Everything needed is pulled out of the raw page text with regexes.
    chapter_hash = re.search(self.hash_re, reader.text).group(1)
    pages_var = re.search(self.pages_re, reader.text)
    pages = re.findall(self.single_page_re, pages_var.group(1))
    mirror = re.search(self.server_re, reader.text).group(1)
    server = urljoin('https://mangadex.com', mirror)

    # One slot per page; page_download_finish receives this list.
    files = [None] * len(pages)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            if not guess_type(page)[0]:
                print('Unkown image type for url {}'.format(page))
                raise ValueError
            image = server + chapter_hash + '/' + page

            response = requests.get(image, stream=True)
            if response.status_code == 404:
                response.close()
                raise ValueError

            fut = download_pool.submit(self.page_download_task, i, response)
            fut.add_done_callback(
                partial(self.page_download_finish, bar, files))
            futures.append(fut)

        # Wait while the progress bar is still open, because the done
        # callbacks are handed the bar.
        concurrent.futures.wait(futures)
        self.create_zip(files)
def update():
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    # Maps each submitted future back to the alias of the series it is
    # fetching, so failures can be reported against the right series.
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except requests.exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                # Look the alias up by future: `follow` still points at the
                # last series submitted, not the one that failed.
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            else:
                series.update()
            bar.update(1)
    # Warnings are printed only after the progress bar has finished.
    for w in warnings:
        output.warning(w)
    list_new()
def get_five_latest_releases(self):
    """Return absolute URLs for up to five chapter links on the updates
    page that pass `language_filter`."""
    response = requests.get(self.MANGADEX_URL + 'updates')
    document = BeautifulSoup(response.text, config.get().html_parser)

    anchors = document.find_all('a', href=mangadex.MangadexChapter.url_re)
    accepted = [anchor for anchor in anchors if language_filter(anchor)]

    absolute = []
    for anchor in accepted:
        absolute.append(urljoin(self.MANGADEX_URL, anchor.get('href')))
    return absolute[:5]
# Clears the stored Madokami username and password, invokes `get` on a chapter
# URL with 'a\na' as input, and checks that every entry of MESSAGES appears in
# the output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals, so this line is not valid Python as written. It looks like
# a credential-masking artifact that replaced the separators between the list
# elements. The code is left untouched; restore the original list before
# relying on this test.
def test_get_chapter_madokami_invalid_login(self): URL = ('https://manga.madokami.al/Manga/Oneshots/12-ji%20no%20Kane%20' 'ga%20Naru/12%20O%27Clock%20Bell%20Rings%20%5BKISHIMOTO%20' 'Seishi%5D%20-%20000%20%5BOneshot%5D%20%5BTurtle%20Paradise%5D' '.zip') MESSAGES = [ 'Madokami username:'******'Madokami password:'******'Madokami login error' ] config.get().madokami.username = None config.get().madokami.password = None config.get().write() result = self.invoke('get', URL, input='a\na') for message in MESSAGES: self.assertIn(message, result.output)
# Clears the stored Madokami username and password, invokes `get` on a chapter
# URL with 'a\na' as input, and checks that every entry of MESSAGES appears in
# the output.
# NOTE(review): the MESSAGES literal below contains bare `******` runs between
# string literals, so this line is not valid Python as written. It looks like
# a credential-masking artifact that replaced the separators between the list
# elements. The code is left untouched; restore the original list before
# relying on this test.
def test_get_chapter_madokami_invalid_login(self): URL = ('https://manga.madokami.com/Manga/Oneshots/12-ji%20no%20Kane%20' 'ga%20Naru/12%20O%27Clock%20Bell%20Rings%20%5BKISHIMOTO%20' 'Seishi%5D%20-%20000%20%5BOneshot%5D%20%5BTurtle%20Paradise%5D' '.zip') MESSAGES = ['Madokami username:'******'Madokami password:'******'Madokami login error'] config.get().madokami.username = None config.get().madokami.password = None config.get().write() result = self.invoke('get', URL, input='a\na') for message in MESSAGES: assert message in result.output
def get_five_latest_releases(self):
    """Return absolute reader URLs for up to five links found in the
    English rows of the front page's chapter table."""
    response = requests.get(self.BATOTO_URL)
    document = BeautifulSoup(response.text, config.get().html_parser)

    table = document.find('table', class_='chapters_list')
    english_rows = table.find_all('tr', class_='lang_English')

    reader_links = [
        urljoin(self.BATOTO_URL, anchor.get('href'))
        for row in english_rows
        for anchor in row.find_all(href=re.compile(r'/reader#.*'))
    ]
    return reader_links[:5]
def __init__(self, url, **kwargs):
    """Fetch the series page over HTTP basic auth and load its chapters.

    Raises:
        exceptions.LoginError: if the server answers with status 401.
    """
    super().__init__(url, **kwargs)

    self.session = requests.Session()
    login = config.get().madokami.login
    self.session.auth = requests.auth.HTTPBasicAuth(*login)

    page = self.session.get(url)
    if page.status_code == 401:
        raise exceptions.LoginError('Madokami login error')

    self.soup = BeautifulSoup(page.text, config.get().html_parser)
    self.chapters = self.get_chapters()
def test_new_compact(self):
    # After following a series in compact mode, `new` lists its chapters
    # on one line.
    series_url = 'http://bato.to/comic/_/comics/blood-r5840'
    expected = 'blood 1-4 5-8 9-12 13-16 17-20'

    config.get().compact_new = True
    scrapers.BatotoSeries(series_url).follow()

    outcome = self.invoke('new')

    assert outcome.exit_code == 0
    assert expected in outcome.output
def from_url(url):
    """Find the chapter object for a reader URL via its parent series.

    Returns the matching chapter, or None when the series lists no chapter
    with that URL.

    Raises:
        exceptions.ScrapingError: if the reader page has no link to a
            parent series.
    """
    reader_hash = re.search(BatotoChapter.url_re, url).group(1)
    response = BatotoChapter._reader_get(reader_hash, 1)
    document = BeautifulSoup(response.text, config.get().html_parser)

    try:
        parent_url = document.find('a', href=BatotoSeries.url_re)['href']
    except TypeError:
        raise exceptions.ScrapingError('Chapter has no parent series link')

    parent = BatotoSeries(parent_url)
    # Both URLs are compared with any leading run of 'h', 't', 'p', 's'
    # characters stripped.
    wanted = url.lstrip('htps')
    return next(
        (candidate for candidate in parent.chapters
         if candidate.url.lstrip('htps') == wanted),
        None,
    )
def from_url(url):
    """Find the chapter object for a chapter URL via its parent series.

    Returns the matching chapter, or None when the series lists no chapter
    with that URL.

    Raises:
        exceptions.ScrapingError: if the reader page has no link to a
            parent series.
    """
    response = MangadexChapter._reader_get(url, 1)
    document = BeautifulSoup(response.text, config.get().html_parser)

    try:
        parent_href = document.find('a', href=MangadexSeries.url_re)['href']
    except TypeError:
        raise exceptions.ScrapingError('Chapter has no parent series link')

    parent = MangadexSeries(urljoin('https://mangadex.com', parent_href))
    # Compare everything after the scheme component of each URL.
    wanted = ''.join(urlparse(url)[1:])
    return next(
        (candidate for candidate in parent.chapters
         if ''.join(urlparse(candidate.url)[1:]) == wanted),
        None,
    )
def get_five_latest_releases(self):
    """Return absolute reader URLs for up to five links found in the
    English rows of the front page's chapter table."""
    front_page = requests.get(self.BATOTO_URL)
    document = BeautifulSoup(front_page.text, config.get().html_parser)

    chapter_table = document.find('table', class_='chapters_list')
    reader_pattern = re.compile(r'/reader#.*')

    collected = []
    for row in chapter_table.find_all('tr', class_='lang_English'):
        for anchor in row.find_all(href=reader_pattern):
            collected.append(urljoin(self.BATOTO_URL, anchor.get('href')))
    return collected[:5]
def __init__(self, url, **kwargs):
    """Fetch the series page, cache the title, and load the chapters.

    Raises:
        exceptions.ScrapingError: if the page has no title heading.
    """
    super().__init__(url, **kwargs)

    page = requests.get(url)
    self.soup = BeautifulSoup(page.content, config.get().html_parser)

    # mangakakalot does not return 404 if there is no such title, so a
    # missing heading is the signal that the series does not exist.
    headings = self.soup.select('.manga-info-text h1')
    try:
        self.cached_name = headings[0].text
    except IndexError:
        raise exceptions.ScrapingError()

    self.chapters = self.get_chapters()
def test_outdated_session_max_retries(self):
    # With the login attempt counter preset to 1 and placeholder session
    # values stored, constructing the series must raise LoginError.
    series_url = 'http://bato.to/comic/_/comics/femme-fatale-r468'

    config.get().batoto._login_attempts = 1
    for field in ('cookie', 'member_id', 'pass_hash'):
        setattr(config.get().batoto, field, '0da7ed')
    config.get().write()

    with self.assertRaises(exceptions.LoginError):
        batoto.BatotoSeries(url=series_url)
def test_chapter_information_tomochan(self):
    # Check the metadata scraped for one chapter, the filename derived from
    # it, and that downloading produces a one-file archive at that path.
    chapter_url = 'https://mangadex.com/chapter/28082'
    config.get().cbz = True

    chapter = mangadex.MangadexChapter.from_url(chapter_url)

    self.assertEqual(chapter.alias, 'tomo-chan-wa-onna-no-ko')
    self.assertEqual(chapter.chapter, '1')
    self.assertEqual(chapter.groups, ['M@STER Scans'])
    self.assertEqual(chapter.name, 'Tomo-chan wa Onna no ko!')
    self.assertEqual(chapter.title, 'Once In A Life Time Misfire')

    expected_path = os.path.join(
        self.directory.name,
        'Tomo-chan wa Onna no ko',
        'Tomo-chan wa Onna no ko - c001 [MSTER Scans].cbz',
    )
    self.assertEqual(chapter.filename, expected_path)

    chapter.download()
    self.assertTrue(os.path.isfile(expected_path))

    with zipfile.ZipFile(expected_path) as archive:
        members = archive.infolist()
        self.assertEqual(len(members), 1)
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    pending = []
    problems = []
    # Maps each submitted future back to the alias of its series.
    alias_of = {}

    followed = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        # Fast mode keeps only the series flagged as needing an update.
        selected = [entry for entry in followed if entry.needs_update]
        skipped = len(followed) - len(selected)
        followed = selected
        output.series('Updating {} series ({} skipped)'.format(
            len(followed), skipped))
    else:
        output.series('Updating {} series'.format(len(followed)))

    for entry in followed:
        job = pool.submit(utility.series_by_url, entry.url)
        pending.append(job)
        alias_of[job] = entry.alias

    with click.progressbar(length=len(pending), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for done in concurrent.futures.as_completed(pending):
            try:
                fetched = done.result()
            except exceptions.ConnectionError:
                problems.append(
                    'Unable to update {} (connection error)'.format(
                        alias_of[done]))
            except exceptions.ScrapingError:
                problems.append(
                    'Unable to update {} (scraping error)'.format(
                        alias_of[done]))
            except exceptions.LoginError as error:
                problems.append('Unable to update {} ({})'.format(
                    alias_of[done], error.message))
            else:
                fetched.update()
            bar.update(1)

    # Warnings are printed only after the progress bar has finished.
    for problem in problems:
        output.warning(problem)
    utility.list_new()