import concurrent.futures
import re
from functools import partial
from mimetypes import guess_type
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Assumed context: config, output, download_pool, MangadexSeries and the
# scraper base helpers (reader_get, progress_bar, page_download_task,
# page_download_finish, create_zip) are provided by the surrounding project.


def download(self):
    # Reuse a cached reader response if the scraper already fetched one.
    if getattr(self, 'r', None):
        r = self.r
    else:
        r = self.reader_get(1)
    chapter_hash = re.search(self.hash_re, r.text).group(1)
    pages_var = re.search(self.pages_re, r.text)
    pages = re.findall(self.single_page_re, pages_var.group(1))
    files = [None] * len(pages)
    mirror = re.search(self.server_re, r.text).group(1)
    server = urljoin('https://mangadex.com', mirror)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            if guess_type(page)[0]:
                image = server + chapter_hash + '/' + page
            else:
                print('Unknown image type for url {}'.format(page))
                raise ValueError
            r = requests.get(image, stream=True)
            if r.status_code == 404:
                r.close()
                raise ValueError
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)


def download(self):
    if getattr(self, 'r', None):
        r = self.r
    else:
        r = self.reader_get(1)
    chapter_hash = self.json['hash']
    pages = self.json['page_array']
    files = [None] * len(pages)
    # This can be a mirror server or a data path. Examples:
    #   var server = 'https://s2.mangadex.org/'
    #   var server = '/data/'
    mirror = self.json['server']
    server = urljoin('https://mangadex.org', mirror)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            if guess_type(page)[0]:
                image = server + chapter_hash + '/' + page
            else:
                print('Unknown image type for url {}'.format(page))
                raise ValueError
            r = requests.get(image, stream=False,
                             headers=MangadexSeries.headers)
            if r.status_code == 404:
                r.close()
                raise ValueError
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)
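

# Illustrative aside, not part of any scraper: urllib.parse.urljoin covers
# both server forms quoted in the comment above. A bare path is resolved
# against the base URL, while an absolute URL replaces it entirely.
from urllib.parse import urljoin

assert urljoin('https://mangadex.org', '/data/') == \
    'https://mangadex.org/data/'
assert (urljoin('https://mangadex.org', 'https://s2.mangadex.org/') ==
        'https://s2.mangadex.org/')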


def download(self):
    files = [None] * len(self.pages)
    futures = []
    with self.progress_bar(self.pages) as bar:
        for i, page in enumerate(self.pages):
            r = requests.get(page.attrs['src'], stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)


def download(self):
    response = requests.get(self.api_hook_details).json()
    pages = response['pages']
    files = [None] * len(pages)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            r = requests.get(page['url'], stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)


def download(self):
    r = requests.get(self.url)
    # The reader page embeds its image URLs as "image":"..." pairs in the
    # page source, so pull them out with a regex.
    pages = re.findall(r'"image":"(.*?)"', r.text)
    files = [None] * len(pages)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            r = requests.get(urljoin(self.url, page), stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)
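

# Illustrative aside, not part of any scraper: the "image":"..." pattern
# above captures non-greedily, so each match stops at the next quote. The
# sample text below is made up for demonstration.
import re

sample = '{"image":"/img/001.jpg"},{"image":"/img/002.jpg"}'
assert re.findall(r'"image":"(.*?)"', sample) == \
    ['/img/001.jpg', '/img/002.jpg']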


def download(self):
    response = requests.get(self.api_hook_details).json()
    pages = response['pages']
    files = [None] * len(pages)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            r = requests.get(page['url'], stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)


def download(self):
    data = requests.get(self.url + '.json').json()
    pages = [urljoin('https://dynasty-scans.com', u['url'])
             for u in data['pages']]
    files = [None] * len(pages)
    futures = []
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            # The entries in pages are already absolute URLs, so this
            # urljoin leaves them unchanged.
            r = requests.get(urljoin(self.url, page), stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
        concurrent.futures.wait(futures)
    self.create_zip(files)


def download(self):
    if getattr(self, 'r', None):
        r = self.r
    else:
        r = self.reader_get(1)
    soup = BeautifulSoup(r.text, config.get().html_parser)
    if soup.find('a', href='#{}_1_t'.format(self.batoto_hash)):
        # The chapter uses webtoon layout, meaning all of the images are
        # on the same page.
        pages = [''.join(i) for i in re.findall(self.img_path_re, r.text)]
    else:
        pages = [x.get('value') for x in
                 soup.find('select', id='page_select').find_all('option')]
        if not pages:
            output.warning('Skipping {s.name} {s.chapter}: '
                           'chapter has no pages'.format(s=self))
            return
        # Replace the first URL with the first image URL to avoid scraping
        # the first page twice.
        pages[0] = soup.find('img', id='comic_page').get('src')
        next_match = re.search(self.next_img_path_re, r.text)
        if next_match:
            pages[1] = next_match.group(1)
    files = [None] * len(pages)
    futures = []
    last_image = None
    with self.progress_bar(pages) as bar:
        for i, page in enumerate(pages):
            try:
                if guess_type(page)[0]:
                    image = page
                else:
                    # Predict the next image URL from the last one by
                    # incrementing its zero-padded page number.
                    for reg in [self.img_path_re, self.ch_img_path_re]:
                        m = re.match(reg, last_image)
                        if m:
                            break
                    else:
                        raise ValueError
                    mg = list(m.groups())
                    num_digits = len(mg[1])
                    mg[1] = "{0:0>{digits}}".format(int(mg[1]) + 1,
                                                    digits=num_digits)
                    image = "".join(mg)
                r = requests.get(image, stream=True)
                if r.status_code == 404:
                    r.close()
                    raise ValueError
            except ValueError:
                # If prediction fails, fall back to scraping the reader page.
                r = self.reader_get(i + 1)
                soup = BeautifulSoup(r.text, config.get().html_parser)
                image = soup.find('img', id='comic_page').get('src')
                image2_match = re.search(self.next_img_path_re, r.text)
                if image2_match:
                    pages[i + 1] = image2_match.group(1)
                r = requests.get(image, stream=True)
            fut = download_pool.submit(self.page_download_task, i, r)
            fut.add_done_callback(partial(self.page_download_finish,
                                          bar, files))
            futures.append(fut)
            last_image = image
        concurrent.futures.wait(futures)
    self.create_zip(files)
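

# Illustrative aside, not part of any scraper: the prediction step above
# increments the captured page number while preserving its zero-padded
# width, so a counter like '009' advances to '010' rather than '10'.
num = '009'
assert "{0:0>{digits}}".format(int(num) + 1, digits=len(num)) == '010'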