async def load_preview(self, session, preview_page=0):
    """Fetch and parse one preview (thumbnail) page of this gallery.

    Populates ``name_en``/``name_jp`` (when present), ``page_count``,
    ``preview_range`` and ``content_map`` from the parsed HTML, then sets
    ``self.loaded``.

    Args:
        session: HTTP session passed through to ``fetch_text_ensure``.
        preview_page: zero-based index of the preview page to fetch.

    Raises:
        BadGalleryError: if the site reports the gallery as unavailable.
    """
    url = self.get_preview_page_url(preview_page)
    self.raw_html = html = await fetch_text_ensure(session, url)
    # Consistency fix: the sibling load_preview implementation checks for
    # removed galleries before parsing; without this guard the XPath lookups
    # below fail with an opaque AttributeError on such pages.
    if '<title>Gallery Not Available - ' in html:
        raise BadGalleryError()
    self.parsed_document = doc = parse_html(html)
    # Both title nodes are optional; assign only when found.
    name_en = doc.find('.//h1[@id="gn"]')
    if name_en is not None:
        self.name_en = name_en.text
    name_jp = doc.find('.//h1[@id="gj"]')
    if name_jp is not None:
        self.name_jp = name_jp.text
    # Pagination summary text; presumably "Showing A - B of C images" so the
    # odd indices 1/3/5 pick A, B and C — TODO confirm against a live page.
    parts = doc.find('.//div[@class="gtb"]/p[@class="gpc"]').text.split(' ')
    self.page_count = parse_int(parts[5])
    preview_beg = parse_int(parts[1])
    preview_end = parse_int(parts[3])
    # The final preview page may be short; only a non-final page defines the
    # full per-page range.
    if preview_end != self.page_count:
        self.preview_range = preview_end - preview_beg + 1
    imgs = doc.findall('.//div[@id="gdt"]/div/div/a')
    page_urls = [img.attrib['href'] for img in imgs]
    pages = [GalleryPage.from_url(page_url) for page_url in page_urls]
    if self.content_map is None:
        self.content_map = {}
    self.content_map.update({page.page: page for page in pages})
    self.loaded = True
async def load(self, session):
    """Fetch and parse a single image-view page.

    Populates ``img_url``, ``prev``/``next`` page objects, ``preview_page``,
    file metadata (``file_name``, ``img_size``, ``file_length``), reload
    info, ``origin_url`` and ``gallery_name``, then sets ``self.loaded``.

    Args:
        session: HTTP session passed through to ``fetch_text_ensure``.
    """
    url = self.get_url()
    self.raw_html = html = await fetch_text_ensure(session, url)
    self.parsed_document = doc = parse_html(html)
    self.img_url = doc.find('.//img[@id="img"]').attrib['src']
    # prev/next links carry their target in an onclick handler shaped like
    # "fn(<page>, '<token>')" — the int before the comma and the quoted token
    # after it ([1:-1] strips the surrounding quotes).
    prev_url = doc.find('.//a[@id="prev"]').attrib['onclick']
    prev_page = int(get_between(prev_url, '(', ','))
    prev_token = get_between(prev_url, ',', ')')[1:-1]
    self.prev = GalleryPage(self.gallery_id, prev_token, prev_page,
                            self.base_url)
    next_url = doc.find('.//a[@id="next"]').attrib['onclick']
    next_page = int(get_between(next_url, '(', ','))
    next_token = get_between(next_url, ',', ')')[1:-1]
    self.next = GalleryPage(self.gallery_id, next_token, next_page,
                            self.base_url)
    # Back-link to the gallery preview; "?p=N" is absent on page 0.
    preview_url = doc.find('.//div[@id="i5"]/div/a').attrib['href']
    self.preview_page = (int(preview_url.split('?p=')[-1])
                         if '?p=' in preview_url else 0)
    self.file_name, self.img_size, self.file_length = doc.find(
        './/div[@id="i4"]/div[1]').text.split(' :: ')
    self.img_size = [int(n) for n in self.img_size.split(' x ')]
    reload_info = doc.find('.//a[@id="loadfail"]').attrib['onclick']
    reload_info = get_between(reload_info, "('", "')")[1:-1]
    self.append_reload_info(reload_info)
    # BUG FIX: the original tested `if doc.find('.//div[@id="i7"]')`, but an
    # Element with no children is falsy even when it exists, so the origin
    # link could be silently skipped. Look up the anchor itself and compare
    # against None; this also tolerates an i7 div with no <a> child.
    origin_anchor = doc.find('.//div[@id="i7"]/a')
    if origin_anchor is not None:
        self.origin_url = origin_anchor.attrib['href']
    else:
        self.origin_url = self.img_url
    self.gallery_name = doc.find('.//h1').text
    self.loaded = True
async def load_preview(self, session, preview_page=0):
    """Load one preview page and fill in gallery metadata.

    Sets the English/Japanese titles when present, the total page count,
    the preview range for non-final pages, and merges the discovered
    thumbnail links into ``content_map`` before flagging ``self.loaded``.

    Raises:
        BadGalleryError: when the site reports the gallery as unavailable.
    """
    page_url = self.get_preview_page_url(preview_page)
    self.raw_html = body = await fetch_text_ensure(session, page_url)
    # Removed galleries render an error page; bail out before parsing.
    if '<title>Gallery Not Available - ' in body:
        raise BadGalleryError()
    self.parsed_document = tree = parse_html(body)
    # Either title heading may be missing; assign only what we find.
    for attr, xpath in (('name_en', './/h1[@id="gn"]'),
                        ('name_jp', './/h1[@id="gj"]')):
        heading = tree.find(xpath)
        if heading is not None:
            setattr(self, attr, heading.text)
    counter_words = tree.find(
        './/div[@class="gtb"]/p[@class="gpc"]').text.split(' ')
    self.page_count = parse_int(counter_words[5])
    first_shown = parse_int(counter_words[1])
    last_shown = parse_int(counter_words[3])
    # Only a non-final preview page defines the full per-page range.
    if last_shown != self.page_count:
        self.preview_range = last_shown - first_shown + 1
    anchors = tree.findall('.//div[@id="gdt"]/div/a')
    thumb_pages = [GalleryPage.from_url(a.attrib['href']) for a in anchors]
    if self.content_map is None:
        self.content_map = {}
    for thumb in thumb_pages:
        self.content_map[thumb.page] = thumb
    self.loaded = True
async def fetch_results(self, page: int) -> list:
    """Fetch one search-result page and return a list of ``SearchResult``.

    Pairs each gallery link with the corresponding update-time table cell;
    the two node lists are assumed to appear in the same order in the page
    layout — TODO confirm against a live result page.

    Args:
        page: zero-or-one-based result-page index forwarded to
            ``self._make_params`` (exact base not visible here).

    Returns:
        list of ``SearchResult(url, parsed_update_time)``.
    """
    params = self._make_params(page)
    html = await fetch_text_ensure(self.session, self.base_url + '/',
                                   params=params)
    doc = parse_html(html)
    gallery_prefix = self.base_url + '/g/'
    # BUG FIX: <a> tags without an href make a.get('href') return None, and
    # the original then crashed on None.startswith(...). Skip them instead.
    urls = [
        href for href in (a.get('href') for a in doc.findall('.//a'))
        if href is not None and href.startswith(gallery_prefix)
    ]
    update_times = [
        cell.text for cell in doc.findall('.//td[@class="itd"]')
        if cell.text is not None
    ]
    return [
        SearchResult(url, dateutil.parser.parse(update_time))
        for url, update_time in zip(urls, update_times)
    ]
async def load(self, session):
    """Fetch and parse a single image-view page.

    Populates ``img_url``, ``prev``/``next`` page objects, ``preview_page``,
    file metadata (``file_name``, ``img_size``, ``file_length``), reload
    info, ``origin_url`` and ``gallery_name``, then sets ``self.loaded``.

    Args:
        session: HTTP session passed through to ``fetch_text_ensure``.
    """
    url = self.get_url()
    self.raw_html = html = await fetch_text_ensure(session, url)
    self.parsed_document = doc = parse_html(html)
    self.img_url = doc.find('.//img[@id="img"]').attrib['src']
    # Navigation targets live in onclick handlers shaped like
    # "fn(<page>, '<token>')": the int before the comma plus the quoted
    # token after it ([1:-1] removes the quotes).
    prev_url = doc.find('.//a[@id="prev"]').attrib['onclick']
    prev_page = int(get_between(prev_url, '(', ','))
    prev_token = get_between(prev_url, ',', ')')[1:-1]
    self.prev = GalleryPage(self.gallery_id, prev_token, prev_page,
                            self.base_url)
    next_url = doc.find('.//a[@id="next"]').attrib['onclick']
    next_page = int(get_between(next_url, '(', ','))
    next_token = get_between(next_url, ',', ')')[1:-1]
    self.next = GalleryPage(self.gallery_id, next_token, next_page,
                            self.base_url)
    # Back-link to the preview listing; "?p=N" is absent on page 0.
    preview_url = doc.find('.//div[@id="i5"]/div/a').attrib['href']
    self.preview_page = (int(preview_url.split('?p=')[-1])
                         if '?p=' in preview_url else 0)
    self.file_name, self.img_size, self.file_length = doc.find(
        './/div[@id="i4"]/div[1]').text.split(' :: ')
    self.img_size = [int(n) for n in self.img_size.split(' x ')]
    reload_info = doc.find('.//a[@id="loadfail"]').attrib['onclick']
    reload_info = get_between(reload_info, "('", "')")[1:-1]
    self.append_reload_info(reload_info)
    # BUG FIX: `if doc.find('.//div[@id="i7"]')` relies on Element
    # truthiness, which is False for a childless element even when the node
    # exists. Find the anchor directly and compare against None; this also
    # survives an i7 div that lacks an <a> child.
    origin_anchor = doc.find('.//div[@id="i7"]/a')
    if origin_anchor is not None:
        self.origin_url = origin_anchor.attrib['href']
    else:
        self.origin_url = self.img_url
    self.gallery_name = doc.find('.//h1').text
    self.loaded = True