def getImgList(self, url):
    """Fetch one chapter viewer page and return its image URL list.

    The page embeds its data in an obfuscated, LZString-packed script
    (a ``window["\\x65\\x76\\x61\\x6c"](...)`` call).  We unpack the
    base64 dictionary, evaluate the reconstructed script via
    ``self.get_node_online`` and rebuild absolute image URLs from the
    chapter id and md5 token.  Returns ``[]`` on any fetch/parse failure.
    """
    decoder = AutoDecoder(isfeed=False)
    opener = URLOpener(self.host, timeout=60)
    imgList = []

    result = opener.open(url)
    if result.status_code != 200 or not result.content:
        self.log.warn('fetch comic page failed: %s' % url)
        return imgList

    content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)
    soup = BeautifulSoup(content, 'html.parser')

    # Fix: raw_content was unbound (NameError) when every script tag was
    # empty.  Initialise it and bail out early instead of crashing.
    raw_content = None
    for script in soup.findAll("script", {"type": "text/javascript"}):
        if script.text != "":
            raw_content = script.text
            break
    if raw_content is None:
        self.log.warn('packed script is not exist: %s' % url)
        return imgList

    # window["\x65\x76\x61\x6c"](...) is the obfuscated eval() call.
    # Guard the regexes: .group(1) on a failed search raises AttributeError.
    packed = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.*\))', raw_content)
    if packed is None:
        self.log.warn('eval script is not exist: %s' % url)
        return imgList
    res = packed.group(1)

    # '<base64>'['\x73\x70\x6c\x69\x63']('\x7c') is the packed word
    # dictionary; decode it and splice the plain split('|') call back in.
    splic_re = r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)"
    lz_match = re.search(splic_re, res)
    if lz_match is None:
        self.log.warn('packed dictionary is not exist: %s' % url)
        return imgList
    lz_decoded = decompressFromBase64(lz_match.group(1))
    res = re.sub(splic_re, "'%s'.split('|')" % (lz_decoded), res)

    # Run the de-obfuscated script; it calls SMH.reader({...}).preInit().
    codes = self.get_node_online(res)
    pages_opts = json.loads(re.search(r'^SMH.reader\((.*)\)\.preInit\(\);$', codes).group(1))

    cid = self.getChapterId(url)
    md5 = pages_opts["sl"]["md5"]
    for img in pages_opts["images"]:
        imgList.append(u'https://i.hamreus.com{}?cid={}&md5={}'.format(img, cid, md5))
    return imgList
def getChapterList(self, url):
    """Fetch a book's index page and return chapter URLs, oldest first.

    Always uses the mobile site (m.manhuagui.com).  Restricted books
    hide the chapter markup LZString-packed inside the __VIEWSTATE
    hidden input; otherwise the links live in div#chapterList.
    Returns ``[]`` on any fetch/parse failure.
    """
    decoder = AutoDecoder(isfeed=False)
    opener = URLOpener(self.host, timeout=60)
    chapterList = []

    # Normalise desktop URLs to the mobile site, whose markup we parse.
    if url.startswith("https://www.manhuagui.com"):
        url = url.replace('https://www.manhuagui.com', 'https://m.manhuagui.com')

    result = opener.open(url)
    if result.status_code != 200 or not result.content:
        self.log.warn('fetch comic page failed: %s' % url)
        return chapterList

    content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)
    soup = BeautifulSoup(content, 'html.parser')

    invisible_input = soup.find("input", {"id": '__VIEWSTATE'})
    if invisible_input:
        # Age-gated books pack the real chapter markup into __VIEWSTATE.
        lz_decoded = decompressFromBase64(invisible_input.get("value"))
        soup = BeautifulSoup(lz_decoded, 'html.parser')
    else:
        soup = soup.find("div", {"class": 'chapter-list', "id": 'chapterList'})
        # Fix: a missing chapter-list div previously crashed with
        # AttributeError on soup.findAll(None) below.
        if soup is None:
            self.log.warn('chapterList is not exist.')
            return chapterList

    # The site lists newest chapters first; reverse to oldest-first.
    for a in reversed(soup.findAll('a')):
        chapterList.append("https://m.manhuagui.com" + a.get("href"))
    return chapterList
def getImgList(self, url):
    """Fetch one chapter viewer page and return its image URL list.

    The page embeds its data in an obfuscated, LZString-packed script
    (a ``window["\\x65\\x76\\x61\\x6c"](...)`` call).  After unpacking
    and evaluating it via ``self.get_node_online``, image URLs are
    rebuilt from the ``e``/``m`` signature tokens.  Returns ``[]`` on
    any fetch/parse failure.
    """
    decoder = AutoDecoder(isfeed=False)
    opener = URLOpener(self.host, timeout=60)
    imgList = []

    result = opener.open(url)
    if result.status_code != 200 or not result.content:
        self.log.warn('fetch comic page failed: %s' % url)
        return imgList

    content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)
    soup = BeautifulSoup(content, 'html.parser')

    # Fix: raw_content was never initialised, so the None check below
    # itself raised NameError when no script tag matched.
    raw_content = None
    # Literal marker for the obfuscated eval: window["\x65\x76\x61\x6c"]
    eval_marker = 'window["\\x65\\x76\\x61\\x6c"]'
    for script in soup.findAll("script", {"type": "text/javascript"}):
        # Original used a chained comparison ('marker' in text != "");
        # plain membership already implies the text is non-empty.
        if eval_marker in script.text:
            raw_content = script.text
            break
    if raw_content is None:
        self.log.warn('raw_content href is not exist.')
        return imgList

    res = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.*\))', raw_content).group(1)

    # '<base64>'['\x73\x70\x6c\x69\x63']('\x7c') is the packed word
    # dictionary; decode it and splice the plain split('|') call back in.
    splic_re = r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)"
    lz_encoded = re.search(splic_re, res).group(1)
    lz_decoded = decompressFromBase64(lz_encoded)
    res = re.sub(splic_re, "'%s'.split('|')" % (lz_decoded), res)

    # Run the de-obfuscated script; it calls SMH.reader({...}).preInit().
    codes = self.get_node_online(res)
    pages_opts = json.loads(re.search(r'^SMH.reader\((.*)\)\.preInit\(\);$', codes).group(1))

    m = pages_opts["sl"]["m"]
    e = pages_opts["sl"]["e"]
    images = pages_opts["images"]
    if images is None:
        self.log.warn('image list is not exist.')
        return imgList

    # e.g. https://i.hamreus.com/ps3/p/.../1_7684.jpg.webp?e=...&m=...
    for img in images:
        imgList.append(u'https://i.hamreus.com{}?e={}&m={}'.format(img, e, m))
    return imgList
break except Exception, e: if njRetry < 5: self.log.warn('fetch comic page failed: %s, retry' % url) continue else: self.log.warn('fetch comic page failed: %s' % url) return chapterList content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers) soup = BeautifulSoup(content, 'html.parser') invisible_input = soup.find("input", {"id":'__VIEWSTATE'}) if invisible_input: lz_encoded=invisible_input.get("value") lz_decoded = decompressFromBase64(lz_encoded) soup = BeautifulSoup(lz_decoded, 'html.parser') else: soup = soup.find("div", {"class": 'chapter-list', "id":'chapterList'}) if (soup is None): self.log.warn('chapterList is not exist.') return chapterList lias = soup.findAll('a') if (lias is None): self.log.warn('chapterList href is not exist.') return chapterList for index, a in enumerate(lias): href = self.urljoin("https://m.manhuagui.com", a.get('href', ''))