Beispiel #1
0
    def getImgList(self, url):
        decoder = AutoDecoder(isfeed=False)
        opener = URLOpener(self.host, timeout=60)
        imgList = []

        result = opener.open(url)
        if result.status_code != 200 or not result.content:
            self.log.warn('fetch comic page failed: %s' % url)
            return imgList

        content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)
        soup = BeautifulSoup(content, 'html.parser')
        scripts = soup.findAll("script", {"type": "text/javascript"})
        for script in scripts:
            if script.text != "":
                raw_content = script.text
                break

        res = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.*\))', raw_content).group(1)
        lz_encoded = re.search(r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)", res).group(1)
        lz_decoded = decompressFromBase64(lz_encoded)
        res = re.sub(r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)", "'%s'.split('|')"%(lz_decoded), res)
        codes = self.get_node_online(res)
        pages_opts = json.loads(re.search(r'^SMH.reader\((.*)\)\.preInit\(\);$', codes).group(1))

        cid = self.getChapterId(url)
        md5 = pages_opts["sl"]["md5"]
        images = pages_opts["images"]
        for img in images:
            img_url = u'https://i.hamreus.com{}?cid={}&md5={}'.format(img, cid, md5)
            imgList.append(img_url)

        return imgList
Beispiel #2
0
    def getChapterList(self, url):
        decoder = AutoDecoder(isfeed=False)
        opener = URLOpener(self.host, timeout=60)
        chapterList = []

        if url.startswith( "https://www.manhuagui.com" ):
            url = url.replace('https://www.manhuagui.com', 'https://m.manhuagui.com')

        result = opener.open(url)
        if result.status_code != 200 or not result.content:
            self.log.warn('fetch comic page failed: %s' % url)
            return chapterList

        content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)

        soup = BeautifulSoup(content, 'html.parser')
        invisible_input = soup.find("input", {"id":'__VIEWSTATE'})
        if invisible_input:
            lz_encoded=invisible_input.get("value")
            lz_decoded = decompressFromBase64(lz_encoded)
            soup = BeautifulSoup(lz_decoded, 'html.parser')
        else:
            soup = soup.find("div", {"class": 'chapter-list', "id":'chapterList'})

        lias = soup.findAll('a')
        for aindex in range(len(lias)):
            rindex = len(lias)-1-aindex
            href = "https://m.manhuagui.com" + lias[rindex].get("href")
            chapterList.append(href)

        return chapterList
Beispiel #3
0
    def getImgList(self, url):
        decoder = AutoDecoder(isfeed=False)
        opener = URLOpener(self.host, timeout=60)
        imgList = []

        result = opener.open(url)
        if result.status_code != 200 or not result.content:
            self.log.warn('fetch comic page failed: %s' % url)
            return imgList

        content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)
        soup = BeautifulSoup(content, 'html.parser')
        scripts = soup.findAll("script", {"type": "text/javascript"})
        for script in scripts:
            if "window[\"\\x65\\x76\\x61\\x6c\"]" in script.text != "":
                raw_content = script.text
                break

        if (raw_content is None):
            self.log.warn('raw_content href is not exist.')
            return imgList

        res = re.search(r'window\["\\x65\\x76\\x61\\x6c"\](.*\))', raw_content).group(1)
        lz_encoded = re.search(r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)", res).group(1)
        lz_decoded = decompressFromBase64(lz_encoded)
        res = re.sub(r"'([A-Za-z0-9+/=]+)'\['\\x73\\x70\\x6c\\x69\\x63'\]\('\\x7c'\)", "'%s'.split('|')"%(lz_decoded), res)
        codes = self.get_node_online(res)
        pages_opts = json.loads(re.search(r'^SMH.reader\((.*)\)\.preInit\(\);$', codes).group(1))

        # cid = self.getChapterId(url)
        m = pages_opts["sl"]["m"]
        e = pages_opts["sl"]["e"]
        images = pages_opts["images"]

        if (images is None):
            self.log.warn('image list is not exist.')
            return imgList

        for img in images:
            # https://i.hamreus.com/ps3/p/pingxingtt_gbl/%E7%AC%AC117%E8%AF%9D/1_7684.jpg.webp?e=1769209619&m=MOn_QAAi-qwQBaRjlmNYkA
            img_url = u'https://i.hamreus.com{}?e={}&m={}'.format(img, e, m)
            imgList.append(img_url)

        return imgList
Beispiel #4
0
                    break
            except Exception, e:
                if njRetry < 5:
                    self.log.warn('fetch comic page failed: %s, retry' % url)
                    continue
                else:
                    self.log.warn('fetch comic page failed: %s' % url)
                    return chapterList

        content = self.AutoDecodeContent(result.content, decoder, self.feed_encoding, opener.realurl, result.headers)

        soup = BeautifulSoup(content, 'html.parser')
        invisible_input = soup.find("input", {"id":'__VIEWSTATE'})
        if invisible_input:
            lz_encoded=invisible_input.get("value")
            lz_decoded = decompressFromBase64(lz_encoded)
            soup = BeautifulSoup(lz_decoded, 'html.parser')
        else:
            soup = soup.find("div", {"class": 'chapter-list', "id":'chapterList'})

        if (soup is None):
            self.log.warn('chapterList is not exist.')
            return chapterList

        lias = soup.findAll('a')
        if (lias is None):
            self.log.warn('chapterList href is not exist.')
            return chapterList

        for index, a in enumerate(lias):
            href = self.urljoin("https://m.manhuagui.com", a.get('href', ''))