Example 1
def getChapterHtml(libName, libUrl, libIndex, chapterIdx):
    # Pull the host name (e.g. "www.example.com") out of the library URL.
    baseUrl = re.search("www(.*?)/", libUrl).group()
    baseUrl = re.sub("/", "", baseUrl)

    # Build the book URL; URL_LIMIT['count'] is the number of placeholders
    # in the template (2: libIndex // 1000 and libIndex, 1: libIndex only).
    limitData = URL_LIMIT[libName]
    if limitData['count'] == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif limitData['count'] == 1:
        url = libUrl.format(libIndex)

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        # 'Referer': url,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
    }

    # Retry the request up to three times, sleeping a random interval
    # between attempts; give up and return None if every attempt fails.
    html = None
    for _ in range(3):
        try:
            html = SESSION.get(url + chapterIdx,
                               headers=headers,
                               params={},
                               verify=False,
                               timeout=3)
            break
        except Exception:
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())

    if html is None:
        return None

    return Helper.decodeHtml(html)
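
The function above (and Example 2 below) leans on module-level state that the snippet does not show: a shared requests session, a URL_LIMIT table keyed by library name, a User_Agent string, and a small Helper utility module. The names come from the snippets themselves, but everything else in the sketch below is an assumption, not the project's actual code.

import math
import random
import re
import time

import requests

# Assumed module-level configuration; the real project defines these elsewhere.
SESSION = requests.Session()
User_Agent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
              "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36")
URL_LIMIT = {
    # 'count' records how many placeholders the library's URL template takes.
    'examplelib': {'count': 2},
}


class Helper:
    """Hypothetical stand-ins for the project's Helper utilities."""

    @staticmethod
    def decodeHtml(response):
        # Let requests guess the encoding, then return the decoded body.
        response.encoding = response.apparent_encoding
        return response.text

    @staticmethod
    def randomFloat():
        # Random back-off between retries.
        return random.uniform(0.5, 2.0)

    @staticmethod
    def printError(string=""):
        print("[error]", string)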
Example 2
def getBookChapterHtml(libIndex, libName, libUrl):
    # Pull the host name out of the library URL, as in Example 1.
    baseUrl = re.search("www(.*?)/", libUrl).group()
    baseUrl = re.sub("/", "", baseUrl)

    # Build the book URL from the template, as in Example 1.
    limitData = URL_LIMIT[libName]
    if limitData['count'] == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif limitData['count'] == 1:
        url = libUrl.format(libIndex)

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        'Referer': 'https://' + baseUrl,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
    }

    # Try the request up to twice, sleeping a random interval between
    # attempts; fall through to an empty string if both attempts fail.
    for _ in range(2):
        try:
            html = SESSION.get(url,
                               headers=headers,
                               params={},
                               verify=False,
                               timeout=3)
            return Helper.decodeHtml(html)
        except Exception:
            time.sleep(Helper.randomFloat())

    return ""