def __getNextPage(self, imgUrl):
    """Request the next-page record for ``imgUrl`` and store it on the instance.

    Calls the ``/PW/GetPage/`` endpoint on ``self.domain`` and keeps
    retrying until the response body looks like a JSON object (starts with
    ``{`` and ends with ``}``, ignoring surrounding whitespace). Every
    rejected response is echoed to ``sys.stderr``.

    Side effects: sets ``self.total`` from ``PageCount`` if it is still 0,
    sets ``self.index`` from ``PageIndex``, appends ``NextPage`` to
    ``self.imgList`` and prints a progress line.

    Args:
        imgUrl: Value sent as the ``img`` query parameter.

    Returns:
        The decoded JSON response.

    Raises:
        KeyError: If ``self.pdfInfo`` or the response lacks a key read here.
        ValueError: If the body passes the brace check but is not valid
            JSON (raised by ``json.loads``).
    """
    # Local import so this method does not depend on the file-level imports.
    import time

    retry_delay = 1.0  # seconds to wait before re-requesting a rejected page

    url = makeURL(
        'https://' + self.domain + '.book118.com/PW/GetPage/?',
        {
            'f': self.pdfInfo['Url'],
            'img': imgUrl,
            'isMobile': 'false',
            'isNet': 'True',
            'readLimit': self.pdfInfo['ReadLimit'],
            'furl': self.pdfInfo['Furl'],
        })

    while True:
        result = getHTML(url)
        # Compare on the stripped text: a valid JSON body followed by a
        # newline would otherwise never satisfy endswith('}') and the loop
        # would spin forever.
        body = result.strip()
        if body.startswith('{') and body.endswith('}'):
            break
        print(result, file=sys.stderr)
        # Pause instead of immediately re-hitting the server in a tight loop.
        time.sleep(retry_delay)

    res = json.loads(body)
    if self.total == 0:
        self.total = res['PageCount']
    self.index = res['PageIndex']
    self.imgList.append(res['NextPage'])
    print(self.index, '/', self.total, 'url finish', res['NextPage'])
    return res
def __getPdfInfo(self):
    """Resolve the viewer page for ``self.pid`` and collect its hidden fields.

    Steps, in order:
      1. Ask ``max.book118.com`` (``a=viewUrl``) for the viewer location.
      2. Store in ``self.domain`` the text between ``//`` and the first
         ``.`` of that response.
      3. Fetch ``'https:' + <response>`` (the response is presumably a
         protocol-relative URL -- verify against the service).
      4. Copy every ``<input type="hidden" id=... value=...>`` pair of the
         fetched HTML into ``self.pdfInfo`` as ``id -> value``.

    Raises:
        IndexError: If the first response contains no ``//<name>.`` match.
    """
    query = {
        'g': 'Home',
        'm': 'View',
        'a': 'viewUrl',
        'cid': str(self.pid),
        'flag': '1',
    }
    view_location = getHTML(makeURL('https://max.book118.com/index.php?', query))

    domain_matches = re.findall(r'//(.*?)\..*', view_location)
    self.domain = domain_matches[0]

    viewer_html = getHTML('https:' + view_location)
    hidden_fields = re.findall(
        r'<input type="hidden" id="(.*?)" value="(.*?)".*?/>', viewer_html)
    for field_id, field_value in hidden_fields:
        self.pdfInfo[field_id] = field_value
def __getIMG(self):
    """Download every entry of ``self.imgList`` into a fresh ``./temp`` folder.

    Any existing ``./temp`` directory is deleted and recreated first. Each
    entry is requested from the ``/img/`` endpoint on ``self.domain`` as
    bytes and written to ``./temp/<n>.jpg``, where ``n`` is the 1-based
    position in ``self.imgList``. The written path is appended to
    ``self.imgFileList`` and a progress line is printed per file.

    Example query string sent to the endpoint:
    ``?img=Hs92T42xAvsP_ycWPqjcj8Iw69WUDaxvq4HtxAb3Zl3WYzxX1hdIsZzydhmmGAtm``
    """
    if os.path.exists('./temp'):
        shutil.rmtree('./temp')
    os.makedirs('./temp')

    for page_no, img in enumerate(self.imgList, start=1):
        file_name = str(page_no) + '.jpg'
        file_path = './temp/' + file_name

        img_url = makeURL(
            'http://' + self.domain + '.book118.com/img/?', {'img': img})
        payload = getHTML(img_url, byte=True)

        with open(file_path, 'wb') as out:
            out.write(payload)

        print(page_no, '/', self.total, 'download finish', file_name)
        self.imgFileList.append(file_path)
def __getNextPage(self, imgUrl):
    """Request the next-page record for ``imgUrl`` and store it on the instance.

    Issues a single request to the ``/pdf/GetNextPage/`` endpoint on
    ``self.domain`` and decodes the body with ``json.loads`` (no retry and
    no validation of the body beforehand).

    Side effects: sets ``self.total`` from ``PageCount`` if it is still 0,
    sets ``self.index`` from ``PageIndex``, appends ``NextPage`` to
    ``self.imgList`` and prints a progress line.

    Args:
        imgUrl: Value sent as the ``img`` query parameter.

    Returns:
        The decoded JSON response.
    """
    endpoint = 'https://' + self.domain + '.book118.com/pdf/GetNextPage/?'
    params = {
        'f': self.pdfInfo['Url'],
        'img': imgUrl,
        'isMobile': 'false',
        'isNet': 'True',
        'readLimit': self.pdfInfo['ReadLimit'],
        'furl': self.pdfInfo['Furl'],
    }
    res = json.loads(getHTML(makeURL(endpoint, params)))

    # The total is only taken from the first response that arrives while
    # it is still unset (0).
    if self.total == 0:
        self.total = res['PageCount']
    self.index = res['PageIndex']

    next_page = res['NextPage']
    self.imgList.append(next_page)
    print(self.index, '/', self.total, 'url finish', next_page)
    return res