def download(self, start):
    """Download every chapter from index *start* onward, then the cover.

    Returns the directory path the book was written into.
    """
    total = len(self.chapters)
    idx = start
    while idx < total:
        self.downChap(idx)
        idx += 1
    # Only fetch a cover image when the site didn't already provide one.
    if not self.hasCover:
        cover_url = self.sresult[self.selection]['cover']
        weblib().download(cover_url, '%s/cover.jpg' % self.path)
    return self.path
def getInfo(self, selection):
    """Fill metadata (cover/time/last/des) for the selected search result.

    Each field is scraped from the book page with its site-specific regex
    rule. A field whose rule is '' is left untouched (matching the original
    behaviour); a rule that fails to match stores '' (best-effort).
    """
    self.selection = selection
    book = self.sresult[self.selection]
    # Some sites return relative links; prefix the site base URL.
    if not 'http' in book['url']:
        book['url'] = self.baseurl + book['url']
    icont = weblib().get(book['url'], chardet=True)
    # Deduplicated: the original repeated this try/except block four times.
    field_rules = (
        ('cover', self.coverrule),
        ('time', self.timerule),
        ('last', self.lastrule),
        ('des', self.desrule),
    )
    for field, rule in field_rules:
        if rule == '':
            continue  # site config defines no rule for this field
        try:
            # gbk round-trip drops characters a gbk console can't display
            book[field] = findstr(rule, icont)[0].encode('gbk', 'ignore').decode('gbk')
        except Exception:  # narrowed from bare except; still best-effort
            book[field] = ''
    return book
def getCata(self):
    """Build the chapter catalogue for the selected book via the JSON API."""
    book = self.sresult[self.selection]
    book['url'] = self.cataurl % self.getsid(book['id'])
    # BUG FIX: the stray ", True" was previously passed to json.loads(),
    # which accepts only one positional argument in Python 3 (TypeError).
    # It was presumably intended for weblib().get() — confirm against weblib.
    cinfo = json.loads(weblib().get(book['url']))
    for chapter in cinfo['chapters']:
        self.chapters.append({
            'title': chapter['title'],
            'url': self.chapterurl % quote(chapter['link']),
        })
    return self.chapters
def getInfo(self, selection):
    """Fill metadata for the selected search result from the JSON info API."""
    self.selection = selection
    book = self.sresult[self.selection]
    # BUG FIX: the stray ", True" was previously passed to json.loads(),
    # which accepts only one positional argument in Python 3 (TypeError).
    icont = json.loads(weblib().get(self.infourl % book['url']))
    book['cover'] = icont['cover']
    # 'updated' looks like an ISO-8601 timestamp ("...T...[.fff]"); keep
    # date and time joined by a space, dropping fractional seconds.
    # (assumes that format — TODO confirm against the API)
    book['time'] = ' '.join(icont['updated'].split('.')[0].split('T'))
    book['last'] = icont['lastChapter']
    book['des'] = icont['longIntro']
    return book
def search(self, keyword):
    """Query the JSON search API; keep at most the first 10 hits."""
    # BUG FIX: the stray ", True" was previously passed to json.loads(),
    # which accepts only one positional argument in Python 3 (TypeError).
    scont = json.loads(weblib().get(self.searchurl % quote(keyword)))
    self.sresult = []
    if scont['ok']:
        # slicing caps at 10 results without the manual min() dance
        for item in scont['books'][:10]:
            self.sresult.append({
                'id': item['_id'],
                'title': item['title'],
                'url': item['_id'],
                'author': item['author'],
            })
    return self.sresult
def search(self, keyword):
    """Scrape the site's HTML search page into self.sresult.

    Robustness fix: the original indexed sids/surls/sauthors by the length
    of stitles and raised IndexError whenever the four rule lists matched
    different numbers of items; zip() yields the common prefix instead.
    """
    scont = weblib().get(self.searchurl % quote(keyword), chardet=True)
    self.sresult = []
    sids = findstr(self.sidrule, scont)
    stitles = findstr(self.stitlerule, scont)
    surls = findstr(self.surlrule, scont)
    sauthors = findstr(self.sauthorrule, scont)
    for sid, title, url, author in zip(sids, stitles, surls, sauthors):
        self.sresult.append({'id': sid, 'title': title, 'url': url, 'author': author})
    return self.sresult
def downChap(self, i):
    """Download chapter *i* from the JSON API and write it as '<i+1>.html'."""
    cinfo = self.chapters[i]
    # gbk round-trip drops characters a gbk console can't display
    print('\t%s' % cinfo['title'].encode('gbk', 'ignore').decode('gbk'))
    # BUG FIX: the stray ", True" was previously passed to json.loads(),
    # which accepts only one positional argument in Python 3 (TypeError).
    ccont = json.loads(weblib().get(cinfo['url']))
    if ccont['ok']:  # idiom: no '== True'
        content = ' %s' % ccont['chapter']['body'].replace('\n', '<br /> ')
    else:
        content = ''
    fn = '%d.html' % (i + 1)
    filelib().write('%s/%s' % (self.path, fn),
                    self.filecont % {'title': cinfo['title'], 'content': content},
                    encoding='utf-8')
def getCata(self):
    """Scrape the chapter list found between the configured list markers."""
    book = self.sresult[self.selection]
    page = weblib().get(book['url'], chardet=True)
    # Keep only the text after the first listleft marker and before the
    # last listright marker (interior occurrences are preserved).
    page = self.listleft.join(page.split(self.listleft)[1:])
    page = self.listright.join(page.split(self.listright)[:-1])
    for chap in findstr(self.listrule, page):
        href = chap[0]
        # relative links get the site base URL prefixed
        full_url = href if 'http' in href else self.baseurl + href
        self.chapters.append({'title': chap[1], 'url': full_url})
    return self.chapters
def getInfo(self, selection):
    """Scrape cover / last-update time / description from the info page."""
    self.selection = selection
    book = self.sresult[self.selection]
    icont = weblib().get(book['url'], chardet=True)
    # FIX: regex literals made raw — '\d' / '\s' are invalid escape
    # sequences in plain string literals (SyntaxWarning on modern Python).
    book['cover'] = findstr(
        r'<div id="fmimg"><img.*src="(.*?\d*/.*?\.jpg)" ?/?><span class="b">', icont)[0]
    book['time'] = findstr('<p>最后更新:(.*?)</p>', icont)[0]
    book['last'] = ''  # this site exposes no "last chapter" field here
    book['des'] = findstr(
        r'<div id="intro"><p>([\s\S]*?)<a href="https?://down.*?".*?>.*?</a></p></div>',
        icont)[0]
    return book
def downChap(self, i):
    """Download chapter *i*, clean it with the site rules, render the
    output template, and write it to '<path>/<i+1>.html'."""
    cinfo = self.chapters[i]
    print('\t%s' % cinfo['title'].encode('gbk', 'ignore').decode('gbk'))
    raw = weblib().get(cinfo['url'], chardet=True)
    body = findstr(self.chaprule, raw)[0]
    # cleanup passes: literal junk, regex junk, then normalise <br> tags
    for junk in self.chapclean:
        body = body.replace(junk, '')
    for pattern in self.chapcleanre:
        body = re.sub(pattern, '', body)
    body = re.sub('<br.*?>', '<br /> ', body)
    cinfo['content'] = ' %s' % body
    book = self.sresult[self.selection]
    # render the user-configurable output template
    rendered = (self.output
                .replace('\\n', '\n')
                .replace('{title}', cinfo['title'])
                .replace('{author}', book['author'])
                .replace('{url}', cinfo['url'])
                .replace('{content}', cinfo['content']))
    fn = '%d.html' % (i+1)
    filelib().write('%s/%s' % (self.path, fn), rendered, encoding='utf-8')
def getCata(self):
    """Scrape the chapter catalogue from the biquge-style listing page."""
    book = self.sresult[self.selection]
    book['url'] = self.cataurl % book['id']
    cinfo = weblib().get(book['url'], chardet=True)
    # FIX: regex literals made raw — '\d' / '\s' are invalid escape
    # sequences in plain string literals (SyntaxWarning on modern Python).
    cinfo = findstr(
        r'<div id="list"><dl><dt>[\s\S]*</dt>([\s\S]*?)</dl></div>', cinfo)[0]
    chapters = findstr(
        r'<dd><a href="(/biquge_\d*/\d*.html)">(.*?)</a></dd>', cinfo)
    for chapter in chapters:
        url, title = chapter
        self.chapters.append({
            'title': title,
            'url': self.chapterurl % url
        })
    return self.chapters
def search(self, keyword):
    """Scrape gebiqu's HTML search results into self.sresult."""
    scont = weblib().get(self.searchurl % quote(keyword), chardet=True)
    # FIX: regex literals made raw — '\d' / '\s' are invalid escape
    # sequences in plain string literals (SyntaxWarning on modern Python).
    results = findstr(r'<tr id="nr">[\s\S]*?</tr>', scont)
    self.sresult = []
    for result in results:
        rid, title = findstr(
            r'<td class="odd"><a href=".*?/txt/(\d*).html">(.*?)</a></td>',
            result)[0]
        author = findstr('<td class="odd">([^<>]*?)</td>', result)[0]
        self.sresult.append({
            'id': rid,
            'title': title,
            'url': 'http://www.gebiqu.com/biquge_%s/' % rid,
            'author': author
        })
    return self.sresult
def downChap(self, i):
    """Download chapter *i* from gebiqu and write it as '<i+1>.html'.

    Best-effort: a page without the expected content div yields an empty
    chapter rather than aborting the whole download.
    """
    cinfo = self.chapters[i]
    print('\t%s' % cinfo['title'].encode('gbk', 'ignore').decode('gbk'))
    ccont = weblib().get(cinfo['url'], chardet=True)
    try:
        # FIX: raw regex string ('\s' is an invalid escape otherwise)
        content = findstr(r'<div id="content">([\s\S]*?)</div>', ccont)[0].replace(' ', '').replace(
            ' ', '').replace('www.gebiqu.com', '')
        content = re.sub('<br ?/?><br ?/?>', '<br /> ', content)
    except IndexError:  # narrowed from bare except: only "no match" is expected
        content = ''
    fn = '%d.html' % (i + 1)
    filelib().write('%s/%s' % (self.path, fn), self.filecont % {
        'title': cinfo['title'],
        'content': content
    }, encoding='utf-8')
def getsid(self, bid):
    """Return the 'xbiquge' source id for book *bid*, or None if absent."""
    # BUG FIX: the stray ", True" was previously passed to json.loads(),
    # which accepts only one positional argument in Python 3 (TypeError).
    sources = json.loads(weblib().get(self.sourceurl % bid))
    for source in sources:
        if source['source'] == 'xbiquge':
            return source['_id']
    return None  # explicit: no xbiquge source listed for this book