def getInfo(self, selection):
    """Fetch the detail page of a search result and fill in its metadata.

    Remembers ``selection`` on the instance, downloads the book page of
    ``self.sresult[selection]`` and, for every configured (non-empty) rule
    among ``coverrule``, ``timerule``, ``lastrule`` and ``desrule``, stores
    the first match under ``'cover'``, ``'time'``, ``'last'`` and ``'des'``.

    Args:
        selection: Index into ``self.sresult``.

    Returns:
        The selected book dict, updated in place.  A field whose rule is
        the empty string is left untouched; a field whose rule cannot be
        applied is set to ``''``.
    """
    self.selection = selection
    book = self.sresult[self.selection]

    # Links without 'http' in them are treated as site-relative.
    if 'http' not in book['url']:
        book['url'] = self.baseurl + book['url']
    icont = weblib().get(book['url'], chardet=True)

    for field in ('cover', 'time', 'last', 'des'):
        rule = getattr(self, field + 'rule')
        if rule == '':
            continue
        try:
            # The GBK round trip drops characters GBK cannot encode.
            book[field] = findstr(rule, icont)[0].encode(
                'gbk', 'ignore').decode('gbk')
        except Exception:
            # Best effort: a rule that does not match (or fails) yields an
            # empty field.  Unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            book[field] = ''
    return book
def search(self, keyword):
    """Search the site for ``keyword`` and cache the parsed results.

    The URL-quoted keyword is substituted into ``self.searchurl``; ids,
    titles, urls and authors are then extracted from the page with the
    four configured search rules and combined position by position.

    Args:
        keyword: Search term.

    Returns:
        ``self.sresult``: a list of dicts with the keys ``'id'``,
        ``'title'``, ``'url'`` and ``'author'``.
    """
    scont = weblib().get(self.searchurl % quote(keyword), chardet=True)
    self.sresult = []
    sids = findstr(self.sidrule, scont)
    stitles = findstr(self.stitlerule, scont)
    surls = findstr(self.surlrule, scont)
    sauthors = findstr(self.sauthorrule, scont)
    # zip() stops at the shortest list, so a rule that matched fewer times
    # than the title rule no longer raises IndexError half-way through.
    self.sresult = [
        {'id': sid, 'title': title, 'url': url, 'author': author}
        for sid, title, url, author in zip(sids, stitles, surls, sauthors)
    ]
    return self.sresult
def getInfo(self, selection):
    """Fetch the detail page of a search result and fill in its metadata.

    Remembers ``selection`` on the instance, downloads the page at the
    selected book's ``'url'`` and extracts the cover image, the
    last-update text and the description with hard-coded patterns.
    ``'last'`` is always set to the empty string.

    Args:
        selection: Index into ``self.sresult``.

    Returns:
        The selected book dict, updated in place with ``'cover'``,
        ``'time'``, ``'last'`` and ``'des'``.

    Raises:
        IndexError: If one of the patterns does not match the page
            (assuming ``findstr`` returns an empty list in that case).
    """
    self.selection = selection
    book = self.sresult[self.selection]
    icont = weblib().get(book['url'], chardet=True)
    # Raw strings keep the regex escapes (\d, \., \s, \S) from being
    # parsed as invalid string escape sequences.
    book['cover'] = findstr(
        r'<div id="fmimg"><img.*src="(.*?\d*/.*?\.jpg)" ?/?><span class="b">',
        icont)[0]
    book['time'] = findstr(r'<p>最后更新:(.*?)</p>', icont)[0]
    book['last'] = ''
    book['des'] = findstr(
        r'<div id="intro"><p>([\s\S]*?)<a href="https?://down.*?".*?>.*?</a></p></div>',
        icont)[0]
    return book
def loadcfg(self, path):
    """Load a site configuration and copy its settings onto the instance.

    ``self.enabled`` is taken from the ``'enabled'`` entry and falls back
    to ``False`` when it cannot be read.  For an enabled configuration the
    remaining entries are copied to attributes of the same name, and
    ``self.baseurl`` is derived from the scheme-and-host part of ``'url'``.

    Loading is best effort: at the first entry that cannot be read the
    copy stops silently, leaving the attributes assigned so far in place
    and the later ones unset.

    Args:
        path: Location of the configuration, passed to ``filelib().json``.
    """
    cfgfile = filelib().json(path)
    try:
        self.enabled = cfgfile['enabled']
    except Exception:
        self.enabled = False
    if not self.enabled:
        return

    try:
        self.debug = cfgfile['debug']
        self.url = cfgfile['url']
        self.baseurl = findstr('(http[s]?://.*?)/', self.url)[0]
        # Same order as the configuration has always been read in, so a
        # failure leaves exactly the same attributes assigned.
        for key in (
            'idrule',
            'searchurl',
            'sidrule',
            'stitlerule',
            'surlrule',
            'sauthorrule',
            'coverrule',
            'timerule',
            'lastrule',
            'desrule',
            'listleft',
            'listright',
            'listrule',
            'chaprule',
            'chapclean',
            'chapcleanre',
            'output',
        ):
            setattr(self, key, cfgfile[key])
    except Exception:
        # Deliberately tolerant of incomplete configurations, but no
        # longer swallows KeyboardInterrupt / SystemExit the way a bare
        # ``except`` did.
        pass
def getCata(self):
    """Download the catalogue page of the selected book and list chapters.

    The selected book's ``'url'`` is overwritten with ``self.cataurl``
    formatted with the book id.  The chapter list is cut out of that page
    and every chapter link found in it is appended to ``self.chapters``
    as a dict with ``'title'`` and ``'url'`` (the link formatted into
    ``self.chapterurl``).

    Returns:
        ``self.chapters``, including any entries it already held.

    Raises:
        IndexError: If the chapter list container is not found on the page
            (assuming ``findstr`` returns an empty list in that case).
    """
    book = self.sresult[self.selection]
    book['url'] = self.cataurl % book['id']
    cinfo = weblib().get(book['url'], chardet=True)
    # Raw strings keep \s, \S and \d from being parsed as invalid string
    # escape sequences.
    cinfo = findstr(
        r'<div id="list"><dl><dt>[\s\S]*</dt>([\s\S]*?)</dl></div>',
        cinfo)[0]
    chapters = findstr(
        r'<dd><a href="(/biquge_\d*/\d*.html)">(.*?)</a></dd>', cinfo)
    for url, title in chapters:
        self.chapters.append({
            'title': title,
            'url': self.chapterurl % url,
        })
    return self.chapters
def search(self, keyword):
    """Search the site for ``keyword`` and cache the parsed results.

    The URL-quoted keyword is substituted into ``self.searchurl``.  Every
    ``<tr id="nr">`` row of the result page is parsed for the book id,
    title and author; the book url is built from the id.

    Args:
        keyword: Search term.

    Returns:
        ``self.sresult``: a list of dicts with the keys ``'id'``,
        ``'title'``, ``'url'`` and ``'author'``.

    Raises:
        IndexError: If a result row lacks the expected title or author
            cell (assuming ``findstr`` returns an empty list then).
    """
    scont = weblib().get(self.searchurl % quote(keyword), chardet=True)
    # Raw strings keep \s, \S and \d from being parsed as invalid string
    # escape sequences.
    results = findstr(r'<tr id="nr">[\s\S]*?</tr>', scont)
    self.sresult = []
    for result in results:
        rid, title = findstr(
            r'<td class="odd"><a href=".*?/txt/(\d*).html">(.*?)</a></td>',
            result)[0]
        author = findstr(r'<td class="odd">([^<>]*?)</td>', result)[0]
        self.sresult.append({
            'id': rid,
            'title': title,
            'url': 'http://www.gebiqu.com/biquge_%s/' % rid,
            'author': author,
        })
    return self.sresult
def getCata(self):
    """Download the selected book's page and collect its chapter list.

    The page is trimmed to the text after the first ``self.listleft``
    marker and before the last ``self.listright`` marker, then scanned
    with ``self.listrule``.  For each match, element 0 is the link and
    element 1 the title; links without 'http' in them are prefixed with
    ``self.baseurl``.

    Returns:
        ``self.chapters`` after the new ``{'title', 'url'}`` dicts have
        been appended to it.
    """
    book = self.sresult[self.selection]
    page = weblib().get(book['url'], chardet=True)

    # Drop everything up to and including the first left marker.
    pieces_after_left = page.split(self.listleft)[1:]
    page = self.listleft.join(pieces_after_left)
    # Drop everything from the last right marker onwards.
    pieces_before_right = page.split(self.listright)[:-1]
    page = self.listright.join(pieces_before_right)

    for entry in findstr(self.listrule, page):
        link = entry[0]
        target = link if 'http' in link else self.baseurl + link
        self.chapters.append({'title': entry[1], 'url': target})
    return self.chapters
def downChap(self, i):
    """Download chapter ``i``, clean it and write it to ``<i+1>.html``.

    The chapter body is the first match of ``self.chaprule`` on the
    chapter page.  Every string in ``self.chapclean`` is removed
    literally, every pattern in ``self.chapcleanre`` is removed as a
    regex, and ``<br>`` tags are normalised.  The result is stored under
    ``'content'`` in the chapter dict and rendered through the
    ``self.output`` template before being written under ``self.path``.

    Args:
        i: Zero-based index into ``self.chapters``.
    """
    cinfo = self.chapters[i]
    shown_title = cinfo['title'].encode('gbk', 'ignore').decode('gbk')
    print('\t%s' % shown_title)

    page = weblib().get(cinfo['url'], chardet=True)
    body = findstr(self.chaprule, page)[0]
    for literal in self.chapclean:
        body = body.replace(literal, '')
    for pattern in self.chapcleanre:
        body = re.sub(pattern, '', body)
    body = re.sub('<br.*?>', '<br /> ', body)
    cinfo['content'] = ' %s' % body

    fn = '%d.html' % (i + 1)
    book = self.sresult[self.selection]

    # Fill the template one placeholder at a time, in a fixed order.
    chapdata = self.output.replace('\\n', '\n')
    chapdata = chapdata.replace('{title}', cinfo['title'])
    chapdata = chapdata.replace('{author}', book['author'])
    chapdata = chapdata.replace('{url}', cinfo['url'])
    chapdata = chapdata.replace('{content}', cinfo['content'])

    target = '%s/%s' % (self.path, fn)
    filelib().write(target, chapdata, encoding='utf-8')
def downChap(self, i):
    """Download chapter ``i`` and write it to ``<i+1>.html``.

    The chapter body is the first ``<div id="content">`` block of the
    chapter page, with spaces and the site's own domain text removed and
    doubled ``<br>`` tags collapsed.  If the body cannot be extracted the
    chapter is written with empty content.  The file is rendered through
    the ``self.filecont`` template and written under ``self.path``.

    Args:
        i: Zero-based index into ``self.chapters``.
    """
    cinfo = self.chapters[i]
    print('\t%s' % cinfo['title'].encode('gbk', 'ignore').decode('gbk'))
    ccont = weblib().get(cinfo['url'], chardet=True)
    try:
        # Raw string keeps \s and \S from being parsed as invalid string
        # escape sequences.
        content = findstr(r'<div id="content">([\s\S]*?)</div>', ccont)[0]
        # NOTE(review): both replace targets read as a plain space here;
        # they may have been two different whitespace characters
        # originally - confirm before merging them.
        content = content.replace(' ', '').replace(
            ' ', '').replace('www.gebiqu.com', '')
        content = re.sub('<br ?/?><br ?/?>', '<br /> ', content)
    except Exception:
        # Best effort: a page without the expected markup yields an empty
        # chapter.  Unlike a bare ``except`` this lets KeyboardInterrupt /
        # SystemExit propagate.
        content = ''
    fn = '%d.html' % (i + 1)
    filelib().write('%s/%s' % (self.path, fn), self.filecont % {
        'title': cinfo['title'],
        'content': content,
    }, encoding='utf-8')