def execute(self):
    """Run the crawl: resume interrupted work, then drain both URL queues.

    Steps, in order:

    1. If ``visitingBookUrl()`` returns a URL, download that book first.
    2. If ``visitingUrl()`` returns a URL, visit it first.  Per the original
       comment, finding such an entry in the database means the program has
       run before and its state can be restored.
    3. Visit the result of one ``nextVisitUrl()`` call, then keep visiting
       until ``nextVisitUrl()`` returns None.
    4. Download every URL from ``nextPageUrl()`` until it returns None.
    """
    Log.V("[I] onExecute()")

    # Check for a book that was still marked as "visiting".
    visitingBookUrl = self.visitingBookUrl()
    if visitingBookUrl is not None:
        self.downloadBook(visitingBookUrl)

    # Check for a URL that was still marked as "visiting".
    visitingUrl = self.visitingUrl()
    if visitingUrl is not None:
        # Bug fix: this used to pass `visitUrl`, a local that is only assigned
        # further down, so resuming raised UnboundLocalError.
        self.visit(visitingUrl)

    # Original comment: "otherwise start searching from the root".
    # NOTE(review): this first visit is unconditional, so visit() receives
    # None when nextVisitUrl() has nothing to offer -- kept as in the original.
    visitUrl = self.nextVisitUrl()
    self.visit(visitUrl)

    Log.I("[I] willVisitNext")
    # Process the remaining URLs; iter(callable, None) stops at the first None.
    for visitUrl in iter(self.nextVisitUrl, None):
        self.visit(visitUrl)

    Log.I("[I] willVisitNextPageUrl")
    # Page traversal is complete; download the collected book URLs.
    for bookUrl in iter(self.nextPageUrl, None):
        self.downloadBook(bookUrl)

    Log.V("------parse finished------")
def __init__(self):
    """Initialise the parser from the shared configuration and factories.

    Copies URL, path and AES settings from ``Config.shared``, zeroes the two
    section-download counters, and obtains the database and storage handles
    from ``DbFactory.shared`` / ``StorgeFactory.shared``.
    """
    config = Config.shared

    # Settings copied from the shared configuration object.
    self.rootUrl = config.url
    self.scheme = config.scheme
    self.host = config.host
    self.outPath = config.outPath
    self.tmpPath = config.tmpPath
    self.aescode = config.aesCode

    # Section download counters both start at zero.
    self.sectionDownloadSuccCount = 0
    self.sectionDownloadFailedCount = 0

    # Database handles, looked up by name on the shared factory.
    databases = DbFactory.shared
    self.bookDb = databases.get("BookinfoDb")
    self.visitUrlDb = databases.get("VisitUrlDb")
    self.visitBookUrlDb = databases.get("VisitBookUrlDb")
    self.kvDb = databases.get("KeyValueDb")

    # Storage backend selected by the configured name, rooted at outPath.
    self.storge = StorgeFactory.shared.get(config.storgeName,
                                           config.outPath)

    Log.V("[I] Parser.inited")
Log.D("--test--") dropAllTables() sys.exit(0) # 将Config.sample.py复制出一个Config.py文件 def createConfigFile(): if not os.path.exists("Config.py"): fd = open("Config.sample.py", "r") content = fd.read() fd.close() wfd = open("Config.py", "w") wfd.write(content) wfd.flush() wfd.close() if __name__ == '__main__': createConfigFile() # test(); try: from src.Prepare import Prepare Prepare() except Exception, e: Log.E("--------异常退出--------") Log.Exc(e) Log.V(Log.traceback()) sys.exit(0) else: Log.E("-错误")
def __del__(self):
    """Log a message when the instance is finalized."""
    Log.V("[I] Parser dealloc")
def downloadBook(self, url):
    """Download the book at ``url``: its info, chapter list, image and sections.

    NOTE(review): this block reached review with its indentation collapsed.
    The nesting below is reconstructed from statement order; confirm it
    against the original file, in particular the placement of the
    ``downloadStatus`` assignment and of ``setDownloadedForBookUrl``.

    Flow:
      * Return immediately when ``url`` is None or
        ``checkDownloadedBookUrl(url)`` is true.
      * Mark the URL via ``setVisitingBookUrl``.
      * Try ``getBookinfo(url)``; when it yields a record, load its chapters
        with ``getChapter``.  A record without chapters is discarded so the
        book is fetched again, keeping the existing book id.
      * When no usable record exists, fetch the book page and its "mulu" page
        (presumably the table of contents -- confirm) with ``Utils.soupUrl``,
        build the book and section info, then save both.
      * Download the book image and the sections when available, clear
        ``self.chapterDb`` and remove the URL via ``removeBookUrl``.
    """
    Log.V("[I] on begin downloadBook() " + str(url))
    if url == None:
        return
    # Check whether this book has already been downloaded.
    if self.checkDownloadedBookUrl(url):
        return
    # Mark the URL as "visiting".
    self.setVisitingBookUrl(url)
    # Try to get an existing bookInfo.
    bookInfo = self.getBookinfo(url)
    sectionInfo = None
    existsBookId = None
    if bookInfo != None:
        existsBookId = bookInfo.bookId
        Log.D(" downloadBook existsBookId = " + str(existsBookId))
        sectionInfo = self.getChapter(bookInfo.bookId)
        Log.D(" downloadBook sectionInfo = " + str(sectionInfo))
        if sectionInfo == None or sectionInfo.chapters == None or len(
                sectionInfo.chapters) == 0:
            # No chapters: drop both so the branch below fetches the book
            # again (existsBookId is kept for reuse).
            sectionInfo = None
            bookInfo = None
        else:
            sectionInfo.bookInfo = bookInfo
    if bookInfo == None:
        Log.I("[I] on downloadBook() will get soup " + url)
        bookSoup = Utils.soupUrl(url)
        Log.I("[I] on downloadBook() did get soup (%s) %s " %
              (str(bookSoup != None), str(url)))
        if bookSoup != None:
            Log.I("[I] on downloadBook() will get muluSoup")
            muluUrl = self.bookMuluUrl(bookSoup)
            if muluUrl != None:
                muluSoup = Utils.soupUrl(muluUrl)
                Log.I("[I] on downloadBook() did get muluSoup %s" %
                      (str(muluSoup != None)))
                if muluSoup != None:
                    bookInfo = self.bookInfo(bookSoup, muluSoup)
                    Log.I("[I] on downloadBook get bookInfo " + str(bookInfo))
                    if bookInfo != None:
                        bookInfo.setUniqueKey()
                        bookInfo.downBookUrl = url
                        bookInfo.downMuluUrl = muluUrl
                        # bookId: reuse the existing one or allocate a new one.
                        if existsBookId == None:
                            Log.D(" downloadBook will create new bookId")
                            BookId.init(self.kvDb)
                            bookInfo.bookId = BookId.nextBookId()
                        else:
                            Log.D(" downloadBook use exists bookId")
                            bookInfo.bookId = existsBookId
                        # Get the chapter info.
                        sectionInfo = self.sectionInfo(
                            bookInfo, muluUrl, muluSoup)
                        if sectionInfo != None:
                            Log.D(
                                " downloadBook parse sectionInfo success")
                            # Latest chapter.
                            bookInfo.chapterCount = len(
                                sectionInfo.chapters)
                        else:
                            Log.D(
                                " downloadBook error cant parser sectionInfo"
                            )
                            bookInfo.status = BookInfoStatus.Error
                            # NOTE(review): placed inside this `else`; it may
                            # instead belong one level out -- confirm.
                            bookInfo.downloadStatus = BookDownloadStatus.Completed
                        Log.D(" downloadBook save BookInfo " + str(bookInfo) +
                              ", bookId = " + str(bookInfo.bookId) +
                              ", save chapters " + str(sectionInfo))
                        # Save bookInfo.
                        self.saveBookinfo(bookInfo)
                        self.saveChapter(bookInfo.bookId, sectionInfo)
    # Download bookImg.
    if bookInfo != None and bookInfo.bookImg != None:
        self.downloadBookImg(bookInfo.bookImg, bookInfo.uniqueKey)
    if sectionInfo != None:
        self.downloadSection(sectionInfo)
        # NOTE(review): placed inside this `if`; it may instead be
        # unconditional -- confirm.
        self.setDownloadedForBookUrl(url)
    self.chapterDb = None
    # Remove the "visiting" book url.
    self.removeBookUrl(url)
    Log.V("on finished downloadBook() " + str(url))