Esempio n. 1
0
    def execute(self):
        """Run the whole crawl: resume interrupted work, then visit and download.

        The steps are, in order:

        1. If a book url is still marked as "visiting", download that book
           again (the previous run presumably stopped in the middle of it).
        2. If a page url is still marked as "visiting", visit it again.
        3. Visit the next queued page url unconditionally, then keep
           visiting until ``nextVisitUrl()`` returns ``None``.
        4. Download every book url handed out by ``nextPageUrl()`` until it
           returns ``None``.
        """
        Log.V("[I] onExecute()")

        # Resume a book download that was still in progress.
        visitingBookUrl = self.visitingBookUrl()
        if visitingBookUrl is not None:
            self.downloadBook(visitingBookUrl)

        # Resume a page visit that was still in progress.  The original code
        # passed the not-yet-assigned local ``visitUrl`` here, which raised
        # UnboundLocalError whenever there was something to resume.
        visitingUrl = self.visitingUrl()
        if visitingUrl is not None:
            self.visit(visitingUrl)

        # First regular visit.  It is intentionally not guarded against None:
        # presumably nextVisitUrl() yields the root url on a fresh start --
        # TODO confirm against nextVisitUrl()/visit().
        visitUrl = self.nextVisitUrl()
        self.visit(visitUrl)

        Log.I("[I] willVisitNext")
        # Drain the queue of pages that still have to be visited.
        while True:
            visitUrl = self.nextVisitUrl()
            if visitUrl is None:
                break
            self.visit(visitUrl)

        Log.I("[I] willVisitNextPageUrl")
        # Page traversal is finished; download the collected books.
        while True:
            bookUrl = self.nextPageUrl()
            if bookUrl is None:
                break
            self.downloadBook(bookUrl)
        Log.V("------parse finished------")
Esempio n. 2
0
    def __init__(self):
        """Wire the parser up from the shared configuration and factories.

        Copies the crawl settings from ``Config.shared``, resets the section
        download counters, and fetches the database and storage handles the
        other methods use.
        """
        config = Config.shared

        # Where the crawl starts and how urls are built.
        self.rootUrl = config.url
        self.scheme, self.host = config.scheme, config.host

        # Output locations and the configured AES code.
        self.outPath, self.tmpPath = config.outPath, config.tmpPath
        self.aescode = config.aesCode

        # Per-run statistics for section downloads.
        self.sectionDownloadSuccCount = self.sectionDownloadFailedCount = 0

        # Database handles, looked up by name from the shared factory.
        self.bookDb = DbFactory.shared.get("BookinfoDb")
        self.visitUrlDb = DbFactory.shared.get("VisitUrlDb")
        self.visitBookUrlDb = DbFactory.shared.get("VisitBookUrlDb")
        self.kvDb = DbFactory.shared.get("KeyValueDb")

        # Storage backend selected by name, rooted at the output path.
        self.storge = StorgeFactory.shared.get(config.storgeName,
                                               config.outPath)

        Log.V("[I] Parser.inited")
Esempio n. 3
0
    Log.D("--test--")
    dropAllTables()
    sys.exit(0)


# Create Config.py as a copy of Config.sample.py.
def createConfigFile():
    """Create ``Config.py`` from ``Config.sample.py`` if it does not exist.

    Both paths are relative to the current working directory.  An existing
    ``Config.py`` is never overwritten.  If ``Config.sample.py`` cannot be
    opened, the error from ``open`` propagates and no ``Config.py`` is
    created.
    """
    if os.path.exists("Config.py"):
        return

    # ``with`` guarantees both handles are closed even if read/write fails;
    # closing also flushes, so no explicit flush() is needed.
    with open("Config.sample.py", "r") as sample:
        content = sample.read()
    with open("Config.py", "w") as target:
        target.write(content)


if __name__ == '__main__':
    # Make sure a Config.py exists before the rest of the program starts.
    createConfigFile()
    try:
        # Imported here rather than at the top of the file; presumably so the
        # import happens only after Config.py has been created -- TODO confirm.
        from src.Prepare import Prepare
        Prepare()
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3.x
        Log.E("--------异常退出--------")
        Log.Exc(e)
        Log.V(Log.traceback())
        # Abnormal termination: report a non-zero status so callers (shell
        # scripts, supervisors) can tell the run failed.
        sys.exit(1)
else:
    # The module was imported instead of being run as a script.
    Log.E("-错误")
Esempio n. 4
0
 def __del__(self):
     """Log a trace line when this instance is being finalized."""
     message = "[I] Parser dealloc"
     Log.V(message)
Esempio n. 5
0
    def downloadBook(self, url):
        """Download one book (record, cover image and sections) from ``url``.

        Does nothing when ``url`` is ``None`` or when
        ``checkDownloadedBookUrl(url)`` reports the book as already
        downloaded.  Otherwise the url is marked as "visiting", the book
        record and chapter list are taken from storage when usable or parsed
        from the site when not, the cover image and the sections are
        downloaded, and finally the "visiting" mark is removed.

        The url is only flagged as downloaded (``setDownloadedForBookUrl``)
        when a chapter list was available.

        :param url: url of the book page, or ``None``.
        """
        Log.V("[I] on begin downloadBook() " + str(url))

        if url is None:
            return

        # Skip books that have been downloaded before.
        if self.checkDownloadedBookUrl(url):
            return

        # Mark the url as being processed.
        self.setVisitingBookUrl(url)

        # Prefer what is already stored; fall back to parsing the site.
        bookInfo, sectionInfo, existsBookId = self._restoreStoredBook(url)
        if bookInfo is None:
            bookInfo, sectionInfo = self._fetchBookFromSite(url, existsBookId)

        # Download the cover image.
        if bookInfo is not None and bookInfo.bookImg is not None:
            self.downloadBookImg(bookInfo.bookImg, bookInfo.uniqueKey)
        if sectionInfo is not None:
            self.downloadSection(sectionInfo)
            self.setDownloadedForBookUrl(url)
        # NOTE(review): chapterDb is not set anywhere in this method;
        # presumably a per-book handle opened by a helper -- confirm.
        self.chapterDb = None

        # Remove the "visiting" mark for this book url.
        self.removeBookUrl(url)

        Log.V("on finished downloadBook() " + str(url))

    def _restoreStoredBook(self, url):
        """Load the stored book record and chapter list for ``url``.

        :returns: ``(bookInfo, sectionInfo, existsBookId)``.  ``bookInfo``
            and ``sectionInfo`` are both ``None`` unless a stored record with
            a non-empty chapter list exists; ``existsBookId`` is the stored
            book id whenever a record exists, so it can be reused when the
            book has to be parsed again.
        """
        bookInfo = self.getBookinfo(url)
        if bookInfo is None:
            return None, None, None

        existsBookId = bookInfo.bookId
        Log.D(" downloadBook existsBookId = " + str(existsBookId))
        sectionInfo = self.getChapter(bookInfo.bookId)
        Log.D(" downloadBook sectionInfo = " + str(sectionInfo))

        # A record without chapters is unusable: report "nothing stored" so
        # the caller parses the book from the site again.
        if (sectionInfo is None or sectionInfo.chapters is None
                or len(sectionInfo.chapters) == 0):
            return None, None, existsBookId

        sectionInfo.bookInfo = bookInfo
        return bookInfo, sectionInfo, existsBookId

    def _fetchBookFromSite(self, url, existsBookId):
        """Parse the book page and its catalogue page, then save the result.

        :param url: url of the book page.
        :param existsBookId: book id to reuse, or ``None`` to allocate a new
            one through ``BookId``.
        :returns: ``(bookInfo, sectionInfo)``.  Both are ``None`` when a page
            could not be fetched or no book record could be parsed;
            ``sectionInfo`` alone is ``None`` when the chapter list could not
            be parsed (the record is then saved with an error status).
        """
        Log.I("[I] on downloadBook() will get soup " + url)
        bookSoup = Utils.soupUrl(url)
        Log.I("[I] on downloadBook() did get soup (%s) %s " %
              (str(bookSoup is not None), str(url)))
        if bookSoup is None:
            return None, None

        Log.I("[I] on downloadBook() will get muluSoup")
        muluUrl = self.bookMuluUrl(bookSoup)
        if muluUrl is None:
            return None, None

        muluSoup = Utils.soupUrl(muluUrl)
        Log.I("[I] on downloadBook() did get muluSoup %s" %
              (str(muluSoup is not None)))
        if muluSoup is None:
            return None, None

        bookInfo = self.bookInfo(bookSoup, muluSoup)
        Log.I("[I] on downloadBook get bookInfo " + str(bookInfo))
        if bookInfo is None:
            return None, None

        bookInfo.setUniqueKey()
        bookInfo.downBookUrl = url
        bookInfo.downMuluUrl = muluUrl

        # Reuse the stored id when there is one, otherwise allocate a new one.
        if existsBookId is None:
            Log.D(" downloadBook will create new bookId")
            BookId.init(self.kvDb)
            bookInfo.bookId = BookId.nextBookId()
        else:
            Log.D(" downloadBook use exists bookId")
            bookInfo.bookId = existsBookId

        # Parse the chapter list from the catalogue page.
        sectionInfo = self.sectionInfo(bookInfo, muluUrl, muluSoup)
        if sectionInfo is not None:
            Log.D(" downloadBook parse sectionInfo success")
            bookInfo.chapterCount = len(sectionInfo.chapters)
        else:
            Log.D(" downloadBook error cant parser sectionInfo")
            bookInfo.status = BookInfoStatus.Error
            bookInfo.downloadStatus = BookDownloadStatus.Completed

        Log.D(" downloadBook save BookInfo " + str(bookInfo) +
              ", bookId = " + str(bookInfo.bookId) + ", save chapters " +
              str(sectionInfo))
        # Persist the record; saveChapter is called even when sectionInfo is
        # None, exactly as before.
        self.saveBookinfo(bookInfo)
        self.saveChapter(bookInfo.bookId, sectionInfo)
        return bookInfo, sectionInfo