Esempio n. 1
0
    def downloadSection(self, sectionInfo):
        """Download every chapter listed in ``sectionInfo``, one after another.

        Resets the success/failure counters kept on ``self``, then hands each
        chapter (with its position in the list) to ``downloadOneSection``.
        The book's ``uniqueKey`` is passed along as the ``toDir`` argument.
        The chapter total and both counters are logged on entry and exit.
        """
        chapters = sectionInfo.chapters
        total = len(chapters)
        Log.I("[I] on downloadSection() enter will download section count %s" %
              str(total))

        # Counters are updated elsewhere (see onDownloadSectionCompleted).
        self.sectionDownloadSuccCount = 0
        self.sectionDownloadFailedCount = 0

        targetDir = sectionInfo.bookInfo.uniqueKey
        for position, chapter in enumerate(chapters):
            self.downloadOneSection(position, chapter, targetDir)

        summary = (
            "[I] on downloadSection() exit download all section(%s) succ section(%s) failed section(%s)"
            % (str(total), str(self.sectionDownloadSuccCount),
               str(self.sectionDownloadFailedCount)))
        Log.I(summary)
Esempio n. 2
0
 def downloadBookImg(self, url, toDir):
     """Download the book cover at ``url`` unless it is already stored.

     The file key is ``Utils.md5str(url)``. ``self.storge.checkFileExists``
     receives a callback that is given the existence flag; the actual
     download (``_downloadBookImg``) is only triggered when the flag is falsy.
     """
     Log.I("[I] dowlonad bookImg " + url)
     imgKey = Utils.md5str(url)

     def _fetchIfMissing(exists):
         # Mirrors `not exists and download(...)`: False when already present,
         # otherwise whatever the download call returns.
         if exists:
             return False
         return self._downloadBookImg(url, imgKey, toDir)

     self.storge.checkFileExists(imgKey, toDir, _fetchIfMissing)
Esempio n. 3
0
    def sectionInfo(self, bookInfo, muluUrl, bookMuluSoup):
        """Build the chapter list of a book from its table-of-contents page.

        Finds the first ``<ul>`` whose first CSS class is ``mulu_list`` and,
        for every direct child that contains a link, registers the resolved
        link URL and the link text through ``model.addChapter``.

        Args:
            bookInfo: Book model; stored on the returned model as ``bookInfo``.
            muluUrl: URL of the table-of-contents page, passed to
                ``Utils.absoluteUrl`` to resolve each chapter ``href``.
            bookMuluSoup: Parsed table-of-contents page (an object exposing a
                BeautifulSoup-style ``find``).

        Returns:
            A ``SectionInfoModel`` with at least one chapter, or ``None`` when
            the list is missing or empty, or no valid chapter link was found.
        """
        Log.I("[I] on get sectionInfo")
        model = SectionInfoModel()
        model.bookInfo = bookInfo

        def isMuluList(tag):
            if tag.name != "ul" or not tag.has_attr("class"):
                return False
            classes = tag["class"]
            # Guard against an empty class value before indexing.
            return len(classes) > 0 and classes[0] == "mulu_list"

        muluList = bookMuluSoup.find(isMuluList)
        if muluList is None or not muluList.contents:
            return None

        added = False
        for child in muluList.contents:
            # Text children are str subclasses, so `.find("a")` on them is
            # str.find and yields an int (possibly >= 0 when the text contains
            # the letter "a"). Element children yield a tag or None.
            atag = child.find("a")
            if atag is None or isinstance(atag, int):
                continue

            # A link without href must not raise; it is reported as invalid.
            rawHref = atag.get("href")
            href = None
            if rawHref is not None:
                href = Utils.absoluteUrl(rawHref, muluUrl, None)
            title = atag.string

            if href is not None and title is not None:
                model.addChapter(str(href), str(title))
                added = True
            else:
                Log.W(" on getSection found invalid tag " + str(atag) + ", href=" + str(href) + ",title=" + str(title))

        if not added:
            return None

        return model
Esempio n. 4
0
 def onDownloadSectionCompleted(self, idx, uniqueKey, succ):
     """Record the outcome of one chapter download.

     Logs the result, bumps the matching counter on ``self`` and stores the
     outcome through ``self.chapterDb.setDownloaded``: value ``1`` when
     ``succ`` is truthy, ``2`` otherwise.
     """
     Log.I("[I] download section(%s) completed succ(%s)" %
           (str(idx), str(succ)))

     if not succ:
         self.sectionDownloadFailedCount += 1
         self.chapterDb.setDownloaded(uniqueKey, 2)
         return

     self.sectionDownloadSuccCount += 1
     self.chapterDb.setDownloaded(uniqueKey, 1)
Esempio n. 5
0
 def executeSql(self, sql):
     """Run ``sql`` on ``self.cursor`` and report whether it succeeded.

     Returns ``True`` when the statement executed, ``False`` when any
     exception was raised; exceptions are never propagated. Failures are
     logged, except a ``pymysql.err.IntegrityError`` whose first argument is
     ``1062`` (presumably MySQL's duplicate-entry code — confirm), which is
     swallowed silently.
     """
     try:
         self.cursor.execute(sql)
         Log.I("[I] 执行 " + sql.strip() + " 成功")
         return True
     except Exception as err:
         silenced = (
             isinstance(err, pymysql.err.IntegrityError)
             and len(err.args) > 0
             and err.args[0] == 1062
         )
         if not silenced:
             Log.E("[I] 执行 " + sql.strip() + " 失败")
             Log.Exc(err)
     return False
Esempio n. 6
0
 def downloadOneSection(self, idx, oneSectionModel, toDir):
     """Download a single chapter unless its file is already stored.

     ``self.storge.checkFileExists`` is given a callback receiving the
     existence flag. When the file is missing, ``_downloadOneSection`` is
     started; when it already exists, the chapter is reported as
     successfully completed via ``onDownloadSectionCompleted``.
     """
     Log.I("[I] downloading section(%s) (%s) (%s)" %
           (str(idx), str(
               oneSectionModel.title), str(oneSectionModel.downUrl)))
     sectionKey = oneSectionModel.uniqueKey
     sectionUrl = oneSectionModel.downUrl

     def _onChecked(exists):
         if exists:
             # Already on disk: nothing to fetch, count it as a success.
             return self.onDownloadSectionCompleted(idx, sectionKey, True)
         # Always hand back a truthy value after starting the download,
         # whatever the download call itself returned.
         return self._downloadOneSection(idx, sectionUrl, sectionKey,
                                         toDir) or True

     self.storge.checkFileExists(sectionKey, toDir, _onChecked)
Esempio n. 7
0
    def visit(self, url):
        """Crawl one page: collect its links and download any books found.

        The URL is marked as being visited, fetched through
        ``Utils.soupUrl``, and its links are passed to ``addUrlsFromSoup``.
        Links recognised by ``addBookPageUrls`` are handed to
        ``downloadBook``. The visiting marker is removed only when the page
        was processed to the end; the two early exits (no soup, no URLs)
        leave it in place.
        """
        shownUrl = str(url)
        Log.I("[I] on visit() " + shownUrl)
        self.setVisitingUrl(url)

        pageSoup = Utils.soupUrl(url)
        if not pageSoup:
            Log.W("[W] on visit() soup is None " + shownUrl)
            return
        Log.I("[I] on visit() did get soup")

        # Register every URL found on this page.
        foundUrls = self.addUrlsFromSoup(pageSoup, url)
        if not Utils.isValidArr(foundUrls):
            Log.W("[W] on visit() urls not found")
            return

        # Keep the ones that match a book page and download each of them.
        for bookUrl in self.addBookPageUrls(foundUrls):
            self.downloadBook(bookUrl)

        self.removeVisitUrl(url)

        Log.I("[I] on visit() finished " + shownUrl)
Esempio n. 8
0
    def execute(self):
        """Run the crawl, resuming any interrupted work first.

        Steps, in order:
          1. If a book was being downloaded when the previous run stopped
             (``visitingBookUrl``), download it again.
          2. If a page was being visited (``visitingUrl``), visit it again.
          3. Visit the next pending URL, then keep visiting until
             ``nextVisitUrl`` returns ``None``.
          4. Download every remaining book URL until ``nextPageUrl`` returns
             ``None``.
        """
        Log.V("[I] onExecute()")

        # Resume a book download that was in progress.
        visitingBookUrl = self.visitingBookUrl()
        if visitingBookUrl is not None:
            self.downloadBook(visitingBookUrl)

        # Resume a page visit that was in progress. The page to revisit is
        # `visitingUrl`; `visitUrl` is not assigned yet at this point.
        visitingUrl = self.visitingUrl()
        if visitingUrl is not None:
            self.visit(visitingUrl)

        # Otherwise start searching from the root.
        visitUrl = self.nextVisitUrl()
        self.visit(visitUrl)

        Log.I("[I] willVisitNext")
        # Process the remaining pending pages.
        while True:
            visitUrl = self.nextVisitUrl()
            if visitUrl is None:
                break
            self.visit(visitUrl)

        Log.I("[I] willVisitNextPageUrl")
        # Page traversal is complete; download the remaining books.
        while True:
            bookUrl = self.nextPageUrl()
            if bookUrl is None:
                break
            self.downloadBook(bookUrl)
        Log.V("------parse finished------")
Esempio n. 9
0
    def chapterContent(self, chapterSoup):
        """Extract the chapter text from a parsed chapter page.

        Reads the direct children of the element with id ``htmlContent``.
        Children accepted by ``Utils.isSoupStr`` are stripped; non-empty ones
        are emitted one per line (each followed by a newline). Lines
        containing the marker "全本小说" are logged and skipped.

        Returns:
            The joined text (possibly empty), or ``None`` when the container
            element is not present.
        """
        container = chapterSoup.find(id="htmlContent")
        if container is None:
            return None

        lines = []
        for child in container.contents:
            if not Utils.isSoupStr(child):
                continue
            text = child.strip()
            if "全本小说" in text:
                Log.I("[W] ignore line " + str(text))
                continue
            if text:
                lines.append(text + "\n")
        return "".join(lines)
Esempio n. 10
0
    def bookInfo(self, bookPageSoup, bookMuluSoup):
        """Assemble a ``BookInfoModel`` from the book page and its contents page.

        Three sources feed the model:
          * every ``div`` of class ``tLJ`` on the book page, via
            ``_checkTLJTag``;
          * the word count, taken from the first match of the
            "已写了(\\d+)字" pattern in the book page text;
          * the ``meta`` tags of the contents page whose ``property`` matches
            the ``og:`` pattern, via ``_checkMetaTag``.

        Always returns the model, even when nothing was filled in.
        """
        Log.I("[I] on get bookInfo ")
        info = BookInfoModel()

        # Inspect the tLJ tags on the book page.
        for tljTag in Utils.findAllClassTag(bookPageSoup, "div", "tLJ"):
            self._checkTLJTag(tljTag, info)

        # Word count.
        matches = Utils.findAll(r"已写了(\d+)字", str(bookPageSoup))
        if matches:
            info.wordsCount = matches[0]

        def isOgMeta(t):
            if t.name != "meta" or not t.has_attr("property"):
                return False
            # NOTE(review): the pattern ends with a literal double quote, which
            # a bare property value would not normally contain — confirm
            # against Utils.isMatch that this ever matches.
            return Utils.isMatch("og:.+?\"", t["property"]) is not None

        # Inspect the metadata on the contents page.
        for metaTag in bookMuluSoup.find_all(isOgMeta):
            self._checkMetaTag(metaTag, info)

        return info
Esempio n. 11
0
    def downloadBook(self, url):
        """Download one book: its metadata, cover image and all chapters.

        Flow, as visible in this method:
          1. Return at once when ``url`` is None or
             ``checkDownloadedBookUrl(url)`` is truthy.
          2. Mark ``url`` as the book currently being visited.
          3. Try previously saved data (``getBookinfo`` / ``getChapter``).
             If a saved book has no chapters, both are discarded, but its
             ``bookId`` is remembered so it can be reused.
          4. When no usable saved data exists, fetch the book page and its
             table-of-contents page, build the models, and save them.
          5. Download the cover image and the chapters when available, then
             clear ``self.chapterDb`` and remove the visiting marker.

        Any failed fetch in step 4 simply leaves ``bookInfo``/``sectionInfo``
        as None, so step 5 has nothing to download but the cleanup still runs.
        """
        Log.V("[I] on begin downloadBook() " + str(url))

        if url == None:
            return

        # Skip books that were already downloaded
        if self.checkDownloadedBookUrl(url):
            return

        # Mark this book URL as being visited
        self.setVisitingBookUrl(url)

        # Try to get an existing bookInfo first
        bookInfo = self.getBookinfo(url)
        sectionInfo = None
        existsBookId = None
        if bookInfo != None:
            existsBookId = bookInfo.bookId
            Log.D(" downloadBook existsBookId = " + str(existsBookId))
            sectionInfo = self.getChapter(bookInfo.bookId)
            Log.D(" downloadBook sectionInfo = " + str(sectionInfo))
            # A book without chapters is treated as unusable: drop both so the
            # page is parsed again below (existsBookId is kept for reuse).
            if sectionInfo == None or sectionInfo.chapters == None or len(
                    sectionInfo.chapters) == 0:
                sectionInfo = None
                bookInfo = None
            else:
                sectionInfo.bookInfo = bookInfo

        if bookInfo == None:
            Log.I("[I] on downloadBook() will get soup " + url)
            bookSoup = Utils.soupUrl(url)
            Log.I("[I] on downloadBook() did get soup (%s) %s " %
                  (str(bookSoup != None), str(url)))
            if bookSoup != None:
                Log.I("[I] on downloadBook() will get muluSoup")
                muluUrl = self.bookMuluUrl(bookSoup)
                if muluUrl != None:
                    muluSoup = Utils.soupUrl(muluUrl)
                    Log.I("[I] on downloadBook() did get muluSoup %s" %
                          (str(muluSoup != None)))
                    if muluSoup != None:
                        bookInfo = self.bookInfo(bookSoup, muluSoup)
                        Log.I("[I] on downloadBook get bookInfo " +
                              str(bookInfo))
                        if bookInfo != None:
                            bookInfo.setUniqueKey()
                            bookInfo.downBookUrl = url
                            bookInfo.downMuluUrl = muluUrl

                            # bookId: reuse the known one, otherwise allocate a new one
                            if existsBookId == None:
                                Log.D(" downloadBook will create new bookId")
                                BookId.init(self.kvDb)
                                bookInfo.bookId = BookId.nextBookId()
                            else:
                                Log.D(" downloadBook use exists bookId")
                                bookInfo.bookId = existsBookId

                            # Parse the chapter list
                            sectionInfo = self.sectionInfo(
                                bookInfo, muluUrl, muluSoup)
                            if sectionInfo != None:
                                Log.D(
                                    " downloadBook parse sectionInfo success")
                                # Record how many chapters were found
                                bookInfo.chapterCount = len(
                                    sectionInfo.chapters)
                            else:
                                Log.D(
                                    " downloadBook error cant parser sectionInfo"
                                )
                                bookInfo.status = BookInfoStatus.Error
                                bookInfo.downloadStatus = BookDownloadStatus.Completed
                            Log.D(" downloadBook save BookInfo " +
                                  str(bookInfo) + ", bookId = " +
                                  str(bookInfo.bookId) + ", save chapters " +
                                  str(sectionInfo))
                            # Save bookInfo and its chapters
                            # NOTE(review): sectionInfo may be None here when
                            # parsing failed — confirm saveChapter accepts that.
                            self.saveBookinfo(bookInfo)
                            self.saveChapter(bookInfo.bookId, sectionInfo)

        # Download the cover image
        if bookInfo != None and bookInfo.bookImg != None:
            self.downloadBookImg(bookInfo.bookImg, bookInfo.uniqueKey)
        if sectionInfo != None:
            self.downloadSection(sectionInfo)
            self.setDownloadedForBookUrl(url)
        self.chapterDb = None

        # Remove the visiting marker for this book URL
        self.removeBookUrl(url)

        Log.V("on finished downloadBook() " + str(url))