def sectionInfo(self, bookInfo, muluUrl, bookMuluSoup):
    """Build the chapter list of a book from its table-of-contents page.

    Finds the first ``<ul>`` element whose first CSS class is ``mulu_list``
    in *bookMuluSoup* and registers one chapter on the model for every
    child element that contains an ``<a>`` link with both a resolvable
    ``href`` and a text title.

    Args:
        bookInfo: Stored unchanged on the returned model as ``bookInfo``.
        muluUrl: URL of the table-of-contents page; handed to
            ``Utils.absoluteUrl`` to resolve each chapter link.
        bookMuluSoup: Parsed table-of-contents page. Presumably a
            BeautifulSoup document -- only its ``find`` method is used.

    Returns:
        A ``SectionInfoModel`` holding at least one chapter, or ``None``
        when the list is missing, empty, or yields no valid chapter.
    """
    Log.I("[I] on get sectionInfo")
    model = SectionInfoModel()
    model.bookInfo = bookInfo

    def isMuluList(tag):
        # Only the FIRST class name is compared, as in the original filter.
        if tag.name != "ul" or not tag.has_attr("class"):
            return False
        classes = tag["class"]
        return bool(classes) and classes[0] == "mulu_list"

    muluList = bookMuluSoup.find(isMuluList)
    if muluList is None or not muluList.contents:
        return None

    foundChapter = False
    for child in muluList.contents:
        # Text nodes between list items are str subclasses: their find() is
        # str.find() and returns an int (-1, or an index when the text
        # happens to contain "a"), never a tag. Skip them outright.
        if isinstance(child, str):
            continue

        atag = child.find("a")
        if atag is None:
            # Element without any link: nothing to register.
            continue

        # .get() avoids a KeyError on an <a> without href; such a link is
        # reported through the warning branch below instead of crashing.
        rawHref = atag.get("href")
        href = None
        if rawHref is not None:
            href = Utils.absoluteUrl(rawHref, muluUrl, None)
        title = atag.string

        if href is not None and title is not None:
            model.addChapter(str(href), str(title))
            foundChapter = True
        else:
            Log.W(" on getSection found invalid tag " + str(atag) + ", href=" + str(href) + ",title=" + str(title))

    return model if foundChapter else None
def visit(self, url):
    """Crawl a single page and download every book page it links to.

    The URL is flagged through ``setVisitingUrl`` before it is fetched and
    cleared through ``removeVisitUrl`` only after all book pages have been
    handed to ``downloadBook``. Both early exits (page could not be
    fetched, or no usable URLs were found) return without clearing it.

    Args:
        url: Address of the page to crawl.
    """
    urlText = str(url)
    Log.I("[I] on visit() " + urlText)
    self.setVisitingUrl(url)

    soup = Utils.soupUrl(url)
    if not soup:
        Log.W("[W] on visit() soup is None " + urlText)
        return
    Log.I("[I] on visit() did get soup")

    # Store every URL found on this page in the database.
    pageUrls = self.addUrlsFromSoup(soup, url)
    if not Utils.isValidArr(pageUrls):
        Log.W("[W] on visit() urls not found")
        return

    # Pick out the URLs that match book pages, then walk through them.
    bookPages = self.addBookPageUrls(pageUrls)
    if len(bookPages) > 0:
        for bookPage in bookPages:
            self.downloadBook(bookPage)

    self.removeVisitUrl(url)
    Log.I("[I] on visit() finished " + urlText)
def _checkTLJTag(self, tag, model):
    """Store the statistic carried by *tag* on the matching *model* attribute.

    Each known label is tried in turn through ``_getIntFromTLJTag``; the
    first label that yields a value decides which attribute of *model* is
    set, and no further labels are tried. A warning is logged when no
    label yields a value.

    Args:
        tag: Page element to inspect; passed as-is to ``_getIntFromTLJTag``.
        model: Object that receives the parsed value via ``setattr``.
    """
    # (label looked for in the tag, attribute set on the model), in the
    # order they are tried.
    labelledFields = (
        ("总点击数:", "clickCount"),
        ("本月点击:", "monthClickCount"),
        ("本周点击:", "weekClickCount"),
        ("收 藏 数:", "collectionCount"),
        ("总推荐数:", "recommendCount"),
        ("本月推荐:", "monthRecommendCount"),
        ("本周推荐:", "weekRecommendCount"),
        ("文章状态:", "status"),
    )

    for label, fieldName in labelledFields:
        parsed = self._getIntFromTLJTag(tag, label)
        if parsed is not None:
            setattr(model, fieldName, parsed)
            return

    # Reached only when none of the labels produced a value.
    Log.W("[W] 未完全设置tljtag " + str(tag))
def _checkMetaTag(self, tag, model):
    """Copy the content of a recognised ``<meta>`` tag onto *model*.

    The tag's ``property`` value selects the model attribute to set. Its
    ``content`` is stored as-is, except that the update time is converted
    with ``Utils.getTimestamp`` (format ``%Y-%m-%d %H:%M``) and the
    category is mapped through ``self.bookCategory``. A warning is logged
    when the property is not one of the recognised ones.

    Args:
        tag: Mapping-like element exposing ``property`` and ``content``.
        model: Object that receives the value via ``setattr``.
    """
    # (meta property, attribute set on the model)
    propertyFields = (
        ("og:novel:category", "category"),
        ("og:novel:book_name", "title"),
        ("og:novel:author", "author"),
        ("og:description", "des"),
        ("og:novel:update_time", "updateTime"),
        ("og:image", "bookImg"),
    )

    prop = tag["property"]
    fieldName = next(
        (field for name, field in propertyFields if prop == name),
        None,
    )
    if fieldName is None:
        Log.W("[W] 未完全设置metatag " + str(tag))
        return

    content = tag["content"]
    if fieldName == "updateTime":
        content = Utils.getTimestamp(content, "%Y-%m-%d %H:%M")
    elif fieldName == "category":
        content = self.bookCategory(content)
    setattr(model, fieldName, content)