def sectionInfo(self, bookInfo, muluUrl, bookMuluSoup):
    """Build a SectionInfoModel from the chapter list found in a parsed page.

    The chapter list is the first ``<ul>`` whose first CSS class is
    ``mulu_list``. For every direct child of that list, the first ``<a>``
    inside it contributes one chapter (resolved href + link text).

    Args:
        bookInfo: Stored unchanged on the returned model as ``bookInfo``.
        muluUrl: URL of the page being parsed; passed to
            ``Utils.absoluteUrl`` as the base for resolving each href.
        bookMuluSoup: Parsed page exposing a BeautifulSoup-style ``find``.

    Returns:
        The populated model, or None when the list is missing or empty, or
        when no child yielded a usable (href, title) pair.
    """
    Log.I("[I] on get sectionInfo")
    model = SectionInfoModel()
    model.bookInfo = bookInfo

    muluList = bookMuluSoup.find(
        lambda tag: tag.name == "ul"
        and tag.has_attr("class")
        and tag["class"][0] == "mulu_list"
    )
    if muluList is None or not muluList.contents:
        return None

    added = False
    for child in muluList.contents:
        atag = child.find("a")
        # Text nodes are str subclasses, so ``find`` is ``str.find`` there and
        # yields an int (-1, or an index when the text contains "a"); tag
        # children yield None when they hold no anchor. Neither is a link.
        if atag is None or isinstance(atag, int):
            continue

        # ``.get`` so an anchor without href is reported below, not a KeyError.
        rawHref = atag.get("href")
        href = None
        if rawHref is not None:
            href = Utils.absoluteUrl(rawHref, muluUrl, None)
        title = atag.string

        if href is not None and title is not None:
            model.addChapter(str(href), str(title))
            added = True
        else:
            Log.W(" on getSection found invalid tag " + str(atag) + ", href=" + str(href) + ",title=" + str(title))

    if not added:
        return None
    return model
def addUrlsFromSoup(self, soup, curUrl):
    """Collect the visitable links of a parsed page and record them.

    Every ``<a>`` with an ``href`` is resolved through
    ``Utils.absoluteUrl(href, curUrl, self.rootUrl)``. Unresolvable (None)
    and duplicate results are dropped, the rest are filtered with
    ``self.isValidVisitUrl`` and each survivor is passed to
    ``self.visitUrlDb.insertUrl``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all``.
        curUrl: URL of the page, used as the base for relative links.

    Returns:
        None when ``soup`` is falsy; otherwise the list of URLs that were
        inserted, in first-seen order.
    """
    if not soup:
        return None

    # NOTE(review): assumes resolved URLs are hashable (strings) - confirm
    # against Utils.absoluteUrl.
    seen = set()
    candidates = []
    for tag in soup.find_all("a"):
        rawHref = tag.get("href")
        if rawHref is None:
            # Anchors without href (e.g. named anchors) carry no link.
            continue
        url = Utils.absoluteUrl(rawHref, curUrl, self.rootUrl)
        if url is not None and url not in seen:
            seen.add(url)
            candidates.append(url)

    # Build a real list: on Python 3 ``filter`` is a one-shot iterator that
    # the insert loop would exhaust, leaving the caller nothing to read.
    urls = [url for url in candidates if self.isValidVisitUrl(url)]
    for url in urls:
        self.visitUrlDb.insertUrl(url)
    return urls