Beispiel #1
0
    def sectionInfo(self, bookInfo, muluUrl, bookMuluSoup):
        """Build a SectionInfoModel from a parsed table-of-contents page.

        Finds the first ``<ul>`` whose first CSS class is ``mulu_list`` and
        adds one chapter to the model for each direct child of that list that
        contains an ``<a>`` with a resolvable ``href`` and a single string as
        its text.

        Args:
            bookInfo: Value stored on the returned model's ``bookInfo``.
            muluUrl: Passed to ``Utils.absoluteUrl`` as the second argument
                for every link (presumably the base URL of the page --
                confirm against ``Utils.absoluteUrl``).
            bookMuluSoup: Parsed document exposing a BeautifulSoup-style
                ``find``.

        Returns:
            The populated model, or ``None`` when the list is missing, has no
            children, or no chapter could be added.
        """
        Log.I("[I] on get sectionInfo")
        model = SectionInfoModel()
        model.bookInfo = bookInfo

        def isMuluList(tag):
            # Guard against an empty class list so indexing cannot fail.
            classes = tag.get("class") if tag.name == "ul" else None
            return bool(classes) and classes[0] == "mulu_list"

        muluList = bookMuluSoup.find(isMuluList)
        if muluList is None or not muluList.contents:
            return None

        chapterAdded = False
        for child in muluList.contents:
            atag = child.find("a")
            # Text children are str subclasses, so find() on them is
            # str.find() and yields an int (-1 or an index), never a tag.
            # Element children yield None when they contain no <a>.
            if atag is None or isinstance(atag, int):
                continue

            # .get() avoids a KeyError on anchors that carry no href.
            rawHref = atag.get("href")
            href = None
            if rawHref is not None:
                href = Utils.absoluteUrl(rawHref, muluUrl, None)
            title = atag.string

            if href is not None and title is not None:
                model.addChapter(str(href), str(title))
                chapterAdded = True
            else:
                Log.W(" on getSection found invalid tag " + str(atag) + ", href=" + str(href) + ",title=" + str(title))

        if not chapterAdded:
            return None

        return model
Beispiel #2
0
    def addUrlsFromSoup(self, soup, curUrl):
        """Collect the link targets of ``soup`` and record the valid ones.

        Every ``<a>`` with an ``href`` is resolved through
        ``Utils.absoluteUrl(href, curUrl, self.rootUrl)``. Unresolvable
        (``None``) and duplicate results are dropped, the remainder is
        filtered with ``self.isValidVisitUrl`` and each surviving URL is
        passed to ``self.visitUrlDb.insertUrl``.

        Args:
            soup: Parsed document exposing a BeautifulSoup-style ``find_all``.
            curUrl: Passed to ``Utils.absoluteUrl`` as the second argument
                (presumably the URL of the page being parsed -- confirm).

        Returns:
            A list of the inserted URLs in first-seen order, or ``None`` when
            ``soup`` is falsy.
        """
        if not soup:
            return None

        # Set gives O(1) duplicate checks; assumes resolved URLs are hashable
        # (strings).
        seen = set()
        candidates = []
        for tag in soup.find_all("a"):
            href = tag.get("href")
            if href is None:
                # Anchors without an href have no target to resolve.
                continue
            url = Utils.absoluteUrl(href, curUrl, self.rootUrl)
            if url is None or url in seen:
                continue
            seen.add(url)
            candidates.append(url)

        # Build a real list: a lazy filter() object would be exhausted by the
        # insert loop below and reach the caller empty on Python 3.
        urls = [url for url in candidates if self.isValidVisitUrl(url)]

        for url in urls:
            self.visitUrlDb.insertUrl(url)

        return urls