Ejemplo n.º 1
0
def on_get_news_list():
    """Return the result of ``CnbetaArticle.load_list_from_db()`` wrapped by ``success``."""
    articles = CnbetaArticle.load_list_from_db()
    return success(articles)
Ejemplo n.º 2
0
                article = CnbetaArticle()
                titleTag = item.find(attrs={"class": "title"}).find("a")
                contentTag = item.find("span", attrs={"class": "newsinfo"}).find("p")

                article.title = "".join(titleTag.contents)
                article.url = self.targetMainUrl + titleTag["href"]
                article.cover = item.find("div", attrs={"class": "pic"}).find("a").find("img")['src']
                briefStr = contentTag.renderContents().decode("utf-8")

                article.brief = briefStr
                self.itemList.append(article)
            except Exception as e:
                # print(e)
                pass

        logutil.log("CnbetaSpider", "getItemList finished")

    def fetchItemDetail(self):
        """Fetch each item's page and store its content markup on the item.

        For every article in ``self.itemList`` the page at ``item.url`` is
        retrieved with ``self.scratch`` and parsed with ``html.parser``. The
        inner markup of the first ``<div class="content">`` is stored in
        ``item.content`` as a UTF-8 decoded string.

        Items whose page is empty or lacks that ``div`` are logged and
        skipped (their ``content`` attribute is left untouched), so a single
        malformed page does not abort the remaining items.
        """
        for item in self.itemList:
            html = self.scratch(item.url)
            if not html:
                # NOTE(review): assumes scratch() yields a falsy value when
                # nothing could be downloaded — confirm against scratch().
                logutil.log("CnbetaSpider",
                            "fetchItemDetail: empty page for %s" % (item.url,))
                continue

            htmlSoup = BeautifulSoup(html, "html.parser")
            content = htmlSoup.find("div", attrs={"class": "content"})
            if content is None:
                # find() returns None when no tag matches; calling
                # renderContents() on it would raise AttributeError and stop
                # the whole loop.
                logutil.log("CnbetaSpider",
                            "fetchItemDetail: no content div in %s" % (item.url,))
                continue

            item.content = content.renderContents().decode("utf-8")


if __name__ == '__main__':
    # Script entry point: start the spider, then print the list returned by
    # CnbetaArticle.load_list_from_db().
    crawler = CnbetaSpider()
    crawler.startSpider()
    print(CnbetaArticle.load_list_from_db())