Exemple #1
0
    def index_page(self, response):
        """Handle a fetched page according to its crawl depth.

        The depth is read from the ``cur_depth`` key of ``response.save`` and
        defaults to 0 when the key, or the saved state as a whole, is absent.

        * Depth 0 (or lower): delegates to ``self.parse(response)`` and
          returns whatever it returns.
        * Depth > 0: for every anchor whose ``href`` starts with ``http``,
          builds an ``Article`` from the already-fetched HTML
          (``response.text``) and prints each item produced by
          ``article.articles()`` and ``article.category_urls()``.
          Returns ``None``.
        """
        # `save` can be None when the request carried no saved state; fall
        # back to an empty mapping so the depth defaults to 0 instead of
        # raising AttributeError on `.get`.
        saved_state = response.save or {}
        cur_depth = saved_state.get('cur_depth', 0)

        if cur_depth > 0:
            # NOTE(review): the matched link itself is never used, so the same
            # page is re-parsed and re-printed once per link. Kept as-is to
            # preserve the existing output; confirm whether that is intended.
            for _link in response.doc('a[href^="http"]').items():
                # Empty URL: the HTML is supplied directly below, so nothing
                # is fetched over the network by `download`.
                article = Article('', language='zh', memoize_articles=False, fetch_images=False)
                article.download(html=response.text)
                article.parse()
                # NOTE(review): `Article` is defined outside this block;
                # confirm it really exposes `articles()` / `category_urls()`.
                for found_article in article.articles():
                    print(found_article)
                for category_url in article.category_urls():
                    print(category_url)
            return None

        return self.parse(response)