Esempio n. 1
0
    def process_item(self, item, spider):
        """Copy a scraped item into an ``Article``, save it, and return the item.

        Args:
            item: Mapping-style object providing the keys read below
                ("title", "create_date", "content", "front_image_url",
                "praise_nums", "comment_nums", "fav_nums", "url", "tags",
                "url_object_id").
            spider: Not used by this method.

        Returns:
            The same ``item`` that was passed in, unchanged.
        """
        doc = Article()

        for field in ("title", "create_date"):
            setattr(doc, field, item[field])

        # Run remove_tags first, then trim the ends and drop CRLF pairs and tabs.
        stripped_text = remove_tags(item["content"]).strip()
        doc.content = stripped_text.replace("\r\n", "").replace("\t", "")

        doc.front_image_url = item["front_image_url"]
        # front_image_path is deliberately not copied here; the assignment
        # was disabled in the original code.
        for field in ("praise_nums", "comment_nums", "fav_nums", "url", "tags"):
            setattr(doc, field, item[field])

        # NOTE(review): this sets a plain `id` attribute. If Article is an
        # elasticsearch-dsl document, the document id is usually assigned via
        # `meta.id` instead — confirm which one is intended.
        doc.id = item["url_object_id"]

        # Suggestions are built from the values as stored on the document.
        doc.title_suggest = self.gen_suggests(doc.title, doc.tags)

        doc.save()

        return item
Esempio n. 2
0
    def save_to_es(self):
        """Build an ``Article`` from this item's fields and save it.

        The item is read with mapping-style access (``self[key]``).
        "front_image_path" is copied only when the key is present; every
        other key read below is required. After the save, the
        "jobbole_count" counter is incremented through ``redis_cli``.

        Returns:
            None.
        """
        doc = Article()

        for field in ("title", "create_date"):
            setattr(doc, field, self[field])

        doc.content = remove_tags(self["content"])
        doc.front_image_url = self["front_image_url"]
        if "front_image_path" in self:
            doc.front_image_path = self["front_image_path"]

        for field in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, field, self[field])

        doc.meta.id = self["url_object_id"]

        # Each source value is paired with an integer (7 for the title, 8 for
        # the tags) — presumably a suggestion weight; confirm in gen_suggests.
        index_name = Article._doc_type.index
        weighted_sources = ((doc.title, 7), (doc.tags, 8))
        doc.title_suggest = gen_suggests(index_name, weighted_sources)

        doc.save()

        redis_cli.incr("jobbole_count")