def process_item(self, item, spider):
    """Build an ``Article`` document from a scraped item and save it.

    The signature matches Scrapy's item-pipeline hook; ``spider`` is
    accepted for that interface but is not used here.

    Args:
        item: Mapping-like scraped item. Must provide the keys ``title``,
            ``create_date``, ``content``, ``front_image_url``,
            ``praise_nums``, ``comment_nums``, ``fav_nums``, ``url``,
            ``tags`` and ``url_object_id``.
        spider: The spider that produced ``item`` (unused).

    Returns:
        The same ``item``, unchanged, so later pipeline stages can use it.

    Raises:
        KeyError: If any of the required keys is missing from ``item``.
    """
    article = Article()
    article.title = item["title"]
    article.create_date = item["create_date"]
    # Strip markup first, then drop surrounding whitespace and the embedded
    # CRLF / tab characters left over from the page layout.
    article.content = (
        remove_tags(item["content"]).strip().replace("\r\n", "").replace("\t", "")
    )
    article.front_image_url = item["front_image_url"]
    # NOTE: front_image_path is not copied here; the assignment was
    # commented out in the original code.
    article.praise_nums = item["praise_nums"]
    article.comment_nums = item["comment_nums"]
    article.fav_nums = item["fav_nums"]
    article.url = item["url"]
    article.tags = item["tags"]

    # The document identifier belongs on ``meta.id`` (as save_to_es does).
    # Assigning ``article.id`` would only add an ordinary ``id`` field and
    # leave the document id to be auto-generated, so re-crawling the same
    # URL would create duplicates instead of overwriting.
    # NOTE(review): assumes Article is an elasticsearch-dsl document -- confirm.
    article.meta.id = item["url_object_id"]

    article.title_suggest = self.gen_suggests(article.title, article.tags)

    article.save()
    return item
def save_to_es(self):
    """Copy this item's fields into an ``Article`` document and save it.

    Fields are read from ``self`` by key. ``content`` is passed through
    ``remove_tags`` before being stored, and ``front_image_path`` is copied
    only when that key is present. The document id is taken from
    ``url_object_id``. After the document is saved, the ``jobbole_count``
    counter on ``redis_cli`` is incremented.

    Returns:
        None.

    Raises:
        KeyError: If a required key is missing from the item.
    """
    doc = Article()

    # Fields copied verbatim, in the same order as they are read.
    for field_name in ("title", "create_date"):
        setattr(doc, field_name, self[field_name])

    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]

    # The local image path is optional on the item.
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    for field_name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, field_name, self[field_name])

    doc.meta.id = self["url_object_id"]

    # Suggestion input: (text, weight) pairs handed to gen_suggests
    # together with the Article index name.
    index_name = Article._doc_type.index
    weighted_sources = ((doc.title, 7), (doc.tags, 8))
    doc.title_suggest = gen_suggests(index_name, weighted_sources)

    doc.save()
    redis_cli.incr("jobbole_count")