def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``content`` is passed through ``remove_tags`` before being stored, and
    ``front_image_path`` is copied only when the item contains that key.
    Once ``save()`` has returned, the ``"jobble_count"`` counter is
    incremented through ``redis_cli``.
    """
    doc = ArticleType()
    for key in ("title", "create_date"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
    # NOTE(review): the key is spelled "jobble_count" here -- confirm that
    # this is the spelling consumers of the counter expect.
    redis_cli.incr("jobble_count")
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``front_image_path`` and ``download_urls`` are optional: each is copied
    only when the item contains the key.  ``content`` is passed through
    ``remove_tags`` before being stored.
    """
    doc = ArticleType()
    for key in ("title", "url", "front_image_url"):
        setattr(doc, key, self[key])
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("type", "size", "update_time"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    for key in ("tag", "fav_nums"):
        setattr(doc, key, self[key])
    if "download_urls" in self:
        doc.download_urls = self["download_urls"]
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tag carries weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggestions(index_name, ((doc.title, 10), (doc.tag, 7)))

    doc.save()
    # Counter update is currently disabled:
    # redis_cli.incr("lcsoft_count")
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``content`` is passed through ``remove_tags`` before being stored, and
    ``front_image_path`` is copied only when the item contains that key.
    After ``save()`` the ``"jobbole_count"`` counter is incremented through
    the module-level ``redis_cli``.
    """
    doc = ArticleType()
    for key in ("title", "create_date"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    # Persist the document; this method is meant to be called from the
    # item pipelines.
    doc.save()
    # Bump the counter by one for each saved item.
    redis_cli.incr("jobbole_count")
def save_to_es(self):
    """Convert this item into an ``ArticleType`` document and save it.

    ``content`` is passed through ``remove_tags`` before being stored, and
    ``front_image_path`` is copied only when the item contains that key.
    After ``save()`` the ``"jobbole_count"`` counter is incremented through
    ``redis_cli``.
    """
    doc = ArticleType()
    for key in ("title", "create_time"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("praise_num", "fav_num", "comment_num", "url", "tags"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
    redis_cli.incr("jobbole_count")
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``content`` is passed through ``remove_tags`` before being stored, and
    ``front_image_path`` is copied only when the item contains that key.
    """
    doc = ArticleType()
    for key in ("title", "create_date"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in (
        "front_image_url",
        "praise_nums",
        "fav_nums",
        "comment_nums",
        "url",
        "tags",
    ):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``content`` is passed through ``remove_tags`` before being stored, and
    ``front_image_path`` is copied only when the item contains that key.
    """
    doc = ArticleType()
    for key in ("title", "create_date"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("praise_nums", "comment_nums", "fav_nums", "url", "tags"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
def process_item(self, item, spider):
    """Build an ``ArticleType`` document from ``item``, save it, return ``item``.

    ``front_image_path`` is read with ``item.get`` and is therefore ``None``
    when the item lacks that key; every other field is read with ``item[...]``.
    ``content`` is passed through ``remove_tags`` before being stored.
    ``spider`` is not used by this method.
    """
    doc = ArticleType()
    for key in ("title", "url"):
        setattr(doc, key, item[key])
    doc.front_image_path = item.get("front_image_path")
    for key in (
        "front_image_url",
        "create_date",
        "praise_nums",
        "fav_nums",
        "comment_nums",
        "tag",
    ):
        setattr(doc, key, item[key])
    doc.content = remove_tags(item["content"])
    doc.meta.id = item["url_object_id"]

    # Suggestion sources: title carries weight 10, tag carries weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = self.get_suggest(index_name, ((doc.title, 10), (doc.tag, 7)))

    doc.save()
    return item
def save2elastic(self):
    """Convert this item into an ``ArticleType`` document and save it.

    ``front_image_path`` is copied only when the item contains that key, and
    ``content`` is passed through ``remove_tags`` before being stored.
    ``url_object_id`` is stored both as a regular field and as the
    document's ``meta.id``.
    """
    doc = ArticleType()
    for key in ("title", "url_object_id", "url", "front_image_url"):
        setattr(doc, key, self[key])
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    for key in ("creat_date", "praise_num", "collect_num", "comment_num"):
        setattr(doc, key, self[key])
    doc.content = remove_tags(self["content"])
    doc.tags = self["tags"]
    doc.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``front_image_url`` and ``front_image_path`` are each copied from the
    item key of the same name, and only when the item contains that key.
    ``content`` is stored as-is.  After ``save()`` the ``"jobbole_count"``
    counter is incremented through ``redis_cli``.
    """
    article = ArticleType()
    article.title = self["title"]
    article.create_date = self["create_date"]

    # The URL field must come from the item's URL, not from the local image
    # path; the membership test keeps items without the key from raising.
    if "front_image_url" in self:
        article.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        article.front_image_path = self["front_image_path"]

    article.praise_nums = self["praise_nums"]
    article.fav_nums = self["fav_nums"]
    article.comment_nums = self["comment_nums"]
    article.url = self["url"]
    article.tags = self["tags"]
    article.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    article.suggest = gen_suggests(
        ArticleType._doc_type.index,
        ((article.title, 10), (article.tags, 7)),
    )
    article.content = self["content"]
    article.save()

    # Record the number of inserted items in redis.
    redis_cli.incr("jobbole_count")
def save_to_es(self):
    """Convert this item into an ``ArticleType`` document and save it.

    ``content`` is stored exactly as held by the item, and
    ``front_image_path`` is copied only when the item contains that key.
    """
    doc = ArticleType()
    for key in ("title", "create_date", "url", "content"):
        setattr(doc, key, self[key])
    doc.meta.id = self["url_object_id"]
    doc.front_image_url = self["front_image_url"]
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]
    doc.tags = self["tags"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    index_name = ArticleType._doc_type.index
    doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

    doc.save()
def save_to_es(self):
    """Copy this item's fields onto a new ``ArticleType`` and save it.

    ``content`` is passed through ``remove_tags`` before being stored.
    ``front_image_path`` is set to ``None`` when the item lacks that key.
    Each counter field (``praise_nums``, ``fav_nums``, ``comment_nums``) is
    copied from the item key of the same name.  After ``save()`` the
    ``"jobbole_count"`` counter is incremented through ``redis_cli``.
    """
    article = ArticleType()
    article.title = self["title"]
    article.create_date = self["create_date"]
    article.content = remove_tags(self["content"])
    article.front_image_url = self["front_image_url"]
    article.front_image_path = (
        self["front_image_path"] if "front_image_path" in self else None
    )

    # Each counter reads its own key; fav_nums and comment_nums must not be
    # filled from praise_nums.
    article.praise_nums = self["praise_nums"]
    article.fav_nums = self["fav_nums"]
    article.comment_nums = self["comment_nums"]

    article.tags = self["tags"]
    article.url = self["url"]
    article.meta.id = self["url_object_id"]

    # Suggestion sources: title carries weight 10, tags carry weight 7.
    article.suggest = gen_suggest(
        ArticleType,
        ((article.title, 10), (article.tags, 7)),
    )
    article.save()
    redis_cli.incr("jobbole_count")