Example #1
0
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        The item is read with ``self[...]``; a missing required field
        propagates whatever that subscript raises (``KeyError`` for
        mapping-like items).  ``front_image_path`` is optional, and a missing
        ``praise_number`` falls back to the sentinel value ``99999``.

        Returns:
            None.
        """
        article = ArticleType()
        article.title = self['title']
        article.create_date = self['create_date']
        # Store the tag-stripped text.  The original code later re-assigned
        # the raw value, silently discarding the result of remove_tags().
        article.content = remove_tags(self['content'])
        article.front_image_url = self['front_image_url']
        if 'front_image_path' in self:
            article.front_image_path = self['front_image_path']
        try:
            article.praise_number = self["praise_number"]
        except KeyError:
            # Only a missing field is expected here; a bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit.
            print("items出错")
            article.praise_number = 99999
        article.comment_nums = self['comment_nums']
        article.tags = self['tags']
        article.fav_nums = self['fav_nums']
        article.url = self['url']
        # The document id is taken from the item's url_object_id.
        article.meta.id = self['url_object_id']

        # Build the suggest value from title and tags; the numbers are
        # presumably per-source weights -- confirm against gen_suggests.
        article.suggest = gen_suggests(
            ArticleType._doc_type.index,
            ((article.title, 10), (article.tags, 7)),
        )

        article.save()
Example #2
0
    def save_to_es(self):
        """Populate an ``ArticleType`` from this item, save it, and bump a counter.

        ``front_image_url`` and ``front_image_path`` are copied only when the
        item contains them; every other field is read unconditionally.  After
        saving, the ``cnblogs_count`` key is incremented through ``redis_cli``.

        Returns:
            None.
        """
        doc = ArticleType()
        for name in ("title", "create_date", "content"):
            setattr(doc, name, self[name])

        # Both image fields are optional on this item.
        for name in ("front_image_url", "front_image_path"):
            if name in self:
                setattr(doc, name, self[name])

        for name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, name, self[name])

        # NOTE(review): this assigns a plain ``id`` attribute, not ``meta.id``
        # -- confirm that is the intended way to key the document.
        doc.id = self["url_object_id"]

        # Generate the search-suggestion value from title and tags.
        index_name = ArticleType._doc_type.index
        doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

        doc.save()
        # Count one more stored document.
        redis_cli.incr("cnblogs_count")

        return None
Example #3
0
    def save_to_es(self):
        """Populate an ``ArticleType`` from this item, save it, and bump a counter.

        ``front_image_path`` is optional, and ``praise_nums`` defaults to ``0``
        when the item does not contain it.  After saving, the
        ``jobbole_count`` key is incremented through ``redis_cli``.

        Returns:
            None.
        """
        doc = ArticleType()
        doc.title = self["title"]
        doc.create_date = self["create_date"]
        # Content is stored with markup removed.
        stripped = remove_tags(self["content"])
        doc.content = stripped
        doc.front_image_url = self["front_image_url"]
        if "front_image_path" in self:
            doc.front_image_path = self["front_image_path"]

        doc.praise_nums = self["praise_nums"] if "praise_nums" in self else 0

        for name in ("fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, name, self[name])
        doc.meta.id = self["url_object_id"]

        index_name = ArticleType._doc_type.index
        suggest_sources = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, suggest_sources)

        doc.save()
        redis_cli.incr("jobbole_count")
        return None
Example #4
0
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        ``front_image_path`` is copied only when the item contains it.  No
        suggest value is set in this variant.

        Returns:
            None.
        """
        doc = ArticleType()

        def copy(*names):
            # Transfer each named field from the item to the document, in order.
            for name in names:
                setattr(doc, name, self[name])

        copy("title", "create_date")
        doc.content = remove_tags(self["content"])
        copy("front_image_url")
        if "front_image_path" in self:
            copy("front_image_path")
        copy("praise_nums", "fav_nums", "comment_nums", "url", "tags")
        doc.meta.id = self["url_object_id"]

        doc.save()
        return None
Example #5
0
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        The document's ``content`` is the tag-stripped ``contents`` field of
        the item.  ``front_img_path`` is copied only when present.

        Returns:
            None.
        """
        doc = ArticleType()
        for name in ("title", "create_date"):
            setattr(doc, name, self[name])

        # Note the differing names: item field "contents" -> document "content".
        doc.content = remove_tags(self["contents"])
        doc.front_img_url = self["front_img_url"]
        if "front_img_path" in self:
            doc.front_img_path = self["front_img_path"]

        for name in ("store", "zan", "comments", "url", "tags", "url_md5"):
            setattr(doc, name, self[name])

        index_name = ArticleType._doc_type.index
        doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

        doc.save()
        return None
Example #6
0
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        All eight fields are read unconditionally from the item, then a
        suggest value is generated from the title and tags.

        Returns:
            None.
        """
        copied_fields = (
            "title",
            "create_date",
            "content",
            "url",
            "tags",
            "fav_nums",
            "praise_nums",
            "comment_nums",
        )

        doc = ArticleType()
        for name in copied_fields:
            setattr(doc, name, self[name])

        index_name = ArticleType._doc_type.index
        suggest_sources = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, suggest_sources)

        doc.save()
        return None
Example #7
0
 def process_item(self, item, spider):
     """Convert ``item`` into an ``ArticleType`` document, save it, return ``item``.

     ``spider`` is accepted but not used.  ``front_image_path`` is copied
     only when the item contains it.
     """
     doc = ArticleType()
     for name in ("title", "create_date"):
         setattr(doc, name, item[name])
     doc.content = remove_tags(item["content"])
     doc.front_image_url = item["front_image_url"]
     if "front_image_path" in item:
         doc.front_image_path = item["front_image_path"]
     for name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
         setattr(doc, name, item[name])
     doc.meta.id = item["url_object_id"]

     index_name = ArticleType._doc_type.index
     doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))
     doc.save()
     return item
Example #8
0
    def save_to_es(self):
        """Convert this article item into an ``ArticleType`` document and save it.

        ``front_image_path`` is copied only when the item contains it.  The
        document id comes from ``url_object_id``.

        Returns:
            None.
        """
        doc = ArticleType()

        def copy(*names):
            # Transfer each named field from the item to the document, in order.
            for name in names:
                setattr(doc, name, self[name])

        copy("title", "create_date")
        doc.content = remove_tags(self['content'])
        copy("front_image_url")
        if "front_image_path" in self:
            copy("front_image_path")
        copy("comments_nums", "praise_nums", "fav_nums", "url", "tags")
        doc.meta.id = self['url_object_id']

        index_name = ArticleType._doc_type.index
        suggest_sources = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, suggest_sources)

        doc.save()
    def process_item(self, item, spider):
        """Store ``item`` as an ``ArticleType`` document and return ``item``.

        The content has tags removed, is stripped, and has ``"\\r\\n"`` and
        tab characters deleted.  After saving, the ``jobbole_count`` key is
        incremented through ``redis_cli``.  ``spider`` is accepted but unused.
        """
        doc = ArticleType()
        doc.title = item["title"]
        doc.create_date = item["create_date"]

        # Clean the body text step by step.
        text = remove_tags(item["content"])
        text = text.strip()
        text = text.replace("\r\n", "")
        text = text.replace("\t", "")
        doc.content = text

        doc.front_image_url = item["front_image_url"]
        # NOTE: front_image_path is not copied to the document here.
        for name in ("praise_nums", "comment_nums", "fav_nums", "url", "tags"):
            setattr(doc, name, item[name])
        # NOTE(review): this assigns a plain ``id`` attribute, not ``meta.id``
        # -- confirm that is the intended way to key the document.
        doc.id = item["url_object_id"]

        doc.title_suggest = self.gen_suggests(doc.title, doc.tags)

        doc.save()
        redis_cli.incr("jobbole_count")
        return item
Example #10
0
    def save_to_es(self):
        """Convert this item into an ``ArticleType`` document and save it.

        ``front_image_path`` is copied only when the item contains it.  The
        index name handed to ``gen_suggests`` is read from
        ``ArticleType._index._name``.

        Returns:
            None.
        """
        doc = ArticleType()
        for name in ("title", "create_date"):
            setattr(doc, name, self[name])
        doc.content = remove_tags(self['content'])
        doc.front_image_url = self['front_image_url']
        if "front_image_path" in self:
            doc.front_image_path = self['front_image_path']
        for name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, name, self[name])
        doc.meta.id = self['url_object_id']

        # Suggest value is generated from the title and tags rather than
        # written by hand.
        index_name = ArticleType._index._name
        suggest_sources = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, suggest_sources)
        doc.save()
        return None
Example #11
0
    def save_to_es(self):
        """Populate an ``ArticleType`` from this item, save it, and bump a counter.

        ``front_image_path`` is copied only when the item contains it.  After
        saving, the ``jobbole_count`` key is incremented through ``redis_cli``.

        Returns:
            None.
        """
        doc = ArticleType()

        def copy(*names):
            # Transfer each named field from the item to the document, in order.
            for name in names:
                setattr(doc, name, self[name])

        copy('title', 'create_date')
        doc.content = remove_tags(self['content'])
        copy('front_image_url')
        if 'front_image_path' in self:
            copy('front_image_path')
        copy('praise_nums', 'fav_nums', 'comment_nums', 'url', 'tags')
        doc.meta.id = self['url_object_id']

        index_name = ArticleType._doc_type.index
        doc.suggest = gen_suggests(index_name, ((doc.title, 10), (doc.tags, 7)))

        doc.save()
        redis_cli.incr("jobbole_count")
        return None
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        ``front_image_path`` may be absent from the item and is copied only
        when present.  ``url_object_id`` is used as the document id.

        Returns:
            None.
        """
        doc = ArticleType()
        for name in ("title", "create_date"):
            setattr(doc, name, self[name])
        doc.content = remove_tags(self["content"])
        doc.front_image_url = self["front_image_url"]
        # front_image_path may not exist on the item.
        if "front_image_path" in self:
            doc.front_image_path = self["front_image_path"]
        for name in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, name, self[name])
        # Use url_object_id as the document id.
        doc.meta.id = self["url_object_id"]

        # Set the search-suggestion value; gen_suggests takes the index and a
        # tuple of (text, number) pairs.
        index_name = ArticleType._doc_type.index
        info_tuple = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, info_tuple)
        doc.save()

        return None