예제 #1
0
    def save_to_es(self):
        """Copy this item onto a new ``ArticleType`` document and save it.

        Each field is read with ``self[...]`` and assigned to the document
        attribute of the same name. ``publish_time`` is only assigned when
        its value is truthy. Once the document has been saved, the
        ``"lagou_count"`` counter is incremented through ``redis_cli``.
        """
        # Field order mirrors the order in which the item is read.
        fields_before_publish_time = (
            'url', 'url_object_id', 'title', 'salary', 'job_city',
            'work_years', 'degree_need', 'job_type',
        )
        fields_after_publish_time = (
            'tags', 'job_advantage', 'job_desc', 'job_addr',
            'company_url', 'company_name', 'crawl_time',
        )

        doc = ArticleType()
        for field in fields_before_publish_time:
            setattr(doc, field, self[field])

        publish_time = self['publish_time']
        if publish_time:
            doc.publish_time = publish_time

        for field in fields_after_publish_time:
            setattr(doc, field, self[field])
        # NOTE: crawl_update_time is not copied onto the document.

        # Suggestion inputs are read back from the document, not the item.
        index = ArticleType._doc_type.index
        suggest_inputs = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index, suggest_inputs)

        doc.save()
        redis_cli.incr("lagou_count")
예제 #2
0
    def save2elastic(self):
        """Convert this item into an ``ArticleType`` document and save it.

        Fields are read with ``self[...]`` and assigned to the document
        attribute of the same name. ``front_image_path`` is copied only when
        the key is present in the item, and ``content`` is passed through
        ``remove_tags`` before being stored. The document's ``meta.id`` is
        set to the item's ``url_object_id``.
        """
        # Field order mirrors the order in which the item is read.
        fields_before_image_path = (
            "title", "url_object_id", "url", "front_image_url",
        )
        fields_after_image_path = (
            "creat_date", "praise_num", "collect_num", "comment_num",
        )

        doc = ArticleType()
        for field in fields_before_image_path:
            setattr(doc, field, self[field])

        if "front_image_path" in self:
            doc.front_image_path = self["front_image_path"]

        for field in fields_after_image_path:
            setattr(doc, field, self[field])

        doc.content = remove_tags(self["content"])
        doc.tags = self["tags"]
        doc.meta.id = self["url_object_id"]

        # Suggestion inputs are read back from the document, not the item.
        index = ArticleType._doc_type.index
        suggest_inputs = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index, suggest_inputs)

        doc.save()