def save_to_es(self):
    """Store this job item as an ``ArticleType`` document and count it.

    Copies the item's fields onto a new ``ArticleType`` instance, fills the
    ``suggest`` field from the title and tags via ``gen_suggests``, saves
    the document, and then increments the ``lagou_count`` key through
    ``redis_cli``.

    ``crawl_update_time`` is not copied onto the document.

    Returns:
        None.
    """
    doc = ArticleType()

    # Fields copied unconditionally, in the order they are read from the item.
    leading_fields = (
        'url',
        'url_object_id',
        'title',
        'salary',
        'job_city',
        'work_years',
        'degree_need',
        'job_type',
    )
    for field_name in leading_fields:
        setattr(doc, field_name, self[field_name])

    # publish_time is only set on the document when the item holds a
    # truthy value for it.
    publish_time = self['publish_time']
    if publish_time:
        doc.publish_time = publish_time

    trailing_fields = (
        'tags',
        'job_advantage',
        'job_desc',
        'job_addr',
        'company_url',
        'company_name',
        'crawl_time',
    )
    for field_name in trailing_fields:
        setattr(doc, field_name, self[field_name])

    # Title is paired with 10 and tags with 7 (presumably weights -- confirm
    # against gen_suggests).
    suggest_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(ArticleType._doc_type.index, suggest_sources)

    # NOTE(review): no meta.id is assigned before saving, so the document id
    # is left to the backend -- confirm that is intended.
    doc.save()

    redis_cli.incr("lagou_count")
def save2elastic(self):
    """Convert this item into an ``ArticleType`` document and save it.

    Copies the item's fields onto a new ``ArticleType`` instance, passes
    ``content`` through ``remove_tags`` first, uses ``url_object_id`` as the
    document's ``meta.id``, fills the ``suggest`` field from the title and
    tags via ``gen_suggests``, and saves the document.

    Returns:
        None.
    """
    doc = ArticleType()

    # Fields copied unconditionally, in the order they are read from the item.
    for field_name in ("title", "url_object_id", "url", "front_image_url"):
        setattr(doc, field_name, self[field_name])

    # The local image path is optional; copy it only when the item has it.
    if "front_image_path" in self:
        doc.front_image_path = self["front_image_path"]

    for field_name in ("creat_date", "praise_num", "collect_num", "comment_num"):
        setattr(doc, field_name, self[field_name])

    # Content goes through remove_tags before being stored.
    raw_content = self["content"]
    doc.content = remove_tags(raw_content)
    doc.tags = self["tags"]

    # The document id is taken from the item's url_object_id.
    doc.meta.id = self["url_object_id"]

    # Title is paired with 10 and tags with 7 (presumably weights -- confirm
    # against gen_suggests).
    suggest_sources = ((doc.title, 10), (doc.tags, 7))
    doc.suggest = gen_suggests(ArticleType._doc_type.index, suggest_sources)

    doc.save()