Example #1
0
    def save_to_es(self):
        """Copy this item's fields onto an ``ArticleType`` document and save it.

        Every listed field is read from the item with ``self[name]`` and
        assigned to the attribute of the same name on the document. A
        ``suggest`` value is then built with ``gen_suggests`` from the
        title (weight 10) and the tags (weight 7). After the document is
        saved, the ``lagou_count`` counter is incremented via ``redis_cli``.
        """
        # Item fields mirrored one-to-one onto the document, in copy order.
        # crawl_update_time is not copied.
        copied_fields = (
            'url', 'url_object_id', 'title', 'salary', 'job_city',
            'work_years', 'degree_need', 'job_type', 'publish_time',
            'tags', 'job_advantage', 'job_desc', 'job_addr',
            'company_url', 'company_name', 'crawl_time',
        )

        doc = ArticleType()
        for field in copied_fields:
            # publish_time is only assigned when the item holds a truthy value.
            if field == 'publish_time' and not self[field]:
                continue
            setattr(doc, field, self[field])

        index_name = ArticleType._doc_type.index
        weighted_text = ((doc.title, 10), (doc.tags, 7))
        doc.suggest = gen_suggests(index_name, weighted_text)

        doc.save()
        redis_cli.incr("lagou_count")
Example #2
0
    def process_item(self, item, spider):
        """
        Convert the item into an ES document and store it.

        :param item: mapping-like item holding the scraped article fields
        :param spider: not used by this method
        :return: the same ``item`` that was passed in
        """
        # Create the ES document and use url_object_id as its document id.
        doc = ArticleType()
        doc.meta.id = item['url_object_id']

        # Plain one-to-one field copies.
        doc.url = item['url']
        doc.title = item['title']
        doc.article_type = item['article_type']
        doc.data_source = item['data_source']
        doc.publish_time = item['publish_time']
        doc.abstract = item['abstract']

        # Fall back to a placeholder value when the item has no tags.
        doc.tags = item['tags'] if item['tags'] else '无'

        # Hot score formula: weighted sum of the engagement counters.
        comments = item['comment_num']
        praises = item['praise_num']
        collections = item['collection_num']
        reads = item['read_num']
        doc.hot_score = 8 * comments + 3 * praises + 5 * collections + reads

        # The tuple passed in must be ordered by weight, largest first.
        index_name = ArticleType._doc_type.index
        weighted_text = (
            (doc.title, 10),
            (doc.article_type, 5),
            (doc.tags, 3),
        )
        doc.suggest = self.gen_suggests(index_name, weighted_text)

        # save() stores the document directly in ES.
        doc.save()

        return item