class NewsType(DocType):
    """Mapping for a news item (index ``information``, doc type ``news``)."""

    # Analyzed title and the completion field backed by ``ik_analyzer``.
    title = Text(analyzer="ik_max_word")
    suggest = Completion(analyzer=ik_analyzer)

    # Page location, stored as exact-match keywords.
    url = Keyword()
    url_md5 = Keyword()

    # Free text analyzed with ``ik_smart``.
    category = Text(analyzer="ik_smart")
    summary = Text(analyzer="ik_smart")

    image_urls = Keyword()
    image_path = Keyword()
    from_platform = Keyword()

    # ``news_time`` and ``news_score`` are mapped as keywords; only
    # ``crawl_time`` is a real date field.
    news_time = Keyword()
    crawl_time = Date()
    news_score = Keyword()

    class Meta:
        index = "information"
        doc_type = "news"
class NewsType(Document):
    """Mapping for a news document stored in the ``news`` index."""

    suggest = Completion(analyzer="ik_max_word")

    # Identity and publication date.
    id = Keyword()
    pubDate = Date()

    # Analyzed text fields.
    title = Text(analyzer="ik_max_word")
    summary = Text(analyzer="ik_max_word")

    # Provenance, stored as exact-match keywords.
    infoSource = Keyword()
    sourceUrl = Keyword()
    provinceId = Keyword()
    crawlTime = Date()
    provinceName = Text(analyzer="ik_max_word")

    class Index:
        name = "news"
        settings = {"number_of_shards": 1}
class ZhihuQuestionType(DocType):
    """Mapping for a Zhihu question (index ``zhihu_question``, doc type ``article``)."""

    suggest = Completion(analyzer=ik_analyzer)

    zhihu_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        index = "zhihu_question"
        doc_type = "article"
class ArticalType(DocType):
    """Mapping for an article (index ``artical_linux``, doc type ``jobbole``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # URLs and image locations are exact-match keywords.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    favor_nums = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "artical_linux"
        doc_type = "jobbole"
class WeiboType(Document):
    """Mapping for a Weibo post stored in the ``weibo`` index."""

    suggest = Completion(analyzer="ik_max_word")

    id = Keyword()
    created_at = Date()

    # Nested structure; no inner properties are declared here.
    user = Object()

    # ``multi=True``: a post may carry several picture values.
    pics = Keyword(multi=True)

    # Integer counters.
    reposts_count = Integer()
    comments_count = Integer()
    attitudes_count = Integer()

    text = Text(analyzer="ik_max_word")
    video_info = Object()

    class Index:
        name = "weibo"
        settings = {"number_of_shards": 4}
class ZhihuAnswerType(DocType):
    """Mapping for a Zhihu answer (index ``zhihu``, doc type ``zhihuanswer``)."""

    suggest = Completion(analyzer=ik_analyzer)

    # Identifiers, stored as exact-match keywords.
    zhihu_id = Keyword()
    url = Keyword()
    question_id = Keyword()
    author_id = Keyword()

    content = Text(analyzer="ik_max_word")

    # Integer counters.
    praise_num = Integer()
    comments_num = Integer()

    # Timestamps.
    create_time = Date()
    update_time = Date()
    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "zhihuanswer"
class Block(ZubbiDoc):
    """Document mapping for a block, with context-aware name suggestions.

    The completion field ``name_suggest`` is filtered by two category
    contexts whose values are read from the ``private`` and ``tenants``
    fields of the same document.
    """

    name_suggest = Completion(
        contexts=[
            {"name": "private", "type": "category", "path": "private"},
            {"name": "tenants", "type": "category", "path": "tenants"},
        ]
    )
    repo = Text(analyzer="whitespace")
    tenants = Text(multi=True, analyzer="whitespace")
    # NOTE (fschmidt): Elasticsearch does not support context suggestion for
    # Boolean fields. As we are using the private flag to filter the auto-
    # completion results, this must be Text.
    private = Text()
    url = Text()
    description = Text(analyzer="whitespace")
    description_html = Text()
    platforms = Text(multi=True, analyzer="whitespace")
    last_updated = Date(default_timezone="UTC")

    @staticmethod
    def suggest_field():
        """Return the name of the completion field used for suggestions."""
        return "name_suggest"

    @property
    def has_html_description(self):
        """Whether a non-empty ``description_html`` is present."""
        return bool(self.description_html)

    @property
    def has_html_changelog(self):
        """Whether a non-empty ``changelog_html`` is present.

        ``changelog_html`` is not declared on this class (presumably it is
        provided by subclasses -- confirm), so it is looked up defensively:
        a document without that attribute reports ``False`` instead of
        raising ``AttributeError`` from inside the property.
        """
        return bool(getattr(self, "changelog_html", None))

    @property
    def description_rendered(self):
        """Return the description as markup, preferring the HTML variant."""
        return self._renderable_field(self.description_html, self.description)

    def _renderable_field(self, html, raw):
        """Turn an (html, raw) pair into renderable markup.

        Args:
            html: Pre-rendered HTML; used as-is when truthy.
            raw: Plain-text fallback; escaped and wrapped in ``<pre>``.

        Returns:
            A ``jinja2.Markup`` object, or ``None`` when both are empty.
        """
        # NOTE(review): ``jinja2.Markup`` / ``jinja2.escape`` are no longer
        # exported by Jinja2 >= 3.1 (they live in ``markupsafe``) -- confirm
        # the pinned Jinja2 version before upgrading.
        if html:
            return jinja2.Markup(html)
        if raw:
            return jinja2.Markup("<pre>{}</pre>".format(jinja2.escape(raw)))
        return None
class SihouArticleType(DocType):
    """Mapping for an article (index ``sihou``, doc type ``sihou_article``)."""

    suggest = Completion(analyzer=ik_analyzer)

    # Analyzed headline fields.
    title = Text(analyzer="ik_max_word")
    author = Text(analyzer="ik_max_word")
    create_date = Date()

    url = Keyword()
    url_object_id = Keyword()

    # Integer counters.
    view_count = Integer()
    praise_count = Integer()

    front_image_url = Keyword()
    front_image_path = Keyword()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "sihou"
        doc_type = "sihou_article"
class Article(DocType):
    """Mapping for an article (index ``jobbole``, doc type ``jobbole_article``)."""

    # Completion field; ``ik_analyzer`` is used for both indexing and search.
    title_suggest = Completion(
        analyzer=ik_analyzer,
        search_analyzer=ik_analyzer,
    )

    # Analyzed title with an additional keyword sub-field named ``title``.
    title = Text(
        analyzer="ik_max_word",
        search_analyzer="ik_max_word",
        fields={"title": Keyword()},
    )

    # These are mapped as Text without an explicit analyzer.
    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()

    create_date = Date()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Analyzed tags with an additional keyword sub-field named ``tags``.
    tags = Text(analyzer="ik_max_word", fields={"tags": Keyword()})
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "jobbole_article"
class ZhiHuQuestionType(DocType):
    """Zhihu question type (index ``zhihu``, doc type ``question``)."""

    # Completion field declared without an explicit analyzer.
    suggest = Completion()

    zhihu_id = Keyword()
    # The text fields below use the ``ik_analyzer`` object.
    topics = Text(analyzer=ik_analyzer)
    url = Keyword()
    title = Text(analyzer=ik_analyzer)
    content = Text(analyzer=ik_analyzer)

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "question"
class AnswerType(DocType):
    """Mapping for an answer (index ``answer``, doc type ``tec_answer``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    time = Date()
    link_url = Keyword()
    content = Text(analyzer="ik_max_word")
    url_object_id = Keyword()

    tag = Text(analyzer="ik_max_word")  # tags
    comment_num = Integer()  # number of comments
    read_num = Integer()  # number of reads
    Collection_num = Integer()  # number of times collected
    praise_num = Integer()  # number of likes
    source = Keyword()  # source

    class Meta:
        index = "answer"  # must be lowercase
        doc_type = "tec_answer"
class PipipaneType(DocType):
    """pipipan type (index ``ebooksearch``, doc type ``pipipan``)."""

    suggest = Completion(analyzer=ik_analyzer)  # term suggestions

    title = Text(analyzer="ik_max_word")
    url = Keyword()
    url_object_id = Keyword()
    read_num = Integer()

    type = Text(analyzer="ik_smart")
    source_website = Keyword()

    # Timestamps.
    upload_time = Date()
    crawl_time = Date()

    tag = Text(analyzer="ik_smart")
    description = Text(analyzer="ik_smart")

    class Meta:
        index = "ebooksearch"
        doc_type = "pipipan"
class JobType(DocType):
    """Mapping for a job posting (index ``jobinfo``, doc type ``job``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    url = Keyword()
    url_object_id = Keyword()

    job_type = Text(analyzer="ik_max_word")
    job_city = Keyword()
    job_desc = Text(analyzer="ik_max_word")
    company_name = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")

    # Unlike the other text fields, this one is given the ``ik_analyzer``
    # object rather than the "ik_max_word" analyzer name.
    job_addr = Text(analyzer=ik_analyzer)

    class Meta:
        index = "jobinfo"
        doc_type = "job"
class ArticleType(DocType):
    """Article mapping; index and doc type come from module-level settings."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete

    article_type = Keyword()  # article type
    nick_name = Keyword()  # nickname
    article_title = Keyword()  # article title
    article_link = Keyword()  # article link
    article_desc = Text(analyzer="ik_max_word")  # article description
    comments = Keyword()  # article comments
    digg = Keyword()  # article like count
    user_name = Keyword()  # user name
    views = Keyword()  # view count
    source = Keyword()  # source

    class Meta:
        # Database name and table name.
        index = ELASTICSEARCH_INDEX
        doc_type = ELASTICSEARCH_TYPE
class Declaration(DocType):
    """Declaration document.

    Assumes there's a dynamic mapping with all fields not indexed by default.
    """

    # Personal data of the declarant, including the name-completion field.
    general = Object(
        properties={
            "full_name_suggest": Completion(preserve_separators=False),
            "full_name": String(index="analyzed"),
            "name": String(index="analyzed"),
            "patronymic": String(index="analyzed"),
            "last_name": String(index="analyzed"),
            "family_raw": String(index="analyzed"),
            # Family members: only the name is analyzed.
            "family": Nested(
                properties={
                    "name": String(index="analyzed"),
                    "relations": String(index="no"),
                    "inn": String(index="no"),
                }
            ),
            "post_raw": String(index="analyzed"),
            # Region and office are kept unanalyzed; the post title is analyzed.
            "post": Object(
                properties={
                    "region": String(index="not_analyzed"),
                    "office": String(index="not_analyzed"),
                    "post": String(index="analyzed"),
                }
            ),
        }
    )

    # Metadata about the declaration itself; apart from the date, every
    # field is declared with ``index="no"``.
    declaration = Object(
        properties={
            "date": NoneAwareDate(),
            "notfull": Boolean(index="no"),
            "notfull_lostpages": String(index="no"),
            "additional_info": Boolean(index="no"),
            "additional_info_text": String(index="no"),
            "needs_scancopy_check": Boolean(index="no"),
        }
    )

    class Meta:
        index = "declarations"
class AtricleType(DocType):
    """Mapping for an article (index ``jobbole``, doc type ``article``)."""

    # Uses the custom analyzer object ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()  # processed by a function

    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()  # preparation for downloading the image
    front_image_path = Keyword()

    pertain_to = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
class NewsClsType(DocType):
    """ES model for Cailianshe (CLS) news (index ``news``, doc type ``news_cls``)."""

    suggest = Completion(analyzer=ik_analyzer)

    article_id = Integer()

    # Analyzed text fields.
    title = Text(analyzer="ik_max_word")
    brief = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    create_date = Date()

    # Exact-match keyword fields.
    stock_code = Keyword()
    stock_name = Keyword()
    url = Keyword()
    source = Keyword()
    website = Keyword()

    class Meta:
        index = "news"
        doc_type = "news_cls"
class Article_4houType(DocType):
    """Mapping for an article (index ``teachnical_4hou``, doc type ``A_4hou``)."""

    suggest = Completion(analyzer=ik_analyzer)  # search suggestions

    image_local = Keyword()
    title = Text(analyzer="ik_max_word")
    url_id = Keyword()
    create_time = Date()
    url = Keyword()
    author = Keyword()
    tags = Text(analyzer="ik_max_word")

    # Integer counters.
    watch_nums = Integer()
    comment_nums = Integer()
    praise_nums = Integer()

    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "teachnical_4hou"
        doc_type = "A_4hou"
class MovieType(DocType):
    """Mapping for a movie (index ``new_movie``, doc type ``movie``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")

    url = Keyword()
    url_object_id = Keyword()
    # ``create_date``, ``duration`` and ``score`` are all mapped as keywords,
    # not as Date or numeric fields.
    create_date = Keyword()
    front_image_url = Keyword()

    tags = Text(analyzer="ik_max_word")
    duration = Keyword()
    score = Keyword()
    description = Text(analyzer="ik_max_word")
    image_url = Keyword()

    class Meta:
        index = "new_movie"
        doc_type = "movie"
class ArticleType(DocType):
    """jobbole article mapping (index ``jobbole``, doc type ``artice``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    comment_nums = Integer()
    fav_nums = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        # NOTE(review): "artice" looks like a typo for "article", but it is
        # the doc type name in use -- confirm against the index before
        # changing it.
        doc_type = "artice"
class ZhiHuQuestionIndex(Document):
    """Mapping for a Zhihu question stored in the ``zhihu_question`` index."""

    # Completion field backed by the ``my_analyzer`` object.
    suggest = Completion(analyzer=my_analyzer)

    question_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Index:
        name = "zhihu_question"
class ArticleType(DocType):
    """Mapping for an article (index ``jobbole``, doc type ``article``)."""

    suggest = Completion(analyzer=ik_analyzer)

    # Text fields are tokenized and analyzed.
    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Keyword fields are not tokenized; the value is stored whole.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_num = Integer()
    comment_num = Integer()
    share_num = Integer()
    fav_num = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
class ZhiHuType(DocType):
    """Zhihu question item (index ``zhihu``, doc type ``question``)."""

    suggest = Completion(analyzer=ik_analyzer)

    # Identifiers and topics are exact-match keywords.
    zhihu_id = Keyword()
    topics = Keyword()
    url = Keyword()

    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "question"
class CareersType(DocType):
    """Job fair mapping (index ``careers``, doc type ``careers_type``)."""

    suggest = Completion(analyzer=ik_analyzer)

    url = Keyword()
    tianyan_company_url = Keyword()

    # Analyzed descriptive fields.
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")

    meet_name = Keyword()
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        index = "careers"
        doc_type = "careers_type"
class LagouJobType(DocType):
    """Mapping for a job posting (index ``lagou``, doc type ``job``)."""

    # Analyzed text fields.
    title = Text(analyzer="ik_max_word")
    salary = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_addr = Text(analyzer="ik_max_word")

    # Exact-match keyword fields.
    job_advantage = Keyword()
    job_city = Keyword()
    job_type = Keyword()

    job_desc = Text(analyzer="ik_max_word")

    company_url = Keyword()
    url = Keyword()
    company_name = Keyword()
    publish_time = Date()

    suggest = Completion(analyzer=ik_analyzer)

    class Meta:
        index = "lagou"
        doc_type = "job"
class ESLAL(DocType):
    """Mapping for a journal article (index ``lal``, doc type ``article``)."""

    # No analyzer is specified for any field of this mapping.
    suggest = Completion()
    title = Text()

    # Exact-match keyword fields.
    url = Keyword()
    journal = Keyword()
    doi = Keyword()
    id = Keyword()
    abs_img_url = Keyword()

    abstract = Text()

    # Text with an extra keyword sub-field named ``keywords``.
    keywords = Text(fields={"keywords": Keyword()})
    # NOTE(review): the sub-field of ``authors`` is also named ``keywords``,
    # which may be a copy-paste; it is part of the mapping, so confirm before
    # renaming.
    authors = Text(fields={"keywords": Keyword()})

    year = Integer()
    citing_num = Integer()
    company = Keyword()

    class Meta:
        index = "lal"
        doc_type = "article"
class JobboleArticleType(Document):
    """Mapping for an article stored in the ``jobbole`` index (doc type ``article``)."""

    suggest = Completion(analyzer="ik_max_word")

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    url = Keyword()
    image_url = Keyword()
    image_path = Keyword()

    # Integer counters.
    vote_num = Integer()
    comment_num = Integer()
    book_num = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Index:
        name = "jobbole"

    class Meta:
        doc_type = "article"
class DoubanMovieType(Document):
    """Mapping for a movie stored in the ``douban`` index (doc type ``movie``)."""

    suggest = Completion(analyzer="ik_max_word")

    movie_name = Text(analyzer="ik_max_word")
    movie_url = Keyword()
    image_url = Keyword()
    year = Integer()

    # Text fields declared without an explicit analyzer; note that ``score``
    # is mapped as text, not as a number.
    country = Text()
    quote = Text()
    score = Text()

    ranking = Integer()
    comment_num = Integer()
    tags = Text(analyzer="ik_max_word")

    class Index:
        name = "douban"

    class Meta:
        doc_type = "movie"
class FilmType(DocType):
    """Mapping for a film (index ``entertainment``, doc type ``film``)."""

    # Implements search suggestions.
    suggest = Completion(analyzer=ik_analyzer)

    # Main fields.
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    magnet = Keyword()
    publish_time = Date()
    content = Text(analyzer="ik_max_word")

    # Scores are stored as doubles.
    imdb_score = Double()
    douban_score = Double()

    ftp_address = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    class Meta:
        index = "entertainment"
        doc_type = "film"
class ArticleType(DocType):
    """Jobbole article type (index ``jobbole``, doc type ``article``)."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")  # title
    create_time = Date()  # creation time

    url = Keyword()  # url
    url_object_id = Keyword()  # md5 of the url
    front_image_url = Keyword()  # url of the list image
    front_path_url = Keyword()  # url of the local image

    praise_num = Integer()  # number of likes
    comment_num = Integer()  # number of comments
    fav_num = Integer()  # number of favorites

    tags = Text(analyzer="ik_max_word")  # tags
    content = Text(analyzer="ik_max_word")  # content

    class Meta:
        index = "jobbole"
        doc_type = "article"