class AnquankeArticleType(DocType):
    """Document type for the 'anquanke' index, doc type 'anquanke_article'.

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Article metadata; text fields use the 'ik_max_word' analyzer.
    title = Text(analyzer='ik_max_word')
    author = Text(analyzer='ik_max_word')
    create_date = Date()

    # URL and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Counters.
    view_count = Integer()
    comment_count = Integer()
    like_count = Integer()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        # Target index and document type names.
        index = 'anquanke'
        doc_type = 'anquanke_article'
class ZhiHuType(DocType):
    """Document type for a Zhihu question item ('zhihu' index, 'question' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Identifiers and link of the question.
    zhihu_id = Keyword()
    topics = Keyword()
    url = Keyword()

    # Question text, analyzed with 'ik_max_word'.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        # Target index and document type names.
        index = "zhihu"
        doc_type = "question"
class Paper(Document):
    """Paper object stored inside Elasticsearch (index 'arxiv_papers')."""

    # Title is analyzed with 'snowball' and also has a 'raw' Keyword sub-field.
    title = Text(analyzer='snowball', fields={'raw': Keyword()})

    # Remaining text fields, all analyzed with 'snowball'.
    abstract = Text(analyzer='snowball')
    full_text = Text(analyzer='snowball')
    authors = Text(analyzer='snowball')

    date = Date()

    class Index:
        # Index name and its settings.
        name = 'arxiv_papers'
        settings = {
            "number_of_shards": 2,
        }

    def save(self, **kwargs):
        """Forward all keyword arguments to the parent ``save`` and return its result."""
        outcome = super(Paper, self).save(**kwargs)
        return outcome
class LagouJobType(DocType):
    """Document type for job postings ('lagou' index, 'job' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Text fields analyzed with 'ik_max_word'.
    title = Text(analyzer="ik_max_word")
    salary = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_addr = Text(analyzer="ik_max_word")

    # Keyword attributes of the job.
    job_advantage = Keyword()
    job_city = Keyword()
    job_type = Keyword()

    job_desc = Text(analyzer="ik_max_word")

    # Links and company name.
    company_url = Keyword()
    url = Keyword()
    company_name = Keyword()

    publish_time = Date()

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    class Meta:
        # Target index and document type names.
        index = "lagou"
        doc_type = "job"
class Pagina(Document):
    """Page document stored in the index named by ``ELASTICSEARCH_INDEX``.

    ``save`` stamps ``created_at`` with the current local time before
    delegating to the parent ``save``.
    """

    # Free-text attributes of the page.
    autor = Text()
    titulo = Text()
    categoria = Text()
    idioma = Text()
    instituicao = Text()

    # Numeric attributes. These were previously written as ``int()``, which
    # evaluates to the plain value 0 rather than a field object; they are now
    # declared as Integer fields like the numeric fields of the other
    # document classes in this file.
    # NOTE(review): an already-existing index may have these fields mapped
    # dynamically with a different numeric type -- confirm before re-running
    # index initialisation against live data.
    acessos = Integer()
    pagina = Integer()

    # Page payload.
    base64 = Text()
    texto = Text()

    created_at = Date()

    class Index:
        # Index name comes from a constant defined outside this class.
        name = ELASTICSEARCH_INDEX

    def save(self, **kwargs):
        """Set ``created_at`` to ``datetime.now()`` and save the document.

        All keyword arguments are forwarded to the parent ``save``; its
        return value is returned unchanged.
        """
        self.created_at = datetime.now()
        return super().save(**kwargs)
class ArticleType(DocType):
    """Jobbole article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Title.
    title = Text(analyzer="ik_max_word")
    # Creation time.
    create_time = Date()
    # Article URL.
    url = Keyword()
    # md5 of the URL.
    url_object_id = Keyword()
    # URL of the list image.
    front_image_url = Keyword()
    # Local image location.
    front_path_url = Keyword()
    # Number of praises (likes).
    praise_num = Integer()
    # Number of comments.
    comment_num = Integer()
    # Number of favorites.
    fav_num = Integer()
    # Tags.
    tags = Text(analyzer="ik_max_word")
    # Body content.
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "jobbole"
        doc_type = "article"
class JobboleArticleType(Document):
    """Article document stored in the 'jobbole' index."""

    # Completion field analyzed with 'ik_max_word'.
    suggest = Completion(analyzer="ik_max_word")

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Article link and image locations.
    url = Keyword()
    image_url = Keyword()
    image_path = Keyword()

    # Counters.
    vote_num = Integer()
    comment_num = Integer()
    book_num = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Index:
        # Name of the index holding these documents.
        name = "jobbole"

    class Meta:
        # NOTE(review): whether ``doc_type`` under ``Meta`` is honored for a
        # ``Document`` subclass depends on the library version -- confirm.
        doc_type = "article"
class ArticleType(DocType):
    """Jobbole ("boleonline") article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # NOTE (from the original author): parent index cannot have same title.
    title = Text(analyzer='ik_max_word')
    create_date = Date()

    url = Keyword()
    # md5 of the (long) URL.
    url_object_id = Keyword()

    front_image_url = Keyword()
    # Original author's note: updated in the pipeline and settings.
    front_image_path = Keyword()

    # Counters. ``comment_nums`` and ``fav_nums`` were previously assigned the
    # bare ``Integer`` class (missing call parentheses) instead of a field
    # instance; they are now instantiated like ``praise_nums``.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        # Target index and document type names.
        index = 'jobbole'
        doc_type = 'article'
class ArticleType(DocType):
    """Jobbole article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # ``suggest`` is declared as a Completion type to provide autocomplete.
    # (Original note mentioned: analyzer="ik_max_word")
    suggest = Completion(analyzer=ik_analyzer)

    # Corresponds to the type in Elasticsearch; the analyzer is set here.
    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"  # index
        doc_type = "article"  # type
class ArticleType(DocType):
    """Cnblogs article type ('cnblogs' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_time = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "cnblogs"
        doc_type = "article"
class Mail(DocType):
    """Mail document stored in the 'mail' index."""

    # Sender and recipient.
    mail_from = Text()
    attachments = Keyword()
    mail_to = Text()

    tags = Keyword()
    sent_day = Date()
    att_count = Integer()
    virus = Keyword()
    id = Text()

    class Meta:
        # Target index name.
        index = 'mail'

    def save(self, **kwargs):
        """Forward all keyword arguments to the parent ``save`` and return its result."""
        outcome = super(Mail, self).save(**kwargs)
        return outcome

    def add_tag(self, tag):
        """Append ``{'tags': tag}`` to this document's ``tags`` value.

        NOTE(review): ``tags`` is declared as a plain ``Keyword()`` (no
        ``multi=True``) and a dict is appended rather than the tag itself --
        confirm this is the intended shape and that ``self.tags`` is a list
        when this is called.
        """
        entry = {'tags': tag}
        self.tags.append(entry)
class ArticleType(DocType):
    """Jobbole article item ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_obj_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    thumb_up_num = Integer()
    bookmark_num = Integer()
    comment_num = Integer()

    # Body and tags, analyzed with 'ik_max_word'.
    content = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "jobbole"
        doc_type = "article"
class Doc(Document):
    """Document holding a text, a creation timestamp and nested ``QA`` entries.

    ``QA`` and ``es_param`` are referenced here but defined outside this class.
    """

    doc = Text()
    created_at = Date()

    # Nested collection of ``QA`` objects.
    qa_pair = Nested(QA)

    class Index:
        # Index name is read from ``es_param``.
        name = es_param.index_name

    def add_qa_pair(self, ans_id, ans_str, query_id, query_str):
        """Build a ``QA`` from the four arguments and append it to ``qa_pair``."""
        pairs = self.qa_pair
        entry = QA(
            ans_id=ans_id,
            ans_str=ans_str,
            query_id=query_id,
            query_str=query_str,
        )
        pairs.append(entry)

    def save(self, **kwargs):
        """Set ``created_at`` to ``datetime.now()`` and save the document.

        All keyword arguments are forwarded to the parent ``save``; its
        return value is returned unchanged.
        """
        self.created_at = datetime.now()
        outcome = super().save(**kwargs)
        return outcome
class Image(DocType):
    """Image document stored in the index named by ``settings.ELASTICSEARCH_INDEX``."""

    # Title, analyzed with the 'english' analyzer.
    title = String(analyzer="english")
    identifier = String(index="not_analyzed")

    # Creator name and link.
    creator = String()
    creator_url = String(index="not_analyzed")

    # Multi-valued tags.
    tags = String(multi=True)
    created_on = Date()

    # Image and thumbnail locations.
    url = String(index="not_analyzed")
    thumbnail = String(index="not_analyzed")

    # Provenance.
    provider = String(index="not_analyzed")
    source = String(index="not_analyzed")

    # Licensing.
    license = String(index="not_analyzed")
    license_version = String()

    foreign_landing_url = String(index="not_analyzed")
    removed_from_source = Boolean()

    class Meta:
        # Index name is read from the project settings object.
        index = settings.ELASTICSEARCH_INDEX
class ArticleType(DocType):
    """Sets the Elasticsearch mappings for articles ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class; it is
    used for the completion field and for every text field.
    """

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer=ik_analyzer)
    create_date = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Tags and body.
    tags = Text(analyzer=ik_analyzer)
    content = Text(analyzer=ik_analyzer)

    class Meta:
        # Target index and document type names.
        index = 'jobbole'
        doc_type = "article"
class ArticleType(DocType):
    """Article type stored in the 'douban' index with doc type 'movice'.

    The title, tags and content fields are currently disabled (kept below
    as commented-out declarations).
    """

    # title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Keyword attributes of the entry.
    ranking = Keyword()
    movie_name = Keyword()
    score = Keyword()
    score_num = Keyword()

    # tags = Text(analyzer="ik_max_word")
    # content = Text(analyzer="ik_max_word")

    class Meta:
        # "Database name and table name" (original comment): index and doc type.
        index = "douban"
        doc_type = "movice"
class ArticleType(DocType):
    """Jobbole article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Autocomplete field.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer='ik_max_word')
    create_date = Date()

    # Counters.
    zan = Integer()
    store = Integer()
    comments = Integer()

    # Body and tags, analyzed with 'ik_max_word'.
    contents = Text(analyzer='ik_max_word')
    tags = Text(analyzer='ik_max_word')

    # Front image location (URL and path).
    front_img_url = Keyword()
    front_img_path = Keyword()

    # Article link and its md5 companion.
    url = Keyword()
    url_md5 = Keyword()

    class Meta:
        # Target index and document type names.
        index = 'jobbole'
        doc_type = 'article'
class ArticleType(DocType):
    """Cnblogs type mappings ('cnblogs' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer`` (original note: careful!).
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Stored whole, without tokenization (per the original comment).
    url = Keyword()
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    like_nums = Integer()
    view_nums = Integer()
    comment_nums = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "cnblogs"
        doc_type = "article"
class FilmType(DocType):
    """Film document type ('entertainment' index, 'film' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Implements search suggestions.
    suggest = Completion(analyzer=ik_analyzer)

    # Main fields.
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    magnet = Keyword()
    publish_time = Date()
    content = Text(analyzer="ik_max_word")

    # Scores.
    imdb_score = Double()
    douban_score = Double()

    ftp_address = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    class Meta:
        # Target index and document type names.
        index = 'entertainment'
        doc_type = 'film'
class EsArea(Document):
    """Per-area record stored in the 'area' index."""

    # Completion field analyzed with 'ik_max_word'.
    suggest = Completion(analyzer='ik_max_word')

    # Location names, analyzed with 'ik_max_word'.
    continentName = Text(analyzer='ik_max_word')
    countryName = Text(analyzer='ik_max_word')
    provinceName = Text(analyzer='ik_max_word')
    cityName = Text(analyzer='ik_max_word')

    # Counters.
    confirmedCount = Integer()
    suspectedCount = Integer()
    curedCount = Integer()
    deadCount = Integer()
    currentConfirmedCount = Integer()

    updateTime = Date()

    class Index:
        # Index name and its settings.
        name = "area"
        settings = {
            "number_of_shards": 3,
        }
class ChatUpdate(DocType):
    """Chat update document stored in the 'chatupdates' index.

    ``save`` stamps ``created_at`` with ``datetime.utcnow()`` before
    delegating to the parent ``save``.
    """

    # Identifiers.
    channel_id = Keyword()
    from_id = Keyword()

    # Free-text attributes.
    title = Text()
    about = Text()
    pinnedMessage = Text()

    # Sentiment values.
    sentimentPolarity = Float()
    sentimentSubjectivity = Float()

    username = Keyword()
    participants_count = Integer()
    created_at = Date()

    class Meta:
        # Target index name.
        index = 'chatupdates'

    def save(self, **kwargs):
        """Set ``created_at`` to ``datetime.utcnow()`` and save the document.

        All keyword arguments are forwarded to the parent ``save``; its
        return value is returned unchanged.
        """
        self.created_at = datetime.utcnow()
        outcome = super().save(**kwargs)
        return outcome
class EsOverAll(Document):
    """Overall counters record stored in the 'overall' index.

    Each ``*Count`` field is paired with a ``*Incr`` field.
    """

    confirmedCount = Integer()
    confirmedIncr = Integer()

    suspectedCount = Integer()
    suspectedIncr = Integer()

    curedCount = Integer()
    curedIncr = Integer()

    deadCount = Integer()
    deadIncr = Integer()

    currentConfirmedCount = Integer()
    currentConfirmedIncr = Integer()

    updateTime = Date()

    class Index:
        # Index name and its settings.
        name = "overall"
        settings = {
            "number_of_shards": 3,
        }
class CareersType(DocType):
    """Job fair document type ('careers' index, 'careers_type' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Links.
    url = Keyword()
    tianyan_company_url = Keyword()

    # Company and location text, analyzed with 'ik_max_word'.
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")

    # Meeting details.
    meet_name = Keyword()
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        # Target index and document type names.
        index = "careers"
        doc_type = "careers_type"
class ArticleType(DocType):
    """Jobbole article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Defines the storage type and the analyzer.
    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Article link.
    url = Keyword()
    # Link after md5.
    url_object_id = Keyword()
    # Image link.
    front_image_url = Keyword()
    # Image storage path.
    front_image_path = Keyword()

    # Counters.
    praise_number = Integer()
    fav_nums = Integer()
    comment_nums = Integer()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"  # index name
        doc_type = "article"  # "table" (doc type) name
class IndexedCMSPage(Document):
    """CMS page document; the index name is read from ``settings.ES_INDICES``.

    Several text fields carry an ``_exact`` Keyword sub-field.
    """

    body = Text(analyzer='english')
    description = Text(analyzer='english')

    # Text with an ``_exact`` Keyword sub-field (no analyzer given).
    django_id = Text(fields={'_exact': Keyword()})

    language = Text(analyzer='english')
    page_id = Long()
    pub_date = Date()
    site_id = Long()

    # 'english'-analyzed text with an ``_exact`` Keyword sub-field.
    slug = Text(analyzer='english', fields={'_exact': Keyword()})
    text = Text(analyzer='english')
    title = Text(analyzer='english', fields={'_exact': Keyword()})

    url = Text(fields={'_exact': Keyword()})

    class Index:
        # Uses the 'alias' entry of the 'web_content' index configuration.
        name = settings.ES_INDICES['web_content']['alias']

    class Meta:
        # Mapping-level ``dynamic`` option set to 'strict'.
        dynamic = MetaField('strict')
class Douban(Document):
    """Document stored in the 'douban' index."""

    url = Keyword()
    title = Text(analyzer="ik_max_word")
    time = Date()

    # Keyword attributes.
    director = Keyword()
    area = Keyword()

    # Text attributes analyzed with 'ik_max_word'.
    language = Text(analyzer="ik_max_word")
    nickname = Text(analyzer="ik_max_word")

    score = Keyword()
    introduction = Text(analyzer="ik_max_word")

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    class Meta:
        # NOTE(review): whether ``doc_type`` under ``Meta`` is honored for a
        # ``Document`` subclass depends on the library version -- confirm.
        doc_type = "info"

    class Index:
        # Name of the index holding these documents.
        name = "douban"
class ZhiHuQuestionIndex(Document):
    """Zhihu question, stored in the 'zhihu_question' index.

    ``my_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``my_analyzer``.
    suggest = Completion(analyzer=my_analyzer)

    question_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()

    # Title is declared twice: as analyzed text and as a separate keyword field.
    title = Text(analyzer="ik_max_word")
    title_keyword = Keyword()

    content = Text(analyzer="ik_max_word")

    # Counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Index:
        # Name of the index holding these documents.
        name = 'zhihu_question'
class JobboleEsType(DocType):
    """Jobbole article document type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Original author's note: the elasticsearch_dsl source does not support
    # choosing an analyzer for Completion, so ik has to be defined as a
    # custom analyzer and passed in.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Body and tag, analyzed with 'ik_max_word'.
    content = Text(analyzer="ik_max_word")
    tag = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "jobbole"
        doc_type = "article"
class ArticleType(DocType):
    """Jobbole article type ('jobbole' index, 'article' type).

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    url = Keyword()
    # md5, giving the URL a fixed length.
    url_object_id = Keyword()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    fav_nums = Integer()
    comments_nums = Integer()

    # Body and tags, analyzed with 'ik_max_word'.
    content = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type names.
        index = "jobbole"
        doc_type = "article"
class XianzhiArticleType(DocType):
    """Document type for the 'xianzhi' index, doc type 'xianzhi_article'.

    ``ik_analyzer`` is referenced here but defined outside this class.
    """

    # Completion field analyzed with ``ik_analyzer``.
    suggest = Completion(analyzer=ik_analyzer)

    # Article metadata; text fields use the 'ik_max_word' analyzer.
    title = Text(analyzer='ik_max_word')
    author = Text(analyzer='ik_max_word')
    create_date = Date()

    # Article link and its companion identifier.
    url = Keyword()
    url_object_id = Keyword()

    # Counters.
    view_count = Integer()
    follow_count = Integer()
    mark_count = Integer()

    # Front image location (URL and path).
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Tags and body, analyzed with 'ik_max_word'.
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        # Target index and document type names.
        index = 'xianzhi'
        doc_type = 'xianzhi_article'