class Article(Document):
    """News article document: text content plus provenance metadata."""

    # 'raw' keyword sub-field enables exact-match / aggregations on title.
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    body = Text(analyzer=html_strip)  # html_strip analyzer defined elsewhere in the project
    tags = Keyword()
    language = Text()
    country = Text()
    publication_date = Date()
    source = Nested(Source)  # nested Source sub-document (defined elsewhere)
    category = Keyword()
    url = Text()
    image_url = Text()

    class Index:
        # Target index name and creation-time settings.
        name = "article-index"
        settings = {
            "number_of_shards": 2,
        }
class Article(DocType):
    """Jobbole article document with completion-suggester support."""

    # Autocomplete input; ik_analyzer is presumably a custom IK (Chinese)
    # analyzer defined elsewhere — confirm at the file top.
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)
    # 'title' keyword sub-field allows exact matching on the title.
    title = Text(analyzer='ik_max_word', search_analyzer="ik_max_word", fields={'title': Keyword()})
    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()
    create_date = Date()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer='ik_max_word', fields={'tags': Keyword()})
    content = Text(analyzer='ik_max_word')

    class Meta:
        # NOTE(review): index name is empty — confirm it is configured
        # elsewhere (e.g. via init()/connection defaults) before writes.
        index = ''
        doc_type = 'jobbole_article'
class WeiboType(Document):
    """Weibo post document."""

    suggest = Completion(analyzer='ik_max_word')  # autocomplete input
    id = Keyword()
    created_at = Date()
    user = Object()             # untyped sub-object: author payload
    pics = Keyword(multi=True)  # list of picture identifiers/URLs
    reposts_count = Integer()
    comments_count = Integer()
    attitudes_count = Integer()  # like count
    text = Text(analyzer="ik_max_word")
    video_info = Object()        # untyped sub-object: video metadata

    class Index:
        name = "weibo"
        settings = {
            "number_of_shards": 4,
        }
class StockMeta(BaseDocType):
    """Static metadata for a listed security."""

    id = Keyword()
    type = Keyword()
    exchange = Keyword()
    code = Keyword()
    name = Keyword()
    listDate = Date()  # listing date
    indexCategory = Keyword()
    # Sina-sourced classification fields.
    sinaIndustry = Keyword()
    sinaConcept = Keyword()
    sinaArea = Keyword()
    sector = Keyword()
    industry = Keyword()

    class Meta:
        index = 'stock_meta'
        doc_type = 'doc'
class NewsClsType(DocType):
    """ES model for CLS (Cailianshe) financial news."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    article_id = Integer()
    title = Text(analyzer="ik_max_word")
    brief = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    create_date = Date()
    stock_code = Keyword()
    stock_name = Keyword()
    url = Keyword()
    source = Keyword()
    website = Keyword()

    class Meta:
        index = "news"
        doc_type = "news_cls"
class CryptoCurrencyKData(BaseDocType):
    """Candlestick (K-line) record for a cryptocurrency."""

    id = Keyword()
    securityId = Keyword()
    timestamp = Date()
    code = Keyword()
    open = Float()
    close = Float()
    high = Float()
    low = Float()
    volume = Float()
    preClose = Float()   # previous session close
    change = Float()     # absolute price change
    changePct = Float()  # percentage price change

    class Meta:
        # The _all field is disabled to save index space.
        all = MetaField(enabled=False)
        # NOTE(review): no index name declared here — presumably supplied
        # by BaseDocType or at save() time; confirm.
        doc_type = 'doc'
class JobType(DocType):
    """Job posting document."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL
    job_type = Text(analyzer="ik_max_word")
    job_city = Keyword()
    job_desc = Text(analyzer="ik_max_word")
    company_name = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")
    job_addr = Text(analyzer=ik_analyzer)

    class Meta:
        index = "jobinfo"
        doc_type = "job"
class ZhiHuQuestionType(DocType): suggest = Completion(analyzer=ik_analyzer) # 知乎的问题 item zhihu_id = Keyword() topics = Text(analyzer="ik_max_word") url = Keyword() title = Text(analyzer="ik_max_word") content = Text(analyzer="ik_max_word") answer_num = Integer() comments_num = Integer() watch_user_num = Integer() click_num = Integer() crawl_time = Date() class Meta: index = "zhihu" doc_type = "question"
class NewsType(DocType):
    """Aggregated news article document."""

    title = Text(analyzer="ik_max_word")
    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    url = Keyword()
    url_md5 = Keyword()  # MD5 of the URL, usable as a stable id
    category = Text(analyzer="ik_smart")
    summary = Text(analyzer="ik_smart")
    image_urls = Keyword()
    image_path = Keyword()
    from_platform = Keyword()
    # NOTE(review): publication time stored as keyword, not Date — confirm intended.
    news_time = Keyword()
    crawl_time = Date()
    news_score = Keyword()

    class Meta:
        index = "information"
        doc_type = "news"
class SihouArticleType(DocType):
    """Sihou article document."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer='ik_max_word')
    author = Text(analyzer='ik_max_word')
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL
    view_count = Integer()
    praise_count = Integer()
    front_image_url = Keyword()
    front_image_path = Keyword()
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        index = 'sihou'
        doc_type = 'sihou_article'
class IndexedCMSPage(DocType):
    """Search document for a CMS page."""

    body = Text(analyzer='english')
    description = Text(analyzer='english')
    # NOTE(review): String is the legacy elasticsearch-dsl field type
    # (pre Text/Keyword split) — confirm the pinned library version still
    # provides it; '_exact' sub-fields support exact matching.
    django_id = String(fields={'_exact': Keyword()})
    language = String(analyzer='english')
    page_id = Long()
    pub_date = Date()
    site_id = Long()
    slug = String(analyzer='english', fields={'_exact': Keyword()})
    text = Text(analyzer='english')
    title = String(analyzer='english', fields={'_exact': Keyword()})
    url = String(fields={'_exact': Keyword()})

    class Meta:
        # Index and doc_type names come from Django settings.
        index = settings.ES_INDICES['web_content']['name']
        doc_type = settings.ES_INDICES['web_content']['documents'][0]['name']
        # Reject documents carrying fields not declared above.
        dynamic = MetaField('strict')
class User(Document):
    """User document tracking login history and group membership."""

    username = Text()
    email = Text()
    password = Text()
    name = Text()
    surname = Text()
    birthday = Date()
    gender = Text()
    login_logs = Nested(LoginLog)
    groups = Keyword(multi=True)

    class Index:
        name = 'user'

    def save(self, **kwargs):
        # No extra bookkeeping; delegate straight to the base class.
        return super().save(**kwargs)

    def add_log(self, device, ip, state, date):
        """Append a new login record and return it."""
        log_entry = LoginLog(device=device, ip=ip, state=state, date=date)
        self.login_logs.append(log_entry)
        return log_entry

    def get_login_logs(self):
        """Return the full list of login records."""
        return self.login_logs

    def addGroup(self, group_id):
        """Add the user to a group, ignoring duplicates."""
        if group_id in self.groups:
            return
        self.groups.append(group_id)

    def removeGroup(self, group_id):
        """Remove the user from a group if currently a member."""
        if group_id in self.groups:
            self.groups.remove(group_id)
class Article_4houType(DocType): suggest = Completion(analyzer=ik_analyzer) #搜索建议 image_local = Keyword() title = Text(analyzer="ik_max_word") url_id = Keyword() create_time = Date() url = Keyword() author = Keyword() tags = Text(analyzer="ik_max_word") watch_nums = Integer() comment_nums = Integer() praise_nums = Integer() content = Text(analyzer="ik_max_word") class Meta: index = "teachnical_4hou" doc_type = "A_4hou"
class ArticleType(DocType):
    """Jobbole article document."""

    suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL
    front_image_url = Keyword()
    front_image_path = Keyword()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
class GeneralDoc(Document): symbol = Keyword() # Text(fields={"keyword": Keyword()}) company_name = Text() high_52_week = Double() low_52_week = Double() close_price = Double() avg_volume_30_days = Long() timestamp = Date() def save(self, **kwargs): self.timestamp = datetime.now() return super().save(**kwargs) class Index: name = "general"
class Article(Document):
    """Blog article document with a derived word count.

    ``lines`` is recomputed on every ``save()`` from ``body``.
    """

    title = Text(analyzer='snowball')
    body = Text(analyzer='snowball')
    tags = Keyword()
    published_from = Date()
    # BUG FIX: was `lines = Integer` — the field *class* itself, not an
    # instance — so no integer mapping was ever created for this field.
    lines = Integer()

    class Index:
        name = "blog"
        settings = {"number_of_shards": 3}

    def save(self, **kwargs):
        # Derive the token count of the body before persisting.
        # NOTE(review): `split()` counts whitespace-separated words, not
        # lines, despite the field name — this mirrors the upstream
        # elasticsearch-dsl persistence example; confirm intent.
        self.lines = len(self.body.split())
        return super(Article, self).save(**kwargs)

    def is_published(self):
        """Return True once the publish date has passed (naive local time)."""
        return datetime.now() >= self.published_from
class TechnologyType(DocType):
    """Technology article document."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer='ik_max_word')
    time = Date()
    link_url = Keyword()
    content = Text(analyzer='ik_max_word')
    url_object_id = Keyword()
    tag = Text(analyzer='ik_max_word')  # tags
    comment_num = Integer()     # comment count
    read_num = Integer()        # read count
    Collection_num = Integer()  # favourites count (non-PEP8 name kept for index compatibility)
    praise_num = Integer()      # like count
    source = Keyword()          # source / origin site

    class Meta:
        index = "technology"
        doc_type = "tec_articles"
class B2BProductIndex(DocType):
    """Search index document for B2B products."""

    django_id = Integer()
    name = String(analyzer='snowball', fields={'raw': String(index='no')})
    description = String(analyzer=html_strip)
    country = Integer()
    branches = Integer(multi=True)
    b2b_categories = Integer(multi=True)
    organization = Integer()
    price = Double()
    is_active = Boolean()
    is_deleted = Boolean()
    created_at = Date()

    @staticmethod
    def get_model():
        # Imported lazily to avoid a circular import at module load time.
        from b24online.models import B2BProduct
        return B2BProduct

    @classmethod
    def get_queryset(cls):
        """Return all products with related company/branch data prefetched."""
        model = cls.get_model()
        return model.objects.all().prefetch_related(
            'company', 'company__countries', 'branches')

    @classmethod
    def to_index(cls, obj):
        """Build an index document from a B2BProduct instance."""
        doc = cls(
            django_id=obj.pk,
            name=obj.name,
            description=obj.description,
            organization=obj.company.pk,
            is_active=obj.is_active,
            is_deleted=obj.is_deleted,
            country=obj.company.country.pk,
            price=obj.cost,
            created_at=obj.created_at,
        )
        # Each category/branch is stored together with all of its ancestors,
        # de-duplicated (resulting list order is not significant).
        doc.b2b_categories = list({
            pk
            for category in obj.categories.all()
            for pk in category.get_ancestors(
                include_self=True).values_list('pk', flat=True)
        })
        doc.branches = list({
            pk
            for branch in obj.branches.all()
            for pk in branch.get_ancestors(
                include_self=True).values_list('pk', flat=True)
        })
        return doc
class UserIndex(Document):
    """User document with nested per-skill scores."""

    user_id = Long()
    name = Text()
    name_keyword = Keyword()  # exact-match copy of `name`, populated in save()
    skills = Nested(UserSkillIndex)
    created_at = Date()

    class Index:
        name = IndexName.USER_INDEX
        settings = {
            "number_of_shards": 2,
        }

    def save(self, **kwargs):
        # Stamp save time and mirror `name` into the keyword field.
        self.created_at = datetime.now()
        self.name_keyword = self.name
        return super().save(**kwargs)

    def add_skill(self, skill_id, name, score):
        # Append a nested skill entry; name is duplicated into its keyword copy.
        self.skills.append(
            UserSkillIndex(skill_id=skill_id, name=name, score=score, name_keyword=name))

    @staticmethod
    def search_by_skill_name(skill_name, using=None, index=None):
        """Return a Search matching users by skill name, sorted by that
        skill's score (descending) and then by relevance."""
        # NOTE(review): `nested_path`/`nested_filter` are the pre-ES-6.1
        # nested-sort syntax — confirm the target cluster still accepts them.
        skill_sort = {
            'skills.score': {
                'order': 'desc',
                'mode': 'min',
                'nested_path': 'skills',
                'nested_filter': {
                    'match': {
                        'skills.name': skill_name
                    }
                }
            }
        }
        search_obj = UserIndex.search(using=using, index=index).query(
            "nested", path="skills",
            query=Q('match', **{'skills.name': skill_name})).sort(skill_sort, '_score')
        return search_obj
class Project(DocType):
    """Search document for a released project."""

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer, index_options="docs")
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)

    class Meta:
        # The _all field is unused by our queries; disabling it saves space.
        all = MetaField(enabled=False)

    @classmethod
    def from_db(cls, release):
        """Build a search document from a database release record."""
        doc = cls(meta={"id": release.normalized_name})
        # Most fields copy 1:1 from the release record.
        for field in (
            "name", "normalized_name", "latest_version", "summary",
            "description", "author", "author_email", "maintainer",
            "maintainer_email", "home_page", "download_url", "keywords",
            "platform", "created", "classifiers",
        ):
            doc[field] = getattr(release, field)
        # "version" is the one field whose source attribute name differs.
        doc["version"] = release.all_versions
        return doc
class Project(Document):
    """Search document for a released project, including a popularity zscore."""

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)
    zscore = Float()  # popularity score computed upstream

    @classmethod
    def from_db(cls, release):
        """Build a search document from a database release record."""
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        # Versions ordered newest-first using PEP 440 version semantics.
        obj["version"] = sorted(
            release.all_versions, key=lambda r: packaging.version.parse(r), reverse=True
        )
        obj["latest_version"] = release.latest_version
        obj["summary"] = release.summary
        obj["description"] = release.description
        obj["author"] = release.author
        obj["author_email"] = release.author_email
        obj["maintainer"] = release.maintainer
        obj["maintainer_email"] = release.maintainer_email
        obj["home_page"] = release.home_page
        obj["download_url"] = release.download_url
        obj["keywords"] = release.keywords
        obj["platform"] = release.platform
        obj["created"] = release.created
        obj["classifiers"] = release.classifiers
        obj["zscore"] = release.zscore
        return obj
class _AggregateReportDoc(Document):
    """DMARC aggregate report record.

    ``passed_dmarc`` is derived from SPF/DKIM alignment on every save.
    """

    class Index:
        name = "dmarc_aggregate"

    xml_schema = Text()
    org_name = Text()
    org_email = Text()
    org_extra_contact_info = Text()
    report_id = Text()
    date_range = Date()
    errors = Text()
    published_policy = Object(_PublishedPolicy)
    source_ip_address = Ip()
    source_country = Text()
    source_reverse_dns = Text()
    source_Base_domain = Text()
    # BUG FIX: was `message_count = Integer` — the field *class*, not an
    # instance — so this field was never mapped.
    message_count = Integer()
    disposition = Text()
    dkim_aligned = Boolean()
    spf_aligned = Boolean()
    passed_dmarc = Boolean()
    policy_overrides = Nested(_PolicyOverride)
    header_from = Text()
    envelope_from = Text()
    envelope_to = Text()
    dkim_results = Nested(_DKIMResult)
    spf_results = Nested(_SPFResult)

    def add_policy_override(self, type_, comment):
        """Append a policy-override entry."""
        self.policy_overrides.append(
            _PolicyOverride(type=type_, comment=comment))

    def add_dkim_result(self, domain, selector, result):
        """Append a DKIM authentication result."""
        self.dkim_results.append(
            _DKIMResult(domain=domain, selector=selector, result=result))

    def add_spf_result(self, domain, scope, result):
        """Append an SPF authentication result."""
        self.spf_results.append(
            _SPFResult(domain=domain, scope=scope, result=result))

    def save(self, **kwargs):
        # DMARC passes when either SPF or DKIM is aligned; the value is
        # always derived here, never trusted from the caller.  (The original
        # redundant `= False` pre-assignment was removed — it was
        # immediately overwritten.)
        self.passed_dmarc = self.spf_aligned or self.dkim_aligned
        return super().save(**kwargs)
class CompaniesHouseCompany(BaseESModel):
    """Elasticsearch representation of CompaniesHouseCompany model."""

    id = Keyword()
    company_category = fields.SortableCaseInsensitiveKeywordText()
    company_number = fields.SortableCaseInsensitiveKeywordText()
    company_status = fields.SortableCaseInsensitiveKeywordText()
    incorporation_date = Date()
    # Name is copied into keyword and trigram variants for exact and fuzzy search.
    name = fields.SortableText(copy_to=[
        'name_keyword',
        'name_trigram',
    ], )
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()
    registered_address_1 = Text()
    registered_address_2 = Text()
    registered_address_town = fields.SortableCaseInsensitiveKeywordText()
    registered_address_county = Text()
    # Postcode also gets a trigram copy for partial matching.
    registered_address_postcode = Text(
        copy_to='registered_address_postcode_trigram')
    registered_address_postcode_trigram = fields.TrigramText()
    registered_address_country = fields.nested_id_name_field()
    sic_code_1 = Text()
    sic_code_2 = Text()
    sic_code_3 = Text()
    sic_code_4 = Text()
    uri = Text()

    # Transformations applied when mapping DB values onto document fields.
    MAPPINGS = {
        'id': str,
        'registered_address_country': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = (
        # to match names like A & B
        'name',
        'name_trigram',
        'company_number',
        'registered_address_postcode_trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = 'companieshousecompany'
class ZhiHuQuestionIndex(Document):
    """Zhihu question document."""

    suggest = Completion(analyzer=my_analyzer)  # autocomplete input; my_analyzer defined elsewhere
    question_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()  # follower count
    click_num = Integer()
    crawl_time = Date()

    class Index:
        name = 'zhihu_question'
class ArticleType(DocType):
    """Jobbole article document."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer='ik_max_word')  # Text fields are analyzed (tokenized) for search
    create_date = Date()
    url = Keyword()  # Keyword: stored as-is, never analyzed
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()
    praise_num = Integer()
    comment_num = Integer()
    share_num = Integer()
    fav_num = Integer()
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        index = "jobbole"
        doc_type = "article"
class Paper(Document):
    """
    Paper object inside elasticserach
    """

    # 'raw' keyword sub-field enables exact matching on the title.
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    abstract = Text(analyzer='snowball')
    full_text = Text(analyzer='snowball')
    authors = Text(analyzer='snowball')
    date = Date()

    class Index:
        name = 'arxiv_papers'
        settings = {
            "number_of_shards": 2,
        }

    def save(self, **kwargs):
        # No extra processing; persist via the base implementation.
        return super().save(**kwargs)
class ArticleType(DocType):
    """Jobbole article type."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    content = Keyword()  # NOTE(review): article body stored unanalyzed — confirm intended
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL
    front_image_url = Keyword()
    front_image_path = Keyword()
    tags = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
class IndexedCMSPage(Document):
    """Search document for a CMS page."""

    body = Text(analyzer='english')
    description = Text(analyzer='english')
    # '_exact' keyword sub-fields support exact matching alongside analyzed text.
    django_id = Text(fields={'_exact': Keyword()})
    language = Text(analyzer='english')
    page_id = Long()
    pub_date = Date()
    site_id = Long()
    slug = Text(analyzer='english', fields={'_exact': Keyword()})
    text = Text(analyzer='english')
    title = Text(analyzer='english', fields={'_exact': Keyword()})
    url = Text(fields={'_exact': Keyword()})

    class Index:
        # Write through the alias configured in Django settings.
        name = settings.ES_INDICES['web_content']['alias']

    class Meta:
        # Reject documents carrying fields not declared above.
        dynamic = MetaField('strict')
class Pagina(Document):
    """Indexed page document; ``created_at`` is stamped on every save."""

    autor = Text()
    titulo = Text()
    categoria = Text()
    idioma = Text()
    instituicao = Text()
    # BUG FIX: these were `int()` — a plain Python zero, not an
    # elasticsearch-dsl field — so neither attribute was mapped or
    # serialized as part of the document. NOTE(review): requires `Integer`
    # in the elasticsearch_dsl import at the top of this file — confirm.
    acessos = Integer()
    pagina = Integer()
    base64 = Text()
    texto = Text()
    created_at = Date()

    class Index:
        name = ELASTICSEARCH_INDEX

    def save(self, **kwargs):
        # Stamp the save time (naive local time) on every write.
        self.created_at = datetime.now()
        return super().save(**kwargs)
class CareersType(DocType):
    """Campus job fair document."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete input
    url = Keyword()
    tianyan_company_url = Keyword()  # Tianyancha company profile URL
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")
    meet_name = Keyword()
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        index = "careers"
        doc_type = "careers_type"