class ndrcType(Document):
    # Custom class inheriting from Document.
    # Text fields are tokenized, so a Chinese analyzer is required; ik_max_word is the Chinese analyzer.
    name = "国家发改委"  # NDRC (National Development and Reform Commission)
    title = Text(analyzer="ik_max_word")  # field name = field type; Text is a string type that is tokenized into an inverted index
    content = Text(analyzer="ik_max_word")
    file_content = Text(analyzer="ik_max_word")
    url = Keyword()  # Keyword is a plain string type that is not tokenized
    date = Date()  # Date field type
    year = Integer()
    month = Integer()
    day = Integer()
    image_urls = Keyword()
    attachments = Keyword()
    class0 = Keyword()
    class1 = Keyword()
    class2 = Keyword()
    class3 = Keyword()
    website = Keyword()

    class Index:
        name = "国家发改委"  # index name (analogous to a database name)
class Ips(Document):
    location = Nested(Location)
    infos = Nested(Info)
    target = Keyword()
    published_from = Date()

    class Index:
        name = 'w12scan'
        settings = {
            "number_of_shards": 2,
        }

    class Meta:
        doc_type = 'ips'

    def save(self, **kwargs):
        if not self.published_from:
            self.published_from = datetime.now()
        return super().save(**kwargs)
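# Usage sketch (not from the source): assumes the Location/Info inner docs are
# defined elsewhere and a default connection is registered. save() backfills
# published_from when the caller leaves it unset.
from elasticsearch_dsl import connections

connections.create_connection(hosts=['localhost'])  # assumed host
doc = Ips(target='203.0.113.7')
doc.save()
print(doc.published_from)  # stamped by save() via datetime.now()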
def nested_company_field(field):
    """Nested field for lists of companies."""
    return Nested(
        properties={
            'id': Keyword(),
            'name': SortableCaseInsensitiveKeywordText(
                copy_to=f'{field}.name_trigram',
            ),
            'name_trigram': TrigramText(),
            'trading_name': SortableCaseInsensitiveKeywordText(
                copy_to=f'{field}.trading_name_trigram',
            ),
            'trading_name_trigram': TrigramText(),
        },
        include_in_parent=True,
    )
class ESRelatedModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    simpleton = fields.id_name_field()

    MAPPINGS = {
        'simpleton': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = ('simpleton.name',)

    class Index:
        doc_type = DEFAULT_MAPPING_TYPE

    class Meta:
        """Default document meta data."""

        doc_type = DEFAULT_MAPPING_TYPE
class Project(Document):
    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)
    zscore = Float()

    @classmethod
    def from_db(cls, release):
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        obj["version"] = sorted(
            release.all_versions,
            key=lambda r: packaging.version.parse(r),
            reverse=True,
        )
        obj["latest_version"] = release.latest_version
        obj["summary"] = release.summary
        obj["description"] = release.description
        obj["author"] = release.author
        obj["author_email"] = release.author_email
        obj["maintainer"] = release.maintainer
        obj["maintainer_email"] = release.maintainer_email
        obj["home_page"] = release.home_page
        obj["download_url"] = release.download_url
        obj["keywords"] = release.keywords
        obj["platform"] = release.platform
        obj["created"] = release.created
        obj["classifiers"] = release.classifiers
        obj["zscore"] = release.zscore
        return obj

    class Index:
        # make sure this class can match any index so it will always be used to
        # deserialize data coming from elasticsearch.
        name = "*"
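# Search sketch (the index name 'pypi-projects' is invented; assumes a default
# connection). Because Index.name is "*", hits from any index deserialize into
# Project instances.
s = Project.search(index='pypi-projects').query('match', name='requests')
for project in s:
    print(project.normalized_name, project.latest_version)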
class Doc(Document):
    page_id = Keyword()  # in es index news-page
    title = Text()
    summary = Text()
    text = Text()
    # array
    lang = Keyword()
    extracted_event_ids = Keyword()
    extracted_entity_ids = Keyword()
    # array
    user_event_ids = Keyword()
    user_entity_ids = Keyword()

    class Index:
        name = "learning-flair"
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}
class ESCachedRequestIndex(Document):
    es_index = Keyword()
    es_query = Keyword()
    es_aggs = Keyword()
    es_request_digest = Keyword()
    host = Keyword()
    run_env_type = Keyword()
    is_cached = Boolean()
    # Do not use the elasticsearch_dsl Date type; it does not serialize correctly.
    request_date = Integer()

    class Index:
        name = 'chembl_glados_es_cache_usage'
        using = MONITORING_CONNECTION
class TestResult(Document):
    testrun_id = Keyword()
    case_id = Keyword()
    case_name = Text()
    case_tags = Keyword()
    suite_name = Keyword()
    env = Keyword()
    result = Keyword()
    case_comment = Text()
    stdout = Text()
    traceback = Text()

    class Index:
        name = 'test-result-*'
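# Sketch (assumes a default connection): with a wildcard in Index.name, reads
# span every matching index, while writes need a concrete index passed to save().
failed = TestResult.search().filter('term', result='failed').execute()
TestResult(testrun_id='run-1', result='passed').save(index='test-result-2024-01-01')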
class MovieInfo(DocType):
    suggest = Completion()
    title = Text(analyzer=ik_analyzer)
    douban_score = Keyword()
    IMDb_score = Keyword()
    age = Keyword()
    introduction = Text(analyzer=ik_analyzer)
    type = Text(analyzer=ik_analyzer)
    url = Keyword()
    front_img_path = Keyword()
    download_url = Keyword()

    class Meta:
        index = 'movie'
def address_field(index_country=True):
    """Address field as nested object."""
    if index_country:
        nested_country_field = country_field()
    else:
        nested_country_field = Object(
            properties={
                'id': Keyword(index=False),
                'name': Text(index=False),
            },
        )

    return Object(
        properties={
            'line_1': Text(index=False),
            'line_2': Text(index=False),
            'town': Text(index=False),
            'county': Text(index=False),
            'postcode': Text(
                fields={
                    'trigram': TrigramText(),
                },
            ),
            'country': nested_country_field,
        },
    )
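# Illustrative embedding of address_field in a document mapping (ESCompany and
# its field names are invented, not from the source).
class ESCompany(Document):
    registered_address = address_field()
    trading_address = address_field(index_country=False)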
class MoviesAutoIndex(Document):
    title = Text(fields={'keyword': Keyword()})
    created = Date()
    year = Integer()
    rating = Float()
    genre = Text()
    suggest = Completion(analyzer=ascii_fold)

    def clean(self):
        # Build suggestion inputs from successively shorter prefixes of the title.
        self.suggest = {
            'input': [self.title[:j] for j in range(len(self.title), 1, -1)],
        }

    class Index:
        name = S_index
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}
class Vocabulary(Document):
    name = Text()
    meaning = Text()
    tags = Keyword()
    score = Float()
    created_at = Date()
    updated_at = Date()

    class Index:
        name = "vocabularies"

    def save(self, **kwargs):
        # TODO: check vocab existence
        if not self.created_at:
            self.created_at = datetime.now()
        self.updated_at = datetime.now()
        return super().save(**kwargs)

    @staticmethod
    def from_feed(feed_data: VocabFeed):
        return Vocabulary(
            name=feed_data.name,
            meaning=feed_data.meaning,
            score=feed_data.frequency,
        )
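# Hypothetical ingestion sketch: VocabFeed comes from elsewhere in the source,
# so its constructor and the values below are assumptions.
feed = VocabFeed(name='serendipity', meaning='a happy accident', frequency=3.2)
vocab = Vocabulary.from_feed(feed)
vocab.save()  # created_at/updated_at are stamped inside save()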
class ElasticSearchCard(Document):
    Name = Text()
    Name_Suggest = Completion()
    Description = Text()
    Description_Suggest = Completion()
    Topic = Text()
    Topic_Suggest = Completion()
    category = Text(analyzer='snowball', fields={'raw': Keyword()})

    class Index:
        name = 'timelapsed'

    def save(self, **kwargs):
        return super().save(**kwargs)

    def delete(self, **kwargs):
        # Fix me.
        return super().delete(**kwargs)
class Device(MyDocType):
    device_type = Keyword()
    pos_x = Integer()
    pos_y = Integer()
    radius = Integer()
    key = Keyword()
    mqtt_account = Object(
        doc_class=MQTTAccount,
        properties={
            'username': Keyword(),
            'password': Keyword(),
            'server': Keyword(),
            'port': Integer(),
            'keep_alive': Keyword(),
            'clients_topic': Keyword(),
            'response_topic': Keyword(),
        },
    )

    class Meta:
        index = 'bluetooth'

    def verify_key(self, key):
        return self.key == hash_sha256(key)
class PercolatorDoc(Document):
    """
    Document class used for storing the percolation queries.
    """

    # relevant fields from BlogPost must be also present here for the queries
    # to be able to use them. Another option would be to use document
    # inheritance but save() would have to be reset to normal behavior.
    content = Text()

    # the percolator query to be run against the doc
    query = Percolator()

    # list of tags to append to a document
    tags = Keyword(multi=True)

    class Index:
        name = 'test-percolator'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }
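# Percolation sketch, assuming PercolatorDoc.init() has run against a default
# connection: register a query, then match an un-indexed document against it.
PercolatorDoc(query={'match': {'content': 'python'}}, tags=['python']).save(refresh=True)

matches = PercolatorDoc.search().query(
    'percolate',
    field='query',
    document={'content': 'a post about python'},
).execute()
for hit in matches:
    print(hit.tags)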
def contact_or_adviser_field(include_dit_team=False):
    """Object field for advisers and contacts."""
    props = {
        'id': Keyword(),
        'first_name': NormalizedKeyword(),
        'last_name': NormalizedKeyword(),
        'name': Text(
            fields={
                'keyword': NormalizedKeyword(),
                'trigram': TrigramText(),
            },
        ),
    }

    if include_dit_team:
        props['dit_team'] = id_name_field()

    return Object(properties=props)
class EnumField(CustomField):
    builtin_type = Keyword()

    def __init__(self, *args, choice_type: Type[Enum] = None, default=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.choice_type = choice_type
        self.default = default

    def _empty(self):
        return self.default

    def _serialize(self, data):
        # store the member name in the index
        return self.choice_type(data).name

    def _deserialize(self, data):
        # look up by name, mirroring _serialize
        return self.choice_type[data]
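# Hypothetical round-trip check (the Color enum is invented; assumes
# CustomField needs no positional arguments).
class Color(Enum):
    RED = 1
    GREEN = 2

color_field = EnumField(choice_type=Color, default=Color.RED)
assert color_field._serialize(Color.GREEN) == 'GREEN'
assert color_field._deserialize('GREEN') is Color.GREEN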
class Project(DocType):
    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)

    class Meta:
        # disable the _all field to save some space
        all = MetaField(enabled=False)

    @classmethod
    def from_db(cls, release):
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        obj["version"] = sorted(
            release.all_versions,
            key=lambda r: packaging.version.parse(r),
            reverse=True,
        )
        obj["latest_version"] = release.latest_version
        obj["summary"] = release.summary
        obj["description"] = release.description
        obj["author"] = release.author
        obj["author_email"] = release.author_email
        obj["maintainer"] = release.maintainer
        obj["maintainer_email"] = release.maintainer_email
        obj["home_page"] = release.home_page
        obj["download_url"] = release.download_url
        obj["keywords"] = release.keywords
        obj["platform"] = release.platform
        obj["created"] = release.created
        obj["classifiers"] = release.classifiers
        return obj
class Post(Document):
    title = Text()
    title_suggest = Completion()
    created_at = Date()
    published = Boolean()
    category = Text(analyzer=html_strip, fields={'raw': Keyword()})
    comments = Nested(Comment)

    class Index:
        name = INDEX
        using = CONNECTION_ALIAS

    def add_comment(self, author, content):
        self.comments.append(
            Comment(author=author, content=content, created_at=datetime.now())
        )

    def save(self, **kwargs):
        self.created_at = datetime.now()
        return super().save(**kwargs)
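# Usage sketch (assumes Comment is an InnerDoc with author/content/created_at
# fields and that INDEX and CONNECTION_ALIAS are configured elsewhere).
post = Post(title='Hello world', published=True)
post.add_comment('alice', 'Nice post!')
post.save()  # save() sets created_at before indexing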
class MyType(DocType):
    # Jobbole article type
    user_id = Keyword()
    criterion = Text(analyzer=ik_analyzer, similarity="BM25")  # synonyms
    intellectual_property = Text(analyzer="ik_max_word", similarity="BM25")
    paper = Text(analyzer="ik_max_word", similarity="BM25")
    research_project = Text(analyzer="ik_max_word", similarity="BM25")
    professional_certificate = Text(analyzer="ik_max_word", similarity="BM25")
    academic_activities = Text(analyzer="ik_max_word", similarity="BM25")
    experience = Text(analyzer="ik_max_word", similarity="BM25")
    further_study = Text(analyzer="ik_max_word", similarity="BM25")
    personal_register = Text(analyzer="ik_max_word", similarity="BM25")
    expert_title = Text(analyzer="ik_max_word", similarity="BM25")
    research = Text(analyzer="ik_max_word", similarity="BM25")
    domestic_studies = Text(analyzer="ik_max_word", similarity="BM25")
    professional_qualification = Text(analyzer="ik_max_word", similarity="BM25")

    class Meta:
        index = "roger"
        doc_type = "test"
class BuildDoc(DocType):
    id = Keyword(required=True)

    # Note! The reason for using Object() instead of Nested() is because
    # SearchKit doesn't work if it's nested. This works though.
    build = Object(_Build)
    source = Object(_Source)
    target = Object(_Target)
    download = Object(_Download)

    @classmethod
    def create(cls, id, **doc):
        assert id and isinstance(id, int) and id > 0
        return BuildDoc(
            meta={"id": id},
            id=id,
            build=_Build(**doc["build"]),
            source=_Source(**doc["source"]),
            target=_Target(**doc["target"]),
            download=_Download(**doc["download"]),
        )
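# Hypothetical call (the payload keys mirror the inner docs, but their fields
# are not shown in the source, so empty dicts stand in here).
doc = BuildDoc.create(42, build={}, source={}, target={}, download={})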
class XuanType(DocType):
    suggest = Completion(analyzer=ik_analyzer)
    # campus recruitment talks are searched by title and city
    title = Text(analyzer="ik_max_word")
    city = Text(analyzer="ik_max_word")
    img = Keyword()
    address = Keyword()
    time = Keyword()
    status = Keyword()
    detail_url = Keyword()
    from_school = Keyword()

    class Meta:
        index = "xuan"  # index == database
        doc_type = "info"  # doc type == table name
def test_mapping_can_collect_all_analyzers():
    a1 = analysis.analyzer(
        'my_analyzer1',
        tokenizer='keyword',
        filter=[
            'lowercase',
            analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b']),
        ],
    )
    a2 = analysis.analyzer('english')
    a3 = analysis.analyzer('unknown_custom')
    a4 = analysis.analyzer(
        'my_analyzer2',
        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
    )
    a5 = analysis.analyzer('my_analyzer3', tokenizer='keyword')

    m = mapping.Mapping('article')
    m.field(
        'title',
        'text',
        analyzer=a1,
        fields={
            'english': Text(analyzer=a2),
            'unknown': Keyword(search_analyzer=a3),
        },
    )
    m.field('comments', Nested(properties={'author': Text(analyzer=a4)}))
    m.meta('_all', analyzer=a5)

    assert {
        'analyzer': {
            'my_analyzer1': {
                'filter': ['lowercase', 'my_filter1'],
                'tokenizer': 'keyword',
                'type': 'custom',
            },
            'my_analyzer2': {
                'filter': ['my_filter2'],
                'tokenizer': 'trigram',
                'type': 'custom',
            },
            'my_analyzer3': {'tokenizer': 'keyword', 'type': 'custom'},
        },
        'filter': {
            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'},
        },
        'tokenizer': {
            'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
        },
    } == m._collect_analysis()

    assert json.loads(json.dumps(m.to_dict())) == m.to_dict()
class User(Document):
    id = Integer()
    name = Text(fields={'keywords': Keyword()})
    suggest = Completion(analyzer=ascii_fold)

    class Index:
        name = 'user'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
        }

    def clean(self):
        """
        Automatically construct the suggestion input by taking all possible
        permutations of the user's name as ``input``.
        """
        self.suggest = {
            'input': [' '.join(p) for p in permutations(self.name.split())],
        }
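# Completion-suggester lookup sketch (assumes User.init() ran and documents
# were saved with refresh=True; 'jo' is an arbitrary prefix).
s = User.search()
s = s.suggest('name_suggestions', 'jo', completion={'field': 'suggest'})
response = s.execute()
for option in response.suggest.name_suggestions[0].options:
    print(option._source.name)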
class JobType(DocType):
    # Liepin.com job posting type
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    s = Keyword()
    url = Keyword()
    url_object_id = Keyword()  # md5 hash, fixes the url to a constant length
    salary = Keyword()
    work_years = Text(analyzer="ik_max_word")
    degree_need = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    create_date = Keyword()
    job_addr = Text(analyzer="ik_max_word")
    company_url = Keyword()
    company_name = Keyword()

    class Meta:
        index = "liepin"
        doc_type = "job"
class Account(DocType):
    # bot name
    botName = Keyword()
    # available cash
    cash = Float()
    # individual positions
    positions = Nested()
    # market value
    value = Float()
    # market value + cash
    allValue = Float()
    # timestamp
    timestamp = Date()
    # computed at market close
    closing = Boolean()

    class Meta:
        index = 'account'
        doc_type = 'doc'
        all = MetaField(enabled=False)
class ESSimpleModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()

    MAPPINGS = {
        'id': str,
    }

    SEARCH_FIELDS = (
        'name',
        'name_trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = 'simplemodel'
class Order(DocType):
    comment = Text(fields={'raw': Keyword()})
    status = Text()
    qty = Float()
    published = Boolean()
    created_at = Date()
    delta_series = Nested(Delta)

    class Meta:
        index = 'bitmex'

    def add_delta(self, **kwargs):
        self.delta_series.append(
            Delta(created_at=datetime.now(), **kwargs)
        )

    def save(self, **kwargs):
        self.created_at = datetime.now()
        return super().save(**kwargs)
class SFNDNS(InnerDoc):
    event_type = Text()
    domain_name = Text(analyzer='snowball', fields={'raw': Keyword()})
    device_name = Text(analyzer='snowball', fields={'raw': Keyword()})
    host = Text(analyzer='snowball', fields={'raw': Keyword()})
    threat_id = Text(analyzer='snowball')
    threat_name = Text(analyzer='snowball')
    tag_name = Text(fields={'raw': Keyword()})
    tag_class = Text(fields={'raw': Keyword()})
    tag_group = Text(fields={'raw': Keyword()})
    tag_description = Text(analyzer='snowball')
    public_tag_name = Text(analyzer='snowball')
    confidence_level = Integer()
    sample_date = Date()
    file_type = Text(fields={'raw': Keyword()})
    updated_at = Date()
    processed = Integer()
    src_ip = Ip()
    dst_ip = Ip()
class BlogPost(Document):
    title = Text()
    published = Date()
    tags = Keyword(multi=True)
    content = Text()

    def is_published(self):
        return self.published and datetime.now() > self.published

    @classmethod
    def _matches(cls, hit):
        # override _matches to match indices in a pattern instead of just ALIAS
        # hit is the raw dict as returned by elasticsearch
        return fnmatch(hit["_index"], PATTERN)

    class Index:
        # we will use an alias instead of the index
        name = ALIAS
        # set settings and possibly other attributes of the index like
        # analyzers
        settings = {"number_of_shards": 1, "number_of_replicas": 0}
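# Bootstrap sketch following the standard alias-migration recipe (assumes
# ALIAS and PATTERN are defined as in the source, e.g. PATTERN = ALIAS + '-*'):
# publish the mapping as an index template so every matching index receives it.
index_template = BlogPost._index.as_template(ALIAS, PATTERN)
index_template.save()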