class RapidNHEvent(DocType):
    class Meta:
        index = settings.ES_INDICES['rapid']['name']
        doc_type = settings.ES_INDICES['rapid']['documents'][1]['name']
        dynamic = MetaField('strict')

    event_date = Date()
    created_date = Date()
    title = Text(analyzer='english')
    event_type = String(fields={'_exact': Keyword()})
    location_description = Text(analyzer='english', fields={'_exact': Keyword()})
    location = GeoPoint()
    main_image_url = String(fields={'_exact': Keyword()})
    main_image_uuid = String(fields={'_exact': Keyword()})
    datasets = Nested(
        properties={
            "id": String(fields={'_exact': Keyword()}),
            "title": String(fields={'_exact': Keyword()}),
            "doi": String(fields={'_exact': Keyword()}),
            "url": String(fields={'_exact': Keyword()}),
        })

    def save(self, **kwargs):
        # self.created_date = datetime.utcnow()
        return super(RapidNHEvent, self).save(**kwargs)
class Products(DocType):
    about = Text(fields={"keyword": Keyword()}, required=True)
    brand = Text(fields={"keyword": Keyword()}, required=True)
    care = Text(fields={"keyword": Keyword()}, required=True)
    code = Text(fields={"keyword": Keyword()}, required=True)
    details = Text(fields={"keyword": Keyword()}, required=True)
    gender = Text(fields={"keyword": Keyword()}, required=True)
    images = Text(fields={"keyword": Keyword()}, required=True)
    kind = Text(fields={"keyword": Keyword()}, required=True)
    link = Text(fields={"keyword": Keyword()}, required=True)
    name = Text(fields={"keyword": Keyword()}, required=True)
    price = Nested(
        properties={
            "currency": Text(fields={'keyword': Keyword()}, required=True),
            "outlet": Float(required=True),
            "retail": Float(required=True)
        })
    sessionid = Text(fields={"keyword": Keyword()}, required=True)
    sessionname = Text(fields={"keyword": Keyword()}, required=True)
    storename = Text(fields={"keyword": Keyword()}, required=True)

    class Meta:
        index = "store"
        doc_type = "products"
class Article(DocType):
    article_id = Keyword(ignore_above=256)
    article_title = Text(analyzer='ik_max_word', search_analyzer='ik_max_word')
    author = Keyword(ignore_above=256)
    author_parsed = Keyword(ignore_above=256)
    board = Keyword(ignore_above=256)
    content = Text(analyzer='ik_max_word', search_analyzer='ik_max_word')
    date = Keyword(ignore_above=256)
    date_parsed = Date(default_timezone='Asia/Taipei')
    ip = Keyword(ignore_above=256)
    message_count = Nested(MessageCount)
    messages = Nested(Message)

    class Meta:
        index = 'ptt-2018-06'
        doc_type = 'article'
class CRECDoc(DocType):
    """An elasticsearch_dsl document model for CREC documents."""

    title = Text()
    title_part = Text()
    date_issued = Date()
    content = Text(fielddata=True)
    crec_id = Text()
    pdf_url = Text()
    html_url = Text()
    page_start = Text()
    page_end = Text()
    speakers = Text()
    segments = Nested(
        properties={
            'segment_id': Text(),
            'speaker': Text(),
            'text': Text(),
            'bioguide_id': Text()
        }
    )

    class Meta:
        index = settings.ES_CW_INDEX
class IndexedSocialIdentity(InnerObjectWrapper):
    """Contact indexed social identity model."""

    name = Text()
    type = Keyword()
    # Abstract everything else in a map
    infos = Nested()
class MentorProject(Document):
    name = Keyword(required=True)
    company = Text()
    bio = Text()
    backgroundRural = Boolean(required=True)
    preferStudentUnderRep = Short(required=True)  # (0-2)
    preferToolExistingKnowledge = Boolean(required=True)
    okExtended = Boolean(required=True)
    okTimezoneDifference = Boolean(required=True)
    timezone = Integer(required=True)  # +- UTC
    id = Keyword(required=True)
    proj_description = Text(required=True)
    proj_tags = Keyword(multi=True)
    numStudentsSelected = Short()
    listStudentsSelected = Nested(StudentVote)
    track = Keyword(required=True)

    class Index:
        name = "mentors_index"
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def add_vote(self, student_id, choice):
        self.listStudentsSelected.append(
            StudentVote(student_id=student_id, choice=choice))

    def save(self, **kwargs):
        self.numStudentsSelected = 0
        return super().save(**kwargs)
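# A minimal usage sketch for MentorProject above, assuming a local
# Elasticsearch node and illustrative field values; only init(), save(),
# and the add_vote() helper come from the class itself.
from elasticsearch_dsl import connections

connections.create_connection(hosts=["localhost:9200"])  # assumed host
MentorProject.init()  # create the mentors_index mapping once

project = MentorProject(
    name="mentor-42", backgroundRural=False, preferStudentUnderRep=1,
    preferToolExistingKnowledge=True, okExtended=True,
    okTimezoneDifference=False, timezone=-5, id="42",
    proj_description="An example project", track="beginner")
project.add_vote(student_id="s-1", choice=0)  # appends a nested StudentVote
project.save()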
class CustomType(DocType):
    business = Nested(
        properties={
            'id': Integer(),
            'name': String(),
        }
    )
class Comment(InnerDoc):
    content = Text()
    created_at = Date()
    author = Object(User)
    history = Nested(History)

    class Meta:
        dynamic = MetaField(False)
class AgentDocument(DocumentBase):
    id = Keyword()
    task_id = Keyword()
    names = Nested(AgentNameDocument)
    start_date = Date()
    end_date = Date()

    @classmethod
    def get_model(cls):
        return Agent

    @classmethod
    def from_obj(cls, obj):
        if obj.task is None:
            task_id = None
        else:
            task_id = str(obj.task.pk)
        doc = AgentDocument(
            _id=str(obj.pk),
            id=str(obj.pk),
            task_id=task_id,
            names=[
                AgentNameDocument.from_obj(name)
                for name in obj.names.iterator()
            ],
            start_date=obj.start_date,
            end_date=obj.end_date,
        )
        return doc

    class Index:
        name = 'agent'
        analyzers = [autocomplete_analyzer]
class SocialmediaType(DocType):
    # Weibo account type
    # suggest = Completion(analyzer=ik_analyzer)
    type = Keyword()
    name = Keyword()
    # account_domain = Keyword()
    screen_name = Text(analyzer="ik_max_word")
    icon = Keyword()
    tweets = Integer()
    followers = Integer()
    following = Integer()
    brief = Keyword()
    # thumb_image = Keyword()
    category = Keyword()
    alpha = Keyword()
    language = Keyword()
    country = Nested(properties={
        "id": Keyword(),
        "name": Text(analyzer="ik_max_word"),
    })

    class Meta:
        index = "weibo_account"
        doc_type = "weibo_account"
        settings = {
            "number_of_shards": 5,
        }
def test_mapping_can_collect_all_analyzers():
    a1 = analysis.analyzer(
        'my_analyzer1',
        tokenizer='keyword',
        filter=[
            'lowercase',
            analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])
        ],
    )
    a2 = analysis.analyzer('english')
    a3 = analysis.analyzer('unknown_custom')
    a4 = analysis.analyzer(
        'my_analyzer2',
        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
    )

    m = mapping.Mapping('article')
    m.field('title', 'string', analyzer=a1,
            fields={
                'english': String(analyzer=a2),
                'unknown': String(analyzer=a3),
            })
    m.field('comments', Nested(properties={'author': String(analyzer=a4)}))

    assert {
        'analyzer': {
            'my_analyzer1': {
                'filter': ['lowercase', 'my_filter1'],
                'tokenizer': 'keyword',
                'type': 'custom'},
            'my_analyzer2': {
                'filter': ['my_filter2'],
                'tokenizer': 'trigram',
                'type': 'custom'}},
        'filter': {
            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'},
        },
        'tokenizer': {
            'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
        }
    } == m._collect_analysis()
class eBPF_Program_Instance_Document(Base_Document):
    """Represents an eBPF program installed in an execution environment."""

    # id already defined by Elasticsearch
    ebpf_program_catalog_id = Text(required=True)
    exec_env_id = Text(required=True)
    parameters = Nested(eBPF_Program_Instance_Parameter_Inner_Doc)
    description = Text()

    class Index:
        """Elasticsearch configuration."""
        name = 'ebpf-program-instance'

    def edit_parameter(self, parameter):
        so = self.Status_Operation
        id = parameter.get('id', None)
        val = parameter.get('value', None)
        ts = parameter.get('timestamp', None)
        for p in self.parameters:
            if p.id == id:
                if p.value != val or p.timestamp != ts:
                    p.value = val
                    p.timestamp = ts
                    return so.UPDATED
                return so.NOT_MODIFIED
        self.parameters.append(
            eBPF_Program_Instance_Parameter_Inner_Doc(**parameter))
        return so.UPDATED
class GoodsSKUIndex(Document):
    name = Text(analyzer=html_strip)
    desc = Text(analyzer=html_strip)
    types = Nested(GoodsTypeIndex)

    class Index:
        name = 'goodssku-index'

    def add_types(self, name):
        self.types.append(GoodsTypeIndex(name=name))

    def save(self, **kwargs):
        return super(GoodsSKUIndex, self).save(**kwargs)

    def delete(self, **kwargs):
        super(GoodsSKUIndex, self).delete(**kwargs)


# Simple search function: returns the ids of documents matched by either a
# top-level multi_match query or a nested multi_match query on nested_path.
def search(query="", nested_path="types"):
    q1 = Q("multi_match", query=query)
    q2 = Q('nested', inner_hits={}, path=nested_path,
           query=Q('multi_match', query=query))
    s1 = Search().query(q1)
    s2 = Search().query(q2)
    response1 = s1.execute()
    response2 = s2.execute()
    result1 = (hit.meta.id for hit in response1 if hit.meta)
    result2 = (hit.meta.id for hit in response2 if hit.meta)
    ids = chain(result1, result2)
    return ids
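# Hedged usage sketch for search() above: assumes a default connection is
# registered and the index already holds documents; the query string is
# illustrative. Note that a document matching both queries appears twice,
# since the two result generators are simply chained.
from elasticsearch_dsl import connections

connections.create_connection(hosts=["localhost:9200"])  # assumed host
for doc_id in search(query="apple", nested_path="types"):
    print(doc_id)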
class DomainResourceRecordDoc(GenericDoc):
    class Meta:
        doc_type = "opendns_domain_resource_record"

    first_seen = Date()
    last_seen = Date()
    rrs = Nested(doc_class=ResourceRecordDoc)
class Company(Document):
    """Class representing the searchable companies metadata"""

    name = Text(analyzer='snowball', fields={'raw': Keyword()})
    number = Keyword()
    registered_address = Object(Address)
    category = Keyword()
    status = Keyword()
    country_of_origin = Keyword()
    dissolution = Date(format='dd/MM/yyyy')
    incorporation = Date(format='dd/MM/yyyy')
    accounts = Object(Accounts)
    returns = Object(Returns)
    mortgages = Object(Mortgages)
    SIC_code = [
        Text(analyzer='snowball', fields={'raw': Keyword()}),
        Text(analyzer='snowball', fields={'raw': Keyword()}),
        Text(analyzer='snowball', fields={'raw': Keyword()}),
        Text(analyzer='snowball', fields={'raw': Keyword()})
    ]
    limited_partnerships = Object(LimitedPartnerships)
    URI = Keyword()
    previous_name = Nested(PreviousName)
    confirmation_statement = Object(Returns)

    def add_address(self, care_of, po_box, line1, line2, town, county,
                    country, post_code):
        """Change the registered address"""
        self.registered_address.update(
            Address(
                care_of=care_of,
                po_box=po_box,
                line1=line1,
                line2=line2,
                town=town,
                county=county,
                country=country,
                post_code=post_code))

    def age(self):
        """Calculate the current age of the company"""
        if self.is_dissolved():
            return self.dissolution - self.incorporation
        return datetime.now() - self.incorporation

    def is_dissolved(self):
        """Check if the company has been dissolved"""
        return self.dissolution >= self.incorporation

    class Index:
        """The index that all instances of this metadata will be saved to"""
        name = 'companies'
        settings = {
            "number_of_shards": 1,
            "mapping.ignore_malformed": True,
        }

    def save(self, **kwargs):
        """Saves the current item to the index"""
        # self.lines = len(self.body.split())
        return super(Company, self).save(**kwargs)
def test_properties_can_iterate_over_all_the_fields():
    m = mapping.Mapping('testing')
    m.field('f1', 'string', test_attr='f1',
            fields={'f2': String(test_attr='f2')})
    m.field('f3', Nested(test_attr='f3',
                         properties={'f4': String(test_attr='f4')}))

    assert {'f1', 'f2', 'f3', 'f4'} == set(
        f.test_attr for f in m.properties._collect_fields())
class Alternative(Document):
    content = Text()
    confidence = Float()
    alternate = Nested(Alternate)

    class Index:
        name = 'alternatives'
class Agent_Catalog_Resource_Inner_Doc(Inner_Doc):
    """Agent resource."""

    id = Text(required=True)
    config = Nested(Agent_Catalog_Resource_Config_Inner_Doc, required=True)
    description = Text()
    example = Text()
class BookmarkSearch(DocType):
    curator_title = Text()
    created_at = Date()
    updated_on = Date()
    rating = Text()
    description = Text()
    user_id = Text()
    work = Nested(BookmarkWorkSearch)

    class Meta:
        index = 'bookmark'

    class Index:
        name = 'bookmark'

    def save(self, **kwargs):
        self.created_at = datetime.now()
        return super().save(**kwargs)

    def create_from_json(self, bookmark_json):
        BookmarkSearch.init()
        self.curator_title = bookmark_json['curator_title']
        if "description" in bookmark_json:
            self.description = bookmark_json['description']
        if "rating" in bookmark_json:
            self.rating = bookmark_json['rating']
        self.user_id = bookmark_json['user_id']
        self.meta.id = bookmark_json['id']
        bookmark_work_search = BookmarkWorkSearch()
        bookmark_work_search.create_from_json(bookmark_json['work'])
        self.work.append(bookmark_work_search)
        self.save()
class HooverDoc(Document):
    """Document with the same fields as documents in hoover-search.

    This document type is used for newly created indices. The only
    difference is that the Text field is replaced with nested TextUnits.
    """

    attachments = Boolean()
    content_type = Keyword()
    date = Date()
    date_created = Date()
    email_domains = Keyword()
    filetype = Keyword()
    id = Keyword()
    in_reply_to = Keyword()
    lang = Keyword()
    md5 = Keyword()
    message = Keyword()
    message_id = Keyword()
    path = Keyword()
    path_text = Text()
    path_parts = Keyword()
    references = Keyword()
    rev = Integer()
    sha1 = Keyword()
    size = Integer()
    suffix = Keyword()
    thread_index = Keyword()
    word_count = Integer()
    body = Nested(TextUnit)

    def add_unit(self, content, position):
        self.body.append(TextUnit(content=content, position=position))
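# Hedged sketch of populating the nested body on HooverDoc: the field
# values and ids are illustrative assumptions; only add_unit() and the
# standard Document save() are taken from the class above. Assumes a
# default Elasticsearch connection is already configured.
doc = HooverDoc(id="doc-1", content_type="text/plain", word_count=2)
doc.add_unit(content="hello world", position=0)   # first TextUnit
doc.add_unit(content="second chunk", position=1)  # appended in order
doc.meta.id = "doc-1"
doc.save()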
class Model(DocType):
    id = Integer()
    name = String(fields={'sub': String()})
    other_models = Nested(
        doc_class=OtherModel,
        multi=True,
        properties={
            'id': Integer(),
            'name': String(fields={
                'raw': String(index='not_analyzed'),
                'sub': String()
            })
        })

    class Meta:
        index = 'models'

    def get_term_query(self, column_name, value, default_op='should'):
        tq = ElasticSearchMixin.get_match_query(value, default_op)
        if column_name is None:
            column_name = 'name'
        return {'match': {column_name: tq}}
class GenreDocument(Document):
    """Genre Document"""

    genre_id = Text()
    name = Text(analyzer='snowball', fields={'row': Keyword()},
                fielddata=True)
    name_lower = Text(analyzer='snowball', fields={'row': Keyword()},
                      fielddata=True)
    picture = Text()
    is_active = Boolean()
    is_deleted = Boolean()
    is_picture_deleted = Boolean()
    genre_movies = Nested(properties={
        'id': Text(),
        'movie__id': Text(),
        'movie__name': Text(),
    })

    class Index:
        """Index class for GenreDocument"""
        name = ES_DOCUMENT_SET['genre']
        settings = {
            "number_of_shards": ES_DOCUMENT_SET['genre_index_shared'],
        }
class Account(DocType):
    traderId = Keyword()
    cash = Float()
    positions = Nested()
    allValue = Float()
    timestamp = Date()
    tradingClose = Boolean()

    def copy_for_save(self, trading_close):
        account = Account()
        account.cash = self.cash
        account.traderId = self.traderId
        account.allValue = self.allValue
        account.positions = self.positions
        account.timestamp = self.timestamp  # copy from self, not the new doc
        account.tradingClose = trading_close
        return account

    def save(self, using=None, index=None, validate=True, **kwargs):
        self.meta.id = "{}_{}".format(
            self.traderId,
            self.timestamp.strftime('%Y-%m-%d %H:%M:%S'))
        return super().save(using, index, validate, **kwargs)

    class Meta:
        doc_type = 'doc'
        all = MetaField(enabled=False)
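# Hedged usage sketch for copy_for_save() above: the trader id, amounts and
# timestamp are illustrative; a default connection is assumed to exist.
from datetime import datetime

acct = Account(traderId="t-1", cash=1000.0, allValue=1000.0,
               timestamp=datetime(2020, 1, 2, 15, 0, 0), tradingClose=False)
snapshot = acct.copy_for_save(trading_close=True)  # same state, new flag
snapshot.save()  # meta.id becomes "t-1_2020-01-02 15:00:00"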
class Agent_Catalog_Action_Inner_Doc(Inner_Doc):
    """Agent action."""

    config = Nested(Agent_Catalog_Action_Config_Inner_Doc, required=True)
    status = Text()
    description = Text()
    example = Text()
class StructureUnitDocument(DocumentBase):
    id = Keyword()
    task_id = Keyword()
    name = Text(
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
        fields={'keyword': {'type': 'keyword'}})
    type = Keyword()
    desc = Text()  # python migrate_structure_unit_field.py
    comment = Text()
    reference_code = Text(
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
        fields={'keyword': {'type': 'keyword'}})
    archive = Nested(InnerArchiveDocument)
    start_date = Date()
    end_date = Date()

    @classmethod
    def get_model(cls):
        return StructureUnit

    @classmethod
    def get_index_queryset(cls):
        return StructureUnit.objects.filter(structure__is_template=False)

    @classmethod
    def from_obj(cls, obj):
        structure_set = obj.structure.tagstructure_set
        archive_doc = None
        if structure_set.exists():
            archive = structure_set.first().get_root().tag.current_version
            if archive is not None:
                archive_doc = InnerArchiveDocument.from_obj(archive)
        if obj.task is None:
            task_id = None
        else:
            task_id = str(obj.task.pk)
        doc = StructureUnitDocument(
            _id=obj.pk,
            id=obj.pk,
            task_id=task_id,
            name=obj.name,
            type=obj.type.name,
            desc=obj.description,
            comment=obj.comment,
            reference_code=obj.reference_code,
            archive=archive_doc,
            start_date=obj.start_date,
            end_date=obj.end_date,
        )
        return doc

    class Index:
        name = 'structure_unit'
        analyzers = [autocomplete_analyzer]
class _AggregateReportDoc(Document):
    class Index:
        name = "dmarc_aggregate"

    xml_schema = Text()
    org_name = Text()
    org_email = Text()
    org_extra_contact_info = Text()
    report_id = Text()
    date_range = Date()
    date_begin = Date()
    date_end = Date()
    errors = Text()
    published_policy = Object(_PublishedPolicy)
    source_ip_address = Ip()
    source_country = Text()
    source_reverse_dns = Text()
    source_Base_domain = Text()
    message_count = Integer()
    disposition = Text()
    dkim_aligned = Boolean()
    spf_aligned = Boolean()
    passed_dmarc = Boolean()
    policy_overrides = Nested(_PolicyOverride)
    header_from = Text()
    envelope_from = Text()
    envelope_to = Text()
    dkim_results = Nested(_DKIMResult)
    spf_results = Nested(_SPFResult)

    def add_policy_override(self, type_, comment):
        self.policy_overrides.append(
            _PolicyOverride(type=type_, comment=comment))

    def add_dkim_result(self, domain, selector, result):
        self.dkim_results.append(
            _DKIMResult(domain=domain, selector=selector, result=result))

    def add_spf_result(self, domain, scope, result):
        self.spf_results.append(
            _SPFResult(domain=domain, scope=scope, result=result))

    def save(self, **kwargs):
        self.passed_dmarc = self.spf_aligned or self.dkim_aligned
        return super().save(**kwargs)
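# Hedged sketch of the add_* helpers above; the domain names, selector and
# result strings are illustrative values, not part of the original code.
# Assumes a default Elasticsearch connection is configured.
report = _AggregateReportDoc(org_name="example org", report_id="r-1",
                             spf_aligned=True, dkim_aligned=False)
report.add_dkim_result(domain="example.com", selector="s1", result="fail")
report.add_spf_result(domain="example.com", scope="mfrom", result="pass")
report.add_policy_override(type_="forwarded", comment="mailing list")
report.save()  # passed_dmarc is derived from the alignment flags on save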
class BlogPost(DocType):
    authors = Nested(
        required=True,
        doc_class=Author,
        properties={
            'name': String(required=True),
            'email': String(required=True)
        })
    created = Date()
class Post(Document):
    author = Text()
    content = Text()
    created_at = Date()
    comments = Nested(Comment)
    co = Object(Comment)
class Publication(Record):
    authors = Nested(Author)
    publication_date = Date(format='yyyy-MM-dd')
    publisher = Text(fields={'keyword': Keyword()})
    serial = Text(fields={'keyword': Keyword()})

    class Meta:
        index = 'publications'
class Port(Document):
    services = Nested(Service)

    class Index:
        name = 'port'
        settings = {
            'number_of_shards': 2,
        }
def build_mapping(cls):
    """Create elasticsearch indexed_contacts mapping object for a user."""
    m = Mapping(cls.doc_type)
    m.meta('_all', enabled=True)
    m.field('user_id', 'keyword')
    m.field('contact_id', 'keyword')
    m.field('additional_name', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    # addresses
    addresses = Nested(doc_class=IndexedPostalAddress, include_in_all=True,
                       properties={
                           "address_id": "keyword",
                           "label": "text",
                           "type": "keyword",
                           "is_primary": "boolean",
                           "street": "text",
                           "city": "text",
                           "postal_code": "keyword",
                           "country": "text",
                           "region": "text"
                       })
    m.field("addresses", addresses)
    m.field("avatar", "keyword")
    m.field('date_insert', 'date')
    m.field('date_update', 'date')
    m.field('deleted', 'date')
    # emails
    internet_addr = Nested(doc_class=IndexedInternetAddress,
                           include_in_all=True)
    internet_addr.field("address", "text", analyzer="text_analyzer", fields={
        "raw": {"type": "keyword"},
        "parts": {"type": "text", "analyzer": "email_analyzer"}
    })
    internet_addr.field("email_id", Keyword())
    internet_addr.field("is_primary", Boolean())
    internet_addr.field("label", "text", analyzer="text_analyzer")
    internet_addr.field("type", Keyword())
    m.field("emails", internet_addr)
    m.field('family_name', "text", fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('given_name', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field("groups", Keyword(multi=True))
    # social ids
    social_ids = Nested(doc_class=IndexedSocialIdentity, include_in_all=True,
                        properties={
                            "name": "text",
                            "type": "keyword",
                            "infos": Nested()
                        })
    m.field("identities", social_ids)
    m.field("ims", internet_addr)
    m.field("infos", Nested())
    m.field('name_prefix', 'keyword')
    m.field('name_suffix', 'keyword')
    # organizations
    organizations = Nested(doc_class=IndexedOrganization, include_in_all=True)
    organizations.field("deleted", Boolean())
    organizations.field("department", "text", analyzer="text_analyzer")
    organizations.field("is_primary", Boolean())
    organizations.field("job_description", "text")
    organizations.field("label", "text", analyzer="text_analyzer")
    organizations.field("name", 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    organizations.field("organization_id", Keyword())
    organizations.field("title", Keyword())
    organizations.field("type", Keyword())
    m.field("organizations", organizations)
    # phones
    phones = Nested(doc_class=IndexedPhone, include_in_all=True,
                    properties={
                        "is_primary": "boolean",
                        "number": "text",
                        "normalized_number": "text",
                        "phone_id": "keyword",
                        "type": "keyword",
                        "uri": "keyword"
                    })
    m.field("phones", phones)
    # pi
    pi = Object(doc_class=PIIndexModel, include_in_all=True,
                properties={
                    "comportment": "integer",
                    "context": "integer",
                    "date_update": "date",
                    "technic": "integer",
                    "version": "integer"
                })
    m.field("pi", pi)
    m.field("privacy_features", Object(include_in_all=True))
    m.field("public_key", Nested())
    m.field("social_identities", social_ids)
    m.field("tags", Keyword(multi=True))
    m.field('title', 'text', analyzer="text_analyzer", fields={
        "raw": {"type": "keyword"}
    })
    return m
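# Hedged sketch of applying the mapping built above: Mapping.save() persists
# the mapping to an index on the default connection. The owner class name
# IndexedContact, the index name, and the host are assumptions; only
# build_mapping() itself comes from the code above.
from elasticsearch_dsl import connections

connections.create_connection(hosts=["localhost:9200"])  # assumed host
m = IndexedContact.build_mapping()  # assumed owner class (hypothetical)
m.save('contacts-index')  # creates/updates the mapping on that index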
def build_mapping(cls):
    """Generate the mapping definition for indexed messages"""
    m = Mapping(cls.doc_type)
    m.meta('_all', enabled=True)
    m.field('user_id', 'keyword')
    # attachments
    m.field('attachments',
            Nested(doc_class=IndexedMessageAttachment, include_in_all=True,
                   properties={
                       "content_type": Keyword(),
                       "file_name": Keyword(),
                       "is_inline": Boolean(),
                       "size": Integer(),
                       "temp_id": Keyword(),
                       "url": Keyword(),
                       "mime_boundary": Keyword()
                   }))
    m.field('body_html', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('body_plain', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('date', 'date')
    m.field('date_delete', 'date')
    m.field('date_insert', 'date')
    m.field('date_sort', 'date')
    m.field('discussion_id', 'keyword')
    # external references
    m.field('external_references',
            Nested(doc_class=IndexedExternalReferences, include_in_all=True,
                   properties={
                       "ancestors_ids": Keyword(),
                       "message_id": Keyword(),
                       "parent_id": Keyword()
                   }))
    m.field('importance_level', 'short')
    m.field('is_answered', 'boolean')
    m.field('is_draft', 'boolean')
    m.field('is_unread', 'boolean')
    m.field('is_received', 'boolean')
    m.field('message_id', 'keyword')
    m.field('parent_id', 'keyword')
    # participants
    participants = Nested(doc_class=IndexedParticipant, include_in_all=True)
    participants.field("address", "text", analyzer="text_analyzer", fields={
        "raw": {"type": "keyword"},
        "parts": {"type": "text", "analyzer": "email_analyzer"}
    })
    participants.field("contact_ids", Keyword(multi=True))
    participants.field("label", "text", analyzer="text_analyzer")
    participants.field("protocol", Keyword())
    participants.field("type", Keyword())
    m.field('participants', participants)
    # PI
    pi = Object(doc_class=PIIndexModel, include_in_all=True,
                properties={
                    "technic": "integer",
                    "comportment": "integer",
                    "context": "integer",
                    "version": "integer",
                    "date_update": "date"
                })
    m.field("pi", pi)
    m.field('privacy_features', Object(include_in_all=True))
    m.field('raw_msg_id', "keyword")
    m.field('subject', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('tags', Keyword(multi=True))
    m.field('protocol', 'keyword')
    m.field('user_identities', Keyword(multi=True))
    return m