def build_mapping(cls):
    """Generate the mapping definition for indexed messages.

    Builds a ``Mapping`` for ``cls.doc_type`` with ``_all`` enabled and
    registers every message field on it.

    Each field is registered exactly once. Registering a name a second
    time replaces the earlier definition, which previously caused a bare
    ``subject`` text field to discard the ``normalized`` sub-field.

    Returns:
        The populated ``Mapping`` instance (not saved to any index here).
    """
    m = Mapping(cls.doc_type)
    m.meta('_all', enabled=True)
    m.field('user_id', 'keyword')

    # attachments
    m.field('attachments', Nested(
        doc_class=IndexedMessageAttachment,
        include_in_all=True,
        properties={
            "content_type": Keyword(),
            "file_name": Keyword(),
            "is_inline": Boolean(),
            "size": Integer(),
            "temp_id": Keyword(),
            "url": Keyword(),
            "mime_boundary": Keyword()
        })
    )

    # Bodies carry an extra "normalized" sub-field using text_analyzer.
    m.field('body_html', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('body_plain', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('date', 'date')
    m.field('date_delete', 'date')
    m.field('date_insert', 'date')
    m.field('date_sort', 'date')
    m.field('discussion_id', 'keyword')

    # external references
    m.field('external_references', Nested(
        doc_class=IndexedExternalReferences,
        include_in_all=True,
        properties={
            "ancestors_ids": Keyword(),
            "message_id": Keyword(),
            "parent_id": Keyword()
        })
    )
    m.field('importance_level', 'short')
    m.field('is_answered', 'boolean')
    m.field('is_draft', 'boolean')
    m.field('is_unread', 'boolean')
    m.field('is_received', 'boolean')
    m.field('message_id', 'keyword')
    m.field('parent_id', 'keyword')

    # participants
    participants = Nested(doc_class=IndexedParticipant, include_in_all=True)
    participants.field("address", "text", analyzer="text_analyzer", fields={
        "raw": {"type": "keyword"},
        "parts": {"type": "text", "analyzer": "email_analyzer"}
    })
    participants.field("contact_ids", Keyword(multi=True))
    participants.field("label", "text", analyzer="text_analyzer")
    participants.field("protocol", Keyword())
    participants.field("type", Keyword())
    m.field('participants', participants)

    # PI
    pi = Object(doc_class=PIIndexModel, include_in_all=True, properties={
        "technic": "integer",
        "comportment": "integer",
        "context": "integer",
        "version": "integer",
        "date_update": "date"
    })
    m.field("pi", pi)
    m.field('privacy_features', Object(include_in_all=True))
    m.field('raw_msg_id', "keyword")

    # Registered once only: a second plain m.field('subject', 'text')
    # would overwrite this definition and lose the "normalized" sub-field.
    m.field('subject', 'text', fields={
        "normalized": {"type": "text", "analyzer": "text_analyzer"}
    })
    m.field('tags', Keyword(multi=True))
    m.field('protocol', 'keyword')
    m.field('user_identities', Keyword(multi=True))
    return m
##################################
# PERSISTENCE

# Mappings
# Date is imported as well: it is used below for the nested
# `created_at` field and was previously missing (NameError at runtime).
from elasticsearch_dsl import Date, Keyword, Mapping, Nested, Text

# Mapping for the 'my-type' document type.
m = Mapping('my-type')

# Fields can be declared by type name ...
m.field('title', 'text')

# ... with multi-fields passed through `fields` ...
m.field('category', 'text', fields={'raw': Keyword()})

# ... or built by hand as field objects and attached afterwards.
comment = Nested()
comment.field('author', Text())
comment.field('created_at', Date())
m.field('comments', comment)

# Meta fields are set through .meta().
m.meta('_all', enabled=False)

# Save the mapping into 'my-index'.
m.save('my-index')

# We can also get the mapping from our production cluster
m = Mapping.from_es('my-index', 'my-type', using='prod')

# Update the in-memory mapping from the 'qa' connection, then save it
# through the 'prod' connection.
m.update_from_es('my-index', using='qa')
m.save('my-index', using='prod')

#################################################
# DOCTYPE
es = Elasticsearch(host=es_host) # In[27]: from elasticsearch_dsl.connections import connections from elasticsearch_dsl import Mapping, String, Nested, Integer, Boolean from elasticsearch_dsl import analyzer, tokenizer whitespace_analyzer = analyzer('whitespace_analyzer', tokenizer=tokenizer('whitespace')) con = connections.create_connection(host=es_host) mapping = Mapping('user_log') fact = Nested(multi=True, include_in_parent=True) fact.field('domain', String(analyzer=whitespace_analyzer)) fact.field('address', String(analyzer=whitespace_analyzer)) fact.field('param', String(analyzer=whitespace_analyzer)) fact.field('title', String(analyzer=whitespace_analyzer)) mapping.field('fact', fact) mapping.field('fold', Integer(index='not_analyzed')) mapping.save('user') # In[29]: import itertools def chunk_iterator(iterator, size):
def write_user_mapping(index='buddyupusers', doc_type='user'):
    """Build the buddy up `user` mapping and save it under ``index``.

    Args:
        index: value handed to ``Mapping.save``.
        doc_type: document type name the mapping is created for.
    """
    def exact(container, *names):
        # Register each name, in order, as a non-analyzed string field.
        for name in names:
            container.field(name, 'string', index='not_analyzed')
        return container

    user_mapping = Mapping(doc_type)

    # Public profile.
    profile = exact(Object(), 'first_name', 'last_name')
    profile.field('signed_up_at', 'date')
    user_mapping.field('public', profile)

    # Groups.
    study_groups = exact(
        Nested(),
        'creator',
        'group_id',
        'school_id',
        'subject',
        'subject_code',
        'subject_icon',
    )
    for boundary in ('start', 'end'):
        study_groups.field(boundary, 'date')
    user_mapping.field('groups', study_groups)

    # Containers declared without any explicit sub-field.
    for section in ('private', 'internal'):
        user_mapping.field(section, Object())
    user_mapping.field('schools', Nested())

    # Nested collections made only of non-analyzed strings.
    user_mapping.field(
        'classes',
        exact(Nested(), 'course_id', 'id', 'school_id', 'subject_icon'),
    )
    user_mapping.field(
        'buddies',
        exact(Nested(), 'user_id', 'first_name', 'last_name'),
    )
    user_mapping.field('buddies_outgoing', exact(Nested(), 'user_id'))

    user_mapping.save(index)
def build_mapping(cls):
    """Assemble the elasticsearch mapping object for a user's indexed contacts.

    Returns:
        The ``Mapping`` built for ``cls.doc_type``; nothing is saved here.
    """
    def normalized_subfield():
        # Fresh dict on every call so no two fields share one options object.
        return {"normalized": {"type": "text", "analyzer": "text_analyzer"}}

    mapping = Mapping(cls.doc_type)
    mapping.meta('_all', enabled=True)
    mapping.field('additional_name', 'text', fields=normalized_subfield())

    # Postal addresses.
    postal_props = {
        "address_id": "keyword",
        "label": "text",
        "type": "keyword",
        "is_primary": "boolean",
        "street": "text",
        "city": "text",
        "postal_code": "keyword",
        "country": "text",
        "region": "text",
    }
    mapping.field("addresses", Nested(
        doc_class=IndexedPostalAddress,
        include_in_all=True,
        properties=postal_props,
    ))

    mapping.field("avatar", "keyword")
    for date_name in ('date_insert', 'date_update', 'deleted'):
        mapping.field(date_name, 'date')

    # Internet addresses: one nested definition, attached as "emails" here
    # and as "ims" further down.
    net_address = Nested(doc_class=IndexedInternetAddress, include_in_all=True)
    address_subfields = {
        "raw": {"type": "keyword"},
        "parts": {"type": "text", "analyzer": "email_analyzer"},
    }
    net_address.field("address", "text", analyzer="text_analyzer",
                      fields=address_subfields)
    net_address.field("email_id", Keyword())
    net_address.field("is_primary", Boolean())
    net_address.field("label", "text", analyzer="text_analyzer")
    net_address.field("type", Keyword())
    mapping.field("emails", net_address)

    for name_part in ('family_name', 'given_name'):
        mapping.field(name_part, 'text', fields=normalized_subfield())
    mapping.field("groups", Keyword(multi=True))

    # Social identities: one nested definition, attached as "identities"
    # here and as "social_identities" further down.
    social = Nested(
        doc_class=IndexedSocialIdentity,
        include_in_all=True,
        properties={"name": "text", "type": "keyword", "infos": Nested()},
    )
    mapping.field("identities", social)
    mapping.field("ims", net_address)
    mapping.field("infos", Nested())
    for affix in ('name_prefix', 'name_suffix'):
        mapping.field(affix, 'keyword')

    # Organizations.
    orgs = Nested(doc_class=IndexedOrganization, include_in_all=True)
    orgs.field("deleted", Boolean())
    orgs.field("department", "text", analyzer="text_analyzer")
    orgs.field("is_primary", Boolean())
    orgs.field("job_description", "text")
    orgs.field("label", "text", analyzer="text_analyzer")
    orgs.field("name", 'text', fields=normalized_subfield())
    for org_keyword in ("organization_id", "title", "type"):
        orgs.field(org_keyword, Keyword())
    mapping.field("organizations", orgs)

    # Phones.
    phone_props = {
        "is_primary": "boolean",
        "number": "text",
        "normalized_number": "text",
        "phone_id": "keyword",
        "type": "keyword",
        "uri": "keyword",
    }
    mapping.field("phones", Nested(
        doc_class=IndexedPhone,
        include_in_all=True,
        properties=phone_props,
    ))

    # PI, declared as a nested field in this mapping.
    pi_props = {
        "comportment": "integer",
        "context": "integer",
        "date_update": "date",
        "technic": "integer",
        "version": "integer",
    }
    mapping.field("pi", Nested(
        doc_class=PIIndexModel,
        include_in_all=True,
        properties=pi_props,
    ))

    mapping.field("privacy_features", Nested(include_in_all=True))
    mapping.field("public_key", Nested())
    mapping.field("social_identities", social)
    mapping.field("tags", Keyword(multi=True))
    mapping.field('title', 'text')
    return mapping
def build_mapping(cls):
    """Assemble the elasticsearch mapping object for a user's indexed contacts.

    Returns:
        The ``Mapping`` built for ``cls.doc_type``; nothing is saved here.
    """
    def normalized_subfield():
        # Fresh dict on every call so no two fields share one options object.
        return {"normalized": {"type": "text", "analyzer": "text_analyzer"}}

    contact_mapping = Mapping(cls.doc_type)
    contact_mapping.meta('_all', enabled=True)
    for id_name in ('user_id', 'contact_id'):
        contact_mapping.field(id_name, 'keyword')
    contact_mapping.field('additional_name', 'text',
                          fields=normalized_subfield())

    # Postal addresses.
    postal = Nested(
        doc_class=IndexedPostalAddress,
        include_in_all=True,
        properties={
            "address_id": "keyword",
            "label": "text",
            "type": "keyword",
            "is_primary": "boolean",
            "street": "text",
            "city": "text",
            "postal_code": "keyword",
            "country": "text",
            "region": "text",
        },
    )
    contact_mapping.field("addresses", postal)

    contact_mapping.field("avatar", "keyword")
    for date_name in ('date_insert', 'date_update', 'deleted'):
        contact_mapping.field(date_name, 'date')

    # Internet addresses: one nested definition, attached as "emails" here
    # and as "ims" further down.
    net_address = Nested(doc_class=IndexedInternetAddress, include_in_all=True)
    net_address.field(
        "address",
        "text",
        analyzer="text_analyzer",
        fields={
            "raw": {"type": "keyword"},
            "parts": {"type": "text", "analyzer": "email_analyzer"},
        },
    )
    net_address.field("email_id", Keyword())
    net_address.field("is_primary", Boolean())
    net_address.field("label", "text", analyzer="text_analyzer")
    net_address.field("type", Keyword())
    contact_mapping.field("emails", net_address)

    for name_part in ('family_name', 'given_name'):
        contact_mapping.field(name_part, 'text', fields=normalized_subfield())
    contact_mapping.field("groups", Keyword(multi=True))

    # Social identities: one nested definition, attached as "identities"
    # here and as "social_identities" further down.
    social = Nested(
        doc_class=IndexedSocialIdentity,
        include_in_all=True,
        properties={"name": "text", "type": "keyword", "infos": Nested()},
    )
    contact_mapping.field("identities", social)
    contact_mapping.field("ims", net_address)
    contact_mapping.field("infos", Nested())
    for affix in ('name_prefix', 'name_suffix'):
        contact_mapping.field(affix, 'keyword')

    # Organizations.
    orgs = Nested(doc_class=IndexedOrganization, include_in_all=True)
    orgs.field("deleted", Boolean())
    orgs.field("department", "text", analyzer="text_analyzer")
    orgs.field("is_primary", Boolean())
    orgs.field("job_description", "text")
    orgs.field("label", "text", analyzer="text_analyzer")
    orgs.field("name", 'text', fields=normalized_subfield())
    for org_keyword in ("organization_id", "title", "type"):
        orgs.field(org_keyword, Keyword())
    contact_mapping.field("organizations", orgs)

    # Phones.
    phones = Nested(
        doc_class=IndexedPhone,
        include_in_all=True,
        properties={
            "is_primary": "boolean",
            "number": "text",
            "normalized_number": "text",
            "phone_id": "keyword",
            "type": "keyword",
            "uri": "keyword",
        },
    )
    contact_mapping.field("phones", phones)

    # PI, declared as a plain object field in this mapping.
    privacy_index = Object(
        doc_class=PIIndexModel,
        include_in_all=True,
        properties={
            "comportment": "integer",
            "context": "integer",
            "date_update": "date",
            "technic": "integer",
            "version": "integer",
        },
    )
    contact_mapping.field("pi", privacy_index)

    contact_mapping.field("privacy_features", Object(include_in_all=True))
    contact_mapping.field("public_key", Nested())
    contact_mapping.field("social_identities", social)
    contact_mapping.field("tags", Keyword(multi=True))
    contact_mapping.field(
        'title',
        'text',
        analyzer="text_analyzer",
        fields={"raw": {"type": "keyword"}},
    )
    return contact_mapping