Esempio n. 1
0
    def build_mapping(cls):
        """Generate the mapping definition for indexed messages.

        A ``Mapping`` is created for ``cls.doc_type`` with the ``_all`` meta
        field enabled, and every message field is registered exactly once.
        The mapping is only built here; it is not saved.

        :return: the populated ``Mapping`` object
        """
        m = Mapping(cls.doc_type)
        m.meta('_all', enabled=True)
        m.field('user_id', 'keyword')
        # attachments
        m.field('attachments', Nested(doc_class=IndexedMessageAttachment,
                                      include_in_all=True,
                                      properties={
                                          "content_type": Keyword(),
                                          "file_name": Keyword(),
                                          "is_inline": Boolean(),
                                          "size": Integer(),
                                          "temp_id": Keyword(),
                                          "url": Keyword(),
                                          "mime_boundary": Keyword()
                                      })
                )
        # message bodies: plain text field plus a "normalized" sub-field
        m.field('body_html', 'text', fields={
            "normalized": {"type": "text", "analyzer": "text_analyzer"}
        })
        m.field('body_plain', 'text', fields={
            "normalized": {"type": "text", "analyzer": "text_analyzer"}
        })
        m.field('date', 'date')
        m.field('date_delete', 'date')
        m.field('date_insert', 'date')
        m.field('date_sort', 'date')
        m.field('discussion_id', 'keyword')
        # external references
        m.field('external_references',
                Nested(doc_class=IndexedExternalReferences,
                       include_in_all=True,
                       properties={
                           "ancestors_ids": Keyword(),
                           "message_id": Keyword(),
                           "parent_id": Keyword()
                       })
                )
        m.field('importance_level', 'short')
        m.field('is_answered', 'boolean')
        m.field('is_draft', 'boolean')
        m.field('is_unread', 'boolean')
        m.field('is_received', 'boolean')
        m.field('message_id', 'keyword')
        m.field('parent_id', 'keyword')
        # participants
        participants = Nested(doc_class=IndexedParticipant,
                              include_in_all=True)
        participants.field("address", "text", analyzer="text_analyzer",
                           fields={
                               "raw": {"type": "keyword"},
                               "parts": {"type": "text",
                                         "analyzer": "email_analyzer"}
                           })
        participants.field("contact_ids", Keyword(multi=True))
        participants.field("label", "text", analyzer="text_analyzer")
        participants.field("protocol", Keyword())
        participants.field("type", Keyword())
        m.field('participants', participants)
        # PI
        pi = Object(doc_class=PIIndexModel, include_in_all=True,
                    properties={
                        "technic": "integer",
                        "comportment": "integer",
                        "context": "integer",
                        "version": "integer",
                        "date_update": "date"
                    })
        m.field("pi", pi)
        m.field('privacy_features', Object(include_in_all=True))
        m.field('raw_msg_id', "keyword")
        # 'subject' and 'tags' are registered once only: registering
        # 'subject' a second time as a plain 'text' field would replace this
        # definition and lose the "normalized" sub-field.
        m.field('subject', 'text', fields={
            "normalized": {"type": "text", "analyzer": "text_analyzer"}
        })
        m.field('tags', Keyword(multi=True))
        m.field('protocol', 'keyword')
        m.field('user_identities', Keyword(multi=True))

        return m


##################################
# PERSISTENCE

# Mappings

# Date is needed for the 'created_at' comment field below.
from elasticsearch_dsl import Date, Keyword, Mapping, Nested, Text

# Mapping for the 'my-type' document type.
m = Mapping('my-type')
m.field('title', 'text')
# 'category' is a text field with a keyword 'raw' sub-field.
m.field('category', 'text', fields={'raw': Keyword()})

# Nested 'comments' field, built separately and then attached to the mapping.
comment = Nested()
comment.field('author', Text())
comment.field('created_at', Date())
m.field('comments', comment)
# Meta fields can be configured too: disable '_all'.
m.meta('_all', enabled=False)
# Save the mapping into index 'my-index'.
m.save('my-index')

# We can also get the mapping from our production cluster
m = Mapping.from_es('my-index', 'my-type', using='prod')

# Update it from the 'my-index' mapping reachable through the 'qa' connection.
m.update_from_es('my-index', using='qa')

# Save the result back through the 'prod' connection.
m.save('my-index', using='prod')


#################################################
# DOCTYPE
Esempio n. 3
0
# Client object for the host named by es_host.
# NOTE(review): neither Elasticsearch nor es_host is defined in the visible
# part of this file - presumably both come from an earlier cell; confirm.
es = Elasticsearch(host=es_host)


# In[27]:

from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Mapping, String, Nested, Integer, Boolean
from elasticsearch_dsl import analyzer, tokenizer

# Analyzer named 'whitespace_analyzer', built on the 'whitespace' tokenizer.
whitespace_analyzer = analyzer(
    'whitespace_analyzer',
    tokenizer=tokenizer('whitespace'),
)
# NOTE(review): presumably this is the connection mapping.save() uses below;
# es_host is not defined in the visible part of the file - confirm.
con = connections.create_connection(host=es_host)

# Nested 'fact' field: four string sub-fields sharing the same analyzer.
fact = Nested(multi=True, include_in_parent=True)
for _fact_field in ('domain', 'address', 'param', 'title'):
    fact.field(_fact_field, String(analyzer=whitespace_analyzer))

# Assemble the 'user_log' mapping and save it into the 'user' index.
mapping = Mapping('user_log')
mapping.field('fact', fact)
mapping.field('fold', Integer(index='not_analyzed'))
mapping.save('user')


# In[29]:

import itertools 

def chunk_iterator(iterator, size):
Esempio n. 4
0
def write_user_mapping(index='buddyupusers', doc_type='user'):
    """Build the buddy up `user` mapping and save it.

    :param index: index name handed to ``Mapping.save``
    :param doc_type: document type name handed to ``Mapping``
    """
    def add_exact_strings(container, *names):
        # Register each name as a 'string' field with index='not_analyzed',
        # in the order given.
        for name in names:
            container.field(name, 'string', index='not_analyzed')

    user_mapping = Mapping(doc_type)

    # public profile
    public = Object()
    add_exact_strings(public, 'first_name', 'last_name')
    public.field('signed_up_at', 'date')
    user_mapping.field('public', public)

    # groups: identifiers first, then the start/end dates
    groups = Nested()
    add_exact_strings(
        groups,
        'creator',
        'group_id',
        'school_id',
        'subject',
        'subject_code',
        'subject_icon',
    )
    for date_name in ('start', 'end'):
        groups.field(date_name, 'date')
    user_mapping.field('groups', groups)

    # containers declared without any sub-field
    user_mapping.field('private', Object())
    user_mapping.field('internal', Object())
    user_mapping.field('schools', Nested())

    classes = Nested()
    add_exact_strings(classes, 'course_id', 'id', 'school_id', 'subject_icon')
    user_mapping.field('classes', classes)

    buddies = Nested()
    add_exact_strings(buddies, 'user_id', 'first_name', 'last_name')
    user_mapping.field('buddies', buddies)

    buddies_outgoing = Nested()
    add_exact_strings(buddies_outgoing, 'user_id')
    user_mapping.field('buddies_outgoing', buddies_outgoing)

    user_mapping.save(index)
Esempio n. 5
0
    def build_mapping(cls):
        """Build the elasticsearch mapping object for a user's indexed contacts.

        Fields are registered on a ``Mapping`` created for ``cls.doc_type``;
        the mapping is returned without being saved.
        """
        def normalized():
            # Fresh multi-field definition for every caller.
            return {
                "normalized": {"type": "text", "analyzer": "text_analyzer"}
            }

        mapping = Mapping(cls.doc_type)
        mapping.meta('_all', enabled=True)
        mapping.field('additional_name', 'text', fields=normalized())

        # postal addresses
        address_properties = {
            "address_id": "keyword",
            "label": "text",
            "type": "keyword",
            "is_primary": "boolean",
            "street": "text",
            "city": "text",
            "postal_code": "keyword",
            "country": "text",
            "region": "text",
        }
        mapping.field("addresses", Nested(doc_class=IndexedPostalAddress,
                                          include_in_all=True,
                                          properties=address_properties))
        mapping.field("avatar", "keyword")
        for date_field in ('date_insert', 'date_update', 'deleted'):
            mapping.field(date_field, 'date')

        # internet addresses: the same nested definition serves both the
        # "emails" and the "ims" fields
        net_address = Nested(doc_class=IndexedInternetAddress,
                             include_in_all=True)
        address_subfields = {
            "raw": {"type": "keyword"},
            "parts": {"type": "text", "analyzer": "email_analyzer"},
        }
        net_address.field("address", "text", analyzer="text_analyzer",
                          fields=address_subfields)
        net_address.field("email_id", Keyword())
        net_address.field("is_primary", Boolean())
        net_address.field("label", "text", analyzer="text_analyzer")
        net_address.field("type", Keyword())
        mapping.field("emails", net_address)

        for name_field in ('family_name', 'given_name'):
            mapping.field(name_field, 'text', fields=normalized())
        mapping.field("groups", Keyword(multi=True))

        # social identities: registered under "identities" here and under
        # "social_identities" further down
        social = Nested(doc_class=IndexedSocialIdentity,
                        include_in_all=True,
                        properties={
                            "name": "text",
                            "type": "keyword",
                            "infos": Nested(),
                        })
        mapping.field("identities", social)
        mapping.field("ims", net_address)
        mapping.field("infos", Nested())
        for affix_field in ('name_prefix', 'name_suffix'):
            mapping.field(affix_field, 'keyword')

        # organizations
        orgs = Nested(doc_class=IndexedOrganization, include_in_all=True)
        orgs.field("deleted", Boolean())
        orgs.field("department", "text", analyzer="text_analyzer")
        orgs.field("is_primary", Boolean())
        orgs.field("job_description", "text")
        orgs.field("label", "text", analyzer="text_analyzer")
        orgs.field("name", 'text', fields=normalized())
        for keyword_field in ("organization_id", "title", "type"):
            orgs.field(keyword_field, Keyword())
        mapping.field("organizations", orgs)

        # phones
        phone_properties = {
            "is_primary": "boolean",
            "number": "text",
            "normalized_number": "text",
            "phone_id": "keyword",
            "type": "keyword",
            "uri": "keyword",
        }
        mapping.field("phones", Nested(doc_class=IndexedPhone,
                                       include_in_all=True,
                                       properties=phone_properties))

        # pi
        pi_properties = {
            "comportment": "integer",
            "context": "integer",
            "date_update": "date",
            "technic": "integer",
            "version": "integer",
        }
        mapping.field("pi", Nested(doc_class=PIIndexModel,
                                   include_in_all=True,
                                   properties=pi_properties))
        mapping.field("privacy_features", Nested(include_in_all=True))
        mapping.field("public_key", Nested())
        mapping.field("social_identities", social)
        mapping.field("tags", Keyword(multi=True))
        mapping.field('title', 'text')

        return mapping
Esempio n. 6
0
    def build_mapping(cls):
        """Build the elasticsearch mapping object for a user's indexed contacts.

        Fields are registered on a ``Mapping`` created for ``cls.doc_type``;
        the mapping is returned without being saved.
        """
        def with_normalized():
            # Keyword arguments for a 'text' field carrying a "normalized"
            # sub-field; a new dict is built on every call.
            return {
                "fields": {
                    "normalized": {"type": "text",
                                   "analyzer": "text_analyzer"}
                }
            }

        contact_mapping = Mapping(cls.doc_type)
        contact_mapping.meta('_all', enabled=True)

        for id_field in ('user_id', 'contact_id'):
            contact_mapping.field(id_field, 'keyword')

        contact_mapping.field('additional_name', 'text', **with_normalized())

        # postal addresses
        postal = Nested(
            doc_class=IndexedPostalAddress,
            include_in_all=True,
            properties={
                "address_id": "keyword",
                "label": "text",
                "type": "keyword",
                "is_primary": "boolean",
                "street": "text",
                "city": "text",
                "postal_code": "keyword",
                "country": "text",
                "region": "text",
            },
        )
        contact_mapping.field("addresses", postal)
        contact_mapping.field("avatar", "keyword")
        for date_field in ('date_insert', 'date_update', 'deleted'):
            contact_mapping.field(date_field, 'date')

        # internet addresses: one nested definition, used for "emails" and
        # again for "ims"
        inet = Nested(doc_class=IndexedInternetAddress, include_in_all=True)
        inet.field(
            "address",
            "text",
            analyzer="text_analyzer",
            fields={
                "raw": {"type": "keyword"},
                "parts": {"type": "text", "analyzer": "email_analyzer"},
            },
        )
        inet.field("email_id", Keyword())
        inet.field("is_primary", Boolean())
        inet.field("label", "text", analyzer="text_analyzer")
        inet.field("type", Keyword())
        contact_mapping.field("emails", inet)

        contact_mapping.field('family_name', "text", **with_normalized())
        contact_mapping.field('given_name', 'text', **with_normalized())
        contact_mapping.field("groups", Keyword(multi=True))

        # social identities: one nested definition, used for "identities"
        # and again for "social_identities"
        identities = Nested(
            doc_class=IndexedSocialIdentity,
            include_in_all=True,
            properties={
                "name": "text",
                "type": "keyword",
                "infos": Nested(),
            },
        )
        contact_mapping.field("identities", identities)
        contact_mapping.field("ims", inet)
        contact_mapping.field("infos", Nested())
        contact_mapping.field('name_prefix', 'keyword')
        contact_mapping.field('name_suffix', 'keyword')

        # organizations
        organization = Nested(doc_class=IndexedOrganization,
                              include_in_all=True)
        organization.field("deleted", Boolean())
        organization.field("department", "text", analyzer="text_analyzer")
        organization.field("is_primary", Boolean())
        organization.field("job_description", "text")
        organization.field("label", "text", analyzer="text_analyzer")
        organization.field("name", 'text', **with_normalized())
        organization.field("organization_id", Keyword())
        organization.field("title", Keyword())
        organization.field("type", Keyword())
        contact_mapping.field("organizations", organization)

        # phones
        phone = Nested(
            doc_class=IndexedPhone,
            include_in_all=True,
            properties={
                "is_primary": "boolean",
                "number": "text",
                "normalized_number": "text",
                "phone_id": "keyword",
                "type": "keyword",
                "uri": "keyword",
            },
        )
        contact_mapping.field("phones", phone)

        # pi
        privacy_index = Object(
            doc_class=PIIndexModel,
            include_in_all=True,
            properties={
                "comportment": "integer",
                "context": "integer",
                "date_update": "date",
                "technic": "integer",
                "version": "integer",
            },
        )
        contact_mapping.field("pi", privacy_index)
        contact_mapping.field("privacy_features", Object(include_in_all=True))
        contact_mapping.field("public_key", Nested())
        contact_mapping.field("social_identities", identities)
        contact_mapping.field("tags", Keyword(multi=True))
        # title: analyzed text plus a keyword "raw" sub-field
        contact_mapping.field('title', 'text', analyzer="text_analyzer",
                              fields={"raw": {"type": "keyword"}})

        return contact_mapping