Example No. 1
class ForumDocument(SumoDocument):
    """
    ES document for forum posts. Thread information is duplicated across all posts in that thread.
    """

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        name = config.FORUM_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def get_field_value(self, field, instance, *args):
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        return Post.objects.select_related("thread")
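A minimal usage sketch (not part of the original example) for querying this index; it assumes `config` is importable and an Elasticsearch host is reachable at localhost:9200:

from elasticsearch_dsl import connections

# Register the connection alias that ForumDocument.Index refers to.
connections.create_connection(
    alias=config.DEFAULT_ES7_CONNECTION, hosts=["http://localhost:9200"]
)

# Full-text search on post content, filtered on thread metadata that the
# document duplicates onto every post.
response = (
    ForumDocument.search()
    .query("match", content="crash report")
    .filter("term", thread_is_locked=False)
    .execute()
)
for hit in response:
    print(hit.meta.id, hit.thread_title)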
Example No. 2
    def prepare_doc(self):
        _fields, _map = {}, {}
        for idx, _f in enumerate(self.schema['fields'], 1):
            alias_name = _f['name']
            field_name = 'col{}'.format(idx)
            _field = self._schema2doc_map[_f['type']]
            _map[field_name] = alias_name
            _fields[field_name] = _field

        if self.has_geo_data:
            _fields['shape'] = dsl_field.GeoShape()
            _fields['point'] = dsl_field.GeoPoint()
            _fields['label'] = dsl_field.Text()
            _fields['shape_type'] = dsl_field.Integer()

        _fields['resource'] = dsl_field.Nested(
            properties={
                'id': dsl_field.Integer(),
                'title': dsl_field.Text(analyzer=polish_analyzer,
                                        fields={'raw': dsl_field.Keyword()})
            })

        _fields['updated_at'] = dsl_field.Date()
        _fields['row_no'] = dsl_field.Long()
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Example No. 3
class Manga(Document):
    title = field.Text(analyzer=titles,
                       multi=True,
                       fields={
                           'space': field.Text(analyzer=titles_space,
                                               multi=True),
                           'keyword': field.Keyword(multi=True),
                       })
    tags = field.Object(Tag)
    upload_at = field.Date()
    scan_at = field.Date()

    url = field.Keyword()
    cover_url = field.Keyword()
    images_urls = field.Keyword(multi=True)
    images_len = field.Integer()

    class Index:
        name = 'nhentai__mangas'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"buscando manga {url}")
        if cls.search().filter("term", url=url).count() > 0:
            return True
        return False
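A hedged usage sketch for the class above; the URL and field values are placeholders:

url = "https://example.org/manga/12345"
# Only index the manga once: url_is_scaned() runs a term filter against the
# keyword-mapped `url` field.
if not Manga.url_is_scaned(url):
    Manga(title="placeholder title", url=url, images_urls=[], images_len=0).save()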
Example No. 4
class User(InnerDoc):
    name = field.Text(analyzer=titles,
                      fields={
                          'space': field.Text(analyzer=titles_space),
                          'keyword': field.Keyword(),
                      })
    url = field.Keyword()
Example No. 5
class Document(BaseDocument):
    url = field.Keyword()
    url_text = field.Text()
    referer = field.Keyword()
    title = field.Text()
    html = field.Text()
    text = field.Text()
    timestamp = field.Date(default_timezone=settings.TIME_ZONE)
Example No. 6
class Activity(InnerDoc):
    action = field.Text(analyzer=titles,
                        fields={
                            'space': field.Text(analyzer=titles_space),
                            'keyword': field.Keyword(),
                        })
    date = field.Date()
    user = field.Object(User)
Example No. 7
class MessageIndex(DocType):
    room = field.Keyword()
    user = field.Text()
    created = field.Date()
    message = field.Text()
    status = field.Text()
    tags = Nested(properties={'tags': field.Text()})

    class Meta:
        index = 'message'  # Elasticsearch index names must be lowercase
Example No. 8
class Data_set_resource(InnerDoc):
    title = field.Text(analyzer=titles,
                       fields={
                           'space': field.Text(analyzer=titles_space),
                           'keyword': field.Keyword(),
                       })
    description = field.Text(analyzer=titles,
                             fields={
                                 'space': field.Text(analyzer=titles_space),
                                 'keyword': field.Keyword(),
                             })
    download_link = field.Keyword()
    kind = field.Keyword()
Example No. 9
class CompanyDocument(Document):
    address = field.Nested(
        properties={
            'care_of': field.Keyword(index=False, store=True),
            'po_box': field.Keyword(index=False, store=True),
            'address_line_1': field.Keyword(index=False, store=True),
            'address_line_2': field.Keyword(index=False, store=True),
            'locality': field.Keyword(index=False, store=True),
            'region': field.Keyword(index=False, store=True),
            'country': field.Keyword(index=False, store=True),
            'postal_code': field.Keyword(index=False, store=True)
        })
    country_of_origin = field.Keyword(index=False, store=True)
    address_snippet = field.Keyword(index=False, store=True)
    company_name = field.Text()
    company_number = field.Text()
    company_status = field.Keyword(index=False, store=True)
    type = field.Keyword(index=False, store=True)
    date_of_cessation = field.Date(index=False, format='yyyy-MM-dd')
    date_of_creation = field.Date(index=False, format='yyyy-MM-dd')
    sic_codes = field.Keyword(index=False, store=True)

    class Meta:
        index = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS

    def to_dict(self, include_meta=False):
        meta = super().to_dict(include_meta)
        if '_source' in meta:
            company = meta['_source']
            company['title'] = company['company_name']
            company['address']['country'] = company['country_of_origin']
            company['company_type'] = company['type']
            meta['_source'] = self.reformat_date(company)
        return meta

    def to_profile_dict(self):
        company = self.to_dict()
        company['registered_office_address'] = company['address']
        return self.reformat_date(company)

    @staticmethod
    def reformat_date(company):
        if 'date_of_creation' in company:
            company['date_of_creation'] = (
                company['date_of_creation'].strftime('%Y-%m-%d'))
        if 'date_of_cessation' in company:
            company['date_of_cessation'] = (
                company['date_of_cessation'].strftime('%Y-%m-%d'))
        return company
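A usage sketch (an assumption, not part of the original source) of the serialisation helpers above; the company number is a placeholder:

# Returns None instead of raising when the document does not exist.
doc = CompanyDocument.get(id="00000001", ignore=404)
if doc is not None:
    profile = doc.to_profile_dict()
    # reformat_date() has already turned the dates into YYYY-MM-dd strings.
    print(profile["company_name"], profile.get("date_of_creation"))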
Example No. 10
class ProfileIndex(DocType):
    id = Integer()
    username = Text()
    first_name = Text()
    last_name = Text()
    profile = Keyword()
    organization = Text()
    position = Keyword()
    type = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })

    class Meta:
        index = 'profile-index'
Example No. 11
def test_field_supports_multiple_analyzers():
    f = field.Text(analyzer="snowball", search_analyzer="keyword")
    assert {
        "analyzer": "snowball",
        "search_analyzer": "keyword",
        "type": "text",
    } == f.to_dict()
Example No. 12
class Ssn_trace(InnerDoc):
    is_valid = field.Boolean()
    is_deceased = field.Boolean()

    ssn = field.Keyword()
    human_message = field.Text()
    issued = field.Object(Ssn_issued)
Example No. 13
class DocWithNested(document.DocType):
    comments = field.Nested(
        properties={
            'title': field.Text(),
            'tags': field.Keyword(multi=True)
        }
    )
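A short sketch of how such a nested field is typically populated (hypothetical index name; assumes a default connection is configured):

doc = DocWithNested()
# Nested values can be appended as plain dicts; they are coerced to the
# declared properties when the document is serialised.
doc.comments.append({"title": "first comment", "tags": ["python", "elasticsearch"]})
doc.save(index="docs-with-nested")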
Example No. 14
def test_multifield_supports_multiple_analyzers():
    f = field.Text(fields={
        'f1': field.Text(search_analyzer='keyword', analyzer='snowball'),
        'f2': field.Text(analyzer='keyword')
    })
    assert {
        'fields': {
            'f1': {
                'analyzer': 'snowball',
                'search_analyzer': 'keyword',
                'type': 'text'
            },
            'f2': {
                'analyzer': 'keyword',
                'type': 'text'
            }
        },
        'type': 'text'
    } == f.to_dict()
Example No. 15
class ProfileDocument(SumoDocument):
    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # store avatar url so we don't need to hit the db when searching users
    # but set enabled=False to ensure ES does no parsing of it
    avatar = field.Object(enabled=False)

    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()

    involved_from = field.Date()

    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def prepare_username(self, instance):
        return instance.user.username

    def prepare_email(self, instance):
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        if avatar := instance.fxa_avatar:
            return InnerDoc(url=avatar)
Example No. 16
def test_field_supports_multiple_analyzers():
    f = field.Text(analyzer='snowball', search_analyzer='keyword')
    assert {
        'analyzer': 'snowball',
        'search_analyzer': 'keyword',
        'type': 'text'
    } == f.to_dict()
Example No. 17
class User(document.Document):
    username = field.Text()

    class Meta:
        all = document.MetaField(enabled=False)
        _index = document.MetaField(enabled=True)
        dynamic = document.MetaField('strict')
        dynamic_templates = document.MetaField([42])
Example No. 18
    def doc(self):
        if not self._doc_cache:
            _fields, _map = {}, {}
            for idx, _f in enumerate(self.schema['fields']):
                alias_name = _f['name']
                field_name = 'col{}'.format(idx + 1)
                _field = _schema2doc_map[_f['type']]
                _map[field_name] = alias_name
                _fields[field_name] = _field

            _fields['resource'] = dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(
                        analyzer=polish_analyzer,
                        fields={'raw': dsl_field.Keyword()})
                }
            )

            _fields['updated_at'] = dsl_field.Date()
            _fields['row_no'] = dsl_field.Long()

            doc = type(self.idx_name, (DocType,), _fields)
            doc._doc_type.index = self.idx_name
            doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
            self._doc_cache = doc
        return self._doc_cache
Example No. 19
class GroupIndex(DocType):
    id = Integer()
    title = Text(
        fields={
            'pattern': field.Text(analyzer=pattern_analyzer),
            'english': field.Text(analyzer='english')
        })
    title_sortable = Keyword()
    description = Text()
    json = Text()
    type = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })

    class Meta:
        index = 'group-index'
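A hedged query sketch: the multi-field variants declared above are addressed with dotted names, so the English-analyzed version of `title` can be targeted explicitly:

s = GroupIndex.search().query("match", **{"title.english": "geospatial layers"})
for hit in s.execute():
    print(hit.meta.id, hit.title)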
Example No. 20
def test_nested_provides_direct_access_to_its_fields():
    f = field.Nested(
        properties={'name': {
            'type': 'text',
            'index': 'not_analyzed'
        }})

    assert 'name' in f
    assert f['name'] == field.Text(index='not_analyzed')
Example No. 21
def test_nested_provides_direct_access_to_its_fields():
    f = field.Nested(
        properties={"name": {
            "type": "text",
            "index": "not_analyzed"
        }})

    assert "name" in f
    assert f["name"] == field.Text(index="not_analyzed")
Example No. 22
class ProfileIndex(DocType):
    id = Integer()
    username = Text()
    first_name = Text()
    last_name = Text()
    profile = Keyword()
    organization = Text()
    position = Keyword()
    type = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })
    avatar_100 = Text()
    layers_count = Integer()
    maps_count = Integer()
    documents_count = Integer()

    class Meta:
        index = 'profile-index'
Example No. 23
class Population(Document):
    name = field.Text(fields={
        'raw': field.Keyword(),
    })
    description = field.Text()
    dweller = field.Object(Dweller_inner)
    samples = field.Object(Dweller_inner, multi=True)

    class Index:
        name = "population"

    def add_sample(self, sample_class=None, index=None):
        result = {}
        if sample_class is None:
            sample_class = Sample
        result['klass'] = export(sample_class)
        if index is not None:
            result['index'] = index
        self.samples.append(result)
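A minimal sketch of the helper above, assuming `Sample` and `export()` from the example's own module are importable and a default connection is configured:

population = Population(name="test population", samples=[])
population.add_sample()          # falls back to the default Sample class
population.add_sample(index=3)   # records an explicit index alongside the class
population.save()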
Example No. 24
class Entry(document.Document):
    forms = field.Nested(Form)
    created = field.Date()
    superentry = field.Text()

    def save(self, **kwargs):
        return super(Entry, self).save(**kwargs)

    def is_published(self):
        return datetime.now() > self.created
Example No. 25
class Document(DocType):
    id = field.Integer()
    title = field.String(analyzer='snowball')
    author = field.String(analyzer='snowball')
    creation_date = field.Date()
    pages = field.Integer()
    content = field.String(analyzer='snowball')
    lang = field.String()
    size = field.Integer()
    tags = field.String(index='not_analyzed')
    autocomplete = field.Text(analyzer=ngram_analyzer)
Example No. 26
def test_multifield_supports_multiple_analyzers():
    f = field.Text(
        fields={
            "f1": field.Text(search_analyzer="keyword", analyzer="snowball"),
            "f2": field.Text(analyzer="keyword"),
        })
    assert {
        "fields": {
            "f1": {
                "analyzer": "snowball",
                "search_analyzer": "keyword",
                "type": "text",
            },
            "f2": {
                "analyzer": "keyword",
                "type": "text"
            },
        },
        "type": "text",
    } == f.to_dict()
Example No. 27
class User(document.Document):
    pwd_hash = field.Text()

    def check_password(self, pwd):
        return md5(pwd).hexdigest() == self.pwd_hash

    @property
    def password(self):
        raise AttributeError('readonly')

    @password.setter
    def password(self, pwd):
        self.pwd_hash = md5(pwd).hexdigest()
Example No. 28
class ForumDocument(SumoDocument):
    """
    ES document for forum posts. Thread information is duplicated across all posts in that thread.
    """

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    forum_slug = field.Keyword()
    thread_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        pass

    def prepare_forum_slug(self, instance):
        return instance.thread.forum.slug

    def get_field_value(self, field, instance, *args):
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        return Post.objects.prefetch_related("thread", "thread__forum")
Example No. 29
    def prepare_doc(self):
        _fields = {
            'shape': dsl_field.GeoShape(),
            'point': dsl_field.GeoPoint(),
            'shape_type': dsl_field.Integer(),
            'label': dsl_field.Text(),
            'resource': dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(analyzer=polish_analyzer,
                                            fields={'raw': dsl_field.Keyword()})
                }),
            'updated_at': dsl_field.Date(),
            'row_no': dsl_field.Long()
        }
        _map = {}

        for idx, _f in enumerate(self.schema, 1):
            if _f.type not in self._schema2doc_map:
                continue
            alias_name = _f.name
            field_name = f'col{idx}'
            _field = self._schema2doc_map[_f.type]
            _map[field_name] = alias_name
            _fields[field_name] = _field

        # Build the Index class once, after all columns have been mapped.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Example No. 30
class Dataset(Document):
    resources = field.Object(Data_set_resource, multi=True)
    tags = field.Text(analyzer=titles,
                      multi=True,
                      fields={
                          'space': field.Text(analyzer=titles_space,
                                              multi=True),
                          'keyword': field.Keyword(multi=True),
                      })

    metadata = field.Object(Metadata)
    activity = field.Object(Activity, multi=True)
    url = field.Keyword()
    status = field.Keyword()
    created_at = field.Date()

    class Index:
        name = 'chibi_gob__open_data__dataset'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"buscando dataset {url}")
        if cls.search().filter("term", url=url).count() > 0:
            return True
        return False

    @classmethod
    def get_by_url(cls, url):
        logger.info(f"get dataset {url}")
        result = cls.search().filter("term", url=url)[:1].execute()
        if result:
            return result[0]
        return None

    def save(self, *args, **kw):
        super().save(*args, **kw)
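A usage sketch combining the two class methods above; the URL is a placeholder and a default connection is assumed:

dataset_url = "https://example.org/open-data/dataset/42"
dataset = Dataset.get_by_url(dataset_url)
if dataset is None and not Dataset.url_is_scaned(dataset_url):
    # Not indexed yet: store a minimal record for later enrichment.
    Dataset(url=dataset_url, status="new").save()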