class ForumDocument(SumoDocument):
    """ES document for forum posts.

    Thread information is duplicated across all posts in that thread.
    """

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        name = config.FORUM_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def get_field_value(self, field, instance, *args):
        """Resolve ``thread_``-prefixed fields against the post's thread."""
        prefix = "thread_"
        if field.startswith(prefix):
            instance = instance.thread
            field = field[len(prefix):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        # Fetch each post's thread in the same query to avoid N+1 lookups.
        return Post.objects.select_related("thread")
def prepare_doc(self):
    """Build a dynamic Document subclass for this resource's index.

    Column fields are aliased to ``colN`` names; the original header
    names are preserved in the mapping meta under ``headers``.
    """
    fields_by_name, headers = {}, {}
    for position, column in enumerate(self.schema['fields'], 1):
        es_name = 'col{}'.format(position)
        headers[es_name] = column['name']
        fields_by_name[es_name] = self._schema2doc_map[column['type']]

    if self.has_geo_data:
        fields_by_name['shape'] = dsl_field.GeoShape()
        fields_by_name['point'] = dsl_field.GeoPoint()
        fields_by_name['label'] = dsl_field.Text()
        fields_by_name['shape_type'] = dsl_field.Integer()

    fields_by_name['resource'] = dsl_field.Nested(properties={
        'id': dsl_field.Integer(),
        'title': dsl_field.Text(
            analyzer=polish_analyzer,
            fields={'raw': dsl_field.Keyword()},
        ),
    })
    fields_by_name['updated_at'] = dsl_field.Date()
    fields_by_name['row_no'] = dsl_field.Long()
    fields_by_name['Index'] = type('Index', (type, ), {'name': self.idx_name})

    doc = type(self.idx_name, (Document, ), fields_by_name)
    doc._doc_type.mapping._meta['_meta'] = {'headers': headers}
    return doc
class Manga(Document):
    """ES document for a scraped manga.

    Fix: the original assigned ``title`` twice; the first bare
    ``field.Text()`` was immediately overwritten and has been removed.
    """

    title = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    tags = field.Object(Tag)
    upload_at = field.Date()
    scan_at = field.Date()
    url = field.Keyword()
    cover_url = field.Keyword()
    images_urls = field.Keyword(multi=True)
    images_len = field.Integer()

    class Index:
        name = 'nhentai__mangas'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True if a manga with this exact URL is already indexed."""
        logger.info(f"buscando manga {url}")
        return cls.search().filter("term", url=url).count() > 0
class User(InnerDoc):
    """Embedded user reference with analyzed name variants."""

    name = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    url = field.Keyword()
class Document(BaseDocument):
    """Indexed page snapshot: URL, referer, title, raw HTML and text."""

    url = field.Keyword()
    url_text = field.Text()
    referer = field.Keyword()
    title = field.Text()
    html = field.Text()
    text = field.Text()
    # Dates without explicit tz info are interpreted in the project's
    # configured time zone.
    timestamp = field.Date(default_timezone=settings.TIME_ZONE)
class Activity(InnerDoc):
    """Embedded activity record: an action, its date, and the acting user."""

    action = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    date = field.Date()
    user = field.Object(User)
class MessageIndex(DocType):
    """Chat message document."""

    room = field.Keyword()
    user = field.Text()
    created = field.Date()
    message = field.Text()
    status = field.Text()
    tags = Nested(properties={'tags': field.Text()})

    class Meta:
        # Fix: Elasticsearch rejects index names containing uppercase
        # letters, so the index 'Message' could never be created.
        index = 'message'
class Data_set_resource(InnerDoc):
    """Embedded dataset resource: title, description and download info."""

    title = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    description = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    download_link = field.Keyword()
    kind = field.Keyword()
class CompanyDocument(Document):
    """ES document for a registered company.

    Address sub-fields, status, type and dates are stored but not indexed
    (``index=False``); only ``company_name`` and ``company_number`` are
    searchable. ``to_dict``/``to_profile_dict`` post-process the raw source
    into the API response shape.
    """

    address = field.Nested(
        properties={
            'care_of': field.Keyword(index=False, store=True),
            'po_box': field.Keyword(index=False, store=True),
            'address_line_1': field.Keyword(index=False, store=True),
            'address_line_2': field.Keyword(index=False, store=True),
            'locality': field.Keyword(index=False, store=True),
            'region': field.Keyword(index=False, store=True),
            'country': field.Keyword(index=False, store=True),
            'postal_code': field.Keyword(index=False, store=True)
        })
    country_of_origin = field.Keyword(index=False, store=True)
    address_snippet = field.Keyword(index=False, store=True)
    company_name = field.Text()
    company_number = field.Text()
    company_status = field.Keyword(index=False, store=True)
    type = field.Keyword(index=False, store=True)
    date_of_cessation = field.Date(index=False, format='yyyy-MM-dd')
    date_of_creation = field.Date(index=False, format='yyyy-MM-dd')
    sic_codes = field.Keyword(index=False, store=True)

    class Meta:
        index = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS

    def to_dict(self, include_meta=False):
        """Serialize the document, duplicating/renaming fields for the API.

        When metadata is included, the ``_source`` payload gains ``title``
        (copy of ``company_name``) and ``company_type`` (copy of ``type``),
        the address country is overwritten with ``country_of_origin``, and
        dates are reformatted to strings.
        """
        meta = super().to_dict(include_meta)
        if '_source' in meta:
            company = meta['_source']
            company['title'] = company['company_name']
            company['address']['country'] = company['country_of_origin']
            company['company_type'] = company['type']
            meta['_source'] = self.reformat_date(company)
        return meta

    def to_profile_dict(self):
        """Return the source dict with ``registered_office_address`` aliased
        to ``address`` and dates reformatted."""
        company = self.to_dict()
        company['registered_office_address'] = company['address']
        return self.reformat_date(company)

    @staticmethod
    def reformat_date(company):
        """Convert creation/cessation dates to 'YYYY-MM-DD' strings in place.

        NOTE(review): assumes the date values are date/datetime objects;
        calling this twice on the same dict would fail on the resulting
        strings — confirm callers never double-apply it.
        """
        if 'date_of_creation' in company:
            company['date_of_creation'] = (
                company['date_of_creation'].strftime('%Y-%m-%d'))
        if 'date_of_cessation' in company:
            company['date_of_cessation'] = (
                company['date_of_cessation'].strftime('%Y-%m-%d'))
        return company
class ProfileIndex(DocType):
    """Search index document for user profiles."""

    id = Integer()
    username = Text()
    first_name = Text()
    last_name = Text()
    profile = Keyword()
    organization = Text()
    position = Keyword()
    type = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )

    class Meta:
        index = 'profile-index'
def test_field_supports_multiple_analyzers():
    """Index and search analyzers serialize independently on a Text field."""
    text_field = field.Text(analyzer="snowball", search_analyzer="keyword")
    expected = {
        "analyzer": "snowball",
        "search_analyzer": "keyword",
        "type": "text",
    }
    assert text_field.to_dict() == expected
class Ssn_trace(InnerDoc):
    """Embedded SSN trace result."""

    is_valid = field.Boolean()
    is_deceased = field.Boolean()
    ssn = field.Keyword()
    human_message = field.Text()
    issued = field.Object(Ssn_issued)
class DocWithNested(document.DocType):
    """Document with a nested ``comments`` field (title + multi tags)."""

    comments = field.Nested(properties={
        'title': field.Text(),
        'tags': field.Keyword(multi=True),
    })
def test_multifield_supports_multiple_analyzers():
    """Each sub-field keeps its own analyzer settings when serialized."""
    multi = field.Text(fields={
        'f1': field.Text(search_analyzer='keyword', analyzer='snowball'),
        'f2': field.Text(analyzer='keyword'),
    })
    expected = {
        'fields': {
            'f1': {
                'analyzer': 'snowball',
                'search_analyzer': 'keyword',
                'type': 'text',
            },
            'f2': {'analyzer': 'keyword', 'type': 'text'},
        },
        'type': 'text',
    }
    assert multi.to_dict() == expected
class ProfileDocument(SumoDocument):
    """ES document for user profiles."""

    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # Store the avatar URL so searches don't hit the DB, but set
    # enabled=False so ES does no parsing/indexing of the object.
    avatar = field.Object(enabled=False)
    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()
    involved_from = field.Date()
    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def prepare_username(self, instance):
        return instance.user.username

    def prepare_email(self, instance):
        # Only expose the address when the profile opted into a public email.
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        avatar_url = instance.fxa_avatar
        if avatar_url:
            return InnerDoc(url=avatar_url)
def test_field_supports_multiple_analyzers():
    """Index and search analyzers serialize independently on a Text field."""
    text_field = field.Text(analyzer='snowball', search_analyzer='keyword')
    expected = {
        'analyzer': 'snowball',
        'search_analyzer': 'keyword',
        'type': 'text',
    }
    assert text_field.to_dict() == expected
class User(document.Document):
    """Document exercising Meta-driven mapping options (_all, _index,
    dynamic, dynamic_templates)."""

    username = field.Text()

    class Meta:
        all = document.MetaField(enabled=False)
        _index = document.MetaField(enabled=True)
        dynamic = document.MetaField('strict')
        dynamic_templates = document.MetaField([42])
def doc(self):
    """Lazily build (and cache) the dynamic DocType for this index.

    Column fields are aliased to ``colN`` names; the original header
    names are kept in the mapping meta under ``headers``.

    Fix: removed a dead, side-effect-free repetition of the
    ``doc._doc_type.mapping._meta['_meta']`` expression.
    """
    if not self._doc_cache:
        fields, headers = {}, {}
        for position, column in enumerate(self.schema['fields'], 1):
            es_name = 'col{}'.format(position)
            headers[es_name] = column['name']
            fields[es_name] = _schema2doc_map[column['type']]

        fields['resource'] = dsl_field.Nested(
            properties={
                'id': dsl_field.Integer(),
                'title': dsl_field.Text(
                    analyzer=polish_analyzer,
                    fields={'raw': dsl_field.Keyword()})
            }
        )
        fields['updated_at'] = dsl_field.Date()
        fields['row_no'] = dsl_field.Long()

        doc = type(self.idx_name, (DocType,), fields)
        doc._doc_type.index = self.idx_name
        doc._doc_type.mapping._meta['_meta'] = {'headers': headers}
        self._doc_cache = doc
    return self._doc_cache
class GroupIndex(DocType):
    """Search index document for groups."""

    id = Integer()
    title = Text(
        fields={
            'pattern': field.Text(analyzer=pattern_analyzer),
            'english': field.Text(analyzer='english'),
        },
    )
    title_sortable = Keyword()
    description = Text()
    json = Text()
    type = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )

    class Meta:
        index = 'group-index'
def test_nested_provides_direct_access_to_its_fields():
    """Nested properties are reachable via item access on the field."""
    nested = field.Nested(properties={
        'name': {'type': 'text', 'index': 'not_analyzed'},
    })
    assert 'name' in nested
    assert nested['name'] == field.Text(index='not_analyzed')
def test_nested_provides_direct_access_to_its_fields():
    """Nested properties are reachable via item access on the field."""
    nested = field.Nested(properties={
        "name": {"type": "text", "index": "not_analyzed"},
    })
    assert "name" in nested
    assert nested["name"] == field.Text(index="not_analyzed")
class ProfileIndex(DocType):
    """Search index document for user profiles, with content counters."""

    id = Integer()
    username = Text()
    first_name = Text()
    last_name = Text()
    profile = Keyword()
    organization = Text()
    position = Keyword()
    type = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )
    avatar_100 = Text()
    layers_count = Integer()
    maps_count = Integer()
    documents_count = Integer()

    class Meta:
        index = 'profile-index'
class Population(Document):
    """ES document describing a population and its sample descriptors."""

    name = field.Text(fields={
        'raw': field.Keyword(),
    })
    description = field.Text()
    dweller = field.Object(Dweller_inner)
    samples = field.Object(Dweller_inner, multi=True)

    class Index:
        name = "population"

    def add_sample(self, sample_class=None, index=None):
        """Append a sample descriptor; defaults to the ``Sample`` class."""
        if sample_class is None:
            sample_class = Sample
        entry = {'klass': export(sample_class)}
        if index is not None:
            entry['index'] = index
        self.samples.append(entry)
class Entry(document.Document):
    """Dictionary entry with nested forms and a publication date."""

    forms = field.Nested(Form)
    created = field.Date()
    superentry = field.Text()

    def save(self, **kwargs):
        # Modernized: zero-argument super() replaces the legacy
        # two-argument Python 2 form.
        return super().save(**kwargs)

    def is_published(self):
        """Return True once ``created`` lies in the past.

        NOTE(review): compares a naive ``datetime.now()`` against
        ``created``; this raises if ``created`` is timezone-aware —
        confirm how dates are stored upstream.
        """
        return datetime.now() > self.created
class Document(DocType):
    """Indexed file document.

    Bug fix: the original declared seven attributes with a trailing comma
    (e.g. ``title = field.String(...),``), which made each of them a
    one-element *tuple* instead of a field, so they never entered the
    mapping. The stray commas have been removed.
    """

    id = field.Integer()
    title = field.String(analyzer='snowball')
    author = field.String(analyzer='snowball')
    creation_date = field.Date()
    pages = field.Integer()
    content = field.String(analyzer='snowball')
    lang = field.String()
    size = field.Integer()
    tags = field.String(index='not_analyzed')
    autocomplete = field.Text(analyzer=ngram_analyzer)
def test_multifield_supports_multiple_analyzers():
    """Each sub-field keeps its own analyzer settings when serialized."""
    multi = field.Text(fields={
        "f1": field.Text(search_analyzer="keyword", analyzer="snowball"),
        "f2": field.Text(analyzer="keyword"),
    })
    expected = {
        "fields": {
            "f1": {
                "analyzer": "snowball",
                "search_analyzer": "keyword",
                "type": "text",
            },
            "f2": {"analyzer": "keyword", "type": "text"},
        },
        "type": "text",
    }
    assert multi.to_dict() == expected
class User(document.Document):
    """User document storing an MD5 password hash.

    SECURITY(review): MD5 is unsuitable for password hashing — it is fast,
    unsalted, and collision-prone. Migrating to a dedicated KDF
    (bcrypt/scrypt/argon2) is strongly recommended, but would invalidate
    existing stored hashes, so it is flagged here rather than changed.
    """

    # Hex digest of the password; see security note above.
    pwd_hash = field.Text()

    def check_password(self, pwd):
        """Return True if ``pwd`` hashes to the stored digest."""
        # NOTE(review): hashlib.md5 requires bytes in Python 3 — if ``pwd``
        # arrives as ``str`` this raises TypeError; confirm the caller
        # encodes it (or that ``md5`` here is a project wrapper).
        return md5(pwd).hexdigest() == self.pwd_hash

    @property
    def password(self):
        # The plaintext is never stored, so reading it is an error.
        raise AttributeError('readonly')

    @password.setter
    def password(self, pwd):
        # Setting the password only stores its hash.
        self.pwd_hash = md5(pwd).hexdigest()
class ForumDocument(SumoDocument):
    """ES document for forum posts.

    Thread information is duplicated across all posts in that thread.
    """

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    forum_slug = field.Keyword()
    thread_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        pass

    def prepare_forum_slug(self, instance):
        """Slug of the forum the post's thread belongs to."""
        return instance.thread.forum.slug

    def get_field_value(self, field, instance, *args):
        """Resolve ``thread_``-prefixed fields against the post's thread."""
        prefix = "thread_"
        if field.startswith(prefix):
            instance = instance.thread
            field = field[len(prefix):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        # Pull threads and their forums alongside posts to avoid N+1 queries.
        return Post.objects.prefetch_related("thread", "thread__forum")
def prepare_doc(self):
    """Build a dynamic Document subclass for this resource's index.

    Geo/metadata fields are fixed; column fields are aliased to ``colN``
    names with the original headers kept in the mapping meta.
    """
    fields_by_name = {
        'shape': dsl_field.GeoShape(),
        'point': dsl_field.GeoPoint(),
        'shape_type': dsl_field.Integer(),
        'label': dsl_field.Text(),
        'resource': dsl_field.Nested(properties={
            'id': dsl_field.Integer(),
            'title': dsl_field.Text(
                analyzer=polish_analyzer,
                fields={'raw': dsl_field.Keyword()},
            ),
        }),
        'updated_at': dsl_field.Date(),
        'row_no': dsl_field.Long(),
    }

    headers = {}
    for position, column in enumerate(self.schema, 1):
        # Columns whose type has no ES mapping are skipped entirely.
        if column.type not in self._schema2doc_map:
            continue
        es_name = f'col{position}'
        headers[es_name] = column.name
        fields_by_name[es_name] = self._schema2doc_map[column.type]

    fields_by_name['Index'] = type('Index', (type, ), {'name': self.idx_name})
    doc = type(self.idx_name, (Document, ), fields_by_name)
    doc._doc_type.mapping._meta['_meta'] = {'headers': headers}
    return doc
class Dataset(Document):
    """ES document for a scraped open-data dataset.

    Fix: removed a redundant ``save`` override that merely delegated to
    ``super().save`` while *discarding* its return value; the inherited
    method now returns the save result as intended.
    """

    resources = field.Object(Data_set_resource, multi=True)
    tags = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    metadata = field.Object(Metadata)
    activity = field.Object(Activity, multi=True)
    url = field.Keyword()
    status = field.Keyword()
    created_at = field.Date()

    class Index:
        name = 'chibi_gob__open_data__dataset'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True if a dataset with this exact URL is already indexed."""
        logger.info(f"buscando dataset {url}")
        return cls.search().filter("term", url=url).count() > 0

    @classmethod
    def get_by_url(cls, url):
        """Return the first dataset matching ``url``, or None."""
        logger.info(f"get dataset {url}")
        result = cls.search().filter("term", url=url)[:1].execute()
        if result:
            return result[0]
        return None