class Manga(Document):
    # The original declared `title = field.Text()` and then immediately
    # overwrote it with the multi-field version below; the dead assignment
    # has been dropped.
    title = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    tags = field.Object(Tag)
    upload_at = field.Date()
    scan_at = field.Date()
    url = field.Keyword()
    cover_url = field.Keyword()
    images_urls = field.Keyword(multi=True)
    images_len = field.Integer()

    class Index:
        name = 'nhentai__mangas'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"searching for manga {url}")
        return cls.search().filter("term", url=url).count() > 0
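# A minimal usage sketch for the Manga document above, assuming an
# Elasticsearch cluster is reachable and the `titles`/`titles_space`
# analyzers are defined elsewhere in this file; the host and URLs are
# placeholders.
from elasticsearch_dsl import connections

connections.create_connection(hosts=['http://localhost:9200'])
Manga.init()  # create the index and mapping if they do not exist yet

manga = Manga(title=['Some title'], url='https://example.com/g/1', images_len=20)
manga.save()

if not Manga.url_is_scaned('https://example.com/g/2'):
    ...  # scrape and index the new entry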
class ForumDocument(SumoDocument):
    """ES document for forum posts. Thread information is duplicated
    across all posts in that thread."""

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        name = config.FORUM_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def get_field_value(self, field, instance, *args):
        # Fields prefixed with "thread_" live on the related Thread object.
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        return Post.objects.select_related("thread")
class Inner_sub_profile(InnerDoc):
    sub_profile_id = field.Keyword()
    status = field.Keyword()

    @property
    def sub_profile(self):
        return Sub_profile.get(self.sub_profile_id)
class User(InnerDoc):
    name = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    url = field.Keyword()
class Metadata(InnerDoc):
    language = field.Keyword()
    fuente = field.Keyword()
    frequency = field.Keyword()
    name_publisher = field.Keyword()
    email_publisher = field.Keyword()
    published = field.Date()
class Index_inner(InnerDoc):
    index = field.Keyword()
    klass = field.Keyword()

    @property
    def map(self):
        if self.klass:
            cls = import_(self.klass)
        else:
            cls = Dweller
        if self.index:
            cls._index._name = self.index
        return cls

    @map.setter
    def map(self, value):
        self.klass = export(value)
        self.index = value._index._name

    @property
    def exists(self):
        return self.map._index.exists()

    def purge(self):
        # Refuse to delete wildcard index patterns.
        if '*' in self.map._index._name:
            raise Dangerous_purge(self.map._index._name)
        self.map._index.delete()
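# A sketch of how Index_inner might be used, assuming the `import_`/`export`
# helpers resolve dotted class paths to classes and back, and that `Dweller`
# is the default document class defined later in this file.
inner = Index_inner()
inner.map = Dweller        # stores the class path and its index name
assert inner.klass         # e.g. 'some.module.Dweller'
if inner.exists:
    inner.purge()          # raises Dangerous_purge on wildcard index names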
class Document(BaseDocument):
    url = field.Keyword()
    url_text = field.Text()
    referer = field.Keyword()
    title = field.Text()
    html = field.Text()
    text = field.Text()
    timestamp = field.Date(default_timezone=settings.TIME_ZONE)
def _schema2doc_map(self):
    _map = {
        'integer': dsl_field.Long(),
        'number': dsl_field.ScaledFloat(scaling_factor=100),
        'string': dsl_field.Text(analyzer=polish_analyzer, fields={
            'raw': dsl_field.Text(),
            'keyword': dsl_field.Keyword(),
        }),
        'any': dsl_field.Text(analyzer=polish_analyzer, fields={
            'raw': dsl_field.Text(),
            'keyword': dsl_field.Keyword(),
        }),
        'boolean': dsl_field.Boolean(),
        'time': dsl_field.Text(fields={
            'text': dsl_field.Text(),
            'time': dsl_field.Date(format=constance_config.TIME_FORMATS),
        }),
        'duration': dsl_field.DateRange(),
        'default': dsl_field.Text(),
        'date': dsl_field.Text(fields={
            'text': dsl_field.Text(),
            'date': dsl_field.Date(format=constance_config.DATE_FORMATS),
        }),
        'datetime': dsl_field.Text(fields={
            'text': dsl_field.Text(),
            'datetime': dsl_field.Date(format=constance_config.DATE_FORMATS),
        }),
    }
    # Wrap every field so each value is stored alongside its raw
    # string representation.
    for key, val in _map.items():
        _map[key] = CustomObject(properties={
            'val': val,
            'repr': dsl_field.Keyword(),
        })
    return _map
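# A sketch of how the map above might be consumed, assuming CustomObject
# subclasses elasticsearch_dsl's Object field and `resource` is an instance
# of the class owning _schema2doc_map. Each column value is indexed with a
# typed 'val' subfield plus a 'repr' keyword holding its raw string, e.g.
# {"col1": {"val": 42, "repr": "42"}}.
_map = resource._schema2doc_map()
integer_field = _map['integer']  # CustomObject({'val': Long, 'repr': Keyword})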
class Data_set_resource(InnerDoc):
    title = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    description = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    download_link = field.Keyword()
    kind = field.Keyword()
class ProfileDocument(SumoDocument):
    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # Store the avatar url so we don't need to hit the db when searching users,
    # but set enabled=False to ensure ES does no parsing of it.
    avatar = field.Object(enabled=False)
    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()
    involved_from = field.Date()
    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def prepare_username(self, instance):
        return instance.user.username

    def prepare_email(self, instance):
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        if avatar := instance.fxa_avatar:
            return InnerDoc(url=avatar)
class D(document.Document):
    kw = field.Keyword()

    class Meta:
        doc_type = 'not-doc'

    class Index:
        name = 'test-not-doc-index'
def doc(self):
    if not self._doc_cache:
        _fields, _map = {}, {}
        for idx, _f in enumerate(self.schema['fields']):
            alias_name = _f['name']
            field_name = 'col{}'.format(idx + 1)
            # The original referenced a bare `_schema2doc_map`, which would
            # raise NameError; the sibling prepare_doc uses the attribute
            # on self, so that is assumed here too.
            _field = self._schema2doc_map[_f['type']]
            _map[field_name] = alias_name
            _fields[field_name] = _field

        _fields['resource'] = dsl_field.Nested(properties={
            'id': dsl_field.Integer(),
            'title': dsl_field.Text(analyzer=polish_analyzer,
                                    fields={'raw': dsl_field.Keyword()}),
        })
        _fields['updated_at'] = dsl_field.Date()
        _fields['row_no'] = dsl_field.Long()

        doc = type(self.idx_name, (DocType,), _fields)
        doc._doc_type.index = self.idx_name
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        # (a stray no-op repetition of the _meta lookup has been removed)
        self._doc_cache = doc
    return self._doc_cache
def prepare_doc(self):
    _fields, _map = {}, {}
    for idx, _f in enumerate(self.schema['fields'], 1):
        alias_name = _f['name']
        field_name = 'col{}'.format(idx)
        _field = self._schema2doc_map[_f['type']]
        _map[field_name] = alias_name
        _fields[field_name] = _field

    if self.has_geo_data:
        _fields['shape'] = dsl_field.GeoShape()
        _fields['point'] = dsl_field.GeoPoint()
        _fields['label'] = dsl_field.Text()
        _fields['shape_type'] = dsl_field.Integer()

    _fields['resource'] = dsl_field.Nested(properties={
        'id': dsl_field.Integer(),
        'title': dsl_field.Text(analyzer=polish_analyzer,
                                fields={'raw': dsl_field.Keyword()}),
    })
    _fields['updated_at'] = dsl_field.Date()
    _fields['row_no'] = dsl_field.Long()
    _fields['Index'] = type('Index', (type,), {'name': self.idx_name})

    doc = type(self.idx_name, (Document,), _fields)
    doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
    return doc
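# A usage sketch for prepare_doc, assuming `tabular_resource` (a placeholder
# name) owns the method and its schema lists named, typed columns, and that
# the generated class behaves like a regular elasticsearch-dsl Document.
from datetime import datetime

Doc = tabular_resource.prepare_doc()
Doc.init()  # create the index with the generated mapping
row = Doc(col1={'val': 42, 'repr': '42'}, row_no=1, updated_at=datetime.utcnow())
row.save()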
class DocWithNested(document.DocType):
    comments = field.Nested(properties={
        'title': field.Text(),
        'tags': field.Keyword(multi=True),
    })
class Ssn_trace(InnerDoc):
    is_valid = field.Boolean()
    is_deceased = field.Boolean()
    ssn = field.Keyword()
    human_message = field.Text()
    issued = field.Object(Ssn_issued)
class Activity(InnerDoc):
    action = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    date = field.Date()
    user = field.Object(User)
class MessageIndex(DocType):
    room = field.Keyword()
    user = field.Text()
    created = field.Date()
    message = field.Text()
    status = field.Text()
    tags = Nested(properties={'tags': field.Text()})

    class Meta:
        # Elasticsearch index names must be lowercase; the original
        # 'Message' would be rejected at index-creation time.
        index = 'message'
def get_document_fields(document, excluding=None):
    excluding = excluding or []
    attributes = {"_id": elasticsearch_fields.Keyword()}
    properties = document._doc_type.mapping.properties.properties
    for attr_name, attr in properties._d_.items():
        if attr_name in excluding:
            continue
        attributes[attr_name] = attr
    return OrderedDict(sorted(attributes.items()))
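# A minimal sketch of calling get_document_fields against one of the
# documents above; the excluded field name is illustrative.
fields = get_document_fields(ForumDocument, excluding=['content'])
for name, es_field in fields.items():
    print(name, type(es_field).__name__)  # e.g. "_id Keyword"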
class Image(Dweller):
    mime = field.Keyword()
    extension = field.Keyword()
    file = field.Keyword()
    dir = field.Keyword()
    album = field.Keyword()
    base_path = field.Keyword()
    thumbnail_dir = field.Keyword()
    thumbnail_path = field.Keyword()

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        if self.value_raw and not self.value.exists:
            logger.warning("cannot find the file '{}'".format(self.value.path))

    @Dweller.value.getter
    def value(self):
        return Chibi_image(self.value_raw)

    @value.setter
    def value(self, value):
        if value is None:
            value = self.value_raw
        if isinstance(value, str):
            value = Chibi_image(value)
        self.value_raw = value.path
        self.mime = value.properties.mime
        self.extension = value.properties.extension
        self.file = value.file_name
        self.dir = value.dir
        self.album = os.path.split(self.dir)[-1]
        # base_path is the parent of the image directory.
        self.base_path = list(os.path.split(self.dir))
        self.base_path.pop()
        self.base_path = join(*self.base_path)
        if self.thumbnail_dir:
            self.thumbnail_path = Chibi_path(self.thumbnail_dir)
        else:
            self.thumbnail_path = add_extensions(self.base_path, "thumbnail")
        self.thumbnail_path = self.thumbnail_path + self.album
        mkdir(self.thumbnail_path)
        thumbnail = value.thumbnail(self.thumbnail_path)
        self.thumbnail_path = thumbnail.path

    @property
    def thumbnail(self):
        return Chibi_image(self.thumbnail_path)
class Resource(Document):
    title = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    description = field.Text(analyzer=titles, fields={
        'space': field.Text(analyzer=titles_space),
        'keyword': field.Keyword(),
    })
    kind = field.Keyword()
    url = field.Keyword()
    created_at = field.Date()
    tags = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    metadata = field.Object(Metadata)

    class Index:
        name = 'chibi_gob__open_data__dataset__resource'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"searching for dataset {url}")
        return cls.search().filter("term", url=url).count() > 0

    def save(self, *args, **kw):
        super().save(*args, **kw)
class Dataset(Document):
    resources = field.Object(Data_set_resource, multi=True)
    tags = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    metadata = field.Object(Metadata)
    activity = field.Object(Activity, multi=True)
    url = field.Keyword()
    status = field.Keyword()
    created_at = field.Date()

    class Index:
        name = 'chibi_gob__open_data__dataset'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"searching for dataset {url}")
        return cls.search().filter("term", url=url).count() > 0

    @classmethod
    def get_by_url(cls, url):
        logger.info(f"get dataset {url}")
        result = cls.search().filter("term", url=url)[:1].execute()
        if result:
            return result[0]
        return None

    def save(self, *args, **kw):
        super().save(*args, **kw)
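# A quick sketch of the lookup helpers above; the URL is a placeholder.
dataset = Dataset.get_by_url('https://datos.example.gob/dataset/air-quality')
if dataset is None:
    dataset = Dataset(url='https://datos.example.gob/dataset/air-quality',
                      status='new')
    dataset.save()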
class Article(Document):
    title = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    text = field.Text(analyzer=titles, multi=True, fields={
        'space': field.Text(analyzer=titles_space, multi=True),
        'keyword': field.Keyword(multi=True),
    })
    category = field.Text(analyzer=category, multi=True, fields={
        'keyword': field.Keyword(multi=True),
    })
    create_at = field.Date()
    upload_at = field.Date()
    scan_at = field.Date()
    url = field.Keyword()

    class Index:
        name = 'somos_kudasai__articles'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        logger.info(f"searching for article {url}")
        return cls.search().filter("term", url=url).count() > 0

    def save(self, *args, **kw):
        super().save(*args, **kw)
class CaseStudyInnerDoc(InnerDoc):
    wildcard = field.Text()
    pk = field.Integer(index=False)
    title = field.Text(copy_to='wildcard')
    short_summary = field.Text(copy_to='wildcard')
    description = field.Text(copy_to='wildcard')
    sector = field.Text(copy_to='wildcard')
    keywords = field.Text(copy_to='wildcard')
    image = field.Text(index=False)
    company_number = field.Text(index=False)
    image_one_caption = field.Text(copy_to='wildcard')
    image_two_caption = field.Text(copy_to='wildcard')
    image_three_caption = field.Text(copy_to='wildcard')
    testimonial = field.Text(copy_to='wildcard')
    testimonial_name = field.Keyword(copy_to='wildcard')
    testimonial_job_title = field.Text(copy_to='wildcard')
    slug = field.Text(index=False)
class ProfileDocument(SumoDocument):
    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # Store the avatar url so we don't need to hit the db when searching users,
    # but set enabled=False to ensure ES does no parsing of it.
    avatar = field.Object(enabled=False)
    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()
    involved_from = field.Date()
    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    @classmethod
    def prepare(cls, instance):
        """Override super method to exclude docs from indexing."""
        # Mark the document to be discarded if the user is not active.
        if not instance.user.is_active:
            instance.es_discard_doc = "unindex_me"
        return super(ProfileDocument, cls).prepare(instance)

    def prepare_username(self, instance):
        return instance.user.username

    def prepare_email(self, instance):
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        if avatar := instance.fxa_avatar:
            return InnerDoc(url=avatar)
class Population(Document):
    name = field.Text(fields={
        'raw': field.Keyword(),
    })
    description = field.Text()
    dweller = field.Object(Dweller_inner)
    samples = field.Object(Dweller_inner, multi=True)

    class Index:
        name = "population"

    def add_sample(self, sample_class=None, index=None):
        result = {}
        if sample_class is None:
            sample_class = Sample
        result['klass'] = export(sample_class)
        if index is not None:
            result['index'] = index
        self.samples.append(result)
class Dweller(Document):
    value_raw = field.Keyword()
    real_raw = field.Float()

    def __init__(self, *args, value=None, **kw):
        super().__init__(*args, **kw)
        if value is not None:
            self.value = value

    @property
    def value(self):
        return self.value_raw

    @value.setter
    def value(self, value):
        self.value_raw = str(value)

    @property
    def real(self):
        return self.real_raw
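# A small sketch of the value round-trip on Dweller: the setter coerces
# whatever it is given to its string form in `value_raw`.
d = Dweller(value=42)
assert d.value_raw == '42'
assert d.value == '42'  # the base getter returns the stored string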
def prepare_doc(self):
    _fields = {
        'shape': dsl_field.GeoShape(),
        'point': dsl_field.GeoPoint(),
        'shape_type': dsl_field.Integer(),
        'label': dsl_field.Text(),
        'resource': dsl_field.Nested(properties={
            'id': dsl_field.Integer(),
            'title': dsl_field.Text(analyzer=polish_analyzer,
                                    fields={'raw': dsl_field.Keyword()}),
        }),
        'updated_at': dsl_field.Date(),
        'row_no': dsl_field.Long(),
    }
    _map = {}
    for idx, _f in enumerate(self.schema, 1):
        if _f.type not in self._schema2doc_map:
            continue
        alias_name = _f.name
        field_name = f'col{idx}'
        _field = self._schema2doc_map[_f.type]
        _map[field_name] = alias_name
        _fields[field_name] = _field

    _fields['Index'] = type('Index', (type,), {'name': self.idx_name})
    doc = type(self.idx_name, (Document,), _fields)
    doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
    return doc
class Tag(InnerDoc):
    artists = field.Text(analyzer=tag, multi=True,
                         fields={'keyword': field.Keyword(multi=True)})
    categories = field.Text(analyzer=tag, multi=True,
                            fields={'keyword': field.Keyword(multi=True)})
    characters = field.Text(analyzer=tag, multi=True,
                            fields={'keyword': field.Keyword(multi=True)})
    groups = field.Text(analyzer=tag, multi=True,
                        fields={'keyword': field.Keyword(multi=True)})
    languages = field.Text(analyzer=tag, multi=True,
                           fields={'keyword': field.Keyword(multi=True)})
    parodies = field.Text(analyzer=tag, multi=True,
                          fields={'keyword': field.Keyword(multi=True)})
    tags = field.Text(analyzer=tag, multi=True,
                      fields={'keyword': field.Keyword(multi=True)})
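# A query sketch combining Tag with the Manga document defined earlier in
# this file: because `tags` is an Object field (not Nested), its subfields
# are addressed with a flat dotted path; the tag value is a placeholder.
results = (
    Manga.search()
    .filter('term', **{'tags.languages.keyword': 'english'})
    .execute()
)
for hit in results:
    print(hit.url)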
class ForumDocument(SumoDocument):
    """ES document for forum posts. Thread information is duplicated
    across all posts in that thread."""

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    forum_slug = field.Keyword()
    thread_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        pass

    def prepare_forum_slug(self, instance):
        return instance.thread.forum.slug

    def get_field_value(self, field, instance, *args):
        # Fields prefixed with "thread_" live on the related Thread object.
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        return Post.objects.prefetch_related("thread", "thread__forum")
class OptionalObjectWithRequiredField(document.Document):
    comments = field.Nested(
        properties={'title': field.Keyword(required=True)}
    )