class LinkDocument(Document):
    """Search document built from ``Link`` model rows.

    Besides the model fields listed in ``Django.fields`` it carries a
    subjects string, the owner's username and a completion field sourced
    from the title.
    """

    # Value is read from the model attribute ``subjects_to_string``.
    subjects = fields.TextField(attr="subjects_to_string")
    user = fields.ObjectField(properties={'username': fields.TextField()})
    # Completion field fed from the ``title`` attribute.
    title_suggestions = fields.CompletionField(attr='title')

    class Index:
        name = Indices.LINKS.value
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Link
        fields = ['title', 'link', 'private']

    @classmethod
    def get_suggestions(cls, query):
        """Return whatever ``get_title_suggestions`` yields for this class."""
        return get_title_suggestions(cls, query)

    @staticmethod
    def build_query(search_query):
        """Build the OR-combination of every match clause for *search_query*.

        The clauses are combined left to right with ``|``, in the order
        they are listed below.
        """
        clauses = (
            Q("match", title={'query': search_query, 'boost': 5}),
            Q("match", subjects={'query': search_query, 'boost': 5}),
            Q("match_phrase_prefix",
              title={'query': search_query, 'max_expansions': 10}),
            Q("match", title={'query': search_query, 'fuzziness': 'AUTO'}),
            Q("match", link={'query': search_query, 'fuzziness': 'AUTO'}),
            Q("match_phrase_prefix",
              link={'query': search_query, 'max_expansions': 3}),
        )
        combined = clauses[0]
        for clause in clauses[1:]:
            combined = combined | clause
        return combined
class CorpusContentDocument(Document):
    """Search document for ``CorpusContent`` rows with their parent corpus."""

    class Index:
        # ``name`` must be assigned before ``settings`` is rebound below:
        # at this point ``settings`` still refers to the outer-scope object.
        name = settings.INDEX_CORPUS
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = CorpusContent
        fields = [
            'id',
            'phrase',
        ]
        related_models = [Corpus]

    corpus = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'name': fields.TextField(),
            'description': fields.TextField(),
            'language': fields.TextField(),
            # Read from the ``get_user_id`` attribute of the corpus.
            'user': fields.TextField(attr="get_user_id"),
        }
    )

    def get_instances_from_related(self, related_instance):
        """Map a changed related object to the documents' model instances.

        For a ``Corpus`` this returns ``corpuscontent_set.all()``; for any
        other object it returns ``None``.
        """
        if not isinstance(related_instance, Corpus):
            return None
        return related_instance.corpuscontent_set.all()
class OfferDocType(DocType):
    """Search document for ``Offer`` rows, built from the offer's product."""

    fullname = fields.TextField(analyzer='with_morphology')
    category_name = fields.TextField(analyzer='with_morphology')
    fullname_translit = fields.TextField(analyzer='autocomplete')
    is_published = fields.BooleanField()
    views = fields.IntegerField()

    class Django:
        model = Offer

    @staticmethod
    def _product_text(product):
        """Join the product's article, search title and title with spaces."""
        pieces = [product.article, product.search_title, product.title]
        return ' '.join(pieces)

    def prepare_fullname(self, instance):
        """Return the space-joined product text for *instance*."""
        return self._product_text(instance.product)

    def prepare_category_name(self, instance):
        """Return the title of the product's category."""
        category = instance.product.category
        return category.title

    def prepare_fullname_translit(self, instance):
        """Return the product text passed through ``translit(..., 'ru')``."""
        joined = self._product_text(instance.product)
        return translit(joined, 'ru')

    def prepare_is_published(self, instance):
        """Return True unless the category or an ancestor is unpublished."""
        lineage = instance.product.category.get_ancestors(include_self=True)
        unpublished = lineage.filter(is_published=False)
        return not unpublished.exists()

    def prepare_views(self, instance):
        """Return the ``views`` value of the product's category."""
        category = instance.product.category
        return category.views
class ProductDocument(Document):
    """Search document for ``Product`` rows.

    ``title``, ``description`` and ``features`` are declared explicitly so
    that each gets ``custom_analyzer`` plus a ``raw`` keyword sub-field.
    """

    title = fields.TextField(
        analyzer=custom_analyzer,
        fields={'raw': fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=custom_analyzer,
        fields={'raw': fields.KeywordField()},
    )
    features = fields.TextField(
        analyzer=custom_analyzer,
        fields={'raw': fields.KeywordField()},
    )

    class Index:
        name = 'products'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Product
        # The commented-out names are the ones declared explicitly above.
        fields = [
            'id',
            # 'description',
            # 'features',
            # 'title',
        ]
class PaperDocument(Document):
    """Search document for ``Paper`` rows together with their authors."""

    class Index:
        name = 'papers'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'max_result_window': 100000,
        }

    authors = fields.ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'full_name': fields.TextField(),
        }
    )
    title = fields.TextField(analyzer='english')
    abstract = fields.TextField(analyzer='english')

    class Django:
        model = Paper
        related_models = [Author]

    def get_instances_from_related(self, related_instance):
        """Return the papers to re-index when a related object changes.

        For an ``Author`` this is ``related_instance.publications.all()``;
        any other object yields ``None``. Use ``related_models`` with care:
        one related change can trigger updates for many indexed items.
        """
        if not isinstance(related_instance, Author):
            return None
        return related_instance.publications.all()
class NewCollegeBasicInfoDocument(Document):
    """Search document for ``NewCollegeBasicInfo`` rows.

    Both the college name and the city name carry a ``suggest``
    completion sub-field.
    """

    # Sourced from the model's ``college_name`` attribute.
    name = fields.TextField(
        attr='college_name',
        fields={'suggest': fields.Completion()},
    )
    city_name = fields.TextField(
        fields={'suggest': fields.Completion()},
    )

    def prepare_city_name(self, instance):
        """Look up the city row whose id is ``instance.city``.

        Returns the row's ``city`` attribute, or the string ``"default"``
        when no such row exists (or the row evaluates as falsy).
        """
        try:
            city_row = NewCollegeCollegeCities.objects.get(id=instance.city)
        except NewCollegeCollegeCities.DoesNotExist:
            return "default"
        return city_row.city if city_row else "default"

    class Django:
        model = NewCollegeBasicInfo
        fields = [
            'id',
            'college_name',
        ]
        queryset_pagination = 50
class ExerciseDocument(Document):
    """Search document describing an ``Exercise``."""

    id = fields.IntegerField(attr='id')

    exercise_title = fields.TextField(
        attr='exercise_title',
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword', multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
    )

    # Sourced from the model's ``muscle_group_indexing`` attribute.
    muscle_group_title = fields.TextField(
        attr='muscle_group_indexing',
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword', multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True,
    )

    class Django(object):
        """Binds this document to its Django model."""

        model = Exercise  # The model associated with this Document
class NoteDocument(Document):
    """Search document for ``Note`` rows stored in ``search_note``."""

    label = fields.ObjectField(properties={'name': fields.TextField()})
    title = fields.TextField(analyzer=html_strip)
    reminder = fields.StringField()
    note = fields.TextField(analyzer=html_strip)
    user_id = fields.IntegerField()
    color = fields.StringField()

    class Index:
        # Name of the Elasticsearch index
        name = 'search_note'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Note
class JobListingDocument(DocType):
    """Search document for ``JobListing`` rows.

    The description and location are additionally exposed as text fields
    with ``fielddata`` enabled.
    """

    keywords = fields.TextField(attr="description", fielddata=True)
    # shingles = fields.TextField(attr="description", analyzer="shingle", fielddata=True)
    # triple_shingles = fields.TextField(attr="description", analyzer="triple_shingle", fielddata=True)
    location_keywords = fields.TextField(attr="location", fielddata=True)
    location_shingles = fields.TextField(
        attr="location",
        analyzer="shingle",
        fielddata=True,
    )
    # location_triple_shingles = fields.TextField(attr="location", analyzer="triple_shingle", fielddata=True)

    class Meta:
        model = JobListing  # The model associated with this DocType

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'indeed_id',
            'posted_date',
            'title',
            'location',
            'company',
            'description',
        ]

        # Ignore auto updating of Elasticsearch when a model is saved
        # or deleted:
        # ignore_signals = True

        # Don't perform an index refresh after every update (overrides
        # global setting):
        # auto_refresh = False

        # Paginate the django queryset used to populate the index with the
        # specified size (by default there is no pagination)
        # queryset_pagination = 5000

    @staticmethod
    def get_analyzer():
        """Return the ``keyword_analyzer`` object."""
        return keyword_analyzer
class SchoolDocument(Document):
    """Search document for ``School`` rows with alias and nickname data."""

    autocomplete = fields.TextField(analyzer=label_autocomplete)
    # Sourced from the model's ``primary_alias`` attribute.
    text = fields.TextField(attr='primary_alias', boost=10)
    nicknames = fields.TextField()

    def prepare_autocomplete(self, instance):
        """Return every alias string followed by every nickname string."""
        terms = [entry.alias for entry in instance.alias_set.all()]
        terms.extend(entry.nickname for entry in instance.nickname_set.all())
        return terms

    def prepare_nicknames(self, instance):
        """Return the list of nickname strings for *instance*."""
        related_nicknames = instance.nickname_set.all()
        return [entry.nickname for entry in related_nicknames]

    class Index:
        name = 'paying-for-college'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = School
        fields = [
            'school_id',
            'city',
            'state',
            'zip5',
        ]
class TweetsDocument(Document):
    """Search document for ``models.Tweet`` rows.

    Declares the list, object and analysed text fields explicitly; the
    remaining model fields are listed in ``Django.fields``.
    """

    # search_item = fields.TextField(attrs="search_item")

    # ``ListField`` wraps a field *instance*. Passing the ``TextField``
    # class itself would patch the shared class and leave this attribute
    # as something that is not a field object.
    excepted_words = fields.ListField(fields.TextField())
    author = fields.ObjectField()
    entities = fields.ObjectField()
    text = fields.TextField(
        fields={"raw": fields.KeywordField()},
        analyzer="text_analyser",
    )
    raw_text = fields.TextField()
    retweet_count = fields.IntegerField()

    class Django:
        model = models.Tweet
        fields = [
            "search_item",
            "id",
            # "author",
            "created_date",
            # "entities",
            "source",
            "lang",
        ]
        auto_refresh = False
        ignore_signals = False
        # Paginate the django queryset used to populate the index with the
        # specified size (by default there is no pagination)
        queryset_pagination = 1000
class TenderDoc(DocType):
    """Search document for ``Tender`` rows.

    ``description`` and ``title`` share the same configuration: a
    case-insensitive analyzer, ``fielddata`` and a normalised ``raw``
    keyword sub-field.
    """

    description = fields.TextField(
        analyzer=case_insensitive_analyzer,
        fielddata=True,
        fields={
            'raw': fields.KeywordField(
                multi=True,
                ignore_above=256,
                normalizer=case_insensitive_normalizer,
            ),
        },
    )
    title = fields.TextField(
        analyzer=case_insensitive_analyzer,
        fielddata=True,
        fields={
            'raw': fields.KeywordField(
                multi=True,
                ignore_above=256,
                normalizer=case_insensitive_normalizer,
            ),
        },
    )
    reference = fields.KeywordField(attr='reference')

    class Meta:
        model = Tender
        fields = [
            'unspsc_codes',
            'cpv_codes',
            'organization',
            'source',
            'notified',
        ]
class MemberDocument(Document):
    """Search document describing a ``Members`` row."""

    id = fields.IntegerField(attr='id')

    full_name = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    birthday = fields.DateField()

    roles = fields.NestedField(
        properties={
            # Each nested ``title`` is read from the ``role`` attribute.
            'title': fields.TextField(
                analyzer=html_strip,
                attr='role',
                fields={'raw': KeywordField()},
            ),
        }
    )

    # Sourced from the model's ``get_absolute_url`` attribute.
    member_url = fields.TextField(attr='get_absolute_url')
    image = fields.FileField(attr="image")

    class Django(object):
        """Binds this document to its Django model."""

        model = Members  # The model associated with this Document
class CompanyDocument(DocType):
    """Search document for ``Company`` rows."""

    # ID
    id = fields.IntegerField(attr='id')

    avatar = fields.TextField()
    slug = fields.StringField()
    name = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )
    about = fields.TextField()
    headquarter = fields.TextField()
    is_address_public = fields.BooleanField()
    website = fields.StringField()
    since = fields.StringField()
    size_from = fields.IntegerField()
    size_to = fields.IntegerField()

    # Sourced from the model's ``create_field_indexing`` attribute.
    creator = fields.ObjectField(
        attr='create_field_indexing',
        properties={'id': fields.IntegerField()},
    )
    # Sourced from the model's ``tag_field_indexing`` attribute.
    tags = fields.ObjectField(
        attr='tag_field_indexing',
        properties={
            'id': fields.IntegerField(),
            'name': fields.StringField(),
        },
    )

    class Django:
        model = Company
class PersonDocument(BaseDocument):
    """Search document for ``Author`` rows stored in the ``person`` index."""

    profile_image = es_fields.TextField(attr='profile_image_indexing')
    user_reputation = es_fields.IntegerField(attr='user_reputation_indexing')
    author_score = es_fields.IntegerField(attr='author_score')
    description = es_fields.TextField(
        attr='description',
        analyzer=content_analyzer,
    )
    full_name = es_fields.TextField(
        attr='full_name',
        analyzer=content_analyzer,
    )
    person_types = es_fields.KeywordField(attr='person_types_indexing')
    headline = es_fields.ObjectField(
        attr='headline',
        properties={
            'title': es_fields.TextField(),
        },
    )

    class Index:
        name = 'person'

    class Django:
        model = Author
        fields = [
            'id',
            'first_name',
            'last_name',
        ]

    def should_remove_from_index(self, obj):
        """Return True when the author's linked user is suspended.

        The user is looked up by ``obj.user_id``. The check is best-effort:
        if the lookup or the attribute access fails for any reason the
        method returns False, and the failure is logged at debug level
        instead of being silently discarded.
        """
        import logging

        try:
            author_user = User.objects.get(id=obj.user_id)
            return bool(author_user.is_suspended)
        except Exception:
            # Deliberately broad: indexing must not crash because a user
            # row is missing or malformed. ``getattr`` keeps the log call
            # itself from raising when ``obj`` has no ``user_id``.
            logging.getLogger(__name__).debug(
                "Suspension check failed for user_id=%r; keeping document",
                getattr(obj, 'user_id', None),
                exc_info=True,
            )
            return False
class ElasticSearchDocument(ESDocument):
    """Elasticsearch document definition for ``MyDocument`` rows."""

    class Index:
        """Index name and settings."""

        # Name of the Elasticsearch index
        name = 'mydocument'  # must be lowercase for es requirements
        # See Elasticsearch Indices API reference for available settings
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Index-time and search-time analyzers differ for both text fields.
    content = fields.TextField(
        analyzer="ik_max_word",
        search_analyzer="ik_smart",
    )
    title = fields.TextField(
        analyzer="ik_max_word",
        search_analyzer="ik_smart",
    )

    class Django:
        """Django binding; lets ES be updated automatically on db changes."""

        model = MyDocument  # The model associated with this ES Document

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'id',
            'status',
            'src',
            # 'content', # configured manually above (Due to Chinese analyzer)
        ]
class PostDocument(DocType):
    """Elasticsearch adapter class for ``Post``.

    Written by following the django-elasticsearch-dsl conventions.
    """

    title = fields.TextField(
        analyzer=ik_max_word,
        search_analyzer=ik_max_word,
    )
    content = fields.TextField(
        analyzer=ik_max_word,
        search_analyzer=ik_max_word,
    )
    category = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'cate_id': fields.TextField(),
        }
    )

    def get_queryset(self):
        """Return the parent queryset with ``category`` select-related."""
        base_queryset = super(PostDocument, self).get_queryset()
        return base_queryset.select_related('category')

    def get_instances_from_related(self, related_instance):
        """Return the posts of a changed ``Category``.

        Returns ``related_instance.in_category_posts.all()`` for a
        ``Category`` and ``None`` for anything else.
        """
        if not isinstance(related_instance, Category):
            return None
        return related_instance.in_category_posts.all()

    class Meta:
        model = Post
        fields = [
            'abstract',
            'create_time',
            'update_time',
            'status',
        ]
        related_models = [
            Category,
        ]
class ArticleDocument(Document):
    """Search document for ``Articles`` rows with category, cover and tags."""

    category = fields.ObjectField(
        properties={
            'id': fields.TextField(),
            'name': fields.TextField(),
            'image': fields.FileField(),
        }
    )
    cover = fields.FileField()
    tags = fields.ListField(fields.TextField())

    class Index:
        name = 'articles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 1}

    class Django:
        model = Articles
        fields = [
            'id',
            'updated_at',
            'title',
            'subtitle',
            'author_name',
            'user',
            'realease',
            'is_active',
            'slug',
        ]
        # related_models = [Category]

    def get_queryset(self):
        """Return the parent queryset with ``category`` select-related.

        Not mandatory, but fetching the category in the same SQL request
        improves performance.
        """
        # Zero-argument super() binds to this class itself. The explicit
        # two-argument form looks the class up by its module-level name at
        # call time, which breaks if that name is later rebound to another
        # class with the same name.
        return super().get_queryset().select_related('category')
class ArticleDocument(Document):
    """Search document for ``Article`` rows stored in ``articles``."""

    class Index:
        # Name of the Elasticsearch index
        name = 'articles'
        # See Elasticsearch Indices API reference for available settings
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Each of these reads the model attribute of the same name.
    get_absolute_url = fields.TextField(attr="get_absolute_url")
    get_type_display = fields.TextField(attr="get_type_display")
    get_lead = fields.TextField(attr="get_lead")
    section = fields.TextField()

    def prepare_section(self, instance):
        """Return the section's ``__unicode__()`` text, or an empty string.

        NOTE(review): relies on the section object defining
        ``__unicode__`` -- confirm this holds on the Python version in use.
        """
        if instance.section:
            return instance.section.__unicode__()
        return u''

    class Django:
        model = Article  # The model associated with this Document

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'headline',
            'deck',
            'lead',
            'body',
            'home_lead',
            'is_published',
            'date_published',
            'type',
        ]
class NoteDocument(Document):
    """Search document for the ``Note`` index."""

    id = fields.IntegerField(attr='id')
    # Sourced from the model's ``user_id`` attribute.
    user = fields.KeywordField(attr='user_id')
    course_id = fields.KeywordField()
    usage_id = fields.KeywordField()
    quote = fields.TextField(analyzer=html_strip)
    text = fields.TextField(analyzer=html_strip)
    ranges = fields.KeywordField()
    created = fields.DateField()
    updated = fields.DateField()
    tags = fields.TextField(multi=True, analyzer=case_insensitive_keyword)

    def prepare_data(self, instance):
        """Return the note text immediately followed by its raw tags value.

        NOTE(review): no field named ``data`` is declared in this class --
        confirm where this method is used.
        """
        note_text = instance.text
        raw_tags = instance.tags
        return '{0}{1}'.format(note_text, raw_tags)

    def prepare_tags(self, instance):
        """Return ``instance.tags`` decoded from JSON."""
        raw_tags = instance.tags
        return json.loads(raw_tags)

    class Django:
        model = Note

    class Meta:
        parallel_indexing = True
        queryset_pagination = 50
class ManufacturerDocument(Document):
    """Search document for ``Manufacturer`` rows and their country."""

    name = fields.TextField()
    address = fields.TextField()
    country = fields.ObjectField(
        properties={
            'name': fields.TextField(),
        }
    )

    class Index:
        name = 'manufacturers'

    class Django:
        model = Manufacturer
        related_models = [Country, ]

    def get_queryset(self):
        """Return the parent queryset with the country select-related."""
        # select_related() takes relation names, not column attnames.
        # 'country_id' is not a relation, so Django rejects it with a
        # FieldError when the query is built.
        # NOTE(review): assumes the foreign key is named ``country``, as
        # the ``country`` object field above implies -- confirm against
        # the Manufacturer model.
        return super(ManufacturerDocument, self).get_queryset().select_related(
            'country',
        )

    def get_instances_from_related(self, related_instance):
        """Return the manufacturers to re-index when a related object changes.

        For a ``Country`` this is ``related_instance.manufacturer_set.all()``;
        for any other object an empty list is returned. Use
        ``related_models`` with care: one related change can trigger
        updates for many indexed items.
        """
        if isinstance(related_instance, Country):
            return related_instance.manufacturer_set.all()
        return []
class UniversityDocument(Document):
    """Search document describing a ``University``.

    Every text field that is filtered or sorted on gets a ``raw`` keyword
    sub-field normalised with ``keyword_normalizer``.
    """

    id = fields.IntegerField(attr='id')

    name = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )
    domain = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )
    web_page = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )

    country = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'code': fields.TextField(
                fields={
                    'raw': fields.KeywordField(normalizer=keyword_normalizer),
                },
            ),
        }
    )

    createdAt = fields.DateField()
    isDelete = fields.BooleanField()

    class Django(object):
        """Binds this document to its Django model."""

        model = University  # The model associated with this DocType
class ProjectDocument(RTDDocTypeMixin, DocType):
    """Search document for ``Project`` rows."""

    class Meta(object):
        model = Project
        fields = ('name', 'slug', 'description')
        auto_refresh = False

    # Sourced from the model's ``get_absolute_url`` attribute.
    url = fields.TextField(attr='get_absolute_url')
    users = fields.NestedField(
        properties={
            'username': fields.TextField(),
            'id': fields.IntegerField(),
        }
    )
    language = fields.KeywordField()

    @classmethod
    def faceted_search(cls, query, language=None, using=None, index=None):
        """Build a ``ProjectSearch`` for *query*.

        ``using`` and ``index`` fall back to the values on
        ``cls._doc_type`` when falsy. A truthy ``language`` is passed on as
        a ``filters`` entry.
        """
        doc_type = cls._doc_type
        search_kwargs = dict(
            using=using or doc_type.using,
            index=index or doc_type.index,
            doc_types=[cls],
            model=doc_type.model,
            query=query,
        )
        if language:
            search_kwargs['filters'] = {'language': language}
        return ProjectSearch(**search_kwargs)
class ProjectDocument(DocType):
    """Search document for ``Project`` rows."""

    # Metadata
    url = fields.TextField(attr='get_absolute_url')
    users = fields.NestedField(
        properties={
            'username': fields.TextField(),
            'id': fields.IntegerField(),
        }
    )
    language = fields.KeywordField()

    modified_model_field = 'modified_date'

    class Meta:
        model = Project
        fields = ('name', 'slug', 'description')
        ignore_signals = True

    @classmethod
    def faceted_search(cls, query, user, language=None):
        """Build a ``ProjectSearch`` for *query* on behalf of *user*.

        A truthy ``language`` is passed on as a ``filters`` entry.
        """
        # Imported inside the method, as in the original code.
        from readthedocs.search.faceted_search import ProjectSearch

        search_kwargs = dict(user=user, query=query)
        if language:
            search_kwargs['filters'] = {'language': language}
        return ProjectSearch(**search_kwargs)
class NoteDocument(DocType):
    """Search document describing the indexed fields of the ``Note`` model.

    All four fields share one shape: analysed with ``html_strip`` and
    accompanied by a ``raw`` sub-field using the ``keyword`` analyzer.
    """

    title = fields.StringField(
        analyzer=html_strip,
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    color = fields.StringField(
        analyzer=html_strip,
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    reminder = fields.StringField(
        analyzer=html_strip,
        fields={'raw': fields.StringField(analyzer='keyword')},
    )

    class Meta(object):
        # Binds the document to its model.
        model = Note
class TokenSearch(Document):
    """Search document for ``Token`` rows stored in the ``tokens`` index."""

    # Sourced from the model's ``source_to_str`` attribute.
    source = fields.TextField(attr="source_to_str")
    searchq = fields.TextField(analyzer='keyword')

    class Index:
        # name of the elasticsearch index
        name = 'tokens'
        # see Elasticsearch Indices API reference for available settings
        settings = common_settings

    class Django:
        model = Token
        fields = common_fields

    def to_dict(self):
        """Return a plain dict of this document's values.

        ``date`` is rendered with ``dateformat.format`` using
        ``datestring``; all other entries are copied from the attribute of
        the same name.

        NOTE(review): this override takes no arguments. If the base class
        defines ``to_dict`` with keyword arguments, callers that pass them
        will fail here -- confirm.
        """
        payload = {
            "id": self.id,
            "name": self.name,
            "priority": self.priority,
            "source": self.source,
            "source_verbose": self.source_verbose,
            "dpi": self.dpi,
            "searchq": self.searchq,
            "thumbpath": self.thumbpath,
        }
        payload["date"] = dateformat.format(self.date, datestring)
        return payload
class TMContentDocument(Document):
    """Search document for ``TMContent`` rows with their translation memory."""

    class Index:
        # ``name`` must be assigned before ``settings`` is rebound below:
        # at this point ``settings`` still refers to the outer-scope object.
        name = settings.INDEX_TM
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = TMContent
        fields = [
            'id',
            'src_sentence',
            'tar_sentence',
        ]
        related_models = [TranslationMemory]

    translation_memory = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'name': fields.TextField(),
            'description': fields.TextField(),
            'src_lang': fields.TextField(),
            'tar_lang': fields.TextField(),
            # Read from the ``get_user_id`` attribute of the memory.
            'user': fields.TextField(attr="get_user_id"),
        }
    )

    def get_instances_from_related(self, related_instance):
        """Map a changed related object to the documents' model instances.

        For a ``TranslationMemory`` this returns ``tmcontent_set.all()``;
        for any other object it returns ``None``.
        """
        if not isinstance(related_instance, TranslationMemory):
            return None
        return related_instance.tmcontent_set.all()
class CarDocument(DocType):
    """Search document for ``Car`` rows and their manufacturer."""

    name = fields.TextField(
        attr='name',
        fields={'suggest': fields.Completion()},
    )
    manufacturer = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'country_code': fields.TextField(),
        }
    )
    # Sourced from the model's ``get_auction_title`` attribute.
    auction_title = fields.TextField(attr='get_auction_title')
    points = fields.IntegerField()

    def prepare_points(self, instance):
        """Return 2 for a silver car and 1 for any other colour."""
        return 2 if instance.color == 'silver' else 1

    class Meta:
        model = Car
        fields = [
            'id',
            'color',
            'description',
            'type',
        ]
        related_models = [Manufacturer]

    def get_queryset(self):
        """Return the parent queryset with ``manufacturer`` select-related."""
        base_queryset = super().get_queryset()
        return base_queryset.select_related('manufacturer')

    def get_instances_from_related(self, related_instance):
        """Return the cars of a changed ``Manufacturer``.

        Returns ``related_instance.car_set.all()`` for a ``Manufacturer``
        and ``None`` for anything else.
        """
        if not isinstance(related_instance, Manufacturer):
            return None
        return related_instance.car_set.all()
class CarDocument(DocType):
    """Minimal ``Car`` document with two explicitly declared text fields.

    NOTE(review): ``name`` is declared explicitly below and also listed in
    ``Django.fields``. django-elasticsearch-dsl is expected to reject such
    a redeclaration when the document is registered -- confirm whether
    this duplication is intentional (e.g. a test fixture).
    """

    color = fields.TextField()
    name = fields.TextField()

    class Django:
        model = Car
        fields = ['name']
class HubDocument(BaseDocument):
    """Search document for ``Hub`` rows stored in the ``hub`` index."""

    auto_refresh = True

    paper_count = es_fields.IntegerField(attr='paper_count')
    subscriber_count = es_fields.IntegerField(attr='subscriber_count')
    discussion_count = es_fields.IntegerField(attr='discussion_count')
    description = es_fields.TextField(
        attr='description',
        analyzer=content_analyzer,
    )
    name = es_fields.TextField(attr='name', analyzer=title_analyzer)

    class Index:
        name = 'hub'

    class Django:
        model = Hub
        fields = [
            'id',
            'acronym',
            'is_locked',
        ]

    def should_remove_from_index(self, obj):
        """Return True when *obj* is removed or locked, otherwise False."""
        return bool(obj.is_removed or obj.is_locked)