class JobDocument(DocType):
    """Search document for ``Job`` with its founder data embedded."""

    # Founder details stored alongside each job.
    founder = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'startup_name': fields.StringField(),
            'description': fields.StringField(),
            # Value comes from the ``logo_to_string`` attribute.
            'logo': fields.StringField(attr="logo_to_string"),
            'is_filled': fields.BooleanField(),
            'field': fields.StringField(attr="field", analyzer=leave_default),
            # Account flags of the founder's user.
            'user': fields.ObjectField(
                properties={
                    'is_active': fields.BooleanField(),
                    'is_account_disabled': fields.BooleanField(),
                },
            ),
        },
    )

    # Display values, stored next to the raw values below.
    get_pay_display = fields.StringField(attr="get_pay_display")
    get_level_display = fields.StringField(attr="get_level_display")

    # Raw values, indexed with the ``leave_default`` analyzer.
    pay = fields.StringField(attr="pay", analyzer=leave_default)
    level = fields.StringField(attr="level", analyzer=leave_default)

    class Meta:
        model = Job
        # Model fields mapped automatically.
        fields = ['title', 'description']
class ExperimentDocument(Document):
    """Search document for ``Experiment`` in the ``experiments`` index."""

    class Index:
        name = 'experiments'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.IntegerField()

    # Analyzed text with an un-analyzed ``raw`` keyword sub-field.
    title = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )
    description = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )

    public_access = fields.IntegerField()

    # Timestamps.
    created_time = fields.DateField()
    start_time = fields.DateField()
    end_time = fields.DateField()
    update_time = fields.DateField()

    institution_name = fields.StringField()

    created_by = fields.ObjectField(
        properties={
            'username': fields.StringField(
                fields={'raw': fields.KeywordField()},
            ),
        },
    )
    objectacls = fields.ObjectField(
        properties={
            'pluginId': fields.StringField(),
            'entityId': fields.StringField(),
        },
    )

    class Django:
        model = Experiment
        # Changes to these models should refresh the affected experiments.
        related_models = [User, ObjectACL]

    def get_instances_from_related(self, related_instance):
        """Return the experiment(s) to re-index for a changed related object.

        A ``User`` maps to its ``experiment_set``; an ``ObjectACL`` maps to
        its ``content_object``. Any other type yields ``None``.
        """
        if isinstance(related_instance, User):
            result = related_instance.experiment_set.all()
        elif isinstance(related_instance, ObjectACL):
            # NOTE(review): content_object is returned as-is; confirm it is
            # always an Experiment for the ACLs that reach this point.
            result = related_instance.content_object
        else:
            result = None
        return result
class SiteReportDocument(Document):
    """Search document for ``SiteReport`` in the ``site_reports`` index."""

    class Index:
        # Name of the Elasticsearch index.
        name = "site_reports"
        # See the Elasticsearch Indices API reference for available settings.
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    # Only the slug of the related campaign / topic is indexed.
    campaign = fields.ObjectField(
        properties={"slug": fields.KeywordField()},
    )
    topic = fields.ObjectField(
        properties={"slug": fields.KeywordField()},
    )

    class Django:
        model = SiteReport
        # Model fields mapped automatically.
        fields = [
            "created_on",
            "assets_total",
            "assets_published",
            "assets_not_started",
            "assets_in_progress",
            "assets_waiting_review",
            "assets_completed",
            "assets_unpublished",
            "items_published",
            "items_unpublished",
            "projects_published",
            "projects_unpublished",
            "anonymous_transcriptions",
            "transcriptions_saved",
            "distinct_tags",
            "tag_uses",
            "campaigns_published",
            "campaigns_unpublished",
            "users_registered",
            "users_activated",
        ]
def common_setfields(model, attr=None):
    """Build the shared ``ObjectField`` mapping for a set-like relation.

    Args:
        model: Property name under which the related object's ``pk`` is
            stored in the mapping.
        attr: Attribute used to read that related object; defaults to
            ``model`` when omitted.

    Returns:
        An ``ObjectField`` with ``descriptions``, the ``model`` property and
        ``comments`` (each comment carrying its ``user``).

    ``text_field`` and ``string_field`` are helpers defined elsewhere; they
    are presumably called with the source attribute name -- TODO confirm.
    """
    if attr is None:
        attr = model

    descriptions = ObjectField(
        properties={
            'text': text_field("text"),
            'pk': fields.IntegerField(),
        },
        multi=True,
    )
    related = ObjectField(
        attr=attr,
        properties={
            "pk": fields.FloatField(),
        },
    )
    comment_user = fields.ObjectField(
        properties={
            'first_name': string_field("first_name"),
            'last_name': string_field("last_name"),
            # Fixed: this used to be string_field("last_name"), which
            # indexed the user's last name under the ``pk`` key.
            'pk': string_field("pk"),
            'username': string_field("username"),
        },
    )
    comments = fields.ObjectField(
        properties={
            'text': text_field("text"),
            'user': comment_user,
        },
        multi=True,
    )

    return ObjectField(
        properties={
            "descriptions": descriptions,
            model: related,
            "comments": comments,
        },
    )
class CompanyDocument(DocType):
    """Search document for ``Company``."""

    # ID
    id = fields.IntegerField(attr='id')

    avatar = fields.TextField()
    slug = fields.StringField()

    # Analyzed with ``html_strip``; ``raw`` keeps the un-analyzed keyword.
    name = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        },
    )

    about = fields.TextField()
    headquarter = fields.TextField()
    is_address_public = fields.BooleanField()
    website = fields.StringField()
    since = fields.StringField()

    # Company size range.
    size_from = fields.IntegerField()
    size_to = fields.IntegerField()

    # Values are read from the ``*_field_indexing`` attributes.
    creator = fields.ObjectField(
        attr='create_field_indexing',
        properties={'id': fields.IntegerField()},
    )
    tags = fields.ObjectField(
        attr='tag_field_indexing',
        properties={
            'id': fields.IntegerField(),
            'name': fields.StringField(),
        },
    )

    class Django:
        model = Company
class BillDocument(DocType):
    """Search document for ``Bill``."""

    # Fields declared without explicit sub-properties.
    sponsors = fields.NestedField()
    action_dates = fields.ObjectField()
    companions = fields.NestedField()
    votes = fields.ObjectField()
    sources = fields.ObjectField()
    documents = fields.ObjectField()
    bill_versions = fields.NestedField()

    # Bill text and its analysis.
    bill_text = fields.TextField()
    bill_text_analysis = fields.ObjectField()

    class Meta:
        model = Bill
        # Model fields mapped automatically.
        fields = [
            'created',
            'modified',
            'state',
            'state_id',
            'remote_id',
            'session',
            'chamber',
            'title',
            'summary',
            'type',
            'content',
            'bill_raw_text',
        ]
class TweetsDocument(Document):
    """Search document for ``models.Tweet``."""

    # search_item = fields.TextField(attrs="search_item")

    # Fixed: ListField wraps a field *instance*. The original passed the
    # ``fields.TextField`` class itself (``field=fields.TextField``).
    excepted_words = fields.ListField(fields.TextField())

    author = fields.ObjectField()
    entities = fields.ObjectField()

    # Analyzed with "text_analyser"; ``raw`` keeps the un-analyzed keyword.
    text = fields.TextField(
        fields={"raw": fields.KeywordField()},
        analyzer="text_analyser",
    )
    raw_text = fields.TextField()
    retweet_count = fields.IntegerField()

    class Django:
        model = models.Tweet
        # Model fields mapped automatically; "author" and "entities" are
        # declared explicitly on the document instead.
        fields = [
            "search_item",
            "id",
            # "author",
            "created_date",
            # "entities",
            "source",
            "lang",
        ]
        auto_refresh = False
        ignore_signals = False
        # Paginate the django queryset used to populate the index with the
        # specified size (by default there is no pagination).
        queryset_pagination = 1000
class DocketEntryDocument(DocType):
    """Search document for ``models.DocketEntry``."""

    court = fields.ObjectField(
        properties={
            'description': fields.TextField(),
            'name': fields.KeywordField(),
        },
    )

    # Components of the case, each stored as an exact-match keyword.
    case = fields.ObjectField(
        properties={
            'year': fields.KeywordField(),
            'number': fields.KeywordField(),
            'office': fields.KeywordField(),
            'type': fields.KeywordField(),
        },
    )

    class Meta:
        model = models.DocketEntry
        fields = [
            'case_number',
            'case_name',
            'title',
            'time_filed',
        ]
        # related_models = [models.Court]
        ignore_signals = True
        auto_refresh = False

    def get_queryset(self):
        """Return the queryset of docket entries to index.

        Joins ``court``, drops entries with a blank ``case_name`` and keeps
        only the first 1000 rows.
        """
        entries = super().get_queryset().select_related('court')
        # FIXME out of +1M entries, only 10 have a blank case_name, for now
        # just exclude them. ES doesn't allow empty values on completions.
        entries = entries.exclude(case_name='')
        # For testing, only index the first X items.
        return entries[:1000]
class UserDocument(Document):
    """Search document for ``User`` with ``post`` data embedded."""

    post = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'statement': fields.TextField(),
            'location': fields.ObjectField(
                properties={
                    'city': fields.TextField(),
                    'country': fields.TextField(),
                },
            ),
            'file': fields.ObjectField(
                properties={
                    'file': fields.FileField(),
                },
            ),
            'created_date': fields.DateField(),
        },
    )

    class Django:
        model = User
        # Model fields mapped automatically.
        fields = [
            'id',
            'username',
            'birthday',
            'avatar',
            'info',
            'web_page',
            'active',
            'timestamp',
        ]
        # NOTE(review): related_models is set, but no
        # get_instances_from_related is visible on this document -- confirm
        # it is provided elsewhere.
        related_models = [Post]
class TagDocument(Document):
    """Search document for ``TagPost`` with its post and tag embedded."""

    post = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'statement': fields.TextField(),
            'file': fields.ObjectField(
                properties={
                    'file': fields.FileField(),
                },
            ),
            'created_date': fields.DateField(),
            'user': fields.ObjectField(
                properties={
                    'username': fields.TextField(),
                    'avatar': fields.FileField(),
                },
            ),
        },
    )
    tag = fields.ObjectField(
        properties={
            'title': fields.TextField(),
        },
    )

    class Django:
        model = TagPost
        fields = [
            'title',
        ]
        # There is a bug here. If I can't solve it, I think it works with
        # NestedField; in that case start from Tag and try to fetch the
        # TagPost and its post.
        related_models = [Post, Tag]
class TagCollectionDocument(DocType):
    """Search document for ``UserAssetTagCollection``."""

    # Each tag is indexed as its own nested object.
    tags = fields.NestedField(
        properties={"value": fields.TextField()},
    )

    # Asset with its item -> project -> campaign chain embedded.
    asset = fields.ObjectField(
        properties={
            "title": fields.TextField(),
            "slug": fields.TextField(),
            "transcription_status": fields.TextField(),
            "item": fields.ObjectField(
                properties={
                    "item_id": fields.TextField(),
                    "project": fields.ObjectField(
                        properties={
                            "slug": fields.TextField(),
                            "campaign": fields.ObjectField(
                                properties={"slug": fields.TextField()},
                            ),
                        },
                    ),
                },
            ),
        },
    )

    user = fields.ObjectField(
        properties={"username": fields.TextField()},
    )

    class Meta:
        model = UserAssetTagCollection
        fields = ["created_on", "updated_on"]
class MotionDocument(DocType):
    """Search document for ``Motion``."""

    convention = fields.ObjectField(
        properties={
            'label': fields.KeywordField(),
            'slot': fields.KeywordField(),
            'year': fields.DateField(),
        },
    )
    section = fields.ObjectField(
        properties={
            'name': fields.KeywordField(),
        },
    )

    identifier = fields.KeywordField()

    # Nested objects carrying only a ``name``.
    submitters = fields.NestedField(
        properties={'name': fields.KeywordField()},
    )
    referrals = fields.NestedField(
        properties={'name': fields.KeywordField()},
    )

    status = fields.KeywordField()
    created = fields.DateField()

    class Meta:
        model = Motion
        # Model fields mapped automatically.
        fields = [
            'title',
            'body',
        ]
class AssetDocument(Document):
    """Search document for ``Asset`` in the ``assets`` index."""

    class Index:
        # Name of the Elasticsearch index.
        name = "assets"
        # See the Elasticsearch Indices API reference for available settings.
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    # Item with its project, campaign and topics embedded.
    item = fields.ObjectField(
        properties={
            "item_id": fields.KeywordField(),
            "project": fields.ObjectField(
                properties={
                    "slug": fields.KeywordField(),
                    "campaign": fields.ObjectField(
                        properties={"slug": fields.KeywordField()},
                    ),
                    "topics": fields.NestedField(
                        properties={"slug": fields.KeywordField()},
                    ),
                },
            ),
        },
    )

    transcription_status = fields.KeywordField()

    latest_transcription = fields.ObjectField(
        properties={
            "created_on": fields.DateField(),
            "updated_on": fields.DateField(),
            "accepted": fields.DateField(),
            "rejected": fields.DateField(),
            "submitted": fields.DateField(),
        },
    )

    submission_count = fields.IntegerField()

    def prepare_submission_count(self, instance):
        """Return the count of this asset's matching transcriptions.

        NOTE(review): the filter is ``submitted__isnull=True``, i.e. it
        counts transcriptions with no ``submitted`` value -- confirm this
        is the intended meaning of ``submission_count``.
        """
        matching = Transcription.objects.filter(
            asset=instance,
            submitted__isnull=True,
        )
        return matching.count()

    class Django:
        model = Asset
        # Model fields mapped automatically.
        fields = ["published", "difficulty", "slug", "sequence", "year"]

    def get_queryset(self):
        """Return assets ordered by ``pk`` with related rows prefetched."""
        ordered = super().get_queryset().order_by("pk")
        return ordered.prefetch_related(
            "item",
            "item__project",
            "item__project__topics",
            "item__project__campaign",
        )
class GroupDocument(Document):
    """Search document for ``Group``."""

    # Related objects are indexed by id only.
    city = fields.ObjectField(
        properties={"id": fields.IntegerField()},
    )
    club = fields.ObjectField(
        properties={"id": fields.IntegerField()},
    )
    sports = fields.NestedField(
        properties={"id": fields.IntegerField()},
    )

    class Django:
        model = Group
        # Model fields mapped automatically.
        fields = [
            "id",
            "name",
            "description",
            "cover_photo",
            "contact_email",
        ]
class CollectionDocument(Document):
    """Search document for ``Collection`` in the ``collections`` index."""

    class Index:
        name = 'collections'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    locale = fields.ListField(fields.KeywordField())

    # Keyword fields; several use the 'lowercase' normalizer.
    owner = fields.KeywordField(
        attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type', normalizer='lowercase')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url', normalizer='lowercase')
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')

    # Free-form objects, filled in by the prepare_* hooks below.
    extras = fields.ObjectField()
    identifier = fields.ObjectField()

    publisher = fields.KeywordField(attr='publisher', normalizer='lowercase')
    immutable = fields.KeywordField(attr='immutable')
    created_by = fields.KeywordField()

    class Django:
        model = Collection
        # Model fields mapped automatically.
        fields = [
            'full_name',
            'custom_validation_schema',
            'revision_date',
            'retired',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the value indexed as ``locale``.

        NOTE(review): ``get`` comes from outside this block and is called
        with ``[]`` as its second argument -- confirm that argument is the
        intended default rather than a lookup path.
        """
        return get(instance.supported_locales, [])

    @staticmethod
    def prepare_extras(instance):
        """Return ``extras`` passed through ``jsonify_safe``, or ``{}``."""
        if not instance.extras:
            return {}
        return jsonify_safe(instance.extras) or {}

    @staticmethod
    def prepare_identifier(instance):
        """Return ``identifier`` passed through ``jsonify_safe``, or ``{}``."""
        if not instance.identifier:
            return {}
        return jsonify_safe(instance.identifier) or {}

    @staticmethod
    def prepare_created_by(instance):
        """Return the username of the collection's creator."""
        creator = instance.created_by
        return creator.username
class ActDocument(Document):
    """Search document for ``Act`` in the ``acts`` index."""

    # Subject with its item and the item's period embedded.
    subject = fields.ObjectField(
        properties={
            'subject_title': fields.TextField(),
            'subject_url': fields.TextField(),
            'item': fields.ObjectField(
                properties={
                    'item_title': fields.TextField(),
                    'item_number': fields.IntegerField(),
                    'item_text': fields.TextField(),
                    'period': fields.ObjectField(
                        properties={
                            'period_text': fields.TextField(),
                            'start_date': fields.DateField(),
                            'end_date': fields.DateField(),
                            'parent_url': fields.TextField(),
                            'period_url': fields.TextField(),
                        },
                    ),
                },
            ),
        },
    )

    class Index:
        # Name of the Elasticsearch index.
        name = 'acts'
        # See the Elasticsearch Indices API reference for available settings.
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    content = fields.TextField(analyzer=act_analyzer)

    class Django:
        model = Act
        fields = [
            'id',
            'title',
            'content_url',
            'file_type',
            'city',
        ]
        # Optional: to ensure the Act will be re-saved when Subject is
        # updated.
        related_models = [Subject, Item, Period]

    # def get_queryset(self):
    #     """Not mandatory, but select_related in one SQL request improves
    #     performance."""
    #     return super(ActDocument, self).get_queryset().select_related(
    #         'subject', 'subject__item', 'subject__item__period',
    #     )

    def get_instances_from_related(self, related_instance):
        """Return the acts to re-index for a changed ``Subject``.

        Only ``Subject`` is handled; any other type yields ``None``.
        """
        if not isinstance(related_instance, Subject):
            return None
        return related_instance.act_set.all()
def get_resource_field(options: Optional[Dict] = None) -> fields.ObjectField:
    """Build the ``ObjectField`` mapping used for a resource.

    Args:
        options: Keyword arguments forwarded to the ``main_title``
            ``TextField``. ``None`` (the default) means no extra arguments.

    Returns:
        An ``ObjectField`` with an integer ``id`` and a ``title`` object
        holding ``main_title``.
    """
    # The default used to be a shared mutable ``{}``, and an explicit
    # ``None`` crashed on ``**options``; normalize both cases here.
    title_options = {} if options is None else options
    return fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "title": fields.ObjectField(
                properties={
                    "main_title": fields.TextField(**title_options),
                },
            ),
        },
    )
class RepositoryExampleDocument(Document):
    """Search document for ``RepositoryExample``."""

    time_based = False

    repository_version_language = fields.ObjectField(
        properties={
            "pk": fields.IntegerField(),
            "language": fields.TextField(
                fields={"raw": fields.KeywordField()},
            ),
        },
    )
    intent = fields.ObjectField(
        properties={
            "text": fields.TextField(
                fields={"raw": fields.KeywordField()},
            ),
        },
    )
    entities = fields.NestedField(
        properties={
            "entity": fields.ObjectField(
                properties={
                    "value": fields.TextField(
                        fields={"raw": fields.KeywordField()},
                    ),
                },
            ),
        },
    )
    pk = fields.IntegerField()

    class Django:
        model = RepositoryExample
        fields = [
            "id",
            "text",
        ]
        # Changes to these models should refresh the affected examples.
        related_models = [
            RepositoryVersionLanguage,
            RepositoryIntent,
            RepositoryExampleEntity,
        ]

    def get_queryset(self):
        """Return examples with related rows joined or prefetched."""
        base = super().get_queryset()
        joined = base.select_related(
            "repository_version_language",
            "intent",
        )
        return joined.prefetch_related(
            "entities",
            "translations",
        )

    def get_instances_from_related(self, related_instance):
        """Return the example(s) to re-index for a changed related object.

        Any type other than the three handled below yields ``None``.
        """
        if isinstance(related_instance, RepositoryVersionLanguage):
            return related_instance.added.all()
        if isinstance(related_instance, RepositoryIntent):
            return related_instance.repositoryexample_set.all()
        if isinstance(related_instance, RepositoryExampleEntity):
            return related_instance.repository_example
        return None
class ArticleDocument(Document):
    """Search document for ``Article`` in the ``blog`` index.

    Text fields are indexed with ``ik_max_word`` and searched with
    ``ik_smart``.
    """

    # The content is returned by ``get_body`` defined on the model.
    body = fields.TextField(
        analyzer='ik_max_word',
        search_analyzer='ik_smart',
        attr="get_body",
    )
    title = fields.TextField(
        analyzer='ik_max_word',
        search_analyzer='ik_smart',
    )

    author = fields.ObjectField(
        properties={
            'nickname': fields.TextField(
                analyzer='ik_max_word', search_analyzer='ik_smart'),
            'id': fields.IntegerField(),
        },
    )
    category = fields.ObjectField(
        properties={
            'name': fields.TextField(
                analyzer='ik_max_word', search_analyzer='ik_smart'),
            'id': fields.IntegerField(),
        },
    )
    tags = fields.ObjectField(
        properties={
            'name': fields.TextField(
                analyzer='ik_max_word', search_analyzer='ik_smart'),
            'id': fields.IntegerField(),
        },
    )

    # pub_time = fields.Date()
    # status = fields.Text()
    # comment_status = fields.Text()
    # type = fields.Text()
    # views = fields.Integer()
    # article_order = fields.Integer()

    class Index:
        name = 'blog'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        # The model associated with this Document.
        model = Article
        fields = [
            "pub_time",
            "status",
            "comment_status",
            "type",
            "views",
            "article_order",
        ]
        # Already redefined above, so there is no need to define them here.
class JobDocument(Document):
    """Search document for ``Job`` in the ``jobs`` index."""

    # Object/List fields
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    connectors_to_execute = fields.ListField(fields.KeywordField())

    # Normal fields
    errors = fields.TextField()

    # Keyword fields to allow aggregations/visualizations
    source = fields.KeywordField()
    status = fields.KeywordField()
    md5 = fields.KeywordField()
    tlp = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # Nested (ForeignKey) fields
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        },
    )
    # analyzer_reports and connector_reports share the same shape.
    analyzer_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        },
    )
    connector_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        },
    )

    class Index:
        # Name of the Elasticsearch index.
        name = "jobs"

    class Django:
        # The model associated with this Document.
        model = Job
        # The fields of the model you want to be indexed in Elasticsearch.
        fields = [
            "is_sample",
            "received_request_time",
            "finished_analysis_time",
        ]
class PostDocument(BaseDocument):
    """Search document for ``ResearchhubPost`` in the ``post`` index."""

    auto_refresh = True

    hubs_flat = es_fields.TextField(attr='hubs_indexing_flat')

    # Scores, counts and ids.
    hot_score = es_fields.IntegerField(attr='hot_score')
    score = es_fields.IntegerField(attr='score')
    discussion_count = es_fields.IntegerField(attr='discussion_count')
    unified_document_id = es_fields.IntegerField(attr='unified_document_id')

    title = es_fields.TextField(analyzer=title_analyzer)
    created_date = es_fields.DateField(attr='created_date')
    updated_date = es_fields.DateField(attr='updated_date')
    preview_img = es_fields.TextField(attr='preview_img')
    renderable_text = es_fields.TextField(
        attr='renderable_text',
        analyzer=content_analyzer,
    )
    created_by_id = es_fields.IntegerField(attr='created_by_id')

    # Values are read from the ``*_indexing`` attributes.
    authors = es_fields.ObjectField(
        attr='authors_indexing',
        properties={
            'first_name': es_fields.TextField(),
            'last_name': es_fields.TextField(),
            'full_name': es_fields.TextField(),
        },
    )
    hubs = es_fields.ObjectField(
        attr='hubs_indexing',
        properties={
            'hub_image': es_fields.TextField(),
            'id': es_fields.IntegerField(),
            'is_locked': es_fields.TextField(),
            'is_removed': es_fields.TextField(),
            'name': es_fields.KeywordField(),
        },
    )

    class Index:
        name = 'post'

    class Django:
        model = ResearchhubPost
        queryset_pagination = 250
        fields = [
            'id',
            'document_type',
        ]

    def should_remove_from_index(self, obj):
        """Return ``True`` when ``obj.is_removed`` is truthy, else ``False``."""
        return True if obj.is_removed else False
class NSEStockDocument(Document):
    """Search document for ``NSEHistoricalData`` in ``nse-stocks``."""

    symbol = fields.ObjectField(
        properties={
            'symbol_name': fields.TextField(),
        },
    )

    def get_queryset(self):
        """Return the base queryset with ``symbol`` joined.

        Not mandatory, but selecting the relation in one SQL request
        improves performance.
        """
        base = super().get_queryset()
        return base.select_related('symbol')

    def get_instances_from_related(self, related_instance):
        """Return the rows to re-index for a changed ``Symbol``.

        ``related_models`` should be used with caution because one change
        can update a lot of items in the index. Any type other than
        ``Symbol`` yields ``None``.
        """
        if not isinstance(related_instance, Symbol):
            return None
        return related_instance.nsehistoricaldata_set.all()

    class Index:
        name = 'nse-stocks'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = NSEHistoricalData
        # Model fields mapped automatically.
        fields = [
            'ISIN',
            'series',
            'open',
            'high',
            'low',
            'close',
            'last',
            'prev_close',
            'total_traded_quantity',
            'total_traded_value',
            'timestamp',
            'total_trades',
        ]
        related_models = [Symbol]
class MovieDocument(Document):
    """Search document for ``Movie`` in the ``movie`` index."""

    user = fields.ObjectField(
        properties={
            'username': fields.TextField(),
            'id': fields.IntegerField(),
        },
    )

    class Index:
        # Name of the Elasticsearch index.
        name = 'movie'
        # See the Elasticsearch Indices API reference for available settings.
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        # The model associated with this Document.
        model = Movie
        # The fields of the model you want to be indexed in Elasticsearch.
        fields = [
            'title',
            'description',
            'date',
            'hates',
            'likes',
            'id',
        ]
        related_models = [User]
        ignore_signals = False

    def get_instances_from_related(self, tag_instance):
        """Return the movies reachable from the changed related object.

        No type check is done: ``movie_set`` is read from whatever object
        is passed in.
        """
        movies = tag_instance.movie_set
        return movies.all()
class urldetailsDocument(DocType):
    """Search document for ``urldetails`` in the ``urldetails`` index."""

    user = fields.ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'username': fields.TextField(),
        },
    )

    class Index:
        name = 'urldetails'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = urldetails
        # Model fields mapped automatically.
        fields = [
            'full_url',
            'name',
            'short_url',
            'key',
            'created_on',
            'updated_on',
        ]
        related_models = [User]

    def get_queryset(self):
        """Return the base queryset with ``user`` joined."""
        base = super().get_queryset()
        return base.select_related('user')

    def get_instances_from_related(self, related_instance):
        """Return the rows to re-index for a changed ``User``.

        Any other type yields ``None``.
        """
        if not isinstance(related_instance, User):
            return None
        return related_instance.urldetails_set.all()
class TMContentDocument(Document):
    """Search document for ``TMContent``."""

    class Index:
        # Read the project setting first: the next line rebinds
        # ``settings`` inside this class body.
        name = settings.INDEX_TM
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = TMContent
        fields = [
            'id',
            'src_sentence',
            'tar_sentence',
        ]
        related_models = [TranslationMemory]

    # The owning translation memory, embedded in each content row.
    translation_memory = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'name': fields.TextField(),
            'description': fields.TextField(),
            'src_lang': fields.TextField(),
            'tar_lang': fields.TextField(),
            # Value comes from the ``get_user_id`` attribute.
            'user': fields.TextField(attr="get_user_id"),
        },
    )

    def get_instances_from_related(self, related_instance):
        """Return the rows to re-index for a changed ``TranslationMemory``.

        Any other type yields ``None``.
        """
        if not isinstance(related_instance, TranslationMemory):
            return None
        return related_instance.tmcontent_set.all()
class CorpusContentDocument(Document):
    """Search document for ``CorpusContent``."""

    class Index:
        # Read the project setting first: the next line rebinds
        # ``settings`` inside this class body.
        name = settings.INDEX_CORPUS
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = CorpusContent
        fields = [
            'id',
            'phrase',
        ]
        related_models = [Corpus]

    # The owning corpus, embedded in each content row.
    corpus = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'name': fields.TextField(),
            'description': fields.TextField(),
            'language': fields.TextField(),
            # Value comes from the ``get_user_id`` attribute.
            'user': fields.TextField(attr="get_user_id"),
        },
    )

    def get_instances_from_related(self, related_instance):
        """Return the rows to re-index for a changed ``Corpus``.

        Any other type yields ``None``.
        """
        if not isinstance(related_instance, Corpus):
            return None
        return related_instance.corpuscontent_set.all()
class PaperDocument(Document):
    """Search document for ``Paper`` in the ``papers`` index."""

    class Index:
        name = 'papers'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'max_result_window': 100000,
        }

    authors = fields.ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'full_name': fields.TextField(),
        },
    )

    # Both use the 'english' analyzer.
    title = fields.TextField(analyzer='english')
    abstract = fields.TextField(analyzer='english')

    class Django:
        model = Paper
        related_models = [Author]

    def get_instances_from_related(self, related_instance):
        """Return the papers to re-index for a changed ``Author``.

        ``related_models`` should be used with caution because one change
        can update a lot of items in the index. Any type other than
        ``Author`` yields ``None``.
        """
        if not isinstance(related_instance, Author):
            return None
        return related_instance.publications.all()
class OrganizationDocument(Document):
    """Search document for ``Organization`` in ``organizations``."""

    class Index:
        name = 'organizations'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.BooleanField(attr='public_can_view')

    # Keywords using the "lowercase" normalizer.
    name = fields.KeywordField(attr='name', normalizer="lowercase")
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer="lowercase")

    # Filled in by the prepare_* hooks below.
    extras = fields.ObjectField()
    user = fields.ListField(fields.KeywordField())

    class Django:
        model = Organization
        # Model fields mapped automatically.
        fields = [
            'is_active',
            'company',
            'location',
        ]

    @staticmethod
    def prepare_extras(instance):
        """Return the organization's ``extras``, or ``{}`` when falsy."""
        extras = instance.extras
        return extras if extras else {}

    @staticmethod
    def prepare_user(instance):
        """Return the usernames of the organization's members as a list."""
        usernames = instance.members.values_list('username', flat=True)
        return list(usernames)
class ManufacturerDocument(Document):
    """Search document for ``Manufacturer`` in the ``manufacturers`` index."""

    name = fields.TextField()
    address = fields.TextField()

    # Only the name of the related country is indexed.
    country = fields.ObjectField(
        properties={
            'name': fields.TextField(),
        },
    )

    class Index:
        name = 'manufacturers'

    class Django:
        model = Manufacturer
        related_models = [Country]

    def get_queryset(self):
        """Return the base queryset with the ``country`` relation joined.

        Fixed: the original passed ``'country_id'``. ``select_related``
        takes the relation name, and this document maps that relation as
        ``country``.
        NOTE(review): assumes the foreign key on ``Manufacturer`` is named
        ``country`` -- confirm against the model.
        """
        return super().get_queryset().select_related('country')

    def get_instances_from_related(self, related_instance):
        """Return the manufacturers to re-index for a changed ``Country``.

        ``related_models`` should be used with caution because one change
        can update a lot of items in the index. Any type other than
        ``Country`` yields an empty list.
        """
        if isinstance(related_instance, Country):
            return related_instance.manufacturer_set.all()
        return []
class StartupDocument(DocType):
    """Search document for ``Founder`` with its user and jobs embedded."""

    user = fields.ObjectField(
        properties={
            'is_active': fields.BooleanField(),
            'is_founder': fields.BooleanField(),
            'is_account_disabled': fields.BooleanField(),
            'first_name': fields.StringField(),
            'last_name': fields.StringField(),
            # Value is read through the ``last_activity.date`` path.
            'last_activity': fields.DateField(attr='last_activity.date'),
        },
    )

    # Jobs, with level/pay stored as their display values.
    job_set = fields.NestedField(
        properties={
            'title': fields.StringField(),
            'description': fields.StringField(),
            'level': fields.StringField(attr='get_level_display'),
            'pay': fields.StringField(attr='get_pay_display'),
        },
    )

    logo = fields.StringField(attr='logo_url')
    # logo_thumbnail = fields.StringField(attr='logo_thumbnail_url')

    # Display values, stored next to the raw values below.
    get_stage_display = fields.StringField(attr='get_stage_display')
    get_field_display = fields.StringField(attr='get_field_display')

    # Raw values, indexed with the ``leave_default`` analyzer.
    stage = fields.StringField(attr='stage', analyzer=leave_default)
    field = fields.StringField(attr='field', analyzer=leave_default)

    class Meta:
        model = Founder
        related_models = [MyUser, Job]
        fields = ['startup_name', 'description', 'is_filled', 'employee_count']

    def get_instances_from_related(self, related_instance):
        """Return the founder to re-index for a changed user or job.

        A ``MyUser`` maps to its ``founder`` when it has one; a ``Job``
        maps to its ``founder``. Everything else yields ``None``.
        """
        is_user = isinstance(related_instance, MyUser)
        if is_user and hasattr(related_instance, 'founder'):
            return related_instance.founder
        if isinstance(related_instance, Job):
            return related_instance.founder
        return None