class OutputDocument(Document):
    """Elasticsearch document describing the indexed fields of ``Output``."""

    pk = fields.IntegerField('pk')
    study = string_field('study_name')

    # Related records, stored as (lists of) small objects.
    group = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'count': fields.IntegerField(),
            'name': string_field('name'),
        }
    )
    individual = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        }
    )
    interventions = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
        multi=True,
    )

    substance = string_field("substance_name")
    choice = string_field("choice")
    ex = ObjectField(properties={'pk': string_field('pk')})
    normed = fields.BooleanField()
    calculated = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})
    timecourse = ObjectField(properties={'pk': fields.IntegerField()})

    # Numeric values are read from the ``null_*`` attributes of the model.
    value = fields.FloatField('null_value')
    mean = fields.FloatField('null_mean')
    median = fields.FloatField('null_median')
    min = fields.FloatField('null_min')
    max = fields.FloatField('null_max')
    se = fields.FloatField('null_se')
    sd = fields.FloatField('null_sd')
    cv = fields.FloatField('null_cv')

    unit = string_field('unit')
    time_unit = string_field('time_unit')
    time = fields.FloatField('null_time')
    tissue = string_field('tissue_name')
    measurement_type = string_field("measurement_type_name")

    # Permission-related fields.
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = Output
        # Do not auto-update Elasticsearch when a model is saved/deleted.
        ignore_signals = True
        # Do not refresh the index after every update.
        auto_refresh = False

    class Index:
        name = 'outputs'
        settings = {
            'number_of_shards': 5,
            'number_of_replicas': 1,
            'max_result_window': 100000,
        }
class MovieDocument(Document):
    """Elasticsearch document for the movie model.

    The model class is obtained at class-creation time through
    ``get_movie_model()``.
    """

    id = fields.IntegerField(attr="id")

    # Text fields with an additional keyword-analyzed ``raw`` sub-field.
    title = fields.TextField(
        fields={
            "raw": fields.TextField(analyzer="keyword"),
            "suggest": fields.CompletionField(),
        }
    )
    description = fields.TextField(
        fields={"raw": fields.TextField(analyzer="keyword")}
    )
    director = fields.TextField(
        fields={"raw": fields.TextField(analyzer="keyword")}
    )

    year = fields.IntegerField()
    poster = fields.TextField()
    imdb_rating = fields.FloatField()
    rating_average = fields.FloatField()
    rating_count = fields.IntegerField()
    view_count = fields.IntegerField()

    slug = fields.TextField(
        fields={"raw": fields.TextField(analyzer="keyword")}
    )
    # Values come from the model's ``genres_indexing`` attribute.
    genres = fields.TextField(
        attr="genres_indexing",
        fields={"raw": fields.TextField(analyzer="keyword", multi=True)},
        multi=True,
    )

    class Index:
        # Name of the Elasticsearch index
        name = "movie"
        # See Elasticsearch Indices API reference for available settings
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    class Django:
        Movie = get_movie_model()
        model = Movie

    # NOTE(review): this duplicates the ``Django`` options above; it looks
    # like a leftover from the older ``Meta``-based API -- confirm whether
    # it is still needed.
    class Meta:
        Movie = get_movie_model()
        model = Movie
class OutputInterventionDocument(Document):
    """Elasticsearch document for ``OutputIntervention`` rows."""

    study_sid = string_field('study_sid')
    study_name = string_field('study_name')

    # Primary keys of the related records.
    output_pk = fields.IntegerField('output_pk')
    intervention_pk = fields.IntegerField('intervention_pk')
    group_pk = fields.IntegerField('group_pk')
    individual_pk = fields.IntegerField('individual_pk')

    measurement_type = string_field("measurement_type")
    substance = string_field("substance")
    normed = fields.BooleanField()
    calculated = fields.BooleanField()
    tissue = string_field('tissue')
    time = fields.FloatField('time')
    time_unit = string_field('time_unit')
    unit = string_field('unit')
    choice = string_field('choice')

    # Output values.
    value = fields.FloatField('value')
    mean = fields.FloatField('mean')
    median = fields.FloatField('median')
    min = fields.FloatField('min')
    max = fields.FloatField('max')
    se = fields.FloatField('se')
    sd = fields.FloatField('sd')
    cv = fields.FloatField('cv')

    # Permission-related fields.
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = OutputIntervention
        # Do not auto-update Elasticsearch when a model is saved/deleted.
        ignore_signals = True
        # Do not refresh the index after every update.
        auto_refresh = False

    class Index:
        name = 'outputs_interventions'
        settings = {
            'number_of_shards': 5,
            'number_of_replicas': 1,
            'max_result_window': 100000,
        }

    def get_queryset(self):
        """Return the base queryset with ``intervention`` and ``output`` joined.

        Optional override: ``select_related`` fetches both relations in the
        same SQL request instead of one query per row.
        """
        base_queryset = super(OutputInterventionDocument, self).get_queryset()
        return base_queryset.select_related('intervention', 'output')
class PropertyAddressDocument(Document):
    """Elasticsearch document for ``PropertyAddress``."""

    id = fields.IntegerField(attr="id")
    # Indexed from the model's ``property_indexing`` attribute.
    property = fields.TextField(attr="property_indexing")
    address = fields.TextField()
    stateName = fields.TextField()
    latitude = fields.FloatField()
    longitude = fields.FloatField()

    class Django(object):
        """The model associated with this document."""

        model = PropertyAddress
class OutputDocument(DocType):
    """Elasticsearch doc type describing the indexed fields of ``Output``."""

    pk = fields.IntegerField('pk')
    study = string_field('study')

    # Related records, stored as (lists of) small objects.
    group = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'count': fields.IntegerField(),
            'name': string_field('name'),
        }
    )
    individual = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        }
    )
    interventions = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
        multi=True,
    )
    substance = ObjectField(properties={'name': string_field('name')})
    ex = ObjectField(properties={'pk': string_field('pk')})

    normed = fields.BooleanField()
    calculated = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})
    timecourse = ObjectField(properties={'pk': fields.IntegerField()})

    # Numeric values are read from the ``null_*`` attributes of the model.
    value = fields.FloatField('null_value')
    mean = fields.FloatField('null_mean')
    median = fields.FloatField('null_median')
    min = fields.FloatField('null_min')
    max = fields.FloatField('null_max')
    se = fields.FloatField('null_se')
    sd = fields.FloatField('null_sd')
    cv = fields.FloatField('null_cv')

    unit = string_field('unit')
    time_unit = string_field('time_unit')
    time = fields.FloatField('null_time')
    tissue = string_field('tissue')
    pktype = string_field("pktype_key")

    class Meta(object):
        model = Output
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Do not refresh the index after every update (overrides the
        # global setting).
        auto_refresh = False
class TimecourseDocument(Document):
    """Elasticsearch document describing the indexed fields of ``Timecourse``."""

    study = string_field('study_name')
    pk = fields.IntegerField('pk')

    # Related records, stored as (lists of) small objects.
    group = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
            'count': fields.IntegerField(),
        }
    )
    individual = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        }
    )
    interventions = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
        multi=True,
    )

    substance = string_field("substance_name")
    ex = ObjectField(properties={'pk': string_field('pk')})
    normed = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})
    pharmacokinetics = ObjectField(
        properties={'pk': fields.IntegerField()},
        multi=True,
    )

    # Multi-valued numeric series, read from the model's ``null_*`` attributes.
    value = fields.FloatField('null_value', multi=True)
    mean = fields.FloatField('null_mean', multi=True)
    median = fields.FloatField('null_median', multi=True)
    min = fields.FloatField('null_min', multi=True)
    max = fields.FloatField('null_max', multi=True)
    se = fields.FloatField('null_se', multi=True)
    sd = fields.FloatField('null_sd', multi=True)
    cv = fields.FloatField('null_cv', multi=True)

    unit = string_field('unit')
    time_unit = string_field('time_unit')
    figure = string_field('figure')
    time = fields.FloatField('null_time', multi=True)
    tissue = string_field('tissue_name')
    measurement_type = string_field("measurement_type_name")

    # Permission-related fields.
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = Timecourse
        # Do not auto-update Elasticsearch when a model is saved/deleted.
        ignore_signals = True
        # Do not refresh the index after every update.
        auto_refresh = False

    class Index:
        name = 'timecourses'
        settings = elastic_settings
def common_setfields(model, attr=None):
    """Build the ``ObjectField`` shared by set-like documents.

    The returned field has three properties: ``descriptions`` (multi-valued
    objects with ``text`` and ``pk``), an object stored under the key
    ``model``, and ``comments`` (multi-valued objects with ``text`` and the
    commenting ``user``).

    :param model: property name under which the model object is stored.
    :param attr: attribute the model object is read from; defaults to
        ``model`` when ``None``.
    :return: the assembled ``ObjectField``.
    """
    if attr is None:
        attr = model

    descriptions = ObjectField(
        properties={
            'text': text_field("text"),
            'pk': fields.IntegerField(),
        },
        multi=True,
    )

    comment_user = fields.ObjectField(
        properties={
            'first_name': string_field("first_name"),
            'last_name': string_field("last_name"),
            # Previously read from "last_name" (copy-paste slip), which
            # indexed the user's last name as their pk.
            'pk': string_field("pk"),
            'username': string_field("username"),
        }
    )
    comments = fields.ObjectField(
        properties={
            'text': text_field("text"),
            'user': comment_user,
        },
        multi=True,
    )

    return ObjectField(
        properties={
            "descriptions": descriptions,
            model: ObjectField(
                attr=attr,
                properties={
                    "pk": fields.FloatField(),
                },
            ),
            "comments": comments,
        }
    )
class MyModelDocument(Document):
    """Elasticsearch document for ``MyModel`` with explicitly declared fields."""

    # year = fields.CompletionField()
    # name = fields.CompletionField()
    # country = fields.CompletionField()
    # productID = fields.CompletionField()
    # id = fields.CompletionField()

    class Index:
        name = 'mymodels'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.IntegerField(attr='id')

    # Each field below carries a ``raw`` keyword sub-field
    # (the ``html_strip`` analyzer is currently disabled).
    name = fields.TextField(
        # analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )
    country = fields.TextField(
        # analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )
    year = fields.TextField(
        # analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )
    productID = fields.FloatField(
        # analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )

    class Django:
        model = MyModel
        # No model fields are mapped automatically; everything is declared above.
        fields = []
class GroupDocument(DocType):
    """Elasticsearch doc type for ``Group``."""

    pk = fields.IntegerField(attr='pk')
    name = string_field('name')
    count = fields.IntegerField(attr='count')

    parent = ObjectField(
        properties={
            'name': string_field('name'),
            'pk': fields.IntegerField('pk'),
            'count': fields.IntegerField('count'),
        }
    )
    study = ObjectField(
        properties={
            'name': string_field('name'),
            'pk': fields.IntegerField('pk'),
            'sid': fields.StringField('sid'),
        }
    )
    ex = ObjectField(properties={'pk': fields.IntegerField('pk')})

    # Multi-valued list of characteristica objects.
    characteristica_all_normed = fields.ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'category': string_field('category_key'),
            'choice': string_field('choice'),
            'value': fields.FloatField('value'),
            'mean': fields.FloatField(),
            'median': fields.FloatField(),
            'min': fields.FloatField(),
            'max': fields.FloatField(),
            'se': fields.FloatField(),
            'sd': fields.FloatField(),
            'cv': fields.FloatField(),
            'unit': string_field('unit'),
            'count': fields.IntegerField('count'),
        },
        multi=True,
    )

    class Meta:
        model = Group
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Do not refresh the index after every update (overrides the
        # global setting).
        auto_refresh = False
class CharacteristicaDocument(DocType):
    """Elasticsearch doc type for ``Characteristica``."""

    id = fields.IntegerField(attr='id')

    # String fields, each with a keyword-analyzed ``raw`` sub-field.
    group_name = fields.StringField(
        attr='group_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    group_pk = fields.IntegerField(attr='group_id')
    individual_name = fields.StringField(
        attr='individual_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    individual_pk = fields.IntegerField(attr='individual_id')
    category = fields.StringField(
        attr='category_key',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    unit = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )

    # Numeric values.
    count = fields.IntegerField()
    value = fields.FloatField(attr='value')
    mean = fields.FloatField(attr='mean')
    median = fields.FloatField(attr='median')
    min = fields.FloatField(attr='min')
    max = fields.FloatField(attr='max')
    se = fields.FloatField(attr='se')
    sd = fields.FloatField(attr='sd')
    cv = fields.FloatField(attr='cv')

    normed = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})

    class Meta:
        model = Characteristica
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Do not refresh the index after every update (overrides the
        # global setting).
        auto_refresh = False
class BookDocument(Document):
    """Elasticsearch document for ``Book``."""

    id = fields.IntegerField(attr='id')

    # Free-text fields with a ``raw`` keyword sub-field.
    title = fields.TextField(fields={'raw': fields.KeywordField()})
    description = fields.TextField(fields={'raw': fields.KeywordField()})
    summary = fields.TextField(fields={'raw': fields.KeywordField()})

    # Indexed from the model's ``publisher_indexing`` attribute.
    publisher = fields.KeywordField(
        attr='publisher_indexing',
        # analyzer=html_strip,
        # fields={
        #     'raw': fields.KeywordField(analyzer='keyword'),
        # }
    )
    publication_date = fields.DateField()
    state = fields.TextField(fields={'raw': fields.KeywordField()})
    isbn = fields.KeywordField(
        # analyzer=html_strip,
        # fields={
        #     'raw': fields.KeywordField(analyzer='keyword'),
        # }
    )

    price = fields.FloatField()
    pages = fields.IntegerField()
    stock_count = fields.IntegerField()

    # Indexed from the model's ``tags_indexing`` attribute.
    tags = fields.KeywordField(
        attr='tags_indexing',
        # analyzer=html_strip,
        # fields={
        #     'raw': fields.KeywordField(analyzer='keyword', multi=True),
        #     'suggest': fields.CompletionField(multi=True),
        # },
        multi=True,
    )

    class Django(object):
        """Django-side options of the document."""

        model = Book  # The model associated with this Document
class SubstanceDocument(Document):
    """Elasticsearch document for ``Substance``."""

    sid = string_field('sid')
    url_slug = string_field('url_slug')
    creator = string_field('creator_username')
    name = string_field('name')
    mass = fields.FloatField()
    charge = fields.FloatField()
    formula = string_field('formula')
    derived = fields.BooleanField()
    description = text_field('description')

    # Multi-valued related objects.
    parents = ObjectField(
        properties={
            'sid': string_field('sid'),
            'url_slug': string_field('url_slug'),
        },
        multi=True,
    )
    annotations = ObjectField(
        attr="annotations",
        multi=True,
        properties={
            "term": string_field("term"),
            "relation": string_field("relation"),
            "collection": string_field("collection"),
            "description": string_field("description"),
            "label": string_field("label"),
        },
    )
    synonyms = ObjectField(
        attr="synonyms",
        multi=True,
        properties={
            "name": string_field("name"),
        },
    )

    class Django:
        model = Substance
        # ``ignore_signals`` is False here, so save/delete signal handling
        # is left enabled for this document.
        ignore_signals = False
        # Do not refresh the index after every update.
        auto_refresh = False

    class Index:
        name = "substances"
        settings = elastic_settings
class MoviesDocument(Document):
    """Elasticsearch document for ``MoviesModel``."""

    created = fields.DateField()
    # ``title`` and ``genre`` carry a keyword-analyzed ``raw`` sub-field.
    title = fields.TextField(
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    year = fields.IntegerField()
    rating = fields.FloatField()
    genre = fields.TextField(
        fields={'raw': fields.TextField(analyzer='keyword')},
    )

    class Django(object):
        model = MoviesModel
class SpotifyDocument(Document):
    """Elasticsearch document for ``spotify_models.Spotify``."""

    # NOTE(review): several KeywordField declarations below pass
    # ``analyzer=``; Elasticsearch keyword mappings normally take a
    # normalizer rather than an analyzer -- confirm the target cluster
    # accepts this mapping.

    acousticness = fields.FloatField()
    artists = fields.KeywordField(
        multi=True,
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(analyzer='keyword')},
    )
    danceability = fields.FloatField()
    duration_ms = fields.IntegerField()
    energy = fields.FloatField()
    explicit = fields.IntegerField()
    id = fields.KeywordField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(analyzer='keyword')},
    )
    instrumentalness = fields.FloatField()
    key = fields.IntegerField()
    liveness = fields.FloatField()
    loudness = fields.FloatField()
    mode = fields.IntegerField()
    name = fields.KeywordField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(analyzer='keyword'),
            'suggest': fields.CompletionField(),
        },
    )
    popularity = fields.IntegerField()
    release_date = fields.DateField()
    speechiness = fields.FloatField()
    tempo = fields.FloatField()
    valence = fields.FloatField()
    year = fields.KeywordField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(analyzer='keyword')},
    )

    class Django:
        model = spotify_models.Spotify
class MovieDocument(Document):
    """Elasticsearch document for ``Movies``."""

    id = fields.IntegerField(attr='id')

    title = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    world_premiere = fields.DateField()
    country = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )
    rf_premiere = fields.DateField()

    # Nested list of categories, each with a title.
    categories = fields.NestedField(
        properties={
            'title': fields.TextField(
                analyzer=html_strip,
                fields={'raw': KeywordField()},
            ),
        }
    )

    rating_kp = fields.FloatField()
    rating_imdb = fields.FloatField()

    # Nested list of directors.
    directors = fields.NestedField(
        properties={
            'full_name': fields.TextField(analyzer=html_strip),
            'id': fields.IntegerField(),
        }
    )

    # ``image`` is read from the model's ``poster`` attribute and
    # ``movie_url`` from ``get_absolute_url``.
    image = fields.FileField(attr="poster")
    movie_url = fields.TextField(attr='get_absolute_url')

    class Django(object):
        """Django-side options of the document."""

        model = Movies  # The model associated with this Document
class DestinationDocument(DocType):
    """Elasticsearch doc type for ``Destination``."""

    # Explicitly declared fields; the last two read from helper attributes
    # on the model.
    price = fields.FloatField(attr=None)
    agency = fields.TextField(attr='agency_to_string')
    original_link = fields.TextField(attr='original_url_to_string')

    class Meta:
        model = Destination
        # Model fields listed by name in addition to the declarations above.
        fields = [
            'id',
            'image',
            'name',
            'description',
            'num_of_nights',
        ]
class SubstanceDocument(DocType):
    """Elasticsearch doc type for ``Substance``."""

    sid = string_field('sid')
    url_slug = string_field('url_slug')
    creator = string_field('creator_username')
    name = string_field('name')
    mass = fields.FloatField()
    charge = fields.FloatField()
    formula = string_field('formula')
    derived = fields.BooleanField()
    description = text_field('description')

    # Multi-valued list of parent substances.
    parents = ObjectField(
        properties={
            'sid': string_field('sid'),
            'url_slug': string_field('url_slug'),
        },
        multi=True,
    )

    class Meta(object):
        model = Substance
        # ``ignore_signals`` is False here, so save/delete signal handling
        # is left enabled for this doc type.
        ignore_signals = False
        # Do not refresh the index after every update (overrides the
        # global setting).
        auto_refresh = False
class ProductDocument(Document):
    """Elasticsearch document for ``Product``."""

    id = fields.IntegerField(attr='id')

    # Text fields analyzed with ``html_strip``; each has a keyword-analyzed
    # ``raw`` sub-field.
    name = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    slug = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    category = fields.TextField(
        attr='category',
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )

    price = fields.FloatField()
    quantity = fields.FloatField()

    class Django(object):
        """Django-side options of the document."""

        model = Product  # The model associated with this Document
class PostDocument(Document):
    """Elasticsearch document for ``Post``."""

    class Index:
        """Elasticsearch index options."""

        name = 'posts'

    # Computed values read from attributes of the model instance.
    url = fields.TextField(attr='get_absolute_url')
    score = fields.FloatField(attr='elastic_score')

    class Django:
        model = Post
        # Model fields listed by name in addition to the declarations above.
        fields = [
            'id',
            'title',
            'content',
        ]
class RepositoryQANLPLogDocument(TimeBasedDocument):
    """Elasticsearch document for ``QALogs`` entries."""

    time_based = True

    # Related objects are indexed by their ids.
    user = fields.IntegerField(attr="user.id")
    knowledge_base = fields.IntegerField(attr="knowledge_base.id")

    # Structure of the decoded NLP log (see ``prepare_nlp_log``).
    nlp_log = fields.NestedField(
        properties={
            "answers": fields.NestedField(
                properties={
                    "text": fields.TextField(
                        fields={"raw": fields.KeywordField()}
                    ),
                    "confidence": fields.FloatField(),
                }
            ),
            "id": fields.IntegerField(),
        }
    )
    text = fields.IntegerField()
    repository_uuid = fields.TextField(
        fields={"raw": fields.KeywordField()},
    )
    pk = fields.IntegerField()

    class Django:
        model = QALogs
        # Model fields listed by name in addition to the declarations above.
        fields = [
            "id",
            "answer",
            "language",
            "confidence",
            "question",
            "from_backend",
            "user_agent",
            "created_at",
        ]

    def prepare_text(self, obj):
        """Return the id of the first knowledge-base text in the log's language.

        ``None`` is returned when the attribute chain breaks with an
        ``AttributeError`` (for example when no text matches).
        """
        try:
            matching_text = obj.knowledge_base.texts.filter(
                language=obj.language
            ).first()
            return matching_text.id
        except AttributeError:
            return None

    def prepare_nlp_log(self, obj):
        """Decode the JSON string held in ``obj.nlp_log``."""
        decoded_log = json.loads(obj.nlp_log)
        return decoded_log

    def prepare_repository_uuid(self, obj):
        """Return the uuid of the repository owning the knowledge base."""
        repository = obj.knowledge_base.repository
        return repository.uuid
class InterventionDocument(DocType):
    """Elasticsearch doc type for ``Intervention``."""

    pk = fields.IntegerField()
    # category = string_field('category_key')
    category = fields.StringField(
        attr='category_key',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = string_field('choice')
    application = string_field('application')
    time_unit = string_field('time_unit')
    time = fields.FloatField()
    substance = ObjectField(properties={'name': string_field('name')})
    study = string_field('study')
    route = string_field('route')
    form = string_field('form')
    name = string_field('name')
    normed = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})

    # Numeric values.
    value = fields.FloatField()
    mean = fields.FloatField()
    median = fields.FloatField()
    min = fields.FloatField()
    max = fields.FloatField()
    se = fields.FloatField()
    sd = fields.FloatField()
    cv = fields.FloatField()
    unit = string_field('unit')

    class Meta(object):
        model = Intervention
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Do not refresh the index after every update (overrides the
        # global setting).
        auto_refresh = False
class RestaurantDocument(Document):
    """Elasticsearch document for ``Restaurant``."""

    id = fields.IntegerField(attr='id')
    name = fields.TextField(analyzer=autocomplete, attr='name')
    yelp_image = fields.TextField(attr="yelp_image")
    yelp_url = fields.TextField(attr="yelp_url")
    url = fields.TextField(attr="url")
    rating = fields.FloatField(attr="rating")
    price = fields.IntegerField(attr="price")

    # ``address`` is read from the model's ``location`` attribute, while the
    # geo point ``location`` comes from ``location_indexing``.
    address = fields.TextField(attr="location")
    location = fields.GeoPointField(attr="location_indexing")
    address1 = fields.TextField(attr='address1')
    phone = fields.TextField(attr='phone')
    city = fields.TextField(attr='city')
    state = fields.TextField(attr='state')

    categories = fields.NestedField(
        attr="categories_indexing",
        properties={
            'id': fields.IntegerField(),
            'label': fields.TextField(analyzer=autocomplete),
            'api_label': fields.TextField(analyzer=autocomplete),
        },
        multi=True,
    )

    review_count = fields.IntegerField(attr='review_count')
    option_count = fields.IntegerField(attr='option_count')
    comment_count = fields.IntegerField(attr='comment_count')

    open_hours = fields.NestedField(
        attr='hours_indexing',
        properties={
            'open': fields.IntegerField(),
            'close': fields.IntegerField(),
        },
        multi=True,
    )

    class Index:
        name = 'restaurants'
        settings = {"number_of_shards": 1, "number_of_replicas": 1}

    class Django:
        model = Restaurant
        related_models = [RestaurantCategory]

    def get_instances_from_related(self, related_instance):
        """Return the restaurant(s) attached to a ``RestaurantCategory``.

        Any other kind of related instance yields ``None``.
        """
        if not isinstance(related_instance, RestaurantCategory):
            return None
        return related_instance.restaurant
class MeasurementDocument(Document):
    """Elasticsearch document for ``Measurement``."""

    id = fields.IntegerField(attr='id')
    measure_value = fields.FloatField()
    measure_date = fields.DateField()
    measure_parameter = fields.TextField()

    # Object chain: device -> building -> user.
    device = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'id': fields.IntegerField(attr='id'),
            'building': fields.ObjectField(
                properties={
                    'name': fields.TextField(),
                    'id': fields.IntegerField(attr='id'),
                    'user': fields.ObjectField(
                        attr='user',
                        properties={
                            'email': fields.TextField(),
                            'id': fields.IntegerField(attr='id'),
                        },
                    ),
                }
            ),
        }
    )

    class Django:
        model = Measurement
class Product(InnerDoc):
    """Inner document describing a product line and its underlying good."""

    quantity = fields.FloatField()
    value = fields.KeywordField()
    unit = fields.KeywordField()
    incorporated = fields.BooleanField(attr="is_good_incorporated")

    # Descriptive texts of the good; each is also copied to ``wildcard``.
    name = fields.TextField(
        attr="good.name",
        copy_to="wildcard",
        analyzer=descriptive_text_analyzer,
    )
    description = fields.TextField(
        attr="good.description",
        copy_to="wildcard",
        analyzer=descriptive_text_analyzer,
    )
    comment = fields.TextField(
        attr="good.comment",
        copy_to="wildcard",
        analyzer=descriptive_text_analyzer,
    )

    part_number = fields.TextField(
        attr="good.part_number",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=part_number_analyzer,
        copy_to="wildcard",
    )

    is_good_controlled = fields.TextField(attr="good.is_good_controlled")
    control_list_entries = fields.NestedField(
        attr="good.control_list_entries",
        doc_class=CLCEntry,
    )

    report_summary = fields.TextField(
        attr="good.report_summary",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=descriptive_text_analyzer,
        copy_to="wildcard",
    )
class ProductDocument(DocType):
    """Elasticsearch doc type for ``Product``."""

    category = fields.NestedField(
        properties={
            'name': fields.TextField(),
        }
    )
    discount = fields.FloatField(attr='discount')

    class Meta:
        model = Product
        # Model fields listed by name in addition to the declarations above.
        fields = [
            'name',
            'regular_price',
            'final_price',
            'is_available',
            'description',
            'timestamp',
        ]
        # NOTE(review): no ``get_instances_from_related`` is defined in this
        # class -- confirm how ProductCategory changes reach the index.
        related_models = [ProductCategory]

    def get_queryset(self):
        """Return the base queryset with ``category`` joined in the same query."""
        base_queryset = super(ProductDocument, self).get_queryset()
        return base_queryset.select_related('category')
class LocationDocument(DocType):
    """Elasticsearch doc type for ``Location``.

    The ``full``, ``partial`` and ``postcode`` fields gain ``suggest`` and
    ``context`` completion sub-fields only when ``ELASTICSEARCH_GTE_5_0``
    is truthy.
    """

    # ------------------------------ Full -------------------------------
    __full_fields = {
        "raw": KeywordField(),
        # edge_ngram_completion
        "q": StringField(analyzer=edge_ngram_completion),
    }
    if ELASTICSEARCH_GTE_5_0:
        __full_fields.update(
            {
                "suggest": fields.CompletionField(),
                "context": fields.CompletionField(
                    contexts=[
                        {
                            "name": "category",
                            "type": "category",
                            "path": "category.raw",
                        },
                        {
                            "name": "occupied",
                            "type": "category",
                            "path": "occupied.raw",
                        },
                    ]
                ),
            }
        )
    full = StringField(analyzer=html_strip, fields=__full_fields)

    # ----------------------------- Partial -----------------------------
    __partial_fields = {
        "raw": KeywordField(),
        # edge_ngram_completion
        "q": StringField(analyzer=edge_ngram_completion),
    }
    if ELASTICSEARCH_GTE_5_0:
        __partial_fields.update(
            {
                "suggest": fields.CompletionField(),
                "context": fields.CompletionField(
                    contexts=[
                        {
                            "name": "category",
                            "type": "category",
                            "path": "category.raw",
                        },
                        {
                            "name": "occupied",
                            "type": "category",
                            "path": "occupied.raw",
                        },
                    ]
                ),
            }
        )
    partial = StringField(analyzer=html_strip, fields=__partial_fields)

    # ----------------------------- Postcode ----------------------------
    __postcode_fields = {
        "raw": KeywordField(),
    }
    if ELASTICSEARCH_GTE_5_0:
        __postcode_fields.update(
            {
                "suggest": fields.CompletionField(),
                "context": fields.CompletionField(
                    contexts=[
                        {
                            "name": "category",
                            "type": "category",
                            "path": "category.raw",
                        },
                        {
                            "name": "occupied",
                            "type": "category",
                            "path": "occupied.raw",
                        },
                    ]
                ),
            }
        )
    postcode = StringField(analyzer=html_strip, fields=__postcode_fields)

    # -------------------------- Address parts --------------------------
    # Number
    number = StringField(
        attr="address_no",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    # Address
    address = StringField(
        attr="address_street",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    # Town
    town = StringField(
        attr="address_town",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    # Authority
    authority = StringField(
        attr="authority_name",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # URL fields /geocode/slug
    geocode = StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    # Slug
    slug = StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # ------------------------- Filter fields ---------------------------
    # Category
    category = StringField(
        attr="group",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    # Occupied
    occupied = StringField(
        attr="occupation_status_text",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    size = fields.FloatField(attr="floor_area")
    staff = fields.FloatField(attr="employee_count")
    rent = fields.FloatField(attr="rental_valuation")
    revenue = fields.FloatField(attr="revenue")
    coordinates = fields.GeoPointField(attr="location_field_indexing")

    class Meta(object):
        """Meta options."""

        model = Location  # The model associated with this DocType
        parallel_indexing = True
        queryset_pagination = 50  # This will split the queryset
class BookDocument(DocType):
    """Book Elasticsearch document.

    Different parts of the code use different fields. There are a couple of
    use cases: (1) more-like-this functionality, where the ``title``,
    ``description`` and ``summary`` fields are used, and (2) search and
    filtering functionality, where all of the fields are used.
    """

    # ID
    id = fields.IntegerField(attr='id')

    # ********************************************************************
    # *********************** Main data fields for search ****************
    # ********************************************************************
    __title_fields = {
        'raw': KeywordField(),
        'suggest': fields.CompletionField(),
        'edge_ngram_completion': StringField(analyzer=edge_ngram_completion),
        'mlt': StringField(analyzer='english'),
    }

    # The context suggester sub-field is only added when
    # ELASTICSEARCH_GTE_5_0 is truthy.
    if ELASTICSEARCH_GTE_5_0:
        __title_fields.update({
            'suggest_context': fields.CompletionField(contexts=[
                {
                    "name": "tag",
                    "type": "category",
                    "path": "tags.raw",
                },
                {
                    "name": "state",
                    "type": "category",
                    "path": "state.raw",
                },
                {
                    "name": "publisher",
                    "type": "category",
                    "path": "publisher.raw",
                },
            ]),
        })

    title = StringField(analyzer=html_strip, fields=__title_fields)

    description = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    summary = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    # ********************************************************************
    # ********** Additional fields for search and filtering **************
    # ********************************************************************

    # Authors (filled by ``prepare_authors``)
    authors = fields.ListField(
        StringField(
            analyzer=html_strip,
            fields={
                'raw': KeywordField(),
            },
        )
    )

    # Publisher
    publisher = StringField(
        attr='publisher_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    # Publication date
    publication_date = fields.DateField()

    # State
    state = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        },
    )

    # ISBN
    isbn = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        },
    )

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Tags
    tags = StringField(
        attr='tags_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True,
    )

    # Date created
    created = fields.DateField()

    null_field = StringField(attr='null_field_indexing')

    class Meta(object):
        """Meta options."""

        model = Book  # The model associated with this DocType
        parallel_indexing = True

    def prepare_summary(self, instance):
        """Return the summary truncated to 32766 characters.

        :param instance: object exposing a ``summary`` attribute.
        :return: the truncated summary, or ``None`` when the summary is
            ``None`` (previously this raised ``TypeError``).
        """
        summary = instance.summary
        if summary is None:
            return None
        # NOTE(review): 32766 presumably mirrors the maximum term length of
        # the ``raw`` keyword sub-field; the slice counts characters, not
        # bytes -- confirm this is sufficient for multi-byte text.
        return summary[:32766]

    def prepare_authors(self, instance):
        """Return the names of all authors related to ``instance``."""
        return [author.name for author in instance.authors.all()]
class ActivityDocument(DocType):
    """Elasticsearch doc type for ``Activity``.

    ``type`` is filled with the lower-cased class name of each instance, and
    ``search`` passes the model through to ``Search`` so that polymorphic
    models are supported.
    """

    title_keyword = fields.KeywordField(attr='title')
    title = fields.TextField(fielddata=True)
    description = fields.TextField()
    status = fields.KeywordField()
    status_score = fields.FloatField()
    created = fields.DateField()
    date = fields.DateField()
    deadline = fields.DateField()
    type = fields.KeywordField()

    owner = fields.NestedField(
        properties={
            'id': fields.KeywordField(),
            'full_name': fields.TextField(),
        }
    )

    initiative = fields.NestedField(
        properties={
            'title': fields.TextField(),
            'pitch': fields.TextField(),
            'story': fields.TextField(),
        }
    )

    theme = fields.NestedField(
        attr='initiative.theme',
        properties={
            'id': fields.KeywordField(),
        },
    )

    categories = fields.NestedField(
        attr='initiative.categories',
        properties={
            'id': fields.LongField(),
            'slug': fields.KeywordField(),
        },
    )

    position = fields.GeoPointField()
    country = fields.KeywordField()

    expertise = fields.NestedField(
        properties={
            'id': fields.KeywordField(),
        }
    )

    segments = fields.NestedField(
        properties={
            'id': fields.KeywordField(),
            'type': fields.KeywordField(attr='type.slug'),
            'name': fields.TextField(),
        }
    )

    location = fields.NestedField(
        attr='location',
        properties={
            'id': fields.LongField(),
            'formatted_address': fields.TextField(),
        },
    )

    initiative_location = fields.NestedField(
        attr='initiative.location',
        properties={
            'id': fields.LongField(),
            'name': fields.TextField(),
            'city': fields.TextField(),
        },
    )

    # List of creation dates, see ``prepare_contributions``.
    contributions = fields.DateField()
    contribution_count = fields.IntegerField()
    activity_date = fields.DateField()

    class Meta(object):
        model = Activity

    def get_queryset(self):
        """Return the base queryset with ``initiative`` and ``owner`` joined."""
        return super(ActivityDocument, self).get_queryset().select_related(
            'initiative', 'owner',
        )

    @classmethod
    def search(cls, using=None, index=None):
        """Return a ``Search`` bound to this doc type.

        :param using: connection alias; defaults to the doc type's own.
        :param index: index name; defaults to the doc type's own.
        """
        # Use search class that supports polymorphic models
        return Search(
            using=using or cls._doc_type.using,
            index=index or cls._doc_type.index,
            doc_type=[cls],
            model=cls._doc_type.model
        )

    def prepare_contributions(self, instance):
        """Return the creation dates of contributions with status new/success."""
        return [
            contribution.created
            for contribution
            in instance.contributions.filter(status__in=('new', 'success'))
        ]

    def prepare_type(self, instance):
        """Return the lower-cased class name of ``instance``."""
        return str(instance.__class__.__name__.lower())

    def prepare_contribution_count(self, instance):
        """Return the number of contributions with status new/success."""
        # Count in the database instead of loading every row just to
        # measure the length of the result.
        return instance.contributions.filter(
            status__in=('new', 'success')
        ).count()

    def prepare_country(self, instance):
        """Return the country id of the initiative's location, else its place.

        Returns ``None`` when the initiative has neither.
        """
        if instance.initiative.location:
            return instance.initiative.location.country_id
        if instance.initiative.place:
            return instance.initiative.place.country_id

    def prepare_location(self, instance):
        """Return id and formatted address of the location, if there is one."""
        if hasattr(instance, 'location') and instance.location:
            return {
                'id': instance.location.pk,
                'formatted_address': instance.location.formatted_address
            }

    def prepare_expertise(self, instance):
        """Return the expertise id wrapped in a dict, if an expertise is set."""
        if hasattr(instance, 'expertise') and instance.expertise:
            return {'id': instance.expertise_id}

    # The three hooks below always index ``None`` in this class.
    def prepare_position(self, instance):
        return None

    def prepare_deadline(self, instance):
        return None

    def prepare_date(self, instance):
        return None
class JournalDocument(Document):
    """Journal Elasticsearch document."""

    # In different parts of the code different fields are used. There are
    # a couple of use cases: (1) more-like-this functionality, where `title`,
    # `description` and `summary` fields are used, (2) search and filtering
    # functionality where all of the fields are used.

    # ISBN/ID
    isbn = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        }
    )

    # ********************************************************************
    # *********************** Main data fields for search ****************
    # ********************************************************************

    title = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': StringField(
                analyzer=edge_ngram_completion
            ),
            'mlt': StringField(analyzer='english'),
        }
    )

    description = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        }
    )

    summary = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        }
    )

    # ********************************************************************
    # ********** Additional fields for search and filtering **************
    # ********************************************************************

    # Publication date
    publication_date = fields.DateField()

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Date created
    created = fields.DateField(attr='created_indexing')

    class Django(object):
        model = Journal  # The model associated with this Document

    class Meta(object):
        parallel_indexing = True
        # queryset_pagination = 50  # This will split the queryset
        #                           # into parts while indexing

    def prepare_summary(self, instance):
        """Prepare summary.

        The summary is capped so that its UTF-8 encoding never exceeds
        32766 bytes, the maximum length Lucene accepts for a single term
        (relevant for the ``raw`` keyword sub-field). The cap is applied to
        the encoded bytes rather than to the character count, because a
        string of 32766 non-ASCII characters is still too long once encoded.

        :param instance: the ``Journal`` being indexed.
        :return: the (possibly truncated) summary, or ``None`` when empty.
        """
        summary = instance.summary
        if not summary:
            return None

        max_term_bytes = 32766
        encoded = summary.encode('utf-8')
        if len(encoded) <= max_term_bytes:
            return summary

        # Cutting at a byte offset can split a multi-byte character; 'ignore'
        # drops the dangling partial character instead of raising.
        return encoded[:max_term_bytes].decode('utf-8', 'ignore')
class FoiRequestDocument(Document):
    """Search document for ``FoiRequest``.

    Fields without a ``prepare_<name>`` method are read from the model
    instance (through ``attr`` where given); the rest are computed below.
    """

    # Full-text body, rendered from a template in ``prepare_content``.
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )
    title = fields.TextField()
    description = fields.TextField()

    resolution = fields.KeywordField()
    status = fields.KeywordField()
    costs = fields.FloatField()

    tags = fields.ListField(fields.KeywordField())
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())

    campaign = fields.IntegerField()

    due_date = fields.DateField()
    first_message = fields.DateField()
    last_message = fields.DateField()

    publicbody = fields.IntegerField(attr='public_body_id')
    jurisdiction = fields.IntegerField(attr='public_body.jurisdiction_id')

    user = fields.IntegerField(attr='user_id')
    team = fields.IntegerField(attr='team_id')

    public = fields.BooleanField()

    class Django:
        model = FoiRequest
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return all requests with jurisdiction and public body joined in.

        Optional, but it saves a query per related object while indexing.
        """
        joined = ('jurisdiction', 'public_body')
        return FoiRequest.objects.select_related(*joined)

    def prepare_content(self, obj):
        """Render the searchable text of ``obj`` from its template."""
        context = {'object': obj}
        return render_to_string(
            'foirequest/search/foirequest_text.txt', context
        )

    def prepare_tags(self, obj):
        """Return the ids of all tags attached to ``obj``."""
        tag_ids = []
        for tag in obj.tags.all():
            tag_ids.append(tag.id)
        return tag_ids

    def prepare_public(self, obj):
        """Return whether ``obj`` belongs in the public search index."""
        return obj.in_public_search_index()

    def prepare_campaign(self, obj):
        """Return the id of the campaign ``obj`` is part of."""
        return obj.campaign_id

    def prepare_classification(self, obj):
        """Return the public body's classification id followed by its ancestors'.

        Yields an empty list when there is no public body or it has no
        classification.
        """
        if obj.public_body_id is None:
            return []

        node = obj.public_body.classification
        if node is None:
            return []

        ids = [node.id]
        ids.extend(ancestor.id for ancestor in node.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return ids of the public body's categories, then of their ancestors.

        Yields an empty list when ``obj`` has no public body.
        """
        if not obj.public_body:
            return []

        cats = obj.public_body.categories.all()
        direct_ids = [category.id for category in cats]
        ancestor_ids = [
            ancestor.id
            for category in cats
            for ancestor in category.get_ancestors()
        ]
        return direct_ids + ancestor_ids

    def prepare_team(self, obj):
        """Return the team id of the request's project, or ``None``."""
        project = obj.project
        if not project:
            return None
        team_id = project.team_id
        return team_id if team_id else None