class CharacteristicaDocument(Document):
    """Elasticsearch document built from ``Characteristica`` model rows."""

    id = fields.IntegerField(attr='id')
    raw_pk = string_field('raw_pk')
    study_name = string_field('study_name')
    study_sid = string_field('study_sid')
    subject_type = string_field('subject_type')

    # --- group related ---
    group_pk = fields.IntegerField(attr='group_pk')
    group_name = fields.StringField(
        attr='group_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    group_count = fields.IntegerField(attr='group_count')
    group_parent_pk = fields.IntegerField(attr='group_parent_pk')

    # --- individual related ---
    individual_name = fields.StringField(
        attr='individual_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    individual_pk = fields.IntegerField(attr='individual_pk')
    individual_group_pk = fields.IntegerField(attr='individual_group_pk')

    # --- measurement ---
    measurement_type = fields.StringField(
        attr='measurement_type_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    unit = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    substance = fields.StringField(
        attr='substance_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )

    # --- numeric values ---
    count = fields.IntegerField()
    value = fields.FloatField(attr='value')
    mean = fields.FloatField(attr='mean')
    median = fields.FloatField(attr='median')
    min = fields.FloatField(attr='min')
    max = fields.FloatField(attr='max')
    se = fields.FloatField(attr='se')
    sd = fields.FloatField(attr='sd')
    cv = fields.FloatField(attr='cv')
    normed = fields.BooleanField()

    # --- access control ---
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = Characteristica
        # Saving or deleting a model instance must not touch Elasticsearch.
        ignore_signals = True
        # Skip the index refresh that would otherwise follow every update.
        auto_refresh = False

    class Index:
        name = "characteristica"
        settings = {**elastic_settings, 'max_result_window': 50000}

    def get_queryset(self):
        """Return the indexing queryset with ``group`` and ``individual`` joined.

        Optional override: ``select_related`` fetches both relations in the
        same SQL request as the rows themselves.
        """
        base_queryset = super().get_queryset()
        return base_queryset.select_related('group', 'individual')
class PublicSource(PublicSourceBase):
    """Elasticsearch document for ``Source`` records exposed publicly.

    Only sources matched by :meth:`get_queryset` are indexed. The
    ``prepare_*`` methods build the values of the object/geo fields that
    cannot be read straight from a model attribute.
    """

    id = fields.KeywordField(attr='id')
    name = fields.TextField(
        attr='name',
        fields={'keyword': fields.KeywordField(normalizer=basic_normalizer)},
        index_prefixes={},
    )
    type = fields.IntegerField(attr='type.id')
    is_private = fields.BooleanField(attr='is_private')
    is_public = fields.BooleanField()
    parent = fields.TextField(
        attr='parent.name',
        fields={'keyword': fields.KeywordField(normalizer=basic_normalizer)},
        index_prefixes={},
    )
    attributes = fields.ObjectField()
    collections = fields.ObjectField()
    credits = fields.ObjectField()
    geo_location = fields.GeoPointField()

    class Index:
        name = 'public_sources'

    class Django:
        model = Source

    def get_queryset(self):
        """Return the sources to index: ``type=13`` with a public workflow."""
        # NOTE(review): 13 is a hard-coded source-type id; confirm its meaning.
        return Source.objects.filter(type=13, workflow__is_public=True)

    def prepare_is_public(self, instance):
        """Return the workflow's ``is_public`` flag, or ``False`` without a workflow."""
        try:
            return instance.workflow.is_public
        except ObjectDoesNotExist:
            return False

    def prepare_attributes(self, instance):
        """Return a ``{label: value}`` dict of the source's attributes.

        DATE attributes are collected under a single ``'date'`` key as a list
        of ``YYYY-MM-DD`` strings; a partial date (year only, or year and
        month) has its missing month/day filled with 1. TXT attributes use
        ``value_TXT``; every other type is indexed as ``str(attribute)``.

        Returns ``None`` when the source has no indexable attributes.
        """
        attribute_list = []
        dates_list = []
        for attribute in instance.attributes.all():
            attribute_type = attribute.attribute_type
            label = attribute_type.short_name
            if attribute_type.data_type == 'DATE':
                if attribute.value_DATE:
                    dates_list.append(attribute.value_DATE.strftime('%Y-%m-%d'))
                elif attribute.value_DATE_y:
                    day = attribute.value_DATE_d or 1
                    month = attribute.value_DATE_m or 1
                    year = attribute.value_DATE_y
                    dates_list.append(date(year, month, day).strftime('%Y-%m-%d'))
            elif attribute_type.data_type == 'TXT':
                attribute_list.append((label, attribute.value_TXT))
            else:
                attribute_list.append((label, str(attribute)))

        if dates_list:
            attribute_list.append(('date', dates_list))

        if attribute_list:
            return dict(attribute_list)
        return None

    def prepare_collections(self, instance):
        """Return the source's collections as a list of ``{'name': ...}`` dicts.

        Only set memberships whose set has ``set_type == 2`` are included.
        When there are none, or a related object is missing, the placeholder
        ``[{'name': 'none'}]`` is returned.

        The earlier implementation built ``dict([('name', x), ...])``, which
        keeps only the last collection because every pair shares the key
        ``'name'``. It also returned ``None`` on one path. Every path now
        returns a list of dicts, the shape the placeholder already used.
        """
        placeholder = [{'name': 'none'}]
        try:
            names = [
                membership.set_id.name
                for membership in instance.sets.all()
                if membership.set_id.set_type == 2
            ]
        except ObjectDoesNotExist:
            return placeholder
        return [{'name': name} for name in names] or placeholder

    def prepare_credits(self, instance):
        """Return one ``{'agent': 'Name (Type)', 'type': 'Type'}`` dict per credit."""
        credit_list = []
        for credit in instance.credits.all():
            agent_name = credit.agent.standard_name
            # Named ``credit_type`` so the builtin ``type`` is not shadowed.
            credit_type = credit.get_type_display()
            credit_list.append({
                'agent': f'{agent_name} ({credit_type})',
                'type': credit_type,
            })
        return credit_list

    def prepare_geo_location(self, instance):
        """Return ``'<latitude>,<longitude>'`` for the source, or ``None``.

        The coordinates come from the ``LocaleReference`` whose id is stored
        in the ``value_JSON`` of the first attribute with
        ``attribute_type=36``.
        """
        # NOTE(review): 36 is a hard-coded attribute-type id; presumably the
        # locale attribute - confirm.
        # ``first()`` replaces the previous ``exists()`` + ``[0]`` pair, which
        # issued two queries for the same row.
        locale_attribute = instance.attributes.filter(attribute_type=36).first()
        if locale_attribute is None:
            return None
        locale = LocaleReference.objects.get(id=locale_attribute.value_JSON['id'])
        return f'{locale.latitude},{locale.longitude}'
class ExperimentDocument(DocType): """ Our Experiment ElasticSearch Document, which corresponds to our Experiment model. """ # Keyword Fields title = fields.TextField( analyzer=html_strip, fielddata=True, fields={'raw': fields.KeywordField()} ) publication_title = fields.TextField( analyzer=html_strip, fielddata=True, fields={'raw': fields.KeywordField()} ) description = fields.TextField( analyzer=html_strip, fielddata=True, fields={'raw': fields.KeywordField()} ) publication_authors = fields.TextField( analyzer=html_strip, fielddata=True, fields={'raw': fields.KeywordField()} ) technology = fields.TextField( analyzer=standard, fielddata=True, fields={'raw': fields.KeywordField()} ) organism_names = fields.TextField( analyzer=html_strip, fielddata=True, fields={'raw': fields.KeywordField()} ) platform_names = fields.TextField( analyzer=standard, fielddata=True, fields={'raw': fields.TextField()} ) platform_accession_codes = fields.TextField( analyzer=standard, fielddata=True, fields={'raw': fields.TextField()} ) # Basic Fields accession_code = fields.TextField() alternate_accession_code = fields.TextField() submitter_institution = fields.TextField() publication_doi = fields.TextField() has_publication = fields.BooleanField() sample_metadata_fields = fields.TextField() pubmed_id = fields.TextField() num_total_samples = fields.IntegerField() num_processed_samples = fields.IntegerField() # FK/M2M # We actually don't use any ForeignKeys in our Experiment document, # but if we did, we'd do it like this. The function `get_instances_from_related` is similarly required, # as is the `related_models` field in the Meta class. # organisms = fields.NestedField(properties={ # 'name': fields.KeywordField(), # 'taxonomy_id': fields.IntegerField(), # 'pk': fields.IntegerField(), # }) # # def get_instances_from_related(self, related_instance): # return related_instance.experts_set.all() class Meta: model = Experiment fields = [ 'id', ]
class CaseDocument(Document):
    """Elasticsearch document for a ``CaseMetadata`` row.

    Indexes the case's metadata, its related volume/reporter/court/
    jurisdiction objects, its citations and its (redacted) case body.
    """

    # IMPORTANT: If you change what values are indexed here, also change the
    # "CaseLastUpdate triggers" section in set_up_postgres.py to keep
    # Elasticsearch updated.
    name_abbreviation = SuggestField(analyzer='english')
    name = fields.TextField(index_phrases=True, analyzer='english')
    frontend_url = fields.KeywordField()
    frontend_pdf_url = fields.KeywordField()
    last_page = fields.KeywordField()
    first_page = fields.KeywordField()
    decision_date_original = fields.KeywordField()
    docket_numbers = fields.TextField(multi=True)
    docket_number = fields.TextField()
    last_updated = fields.KeywordField()

    volume = fields.ObjectField(
        properties={
            "barcode": fields.KeywordField(),
            'volume_number': SuggestField(),
            'volume_number_slug': fields.KeywordField(),
        })

    reporter = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "full_name": SuggestField(),
            "short_name": SuggestField(),
            "short_name_slug": SuggestField(),
            "start_year": fields.KeywordField(),
            "end_year": fields.KeywordField(),
        })

    court = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "slug": fields.KeywordField(),
            "name": fields.TextField(),
            "name_abbreviation": SuggestField(),
        })

    citations = fields.ObjectField(
        properties={
            "type": fields.TextField(),
            "cite": SuggestField(),
            "normalized_cite": fields.KeywordField(),
        })

    extractedcitations = fields.ObjectField(
        properties={
            "cite": fields.KeywordField(),
            "normalized_cite": fields.KeywordField(),
        })

    jurisdiction = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "slug": fields.KeywordField(),
            "name": fields.KeywordField(),
            "name_long": SuggestField(),
            "whitelisted": fields.BooleanField()
        })

    casebody_data = fields.ObjectField(
        properties={
            # Stored but not searchable (index=False).
            'xml': fields.TextField(index=False),
            'html': fields.TextField(index=False),
            'text': fields.ObjectField(
                properties={
                    'attorneys': fields.TextField(multi=True),
                    'judges': fields.TextField(multi=True),
                    'parties': fields.TextField(multi=True),
                    'head_matter': FTSField(),
                    'opinions': fields.ObjectField(multi=True, properties={
                        'author': fields.KeywordField(),
                        'text': FTSField(),
                        'type': fields.KeywordField(),
                    }),
                    'corrections': fields.TextField(),
                }),
        })

    analysis = fields.ObjectField(properties={
        'sha256': fields.KeywordField(),
        'simhash': fields.KeywordField(),
    })

    def prepare_frontend_pdf_url(self, instance):
        """Return the case's PDF URL without the host part."""
        return instance.get_pdf_url(with_host=False)

    def prepare_analysis(self, instance):
        """Return the case's analysis rows as a ``{key: value}`` dict, sorted by key."""
        return dict(sorted((a.key, a.value) for a in instance.analysis.all()))

    def prepare_docket_numbers(self, instance):
        """Return ``instance.docket_numbers``, or ``None`` if the attribute is absent.

        The previous fallback was the dict ``{'docket_numbers': None}``. That
        is an object value for a field mapped as text, so ``None`` (no value)
        is returned instead.
        """
        return getattr(instance, 'docket_numbers', None)

    def prepare_last_updated(self, instance):
        """Return the timestamp of the related ``CaseLastUpdate``, or ``None`` if there is none."""
        try:
            return instance.last_update.timestamp
        except CaseLastUpdate.DoesNotExist:
            return None

    def prepare_casebody_data(self, instance):
        """Return the cached body (xml, html, json text) passed through ``redact_obj``."""
        body = instance.body_cache
        return instance.redact_obj({
            'xml': body.xml,
            'html': body.html,
            'text': body.json,
        })

    def prepare_name(self, instance):
        """Return the case name passed through ``redact_obj``."""
        return instance.redact_obj(instance.name)

    def prepare_name_abbreviation(self, instance):
        """Return the abbreviated case name passed through ``redact_obj``."""
        return instance.redact_obj(instance.name_abbreviation)

    class Django:
        model = CaseMetadata
        fields = [
            'id',
            'decision_date',
        ]
        ignore_signals = True
        auto_refresh = False

    def to_dict(self, skip_empty=False):
        """Serialize the document, forwarding ``skip_empty`` to embedded objects.

        :param skip_empty: passed to every nested ``to_dict`` call.
        :return: dict form of the document, with an extra ``cites_to`` key
            holding ``self.extractedcitations``.
        """
        # we need to do this until elasticsearch_dsl propagates
        # skip_empty=False to the serialization that happens in embedded
        # objects.
        doc = super().to_dict(skip_empty=skip_empty)
        # ``reporter`` used to be assigned twice here; once is enough.
        for embedded in ('volume', 'reporter', 'court', 'jurisdiction'):
            doc[embedded] = getattr(self, embedded).to_dict(skip_empty=skip_empty)
        doc['casebody_data']['text'] = self.casebody_data.text.to_dict(
            skip_empty=skip_empty)
        doc['casebody_data']['text']['opinions'] = [
            op.to_dict(skip_empty=skip_empty)
            for op in self.casebody_data['text'].opinions
        ]
        doc['cites_to'] = self.extractedcitations
        return doc

    def full_cite(self):
        """Return ``"<name abbreviation>, <cites>[ (<year>)]"``.

        ``<cites>`` joins every citation whose type is not ``"vendor"`` with
        ``", "``. The year is the first four characters of
        ``decision_date_original`` and is omitted when that value is empty.
        """
        cites = ", ".join(
            cite.cite for cite in self.citations if cite.type != "vendor")
        if self.decision_date_original:
            year = " (%s)" % (self.decision_date_original[:4],)
        else:
            year = ""
        return "%s, %s%s" % (self.name_abbreviation, cites, year)

    @classmethod
    def raw_search(cls, *args, **kwargs):
        """
            Return RawSearch object instead of Search object.
        """
        out = super().search(*args, **kwargs)
        # Re-class the Search object in place rather than rebuilding it.
        out.__class__ = RawSearch
        return out
class MappingDocument(Document):
    """Elasticsearch document for ``Mapping`` rows, stored in the ``mappings`` index."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField(dynamic=True)
    created_by = fields.KeywordField(attr='created_by.username')

    @staticmethod
    def prepare_from_concept(instance):
        """Return ``[url, code, name]`` of the mapping's from-concept.

        The name is ``from_concept_name`` when truthy, otherwise
        ``from_concept.display_name``.
        """
        name = get(instance, 'from_concept_name')
        if not name:
            name = get(instance, 'from_concept.display_name')
        return [instance.from_concept_url, instance.from_concept_code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return ``[code, name]`` of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        return [
            *self.prepare_from_concept(instance),
            *self.prepare_to_concept(instance),
        ]

    @staticmethod
    def prepare_concept_source(instance):
        """Return the from- and to-source names, in that order."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the from- and to-source owners, in that order."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the from- and to-source owner types, in that order."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of the related collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        return list(set(coll.parent_url for coll in collections))

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras in indexable form, or ``{}`` when empty.

        Extras go through ``jsonify_safe``; a dict result is then passed
        through ``flatten_dict``.
        """
        raw_extras = instance.extras
        if not raw_extras:
            return {}
        value = jsonify_safe(raw_extras)
        if isinstance(value, dict):
            value = flatten_dict(value)
        return value or {}
class ProfileDocument(DocType):
    # Elasticsearch document built from the UsersProfile model (see the
    # nested Django class at the bottom).

    # ID
    id = fields.IntegerField(attr='id')

    # ********************************************************************
    # *********************** Main data fields for search ****************
    # ********************************************************************
    gender = StringField(analyzer=html_strip, fields={
        'raw': KeywordField(),
    })
    location = StringField(analyzer=html_strip, fields={'raw': KeywordField()})
    avatar = fields.TextField()
    about = fields.TextField()
    phone = fields.StringField()
    slug = fields.StringField()
    # age = fields.IntegerField()

    # Each of the following object fields reads its value from the model
    # attribute named by `attr` and indexes a `name` / `id` pair.
    height = fields.ObjectField(attr='height_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })
    weight = fields.ObjectField(attr='weight_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })
    build = fields.ObjectField(attr='build_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })
    hair = fields.ObjectField(attr='hair_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })
    eye = fields.ObjectField(attr='eye_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })
    ethnicity = fields.ObjectField(attr='ethnicity_field_indexing', properties={
        'name': StringField(analyzer=html_strip),
        'id': fields.IntegerField()
    })

    # The auth user is indexed twice from the same model attribute
    # (`auth_user_field_indexing`) with identical properties: once as a
    # nested field and once as a plain object field.
    auth_user_nested = fields.NestedField(
        attr='auth_user_field_indexing',
        properties={
            'first_name': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            }),
            'last_name': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            }),
            'username': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            })
        })
    auth_user = fields.ObjectField(
        attr='auth_user_field_indexing',
        properties={
            'first_name': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            }),
            'last_name': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            }),
            'username': StringField(analyzer=html_strip, fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            })
        })

    # Flags describing which media kinds the profile has.
    media = fields.ObjectField(attr='media_field_indexing', properties={
        'has_photo': fields.BooleanField(),
        'has_video': fields.BooleanField(),
        'has_audio': fields.BooleanField(),
    })
    tags = fields.ObjectField(attr='tag_field_indexing', properties={
        'id': fields.IntegerField(),
        'name': fields.StringField()
    })

    class Django:
        # Model this document is built from.
        model = UsersProfile
class PropertyAmenitiesDocument(Document):
    """PropertyAmenities Elasticsearch document.

    Indexes one boolean flag per amenity, plus the amenity record's id and a
    text value read from the model's ``property_indexing`` attribute.
    """

    id = fields.IntegerField(attr="id")
    # Value comes from the model attribute `property_indexing`, not from a
    # model attribute named `property`.
    property = fields.TextField(attr="property_indexing", )

    # Amenity flags. No `attr` is given, so each field name (including the
    # camelCase ones) is also the name of the model attribute it reads.
    pool = fields.BooleanField()
    garden = fields.BooleanField()
    elevator = fields.BooleanField()
    doorman = fields.BooleanField()
    deck = fields.BooleanField()
    washer = fields.BooleanField()
    gym = fields.BooleanField()
    parking = fields.BooleanField()
    firePlace = fields.BooleanField()
    airCondition = fields.BooleanField()
    dishWasher = fields.BooleanField()
    itemStorage = fields.BooleanField()
    wheelchair = fields.BooleanField()
    balcony = fields.BooleanField()
    hardFloor = fields.BooleanField()
    furnished = fields.BooleanField()
    view = fields.BooleanField()
    highRise = fields.BooleanField()
    studentFriendly = fields.BooleanField()
    utilities = fields.BooleanField()

    class Django(object):
        """The model associated with this Document."""

        model = PropertyAmenities
class AgentDocument(Document):
    """Elasticsearch document for ``Agent`` rows (people and organisations).

    Most ``prepare_*`` methods apply to only one kind of agent and return
    ``None`` (no value indexed) for the other kind.
    """

    meta = fields.KeywordField()
    content = fields.TextField(attr="name", store=True)

    is_private = fields.BooleanField()

    agent_type = fields.KeywordField(**copy_to_content)
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "suggest": fields.CompletionField(),
        },
        **copy_to_content,
    )
    name_sort = fields.KeywordField(**copy_to_content)
    radical = fields.KeywordField(**copy_to_content)
    based_near = get_place_field(options=copy_to_content)
    roles = get_controlled_term_field(options=copy_to_content)
    sources = get_resource_field(options=copy_to_content)
    contributed_to = fields.ObjectField(
        properties={
            "resource": get_resource_field(options=copy_to_content),
            "roles": get_controlled_term_field(options=copy_to_content),
        },
    )

    gender = fields.KeywordField(**copy_to_content)
    noble = fields.KeywordField(**copy_to_content)
    main_places = get_place_field(options=copy_to_content)
    year = fields.IntegerField(**copy_to_content)
    date_display = fields.TextField(**copy_to_content)
    place_birth = get_place_field(options=copy_to_content)
    place_death = get_place_field(options=copy_to_content)
    languages = get_controlled_term_field(options=copy_to_content)
    knows = get_agent_field(options=copy_to_content)
    member_of = get_agent_field(options=copy_to_content)

    members = get_agent_field(options=copy_to_content)

    class Index:
        name = "rt-agents"

    class Django:
        model = Agent
        fields = ["id", "notes"]

    def get_queryset(self):
        """Return the agents to index, excluding those with an archives/library role."""
        return super().get_queryset().exclude(roles__label__in=["archives", "library"])

    def get_instances_from_related(self, related_instance):
        """Return the agent(s) to re-index when ``related_instance`` changes.

        Returns ``None`` for related types that are not handled here.
        """
        if isinstance(related_instance, Contribution):
            return related_instance.agent

        if isinstance(related_instance, Date):
            # A date belongs to a birth or a death; birth wins if both are set.
            if related_instance.person_birth:
                return related_instance.person_birth
            return related_instance.person_death

        if isinstance(related_instance, (ControlledTerm, Place)):
            return related_instance.agents.all()

        return None

    def prepare_meta(self, instance):
        """Return the agent type wrapped in a single-element list."""
        return [instance.agent_type]

    def prepare_name_sort(self, instance):
        """Return the lower-cased index name used for sorting.

        Names containing ``"anonymous"`` get a ``zzz_`` prefix.
        """
        name = instance.get_index_name().lower()
        if "anonymous" in name:
            name = f"zzz_{name}"
        return name

    def prepare_radical(self, instance):
        """Return ``"yes"`` or ``"no"`` for the agent's ``radical`` flag."""
        return "yes" if instance.radical else "no"

    def prepare_gender(self, instance):
        """Return the gender display value for people, ``None`` otherwise."""
        if instance.is_person:
            return instance.get_gender_display()
        return None

    def prepare_noble(self, instance):
        """Return ``"yes"``/``"no"`` for people, ``None`` otherwise."""
        if instance.is_person:
            return "yes" if instance.noble else "no"
        return None

    def prepare_main_places(self, instance):
        """Return the serialized main places, or ``None`` for organisations."""
        if instance.is_organisation:
            return None
        return [self._prepare_place(place) for place in instance.main_places.all()]

    def _prepare_place(self, place):
        """Serialize ``place`` to a dict of address, geo and country; ``{}`` if falsy."""
        if not place:
            return {}

        return {
            "address": place.address,
            "geo": place.geo,
            # Key was misspelled "coutry".
            # NOTE(review): confirm the place field built by get_place_field
            # names this property "country", and re-index after deploying.
            "country": {"name": place.country.name} if place.country else {},
        }

    def prepare_year(self, instance):
        """Return the year(s) covered by the agent's birth/death dates.

        * both known: list of every year from birth to death, inclusive;
        * only one known: that single year;
        * none known, or the agent is an organisation: ``None``.
        """
        if instance.is_organisation:
            return None

        date_birth = instance.date_birth
        year_birth = None
        if date_birth and date_birth.get_date_earliest():
            year_birth = date_birth.get_date_earliest().year

        date_death = instance.date_death
        year_death = None
        if date_death and date_death.get_date_latest():
            year_death = date_death.get_date_latest().year

        if year_birth and year_death:
            return list(range(year_birth, year_death + 1))
        if year_birth:
            return year_birth
        if year_death:
            return year_death
        return None

    def prepare_date_display(self, instance):
        """Return ``"<birth> – <death>"`` with ``?`` for a missing side.

        Returns ``None`` for organisations and when neither date is set.
        """
        if instance.is_organisation:
            return None

        date_birth = instance.date_birth
        date_death = instance.date_death

        if date_birth and date_death:
            return "{} – {}".format(str(date_birth), str(date_death))
        if date_birth:
            return "{} – ?".format(str(date_birth))
        if date_death:
            return "? – {}".format(str(date_death))
        return None

    def prepare_place_birth(self, instance):
        """Return the serialized birth place for people, ``None`` otherwise."""
        if instance.is_person:
            return self._prepare_place(instance.place_birth)
        return None

    def prepare_place_death(self, instance):
        """Return the serialized death place for people, ``None`` otherwise."""
        if instance.is_person:
            return self._prepare_place(instance.place_death)
        return None

    def prepare_languages(self, instance):
        """Return ``[{"label": ...}]`` per language, or ``None`` for organisations."""
        if instance.is_organisation:
            return None
        return [{"label": language.label} for language in instance.languages.all()]

    def prepare_knows(self, instance):
        """Return the serialized agents this one knows, or ``None`` for organisations."""
        if instance.is_organisation:
            return None
        return [self._prepare_agent(person) for person in instance.knows.all()]

    def _prepare_agent(self, agent):
        """Serialize ``agent`` to ``{"id", "name"}``; ``{}`` if falsy."""
        if not agent:
            return {}
        return {"id": agent.id, "name": agent.name}

    def prepare_member_of(self, instance):
        """Return the serialized organisations the agent belongs to, or ``None`` for organisations."""
        if instance.is_organisation:
            return None
        return [self._prepare_agent(org) for org in instance.member_of.all()]

    def prepare_members(self, instance):
        """Return the serialized members of the agent, or ``None`` for people."""
        if instance.is_person:
            return None
        return [self._prepare_agent(person) for person in instance.members.all()]
class DatasetDocument(ExtendedDocument):
    """Elasticsearch document for ``Dataset`` rows and their related objects."""

    # --- licence ---
    license_chosen = fields.IntegerField()
    license_condition_db_or_copyrighted = fields.TextField()
    license_condition_personal_data = fields.TextField()
    license_condition_modification = fields.BooleanField()
    license_condition_original = fields.BooleanField()
    license_condition_responsibilities = fields.TextField()
    license_condition_source = fields.BooleanField()
    license_condition_timestamp = fields.BooleanField()
    license_name = fields.TextField()
    license_description = fields.TextField()

    resource_modified = fields.DateField(attr='last_modified_resource')
    url = fields.KeywordField()
    source = fields.NestedField(
        properties={
            'title': fields.TextField(),
            'source_type': fields.TextField(),
            'url': fields.TextField(),
            'update_frequency': TranslatedTextField('update_frequency'),
            'last_import_timestamp': fields.DateField(),
        },
    )

    formats = fields.KeywordField(multi=True)
    types = fields.KeywordField(multi=True)
    openness_scores = fields.IntegerField(multi=True)

    # --- organisation and categories ---
    institution = fields.NestedField(
        attr='organization',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )
    category = fields.NestedField(
        properties={
            'id': fields.IntegerField(attr='id'),
            'image_url': fields.KeywordField(),
            'title': TranslatedTextField('title'),
            'description': TranslatedTextField('description'),
        },
    )
    categories = fields.NestedField(
        properties={
            'id': fields.IntegerField(attr='id'),
            'image_url': fields.KeywordField(),
            'code': fields.KeywordField(),
            'title': TranslatedTextField('title'),
            'description': TranslatedTextField('description'),
        },
    )

    downloads_count = fields.IntegerField()
    image_url = fields.TextField()
    image_alt = TranslatedTextField('image_alt')
    version = fields.KeywordField()
    source_title = fields.TextField()
    source_type = fields.TextField()
    source_url = fields.TextField()

    # --- related objects, each indexed as an id/title pair ---
    resources = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )
    applications = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )
    articles = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )
    showcases = fields.NestedField(
        attr='showcases_published',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )

    update_frequency = fields.KeywordField()
    users_following = fields.KeywordField(attr='users_following_list', multi=True)
    last_modified_resource = fields.DateField(attr='last_modified_resource')
    license_code = fields.IntegerField()
    computed_downloads_count = fields.IntegerField()
    computed_views_count = fields.IntegerField()
    has_high_value_data = fields.BooleanField()

    # The regions field exists only while this feature flag is enabled.
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = regions_field()

    class Index:
        name = mcs.ELASTICSEARCH_INDEX_NAMES['datasets']
        settings = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_SETTINGS
        aliases = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_ALIAS

    class Django:
        model = Dataset
        related_models = [
            Application,
            Article,
            Category,
            DataSource,
            Organization,
            Resource,
            Showcase,
            UserFollowingDataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Return what must be re-indexed when ``related_instance`` changes.

        The related model types are tested in a fixed order and the first
        match decides the result; an unhandled type yields ``None``.
        """
        # NOTE(review): the UserFollowingDataset entry returns the follower's
        # followed *applications*, not datasets - looks like a copy/paste
        # slip; confirm the intended accessor before changing it.
        resolvers = (
            (UserFollowingDataset,
             lambda obj: obj.follower.followed_applications.all()),
            (Application, lambda obj: obj.datasets.filter(status='published')),
            (Article, lambda obj: obj.datasets.filter(status='published')),
            (Resource, lambda obj: obj.dataset),
            (Category, lambda obj: obj.dataset_set.filter(status='published')),
            (Organization, lambda obj: obj.datasets.filter(status='published')),
            (DataSource,
             lambda obj: obj.datasource_datasets.filter(status='published')),
            (Showcase, lambda obj: obj.datasets.filter(status='published')),
        )
        for model_class, resolve in resolvers:
            if isinstance(related_instance, model_class):
                return resolve(related_instance)
        return None

    def prepare_search_date(self, instance):
        """Return the dataset's ``verified`` value as its search date."""
        return instance.verified

    def prepare_source(self, instance):
        """Return the serialized data source, or ``{}`` when there is none."""
        data_source = instance.source
        if not data_source:
            return {}
        return DataSourceSerializer().dump(data_source)
class MappingDocument(Document):
    """Elasticsearch document for ``Mapping`` rows, stored in the ``mappings`` index."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()

    @staticmethod
    def prepare_from_concept(instance):
        """Return ``[url, code, name]`` of the mapping's from-concept."""
        url = instance.from_concept_url
        code = instance.from_concept_code
        name = instance.from_concept_name
        return [url, code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return ``[code, name]`` of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        return [
            *self.prepare_from_concept(instance),
            *self.prepare_to_concept(instance),
        ]

    @staticmethod
    def prepare_concept_source(instance):
        """Return the from- and to-source names, in that order."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the from- and to-source owners, in that order."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the from- and to-source owner types, in that order."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras, or ``{}`` when they are falsy."""
        extras = instance.extras
        return extras if extras else {}
class GoodDocument(DocType):
    """Elasticsearch document for ``Good`` rows with category, seller and images."""

    pk = fields.IntegerField()
    id = fields.IntegerField()

    # --- name and description, with per-language variants ---
    name = fields.StringField(fields={'raw': fields.KeywordField()})
    name_en = fields.StringField(analyzer='english')
    name_ru = fields.StringField(analyzer='russian')
    name_uk = fields.StringField()
    description = fields.StringField(fields={'raw': fields.KeywordField()})
    description_en = fields.StringField(analyzer='english')
    description_ru = fields.StringField(analyzer='russian')
    description_uk = fields.StringField()

    # --- categories ---
    category = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'slug': fields.KeywordField(),
            'name': fields.StringField(fields={'raw': fields.KeywordField()}),
            'name_en': fields.StringField(analyzer='english'),
            'name_ru': fields.StringField(analyzer='russian'),
            'name_uk': fields.StringField(),
            'is_main': fields.BooleanField(),
        },
    )
    categories_ids = fields.IntegerField()
    categories_names = fields.StringField(fields={'raw': fields.KeywordField()})
    categories = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'slug': fields.KeywordField(),
            'name': fields.StringField(fields={'raw': fields.KeywordField()}),
            'name_en': fields.StringField(analyzer='english'),
            'name_ru': fields.StringField(analyzer='russian'),
            'name_uk': fields.StringField(),
            'is_main': fields.BooleanField(),
        },
    )

    # --- seller ---
    seller = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'store_name': fields.StringField(fields={'raw': fields.KeywordField()}),
            'store_name_en': fields.StringField(analyzer='english'),
            'store_name_ru': fields.StringField(analyzer='russian'),
            'store_name_uk': fields.StringField(),
            'description': fields.StringField(fields={'raw': fields.KeywordField()}),
            'description_en': fields.StringField(analyzer='english'),
            'description_ru': fields.StringField(analyzer='russian'),
            'description_uk': fields.StringField(),
            'location': fields.StringField(fields={'raw': fields.KeywordField()}),
            'location_en': fields.StringField(analyzer='english'),
            'location_ru': fields.StringField(analyzer='russian'),
            'location_uk': fields.StringField(),
            'goods_count': fields.IntegerField(),
            'rating': fields.FloatField(),
        },
    )

    # --- pricing and availability ---
    price = fields.FloatField(attr='price.amount')
    price_currency = fields.KeywordField()
    discount = fields.IntegerField()
    availability = fields.TextField()

    # --- specifications ---
    specifications = fields.ObjectField(
        properties={
            'color': fields.StringField(
                attr='color.definition',
                fields={'raw': fields.KeywordField()},
            ),
            'color_en': fields.StringField(
                attr='color.definition_en', analyzer='english'),
            'color_ru': fields.StringField(
                attr='color.definition_ru', analyzer='russian'),
            'color_uk': fields.StringField(attr='color.definition_uk'),
            'size': fields.StringField(
                attr='size.definition',
                fields={'raw': fields.KeywordField()},
            ),
            'size_en': fields.StringField(
                attr='size.definition_en', analyzer='english'),
            'size_ru': fields.StringField(
                attr='size.definition_ru', analyzer='russian'),
            'size_uk': fields.StringField(attr='size.definition_uk'),
        },
    )

    # --- images and timestamps ---
    images = fields.NestedField(
        properties={
            'image_url': fields.KeywordField(),
        },
    )
    main_image_url = fields.KeywordField()
    created = fields.DateField()
    modified = fields.DateField()

    class Meta:
        model = Good
        related_models = [
            GoodsCategory,
            HoloUser,
            Store,
            GoodSpecifications,
            GoodImage,
        ]

    def get_queryset(self):
        """Return the indexing queryset with ``category`` and ``seller`` joined."""
        return super().get_queryset().select_related('category', 'seller')

    def get_instances_from_related(self, related_instance):
        """Return the good(s) to re-index when ``related_instance`` changes.

        Yields ``None`` for a user without a store and for unhandled types.
        """
        if isinstance(related_instance, GoodsCategory):
            return related_instance.all_goods()

        if isinstance(related_instance, HoloUser):
            user_store = related_instance.store
            if user_store:
                return user_store.goods.all()

        if isinstance(related_instance, Store):
            return related_instance.goods.all()

        # Specifications and images both point at a single good.
        if isinstance(related_instance, (GoodSpecifications, GoodImage)):
            return related_instance.good

        return None
class DrugDocument(Document):
    """Elasticsearch document for ``Drug`` records, stored in the ``drugs`` index."""

    # Identification.
    source_id = fields.TextField(fields={'raw': fields.KeywordField()})
    trade_name = fields.TextField(fields={'raw': fields.KeywordField()})
    international_name = fields.ObjectField(
        properties={
            'name': fields.TextField(fields={'raw': fields.KeywordField()}),
        },
    )
    drug_form = fields.TextField()
    marketing_status = fields.ObjectField(
        properties={
            'name': fields.TextField(fields={'raw': fields.KeywordField()}),
        },
    )
    formula = fields.TextField()

    # Classification.
    pharmacotherapeutic_group = fields.ObjectField(
        properties={'name': fields.TextField()},
    )
    atcs = fields.NestedField(
        properties={
            'name': fields.TextField(),
            'type': fields.IntegerField(),
        },
    )

    # Applicant and manufacturers share the same sub-field layout.
    drug_applicant = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'address': fields.TextField(),
            'country_id': fields.ObjectField(
                properties={'name': fields.TextField()},
            ),
        },
    )
    manufacturers = fields.NestedField(
        properties={
            'name': fields.TextField(),
            'address': fields.TextField(),
            'country_id': fields.ObjectField(
                properties={'name': fields.TextField()},
            ),
        },
    )

    # Registration data.
    registration_number = fields.TextField(
        fields={'raw': fields.KeywordField()},
    )
    registration_date = fields.DateField()
    expiration_date = fields.TextField()

    drug_type = fields.ObjectField(properties={'name': fields.TextField()})
    has_bio_origin = fields.BooleanField()
    has_phyto_origin = fields.BooleanField()
    is_orphan = fields.BooleanField()
    is_homeopatic = fields.BooleanField()
    INN = fields.ObjectField(properties={'name': fields.TextField()})
    premature_termination = fields.ObjectField(
        properties={
            'date': fields.DateField(),
            'reason': fields.TextField(),
        },
    )
    instruction_url = fields.KeywordField()

    class Django:
        # Model this document is built from.
        model = Drug
        queryset_pagination = 20
        related_models = [
            Manufacturer,
            InternationalName,
            MarketingStatus,
            PharmacotherapeuticGroup,
            ATC,
            Applicant,
            DrugType,
            INN,
            PrematureTermination,
        ]

    class Index:
        name = 'drugs'

    class Meta:
        ordering = ('trade_name', )

    def get_instances_from_related(self, related_instance):
        """Return the objects to re-index when a related model instance changes.

        Only ``ATC`` instances are handled here (through their ``atcs_set``
        reverse manager); for every other entry in ``related_models`` the
        method returns ``None``. ``related_models`` should be used with
        caution because a single change can trigger updates for many
        indexed items.
        """
        if not isinstance(related_instance, ATC):
            return None
        return related_instance.atcs_set.all()

    def get_queryset(self):
        """Return the base queryset with the single-valued relations joined in."""
        joined_relations = (
            'international_name',
            'marketing_status',
            'pharmacotherapeutic_group',
            'drug_applicant',
            'drug_type',
            'INN',
            'premature_termination',
        )
        return super().get_queryset().select_related(*joined_relations)
class ResourceDocument(ExtendedDocument):
    """Elasticsearch document for ``Resource`` records.

    The ``regions`` and ``files`` fields are declared only when the
    corresponding feature flags are enabled at class-definition time.
    """

    NOTES_FIELD_NAME = 'description'

    # File / format metadata.
    format = fields.TextField()
    formats = fields.KeywordField(attr='formats_list', multi=True)
    openness_score = fields.IntegerField()
    openness_scores = fields.IntegerField(multi=True)
    media_type = fields.TextField()
    downloads_count = fields.IntegerField()
    data_date = fields.DateField()
    file_url = fields.TextField()
    download_url = fields.TextField()
    link = fields.TextField()
    file_size = fields.IntegerField()
    types = fields.KeywordField(multi=True)

    # Related objects.
    dataset = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )
    institution = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )
    source = fields.NestedField(
        properties={
            'title': fields.TextField(),
            'source_type': fields.TextField(),
            'url': fields.TextField(),
            'update_frequency': TranslatedTextField('update_frequency'),
            'last_import_timestamp': fields.DateField(),
        },
    )

    # ResourceDoc
    uuid = fields.TextField()
    description = TranslatedTextField('description')

    # Converted-file variants.
    csv_file_url = fields.TextField()
    csv_file_size = fields.LongField()
    csv_download_url = fields.TextField()
    jsonld_file_url = fields.TextField()
    jsonld_file_size = fields.LongField()
    jsonld_download_url = fields.TextField()

    type = fields.KeywordField()
    geo_data = fields.NestedField(properties={'id': fields.IntegerField()})
    tabular_data = fields.NestedField(properties={'id': fields.IntegerField()})
    chartable = fields.NestedField(properties={'id': fields.IntegerField()})
    data_special_signs = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'symbol': fields.KeywordField(),
            'name': TranslatedTextField('name'),
            'description': TranslatedTextField('description'),
        },
    )
    is_chart_creation_blocked = fields.BooleanField()
    license_code = fields.IntegerField()
    update_frequency = fields.KeywordField()
    computed_downloads_count = fields.IntegerField()
    computed_views_count = fields.IntegerField()
    has_high_value_data = fields.BooleanField()

    # Feature-flagged fields.
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = regions_field(attr='all_regions')
    if is_enabled('S40_new_file_model.be'):
        files = files_field(attr='all_files')

    class Index:
        name = mcs.ELASTICSEARCH_INDEX_NAMES['resources']
        settings = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_SETTINGS
        aliases = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_ALIAS

    class Django:
        model = Resource
        related_models = [Dataset, SpecialSign]

    def get_instances_from_related(self, related_instance):
        """Return the published resources affected by a related-object change."""
        if isinstance(related_instance, Dataset):
            affected = related_instance.resources
        elif isinstance(related_instance, SpecialSign):
            affected = related_instance.special_signs_resources
        else:
            return None
        return affected.filter(status='published')

    def prepare_model_name(self, instance):
        """Use the type of the resource's category as the model name."""
        return instance.category.type

    def prepare_openness_scores(self, instance):
        """Wrap the single openness score in a list."""
        return [instance.openness_score]

    def prepare_source(self, instance):
        """Serialize the dataset's source, or return ``{}`` when it has none."""
        serializer = DataSourceSerializer()
        dataset_source = instance.dataset.source
        if dataset_source:
            return serializer.dump(dataset_source)
        return {}
class PageDocument(DocType):
    """Elasticsearch document for a single ``Page`` of a document.

    Most values are read from the page's parent ``document``; title and
    description are only filled in for page number 1.
    """

    document = fields.IntegerField(attr='document_id')

    title = fields.TextField()
    description = fields.TextField()

    tags = fields.ListField(fields.KeywordField())
    created_at = fields.DateField()

    # Ids of objects reachable through the parent document.
    publicbody = fields.IntegerField(attr='document.publicbody_id')
    jurisdiction = fields.IntegerField(
        attr='document.publicbody.jurisdiction_id')
    foirequest = fields.IntegerField(attr='document.foirequest_id')
    campaign = fields.IntegerField(attr='document.foirequest.campaign_id')
    collections = fields.IntegerField()

    user = fields.IntegerField(attr='document.user_id')
    team = fields.IntegerField(attr='document.team_id')

    public = fields.BooleanField()

    number = fields.IntegerField()
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )

    class Meta:
        model = Page
        queryset_chunk_size = 50

    def get_queryset(self):
        """Join the parent document so it is loaded in the same SQL query."""
        base = super().get_queryset()
        return base.select_related('document')

    def prepare_title(self, obj):
        """Return the document title for page 1, unless it ends in ``.pdf``."""
        if obj.number != 1:
            return ''
        doc_title = obj.document.title
        return '' if doc_title.endswith('.pdf') else doc_title

    def prepare_description(self, obj):
        """Return the document description for page 1, else an empty string."""
        return obj.document.description if obj.number == 1 else ''

    def prepare_tags(self, obj):
        """Return the ids of all tags on the parent document."""
        return [tag.id for tag in obj.document.tags.all()]

    def prepare_created_at(self, obj):
        """Use the parent document's creation date."""
        return obj.document.created_at

    def prepare_public(self, obj):
        """Report whether the parent document is public."""
        return obj.document.is_public()

    def prepare_team(self, obj):
        """Return the document's team id, or ``None`` when it is unset/falsy."""
        return obj.document.team_id or None

    def prepare_collections(self, obj):
        """Return the ids of the collections the parent document belongs to."""
        memberships = obj.document.document_documentcollection.all()
        return list(memberships.values_list('id', flat=True))
class TimecourseDocument(DocType):
    """Elasticsearch document for ``Timecourse`` records.

    The numeric series (``value``, ``mean``, ..., ``time``) are multi-valued
    and read from the model's ``null_*`` attributes.
    """

    study = string_field('study')
    pk = fields.IntegerField('pk')

    # Subjects and interventions the time course belongs to.
    group = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
            'count': fields.IntegerField(),
        },
    )
    individual = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
    )
    interventions = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
        multi=True,
    )

    substance = ObjectField(properties={'name': string_field('name')})
    ex = ObjectField(properties={'pk': string_field('pk')})
    normed = fields.BooleanField()
    raw = ObjectField(properties={'pk': fields.IntegerField()})
    pharmacokinetics = ObjectField(
        properties={'pk': fields.IntegerField()},
        multi=True,
    )

    # Multi-valued measurement series.
    value = fields.FloatField('null_value', multi=True)
    mean = fields.FloatField('null_mean', multi=True)
    median = fields.FloatField('null_median', multi=True)
    min = fields.FloatField('null_min', multi=True)
    max = fields.FloatField('null_max', multi=True)
    se = fields.FloatField('null_se', multi=True)
    sd = fields.FloatField('null_sd', multi=True)
    cv = fields.FloatField('null_cv', multi=True)

    unit = string_field('unit')
    time_unit = string_field('time_unit')
    figure = string_field('figure')
    time = fields.FloatField('null_time', multi=True)
    tissue = string_field('tissue')
    pktype = string_field("pktype_key")

    # Currently not indexed:
    # auc_end = fields.FloatField(attr='auc_end')
    # kel = fields.FloatField(attr='kel')

    class Meta:
        model = Timecourse

        # Do not update Elasticsearch automatically on model save/delete.
        ignore_signals = True

        # Skip the index refresh after each update (overrides the global
        # setting).
        auto_refresh = False
class ExperimentDocument(Document):
    """Elasticsearch document that mirrors the ``Experiment`` model."""

    # Analyzed text fields; each also exposes a ``raw`` sub-field.
    title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_authors = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    technology = fields.TextField(
        analyzer=html_strip_no_stop,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    downloadable_organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    platform_names = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )
    platform_accession_codes = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )

    # Plain fields without custom analysis.
    accession_code = fields.KeywordField()
    alternate_accession_code = fields.KeywordField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()
    num_downloadable_samples = fields.IntegerField()
    source_first_published = fields.DateField()

    # Downloadable samples are indexed as keywords so unique counts can be
    # computed on the facets.
    downloadable_samples = fields.ListField(fields.KeywordField())

    # Sample keywords are indexed to improve search.
    sample_keywords = fields.ListField(fields.KeywordField())

    class Django:
        model = Experiment
        parallel_indexing = True
        queryset_pagination = 3000

        fields = [
            "id",
        ]

    def get_queryset(self):
        """Return the default queryset ordered by ``id``."""
        base = super().get_queryset()
        return base.order_by("id")
class InterventionDocument(Document):
    """Elasticsearch document for ``Intervention`` records (``interventions`` index)."""

    pk = fields.IntegerField()

    # Categorical descriptors, each with a keyword-analyzed ``raw`` sub-field.
    measurement_type = fields.StringField(
        attr='measurement_type_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    form = fields.StringField(
        attr='form_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    route = fields.StringField(
        attr='route_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    application = fields.StringField(
        attr='application_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = string_field('choice')

    # Timing.
    time_unit = string_field('time_unit')
    time = fields.FloatField()
    time_end = fields.FloatField()

    substance = string_field('substance_name')
    study_name = string_field('study_name')
    study_sid = string_field('study_sid')

    name = string_field('name')
    normed = fields.BooleanField()
    raw_pk = string_field('raw_pk')

    # Numeric values and their statistics.
    value = fields.FloatField()
    mean = fields.FloatField()
    median = fields.FloatField()
    min = fields.FloatField()
    max = fields.FloatField()
    se = fields.FloatField()
    sd = fields.FloatField()
    cv = fields.FloatField()
    unit = string_field('unit')

    # Access control.
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = Intervention

        # Do not update Elasticsearch automatically on model save/delete.
        ignore_signals = True

        # Skip the index refresh after each update.
        auto_refresh = False

    class Index:
        name = 'interventions'
        settings = elastic_settings

    def get_queryset(self):
        """Join the study reached via ``ex__interventionset__study`` in one SQL query."""
        base = super().get_queryset()
        return base.select_related('ex__interventionset__study')
class DatasetsDoc(DocType):
    """Elasticsearch document for published ``Dataset`` records."""

    id = fields.IntegerField()
    slug = TranslatedKeywordField('slug')
    title = TranslatedTextField(
        'title',
        common_params={'suggest': fields.CompletionField()},
    )
    version = fields.KeywordField()
    url = fields.KeywordField()
    notes = TranslatedTextField('notes')

    # Related objects.
    institution = fields.NestedField(
        attr='organization',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )
    category = fields.NestedField(
        attr='category',
        properties={
            'id': fields.IntegerField(attr='id'),
            'image_url': fields.KeywordField(),
            'title': TranslatedTextField('title'),
            'description': TranslatedTextField('description'),
        },
    )
    resources = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )
    applications = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )
    articles = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )

    tags = TranslatedKeywordsList(attr='tags_list')
    # customfields = fields.TextField()
    formats = fields.KeywordField(attr='formats', multi=True)

    # Licence details.
    license_condition_db_or_copyrighted = fields.TextField()
    license_condition_modification = fields.BooleanField()
    license_condition_original = fields.BooleanField()
    license_condition_responsibilities = fields.TextField()
    license_condition_source = fields.BooleanField()
    license_condition_timestamp = fields.BooleanField()
    license_name = fields.StringField(attr='license_name')
    license_description = fields.StringField(attr='license_description')

    update_frequency = fields.KeywordField()
    openness_scores = fields.IntegerField(attr='openness_scores', multi=True)
    users_following = fields.KeywordField(
        attr='users_following_list', multi=True)

    # Counters, status and timestamps.
    views_count = fields.IntegerField()
    downloads_count = fields.IntegerField()
    status = fields.KeywordField()
    modified = fields.DateField()
    last_modified_resource = fields.DateField(attr='last_modified_resource')
    created = fields.DateField()
    verified = fields.DateField()

    class Meta:
        doc_type = 'dataset'
        model = Dataset
        related_models = [
            Organization,
            Category,
            Application,
            Article,
            Resource,
            UserFollowingDataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the objects to re-index after a related object changed.

        ``Article`` is listed in ``related_models`` but has no branch here,
        so it yields ``None``.
        """
        if isinstance(related_instance, UserFollowingDataset):
            # NOTE(review): this returns the follower's *applications*, not
            # datasets, which looks unintended for a dataset index - confirm.
            follower = related_instance.follower
            return follower.followed_applications.all()

        if isinstance(related_instance, Application):
            return related_instance.datasets.all()

        if isinstance(related_instance, Resource):
            return related_instance.dataset

        if isinstance(related_instance, Category):
            return related_instance.dataset_set.filter(status='published')

        if isinstance(related_instance, Organization):
            return related_instance.datasets.filter(status='published')

    def get_queryset(self):
        """Index only datasets whose status is ``published``."""
        manager = self._doc_type.model.objects
        return manager.filter(status='published')
class FoiRequestDocument(DocType):
    """Elasticsearch document for ``FoiRequest`` records."""

    content = fields.TextField(
        analyzer=analyzer,
        index_options='offsets',
    )
    title = fields.TextField()
    description = fields.TextField()

    resolution = fields.KeywordField()
    status = fields.KeywordField()
    costs = fields.FloatField()

    tags = fields.ListField(fields.KeywordField())
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    campaign = fields.IntegerField()

    due_date = fields.DateField()
    first_message = fields.DateField()
    last_message = fields.DateField()

    publicbody = fields.IntegerField(attr='public_body_id')
    jurisdiction = fields.IntegerField(attr='public_body.jurisdiction_id')

    user = fields.IntegerField(attr='user_id')
    team = fields.IntegerField(attr='team_id')

    public = fields.BooleanField()

    class Meta:
        model = FoiRequest
        queryset_chunk_size = 50

    def get_queryset(self):
        """Load jurisdiction and public body together with each request."""
        return FoiRequest.objects.select_related(
            'jurisdiction',
            'public_body',
        )

    def prepare_content(self, obj):
        """Render the searchable text of the request from its template."""
        context = {'object': obj}
        return render_to_string(
            'foirequest/search/foirequest_text.txt', context)

    def prepare_tags(self, obj):
        """Return the ids of the request's tags."""
        return [tag.id for tag in obj.tags.all()]

    def prepare_public(self, obj):
        """Report whether the request belongs in the public search index."""
        return obj.in_public_search_index()

    def prepare_campaign(self, obj):
        """Return the id of the request's campaign."""
        return obj.campaign_id

    def prepare_classification(self, obj):
        """Return the public body's classification id followed by its ancestors' ids.

        Yields an empty list when the request has no public body or the
        public body has no classification.
        """
        if obj.public_body_id is None:
            return []
        classification = obj.public_body.classification
        if classification is None:
            return []
        ids = [classification.id]
        ids.extend(
            ancestor.id for ancestor in classification.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return ids of the public body's categories, then of all their ancestors."""
        if not obj.public_body:
            return []
        cats = obj.public_body.categories.all()
        own_ids = [cat.id for cat in cats]
        ancestor_ids = [
            ancestor.id for cat in cats for ancestor in cat.get_ancestors()
        ]
        return own_ids + ancestor_ids

    def prepare_team(self, obj):
        """Return the team id of the request's project, or ``None``."""
        project = obj.project
        if not project:
            return None
        return project.team_id or None
class CourseRunDocument(BaseCourseDocument):
    """Elasticsearch document for ``CourseRun`` records."""

    announcement = fields.DateField()
    availability = fields.TextField(
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    authoring_organization_uuids = fields.KeywordField(multi=True)
    course_key = fields.KeywordField()
    end = fields.DateField()
    enrollment_start = fields.DateField()
    enrollment_end = fields.DateField()
    first_enrollable_paid_seat_sku = fields.TextField()
    go_live_date = fields.DateField()
    has_enrollable_seats = fields.BooleanField()
    has_enrollable_paid_seats = fields.BooleanField()
    hidden = fields.BooleanField()
    is_enrollable = fields.BooleanField()
    is_current_and_still_upgradeable = fields.BooleanField()
    language = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()},
    )
    license = fields.KeywordField()
    marketing_url = fields.TextField()
    min_effort = fields.IntegerField()
    max_effort = fields.IntegerField()
    mobile_available = fields.BooleanField()
    number = fields.KeywordField()
    paid_seat_enrollment_end = fields.DateField()
    pacing_type = fields.KeywordField()
    program_types = fields.KeywordField(multi=True)
    published = fields.BooleanField()
    skill_names = fields.KeywordField(multi=True)
    status = fields.KeywordField()
    start = fields.DateField()
    slug = fields.TextField()
    staff_uuids = fields.KeywordField(multi=True)
    # ``type`` and both of its sub-fields read the ``type_legacy`` attribute.
    type = fields.TextField(
        attr='type_legacy',
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(attr='type_legacy'),
            'lower': fields.TextField(
                analyzer=case_insensitive_keyword, attr='type_legacy'),
        },
    )
    transcript_languages = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(multi=True)},
        multi=True,
    )
    weeks_to_complete = fields.IntegerField()

    def prepare_aggregation_key(self, obj):
        """Build the aggregation key from the parent course's key.

        Course runs are aggregated by course key because that is how they
        are planned to be de-duplicated on the marketing site.
        """
        return f'courserun:{obj.course.key}'

    def prepare_course_key(self, obj):
        """Return the key of the parent course."""
        return obj.course.key

    def prepare_first_enrollable_paid_seat_sku(self, obj):
        """Delegate to the model method of the same name."""
        return obj.first_enrollable_paid_seat_sku()

    def prepare_is_current_and_still_upgradeable(self, obj):
        """Delegate to the model method of the same name."""
        return obj.is_current_and_still_upgradeable()

    def prepare_has_enrollable_paid_seats(self, obj):
        """Delegate to the model method of the same name."""
        return obj.has_enrollable_paid_seats()

    def prepare_language(self, obj):
        """Convert the run's language with the shared ``_prepare_language`` helper."""
        return self._prepare_language(obj.language)

    def prepare_number(self, obj):
        """Return the ``course`` component parsed from the run key."""
        parsed_key = CourseKey.from_string(obj.key)
        return parsed_key.course

    def prepare_org(self, obj):
        """Return the ``org`` component parsed from the run key."""
        parsed_key = CourseKey.from_string(obj.key)
        return parsed_key.org

    def prepare_paid_seat_enrollment_end(self, obj):
        """Delegate to ``get_paid_seat_enrollment_end`` on the model."""
        return obj.get_paid_seat_enrollment_end()

    def prepare_partner(self, obj):
        """Return the short code of the parent course's partner."""
        return obj.course.partner.short_code

    def prepare_published(self, obj):
        """Report whether the run's status is ``Published``."""
        return obj.status == CourseRunStatus.Published

    def prepare_seat_types(self, obj):
        """Return the slugs of the run's seat types."""
        return [seat_type.slug for seat_type in obj.seat_types]

    def prepare_skill_names(self, obj):
        """Return the distinct skill names whitelisted for the parent course."""
        whitelisted = get_whitelisted_course_skills(obj.course.key)
        unique_names = {entry.skill.name for entry in whitelisted}
        return list(unique_names)

    def prepare_staff_uuids(self, obj):
        """Return the UUIDs of the run's staff as strings."""
        return [str(member.uuid) for member in obj.staff.all()]

    def prepare_transcript_languages(self, obj):
        """Convert each transcript language with ``_prepare_language``."""
        converted = []
        for language in obj.transcript_languages.all():
            converted.append(self._prepare_language(language))
        return converted

    def get_queryset(self):
        """Return visible runs with course, seat types and transcript languages preloaded."""
        base = super().get_queryset().select_related('course')
        preloaded = base.prefetch_related(
            'seats__type', 'transcript_languages')
        return filter_visible_runs(preloaded)

    class Django:
        """
        Django Elasticsearch DSL ORM Meta.
        """

        model = CourseRun

    class Meta:
        """
        Meta options.
        """

        parallel_indexing = True
        queryset_pagination = settings.ELASTICSEARCH_DSL_QUERYSET_PAGINATION
class ResourceDocument(Document):
    """Elasticsearch document for ``Resource`` records (``rt-resources`` index).

    Resources related by "paratext of" are excluded from the indexed
    queryset; several ``prepare_*`` methods instead fold paratext values
    (from ``get_paratext()``) into the parent resource. Most list-valued
    ``prepare_*`` methods append an ``"any"`` entry when the list is
    non-empty.
    """

    meta = fields.KeywordField()
    content = fields.TextField(attr="title.main_title", store=True)
    is_private = fields.BooleanField()

    title = fields.TextField(
        analyzer=text_folding_analyzer,
        fields={
            "raw": fields.KeywordField(),
            "sort": fields.KeywordField(normalizer=lowercase_sort_normalizer),
            "suggest": fields.CompletionField(),
        },
        **copy_to_content,
    )
    form_genre = get_controlled_term_field(options=copy_to_content)
    subjects = get_controlled_term_field(options=copy_to_content)
    date_display = fields.TextField(**copy_to_content)
    year = fields.IntegerField(**copy_to_content)
    summary = fields.TextField(**copy_to_content)

    classifications_printing_publishing = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_translation = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext_functions = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })

    contributions = fields.ObjectField(
        properties={
            "agent": get_agent_field(options=copy_to_content),
            "roles": get_controlled_term_field(),
        })
    published_as = fields.KeywordField()

    languages = get_controlled_term_field(options=copy_to_content)

    places = fields.ObjectField(
        properties={
            "place": get_place_field(options=copy_to_content),
            "fictional_place": fields.TextField(
                fields={"raw": fields.KeywordField()}),
        })

    relationships = fields.ObjectField(
        properties={
            "relationship_type": get_controlled_term_field(
                options=copy_to_content),
            "related_to": get_resource_field(options=copy_to_content),
        })

    # events = get_event_field(options=copy_to_content)

    is_original = fields.BooleanField()
    is_translation = fields.BooleanField()

    has_date_radical = fields.KeywordField()

    authors = fields.ObjectField(
        attr="get_authors_source_text",
        properties={"person": get_agent_field(options=copy_to_content)},
    )
    translated_from = get_controlled_term_field(options=copy_to_content)

    class Index:
        name = "rt-resources"

    class Django:
        model = Resource
        fields = ["id"]

        related_models = [
            Classification,
            Contribution,
            ControlledTerm,
            Date,
            ResourceLanguage,
            ResourcePlace,
            ResourceRelationship,
            Title,
        ]

    def get_queryset(self):
        """Return non-paratext resources with title and date joined in."""
        return (
            super()
            .get_queryset()
            .exclude(relationships__relationship_type__label="paratext of")
            .select_related("title", "date")
        )

    def get_instances_from_related(self, related_instance):
        """Return the resource(s) to re-index when a related object changes."""
        if isinstance(related_instance, Date):
            return related_instance.resource

        if isinstance(related_instance, (ControlledTerm, Title)):
            return related_instance.resources.all()

        if isinstance(
            related_instance,
            (
                Classification,
                Contribution,
                ResourceLanguage,
                ResourcePlace,
                ResourceRelationship,
            ),
        ):
            return related_instance.resource

    def prepare_meta(self, instance):
        """Return ``"source texts"`` for originals, else a list of applicable tags."""
        if instance.is_original():
            return "source texts"

        meta = []

        if instance.get_paratext():
            meta.append("paratexts")

        if instance.is_translation():
            meta.append("translations")

        return meta

    def prepare_title(self, instance):
        """Return the resource title plus any differing paratext titles."""
        titles = [str(instance.title)]

        for relationship in instance.get_paratext():
            paratext = relationship.resource
            if str(paratext.title) != str(instance.title):
                titles.append(str(paratext.title))

        return titles

    def prepare_form_genre(self, instance):
        """Return subjects from the ``fast-forms`` and ``rt-agt`` vocabularies."""
        return self._get_subjects(instance, ["fast-forms", "rt-agt"])

    def _get_subjects(self, instance, prefix):
        """Collect subject labels for the given vocabulary prefixes.

        Subjects of the instance's paratexts are included recursively.
        """
        subjects = [
            {"label": item.label}
            for item in instance.subjects.filter(vocabulary__prefix__in=prefix)
        ]

        for relationship in instance.get_paratext():
            subjects.extend(self._get_subjects(relationship.resource, prefix))

        if subjects:
            subjects.append({"label": "any"})

        return subjects

    def prepare_subjects(self, instance):
        """Return subjects from the ``fast-topic`` vocabulary."""
        return self._get_subjects(instance, ["fast-topic"])

    def prepare_date_display(self, instance):
        """Return the display string of the resource's date, or ``None``."""
        resource = self._get_resource(instance)
        if resource.date:
            return str(resource.date)
        return None

    def _get_resource(self, resource):
        """Return the parent resource for a paratext, else the resource itself."""
        if resource.is_paratext():
            return resource.paratext_of()

        return resource

    def prepare_year(self, instance):
        """Return the year(s) covered by the resource's date.

        A list of every year from earliest to latest when both bounds are
        known, a single year when only one is, and ``None`` otherwise.
        """
        resource = self._get_resource(instance)
        if not resource.date:
            return None

        date_earliest = resource.date.get_date_earliest()
        date_latest = resource.date.get_date_latest()

        if date_earliest and date_latest:
            return list(range(date_earliest.year, date_latest.year + 1))

        if date_earliest:
            return date_earliest.year

        if date_latest:
            return date_latest.year

        return None

    def prepare_summary(self, instance):
        """Return the resource summary followed by its paratexts' summaries."""
        summaries = []

        if instance.summary:
            summaries = [instance.summary]

        for relationship in instance.get_paratext():
            if relationship.resource.summary:
                summaries.append(relationship.resource.summary)

        return summaries

    def prepare_classifications_printing_publishing(self, instance):
        """Return classifications from the ``rt-ppt`` vocabulary."""
        return self._get_classifications(instance, "rt-ppt")

    def _get_classifications(self, instance, prefix):
        """Collect edition labels for one vocabulary prefix.

        Editions labelled "original" or "source-text" (case-insensitive)
        are skipped; classifications of paratexts are included recursively.
        """
        classifications = [
            {"edition": {"label": item.edition.label}}
            for item in instance.classifications.filter(
                edition__vocabulary__prefix=prefix)
            if item.edition.label.lower() not in ["original", "source-text"]
        ]

        for relationship in instance.get_paratext():
            classifications.extend(
                self._get_classifications(relationship.resource, prefix))

        if classifications:
            classifications.append({"edition": {"label": "any"}})

        return classifications

    def prepare_classifications_translation(self, instance):
        """Return classifications from the ``rt-tt`` vocabulary."""
        return self._get_classifications(instance, "rt-tt")

    def prepare_classifications_paratext(self, instance):
        """Return classifications from the ``rt-pt`` vocabulary."""
        return self._get_classifications(instance, "rt-pt")

    def prepare_classifications_paratext_functions(self, instance):
        """Return classifications from the ``rt-ptf`` vocabulary."""
        return self._get_classifications(instance, "rt-ptf")

    def prepare_contributions(self, instance):
        """Return agent/roles entries for all contributions, paratexts included.

        Agents whose name starts with "Anon" are indexed as "Anonymous".
        Every role of a contribution is indexed; roles on a paratext get
        the suffix " of translation paratext".
        """
        contributions = []

        for item in instance.get_contributions(include_paratext=True):
            agent = item.agent
            if agent.name.startswith("Anon"):
                agent_name = "Anonymous"
            else:
                agent_name = agent.get_index_name()

            # One dict per role. The previous dict comprehension reused the
            # constant key "label", so only the last role survived.
            roles = [
                {
                    "label": f"{role.label} of translation paratext"
                    if item.resource.is_paratext()
                    else role.label
                }
                for role in item.roles.all()
            ]

            contributions.append({
                "agent": {"id": agent.id, "name": agent_name},
                "roles": roles,
            })

        if contributions:
            contributions.append({
                "agent": {"name": "any"},
                "roles": [{"label": "any"}],
            })

        return contributions

    def prepare_published_as(self, instance):
        """Return the non-empty ``published_as`` values of all contributions."""
        published_as = [
            item.published_as
            for item in instance.get_contributions(include_paratext=True)
            if item.published_as
        ]

        if published_as:
            published_as.append("any")

        return published_as

    def prepare_languages(self, instance):
        """Return language labels of the resource and, recursively, its paratexts."""
        languages = [
            {"label": item.language.label}
            for item in instance.languages.all()
        ]

        for relationship in instance.get_paratext():
            languages.extend(self.prepare_languages(relationship.resource))

        if languages:
            languages.append({"label": "any"})

        return languages

    def prepare_places(self, instance):
        """Return one entry per place, combining fictional and real locations.

        When both are present the address is written as
        ``"<fictional> (<real address>)"``.
        """
        places = []

        for item in instance.places.all():
            address = ""
            place = {}

            if item.fictional_place:
                address = item.fictional_place
                place = {
                    "fictional_place": item.fictional_place,
                    "place": {"address": address},
                }

            if item.place:
                address = (
                    f"{address} ({item.place.address})"
                    if address
                    else item.place.address
                )
                place["place"] = {
                    "address": address,
                    "geo": item.place.geo,
                    "country": {"name": item.place.country.name},
                }

            places.append(place)

        if places:
            places.append(
                {"place": {"address": "any", "country": {"name": "any"}}})

        return places

    def prepare_relationships(self, instance):
        """Return the relationship type and target of each relationship."""
        relationships = [
            {
                "relationship_type": {"label": item.relationship_type.label},
                "related_to": {
                    "id": item.related_to.id,
                    "title": {"main_title": str(item.related_to.title)},
                },
            }
            for item in instance.relationships.all()
        ]

        if relationships:
            relationships.append({"relationship_type": {"label": "any"}})

        return relationships

    def prepare_events(self, instance):
        """Return id, title and place of each event linked to the resource."""
        events = [
            {
                "id": item.id,
                "title": item.title,
                "place": {
                    "address": item.place.address,
                    "country": {"name": item.place.country.name},
                },
            }
            for item in instance.events.all()
        ]

        if events:
            events.append({
                "title": "any",
                "place": {"address": "any", "country": {"name": "any"}},
            })

        return events

    def prepare_has_date_radical(self, instance):
        """Return ``"yes"`` or ``"no"`` depending on ``has_date_radical()``."""
        if instance.has_date_radical():
            return "yes"

        return "no"

    def prepare_translated_from(self, instance):
        """Return the labels of the source-text languages."""
        source_languages = instance.get_languages_source_text()

        languages = []
        if source_languages:
            languages = [
                {"label": language.label} for language in source_languages
            ]

        if languages:
            languages.append({"label": "any"})

        return languages
class ConceptDocument(Document):
    """Elasticsearch document for ``Concept`` records (``concepts`` index)."""

    class Index:
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Identity and naming.
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    name = fields.KeywordField(attr='display_name', normalizer="lowercase")
    last_update = fields.DateField(attr='updated_at')
    locale = fields.ListField(fields.KeywordField(attr='display_name'))

    # Ownership.
    source = fields.KeywordField(
        attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')

    # Source / collection membership.
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())

    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(
        attr='concept_class', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField(dynamic=True)

    class Django:
        model = Concept
        fields = [
            'version',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct non-null locales of the concept's names."""
        named = instance.names.filter(locale__isnull=False)
        locales = named.distinct('locale').values_list('locale', flat=True)
        return list(locales)

    @staticmethod
    def prepare_source_version(instance):
        """Return the versions of all sources the concept belongs to."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the versions of all collections the concept belongs to."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct mnemonics of the concept's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct parent URLs of the concept's collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        owner_urls = {coll.parent_url for coll in collections}
        return list(owner_urls)

    @staticmethod
    def prepare_extras(instance):
        """Return the concept's extras made JSON-safe and, if a dict, flattened.

        Always returns a dict-like value; falsy results become ``{}``.
        """
        if not instance.extras:
            return {}

        safe_extras = jsonify_safe(instance.extras)
        if isinstance(safe_extras, dict):
            safe_extras = flatten_dict(safe_extras)

        return safe_extras or {}
class ProgramDocument(BaseDocument, OrganizationsMixin):
    """
    Elasticsearch document describing a ``Program``.

    Field values that are not read straight from the model are produced by
    the ``prepare_<field>`` methods defined on this class.
    """

    # --- organizations ----------------------------------------------------
    authoring_organization_uuids = fields.KeywordField(multi=True)
    authoring_organizations = fields.TextField(
        multi=True,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    authoring_organization_bodies = fields.TextField(multi=True)
    credit_backing_organizations = fields.TextField(multi=True)

    # --- presentation / marketing ------------------------------------------
    card_image_url = fields.TextField()
    hidden = fields.BooleanField()
    is_program_eligible_for_one_click_purchase = fields.BooleanField()
    language = fields.TextField(multi=True)
    marketing_url = fields.TextField()
    min_hours_effort_per_week = fields.IntegerField()
    max_hours_effort_per_week = fields.IntegerField()
    partner = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    published = fields.BooleanField()
    subtitle = fields.TextField(analyzer=html_strip)
    status = fields.KeywordField()
    search_card_display = fields.TextField(multi=True)

    # --- related objects and scheduling -------------------------------------
    subject_uuids = fields.KeywordField(multi=True)
    staff_uuids = fields.KeywordField(multi=True)
    start = fields.DateField()
    seat_types = fields.KeywordField(multi=True)
    title = fields.TextField(
        analyzer=synonym_text,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
        },
    )
    type = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    weeks_to_complete_min = fields.IntegerField()
    weeks_to_complete_max = fields.IntegerField()

    def prepare_aggregation_key(self, obj):
        """Return the aggregation key: ``program:`` followed by the program uuid."""
        program_uuid = obj.uuid
        return 'program:{}'.format(program_uuid)

    def prepare_credit_backing_organizations(self, obj):
        """Return the prepared form of the program's credit-backing organizations."""
        backing_organizations = obj.credit_backing_organizations.all()
        return self._prepare_organizations(backing_organizations)

    def prepare_language(self, obj):
        """Return each of the program's languages run through ``_prepare_language``."""
        prepared = []
        for language in obj.languages:
            prepared.append(self._prepare_language(language))
        return prepared

    def prepare_organizations(self, obj):
        """Return authoring organizations followed by credit-backing organizations."""
        authoring = self.prepare_authoring_organizations(obj)
        credit_backing = self.prepare_credit_backing_organizations(obj)
        return authoring + credit_backing

    def prepare_partner(self, obj):
        """Return the short code of the program's partner."""
        partner = obj.partner
        return partner.short_code

    def prepare_published(self, obj):
        """Return whether the program's status equals ``ProgramStatus.Active``."""
        is_published = obj.status == ProgramStatus.Active
        return is_published

    def prepare_seat_types(self, obj):
        """Return the slug of every seat type on the program."""
        slugs = []
        for seat_type in obj.seat_types:
            slugs.append(seat_type.slug)
        return slugs

    def prepare_search_card_display(self, obj):
        """
        Return the search-card ranking, cost and courses values of the Degree
        whose uuid matches the program, or an empty list if there is none.
        """
        try:
            degree = Degree.objects.get(uuid=obj.uuid)
        except Degree.DoesNotExist:
            return []
        else:
            return [
                degree.search_card_ranking,
                degree.search_card_cost,
                degree.search_card_courses,
            ]

    def prepare_subject_uuids(self, obj):
        """Return the uuid of every subject on the program, as strings."""
        uuids = []
        for subject in obj.subjects:
            uuids.append(str(subject.uuid))
        return uuids

    def prepare_staff_uuids(self, obj):
        """Return the de-duplicated staff uuids (as strings) across all course runs."""
        unique_uuids = set()
        for course_run in obj.course_runs:
            for staff_member in course_run.staff.all():
                unique_uuids.add(str(staff_member.uuid))
        return list(unique_uuids)

    def prepare_type(self, obj):
        """Return the ``name_t`` attribute of the program's type."""
        program_type = obj.type
        return program_type.name_t

    def get_queryset(self):
        """Return the base queryset with ``type`` and ``partner`` select-related."""
        queryset = super().get_queryset()
        queryset = queryset.select_related('type')
        return queryset.select_related('partner')

    class Django:
        """
        Django Elasticsearch DSL ORM Meta.
        """

        model = Program

    class Meta:
        """
        Meta options.
        """

        parallel_indexing = True
        queryset_pagination = settings.ELASTICSEARCH_DSL_QUERYSET_PAGINATION