Example #1
0
class CharacteristicaDocument(Document):
    """Elasticsearch document built from ``Characteristica`` model rows.

    Name-like text fields carry an extra ``raw`` sub-field that uses the
    ``keyword`` analyzer.
    """

    id = fields.IntegerField(attr='id')
    raw_pk = string_field('raw_pk')

    # Study the record belongs to.
    study_name = string_field('study_name')
    study_sid = string_field('study_sid')
    subject_type = string_field('subject_type')

    # Group-related attributes.
    group_pk = fields.IntegerField(attr='group_pk')
    group_name = fields.StringField(
        attr='group_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    group_count = fields.IntegerField(attr='group_count')
    group_parent_pk = fields.IntegerField(attr='group_parent_pk')

    # Individual-related attributes.
    individual_name = fields.StringField(
        attr='individual_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    individual_pk = fields.IntegerField(attr='individual_pk')
    individual_group_pk = fields.IntegerField(attr='individual_group_pk')

    # What was measured; two of these read from differently named
    # attributes (``measurement_type_name`` / ``substance_name``).
    measurement_type = fields.StringField(
        attr='measurement_type_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    unit = fields.StringField(
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    substance = fields.StringField(
        attr='substance_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )

    # Numeric values.
    count = fields.IntegerField()
    value = fields.FloatField(attr='value')
    mean = fields.FloatField(attr='mean')
    median = fields.FloatField(attr='median')
    min = fields.FloatField(attr='min')
    max = fields.FloatField(attr='max')
    se = fields.FloatField(attr='se')
    sd = fields.FloatField(attr='sd')
    cv = fields.FloatField(attr='cv')

    normed = fields.BooleanField()

    # Access information, including one object per allowed user.
    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr='allowed_users',
        properties={'username': string_field('username')},
        multi=True,
    )

    class Django:
        model = Characteristica
        # Saving or deleting a model instance must not touch Elasticsearch.
        ignore_signals = True
        # Skip the index refresh that would otherwise follow every update.
        auto_refresh = False

    class Index:
        name = 'characteristica'
        settings = {**elastic_settings, 'max_result_window': 50000}

    def get_queryset(self):
        """Return the default queryset with ``group`` and ``individual`` joined.

        Optional override: ``select_related`` fetches both relations in the
        same SQL request instead of one extra query per row.
        """
        queryset = super().get_queryset()
        return queryset.select_related('group', 'individual')
Example #2
0
class PublicSource(PublicSourceBase):
    """Search document for ``Source`` rows, stored in ``public_sources``.

    ``get_queryset`` restricts indexing to sources of type 13 whose workflow
    is flagged public. The ``prepare_*`` methods compute the values of the
    fields that have no ``attr`` mapping.
    """

    id = fields.KeywordField(attr='id')
    name = fields.TextField(
        attr='name',
        fields={'keyword': fields.KeywordField(
            normalizer=basic_normalizer
        )},
        index_prefixes={}
    )
    type = fields.IntegerField(attr='type.id')
    is_private = fields.BooleanField(attr='is_private')
    is_public = fields.BooleanField()
    parent = fields.TextField(
        attr='parent.name',
        fields={'keyword': fields.KeywordField(
            normalizer=basic_normalizer
        )},
        index_prefixes={}
    )
    attributes = fields.ObjectField()
    collections = fields.ObjectField()
    credits = fields.ObjectField()
    geo_location = fields.GeoPointField()

    class Index:
        name = 'public_sources'

    class Django:
        model = Source

    def get_queryset(self):
        """Return the sources to index: type 13 with a public workflow."""
        return Source.objects.filter(type=13, workflow__is_public=True)

    def prepare_is_public(self, instance):
        """Return the workflow's public flag, or ``False`` without a workflow."""
        try:
            return instance.workflow.is_public
        except ObjectDoesNotExist:
            return False

    def prepare_attributes(self, instance):
        """Return the source's attributes keyed by attribute short name.

        ``DATE`` attributes are gathered into one list of ``YYYY-MM-DD``
        strings under the key ``'date'``; a date that only has a year falls
        back to month 1 / day 1 for the missing parts. ``TXT`` attributes
        use their text value and every other type uses ``str(attribute)``.

        Returns ``None`` when nothing was collected.
        """
        attributes = {}
        dates = []
        for attribute in instance.attributes.all():
            attribute_type = attribute.attribute_type
            label = attribute_type.short_name
            data_type = attribute_type.data_type

            if data_type == 'DATE':
                if attribute.value_DATE:
                    dates.append(attribute.value_DATE.strftime('%Y-%m-%d'))
                elif attribute.value_DATE_y:
                    day = attribute.value_DATE_d or 1
                    month = attribute.value_DATE_m or 1
                    year = attribute.value_DATE_y
                    dates.append(date(year, month, day).strftime('%Y-%m-%d'))
            elif data_type == 'TXT':
                attributes[label] = attribute.value_TXT
            else:
                attributes[label] = str(attribute)

        if dates:
            attributes['date'] = dates

        return attributes or None

    def prepare_collections(self, instance):
        """Return one ``{'name': ...}`` object per collection of the source.

        Only memberships whose set has ``set_type == 2`` are kept. When the
        source has memberships but none qualify, or a related row is
        missing, the placeholder ``[{'name': 'none'}]`` is returned. A
        source without any membership yields ``None``.

        Every entry used to be squeezed into a single dict under the shared
        key ``'name'``, which silently kept only the last collection.
        """
        placeholder = [{'name': 'none'}]
        try:
            # Evaluate the relation once instead of count() followed by all().
            memberships = list(instance.sets.all())
            if not memberships:
                return None
            collections = [
                {'name': membership.set_id.name}
                for membership in memberships
                if membership.set_id.set_type == 2
            ]
        except ObjectDoesNotExist:
            return placeholder
        return collections or placeholder

    def prepare_credits(self, instance):
        """Return one ``{'agent', 'type'}`` object per credit of the source.

        ``agent`` is the agent's standard name followed by the credit type
        in parentheses.
        """
        credit_list = []
        for credit in instance.credits.all():
            agent = credit.agent.standard_name
            credit_type = credit.get_type_display()
            credit_list.append({
                'agent': f'{agent} ({credit_type})',
                'type': credit_type
            })
        return credit_list

    def prepare_geo_location(self, instance):
        """Return ``'latitude,longitude'`` for the source's locale, if any.

        The locale is looked up through the first attribute of type 36,
        whose JSON value holds the ``LocaleReference`` id. Returns ``None``
        when the source has no such attribute.
        """
        locales = instance.attributes.filter(attribute_type=36)
        if locales.exists():
            loc_id = locales[0].value_JSON['id']
            locale = LocaleReference.objects.get(id=loc_id)
            return f'{locale.latitude},{locale.longitude}'
Example #3
0
class ExperimentDocument(DocType):
    """Elasticsearch document that mirrors the ``Experiment`` model."""

    # Analysed text fields with fielddata enabled. Each one also exposes a
    # ``raw`` sub-field: a keyword for the first six, a plain text field for
    # the two platform fields.
    title = fields.TextField(
        analyzer=html_strip, fielddata=True,
        fields={'raw': fields.KeywordField()})
    publication_title = fields.TextField(
        analyzer=html_strip, fielddata=True,
        fields={'raw': fields.KeywordField()})
    description = fields.TextField(
        analyzer=html_strip, fielddata=True,
        fields={'raw': fields.KeywordField()})
    publication_authors = fields.TextField(
        analyzer=html_strip, fielddata=True,
        fields={'raw': fields.KeywordField()})
    technology = fields.TextField(
        analyzer=standard, fielddata=True,
        fields={'raw': fields.KeywordField()})
    organism_names = fields.TextField(
        analyzer=html_strip, fielddata=True,
        fields={'raw': fields.KeywordField()})
    platform_names = fields.TextField(
        analyzer=standard, fielddata=True,
        fields={'raw': fields.TextField()})
    platform_accession_codes = fields.TextField(
        analyzer=standard, fielddata=True,
        fields={'raw': fields.TextField()})

    # Plain fields without extra analysis settings.
    accession_code = fields.TextField()
    alternate_accession_code = fields.TextField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()

    # Foreign keys / many-to-many relations.
    # The Experiment document does not use any related models. If it did,
    # the declaration would look like the sketch below, together with a
    # `get_instances_from_related` method and a `related_models` entry in
    # the Meta class:
    #
    # organisms = fields.NestedField(properties={
    #     'name': fields.KeywordField(),
    #     'taxonomy_id': fields.IntegerField(),
    #     'pk': fields.IntegerField(),
    # })
    #
    # def get_instances_from_related(self, related_instance):
    #     return related_instance.experts_set.all()

    class Meta:
        model = Experiment

        # Model fields mapped automatically, in addition to the ones above.
        fields = ['id']
Example #4
0
class CaseDocument(Document):
    """Elasticsearch document for ``CaseMetadata`` rows.

    Besides the field mapping, the class provides ``prepare_*`` hooks that
    compute indexed values, a ``to_dict`` override that forwards
    ``skip_empty`` to embedded objects, a citation formatter and a search
    factory that returns ``RawSearch`` objects.
    """

    # IMPORTANT: If you change what values are indexed here, also change the
    # "CaseLastUpdate triggers" section in set_up_postgres.py to keep
    # Elasticsearch updated.
    name_abbreviation = SuggestField(analyzer='english')
    name = fields.TextField(index_phrases=True, analyzer='english')
    frontend_url = fields.KeywordField()
    frontend_pdf_url = fields.KeywordField()
    last_page = fields.KeywordField()
    first_page = fields.KeywordField()
    decision_date_original = fields.KeywordField()
    docket_numbers = fields.TextField(multi=True)
    docket_number = fields.TextField()
    last_updated = fields.KeywordField()

    volume = fields.ObjectField(
        properties={
            "barcode": fields.KeywordField(),
            'volume_number': SuggestField(),
            'volume_number_slug': fields.KeywordField(),
        })

    reporter = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "full_name": SuggestField(),
            "short_name": SuggestField(),
            "short_name_slug": SuggestField(),
            "start_year": fields.KeywordField(),
            "end_year": fields.KeywordField(),
        })

    court = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "slug": fields.KeywordField(),
            "name": fields.TextField(),
            "name_abbreviation": SuggestField(),
        })

    citations = fields.ObjectField(
        properties={
            "type": fields.TextField(),
            "cite": SuggestField(),
            "normalized_cite": fields.KeywordField(),
        })

    extractedcitations = fields.ObjectField(
        properties={
            "cite": fields.KeywordField(),
            "normalized_cite": fields.KeywordField(),
        })

    jurisdiction = fields.ObjectField(
        properties={
            "id": fields.IntegerField(),
            "slug": fields.KeywordField(),
            "name": fields.KeywordField(),
            "name_long": SuggestField(),
            "whitelisted": fields.BooleanField()
        })

    casebody_data = fields.ObjectField(
        properties={
            # The raw markup is stored but not indexed.
            'xml': fields.TextField(index=False),
            'html': fields.TextField(index=False),
            'text': fields.ObjectField(
                properties={
                    'attorneys': fields.TextField(multi=True),
                    'judges': fields.TextField(multi=True),
                    'parties': fields.TextField(multi=True),
                    'head_matter': FTSField(),
                    'opinions': fields.ObjectField(
                        multi=True,
                        properties={
                            'author': fields.KeywordField(),
                            'text': FTSField(),
                            'type': fields.KeywordField(),
                        }),
                    'corrections': fields.TextField(),
                }),
        })

    analysis = fields.ObjectField(properties={
        'sha256': fields.KeywordField(),
        'simhash': fields.KeywordField(),
    })

    def prepare_frontend_pdf_url(self, instance):
        """Return the case's PDF URL without the host part."""
        return instance.get_pdf_url(with_host=False)

    def prepare_analysis(self, instance):
        """Return the case's analysis rows as a key-sorted ``{key: value}`` dict."""
        return dict(sorted((a.key, a.value) for a in instance.analysis.all()))

    def prepare_docket_numbers(self, instance):
        """Return ``instance.docket_numbers`` when the attribute exists.

        NOTE(review): the fallback is a dict although the field is declared
        as multi-valued text; kept as is -- confirm whether this is intended.
        """
        if not hasattr(instance, 'docket_numbers'):
            return {'docket_numbers': None}
        return instance.docket_numbers

    def prepare_last_updated(self, instance):
        """Return the timestamp of the related last-update row, if any."""
        try:
            return instance.last_update.timestamp
        except CaseLastUpdate.DoesNotExist:
            return None

    def prepare_casebody_data(self, instance):
        """Return the cached body (xml, html, json text) passed through redaction."""
        body = instance.body_cache
        return instance.redact_obj({
            'xml': body.xml,
            'html': body.html,
            'text': body.json,
        })

    def prepare_name(self, instance):
        """Return the case name passed through ``redact_obj``."""
        return instance.redact_obj(instance.name)

    def prepare_name_abbreviation(self, instance):
        """Return the abbreviated case name passed through ``redact_obj``."""
        return instance.redact_obj(instance.name_abbreviation)

    class Django:
        model = CaseMetadata
        fields = [
            'id',
            'decision_date',
        ]
        ignore_signals = True
        auto_refresh = False

    def to_dict(self, skip_empty=False):
        """Serialize the document, honouring ``skip_empty`` in embedded objects.

        Needed until elasticsearch_dsl propagates ``skip_empty=False`` to
        the serialization of embedded objects: each embedded object is
        re-serialized here with the requested flag. The extracted citations
        are additionally exposed under the ``cites_to`` key.
        """
        doc = super().to_dict(skip_empty=skip_empty)

        # 'reporter' used to be serialized and assigned twice; once suffices.
        for embedded in ('volume', 'reporter', 'court', 'jurisdiction'):
            doc[embedded] = getattr(self, embedded).to_dict(
                skip_empty=skip_empty)

        doc['casebody_data']['text'] = self.casebody_data.text.to_dict(
            skip_empty=skip_empty)
        doc['casebody_data']['text']['opinions'] = [
            op.to_dict(skip_empty=skip_empty)
            for op in self.casebody_data['text'].opinions
        ]
        doc['cites_to'] = self.extractedcitations
        return doc

    def full_cite(self):
        """Return ``"<abbreviation>, <cites> (<year>)"`` for this case.

        Citations of type ``"vendor"`` are left out. The year is the first
        four characters of ``decision_date_original`` and is omitted,
        parentheses included, when that value is empty.
        """
        cites = ", ".join(
            cite.cite for cite in self.citations if cite.type != "vendor")
        if self.decision_date_original:
            year = " (%s)" % (self.decision_date_original[:4], )
        else:
            year = ""
        return "%s, %s%s" % (self.name_abbreviation, cites, year)

    @classmethod
    def raw_search(cls, *args, **kwargs):
        """
            Return RawSearch object instead of Search object.
        """
        out = super().search(*args, **kwargs)
        # Re-tag the Search instance in place rather than building a new one.
        out.__class__ = RawSearch
        return out
Example #5
0
class MappingDocument(Document):
    """Elasticsearch document for ``Mapping`` rows, stored in ``mappings``.

    Fields without an ``attr`` are filled in by the matching ``prepare_*``
    method further down.
    """

    class Index:
        name = 'mappings'
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer='lowercase')
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer='lowercase')

    # Concept information, both per side (from/to) and combined.
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'),
    )
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type',
    )
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # Versions and collections the mapping takes part in.
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())

    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField(dynamic=True)
    created_by = fields.KeywordField(attr='created_by.username')

    @staticmethod
    def prepare_from_concept(instance):
        """Return ``[url, code, name]`` of the mapping's source-side concept.

        The name comes from ``from_concept_name`` and falls back to the
        related concept's ``display_name`` when that is empty.
        """
        name = get(instance, 'from_concept_name')
        if not name:
            name = get(instance, 'from_concept.display_name')
        return [instance.from_concept_url, instance.from_concept_code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return ``[code, name]`` of the mapping's target-side concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        return [
            *self.prepare_from_concept(instance),
            *self.prepare_to_concept(instance),
        ]

    @staticmethod
    def prepare_concept_source(instance):
        """Return the source names of both ends, from-side first."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the source owners of both ends, from-side first."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the source owner types of both ends, from-side first."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source linked to the mapping."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection holding the mapping."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct mnemonics of the collections holding the mapping."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of those collections."""
        # Join user and organization up front so parent_url needs no
        # per-collection query -- presumably it reads one of the two.
        collections = instance.collection_set.select_related(
            'user', 'organization')
        unique_urls = {coll.parent_url for coll in collections}
        return list(unique_urls)

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras made JSON-safe, flattened if a dict.

        Always returns a dict-like value; missing or empty extras give ``{}``.
        """
        if not instance.extras:
            return {}

        extras = jsonify_safe(instance.extras)
        if isinstance(extras, dict):
            extras = flatten_dict(extras)

        return extras or {}
Example #6
0
class ProfileDocument(DocType):
    """Elasticsearch document for ``UsersProfile`` rows.

    Related data is read from the model's ``*_field_indexing`` attributes
    and stored as embedded objects.
    """

    id = fields.IntegerField(attr='id')

    # ------------------------------------------------------------------
    # Main data fields for search
    # ------------------------------------------------------------------
    gender = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )
    location = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )
    avatar = fields.TextField()
    about = fields.TextField()
    phone = fields.StringField()
    slug = fields.StringField()
    # age = fields.IntegerField()

    # Physical traits: each one is an object with ``name`` and ``id``.
    height = fields.ObjectField(
        attr='height_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )
    weight = fields.ObjectField(
        attr='weight_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )
    build = fields.ObjectField(
        attr='build_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )
    hair = fields.ObjectField(
        attr='hair_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )
    eye = fields.ObjectField(
        attr='eye_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )
    ethnicity = fields.ObjectField(
        attr='ethnicity_field_indexing',
        properties={
            'name': StringField(analyzer=html_strip),
            'id': fields.IntegerField(),
        },
    )

    # The auth user is indexed twice from the same attribute: once as a
    # nested field and once as a plain object. Every name part gets a
    # ``raw`` keyword and a ``suggest`` completion sub-field; the
    # comprehensions create separate field instances for each key.
    auth_user_nested = fields.NestedField(
        attr='auth_user_field_indexing',
        properties={
            part: StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            )
            for part in ('first_name', 'last_name', 'username')
        },
    )
    auth_user = fields.ObjectField(
        attr='auth_user_field_indexing',
        properties={
            part: StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            )
            for part in ('first_name', 'last_name', 'username')
        },
    )

    # Flags telling which kinds of media the profile has.
    media = fields.ObjectField(
        attr='media_field_indexing',
        properties={
            'has_photo': fields.BooleanField(),
            'has_video': fields.BooleanField(),
            'has_audio': fields.BooleanField(),
        },
    )

    tags = fields.ObjectField(
        attr='tag_field_indexing',
        properties={
            'id': fields.IntegerField(),
            'name': fields.StringField(),
        },
    )

    class Django:
        model = UsersProfile
Example #7
0
class PropertyAmenitiesDocument(Document):
    """Elasticsearch document for ``PropertyAmenities`` rows.

    Apart from the identifier and the owning property, every field is a
    boolean flag named after one amenity.
    """

    id = fields.IntegerField(attr='id')
    # The property is read from the model's ``property_indexing`` attribute.
    property = fields.TextField(attr='property_indexing')

    # One boolean flag per amenity.
    pool = fields.BooleanField()
    garden = fields.BooleanField()
    elevator = fields.BooleanField()
    doorman = fields.BooleanField()
    deck = fields.BooleanField()
    washer = fields.BooleanField()
    gym = fields.BooleanField()
    parking = fields.BooleanField()
    firePlace = fields.BooleanField()
    airCondition = fields.BooleanField()
    dishWasher = fields.BooleanField()
    itemStorage = fields.BooleanField()
    wheelchair = fields.BooleanField()
    balcony = fields.BooleanField()
    hardFloor = fields.BooleanField()
    furnished = fields.BooleanField()
    view = fields.BooleanField()
    highRise = fields.BooleanField()
    studentFriendly = fields.BooleanField()
    utilities = fields.BooleanField()

    class Django(object):
        """Binds this document to its Django model."""

        model = PropertyAmenities
Example #8
0
class AgentDocument(Document):
    """Elasticsearch document for ``Agent`` rows, stored in ``rt-agents``.

    Agents are either persons or organisations. Several ``prepare_*``
    methods return ``None`` for the kind of agent the field does not apply
    to. Most fields are declared with the ``copy_to_content`` options.
    """

    meta = fields.KeywordField()
    content = fields.TextField(attr="name", store=True)

    is_private = fields.BooleanField()

    agent_type = fields.KeywordField(**copy_to_content)
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "suggest": fields.CompletionField(),
        },
        **copy_to_content,
    )
    name_sort = fields.KeywordField(**copy_to_content)
    radical = fields.KeywordField(**copy_to_content)
    based_near = get_place_field(options=copy_to_content)
    roles = get_controlled_term_field(options=copy_to_content)
    sources = get_resource_field(options=copy_to_content)

    contributed_to = fields.ObjectField(
        properties={
            "resource": get_resource_field(options=copy_to_content),
            "roles": get_controlled_term_field(options=copy_to_content),
        },
    )

    gender = fields.KeywordField(**copy_to_content)
    noble = fields.KeywordField(**copy_to_content)
    main_places = get_place_field(options=copy_to_content)
    year = fields.IntegerField(**copy_to_content)
    date_display = fields.TextField(**copy_to_content)
    place_birth = get_place_field(options=copy_to_content)
    place_death = get_place_field(options=copy_to_content)
    languages = get_controlled_term_field(options=copy_to_content)
    knows = get_agent_field(options=copy_to_content)
    member_of = get_agent_field(options=copy_to_content)

    members = get_agent_field(options=copy_to_content)

    class Index:
        name = "rt-agents"

    class Django:
        model = Agent
        fields = ["id", "notes"]

    def get_queryset(self):
        """Return all agents except those with an archives or library role."""
        return super().get_queryset().exclude(roles__label__in=["archives", "library"])

    def get_instances_from_related(self, related_instance):
        """Return the agent(s) to re-index when a related object changes.

        * ``Contribution`` -> its agent.
        * ``Date`` -> the person it is the birth date of, otherwise the
          person it is the death date of.
        * ``ControlledTerm`` / ``Place`` -> every agent linked to it.

        Any other type yields ``None``.
        """
        if isinstance(related_instance, Contribution):
            return related_instance.agent

        if isinstance(related_instance, Date):
            if related_instance.person_birth:
                return related_instance.person_birth

            return related_instance.person_death

        if isinstance(related_instance, (ControlledTerm, Place)):
            return related_instance.agents.all()

        return None

    def prepare_meta(self, instance):
        """Return the agent type wrapped in a one-element list."""
        return [instance.agent_type]

    def prepare_name_sort(self, instance):
        """Return the lower-cased index name used for sorting.

        Names containing "anonymous" get a ``zzz_`` prefix.
        """
        name = instance.get_index_name().lower()

        if "anonymous" in name:
            name = f"zzz_{name}"

        return name

    def prepare_radical(self, instance):
        """Return ``"yes"`` or ``"no"`` for the agent's radical flag."""
        return "yes" if instance.radical else "no"

    def prepare_gender(self, instance):
        """Return the gender display value for persons, ``None`` otherwise."""
        if instance.is_person:
            return instance.get_gender_display()

        return None

    def prepare_noble(self, instance):
        """Return ``"yes"``/``"no"`` for persons, ``None`` otherwise."""
        if instance.is_person:
            return "yes" if instance.noble else "no"

        return None

    def prepare_main_places(self, instance):
        """Return the prepared main places of a non-organisation agent."""
        if instance.is_organisation:
            return None

        return [self._prepare_place(place) for place in instance.main_places.all()]

    def _prepare_place(self, place):
        """Return the address, geo and country name of ``place`` as a dict.

        A missing place gives ``{}``; a place without a country gets an
        empty ``country`` object.
        """
        if not place:
            return {}

        return {
            "address": place.address,
            "geo": place.geo,
            # This key was previously misspelled "coutry".
            "country": {"name": place.country.name} if place.country else {},
        }

    def prepare_year(self, instance):
        """Return the year(s) associated with a non-organisation agent.

        With both a birth and a death year, every year from birth to death
        (inclusive) is returned as a list. With only one of them, that
        single year is returned. Otherwise the result is ``None``.
        """
        if instance.is_organisation:
            return None

        year_birth = None
        date_birth = instance.date_birth
        if date_birth:
            earliest = date_birth.get_date_earliest()
            if earliest:
                year_birth = earliest.year

        year_death = None
        date_death = instance.date_death
        if date_death:
            latest = date_death.get_date_latest()
            if latest:
                year_death = latest.year

        if year_birth and year_death:
            return list(range(year_birth, year_death + 1))

        if year_birth:
            return year_birth

        if year_death:
            return year_death

        return None

    def prepare_date_display(self, instance):
        """Return "<birth> – <death>" for a non-organisation agent.

        A missing side is shown as ``?``; with neither date the result is
        ``None``.
        """
        if instance.is_organisation:
            return None

        date_birth = instance.date_birth
        date_death = instance.date_death

        if date_birth and date_death:
            return f"{date_birth} – {date_death}"

        if date_birth:
            return f"{date_birth} – ?"

        if date_death:
            return f"? – {date_death}"

        return None

    def prepare_place_birth(self, instance):
        """Return the prepared place of birth for persons, else ``None``."""
        if instance.is_person:
            return self._prepare_place(instance.place_birth)

        return None

    def prepare_place_death(self, instance):
        """Return the prepared place of death for persons, else ``None``."""
        if instance.is_person:
            return self._prepare_place(instance.place_death)

        return None

    def prepare_languages(self, instance):
        """Return one ``{"label": ...}`` object per language of the agent."""
        if instance.is_organisation:
            return None

        return [{"label": language.label} for language in instance.languages.all()]

    def prepare_knows(self, instance):
        """Return the prepared agents a non-organisation agent knows."""
        if instance.is_organisation:
            return None

        return [self._prepare_agent(person) for person in instance.knows.all()]

    def _prepare_agent(self, agent):
        """Return the ``id`` and ``name`` of ``agent``, or ``{}`` if missing."""
        if not agent:
            return {}

        return {"id": agent.id, "name": agent.name}

    def prepare_member_of(self, instance):
        """Return the prepared agents a non-organisation agent is a member of."""
        if instance.is_organisation:
            return None

        return [self._prepare_agent(org) for org in instance.member_of.all()]

    def prepare_members(self, instance):
        """Return the prepared members of a non-person agent."""
        if instance.is_person:
            return None

        return [self._prepare_agent(person) for person in instance.members.all()]
class DatasetDocument(ExtendedDocument):
    """Index document for ``Dataset`` with licence, source and related-object fields."""

    # Licence details.
    license_chosen = fields.IntegerField()
    license_condition_db_or_copyrighted = fields.TextField()
    license_condition_personal_data = fields.TextField()
    license_condition_modification = fields.BooleanField()
    license_condition_original = fields.BooleanField()
    license_condition_responsibilities = fields.TextField()
    license_condition_source = fields.BooleanField()
    license_condition_timestamp = fields.BooleanField()
    license_name = fields.TextField()
    license_description = fields.TextField()

    resource_modified = fields.DateField(attr='last_modified_resource')
    url = fields.KeywordField()
    source = fields.NestedField(properties={
        'title': fields.TextField(),
        'source_type': fields.TextField(),
        'url': fields.TextField(),
        'update_frequency': TranslatedTextField('update_frequency'),
        'last_import_timestamp': fields.DateField(),
    })

    formats = fields.KeywordField(multi=True)
    types = fields.KeywordField(multi=True)
    openness_scores = fields.IntegerField(multi=True)

    # Exposed as "institution" but read from the model's ``organization``.
    institution = fields.NestedField(
        attr='organization',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )
    category = fields.NestedField(properties={
        'id': fields.IntegerField(attr='id'),
        'image_url': fields.KeywordField(),
        'title': TranslatedTextField('title'),
        'description': TranslatedTextField('description'),
    })
    categories = fields.NestedField(properties={
        'id': fields.IntegerField(attr='id'),
        'image_url': fields.KeywordField(),
        'code': fields.KeywordField(),
        'title': TranslatedTextField('title'),
        'description': TranslatedTextField('description'),
    })
    downloads_count = fields.IntegerField()
    image_url = fields.TextField()
    image_alt = TranslatedTextField('image_alt')

    version = fields.KeywordField()
    source_title = fields.TextField()
    source_type = fields.TextField()
    source_url = fields.TextField()

    # Related objects, each reduced to an id and a translated title.
    resources = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })
    applications = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })

    articles = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })
    showcases = fields.NestedField(
        attr='showcases_published',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
        },
    )

    update_frequency = fields.KeywordField()
    users_following = fields.KeywordField(
        attr='users_following_list', multi=True)
    last_modified_resource = fields.DateField(attr='last_modified_resource')

    license_code = fields.IntegerField()
    computed_downloads_count = fields.IntegerField()
    computed_views_count = fields.IntegerField()
    has_high_value_data = fields.BooleanField()
    # The regions field is only declared when the feature flag is on.
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = regions_field()

    class Index:
        name = mcs.ELASTICSEARCH_INDEX_NAMES['datasets']
        settings = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_SETTINGS
        aliases = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_ALIAS

    class Django:
        model = Dataset
        related_models = [
            Application,
            Article,
            Category,
            DataSource,
            Organization,
            Resource,
            Showcase,
            UserFollowingDataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the dataset(s) to re-index when ``related_instance`` changes.

        The first matching model type wins; an instance of any other type
        yields None.
        """
        # Checked in order, mirroring a chain of isinstance tests.
        lookups = (
            # NOTE(review): a UserFollowingDataset resolves through
            # ``followed_applications``, which looks like it should be a
            # datasets accessor -- confirm against the user model.
            (UserFollowingDataset,
             lambda rel: rel.follower.followed_applications.all()),
            (Application,
             lambda rel: rel.datasets.filter(status='published')),
            (Article,
             lambda rel: rel.datasets.filter(status='published')),
            (Resource,
             lambda rel: rel.dataset),
            (Category,
             lambda rel: rel.dataset_set.filter(status='published')),
            (Organization,
             lambda rel: rel.datasets.filter(status='published')),
            (DataSource,
             lambda rel: rel.datasource_datasets.filter(status='published')),
            (Showcase,
             lambda rel: rel.datasets.filter(status='published')),
        )
        for model, lookup in lookups:
            if isinstance(related_instance, model):
                return lookup(related_instance)
        return None

    def prepare_search_date(self, instance):
        """Use the instance's ``verified`` value as the search date."""
        return instance.verified

    def prepare_source(self, instance):
        """Dump the dataset's source with ``DataSourceSerializer``; ``{}`` when there is none."""
        dumper = DataSourceSerializer()
        origin = instance.source
        return dumper.dump(origin) if origin else {}
Example #10
0
class MappingDocument(Document):
    """Index document for ``Mapping`` objects, stored in the 'mappings' index."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")

    # List fields below are filled by the matching prepare_* methods.
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'),
    )
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type',
    )
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()

    @staticmethod
    def prepare_from_concept(instance):
        """Return the url, code and name of the source concept, in that order."""
        url = instance.from_concept_url
        code = instance.from_concept_code
        name = instance.from_concept_name
        return [url, code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return the code and name of the target concept, in that order."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Concatenate the source-concept values with the target-concept values."""
        origin = self.prepare_from_concept(instance)
        target = self.prepare_to_concept(instance)
        return origin + target

    @staticmethod
    def prepare_concept_source(instance):
        """Return the source names of both ends of the mapping."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the source owners of both ends of the mapping."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the source owner types of both ends of the mapping."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` value of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` value of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        # Passing through a set removes duplicates.
        return list(set(mnemonics))

    @staticmethod
    def prepare_extras(instance):
        """Return the instance's extras, or an empty dict when they are falsy."""
        extras = instance.extras
        return extras if extras else {}
Example #11
0
class GoodDocument(DocType):
    """Search document for ``Good`` with category, seller, price and specification data."""

    pk = fields.IntegerField()
    id = fields.IntegerField()

    # Text fields follow one pattern: a base field with a keyword 'raw'
    # sub-field, plus *_en / *_ru / *_uk variants.
    name = fields.StringField(fields={'raw': fields.KeywordField()})
    name_en = fields.StringField(analyzer='english')
    name_ru = fields.StringField(analyzer='russian')
    name_uk = fields.StringField()
    description = fields.StringField(fields={'raw': fields.KeywordField()})
    description_en = fields.StringField(analyzer='english')
    description_ru = fields.StringField(analyzer='russian')
    description_uk = fields.StringField()

    category = fields.ObjectField(properties={
        'id': fields.IntegerField(),
        'slug': fields.KeywordField(),
        'name': fields.StringField(fields={'raw': fields.KeywordField()}),
        'name_en': fields.StringField(analyzer='english'),
        'name_ru': fields.StringField(analyzer='russian'),
        'name_uk': fields.StringField(),
        'is_main': fields.BooleanField(),
    })
    categories_ids = fields.IntegerField()
    categories_names = fields.StringField(
        fields={'raw': fields.KeywordField()},
    )
    categories = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'slug': fields.KeywordField(),
        'name': fields.StringField(fields={'raw': fields.KeywordField()}),
        'name_en': fields.StringField(analyzer='english'),
        'name_ru': fields.StringField(analyzer='russian'),
        'name_uk': fields.StringField(),
        'is_main': fields.BooleanField(),
    })

    seller = fields.ObjectField(properties={
        'id': fields.IntegerField(),
        'store_name': fields.StringField(
            fields={'raw': fields.KeywordField()},
        ),
        'store_name_en': fields.StringField(analyzer='english'),
        'store_name_ru': fields.StringField(analyzer='russian'),
        'store_name_uk': fields.StringField(),
        'description': fields.StringField(
            fields={'raw': fields.KeywordField()},
        ),
        'description_en': fields.StringField(analyzer='english'),
        'description_ru': fields.StringField(analyzer='russian'),
        'description_uk': fields.StringField(),
        'location': fields.StringField(
            fields={'raw': fields.KeywordField()},
        ),
        'location_en': fields.StringField(analyzer='english'),
        'location_ru': fields.StringField(analyzer='russian'),
        'location_uk': fields.StringField(),
        'goods_count': fields.IntegerField(),
        'rating': fields.FloatField(),
    })

    price = fields.FloatField(attr='price.amount')
    price_currency = fields.KeywordField()
    discount = fields.IntegerField()
    availability = fields.TextField()

    # Colour and size are read through dotted attribute paths.
    specifications = fields.ObjectField(properties={
        'color': fields.StringField(
            attr='color.definition',
            fields={'raw': fields.KeywordField()},
        ),
        'color_en': fields.StringField(
            attr='color.definition_en', analyzer='english'),
        'color_ru': fields.StringField(
            attr='color.definition_ru', analyzer='russian'),
        'color_uk': fields.StringField(attr='color.definition_uk'),
        'size': fields.StringField(
            attr='size.definition',
            fields={'raw': fields.KeywordField()},
        ),
        'size_en': fields.StringField(
            attr='size.definition_en', analyzer='english'),
        'size_ru': fields.StringField(
            attr='size.definition_ru', analyzer='russian'),
        'size_uk': fields.StringField(attr='size.definition_uk'),
    })
    images = fields.NestedField(properties={
        'image_url': fields.KeywordField(),
    })
    main_image_url = fields.KeywordField()
    created = fields.DateField()
    modified = fields.DateField()

    class Meta:
        model = Good
        related_models = [
            GoodsCategory,
            HoloUser,
            Store,
            GoodSpecifications,
            GoodImage,
        ]

    def get_queryset(self):
        """Return the base queryset with ``category`` and ``seller`` select-related."""
        return super().get_queryset().select_related('category', 'seller')

    def get_instances_from_related(self, related_instance):
        """Return the good(s) to re-index when ``related_instance`` changes.

        Returns None for unhandled types and for a ``HoloUser`` whose
        ``store`` is falsy.
        """
        if isinstance(related_instance, GoodsCategory):
            return related_instance.all_goods()

        if isinstance(related_instance, HoloUser):
            owned_store = related_instance.store
            if owned_store:
                return owned_store.goods.all()
            # No store: fall through to the remaining checks.

        if isinstance(related_instance, Store):
            return related_instance.goods.all()

        # Specifications and images both point at a single good.
        if isinstance(related_instance, (GoodSpecifications, GoodImage)):
            return related_instance.good

        return None
class DrugDocument(Document):
    """Index document for ``Drug`` objects, stored in the 'drugs' index."""

    source_id = fields.TextField(fields={'raw': fields.KeywordField()})
    trade_name = fields.TextField(fields={'raw': fields.KeywordField()})
    international_name = fields.ObjectField(properties={
        'name': fields.TextField(fields={'raw': fields.KeywordField()}),
    })
    drug_form = fields.TextField()
    marketing_status = fields.ObjectField(properties={
        'name': fields.TextField(fields={'raw': fields.KeywordField()}),
    })
    formula = fields.TextField()
    pharmacotherapeutic_group = fields.ObjectField(properties={
        'name': fields.TextField(),
    })
    atcs = fields.NestedField(properties={
        'name': fields.TextField(),
        'type': fields.IntegerField(),
    })

    # Applicant and manufacturers share the same name/address/country shape.
    drug_applicant = fields.ObjectField(properties={
        'name': fields.TextField(),
        'address': fields.TextField(),
        'country_id': fields.ObjectField(
            properties={'name': fields.TextField()}),
    })
    manufacturers = fields.NestedField(properties={
        'name': fields.TextField(),
        'address': fields.TextField(),
        'country_id': fields.ObjectField(
            properties={'name': fields.TextField()}),
    })

    registration_number = fields.TextField(
        fields={'raw': fields.KeywordField()},
    )
    registration_date = fields.DateField()
    expiration_date = fields.TextField()
    drug_type = fields.ObjectField(properties={'name': fields.TextField()})
    has_bio_origin = fields.BooleanField()
    has_phyto_origin = fields.BooleanField()
    is_orphan = fields.BooleanField()
    is_homeopatic = fields.BooleanField()
    INN = fields.ObjectField(properties={'name': fields.TextField()})
    premature_termination = fields.ObjectField(properties={
        'date': fields.DateField(),
        'reason': fields.TextField(),
    })
    instruction_url = fields.KeywordField()

    class Django:
        model = Drug  # The model associated with this Document
        queryset_pagination = 20
        related_models = [
            Manufacturer,
            InternationalName,
            MarketingStatus,
            PharmacotherapeuticGroup,
            ATC,
            Applicant,
            DrugType,
            INN,
            PrematureTermination,
        ]

    class Index:
        name = 'drugs'

    class Meta:
        ordering = ('trade_name', )

    def get_instances_from_related(self, related_instance):
        """Return the Drug instance(s) to re-index when a related object changes.

        Only ``ATC`` instances are resolved here; every other type listed in
        ``related_models`` yields None. Use ``related_models`` with caution:
        one related change can trigger updates of many indexed items.
        """
        if not isinstance(related_instance, ATC):
            return None
        # NOTE(review): ``atcs_set`` is presumably the accessor from ATC to
        # its drugs -- confirm against the models.
        return related_instance.atcs_set.all()

    def get_queryset(self):
        """Return the base queryset with the single-valued relations select-related."""
        joined_relations = (
            'international_name',
            'marketing_status',
            'pharmacotherapeutic_group',
            'drug_applicant',
            'drug_type',
            'INN',
            'premature_termination',
        )
        return super().get_queryset().select_related(*joined_relations)
Example #13
0
class ResourceDocument(ExtendedDocument):
    """Index document for ``Resource`` with file, dataset, institution and source data."""

    NOTES_FIELD_NAME = 'description'

    format = fields.TextField()
    formats = fields.KeywordField(attr='formats_list', multi=True)
    openness_score = fields.IntegerField()
    openness_scores = fields.IntegerField(multi=True)
    media_type = fields.TextField()
    downloads_count = fields.IntegerField()
    data_date = fields.DateField()
    file_url = fields.TextField()
    download_url = fields.TextField()
    link = fields.TextField()
    file_size = fields.IntegerField()
    types = fields.KeywordField(multi=True)

    dataset = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
        'slug': TranslatedTextField('slug'),
    })
    institution = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
        'slug': TranslatedTextField('slug'),
    })
    source = fields.NestedField(properties={
        'title': fields.TextField(),
        'source_type': fields.TextField(),
        'url': fields.TextField(),
        'update_frequency': TranslatedTextField('update_frequency'),
        'last_import_timestamp': fields.DateField(),
    })

    # ResourceDoc
    uuid = fields.TextField()
    description = TranslatedTextField('description')

    # Converted CSV file.
    csv_file_url = fields.TextField()
    csv_file_size = fields.LongField()
    csv_download_url = fields.TextField()

    # Converted JSON-LD file.
    jsonld_file_url = fields.TextField()
    jsonld_file_size = fields.LongField()
    jsonld_download_url = fields.TextField()

    type = fields.KeywordField()

    geo_data = fields.NestedField(properties={'id': fields.IntegerField()})
    tabular_data = fields.NestedField(properties={'id': fields.IntegerField()})
    chartable = fields.NestedField(properties={'id': fields.IntegerField()})
    data_special_signs = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'symbol': fields.KeywordField(),
        'name': TranslatedTextField('name'),
        'description': TranslatedTextField('description'),
    })
    is_chart_creation_blocked = fields.BooleanField()

    license_code = fields.IntegerField()
    update_frequency = fields.KeywordField()
    computed_downloads_count = fields.IntegerField()
    computed_views_count = fields.IntegerField()
    has_high_value_data = fields.BooleanField()

    # These fields are only declared when their feature flag is on.
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = regions_field(attr='all_regions')
    if is_enabled('S40_new_file_model.be'):
        files = files_field(attr='all_files')

    class Index:
        name = mcs.ELASTICSEARCH_INDEX_NAMES['resources']
        settings = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_SETTINGS
        aliases = mcs.ELASTICSEARCH_DSL_SEARCH_INDEX_ALIAS

    class Django:
        model = Resource
        related_models = [Dataset, SpecialSign]

    def get_instances_from_related(self, related_instance):
        """Return the published resources to re-index for a changed Dataset or SpecialSign.

        Any other type yields None.
        """
        if isinstance(related_instance, Dataset):
            return related_instance.resources.filter(status='published')
        if isinstance(related_instance, SpecialSign):
            linked = related_instance.special_signs_resources
            return linked.filter(status='published')
        return None

    def prepare_model_name(self, instance):
        """Return the ``type`` of the instance's category."""
        category = instance.category
        return category.type

    def prepare_openness_scores(self, instance):
        """Wrap the single openness score in a list."""
        score = instance.openness_score
        return [score]

    def prepare_source(self, instance):
        """Dump the parent dataset's source with ``DataSourceSerializer``; ``{}`` when there is none."""
        dumper = DataSourceSerializer()
        origin = instance.dataset.source
        return dumper.dump(origin) if origin else {}
Example #14
0
class PageDocument(DocType):
    """Search document for a single ``Page``; most values come from its parent document."""

    document = fields.IntegerField(attr='document_id')

    title = fields.TextField()
    description = fields.TextField()

    tags = fields.ListField(fields.KeywordField())
    created_at = fields.DateField()

    # Ids reached through the parent document's relations.
    publicbody = fields.IntegerField(attr='document.publicbody_id')
    jurisdiction = fields.IntegerField(
        attr='document.publicbody.jurisdiction_id',
    )
    foirequest = fields.IntegerField(attr='document.foirequest_id')
    campaign = fields.IntegerField(attr='document.foirequest.campaign_id')
    collections = fields.IntegerField()

    user = fields.IntegerField(attr='document.user_id')
    team = fields.IntegerField(attr='document.team_id')

    public = fields.BooleanField()

    number = fields.IntegerField()
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )

    class Meta:
        model = Page
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return the base queryset with the parent ``document`` select-related."""
        base = super().get_queryset()
        return base.select_related('document')

    def prepare_title(self, obj):
        """Return the document title for page 1, else ''.

        A title ending in '.pdf' is also replaced with ''.
        """
        if obj.number != 1:
            return ''
        title = obj.document.title
        return '' if title.endswith('.pdf') else title

    def prepare_description(self, obj):
        """Return the document description for page 1, else ''."""
        return obj.document.description if obj.number == 1 else ''

    def prepare_tags(self, obj):
        """Return the ids of the parent document's tags."""
        tag_ids = []
        for tag in obj.document.tags.all():
            tag_ids.append(tag.id)
        return tag_ids

    def prepare_created_at(self, obj):
        """Return the parent document's creation timestamp."""
        parent = obj.document
        return parent.created_at

    def prepare_public(self, obj):
        """Return the result of the parent document's ``is_public()``."""
        parent = obj.document
        return parent.is_public()

    def prepare_team(self, obj):
        """Return the parent document's team id, or None when it is falsy."""
        return obj.document.team_id or None

    def prepare_collections(self, obj):
        """Return the ids from the parent document's ``document_documentcollection`` relation."""
        memberships = obj.document.document_documentcollection.all()
        collection_ids = memberships.values_list('id', flat=True)
        return list(collection_ids)
Example #15
0
class TimecourseDocument(DocType):
    """Search document for ``Timecourse`` objects."""

    study = string_field('study')
    pk = fields.IntegerField('pk')

    # Related objects, reduced to a few identifying properties.
    group = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
            'count': fields.IntegerField(),
        },
    )
    individual = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
    )
    interventions = ObjectField(
        properties={
            'pk': fields.IntegerField(),
            'name': string_field('name'),
        },
        multi=True,
    )
    substance = ObjectField(properties={'name': string_field('name')})
    ex = ObjectField(properties={'pk': string_field('pk')})

    normed = fields.BooleanField()

    raw = ObjectField(properties={'pk': fields.IntegerField()})
    pharmacokinetics = ObjectField(
        properties={'pk': fields.IntegerField()},
        multi=True,
    )

    # Multi-valued series, read from the model's ``null_*`` attributes.
    value = fields.FloatField('null_value', multi=True)
    mean = fields.FloatField('null_mean', multi=True)
    median = fields.FloatField('null_median', multi=True)
    min = fields.FloatField('null_min', multi=True)
    max = fields.FloatField('null_max', multi=True)
    se = fields.FloatField('null_se', multi=True)
    sd = fields.FloatField('null_sd', multi=True)
    cv = fields.FloatField('null_cv', multi=True)
    unit = string_field('unit')

    time_unit = string_field('time_unit')
    figure = string_field('figure')

    time = fields.FloatField('null_time', multi=True)
    tissue = string_field('tissue')
    pktype = string_field("pktype_key")

    # Currently disabled fields:
    # auc_end = fields.FloatField(attr='auc_end')
    # kel = fields.FloatField(attr='kel')

    class Meta(object):
        model = Timecourse
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Skip the index refresh after every update (overrides the global
        # setting).
        auto_refresh = False
Example #16
0
class ExperimentDocument(Document):
    """Elasticsearch document corresponding to the ``Experiment`` model."""

    # Analyzed text fields with fielddata enabled and a keyword "raw" sub-field.
    title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_authors = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    technology = fields.TextField(
        analyzer=html_strip_no_stop,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    downloadable_organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    # The platform fields use a text (not keyword) "raw" sub-field.
    platform_names = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )
    platform_accession_codes = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )

    # Plain fields.
    accession_code = fields.KeywordField()
    alternate_accession_code = fields.KeywordField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()
    num_downloadable_samples = fields.IntegerField()
    source_first_published = fields.DateField()

    # Downloadable samples are indexed as keywords so that unique counts can
    # be calculated on the facets.
    downloadable_samples = fields.ListField(fields.KeywordField())

    # Sample keywords are indexed to improve search.
    sample_keywords = fields.ListField(fields.KeywordField())

    class Django:
        model = Experiment
        parallel_indexing = True
        queryset_pagination = 3000

        fields = ["id"]

    def get_queryset(self):
        """Return the default queryset ordered by ``id``."""
        base = super().get_queryset()
        return base.order_by("id")
Example #17
0
class InterventionDocument(Document):
    """Index document for ``Intervention`` objects, stored in the 'interventions' index."""

    pk = fields.IntegerField()

    # Each of these reads a ``*_name`` attribute and adds a 'raw' sub-field
    # using the keyword analyzer.
    measurement_type = fields.StringField(
        attr='measurement_type_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    form = fields.StringField(
        attr='form_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    route = fields.StringField(
        attr='route_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    application = fields.StringField(
        attr='application_name',
        fields={'raw': fields.StringField(analyzer='keyword')},
    )
    choice = string_field('choice')
    time_unit = string_field('time_unit')
    time = fields.FloatField()
    time_end = fields.FloatField()

    substance = string_field('substance_name')
    study_name = string_field('study_name')
    study_sid = string_field('study_sid')

    name = string_field('name')
    normed = fields.BooleanField()
    raw_pk = string_field('raw_pk')

    # Numeric values and their unit.
    value = fields.FloatField()
    mean = fields.FloatField()
    median = fields.FloatField()
    min = fields.FloatField()
    max = fields.FloatField()
    se = fields.FloatField()
    sd = fields.FloatField()
    cv = fields.FloatField()
    unit = string_field('unit')

    access = string_field('access')
    allowed_users = fields.ObjectField(
        attr="allowed_users",
        properties={'username': string_field("username")},
        multi=True,
    )

    class Django:
        model = Intervention
        # Do not auto-update Elasticsearch when a model is saved or deleted.
        ignore_signals = True
        # Skip the index refresh after every update.
        auto_refresh = False

    class Index:
        name = 'interventions'
        settings = elastic_settings

    def get_queryset(self):
        """Return the base queryset with the study relation select-related.

        Optional, but it lets the related rows be fetched in one SQL request.
        """
        base = super().get_queryset()
        return base.select_related('ex__interventionset__study')
Example #18
0
class DatasetsDoc(DocType):
    """Search document for published ``Dataset`` objects (doc type 'dataset')."""

    id = fields.IntegerField()
    slug = TranslatedKeywordField('slug')
    title = TranslatedTextField(
        'title',
        common_params={'suggest': fields.CompletionField()},
    )
    version = fields.KeywordField()
    url = fields.KeywordField()
    notes = TranslatedTextField('notes')

    # Exposed as "institution" but read from the model's ``organization``.
    institution = fields.NestedField(
        attr='organization',
        properties={
            'id': fields.IntegerField(),
            'title': TranslatedTextField('title'),
            'slug': TranslatedTextField('slug'),
        },
    )

    category = fields.NestedField(
        attr='category',
        properties={
            'id': fields.IntegerField(attr='id'),
            'image_url': fields.KeywordField(),
            'title': TranslatedTextField('title'),
            'description': TranslatedTextField('description'),
        },
    )

    # Related objects, each reduced to an id and a translated title.
    resources = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })

    applications = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })

    articles = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
    })

    tags = TranslatedKeywordsList(attr='tags_list')
    # customfields = fields.TextField()
    formats = fields.KeywordField(attr='formats', multi=True)

    # Licence details.
    license_condition_db_or_copyrighted = fields.TextField()
    license_condition_modification = fields.BooleanField()
    license_condition_original = fields.BooleanField()
    license_condition_responsibilities = fields.TextField()
    license_condition_source = fields.BooleanField()
    license_condition_timestamp = fields.BooleanField()
    license_name = fields.StringField(attr='license_name')
    license_description = fields.StringField(attr='license_description')
    update_frequency = fields.KeywordField()

    openness_scores = fields.IntegerField(attr='openness_scores', multi=True)
    users_following = fields.KeywordField(
        attr='users_following_list', multi=True)
    views_count = fields.IntegerField()
    downloads_count = fields.IntegerField()
    status = fields.KeywordField()
    modified = fields.DateField()
    last_modified_resource = fields.DateField(attr='last_modified_resource')
    created = fields.DateField()
    verified = fields.DateField()

    class Meta:
        doc_type = 'dataset'
        model = Dataset
        related_models = [
            Organization,
            Category,
            Application,
            Article,
            Resource,
            UserFollowingDataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the dataset(s) to re-index when ``related_instance`` changes.

        The first matching model type wins; any other type yields None.
        """
        # NOTE(review): ``Article`` is listed in ``related_models`` but has no
        # entry here, so article changes resolve to None -- confirm intended.
        lookups = (
            # NOTE(review): resolves through ``followed_applications``, which
            # looks like it should be a datasets accessor -- confirm against
            # the user model.
            (UserFollowingDataset,
             lambda rel: rel.follower.followed_applications.all()),
            (Application,
             lambda rel: rel.datasets.all()),
            (Resource,
             lambda rel: rel.dataset),
            (Category,
             lambda rel: rel.dataset_set.filter(status='published')),
            (Organization,
             lambda rel: rel.datasets.filter(status='published')),
        )
        for model, lookup in lookups:
            if isinstance(related_instance, model):
                return lookup(related_instance)
        return None

    def get_queryset(self):
        """Restrict indexing to instances whose status is 'published'."""
        model = self._doc_type.model
        return model.objects.filter(status='published')
Example #19
0
class FoiRequestDocument(DocType):
    """Search document built from ``FoiRequest`` objects."""

    # Full-text body; its value is produced by ``prepare_content``.
    content = fields.TextField(analyzer=analyzer, index_options='offsets')
    title = fields.TextField()
    description = fields.TextField()

    resolution = fields.KeywordField()
    status = fields.KeywordField()
    costs = fields.FloatField()

    tags = fields.ListField(fields.KeywordField())
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    campaign = fields.IntegerField()

    due_date = fields.DateField()
    first_message = fields.DateField()
    last_message = fields.DateField()

    publicbody = fields.IntegerField(attr='public_body_id')
    jurisdiction = fields.IntegerField(attr='public_body.jurisdiction_id')

    user = fields.IntegerField(attr='user_id')
    team = fields.IntegerField(attr='team_id')

    public = fields.BooleanField()

    class Meta:
        model = FoiRequest
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return all requests, joining the related rows in the same query.

        Optional override: it exists to avoid per-object lookups of the
        related ``jurisdiction`` and ``public_body``.
        """
        joined = ('jurisdiction', 'public_body')
        return FoiRequest.objects.select_related(*joined)

    def prepare_content(self, obj):
        """Render the search text template with ``obj`` as ``object``."""
        template_name = 'foirequest/search/foirequest_text.txt'
        return render_to_string(template_name, {'object': obj})

    def prepare_tags(self, obj):
        """Return the ids of all tags attached to ``obj``."""
        return [t.id for t in obj.tags.all()]

    def prepare_public(self, obj):
        """Return the result of ``obj.in_public_search_index()``."""
        return obj.in_public_search_index()

    def prepare_campaign(self, obj):
        """Return the raw ``campaign_id`` of ``obj``."""
        return obj.campaign_id

    def prepare_classification(self, obj):
        """Return the public body's classification id followed by its ancestors' ids.

        An empty list is returned when there is no public body or the public
        body has no classification.
        """
        if obj.public_body_id is None:
            return []
        node = obj.public_body.classification
        if node is None:
            return []
        ids = [node.id]
        ids.extend(ancestor.id for ancestor in node.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return the public body's category ids followed by all their ancestors' ids."""
        if not obj.public_body:
            return []
        cats = obj.public_body.categories.all()
        own_ids = [cat.id for cat in cats]
        ancestor_ids = [
            ancestor.id for cat in cats for ancestor in cat.get_ancestors()
        ]
        return own_ids + ancestor_ids

    def prepare_team(self, obj):
        """Return the team id of the request's project, or ``None``."""
        project = obj.project
        if not project:
            return None
        # A falsy team id is reported as None.
        return project.team_id or None
Example #20
0
class CourseRunDocument(BaseCourseDocument):
    """
    Elasticsearch document describing a course run.
    """

    announcement = fields.DateField()
    availability = fields.TextField(fields={
        'raw': fields.KeywordField(),
        'lower': fields.TextField(analyzer=case_insensitive_keyword),
    })
    authoring_organization_uuids = fields.KeywordField(multi=True)
    course_key = fields.KeywordField()
    end = fields.DateField()
    enrollment_start = fields.DateField()
    enrollment_end = fields.DateField()
    first_enrollable_paid_seat_sku = fields.TextField()
    go_live_date = fields.DateField()
    has_enrollable_seats = fields.BooleanField()
    has_enrollable_paid_seats = fields.BooleanField()
    hidden = fields.BooleanField()
    is_enrollable = fields.BooleanField()
    is_current_and_still_upgradeable = fields.BooleanField()
    language = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()},
    )
    license = fields.KeywordField()
    marketing_url = fields.TextField()
    min_effort = fields.IntegerField()
    max_effort = fields.IntegerField()
    mobile_available = fields.BooleanField()
    number = fields.KeywordField()
    paid_seat_enrollment_end = fields.DateField()
    pacing_type = fields.KeywordField()
    program_types = fields.KeywordField(multi=True)
    published = fields.BooleanField()
    skill_names = fields.KeywordField(multi=True)
    status = fields.KeywordField()
    start = fields.DateField()
    slug = fields.TextField()
    staff_uuids = fields.KeywordField(multi=True)
    # Every variant of ``type`` is read from the ``type_legacy`` attribute.
    type = fields.TextField(
        attr='type_legacy',
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(attr='type_legacy'),
            'lower': fields.TextField(
                analyzer=case_insensitive_keyword,
                attr='type_legacy',
            ),
        },
    )
    transcript_languages = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(multi=True)},
        multi=True,
    )
    weeks_to_complete = fields.IntegerField()

    def prepare_aggregation_key(self, obj):
        """Return ``courserun:<course key>``.

        Runs are aggregated by their Course key because that is how they are
        planned to be de-duplicated on the marketing site.
        """
        return f'courserun:{obj.course.key}'

    def prepare_course_key(self, obj):
        """Return the key of the run's course."""
        return obj.course.key

    def prepare_first_enrollable_paid_seat_sku(self, obj):
        """Return ``obj.first_enrollable_paid_seat_sku()``."""
        return obj.first_enrollable_paid_seat_sku()

    def prepare_is_current_and_still_upgradeable(self, obj):
        """Return ``obj.is_current_and_still_upgradeable()``."""
        return obj.is_current_and_still_upgradeable()

    def prepare_has_enrollable_paid_seats(self, obj):
        """Return ``obj.has_enrollable_paid_seats()``."""
        return obj.has_enrollable_paid_seats()

    def prepare_language(self, obj):
        """Return the run's language passed through ``_prepare_language``."""
        return self._prepare_language(obj.language)

    def prepare_number(self, obj):
        """Return the ``course`` part of the parsed run key."""
        return CourseKey.from_string(obj.key).course

    def prepare_org(self, obj):
        """Return the ``org`` part of the parsed run key."""
        return CourseKey.from_string(obj.key).org

    def prepare_paid_seat_enrollment_end(self, obj):
        """Return ``obj.get_paid_seat_enrollment_end()``."""
        return obj.get_paid_seat_enrollment_end()

    def prepare_partner(self, obj):
        """Return the short code of the course's partner."""
        return obj.course.partner.short_code

    def prepare_published(self, obj):
        """Return whether the run's status equals ``CourseRunStatus.Published``."""
        return obj.status == CourseRunStatus.Published

    def prepare_seat_types(self, obj):
        """Return the slug of every seat type of the run."""
        return [entry.slug for entry in obj.seat_types]

    def prepare_skill_names(self, obj):
        """Return the distinct skill names whitelisted for the run's course."""
        whitelisted = get_whitelisted_course_skills(obj.course.key)
        return list({entry.skill.name for entry in whitelisted})

    def prepare_staff_uuids(self, obj):
        """Return the stringified uuid of every staff member of the run."""
        return [str(person.uuid) for person in obj.staff.all()]

    def prepare_transcript_languages(self, obj):
        """Return every transcript language passed through ``_prepare_language``."""
        transcript_langs = obj.transcript_languages.all()
        return [self._prepare_language(lang) for lang in transcript_langs]

    def get_queryset(self):
        """Return the visible runs with course, seat types and transcripts preloaded."""
        runs = super().get_queryset().select_related('course')
        runs = runs.prefetch_related('seats__type')
        runs = runs.prefetch_related('transcript_languages')
        return filter_visible_runs(runs)

    class Django:
        """
        Django Elasticsearch DSL ORM Meta.
        """

        model = CourseRun

    class Meta:
        """
        Meta options.
        """

        parallel_indexing = True
        queryset_pagination = settings.ELASTICSEARCH_DSL_QUERYSET_PAGINATION
class ResourceDocument(Document):
    """Search document for ``Resource`` objects, stored in ``rt-resources``.

    Most ``prepare_*`` methods merge the values of the resource itself with
    the values of its paratexts (``instance.get_paratext()``) and, when the
    result is not empty, append a catch-all ``"any"`` entry.
    """

    meta = fields.KeywordField()
    content = fields.TextField(attr="title.main_title", store=True)

    is_private = fields.BooleanField()

    title = fields.TextField(
        analyzer=text_folding_analyzer,
        fields={
            "raw": fields.KeywordField(),
            "sort": fields.KeywordField(normalizer=lowercase_sort_normalizer),
            "suggest": fields.CompletionField(),
        },
        **copy_to_content,
    )
    form_genre = get_controlled_term_field(options=copy_to_content)
    subjects = get_controlled_term_field(options=copy_to_content)
    date_display = fields.TextField(**copy_to_content)
    year = fields.IntegerField(**copy_to_content)
    summary = fields.TextField(**copy_to_content)
    classifications_printing_publishing = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_translation = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext_functions = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    contributions = fields.ObjectField(
        properties={
            "agent": get_agent_field(options=copy_to_content),
            "roles": get_controlled_term_field(),
        })
    published_as = fields.KeywordField()
    languages = get_controlled_term_field(options=copy_to_content)
    places = fields.ObjectField(
        properties={
            "place":
            get_place_field(options=copy_to_content),
            "fictional_place":
            fields.TextField(fields={"raw": fields.KeywordField()}),
        })
    relationships = fields.ObjectField(
        properties={
            "relationship_type": get_controlled_term_field(
                options=copy_to_content),
            "related_to": get_resource_field(options=copy_to_content),
        })

    # events = get_event_field(options=copy_to_content)

    is_original = fields.BooleanField()
    is_translation = fields.BooleanField()

    has_date_radical = fields.KeywordField()

    authors = fields.ObjectField(
        attr="get_authors_source_text",
        properties={"person": get_agent_field(options=copy_to_content)},
    )

    translated_from = get_controlled_term_field(options=copy_to_content)

    class Index:
        name = "rt-resources"

    class Django:
        model = Resource
        fields = ["id"]

        related_models = [
            Classification,
            Contribution,
            ControlledTerm,
            Date,
            ResourceLanguage,
            ResourcePlace,
            ResourceRelationship,
            Title,
        ]

    def get_queryset(self):
        """Return the resources to index.

        Resources having a relationship of type ``"paratext of"`` are
        excluded; ``title`` and ``date`` are joined in.
        """
        return (super().get_queryset().exclude(
            relationships__relationship_type__label="paratext of").
                select_related("title", "date"))

    def get_instances_from_related(self, related_instance):
        """Return the resource(s) a related model instance points to.

        Returns ``None`` for instances of any other type.
        """
        if isinstance(related_instance, Date):
            return related_instance.resource

        if isinstance(related_instance, (ControlledTerm, Title)):
            return related_instance.resources.all()

        if isinstance(
                related_instance,
            (
                Classification,
                Contribution,
                ResourceLanguage,
                ResourcePlace,
                ResourceRelationship,
            ),
        ):
            return related_instance.resource

    def prepare_meta(self, instance):
        """Return the meta labels of ``instance``.

        Originals yield the single string ``"source texts"``; every other
        resource yields a (possibly empty) list that may contain
        ``"paratexts"`` and/or ``"translations"``.
        """
        if instance.is_original():
            return "source texts"

        meta = []

        if instance.get_paratext():
            meta.append("paratexts")

        if instance.is_translation():
            meta.append("translations")

        return meta

    def prepare_title(self, instance):
        """Return the instance title plus every differing paratext title."""
        titles = [str(instance.title)]

        for relationship in instance.get_paratext():
            paratext = relationship.resource
            if str(paratext.title) != str(instance.title):
                titles.append(str(paratext.title))

        return titles

    def prepare_form_genre(self, instance):
        """Return the subjects from the ``fast-forms`` and ``rt-agt`` vocabularies."""
        return self._get_subjects(instance, ["fast-forms", "rt-agt"])

    def _get_subjects(self, instance, prefix):
        """Collect subject labels of ``instance`` and, recursively, its paratexts.

        ``prefix`` is a list of vocabulary prefixes to filter on. A non-empty
        result gets an ``"any"`` entry appended; because nested results are
        produced by this same method, the merged list can contain more than
        one ``"any"`` entry.
        """
        subjects = [{
            "label": item.label
        } for item in instance.subjects.filter(vocabulary__prefix__in=prefix)]

        for relationship in instance.get_paratext():
            subjects.extend(self._get_subjects(relationship.resource, prefix))

        if subjects:
            subjects.append({"label": "any"})

        return subjects

    def prepare_subjects(self, instance):
        """Return the subjects from the ``fast-topic`` vocabulary."""
        return self._get_subjects(instance, ["fast-topic"])

    def prepare_date_display(self, instance):
        """Return the date of the resolved resource as a string, or ``None``."""
        resource = self._get_resource(instance)
        if resource.date:
            return str(resource.date)

    def _get_resource(self, resource):
        """Return ``resource.paratext_of()`` for paratexts, else ``resource`` itself."""
        if resource.is_paratext():
            return resource.paratext_of()

        return resource

    def prepare_year(self, instance):
        """Return the year(s) covered by the resolved resource's date.

        With both an earliest and a latest date, every year of the inclusive
        range is returned as a list; with only one of them, that single year;
        otherwise ``None``.
        """
        resource = self._get_resource(instance)
        if resource.date:
            date_earliest = resource.date.get_date_earliest()
            date_latest = resource.date.get_date_latest()

            if date_earliest and date_latest:
                return list(range(date_earliest.year, date_latest.year + 1))

            if date_earliest:
                return date_earliest.year

            if date_latest:
                return date_latest.year

    def prepare_summary(self, instance):
        """Return the non-empty summaries of the instance and its paratexts."""
        summaries = []

        if instance.summary:
            summaries = [instance.summary]

        for relationship in instance.get_paratext():
            if relationship.resource.summary:
                summaries.append(relationship.resource.summary)

        return summaries

    def prepare_classifications_printing_publishing(self, instance):
        """Return the classifications from the ``rt-ppt`` vocabulary."""
        return self._get_classifications(instance, "rt-ppt")

    def _get_classifications(self, instance, prefix):
        """Collect edition labels of ``instance`` and, recursively, its paratexts.

        Editions labelled ``original`` or ``source-text`` (case-insensitive)
        are skipped. A non-empty result gets an ``"any"`` entry appended at
        every recursion level.
        """
        classifications = [
            {
                "edition": {
                    "label": item.edition.label
                },
            } for item in instance.classifications.filter(
                edition__vocabulary__prefix=prefix)
            if item.edition.label.lower() not in ["original", "source-text"]
        ]

        for relationship in instance.get_paratext():
            classifications.extend(
                self._get_classifications(relationship.resource, prefix))

        if classifications:
            classifications.append({"edition": {"label": "any"}})

        return classifications

    def prepare_classifications_translation(self, instance):
        """Return the classifications from the ``rt-tt`` vocabulary."""
        return self._get_classifications(instance, "rt-tt")

    def prepare_classifications_paratext(self, instance):
        """Return the classifications from the ``rt-pt`` vocabulary."""
        return self._get_classifications(instance, "rt-pt")

    def prepare_classifications_paratext_functions(self, instance):
        """Return the classifications from the ``rt-ptf`` vocabulary."""
        return self._get_classifications(instance, "rt-ptf")

    def prepare_contributions(self, instance):
        """Return one entry per contribution, paratext contributions included.

        Agents whose name starts with ``"Anon"`` are indexed as
        ``"Anonymous"``. Each entry carries one ``{"label": ...}`` dict per
        role; roles of paratext contributions get the suffix
        ``" of translation paratext"``.
        """
        contributions = [{
            "agent": {
                "id":
                item.agent.id,
                "name":
                "Anonymous" if item.agent.name.startswith("Anon") else
                item.agent.get_index_name(),
            },
            # One dict per role. The previous form was a dict comprehension
            # wrapped in a list, which kept only the last role's label.
            "roles": [{
                "label": (f"{role.label} of translation paratext"
                          if item.resource.is_paratext() else role.label)
            } for role in item.roles.all()],
        } for item in instance.get_contributions(include_paratext=True)]

        if contributions:
            contributions.append({
                "agent": {
                    "name": "any"
                },
                "roles": [{
                    "label": "any"
                }]
            })

        return contributions

    def prepare_published_as(self, instance):
        """Return the non-empty ``published_as`` values of all contributions."""
        published_as = []

        for item in instance.get_contributions(include_paratext=True):
            if item.published_as:
                published_as.append(item.published_as)

        if published_as:
            published_as.append("any")

        return published_as

    def prepare_languages(self, instance):
        """Return language labels of ``instance`` and, recursively, its paratexts."""
        languages = [{
            "label": item.language.label
        } for item in instance.languages.all()]

        for relationship in instance.get_paratext():
            languages.extend(self.prepare_languages(relationship.resource))

        if languages:
            languages.append({"label": "any"})

        return languages

    def prepare_places(self, instance):
        """Return one entry per place of ``instance``.

        A fictional place contributes its name as the address; when a real
        place is also set, the address becomes ``"<fictional> (<real>)"`` and
        the ``place`` object is replaced by the real place's data. An item
        with neither yields an empty dict.
        """
        places = []

        for item in instance.places.all():
            address = ""
            place = {}

            if item.fictional_place:
                address = item.fictional_place
                place = {
                    "fictional_place": item.fictional_place,
                    "place": {
                        "address": address
                    },
                }

            if item.place:
                address = (f"{address} ({item.place.address})"
                           if address else item.place.address)
                place["place"] = {
                    "address": address,
                    "geo": item.place.geo,
                    "country": {
                        "name": item.place.country.name
                    },
                }

            places.append(place)

        if places:
            places.append(
                {"place": {
                    "address": "any",
                    "country": {
                        "name": "any"
                    }
                }})

        return places

    def prepare_relationships(self, instance):
        """Return the relationship type and target of every relationship."""
        relationships = [{
            "relationship_type": {
                "label": item.relationship_type.label
            },
            "related_to": {
                "id": item.related_to.id,
                "title": {
                    "main_title": str(item.related_to.title)
                },
            },
        } for item in instance.relationships.all()]

        if relationships:
            relationships.append({"relationship_type": {"label": "any"}})

        return relationships

    def prepare_events(self, instance):
        """Return id, title and place of every event of ``instance``.

        NOTE(review): the ``events`` field declaration is commented out on
        this class, so this method is presumably unused at the moment.
        """
        events = [{
            "id": item.id,
            "title": item.title,
            "place": {
                "address": item.place.address,
                "country": {
                    "name": item.place.country.name
                },
            },
        } for item in instance.events.all()]

        if events:
            events.append({
                "title": "any",
                "place": {
                    "address": "any",
                    "country": {
                        "name": "any"
                    }
                },
            })

        return events

    def prepare_has_date_radical(self, instance):
        """Return ``"yes"`` or ``"no"`` depending on ``instance.has_date_radical()``."""
        if instance.has_date_radical():
            return "yes"

        return "no"

    def prepare_translated_from(self, instance):
        """Return the labels of the source text languages, plus ``"any"``."""
        # Single call; a falsy result (None or empty) yields an empty list.
        languages = [{
            "label": language.label
        } for language in instance.get_languages_source_text() or []]

        if languages:
            languages.append({"label": "any"})

        return languages
Example #22
0
class ConceptDocument(Document):
    """Search document for ``Concept`` objects, stored in the ``concepts`` index."""

    class Index:
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    name = fields.KeywordField(attr='display_name', normalizer="lowercase")
    last_update = fields.DateField(attr='updated_at')
    locale = fields.ListField(fields.KeywordField(attr='display_name'))
    source = fields.KeywordField(attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(attr='concept_class', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField(dynamic=True)

    class Django:
        model = Concept
        fields = ['version']

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct, non-null locales of the concept's names."""
        with_locale = instance.names.filter(locale__isnull=False)
        per_locale = with_locale.distinct('locale')
        return list(per_locale.values_list('locale', flat=True))

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source of the concept."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection of the concept."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the de-duplicated mnemonics of the concept's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the de-duplicated ``parent_url`` of the concept's collections."""
        owner_urls = set()
        for coll in instance.collection_set.select_related('user', 'organization'):
            owner_urls.add(coll.parent_url)
        return list(owner_urls)

    @staticmethod
    def prepare_extras(instance):
        """Return the concept's extras made JSON-safe, flattened when a dict.

        An empty dict is returned when there are no extras or the converted
        value is falsy.
        """
        if not instance.extras:
            return {}

        converted = jsonify_safe(instance.extras)
        if isinstance(converted, dict):
            converted = flatten_dict(converted)

        return converted or {}
Example #23
0
class ProgramDocument(BaseDocument, OrganizationsMixin):
    """
    Elasticsearch document describing a program.
    """

    authoring_organization_uuids = fields.KeywordField(multi=True)
    authoring_organizations = fields.TextField(
        multi=True,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    authoring_organization_bodies = fields.TextField(multi=True)
    credit_backing_organizations = fields.TextField(multi=True)
    card_image_url = fields.TextField()
    hidden = fields.BooleanField()
    is_program_eligible_for_one_click_purchase = fields.BooleanField()
    language = fields.TextField(multi=True)
    marketing_url = fields.TextField()
    min_hours_effort_per_week = fields.IntegerField()
    max_hours_effort_per_week = fields.IntegerField()
    partner = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    published = fields.BooleanField()
    subtitle = fields.TextField(analyzer=html_strip)
    status = fields.KeywordField()
    search_card_display = fields.TextField(multi=True)
    subject_uuids = fields.KeywordField(multi=True)
    staff_uuids = fields.KeywordField(multi=True)
    start = fields.DateField()
    seat_types = fields.KeywordField(multi=True)
    title = fields.TextField(
        analyzer=synonym_text,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
        },
    )
    type = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    weeks_to_complete_min = fields.IntegerField()
    weeks_to_complete_max = fields.IntegerField()

    def prepare_aggregation_key(self, obj):
        """Return ``program:<uuid>``."""
        return f'program:{obj.uuid}'

    def prepare_credit_backing_organizations(self, obj):
        """Return the credit backing organizations passed through ``_prepare_organizations``."""
        backing = obj.credit_backing_organizations.all()
        return self._prepare_organizations(backing)

    def prepare_language(self, obj):
        """Return every program language passed through ``_prepare_language``."""
        return [self._prepare_language(lang) for lang in obj.languages]

    def prepare_organizations(self, obj):
        """Return the authoring organizations followed by the credit backing ones."""
        authoring = self.prepare_authoring_organizations(obj)
        credit_backing = self.prepare_credit_backing_organizations(obj)
        return authoring + credit_backing

    def prepare_partner(self, obj):
        """Return the short code of the program's partner."""
        return obj.partner.short_code

    def prepare_published(self, obj):
        """Return whether the program's status equals ``ProgramStatus.Active``."""
        return obj.status == ProgramStatus.Active

    def prepare_seat_types(self, obj):
        """Return the slug of every seat type of the program."""
        return [entry.slug for entry in obj.seat_types]

    def prepare_search_card_display(self, obj):
        """Return the search card values of the Degree sharing the program's uuid.

        An empty list is returned when no such Degree exists.
        """
        try:
            degree = Degree.objects.get(uuid=obj.uuid)
        except Degree.DoesNotExist:
            return []
        else:
            return [
                degree.search_card_ranking,
                degree.search_card_cost,
                degree.search_card_courses,
            ]

    def prepare_subject_uuids(self, obj):
        """Return the stringified uuid of every subject of the program."""
        return [str(entry.uuid) for entry in obj.subjects]

    def prepare_staff_uuids(self, obj):
        """Return the distinct stringified staff uuids across all course runs."""
        unique_uuids = set()
        for course_run in obj.course_runs:
            unique_uuids.update(str(person.uuid) for person in course_run.staff.all())
        return list(unique_uuids)

    def prepare_type(self, obj):
        """Return the ``name_t`` attribute of the program's type."""
        return obj.type.name_t

    def get_queryset(self):
        """Return the base queryset with ``type`` and ``partner`` joined in."""
        programs = super().get_queryset()
        programs = programs.select_related('type')
        return programs.select_related('partner')

    class Django:
        """
        Django Elasticsearch DSL ORM Meta.
        """

        model = Program

    class Meta:
        """
        Meta options.
        """

        parallel_indexing = True
        queryset_pagination = settings.ELASTICSEARCH_DSL_QUERYSET_PAGINATION