Esempio n. 1
0
class PublicBodyDocument(DocType):
    """Search-index document built from ``PublicBody`` model instances.

    ``name_auto`` and ``jurisdiction`` read model attributes through
    ``attr``; ``content`` and the list-valued fields are produced by the
    matching ``prepare_*`` methods below.
    """

    name = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )
    name_auto = fields.TextField(
        attr='all_names',
        analyzer=ngram_analyzer,
    )
    content = fields.TextField(analyzer=analyzer)

    jurisdiction = fields.IntegerField(attr='jurisdiction_id')

    # List-valued fields, each filled by a prepare_* method.
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    regions = fields.ListField(fields.IntegerField())
    regions_exact = fields.ListField(fields.IntegerField())
    regions_kind = fields.ListField(fields.KeywordField())

    class Meta:
        model = PublicBody
        queryset_chunk_size = 100

    def get_queryset(self):
        """Return the base queryset with related rows loaded up front.

        Optional, but it avoids per-object queries for the relations that
        the ``prepare_*`` methods walk.
        """
        queryset = super().get_queryset()
        queryset = queryset.select_related('jurisdiction')
        return queryset.prefetch_related(
            'classification', 'categories', 'regions')

    def prepare_content(self, obj):
        """Join the non-empty text attributes of ``obj`` with spaces."""
        parts = [obj.name, obj.other_names]
        parts.append(obj.jurisdiction.name if obj.jurisdiction else '')
        parts.append(obj.email or '')
        parts.extend([obj.description, obj.contact, obj.address, obj.url])
        parts.append(obj.classification.name if obj.classification else '')
        parts.extend(category.name for category in obj.categories.all())
        # filter(None, ...) drops empty strings and None values.
        return ' '.join(filter(None, parts))

    def prepare_classification(self, obj):
        """Return the classification id followed by its ancestors' ids."""
        node = obj.classification
        if node is None:
            return []
        ids = [node.id]
        ids.extend(ancestor.id for ancestor in node.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return ids of the categories, then ids of all their ancestors."""
        categories = obj.categories.all()
        own_ids = [category.id for category in categories]
        ancestor_ids = [
            ancestor.id
            for category in categories
            for ancestor in category.get_ancestors()
        ]
        return own_ids + ancestor_ids

    def prepare_regions(self, obj):
        """Return ids of the regions, then ids of all their ancestors."""
        regions = obj.regions.all()
        own_ids = [region.id for region in regions]
        ancestor_ids = [
            ancestor.id
            for region in regions
            for ancestor in region.get_ancestors()
        ]
        return own_ids + ancestor_ids

    def prepare_regions_exact(self, obj):
        """Return only the ids of the directly assigned regions."""
        return [region.id for region in obj.regions.all()]

    def prepare_regions_kind(self, obj):
        """Return the ``kind`` of every directly assigned region."""
        return [region.kind for region in obj.regions.all()]
Esempio n. 2
0
class ArticleDocument(Document):
    """Search document for ``Article`` objects from ``Article.published``.

    Every mapped field has a ``prepare_*`` method that derives its value
    from the article instance.
    """

    title = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    url = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    description = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    start_publication = fields.DateField()
    author = fields.ListField(fields.IntegerField())
    category = fields.ListField(fields.IntegerField())

    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )

    special_signals = True

    class Django:
        model = Article
        queryset_chunk_size = 100

    def get_queryset(self):
        """Index only what the ``published`` manager returns."""
        return Article.published.all()

    def prepare_content(self, obj):
        """Concatenate title, description, tag-stripped body, category
        titles and tag names into one space-separated string."""
        html = obj.get_html_content()
        pieces = [obj.title, obj.description, strip_tags(html)]
        pieces.extend(category.title for category in obj.categories.all())
        pieces.extend(tag.name for tag in obj.tags.all())
        return ' '.join(pieces)

    def prepare_description(self, obj):
        """Return the description with HTML tags removed."""
        return strip_tags(obj.description)

    def prepare_url(self, obj):
        """Return the article's absolute URL."""
        return obj.get_absolute_url()

    def prepare_title(self, obj):
        """Return the article title unchanged."""
        return obj.title

    def prepare_start_publication(self, obj):
        """Return the article's ``start_publication`` value."""
        return obj.start_publication

    def prepare_category(self, obj):
        """Return the ids of the article's categories."""
        return [category.id for category in obj.categories.all()]

    def prepare_author(self, obj):
        """Return the ids of the article's authors."""
        return [writer.id for writer in obj.authors.all()]
Esempio n. 3
0
class ConceptDocument(Document):
    """Search document for ``Concept`` objects, stored in ``concepts``."""

    class Index:
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    name = fields.KeywordField(attr='display_name', normalizer="lowercase")
    last_update = fields.DateField(attr='updated_at')
    locale = fields.ListField(fields.KeywordField(attr='display_name'))
    source = fields.KeywordField(
        attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(
        attr='concept_class', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField()

    class Django:
        model = Concept
        fields = ['version']

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct non-null locales of the concept's names."""
        named = instance.names.filter(locale__isnull=False)
        locales = named.distinct('locale').values_list('locale', flat=True)
        return list(locales)

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the de-duplicated mnemonics of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        return instance.extras or {}
Esempio n. 4
0
class ArticleDocument(DocType):
    """Search document for ``Article`` objects from ``Article.published``."""

    title = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    url = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    description = fields.TextField(
        fields={'raw': fields.KeywordField()}, analyzer=analyzer)
    start_publication = fields.DateField()
    author = fields.ListField(fields.IntegerField())
    category = fields.ListField(fields.IntegerField())

    content = fields.TextField(analyzer=analyzer)

    special_signals = True

    class Meta:
        model = Article
        queryset_chunk_size = 100

    def get_queryset(self):
        """Index only what the ``published`` manager returns."""
        return Article.published.all()

    def prepare_content(self, obj):
        """Return the article's HTML content with the tags stripped."""
        return strip_tags(obj.get_html_content())

    def prepare_description(self, obj):
        """Return the description with HTML tags removed."""
        return strip_tags(obj.description)

    def prepare_url(self, obj):
        """Return the article's absolute URL."""
        return obj.get_absolute_url()

    def prepare_title(self, obj):
        """Return the article title unchanged."""
        return obj.title

    def prepare_start_publication(self, obj):
        """Return the article's ``start_publication`` value."""
        return obj.start_publication

    def prepare_category(self, obj):
        """Return the ids of the article's categories."""
        return [category.id for category in obj.categories.all()]

    def prepare_author(self, obj):
        """Return the ids of the article's authors."""
        return [writer.id for writer in obj.authors.all()]
Esempio n. 5
0
class JobDocument(Document):
    """Search document for ``Job`` objects, stored in the ``jobs`` index."""

    # --- List fields ---
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    connectors_to_execute = fields.ListField(fields.KeywordField())

    # --- Free-text fields ---
    errors = fields.TextField()

    # --- Keyword fields, usable for aggregations/visualizations ---
    source = fields.KeywordField()
    status = fields.KeywordField()
    md5 = fields.KeywordField()
    tlp = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # --- Nested fields (related objects) ---
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        }
    )
    # The two report fields intentionally declare the same sub-mapping,
    # each with its own field instances.
    analyzer_reports = fields.NestedField(properties={
        "name": fields.KeywordField(),
        "status": fields.KeywordField(),
        "report": fields.ObjectField(),
        "errors": fields.TextField(),
        "start_time": fields.DateField(),
        "end_time": fields.DateField(),
    })
    connector_reports = fields.NestedField(properties={
        "name": fields.KeywordField(),
        "status": fields.KeywordField(),
        "report": fields.ObjectField(),
        "errors": fields.TextField(),
        "start_time": fields.DateField(),
        "end_time": fields.DateField(),
    })

    class Index:
        # Elasticsearch index that stores these documents.
        name = "jobs"

    class Django:
        # Model backing this document.
        model = Job

        # Model fields indexed as-is, with no explicit declaration above.
        fields = [
            "is_sample",
            "received_request_time",
            "finished_analysis_time",
        ]
Esempio n. 6
0
class TweetsDocument(Document):
    """Search document for ``models.Tweet`` objects.

    ``author`` and ``entities`` are declared explicitly as object fields,
    which is why they are left out of ``Django.fields`` below.
    """

    # search_item = fields.TextField(attrs="search_item")

    # ListField wraps a field *instance*, as every other ListField in this
    # file does; the previous code passed the TextField class itself.
    excepted_words = fields.ListField(fields.TextField())

    author = fields.ObjectField()
    entities = fields.ObjectField()
    text = fields.TextField(fields={"raw": fields.KeywordField()},
                            analyzer="text_analyser")
    raw_text = fields.TextField()

    retweet_count = fields.IntegerField()

    class Django:
        model = models.Tweet
        # Model fields indexed without an explicit declaration above.
        fields = [
            "search_item",
            "id",
            # "author",
            "created_date",
            # "entities",
            "source",
            "lang",
        ]

        auto_refresh = False
        ignore_signals = False
        # Paginate the django queryset used to populate the index with the
        # specified size (by default there is no pagination).
        queryset_pagination = 1000
class ArticleDocument(Document):
    """Search document for ``Articles`` objects, stored in ``articles``."""

    category = fields.ObjectField(properties={
        'id': fields.TextField(),
        'name': fields.TextField(),
        'image': fields.FileField(),
    })
    cover = fields.FileField()
    tags = fields.ListField(fields.TextField())

    class Index:
        name = 'articles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 1}

    class Django:
        model = Articles
        # Model fields indexed without an explicit declaration above.
        fields = [
            'id',
            'updated_at',
            'title',
            'subtitle',
            'author_name',
            'user',
            'realease',
            'is_active',
            'slug',
        ]
        # related_models = [Category]

    def get_queryset(self):
        """Return the base queryset with ``category`` joined in.

        Optional; it lets the related category be loaded in the same SQL
        request instead of one extra query per article.
        """
        base_queryset = super().get_queryset()
        return base_queryset.select_related('category')
Esempio n. 8
0
class CollectionDocument(Document):
    """Search document for ``Collection`` objects (``collections`` index)."""

    class Index:
        name = 'collections'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    locale = fields.ListField(fields.KeywordField())
    owner = fields.KeywordField(
        attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type', normalizer='lowercase')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url', normalizer='lowercase')
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField()

    class Django:
        model = Collection
        fields = ['full_name', 'custom_validation_schema']

    @staticmethod
    def prepare_locale(instance):
        """Return the value for the ``locale`` field.

        NOTE(review): ``get`` is defined outside this block. If it is a
        path-style getter, ``[]`` is the path rather than a default, and a
        ``None`` ``supported_locales`` would pass through - confirm.
        """
        supported = instance.supported_locales
        return get(supported, [])

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        return instance.extras or {}
Esempio n. 9
0
class OrganizationDocument(Document):
    """Search document for ``Organization`` objects (``organizations``)."""

    class Index:
        name = 'organizations'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.BooleanField(attr='public_can_view')
    name = fields.KeywordField(attr='name', normalizer="lowercase")
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()
    user = fields.ListField(fields.KeywordField())

    class Django:
        model = Organization
        fields = ['is_active', 'company', 'location']

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        return instance.extras or {}

    @staticmethod
    def prepare_user(instance):
        """Return the usernames of the organization's members."""
        usernames = instance.members.values_list('username', flat=True)
        return list(usernames)
Esempio n. 10
0
class ExperimentDocument(Document):
    """Elasticsearch document that corresponds to the Experiment model."""

    # Analyzed text fields, each with fielddata enabled and a "raw"
    # sub-field.
    title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_authors = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    technology = fields.TextField(
        analyzer=html_strip_no_stop,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    # For the two platform fields the "raw" sub-field is a TextField.
    platform_names = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )
    platform_accession_codes = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )

    # Basic fields
    accession_code = fields.KeywordField()
    alternate_accession_code = fields.KeywordField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()
    num_downloadable_samples = fields.IntegerField()
    source_first_published = fields.DateField()

    # All downloadable samples are indexed as keywords so that unique
    # counts can be calculated on the facets.
    downloadable_samples = fields.ListField(fields.KeywordField())

    class Django:
        model = Experiment
        parallel_indexing = True
        queryset_pagination = 3000

        fields = ["id"]

    def get_queryset(self):
        """Return the default queryset ordered by ``id``."""
        default_queryset = super().get_queryset()
        return default_queryset.order_by("id")
Esempio n. 11
0
class SourceDocument(Document):
    """Search document for ``Source`` objects, stored in ``sources``."""

    class Index:
        name = 'sources'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    locale = fields.ListField(fields.KeywordField())
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='parent_resource')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    public_can_view = fields.TextField(attr='public_can_view')
    source_type = fields.KeywordField(attr='source_type')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name')

    class Django:
        model = Source
        fields = ['full_name', 'custom_validation_schema']

    @staticmethod
    def prepare_locale(instance):
        """Return the value for the ``locale`` field.

        NOTE(review): ``get`` is defined outside this block. If it is a
        path-style getter, ``[]`` is the path rather than a default, and a
        ``None`` ``supported_locales`` would pass through - confirm.
        """
        supported = instance.supported_locales
        return get(supported, [])
Esempio n. 12
0
class JobDocument(Document):
    """Search document for ``Job`` objects, stored in the ``jobs`` index."""

    # --- Object/list fields ---
    analyzers_requested = fields.ListField(fields.KeywordField())
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    analysis_reports = fields.ObjectField()

    # --- Free-text and object fields ---
    errors = fields.TextField()
    runtime_configuration = fields.ObjectField()

    # --- Keyword fields, usable for aggregations/visualizations ---
    source = fields.KeywordField()
    md5 = fields.KeywordField()
    status = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # --- Nested fields (related objects) ---
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        }
    )

    class Index:
        # Elasticsearch index that stores these documents.
        name = "jobs"

    class Django:
        # Model backing this document.
        model = Job

        # Model fields indexed as-is, with no explicit declaration above.
        fields = [
            "is_sample",
            "run_all_available_analyzers",
            "received_request_time",
            "finished_analysis_time",
            "force_privacy",
            "disable_external_analyzers",
        ]

    def prepare_runtime_configuration(self, instance):
        """Return the job's ``runtime_configuration`` value unchanged."""
        return instance.runtime_configuration

    def prepare_analysis_reports(self, instance):
        """Return the job's ``analysis_reports`` value unchanged.

        See https://github.com/django-es/django-elasticsearch-dsl/issues/36
        """
        return instance.analysis_reports
Esempio n. 13
0
class CollectionDocument(Document):
    """Search document for ``Collection`` objects (``collections`` index)."""

    class Index:
        name = 'collections'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    locale = fields.ListField(fields.KeywordField())
    owner = fields.KeywordField(
        attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type', normalizer='lowercase')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url', normalizer='lowercase')
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField()
    identifier = fields.ObjectField()
    publisher = fields.KeywordField(attr='publisher', normalizer='lowercase')
    immutable = fields.KeywordField(attr='immutable')
    created_by = fields.KeywordField()

    class Django:
        model = Collection
        fields = [
            'full_name',
            'custom_validation_schema',
            'revision_date',
            'retired',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the value for the ``locale`` field.

        NOTE(review): ``get`` is defined outside this block. If it is a
        path-style getter, ``[]`` is the path rather than a default, and a
        ``None`` ``supported_locales`` would pass through - confirm.
        """
        supported = instance.supported_locales
        return get(supported, [])

    @staticmethod
    def prepare_extras(instance):
        """Return ``jsonify_safe(extras)``, or ``{}`` when either the raw
        value or the converted value is falsy."""
        if not instance.extras:
            return {}
        return jsonify_safe(instance.extras) or {}

    @staticmethod
    def prepare_identifier(instance):
        """Return ``jsonify_safe(identifier)``, or ``{}`` when either the
        raw value or the converted value is falsy."""
        if not instance.identifier:
            return {}
        return jsonify_safe(instance.identifier) or {}

    @staticmethod
    def prepare_created_by(instance):
        """Return the username of the creator.

        Assumes ``created_by`` is always set; a ``None`` value raises
        ``AttributeError`` here.
        """
        creator = instance.created_by
        return creator.username
Esempio n. 14
0
class AdDocument(DocType):
    """Search document for ``Ad`` objects.

    Category, image and product data are pulled from related objects via
    dotted ``attr`` paths; ``related_models`` together with
    ``get_instances_from_related`` maps a changed related object back to
    the ads that reference it.
    """

    category = fields.KeywordField(attr='category.path_name')
    category_path = fields.KeywordField(attr='category.path_ids_str')
    category_slug = fields.KeywordField(attr='category.slug')

    images = fields.ListField(fields.KeywordField(attr='images_url'))

    product = fields.ObjectField(properties={
        'title': fields.KeywordField(attr='product_type.title'),
        'specs': fields.NestedField(attr='specs.all', properties={
            'label': fields.KeywordField(attr='field.label'),
            'value': fields.KeywordField(attr='value.value_json')
        })
    })

    class Meta:
        model = Ad
        fields = [
            'title',
            'desc',
            'price',
            'created_at',
            'updated_at'
        ]
        related_models = [Taxonomy, File, Product]

    def get_instances_from_related(self, related_instance):
        """Return the ad(s) attached to ``related_instance``.

        Args:
            related_instance: an instance of one of ``Meta.related_models``.

        Returns:
            The ``ad_set`` queryset for a ``Taxonomy`` or ``File``, the
            single ``ad`` for a ``Product``, or ``None`` when the product
            has no ad or the instance is of any other type.
        """
        # Taxonomy and File expose their ads through the same reverse
        # accessor.
        if isinstance(related_instance, (Taxonomy, File)):
            return related_instance.ad_set.all()
        if isinstance(related_instance, Product):
            try:
                return related_instance.ad
            except Ad.DoesNotExist:
                # Deliberate best-effort: a product without an ad simply
                # has nothing to re-index.
                return None
        return None
Esempio n. 15
0
class UserProfileDocument(Document):
    """Search document for ``UserProfile`` objects (``user_profiles``)."""

    class Index:
        name = 'user_profiles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    date_joined = fields.DateField(attr='created_at')
    username = fields.KeywordField(attr='username', normalizer='lowercase')
    location = fields.KeywordField(attr='location', normalizer='lowercase')
    company = fields.KeywordField(attr='company', normalizer='lowercase')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    extras = fields.ObjectField(dynamic=True)
    org = fields.ListField(fields.KeywordField())

    class Django:
        model = UserProfile
        fields = ['is_active', 'is_superuser', 'is_staff']

    @staticmethod
    def prepare_extras(instance):
        """Return the sanitized extras, flattened when they form a dict.

        Falls back to ``{}`` when ``extras`` is falsy or the processed
        value ends up falsy.
        """
        if not instance.extras:
            return {}
        cleaned = jsonify_safe(instance.extras)
        if isinstance(cleaned, dict):
            cleaned = flatten_dict(cleaned)
        return cleaned or {}

    @staticmethod
    def prepare_org(instance):
        """Return the mnemonics of the user's organizations."""
        mnemonics = instance.organizations.values_list('mnemonic', flat=True)
        return list(mnemonics)
Esempio n. 16
0
class UserProfileDocument(Document):
    """Search document for ``UserProfile`` objects (``user_profiles``)."""

    class Index:
        name = 'user_profiles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    date_joined = fields.DateField(attr='created_at')
    username = fields.KeywordField(attr='username', normalizer='lowercase')
    location = fields.KeywordField(attr='location', normalizer='lowercase')
    company = fields.KeywordField(attr='company', normalizer='lowercase')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    extras = fields.ObjectField()
    org = fields.ListField(fields.KeywordField())

    class Django:
        model = UserProfile
        fields = ['is_active']

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        return instance.extras or {}

    @staticmethod
    def prepare_org(instance):
        """Return the mnemonics of the user's organizations."""
        mnemonics = instance.organizations.values_list('mnemonic', flat=True)
        return list(mnemonics)
Esempio n. 17
0
class CityDocument(Document):
    """City Elasticsearch document.

    This document has been created purely for testing out complex fields.
    """

    # ID
    id = fields.IntegerField(attr='id')

    # ------------------------------------------------------------------
    # Main data fields for search
    # ------------------------------------------------------------------

    name = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    info = StringField(analyzer=html_strip)

    # ------------------------------------------------------------------
    # Nested fields for search and filtering
    # ------------------------------------------------------------------

    # Country object
    country = fields.NestedField(properties={
        'name': StringField(
            analyzer=html_strip,
            fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            },
        ),
        'info': StringField(analyzer=html_strip),
        'location': fields.GeoPointField(attr='location_field_indexing'),
    })

    location = fields.GeoPointField(attr='location_field_indexing')

    # ------------------------------------------------------------------
    # Other complex fields for search and filtering
    # ------------------------------------------------------------------

    boolean_list = fields.ListField(
        StringField(attr='boolean_list_indexing'))
    # boolean_dict_indexing = fields.ObjectField(
    #     properties={
    #         'true': fields.BooleanField(),
    #         'false': fields.BooleanField(),
    #     }
    # )
    datetime_list = fields.ListField(
        StringField(attr='datetime_list_indexing'))
    # datetime_dict_indexing
    float_list = fields.ListField(
        StringField(attr='float_list_indexing'))
    # float_dict_indexing
    integer_list = fields.ListField(
        StringField(attr='integer_list_indexing'))

    # integer_dict_indexing

    class Django(object):
        # The model associated with this Document.
        model = City

    class Meta(object):
        parallel_indexing = True
Esempio n. 18
0
class BookDocument(DocType):
    """Book Elasticsearch document."""

    # Different parts of the code use different fields. Two use cases:
    # (1) more-like-this functionality, which uses the `title`,
    # `description` and `summary` fields, and (2) search and filtering
    # functionality, which uses all of the fields.

    # ID
    id = fields.IntegerField(attr='id')

    # ------------------------------------------------------------------
    # Main data fields for search
    # ------------------------------------------------------------------

    # Sub-fields of `title`; extended below on Elasticsearch >= 5.0.
    __title_fields = {
        'raw': KeywordField(),
        'suggest': fields.CompletionField(),
        'edge_ngram_completion': StringField(analyzer=edge_ngram_completion),
        'mlt': StringField(analyzer='english'),
    }

    if ELASTICSEARCH_GTE_5_0:
        # Context-aware suggester with three category contexts.
        __title_fields['suggest_context'] = fields.CompletionField(
            contexts=[
                {
                    "name": "tag",
                    "type": "category",
                    "path": "tags.raw",
                },
                {
                    "name": "state",
                    "type": "category",
                    "path": "state.raw",
                },
                {
                    "name": "publisher",
                    "type": "category",
                    "path": "publisher.raw",
                },
            ]
        )

    title = StringField(analyzer=html_strip, fields=__title_fields)

    description = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    summary = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    # ------------------------------------------------------------------
    # Additional fields for search and filtering
    # ------------------------------------------------------------------

    authors = fields.ListField(
        StringField(
            analyzer=html_strip,
            fields={'raw': KeywordField()},
        )
    )

    # Publisher
    publisher = StringField(
        attr='publisher_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    # Publication date
    publication_date = fields.DateField()

    # State
    state = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )

    # ISBN
    isbn = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField()},
    )

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Tags
    tags = StringField(
        attr='tags_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True,
    )

    # Date created
    created = fields.DateField()

    null_field = StringField(attr='null_field_indexing')

    class Meta(object):
        """Meta options."""

        # The model associated with this DocType.
        model = Book
        parallel_indexing = True

    def prepare_summary(self, instance):
        """Return the summary cut to its first 32766 characters.

        NOTE(review): 32766 presumably mirrors an Elasticsearch/Lucene
        term-size limit for the ``raw`` keyword sub-field - confirm.
        """
        full_summary = instance.summary
        return full_summary[:32766]

    def prepare_authors(self, instance):
        """Return the names of all authors of the book."""
        return [person.name for person in instance.authors.all()]
Esempio n. 19
0
class MappingDocument(Document):
    """Search document for ``Mapping`` objects, stored in the 'mappings' index."""

    class Index:
        # Index name and settings used for this document type.
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        # Backing model plus the model fields indexed as-is.
        model = Mapping
        fields = ['external_id']

    # --- values configured with an explicit ``attr`` -----------------------
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name')
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source')
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type')

    # --- concept related values (lists are built by prepare_* below) -------
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # --- membership in source / collection versions ------------------------
    source_version = fields.ListField(fields.IntegerField())
    collection_version = fields.ListField(fields.IntegerField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic')

    @staticmethod
    def prepare_from_concept(instance):
        """Return URL, code and name of the mapping's from-concept."""
        url = instance.from_concept_url
        code = instance.from_concept_code
        name = instance.from_concept_name
        return [url, code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return code and name of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept terms followed by the to-concept terms."""
        from_terms = self.prepare_from_concept(instance)
        to_terms = self.prepare_to_concept(instance)
        return from_terms + to_terms

    @staticmethod
    def prepare_concept_source(instance):
        """Return the source names of both ends of the mapping."""
        from_name = instance.from_source_name
        to_name = instance.to_source_name
        return [from_name, to_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the source owners of both ends of the mapping."""
        from_owner = instance.from_source_owner
        to_owner = instance.to_source_owner
        return [from_owner, to_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the source owner types of both ends of the mapping."""
        from_type = instance.from_source_owner_type
        to_type = instance.to_source_owner_type
        return [from_type, to_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ids of all sources related to the mapping."""
        source_ids = instance.sources.values_list('id', flat=True)
        return list(source_ids)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ids of all collections related to the mapping."""
        collection_ids = instance.collection_set.values_list('id', flat=True)
        return list(collection_ids)

    @staticmethod
    def prepare_collection(instance):
        """Return ids of the HEAD collections sharing a mnemonic with the
        mapping's collections."""
        # Imported inside the function, exactly as before.
        from core.collections.models import Collection

        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        head_collections = Collection.objects.filter(
            version=HEAD, mnemonic__in=mnemonics)
        unique_ids = head_collections.distinct('id').values_list(
            'id', flat=True)
        return list(unique_ids)
Esempio n. 20
0
class FoiRequestDocument(Document):
    """Search document for ``FoiRequest`` objects."""

    # Full text rendered from a template, see prepare_content().
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )
    title = fields.TextField()
    description = fields.TextField()

    resolution = fields.KeywordField()
    status = fields.KeywordField()
    costs = fields.FloatField()

    tags = fields.ListField(fields.KeywordField())
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    campaign = fields.IntegerField()

    due_date = fields.DateField()
    first_message = fields.DateField()
    last_message = fields.DateField()

    publicbody = fields.IntegerField(attr='public_body_id')
    jurisdiction = fields.IntegerField(attr='public_body.jurisdiction_id')

    user = fields.IntegerField(attr='user_id')
    team = fields.IntegerField(attr='team_id')

    public = fields.BooleanField()

    class Django:
        # Backing model and the chunk size used when iterating the queryset.
        model = FoiRequest
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return the requests to index, joining jurisdiction and public body
        in the same SQL query (optional, done for performance)."""
        joined_relations = ('jurisdiction', 'public_body')
        return FoiRequest.objects.select_related(*joined_relations)

    def prepare_content(self, obj):
        """Render the searchable text of the request from its template."""
        context = {'object': obj}
        return render_to_string('foirequest/search/foirequest_text.txt',
                                context)

    def prepare_tags(self, obj):
        """Return the ids of all tags attached to the request."""
        tag_ids = []
        for tag in obj.tags.all():
            tag_ids.append(tag.id)
        return tag_ids

    def prepare_public(self, obj):
        """Return the result of ``obj.in_public_search_index()``."""
        is_public = obj.in_public_search_index()
        return is_public

    def prepare_campaign(self, obj):
        """Return the request's ``campaign_id``."""
        campaign_id = obj.campaign_id
        return campaign_id

    def prepare_classification(self, obj):
        """Return the id of the public body's classification followed by the
        ids of its ancestors; empty without a public body or classification."""
        if obj.public_body_id is None:
            return []
        classification = obj.public_body.classification
        if classification is None:
            return []
        ids = [classification.id]
        ids.extend(c.id for c in classification.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return the ids of the public body's categories followed by the ids
        of all their ancestors; empty without a public body."""
        if not obj.public_body:
            return []
        cats = obj.public_body.categories.all()
        ids = [cat.id for cat in cats]
        ids += [
            ancestor.id for cat in cats for ancestor in cat.get_ancestors()
        ]
        return ids

    def prepare_team(self, obj):
        """Return the team id of the request's project, or None."""
        if not (obj.project and obj.project.team_id):
            return None
        return obj.project.team_id
Esempio n. 21
0
class LegislationDocument(DocType):
    """Search document for ``Legislation`` objects.

    Classification and tag names are indexed as single strings in which the
    individual names are joined with ``CONN``; a name that itself contains
    ``CONN`` is rejected with a ``ValidationError``.
    """

    classifications = fields.TextField(term_vector='with_positions_offsets')

    article_classifications = fields.TextField(
        term_vector='with_positions_offsets')

    tags = fields.TextField(term_vector='with_positions_offsets')

    article_tags = fields.TextField(term_vector='with_positions_offsets')

    country = fields.KeywordField()
    country_name = fields.KeywordField(attr='country.name')

    law_type = fields.KeywordField()

    pdf_text = fields.TextField()

    year_mentions = fields.ListField(fields.IntegerField())

    articles = fields.NestedField(
        properties={
            'pk':
            fields.IntegerField(),
            'code':
            fields.KeywordField(),
            'text':
            fields.TextField(),
            'classifications_text':
            fields.TextField(term_vector='with_positions_offsets'),
            'parent_classifications':
            fields.TextField(term_vector='with_positions_offsets'),
            'tags_text':
            fields.TextField(term_vector='with_positions_offsets'),
            'parent_tags':
            fields.TextField(term_vector='with_positions_offsets')
        })

    @staticmethod
    def _join_checked(names, kind):
        """Join ``names`` with ``CONN`` after validating every single name.

        ``kind`` is the capitalised noun used in the error message
        ("Classification" or "Tag").

        Raises ``ValidationError`` when any name contains ``CONN``. Each name
        is checked on its own, so a multi-character ``CONN`` that only shows
        up across the boundary of two adjacent names is not reported.
        """
        # Materialise once: the input may be a queryset or a set and is
        # iterated twice (validation, then join).
        names = list(names)
        if any(CONN in name for name in names):
            raise ValidationError(
                "{} names must not include the character "
                "'{}'.".format(kind, CONN))
        return CONN.join(names)

    def prepare_classifications(self, instance):
        """Return the legislation's classification names joined by ``CONN``."""
        names = instance.classifications.all().values_list('name', flat=True)
        return self._join_checked(names, "Classification")

    def prepare_article_classifications(self, instance):
        """Return the distinct classification names of all articles, joined
        by ``CONN``."""
        names = {
            classification.name
            for article in instance.articles.all()
            for classification in article.classifications.all()
        }
        return self._join_checked(names, "Classification")

    def prepare_tags(self, instance):
        """Return the legislation's tag names joined by ``CONN``."""
        names = instance.tags.all().values_list('name', flat=True)
        return self._join_checked(names, "Tag")

    def prepare_article_tags(self, instance):
        """Return the distinct tag names of all articles, joined by ``CONN``."""
        names = {
            tag.name
            for article in instance.articles.all()
            for tag in article.tags.all()
        }
        return self._join_checked(names, "Tag")

    def prepare_country(self, instance):
        """Return the ``iso`` attribute of the legislation's country."""
        return instance.country.iso

    def prepare_pdf_text(self, instance):
        """Return the text of all pages, separated by blank lines."""
        return '\n\n'.join(page.page_text for page in instance.pages.all())

    def prepare_year_mentions(self, instance):
        """Return every 4-digit number found in ``year_mention`` that lies
        within ``settings.MIN_YEAR`` and ``settings.MAX_YEAR`` (inclusive)."""
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        candidates = (
            int(match)
            for match in re.findall(r'\d{4}', instance.year_mention or ''))
        return [
            year for year in candidates
            if settings.MIN_YEAR <= year <= settings.MAX_YEAR
        ]

    def get_instances_from_related(self, related_instance):
        """Return the legislation(s) reached from ``related_instance``.

        A ``LegislationArticle`` yields its single ``legislation``; any other
        related model yields ``legislation_set.all()``.
        """
        if isinstance(related_instance, LegislationArticle):
            return related_instance.legislation
        else:  # it's a TaxonomyClassification or TaxonomyTag
            return related_instance.legislation_set.all()

    class Meta:
        model = Legislation  # The model associated with this DocType

        # The fields of the model to be indexed in Elasticsearch
        fields = [
            'id',
            'title',
            'abstract',
            'year',
            'year_amendment',
        ]

        related_models = [
            LegislationArticle, TaxonomyClassification, TaxonomyTag
        ]
Esempio n. 22
0
class MappingDocument(Document):
    """Search document for ``Mapping`` objects, stored in the 'mappings' index."""

    class Index:
        # Index name and settings used for this document type.
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        # Backing model plus the model fields indexed as-is.
        model = Mapping
        fields = ['external_id']

    # --- values configured with an explicit ``attr`` -----------------------
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")

    # --- concept related values (lists are built by prepare_* below) -------
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # --- membership in source / collection versions ------------------------
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField(dynamic=True)
    created_by = fields.KeywordField(attr='created_by.username')

    @staticmethod
    def prepare_from_concept(instance):
        """Return URL, code and name of the from-concept.

        The name is ``from_concept_name`` when that is truthy, otherwise the
        ``display_name`` of the related from-concept.
        """
        name = get(instance, 'from_concept_name')
        if not name:
            name = get(instance, 'from_concept.display_name')
        return [instance.from_concept_url, instance.from_concept_code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return code and name of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept terms followed by the to-concept terms."""
        from_terms = self.prepare_from_concept(instance)
        to_terms = self.prepare_to_concept(instance)
        return from_terms + to_terms

    @staticmethod
    def prepare_concept_source(instance):
        """Return the source names of both ends of the mapping."""
        from_name = instance.from_source_name
        to_name = instance.to_source_name
        return [from_name, to_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the source owners of both ends of the mapping."""
        from_owner = instance.from_source_owner
        to_owner = instance.to_source_owner
        return [from_owner, to_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the source owner types of both ends of the mapping."""
        from_type = instance.from_source_owner_type
        to_type = instance.to_source_owner_type
        return [from_type, to_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source related to the mapping."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection related to the mapping."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct mnemonics of the mapping's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        unique_mnemonics = set(list(mnemonics))
        return list(unique_mnemonics)

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of the mapping's
        collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        owner_urls = {coll.parent_url for coll in collections}
        return list(owner_urls)

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras passed through ``jsonify_safe`` and,
        when the result is a dict, ``flatten_dict``; ``{}`` when there is
        nothing to index."""
        if not instance.extras:
            return {}
        value = jsonify_safe(instance.extras)
        if isinstance(value, dict):
            value = flatten_dict(value)
        return value or {}
Esempio n. 23
0
class VariableDocument(GenericDataDocument):
    """Search document for ``data.Variable`` objects."""

    name = fields.TextField(analyzer=n_gram_analyzer)

    dataset = fields.ObjectField(
        properties={
            "name": fields.TextField(),
            "label": fields.TextField(),
            "label_de": fields.TextField(),
        })
    categories = fields.ObjectField(
        properties={
            "labels": fields.ListField(fields.TextField(analyzer="english")),
            "labels_de": fields.ListField(fields.TextField(analyzer="german")),
        })
    conceptual_dataset = fields.KeywordField()

    @staticmethod
    def _get_study(model_object: Variable) -> Study:
        """Return the study of the variable's dataset."""
        return model_object.dataset.study

    def prepare_analysis_unit(self, variable: Variable) -> Optional[str]:
        """Pass the dataset's analysis unit through
        ``_handle_missing_content`` and return the result."""
        analysis_unit = variable.dataset.analysis_unit
        return self._handle_missing_content(analysis_unit)

    @staticmethod
    def prepare_categories(variable: Variable) -> Dict[str, List[str]]:
        """Return the ``labels`` / ``labels_de`` of the variable's categories.

        Labels starting with a bracketed negative number (e.g. ``[-1] ...``)
        are dropped; keys without any labels are omitted from the result.
        """
        output = {}
        for key in ("labels", "labels_de"):
            labels = variable.categories.get(key)
            if not labels:
                continue
            output[key] = [
                label for label in labels
                if not re.match(r"\[-\d+\].*", label)
            ]
        return output

    def prepare_conceptual_dataset(self, variable: Variable) -> Optional[str]:
        """Pass the dataset's conceptual dataset through
        ``_handle_missing_content`` and return the result."""
        conceptual_dataset = variable.dataset.conceptual_dataset
        return self._handle_missing_content(conceptual_dataset)

    def prepare_period(self, variable: Variable) -> Optional[str]:
        """Pass the dataset's period through ``_handle_missing_content`` and
        return the result."""
        period = variable.dataset.period
        return self._handle_missing_content(period)

    class Index:  # pylint: disable=missing-docstring,too-few-public-methods
        name = f"{settings.ELASTICSEARCH_DSL_INDEX_PREFIX}variables"

    class Django:  # pylint: disable=missing-docstring,too-few-public-methods
        model = Variable

    def get_queryset(self) -> QuerySet:
        """
        Return the queryset indexed by this document, joining the related
        concept, dataset, analysis_unit, conceptual_dataset, period and study.
        """
        joined_relations = (
            "concept",
            "dataset",
            "dataset__analysis_unit",
            "dataset__conceptual_dataset",
            "dataset__period",
            "dataset__study",
        )
        queryset = super().get_queryset()
        return queryset.select_related(*joined_relations)
Esempio n. 24
0
class LegislationDocument(Document):
    """Search document for ``Legislation`` objects.

    Classification and tag names are indexed as single strings in which the
    individual names are joined with ``CONN``; a name that itself contains
    ``CONN`` is rejected with a ``ValidationError``.
    """

    classifications = fields.TextField(term_vector="with_positions_offsets")

    section_classifications = fields.TextField(
        term_vector="with_positions_offsets")

    tags = fields.TextField(term_vector="with_positions_offsets")

    section_tags = fields.TextField(term_vector="with_positions_offsets")

    country = fields.KeywordField()
    country_name = fields.KeywordField(attr="country.name")

    law_type = fields.KeywordField()

    pdf_text = fields.TextField()

    year_mentions = fields.ListField(fields.IntegerField())

    sections = fields.NestedField(
        properties={
            "pk":
            fields.IntegerField(),
            "code":
            fields.KeywordField(),
            "text":
            fields.TextField(),
            "classifications_text":
            fields.TextField(term_vector="with_positions_offsets"),
            "parent_classifications":
            fields.TextField(term_vector="with_positions_offsets"),
            "tags_text":
            fields.TextField(term_vector="with_positions_offsets"),
            "parent_tags":
            fields.TextField(term_vector="with_positions_offsets"),
        })

    @staticmethod
    def _join_checked(names, kind):
        """Join ``names`` with ``CONN`` after validating every single name.

        ``kind`` is the capitalised noun used in the error message
        ("Classification" or "Tag").

        Raises ``ValidationError`` when any name contains ``CONN``. Each name
        is checked on its own, so a multi-character ``CONN`` that only shows
        up across the boundary of two adjacent names is not reported.
        """
        # Materialise once: the input may be a queryset or a set and is
        # iterated twice (validation, then join).
        names = list(names)
        if any(CONN in name for name in names):
            raise ValidationError(
                "{} names must not include the character "
                "'{}'.".format(kind, CONN))
        return CONN.join(names)

    def prepare_classifications(self, instance):
        """Return the legislation's classifications, each formatted as
        ``name (code)``, joined by ``CONN``."""
        pairs = instance.classifications.all().values_list("name", "code")
        names = ["{} ({})".format(name, code) for name, code in pairs]
        return self._join_checked(names, "Classification")

    def prepare_section_classifications(self, instance):
        """Return the distinct classifications of all sections, each
        formatted as ``name (code)``, joined by ``CONN``."""
        names = {
            "{} ({})".format(classification.name, classification.code)
            for section in instance.sections.all()
            for classification in section.classifications.all()
        }
        return self._join_checked(names, "Classification")

    def prepare_tags(self, instance):
        """Return the legislation's tag names joined by ``CONN``."""
        names = instance.tags.all().values_list("name", flat=True)
        return self._join_checked(names, "Tag")

    def prepare_section_tags(self, instance):
        """Return the distinct tag names of all sections, joined by ``CONN``."""
        names = {
            tag.name
            for section in instance.sections.all()
            for tag in section.tags.all()
        }
        return self._join_checked(names, "Tag")

    def prepare_country(self, instance):
        """Return the ``iso`` attribute of the legislation's country."""
        return instance.country.iso

    def prepare_pdf_text(self, instance):
        """Return the text of all pages, separated by blank lines."""
        return "\n\n".join(page.page_text for page in instance.pages.all())

    def prepare_year_mentions(self, instance):
        """Return every 4-digit number found in ``year_mention`` that lies
        within ``settings.MIN_YEAR`` and ``settings.MAX_YEAR`` (inclusive)."""
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        candidates = (
            int(match)
            for match in re.findall(r"\d{4}", instance.year_mention or ""))
        return [
            year for year in candidates
            if settings.MIN_YEAR <= year <= settings.MAX_YEAR
        ]

    def get_instances_from_related(self, related_instance):
        """Return the legislation(s) reached from ``related_instance``.

        A ``LegislationSection`` yields its single ``legislation``; any other
        related model yields ``legislation_set.all()``.
        """
        if isinstance(related_instance, LegislationSection):
            return related_instance.legislation
        else:  # it's a TaxonomyClassification or TaxonomyTag
            return related_instance.legislation_set.all()

    class Django:
        model = Legislation  # The model associated with this Document

        # The fields of the model to be indexed in Elasticsearch
        fields = [
            "id",
            "title",
            "abstract",
            "year",
            "year_amendment",
        ]

        related_models = [
            LegislationSection, TaxonomyClassification, TaxonomyTag
        ]
Esempio n. 25
0
class QuestionDocument(GenericDataDocument):
    """Search document for ``instruments.Question`` objects."""

    instrument = fields.ObjectField(
        properties={
            "name": fields.TextField(),
            "label": fields.TextField(),
            "label_de": fields.TextField(),
        })
    items = fields.ObjectField(
        properties={
            "en": fields.ListField(fields.TextField(analyzer="english")),
            "de": fields.ListField(fields.TextField(analyzer="german")),
        })

    @staticmethod
    def _get_study(model_object: Question) -> Study:
        """Return the study of the question's instrument."""
        return model_object.instrument.study

    # lookup methods
    @staticmethod
    def prepare_analysis_unit(question: Question) -> Optional[str]:
        """Return the title of the instrument's analysis unit, or None when
        the lookup raises ``AttributeError``."""
        try:
            analysis_unit = question.instrument.analysis_unit
            return analysis_unit.title()
        except AttributeError:
            return None

    @staticmethod
    def prepare_period(question: Question) -> Optional[str]:
        """Return the title of the instrument's period, or None when the
        lookup raises ``AttributeError``."""
        try:
            period = question.instrument.period
            return period.title()
        except AttributeError:
            return None

    @staticmethod
    def prepare_items(question: Question) -> Dict:
        """Collect the English and German texts of the question's items.

        For every item its ``text`` / ``text_de`` come first, followed by the
        ``label`` / ``label_de`` of each of its answers; empty values are
        skipped.
        """
        english = []
        german = []
        for item in question.items:
            # One (english, german) pair for the item itself, then one per
            # answer, in their original order.
            pairs = [(item.get("text"), item.get("text_de"))]
            pairs.extend((answer.get("label"), answer.get("label_de"))
                         for answer in item.get("answers", []))
            for text_en, text_de in pairs:
                if text_en:
                    english.append(text_en)
                if text_de:
                    german.append(text_de)
        return {"en": english, "de": german}

    class Index:  # pylint: disable=too-few-public-methods missing-class-docstring
        name = f"{settings.ELASTICSEARCH_DSL_INDEX_PREFIX}questions"

    class Django:  # pylint: disable=too-few-public-methods missing-class-docstring
        model = Question

    def get_queryset(self) -> QuerySet:
        """
        Return the queryset indexed by this document, joining the related
        instrument, analysis unit, period and study, and loading only the
        columns listed below.
        """
        joined_relations = (
            "instrument",
            "instrument__analysis_unit",
            "instrument__period",
            "instrument__study",
        )
        loaded_columns = (
            "items",
            "name",
            "label",
            "label_de",
            "instrument__name",
            "instrument__label",
            "instrument__analysis_unit__name",
            "instrument__analysis_unit__label",
            "instrument__period__name",
            "instrument__period__label",
            "instrument__study__name",
            "instrument__study__label",
        )
        queryset = super().get_queryset()
        queryset = queryset.select_related(*joined_relations)
        return queryset.only(*loaded_columns)
Esempio n. 26
0
class ConceptDocument(Document):
    """Search document for ``Concept`` objects, stored in the 'concepts' index."""

    class Index:
        # Index name and settings used for this document type.
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    # Text version of the display name, see prepare_name().
    name = fields.TextField()
    _name = fields.KeywordField(attr='display_name', normalizer='lowercase')
    last_update = fields.DateField(attr='updated_at')
    locale = fields.ListField(fields.KeywordField())
    source = fields.KeywordField(
        attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(
        attr='concept_class', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField(dynamic=True)

    class Django:
        # Backing model plus the model fields indexed as-is.
        model = Concept
        fields = ['version', 'external_id']

    @staticmethod
    def prepare_name(instance):
        """Return the display name with every '-' replaced by '_'; an empty
        or missing name is returned unchanged."""
        name = instance.display_name
        return name.replace('-', '_') if name else name

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct, non-null locales of the concept's names."""
        names_with_locale = instance.names.filter(locale__isnull=False)
        locales = names_with_locale.distinct('locale').values_list(
            'locale', flat=True)
        return list(locales)

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source related to the concept."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection related to the concept."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct mnemonics of the concept's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        unique_mnemonics = set(list(mnemonics))
        return list(unique_mnemonics)

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of the concept's
        collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        owner_urls = {coll.parent_url for coll in collections}
        return list(owner_urls)

    @staticmethod
    def prepare_extras(instance):
        """Return the concept's extras passed through ``jsonify_safe`` and,
        when the result is a dict, ``flatten_dict``; ``{}`` when there is
        nothing to index."""
        if not instance.extras:
            return {}
        value = jsonify_safe(instance.extras)
        if isinstance(value, dict):
            value = flatten_dict(value)
        return value or {}
Esempio n. 27
0
class PageDocument(DocType):
    """Search document for a single ``Page`` of a document.

    Title and description of the parent document are only indexed on the
    first page (``number == 1``).
    """

    document = fields.IntegerField(attr='document_id')

    title = fields.TextField()
    description = fields.TextField()

    tags = fields.ListField(fields.KeywordField())
    created_at = fields.DateField()

    publicbody = fields.IntegerField(attr='document.publicbody_id')
    jurisdiction = fields.IntegerField(
        attr='document.publicbody.jurisdiction_id')
    foirequest = fields.IntegerField(attr='document.foirequest_id')
    campaign = fields.IntegerField(attr='document.foirequest.campaign_id')
    collections = fields.IntegerField()

    user = fields.IntegerField(attr='document.user_id')
    team = fields.IntegerField(attr='document.team_id')

    public = fields.BooleanField()

    number = fields.IntegerField()
    content = fields.TextField(analyzer=analyzer,
                               search_analyzer=search_analyzer,
                               search_quote_analyzer=search_quote_analyzer,
                               index_options='offsets')

    class Meta:
        # Backing model and the chunk size used when iterating the queryset.
        model = Page
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return the pages to index, joining the parent document in the
        same SQL query (optional, done for performance)."""
        queryset = super().get_queryset()
        return queryset.select_related('document')

    def prepare_title(self, obj):
        """Return the document title for the first page, unless it ends in
        '.pdf'; return '' in every other case."""
        if obj.number != 1:
            return ''
        title = obj.document.title
        return '' if title.endswith('.pdf') else title

    def prepare_description(self, obj):
        """Return the document description for the first page, else ''."""
        return obj.document.description if obj.number == 1 else ''

    def prepare_tags(self, obj):
        """Return the ids of all tags attached to the parent document."""
        tag_ids = []
        for tag in obj.document.tags.all():
            tag_ids.append(tag.id)
        return tag_ids

    def prepare_created_at(self, obj):
        """Return the creation date of the parent document."""
        document = obj.document
        return document.created_at

    def prepare_public(self, obj):
        """Return the result of the parent document's ``is_public()``."""
        document = obj.document
        return document.is_public()

    def prepare_team(self, obj):
        """Return the parent document's team id, or None when it is falsy."""
        return obj.document.team_id or None

    def prepare_collections(self, obj):
        """Return the ids of the collections related to the parent document."""
        collection_ids = obj.document.document_documentcollection.all(
        ).values_list('id', flat=True)
        return list(collection_ids)
Esempio n. 28
0
class MappingDocument(Document):
    """Search document for ``Mapping`` objects, stored in the 'mappings' index."""

    class Index:
        # Index name and settings used for this document type.
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        # Backing model plus the model fields indexed as-is.
        model = Mapping
        fields = ['external_id']

    # --- values configured with an explicit ``attr`` -----------------------
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")

    # --- concept related values (lists are built by prepare_* below) -------
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # --- membership in source / collection versions ------------------------
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()

    @staticmethod
    def prepare_from_concept(instance):
        """Return URL, code and name of the mapping's from-concept."""
        url = instance.from_concept_url
        code = instance.from_concept_code
        name = instance.from_concept_name
        return [url, code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return code and name of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept terms followed by the to-concept terms."""
        from_terms = self.prepare_from_concept(instance)
        to_terms = self.prepare_to_concept(instance)
        return from_terms + to_terms

    @staticmethod
    def prepare_concept_source(instance):
        """Return the source names of both ends of the mapping."""
        from_name = instance.from_source_name
        to_name = instance.to_source_name
        return [from_name, to_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the source owners of both ends of the mapping."""
        from_owner = instance.from_source_owner
        to_owner = instance.to_source_owner
        return [from_owner, to_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the source owner types of both ends of the mapping."""
        from_type = instance.from_source_owner_type
        to_type = instance.to_source_owner_type
        return [from_type, to_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source related to the mapping."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection related to the mapping."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct mnemonics of the mapping's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        unique_mnemonics = set(list(mnemonics))
        return list(unique_mnemonics)

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras, or ``{}`` when they are falsy."""
        extras = instance.extras
        return extras or {}
Esempio n. 29
0
class CollectionItemDocument(DocType):
    """Collection item document.

    Search document built from ``Item`` model instances. Most fields read
    their value from a ``*_indexing`` attribute of the model (see ``attr``).
    Language-specific fields exist in an English (``_en``) and a Dutch
    (``_nl``) variant; fields declared with ``index='no'`` are only meant to
    be shown on the detail page.

    NOTE(review): ``index="not_analyzed"`` / ``index='no'`` are string values;
    confirm they are accepted by the Elasticsearch version in use.
    """

    # ID
    id = fields.IntegerField(attr='id')

    record_number = KeywordField()

    inventory_number = KeywordField()

    api_url = KeywordField(index="not_analyzed")

    web_url = KeywordField(index="not_analyzed")

    # ********************************************************************
    # *************** Main data fields for search and filtering **********
    # ********************************************************************

    importer_uid = KeywordField(attr='importer_uid_indexing')

    language_code_orig = KeywordField(attr='language_code_orig')

    department = StringField(
        attr='department_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # ********************************************************************
    # ***************************** English ******************************
    # ********************************************************************

    title_en = StringField(
        attr='title_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    description_en = StringField(
        attr='description_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    period_en = StringField(
        attr='period_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # Period hierarchy: up to four nested levels, each with a ``name``.
    period_1_en = fields.NestedField(
        attr='period_1_en_indexing',
        properties={
            'name':
            StringField(analyzer=html_strip_synonyms_en,
                        fields={
                            'raw': KeywordField(),
                        }),
            'period_2_en':
            fields.NestedField(
                properties={
                    'name':
                    StringField(analyzer=html_strip_synonyms_en,
                                fields={
                                    'raw': KeywordField(),
                                }),
                    'period_3_en':
                    fields.NestedField(
                        properties={
                            'name':
                            StringField(analyzer=html_strip_synonyms_en,
                                        fields={
                                            'raw': KeywordField(),
                                        }),
                            'period_4_en':
                            fields.NestedField(
                                properties={
                                    'name':
                                    StringField(
                                        analyzer=html_strip_synonyms_en,
                                        fields={
                                            'raw': KeywordField(),
                                        })
                                })
                        })
                })
        })

    primary_object_type_en = StringField(
        attr='primary_object_type_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            'suggest': fields.CompletionField(),
        })

    object_type_en = StringField(attr='object_type_en_indexing',
                                 analyzer=html_strip_synonyms_en,
                                 fields={
                                     'raw': KeywordField(),
                                     'natural':
                                     StringField(analyzer='english'),
                                     'suggest': fields.CompletionField(),
                                 })

    # To be shown on the detail page
    object_type_detail_en = fields.TextField(
        attr='object_type_detail_en_indexing', index='no')

    material_en = StringField(
        attr='material_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    material_detail_en = fields.TextField(attr='material_detail_en_indexing',
                                          index='no')

    city_en = StringField(
        attr='city_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    country_en = StringField(
        attr='country_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    references_en = fields.TextField(attr='references_en_indexing', index='no')

    # To be shown on the detail page
    acquired_en = fields.TextField(attr='acquired_en_indexing', index='no')

    # To be shown on the detail page
    site_found_en = fields.TextField(attr='site_found_en_indexing', index='no')

    # To be shown on the detail page
    reign_en = fields.TextField(attr='reign_en_indexing', index='no')

    # To be shown on the detail page
    keywords_en = fields.TextField(attr='keywords_en_indexing', index='no')

    # To be shown on the detail page
    dynasty_en = fields.TextField(attr='dynasty_en_indexing', index='no')

    # New fields
    # To be shown on the detail page
    credit_line_en = fields.TextField(attr='credit_line_en_indexing',
                                      index='no')

    # To be shown on the detail page
    region_en = fields.TextField(attr='region_en_indexing', index='no')

    # To be shown on the detail page
    sub_region_en = fields.TextField(attr='sub_region_en_indexing', index='no')

    # To be shown on the detail page
    locale_en = fields.TextField(attr='locale_en_indexing', index='no')

    # To be shown on the detail page
    excavation_en = fields.TextField(attr='excavation_en_indexing', index='no')

    # To be shown on the detail page
    museum_collection_en = fields.TextField(
        attr='museum_collection_en_indexing', index='no')

    # To be shown on the detail page
    style_en = fields.TextField(attr='style_en_indexing', index='no')

    # To be shown on the detail page
    culture_en = fields.TextField(attr='culture_en_indexing', index='no')

    # To be shown on the detail page
    inscriptions_en = fields.TextField(attr='inscriptions_en_indexing',
                                       index='no')

    # To be shown on the detail page
    provenance_en = fields.TextField(attr='provenance_en_indexing', index='no')

    # To be shown on the detail page
    exhibitions_en = fields.TextField(attr='exhibitions_en_indexing',
                                      index='no')

    # ********************************************************************
    # ****************************** Dutch *******************************
    # ********************************************************************

    title_nl = StringField(
        attr='title_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    description_nl = StringField(
        attr='description_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    period_nl = StringField(
        attr='period_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # Period hierarchy: up to four nested levels, each with a ``name``.
    period_1_nl = fields.NestedField(
        attr='period_1_nl_indexing',
        properties={
            'name':
            StringField(analyzer=html_strip_synonyms_nl,
                        fields={
                            'raw': KeywordField(),
                        }),
            'period_2_nl':
            fields.NestedField(
                properties={
                    'name':
                    StringField(analyzer=html_strip_synonyms_nl,
                                fields={
                                    'raw': KeywordField(),
                                }),
                    'period_3_nl':
                    fields.NestedField(
                        properties={
                            'name':
                            StringField(analyzer=html_strip_synonyms_nl,
                                        fields={
                                            'raw': KeywordField(),
                                        }),
                            'period_4_nl':
                            fields.NestedField(
                                properties={
                                    'name':
                                    StringField(
                                        analyzer=html_strip_synonyms_nl,
                                        fields={
                                            'raw': KeywordField(),
                                        })
                                })
                        })
                })
        })

    primary_object_type_nl = StringField(
        attr='primary_object_type_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            'suggest': fields.CompletionField(),
        })

    object_type_nl = StringField(attr='object_type_nl_indexing',
                                 analyzer=html_strip_synonyms_nl,
                                 fields={
                                     'raw': KeywordField(),
                                     'natural': StringField(analyzer='dutch'),
                                     'suggest': fields.CompletionField(),
                                 })

    # To be shown on the detail page
    object_type_detail_nl = fields.TextField(
        attr='object_type_detail_nl_indexing', index='no')

    material_nl = StringField(
        attr='material_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    material_detail_nl = fields.TextField(attr='material_detail_nl_indexing',
                                          index='no')

    city_nl = StringField(
        attr='city_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    country_nl = StringField(
        attr='country_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    keywords_nl = fields.TextField(attr='keywords_nl_indexing', index='no')

    # To be shown on the detail page
    acquired_nl = fields.TextField(attr='acquired_nl_indexing', index='no')

    # To be shown on the detail page
    site_found_nl = fields.TextField(attr='site_found_nl_indexing', index='no')

    # To be shown on the detail page
    reign_nl = fields.TextField(attr='reign_nl_indexing', index='no')

    # To be shown on the detail page
    references_nl = fields.TextField(attr='references_nl_indexing', index='no')

    # To be shown on the detail page
    dynasty_nl = fields.TextField(attr='dynasty_nl_indexing', index='no')

    # New fields
    # To be shown on the detail page
    credit_line_nl = fields.TextField(attr='credit_line_nl_indexing',
                                      index='no')

    # To be shown on the detail page
    region_nl = fields.TextField(attr='region_nl_indexing', index='no')

    # To be shown on the detail page
    sub_region_nl = fields.TextField(attr='sub_region_nl_indexing', index='no')

    # To be shown on the detail page
    locale_nl = fields.TextField(attr='locale_nl_indexing', index='no')

    # To be shown on the detail page
    excavation_nl = fields.TextField(attr='excavation_nl_indexing', index='no')

    # To be shown on the detail page
    museum_collection_nl = fields.TextField(
        attr='museum_collection_nl_indexing', index='no')

    # To be shown on the detail page
    style_nl = fields.TextField(attr='style_nl_indexing', index='no')

    # To be shown on the detail page
    culture_nl = fields.TextField(attr='culture_nl_indexing', index='no')

    # To be shown on the detail page
    inscriptions_nl = fields.TextField(attr='inscriptions_nl_indexing',
                                       index='no')

    # To be shown on the detail page
    provenance_nl = fields.TextField(attr='provenance_nl_indexing', index='no')

    # To be shown on the detail page
    exhibitions_nl = fields.TextField(attr='exhibitions_nl_indexing',
                                      index='no')

    # ********************************************************************
    # ************************** Language independent ********************
    # ********************************************************************

    dimensions = StringField(
        attr='dimensions_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    object_date_begin = StringField(
        attr='object_date_begin_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    object_date_end = StringField(
        attr='object_date_end_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    location = fields.GeoPointField(attr='geo_location_indexing')

    # List of 32x32 PNG versions of the images. Full path to.
    images = fields.ListField(StringField(attr='images_indexing'))

    # List of image URLs.
    images_urls = fields.ListField(
        fields.ObjectField(attr='images_urls_indexing',
                           properties={
                               'th': KeywordField(index="not_analyzed"),
                               'lr': KeywordField(index="not_analyzed"),
                           }))

    # Classified as by our AI
    classified_as = fields.ListField(
        StringField(attr='classified_as_indexing',
                    fields={
                        'raw': KeywordField(),
                    }))

    # Classified as 1st element
    classified_as_1 = StringField(attr='classified_as_1_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # Classified as 2nd element
    classified_as_2 = StringField(attr='classified_as_2_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # Classified as 3rd element
    classified_as_3 = StringField(attr='classified_as_3_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # ********************************************************************
    # ************** Nested fields for search and filtering **************
    # ********************************************************************

    # # City object
    # country = fields.NestedField(
    #     properties={
    #         'name': StringField(
    #             analyzer=html_strip,
    #             fields={
    #                 'raw': KeywordField(),
    #                 'suggest': fields.CompletionField(),
    #             }
    #         ),
    #         'info': StringField(analyzer=html_strip),
    #         'location': fields.GeoPointField(attr='location_field_indexing'),
    #     }
    # )
    #
    # location = fields.GeoPointField(attr='location_field_indexing')

    class Meta(object):
        """Meta options."""

        model = Item  # The model associate with this DocType

    def get_queryset(self):
        """Filter out items that are not eligible for indexing.

        An item is dropped when its ``title_en`` or ``title_nl`` is NULL or
        an empty string, or when either ``object_type_en_indexing`` or
        ``object_type_nl_indexing`` evaluates as false.

        :return: The parent queryset with the ineligible items excluded.
        """
        qs = super(CollectionItemDocument, self).get_queryset()

        # qs = qs.select_related('period_node').prefetch_related('images')

        # One "is NULL" and one "is empty" condition per field/language pair.
        filters = []
        for field in ['title']:
            for language in ['en', 'nl']:
                lookup = "{}_{}".format(field, language)
                filters.append(Q(**{lookup + "__isnull": True}))
                filters.append(Q(**{lookup + "__exact": ''}))

        if filters:
            qs = qs.exclude(six.moves.reduce(operator.or_, filters))

        # We concatenate ``object_type`` and ``classification`` fields, after
        # cleaning them. Therefore, db-only checks don't work here.
        # ``iterator()`` streams the rows instead of caching every instance
        # on ``qs``; only the primary keys are needed afterwards.
        ids = [
            item.pk for item in qs.iterator()
            if not (item.object_type_en_indexing
                    and item.object_type_nl_indexing)
        ]

        return qs.exclude(id__in=ids)

    def prepare_department(self, instance):
        """Prepare department, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.department_indexing or VALUE_NOT_SPECIFIED

    def prepare_object_date_begin(self, instance):
        """Prepare object date begin (returned as is, without a fallback)."""
        return instance.object_date_begin_indexing

    def prepare_object_date_end(self, instance):
        """Prepare object date end (returned as is, without a fallback)."""
        return instance.object_date_end_indexing

    # ********************************************************************
    # ***************************** English ******************************
    # ********************************************************************

    def prepare_material_en(self, instance):
        """Prepare material, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.material_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_en(self, instance):
        """Prepare period, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.period_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_en(self, instance):
        """Prepare dynasty, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.dynasty_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_en(self, instance):
        """Prepare description, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.description_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_en(self, instance):
        """Prepare city, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.city_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_en(self, instance):
        """Prepare country, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.country_en_indexing or VALUE_NOT_SPECIFIED

    # ********************************************************************
    # ****************************** Dutch *******************************
    # ********************************************************************

    def prepare_material_nl(self, instance):
        """Prepare material, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.material_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_nl(self, instance):
        """Prepare period, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.period_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_nl(self, instance):
        """Prepare dynasty, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.dynasty_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_nl(self, instance):
        """Prepare description, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.description_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_nl(self, instance):
        """Prepare city, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.city_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_nl(self, instance):
        """Prepare country, falling back to ``VALUE_NOT_SPECIFIED``."""
        return instance.country_nl_indexing or VALUE_NOT_SPECIFIED
Esempio n. 30
0
class SourceDocument(Document):
    """Search document for ``Source`` model instances, stored in the 'sources' index."""

    class Index:
        name = 'sources'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Fields with ``attr`` read that attribute from the model instance; fields
    # with a matching ``prepare_<name>`` method below get their value from it.
    locale = fields.ListField(fields.KeywordField())
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    # NOTE(review): mapped as text although the attribute name suggests a
    # boolean - confirm this is intended before changing the mapping.
    public_can_view = fields.TextField(attr='public_can_view')
    source_type = fields.KeywordField(attr='source_type',
                                      normalizer='lowercase')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(attr='canonical_url',
                                        normalizer='lowercase')
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField(dynamic=True)
    identifier = fields.ObjectField()
    jurisdiction = fields.ObjectField()
    publisher = fields.KeywordField(attr='publisher', normalizer='lowercase')
    content_type = fields.KeywordField(attr='content_type',
                                       normalizer='lowercase')
    custom_validation_schema = fields.KeywordField(
        attr='custom_validation_schema', normalizer='lowercase')
    hierarchy_meaning = fields.KeywordField(attr='hierarchy_meaning',
                                            normalizer='lowercase')
    created_by = fields.KeywordField()

    class Django:
        model = Source
        # Model fields listed by name only.
        fields = [
            'full_name',
            'revision_date',
            'retired',
            'experimental',
            'case_sensitive',
            'compositional',
            'version_needed',
            'external_id',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return ``instance.supported_locales``, or ``[]`` when it is unset/empty."""
        # Apply the empty-list fallback directly so a missing value never
        # reaches the index as ``None``.
        return instance.supported_locales or []

    @staticmethod
    def prepare_extras(instance):
        """Return the JSON-safe extras, flattened when they form a dict.

        Falls back to an empty dict when the extras are falsy or the
        converted value is falsy.
        """
        if not instance.extras:
            return {}

        value = jsonify_safe(instance.extras)
        if isinstance(value, dict):
            value = flatten_dict(value)

        return value or {}

    @staticmethod
    def prepare_identifier(instance):
        """Return the JSON-safe identifier, or an empty dict when falsy."""
        if not instance.identifier:
            return {}

        return jsonify_safe(instance.identifier) or {}

    @staticmethod
    def prepare_jurisdiction(instance):
        """Return the JSON-safe jurisdiction, or an empty dict when falsy."""
        if not instance.jurisdiction:
            return {}

        return jsonify_safe(instance.jurisdiction) or {}

    @staticmethod
    def prepare_created_by(instance):
        """Return the username of ``instance.created_by``.

        NOTE(review): raises ``AttributeError`` if ``created_by`` is ``None``;
        confirm the relation is always set.
        """
        return instance.created_by.username