# Example #1
# 0
class BookDocument(DocType):
    """Elasticsearch document describing a ``Book``.

    Different parts of the code use different fields: more-like-this
    functionality relies on ``title``, ``description`` and ``summary``
    (each has an ``mlt`` sub-field using the ``english`` analyzer), while
    search and filtering use all of the fields.
    """

    # ID
    id = fields.IntegerField(attr='id')

    # --------------------------------------------------------------------
    # Main data fields for search
    # --------------------------------------------------------------------

    # Sub-fields of ``title``. Kept in a separate mapping because the
    # context-aware suggester is only added when ``ELASTICSEARCH_GTE_5_0``
    # is truthy.
    __title_fields = {
        'raw': KeywordField(),
        'suggest': fields.CompletionField(),
        'edge_ngram_completion': StringField(analyzer=edge_ngram_completion),
        'mlt': StringField(analyzer='english'),
    }

    if ELASTICSEARCH_GTE_5_0:
        __title_fields['suggest_context'] = fields.CompletionField(
            contexts=[
                {"name": "tag", "type": "category", "path": "tags.raw"},
                {"name": "state", "type": "category", "path": "state.raw"},
                {
                    "name": "publisher",
                    "type": "category",
                    "path": "publisher.raw",
                },
            ]
        )

    title = StringField(analyzer=html_strip, fields=__title_fields)

    description = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'mlt': StringField(analyzer='english')},
    )

    summary = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'mlt': StringField(analyzer='english')},
    )

    # --------------------------------------------------------------------
    # Additional fields for search and filtering
    # --------------------------------------------------------------------

    # Authors (value built by ``prepare_authors``)
    authors = fields.ListField(
        StringField(analyzer=html_strip, fields={'raw': KeywordField()})
    )

    # Publisher
    publisher = StringField(
        attr='publisher_indexing',
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'suggest': fields.CompletionField()},
    )

    # Publication date
    publication_date = fields.DateField()

    # State
    state = StringField(analyzer=html_strip, fields={'raw': KeywordField()})

    # ISBN
    isbn = StringField(analyzer=html_strip, fields={'raw': KeywordField()})

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Tags
    tags = StringField(
        attr='tags_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True,
    )

    # Date created
    created = fields.DateField()

    # Read from the ``null_field_indexing`` attribute
    null_field = StringField(attr='null_field_indexing')

    class Meta(object):
        """Meta options."""

        model = Book  # The model associated with this DocType
        parallel_indexing = True
        # queryset_pagination = 500000  # This will split the queryset
        #                               # into parts while indexing

    def prepare_summary(self, instance):
        """Return the summary cut to 32766 characters, or ``None`` if empty."""
        text = instance.summary
        if not text:
            return None
        return text[:32766]

    def prepare_authors(self, instance):
        """Return the names of all authors related to ``instance``."""
        related_authors = instance.authors.all()
        return [entry.name for entry in related_authors]
class LocationDocument(DocType):
    """Elasticsearch document for the ``Location`` model.

    ``full``, ``partial`` and ``postcode`` carry completion sub-fields,
    including a ``context`` suggester whose category contexts are read from
    ``category.raw`` and ``occupied.raw``.
    """

    full = fields.StringField(
        analyzer=html_strip,
        fields={
            "raw": KeywordField(),
            "suggest": fields.CompletionField(),
            "context": fields.CompletionField(
                contexts=[
                    {
                        "name": "category",
                        "type": "category",
                        "path": "category.raw",
                    },
                    {
                        "name": "occupied",
                        "type": "category",
                        "path": "occupied.raw",
                    },
                ]
            ),
            # edge_ngram_completion
            "q": StringField(analyzer=edge_ngram_completion),
        },
    )

    partial = fields.StringField(
        analyzer=html_strip,
        fields={
            "raw": KeywordField(),
            "suggest": fields.CompletionField(),
            "context": fields.CompletionField(
                contexts=[
                    {
                        "name": "category",
                        "type": "category",
                        "path": "category.raw",
                    },
                    {
                        "name": "occupied",
                        "type": "category",
                        "path": "occupied.raw",
                    },
                ]
            ),
            # edge_ngram_completion
            "q": StringField(analyzer=edge_ngram_completion),
        },
    )

    postcode = fields.StringField(
        analyzer=html_strip,
        fields={
            "raw": KeywordField(),
            "suggest": fields.CompletionField(),
            "context": fields.CompletionField(
                contexts=[
                    {
                        "name": "category",
                        "type": "category",
                        "path": "category.raw",
                    },
                    {
                        "name": "occupied",
                        "type": "category",
                        "path": "occupied.raw",
                    },
                ]
            ),
        },
    )

    # Address parts, each read from a differently named source attribute
    number = fields.StringField(
        attr="address_no",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    address = fields.StringField(
        attr="address_street",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    town = fields.StringField(
        attr="address_town",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    authority = fields.StringField(
        attr="authority_name",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # URL fields /geocode/slug
    geocode = fields.StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    slug = fields.StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Filter fields
    category = fields.StringField(
        attr="group",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )
    occupied = fields.StringField(
        attr="occupation_status_text",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Numeric and geo fields
    size = fields.FloatField(attr="floor_area")
    staff = fields.FloatField(attr="employee_count")
    rent = fields.FloatField(attr="rental_valuation")
    revenue = fields.FloatField(attr="revenue")
    coordinates = fields.GeoPointField(attr="location_field_indexing")

    class Meta(object):
        """Meta options."""

        model = Location  # The model associated with this DocType
        parallel_indexing = True
        # This will split the queryset into parts while indexing
        queryset_pagination = 1000
class JournalDocument(Document):
    """Elasticsearch document describing a ``Journal``.

    Different parts of the code use different fields: more-like-this
    functionality relies on ``title``, ``description`` and ``summary``
    (each has an ``mlt`` sub-field using the ``english`` analyzer), while
    search and filtering use all of the fields.
    """

    # ISBN/ID
    isbn = StringField(analyzer=html_strip, fields={'raw': KeywordField()})

    # --------------------------------------------------------------------
    # Main data fields for search
    # --------------------------------------------------------------------

    title = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': StringField(
                analyzer=edge_ngram_completion
            ),
            'mlt': StringField(analyzer='english'),
        },
    )

    description = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'mlt': StringField(analyzer='english')},
    )

    summary = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'mlt': StringField(analyzer='english')},
    )

    # --------------------------------------------------------------------
    # Additional fields for search and filtering
    # --------------------------------------------------------------------

    # Publication date
    publication_date = fields.DateField()

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Date created (read from the ``created_indexing`` attribute)
    created = fields.DateField(attr='created_indexing')

    class Django(object):
        model = Journal  # The model associated with this Document

    # NOTE(review): ``parallel_indexing`` lives in ``Meta`` here while the
    # model is declared in ``Django`` -- confirm the installed
    # django-elasticsearch-dsl version still reads options from ``Meta``.
    class Meta:
        parallel_indexing = True
        # queryset_pagination = 50  # This will split the queryset
        #                           # into parts while indexing

    def prepare_summary(self, instance):
        """Return the summary cut to 32766 characters, or ``None`` if empty."""
        text = instance.summary
        if not text:
            return None
        return text[:32766]
# Example #4
# 0
class AddressDocument(Document):
    """Elasticsearch document for the ``Address`` model.

    Besides the flat address fields it declares a ``city`` object (with an
    embedded ``country`` object) and two nested fields, ``country`` and
    ``continent``, read from the ``country_indexing`` and
    ``continent_indexing`` attributes.
    """

    id = fields.IntegerField(attr='id')

    street = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'suggest': fields.CompletionField()},
    )
    house_number = StringField(analyzer=html_strip)
    appendix = StringField(analyzer=html_strip)
    zip_code = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'suggest': fields.CompletionField()},
    )

    # City object, including the country it belongs to
    city = fields.ObjectField(
        properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'info': StringField(analyzer=html_strip),
            'location': fields.GeoPointField(attr='location_field_indexing'),
            'country': fields.ObjectField(
                properties={
                    'name': StringField(
                        analyzer=html_strip,
                        fields={
                            'raw': KeywordField(),
                            'suggest': fields.CompletionField(),
                        },
                    ),
                    'info': StringField(analyzer=html_strip),
                    'location': fields.GeoPointField(
                        attr='location_field_indexing'
                    ),
                }
            ),
        }
    )

    # Defining the ``@property`` functions in the address model
    country = fields.NestedField(
        attr='country_indexing',
        properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'city': fields.ObjectField(
                properties={
                    'name': StringField(
                        analyzer=html_strip,
                        fields={'raw': KeywordField()},
                    ),
                }
            ),
        },
    )

    # Continent -> country -> city, nested at every level
    continent = fields.NestedField(
        attr='continent_indexing',
        properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'country': fields.NestedField(
                properties={
                    'name': StringField(
                        analyzer=html_strip,
                        fields={'raw': KeywordField()},
                    ),
                    'city': fields.NestedField(
                        properties={
                            'name': StringField(
                                analyzer=html_strip,
                                fields={'raw': KeywordField()},
                            ),
                        }
                    ),
                }
            ),
        },
    )

    location = fields.GeoPointField(attr='location_field_indexing')

    class Django(object):
        model = Address  # The model associated with this Document
class ResourceDocument(Document):
    """Elasticsearch document for ``Resource`` objects (index ``rt-resources``).

    Most fields are declared with the module-level ``copy_to_content``
    options (presumably a ``copy_to`` into ``content`` -- confirm against
    its definition). Each ``prepare_<field>`` method builds the value for
    the field of the same name. Several of them merge in data from the
    resource's paratexts (``instance.get_paratext()``) and, when the result
    is non-empty, append one catch-all ``"any"`` entry.
    """

    meta = fields.KeywordField()
    content = fields.TextField(attr="title.main_title", store=True)

    is_private = fields.BooleanField()

    title = fields.TextField(
        analyzer=text_folding_analyzer,
        fields={
            "raw": fields.KeywordField(),
            "sort": fields.KeywordField(normalizer=lowercase_sort_normalizer),
            "suggest": fields.CompletionField(),
        },
        **copy_to_content,
    )
    form_genre = get_controlled_term_field(options=copy_to_content)
    subjects = get_controlled_term_field(options=copy_to_content)
    date_display = fields.TextField(**copy_to_content)
    year = fields.IntegerField(**copy_to_content)
    summary = fields.TextField(**copy_to_content)
    classifications_printing_publishing = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_translation = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    classifications_paratext_functions = fields.ObjectField(
        properties={
            "edition": get_controlled_term_field(options=copy_to_content)
        })
    contributions = fields.ObjectField(
        properties={
            "agent": get_agent_field(options=copy_to_content),
            "roles": get_controlled_term_field(),
        })
    published_as = fields.KeywordField()
    languages = get_controlled_term_field(options=copy_to_content)
    places = fields.ObjectField(
        properties={
            "place":
            get_place_field(options=copy_to_content),
            "fictional_place":
            fields.TextField(fields={"raw": fields.KeywordField()}),
        })
    relationships = fields.ObjectField(
        properties={
            "relationship_type": get_controlled_term_field(
                options=copy_to_content),
            "related_to": get_resource_field(options=copy_to_content),
        })

    # events = get_event_field(options=copy_to_content)

    is_original = fields.BooleanField()
    is_translation = fields.BooleanField()

    has_date_radical = fields.KeywordField()

    authors = fields.ObjectField(
        attr="get_authors_source_text",
        properties={"person": get_agent_field(options=copy_to_content)},
    )

    translated_from = get_controlled_term_field(options=copy_to_content)

    class Index:
        name = "rt-resources"

    class Django:
        model = Resource
        fields = ["id"]

        related_models = [
            Classification,
            Contribution,
            ControlledTerm,
            Date,
            ResourceLanguage,
            ResourcePlace,
            ResourceRelationship,
            Title,
        ]

    def get_queryset(self):
        """Return the queryset of resources to index.

        Resources that have a ``"paratext of"`` relationship are excluded;
        ``title`` and ``date`` are fetched with ``select_related``.
        """
        return (super().get_queryset().exclude(
            relationships__relationship_type__label="paratext of").
                select_related("title", "date"))

    def get_instances_from_related(self, related_instance):
        """Map a related object to the resource(s) it is attached to.

        Returns ``related_instance.resource`` for the single-resource
        models, ``related_instance.resources.all()`` for ``ControlledTerm``
        and ``Title``, and ``None`` for any other type.
        """
        if isinstance(related_instance, Date):
            return related_instance.resource

        if isinstance(related_instance, (ControlledTerm, Title)):
            return related_instance.resources.all()

        if isinstance(
                related_instance,
            (
                Classification,
                Contribution,
                ResourceLanguage,
                ResourcePlace,
                ResourceRelationship,
            ),
        ):
            return related_instance.resource

        return None

    def prepare_meta(self, instance):
        """Return the ``meta`` value for ``instance``.

        Originals yield the string ``"source texts"``; every other resource
        yields a list that may contain ``"paratexts"`` and/or
        ``"translations"`` (possibly empty).
        """
        if instance.is_original():
            return "source texts"

        meta = []

        if instance.get_paratext():
            meta.append("paratexts")

        if instance.is_translation():
            meta.append("translations")

        return meta

    def prepare_title(self, instance):
        """Return the resource title followed by differing paratext titles."""
        main_title = str(instance.title)
        titles = [main_title]

        for relationship in instance.get_paratext():
            paratext_title = str(relationship.resource.title)
            if paratext_title != main_title:
                titles.append(paratext_title)

        return titles

    def prepare_form_genre(self, instance):
        """Return subjects from the ``fast-forms`` and ``rt-agt`` vocabularies."""
        return self._get_subjects(instance, ["fast-forms", "rt-agt"])

    def _get_subjects(self, instance, prefix, add_any=True):
        """Collect subject labels of ``instance`` and of its paratexts.

        :param instance: resource whose ``subjects`` are read.
        :param prefix: list of vocabulary prefixes to keep.
        :param add_any: append the ``{"label": "any"}`` sentinel when the
            result is non-empty. The recursive calls pass ``False`` so that
            the sentinel is added once rather than once per paratext.
        :return: list of ``{"label": ...}`` dicts.
        """
        subjects = [{
            "label": item.label
        } for item in instance.subjects.filter(vocabulary__prefix__in=prefix)]

        for relationship in instance.get_paratext():
            subjects.extend(
                self._get_subjects(
                    relationship.resource, prefix, add_any=False))

        if add_any and subjects:
            subjects.append({"label": "any"})

        return subjects

    def prepare_subjects(self, instance):
        """Return subjects from the ``fast-topic`` vocabulary."""
        return self._get_subjects(instance, ["fast-topic"])

    def prepare_date_display(self, instance):
        """Return the date as a string, or ``None`` when there is no date.

        For a paratext the date of the resource it belongs to is used.
        """
        resource = self._get_resource(instance)
        if resource.date:
            return str(resource.date)

        return None

    def _get_resource(self, resource):
        """Return ``resource.paratext_of()`` for paratexts, else ``resource``."""
        if resource.is_paratext():
            return resource.paratext_of()

        return resource

    def prepare_year(self, instance):
        """Return the year(s) covered by the resource date.

        With both an earliest and a latest date, every year in the inclusive
        range is returned as a list; with only one of them, that single year
        is returned; otherwise ``None``.
        """
        resource = self._get_resource(instance)
        if not resource.date:
            return None

        date_earliest = resource.date.get_date_earliest()
        date_latest = resource.date.get_date_latest()

        if date_earliest and date_latest:
            return list(range(date_earliest.year, date_latest.year + 1))

        if date_earliest:
            return date_earliest.year

        if date_latest:
            return date_latest.year

        return None

    def prepare_summary(self, instance):
        """Return the non-empty summaries of the resource and its paratexts."""
        summaries = []

        if instance.summary:
            summaries = [instance.summary]

        for relationship in instance.get_paratext():
            if relationship.resource.summary:
                summaries.append(relationship.resource.summary)

        return summaries

    def prepare_classifications_printing_publishing(self, instance):
        """Return classifications from the ``rt-ppt`` vocabulary."""
        return self._get_classifications(instance, "rt-ppt")

    def _get_classifications(self, instance, prefix, add_any=True):
        """Collect classification editions of ``instance`` and its paratexts.

        Editions labelled ``original`` or ``source-text`` (case-insensitive)
        are left out.

        :param instance: resource whose ``classifications`` are read.
        :param prefix: vocabulary prefix the edition must belong to.
        :param add_any: append the ``"any"`` sentinel when the result is
            non-empty. The recursive calls pass ``False`` so that the
            sentinel is added once rather than once per paratext.
        :return: list of ``{"edition": {"label": ...}}`` dicts.
        """
        classifications = [
            {
                "edition": {
                    "label": item.edition.label
                },
            } for item in instance.classifications.filter(
                edition__vocabulary__prefix=prefix)
            if item.edition.label.lower() not in ["original", "source-text"]
        ]

        for relationship in instance.get_paratext():
            classifications.extend(
                self._get_classifications(
                    relationship.resource, prefix, add_any=False))

        if add_any and classifications:
            classifications.append({"edition": {"label": "any"}})

        return classifications

    def prepare_classifications_translation(self, instance):
        """Return classifications from the ``rt-tt`` vocabulary."""
        return self._get_classifications(instance, "rt-tt")

    def prepare_classifications_paratext(self, instance):
        """Return classifications from the ``rt-pt`` vocabulary."""
        return self._get_classifications(instance, "rt-pt")

    def prepare_classifications_paratext_functions(self, instance):
        """Return classifications from the ``rt-ptf`` vocabulary."""
        return self._get_classifications(instance, "rt-ptf")

    def prepare_contributions(self, instance):
        """Return one entry per contribution, paratext contributions included.

        Agents whose name starts with ``"Anon"`` are indexed as
        ``"Anonymous"``. Every role of a contribution becomes its own
        ``{"label": ...}`` dict; roles of paratext contributions get the
        suffix ``" of translation paratext"``. A catch-all ``"any"`` entry
        is appended when there is at least one contribution.
        """
        contributions = []

        for item in instance.get_contributions(include_paratext=True):
            agent = item.agent
            if agent.name.startswith("Anon"):
                agent_name = "Anonymous"
            else:
                agent_name = agent.get_index_name()

            # Same for every role of this contribution, so evaluate it once.
            is_paratext = item.resource.is_paratext()

            # One dict per role: a dict comprehension here would collapse
            # all roles into a single ``label`` key.
            roles = [{
                "label": f"{role.label} of translation paratext"
                if is_paratext else role.label
            } for role in item.roles.all()]

            contributions.append({
                "agent": {
                    "id": agent.id,
                    "name": agent_name,
                },
                "roles": roles,
            })

        if contributions:
            contributions.append({
                "agent": {
                    "name": "any"
                },
                "roles": [{
                    "label": "any"
                }]
            })

        return contributions

    def prepare_published_as(self, instance):
        """Return the non-empty ``published_as`` values, plus ``"any"``."""
        published_as = [
            item.published_as
            for item in instance.get_contributions(include_paratext=True)
            if item.published_as
        ]

        if published_as:
            published_as.append("any")

        return published_as

    def prepare_languages(self, instance):
        """Return language labels of the resource and of its paratexts."""
        return self._get_languages(instance)

    def _get_languages(self, instance, add_any=True):
        """Collect language labels of ``instance`` and of its paratexts.

        :param instance: resource whose ``languages`` are read.
        :param add_any: append the ``{"label": "any"}`` sentinel when the
            result is non-empty. The recursive calls pass ``False`` so that
            the sentinel is added once rather than once per paratext.
        :return: list of ``{"label": ...}`` dicts.
        """
        languages = [{
            "label": item.language.label
        } for item in instance.languages.all()]

        for relationship in instance.get_paratext():
            languages.extend(
                self._get_languages(relationship.resource, add_any=False))

        if add_any and languages:
            languages.append({"label": "any"})

        return languages

    def prepare_places(self, instance):
        """Return one entry per place of ``instance``, plus an ``"any"`` entry.

        A fictional place is stored under ``fictional_place`` and used as
        the address; when a real place is also set, the address becomes
        ``"<fictional> (<real address>)"`` and ``geo``/``country`` are taken
        from the real place.
        """
        places = []

        for item in instance.places.all():
            address = ""
            place = {}

            if item.fictional_place:
                address = item.fictional_place
                place = {
                    "fictional_place": item.fictional_place,
                    "place": {
                        "address": address
                    },
                }

            if item.place:
                address = (f"{address} ({item.place.address})"
                           if address else item.place.address)
                place["place"] = {
                    "address": address,
                    "geo": item.place.geo,
                    "country": {
                        "name": item.place.country.name
                    },
                }

            # NOTE(review): an item with neither value still contributes an
            # empty dict (and triggers the "any" entry) -- confirm intended.
            places.append(place)

        if places:
            places.append(
                {"place": {
                    "address": "any",
                    "country": {
                        "name": "any"
                    }
                }})

        return places

    def prepare_relationships(self, instance):
        """Return the relationships of ``instance``, plus an ``"any"`` entry."""
        relationships = [{
            "relationship_type": {
                "label": item.relationship_type.label
            },
            "related_to": {
                "id": item.related_to.id,
                "title": {
                    "main_title": str(item.related_to.title)
                },
            },
        } for item in instance.relationships.all()]

        if relationships:
            relationships.append({"relationship_type": {"label": "any"}})

        return relationships

    def prepare_events(self, instance):
        """Return the events of ``instance``, plus an ``"any"`` entry.

        NOTE(review): the ``events`` field declaration is commented out in
        this class, so this method may currently be unused -- confirm.
        """
        events = [{
            "id": item.id,
            "title": item.title,
            "place": {
                "address": item.place.address,
                "country": {
                    "name": item.place.country.name
                },
            },
        } for item in instance.events.all()]

        if events:
            events.append({
                "title": "any",
                "place": {
                    "address": "any",
                    "country": {
                        "name": "any"
                    }
                },
            })

        return events

    def prepare_has_date_radical(self, instance):
        """Return ``"yes"`` if ``instance.has_date_radical()`` else ``"no"``."""
        if instance.has_date_radical():
            return "yes"

        return "no"

    def prepare_translated_from(self, instance):
        """Return the source-text language labels, plus an ``"any"`` entry."""
        # Fetch once; a falsy result (``None`` or empty) means no languages.
        source_languages = instance.get_languages_source_text() or []

        languages = [{
            "label": language.label
        } for language in source_languages]

        if languages:
            languages.append({"label": "any"})

        return languages
class LocationDocument(Document):
    """Elasticsearch document for the ``Location`` model.

    The sub-fields of ``full``, ``partial`` and ``postcode`` are assembled
    in private mappings first, because their completion suggesters are only
    added when ``ELASTICSEARCH_GTE_5_0`` is truthy.
    """

    # Full fields
    __full_fields = {
        "raw": KeywordField(),
        # edge_ngram_completion
        "q": StringField(analyzer=edge_ngram_completion),
    }
    if ELASTICSEARCH_GTE_5_0:
        __full_fields["suggest"] = fields.CompletionField()
        __full_fields["context"] = fields.CompletionField(
            contexts=[
                {
                    "name": "category",
                    "type": "category",
                    "path": "category.raw",
                },
                {
                    "name": "occupied",
                    "type": "category",
                    "path": "occupied.raw",
                },
            ]
        )
    full = StringField(analyzer=html_strip, fields=__full_fields)

    # Partial fields
    __partial_fields = {
        "raw": KeywordField(),
        # edge_ngram_completion
        "q": StringField(analyzer=edge_ngram_completion),
    }
    if ELASTICSEARCH_GTE_5_0:
        __partial_fields["suggest"] = fields.CompletionField()
        __partial_fields["context"] = fields.CompletionField(
            contexts=[
                {
                    "name": "category",
                    "type": "category",
                    "path": "category.raw",
                },
                {
                    "name": "occupied",
                    "type": "category",
                    "path": "occupied.raw",
                },
            ]
        )
    partial = StringField(analyzer=html_strip, fields=__partial_fields)

    # Postcode
    __postcode_fields = {"raw": KeywordField()}
    if ELASTICSEARCH_GTE_5_0:
        __postcode_fields["suggest"] = fields.CompletionField()
        __postcode_fields["context"] = fields.CompletionField(
            contexts=[
                {
                    "name": "category",
                    "type": "category",
                    "path": "category.raw",
                },
                {
                    "name": "occupied",
                    "type": "category",
                    "path": "occupied.raw",
                },
            ]
        )
    postcode = StringField(analyzer=html_strip, fields=__postcode_fields)

    # Number
    number = StringField(
        attr="address_no",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Address
    address = StringField(
        attr="address_street",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Town
    town = StringField(
        attr="address_town",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Authority
    authority = StringField(
        attr="authority_name",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # URL fields /geocode/slug
    geocode = StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Slug
    slug = StringField(
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # ********************* Filter fields **********************
    # Category
    category = StringField(
        attr="group",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Occupied
    occupied = StringField(
        attr="occupation_status_text",
        analyzer=html_strip,
        fields={"raw": KeywordField()},
    )

    # Numeric and geo fields
    size = fields.FloatField(attr="floor_area")
    staff = fields.FloatField(attr="employee_count")
    rent = fields.FloatField(attr="rental_valuation")
    revenue = fields.FloatField(attr="revenue")
    coordinates = fields.GeoPointField(attr="location_field_indexing")

    class Django(object):
        model = Location  # The model associated with this Document

    # NOTE(review): these options sit in ``Meta`` while the model is
    # declared in ``Django`` -- confirm the installed django-elasticsearch-dsl
    # version reads ``queryset_pagination`` from ``Meta``.
    class Meta(object):
        parallel_indexing = True
        # This will split the queryset into parts while indexing
        queryset_pagination = 50
class AddressDocument(Document):
    """Elasticsearch document describing a single address.

    Next to the address' own fields (street, house number, appendix and
    zip code) the document carries ``city``, ``country`` and ``continent``
    structures plus a geo-point ``location``.
    """

    # ID
    id = fields.IntegerField(attr="id")

    # ********************************************************************
    # *********************** Main data fields for search ****************
    # ********************************************************************

    # Sub-fields of ``street``; the geo-context suggester is only added
    # when ``ELASTICSEARCH_GTE_5_0`` is truthy.
    __street_fields = {
        "raw": KeywordField(),
        "suggest": fields.CompletionField(),
    }

    if ELASTICSEARCH_GTE_5_0:
        __street_fields["suggest_context"] = fields.CompletionField(
            contexts=[
                {
                    "name": "loc",
                    "type": "geo",
                    "path": "location",
                    "precision": "1000km",
                },
            ],
        )

    street = StringField(analyzer=html_strip, fields=__street_fields)

    house_number = StringField(analyzer=html_strip)

    appendix = StringField(analyzer=html_strip)

    zip_code = StringField(
        analyzer=html_strip,
        fields={
            "raw": KeywordField(),
            "suggest": fields.CompletionField(),
        },
    )

    # ********************************************************************
    # ********** Additional fields for search and filtering **************
    # ********************************************************************

    # City object (with its country embedded as a plain object)
    city = fields.ObjectField(
        properties={
            "name": StringField(
                analyzer=html_strip,
                fields={
                    "raw": KeywordField(),
                    "suggest": fields.CompletionField(),
                },
            ),
            "info": StringField(analyzer=html_strip),
            "location": fields.GeoPointField(attr="location_field_indexing"),
            "country": fields.ObjectField(
                properties={
                    "name": StringField(
                        analyzer=html_strip,
                        fields={
                            "raw": KeywordField(),
                            "suggest": fields.CompletionField(),
                        },
                    ),
                    "info": StringField(analyzer=html_strip),
                    "location": fields.GeoPointField(
                        attr="location_field_indexing",
                    ),
                },
            ),
        },
    )

    # Country object (nested), read from ``country_indexing``
    country = fields.NestedField(
        attr="country_indexing",
        properties={
            "name": StringField(
                analyzer=html_strip,
                fields={
                    "raw": KeywordField(),
                    "suggest": fields.CompletionField(),
                },
            ),
            "city": fields.ObjectField(
                properties={
                    "name": StringField(
                        analyzer=html_strip,
                        fields={"raw": KeywordField()},
                    ),
                },
            ),
        },
    )

    # Continent object (nested continent -> country -> city), read from
    # ``continent_indexing``
    continent = fields.NestedField(
        attr="continent_indexing",
        properties={
            "id": fields.IntegerField(),
            "name": StringField(
                analyzer=html_strip,
                fields={
                    "raw": KeywordField(),
                    "suggest": fields.CompletionField(),
                },
            ),
            "country": fields.NestedField(
                properties={
                    "id": fields.IntegerField(),
                    "name": StringField(
                        analyzer=html_strip,
                        fields={"raw": KeywordField()},
                    ),
                    "city": fields.NestedField(
                        properties={
                            "id": fields.IntegerField(),
                            "name": StringField(
                                analyzer=html_strip,
                                fields={"raw": KeywordField()},
                            ),
                        },
                    ),
                },
            ),
        },
    )

    location = fields.GeoPointField(attr="location_field_indexing")

    class Django(object):
        # The model associated with this Document
        model = Address

    class Meta(object):
        parallel_indexing = True
class CityDocument(Document):
    """City Elasticsearch document.

    This document has been created purely for testing out complex fields.
    """

    # ID
    id = fields.IntegerField(attr="id")

    # ********************************************************************
    # ********************** Main data fields for search *****************
    # ********************************************************************

    name = StringField(
        analyzer=html_strip,
        fields={
            "raw": KeywordField(),
            "suggest": fields.CompletionField(),
        },
    )

    info = StringField(analyzer=html_strip)

    # ********************************************************************
    # ************** Nested fields for search and filtering **************
    # ********************************************************************

    # Country object
    country = fields.NestedField(
        properties={
            "name": StringField(
                analyzer=html_strip,
                fields={
                    "raw": KeywordField(),
                    "suggest": fields.CompletionField(),
                },
            ),
            "info": StringField(analyzer=html_strip),
            "location": fields.GeoPointField(attr="location_field_indexing"),
        },
    )

    location = fields.GeoPointField(attr="location_field_indexing")

    # ********************************************************************
    # ********** Other complex fields for search and filtering ***********
    # ********************************************************************

    # Every list below is declared as a list of string fields, whatever
    # the name of the source attribute suggests.
    boolean_list = fields.ListField(
        StringField(attr="boolean_list_indexing"),
    )
    # boolean_dict_indexing = fields.ObjectField(
    #     properties={
    #         'true': fields.BooleanField(),
    #         'false': fields.BooleanField(),
    #     }
    # )
    datetime_list = fields.ListField(
        StringField(attr="datetime_list_indexing"),
    )
    # datetime_dict_indexing
    float_list = fields.ListField(
        StringField(attr="float_list_indexing"),
    )
    # float_dict_indexing
    integer_list = fields.ListField(
        StringField(attr="integer_list_indexing"),
    )
    # integer_dict_indexing

    class Django(object):
        # The model associated with this Document
        model = City

    class Meta:
        parallel_indexing = True
class BookDocument(DocType):
    """Book Elasticsearch document."""

    # In different parts of the code different fields are used. There are
    # a couple of use cases: (1) more-like-this functionality, where `title`,
    # `description` and `summary` fields are used, (2) search and filtering
    # functionality where all of the fields are used.

    # ID
    id = fields.IntegerField(attr='id')

    # ********************************************************************
    # *********************** Main data fields for search ****************
    # ********************************************************************

    title = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': StringField(
                analyzer=edge_ngram_completion
            ),
            'mlt': StringField(analyzer='english'),
        }
    )

    description = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        }
    )

    summary = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        }
    )

    # ********************************************************************
    # ********** Additional fields for search and filtering **************
    # ********************************************************************

    authors = fields.ListField(
        StringField(
            analyzer=html_strip,
            fields={
                'raw': KeywordField(),
            }
        )
    )

    # Publisher
    publisher = StringField(
        attr='publisher_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        }
    )

    # Publication date
    publication_date = fields.DateField()

    # State
    state = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        }
    )

    # ISBN
    isbn = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        }
    )

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Tags
    tags = StringField(
        attr='tags_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True
    )

    null_field = StringField(attr='null_field_indexing')

    class Meta(object):
        """Meta options."""

        model = Book  # The model associated with this DocType

    def prepare_summary(self, instance):
        """Prepare summary.

        :param instance: Object whose ``summary`` attribute is indexed.
        :return: The summary cut to at most 32766 characters, or ``None``
            when the instance has no summary.
        """
        summary = instance.summary
        if summary is None:
            # Slicing ``None`` would raise ``TypeError`` and abort indexing.
            return None
        # NOTE(review): 32766 is presumably Lucene's maximum term length,
        # which is counted in bytes; this slice counts characters - confirm
        # it is sufficient for non-ASCII text.
        return summary[:32766]

    def prepare_authors(self, instance):
        """Prepare authors.

        :param instance: Object whose related ``authors`` are indexed.
        :return: List with the ``name`` of every related author.
        """
        return [author.name for author in instance.authors.all()]
예제 #10
0
class CollectionItemDocument(DocType):
    """Collection item document.

    Holds language independent fields plus an English (``*_en``) and a
    Dutch (``*_nl``) variant of every translatable field. Several
    ``prepare_*`` methods fall back to ``VALUE_NOT_SPECIFIED`` when the
    source attribute is empty.
    """

    # ID
    id = fields.IntegerField(attr='id')

    record_number = KeywordField()

    inventory_number = KeywordField()

    # NOTE(review): ``index="not_analyzed"`` (and ``index='no'`` further
    # down) are the legacy string spellings of the ``index`` mapping option;
    # newer Elasticsearch versions expect a boolean - confirm against the
    # targeted Elasticsearch version.
    api_url = KeywordField(index="not_analyzed")

    web_url = KeywordField(index="not_analyzed")

    # ********************************************************************
    # *************** Main data fields for search and filtering **********
    # ********************************************************************

    importer_uid = KeywordField(attr='importer_uid_indexing')

    language_code_orig = KeywordField(attr='language_code_orig')

    department = StringField(
        attr='department_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # ********************************************************************
    # ***************************** English ******************************
    # ********************************************************************

    title_en = StringField(
        attr='title_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    description_en = StringField(
        attr='description_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    period_en = StringField(
        attr='period_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # Period tree, four levels deep: each level has a ``name`` and nests
    # the next level.
    period_1_en = fields.NestedField(
        attr='period_1_en_indexing',
        properties={
            'name':
            StringField(analyzer=html_strip_synonyms_en,
                        fields={
                            'raw': KeywordField(),
                        }),
            'period_2_en':
            fields.NestedField(
                properties={
                    'name':
                    StringField(analyzer=html_strip_synonyms_en,
                                fields={
                                    'raw': KeywordField(),
                                }),
                    'period_3_en':
                    fields.NestedField(
                        properties={
                            'name':
                            StringField(analyzer=html_strip_synonyms_en,
                                        fields={
                                            'raw': KeywordField(),
                                        }),
                            'period_4_en':
                            fields.NestedField(
                                properties={
                                    'name':
                                    StringField(
                                        analyzer=html_strip_synonyms_en,
                                        fields={
                                            'raw': KeywordField(),
                                        })
                                })
                        })
                })
        })

    primary_object_type_en = StringField(
        attr='primary_object_type_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            'suggest': fields.CompletionField(),
        })

    object_type_en = StringField(attr='object_type_en_indexing',
                                 analyzer=html_strip_synonyms_en,
                                 fields={
                                     'raw': KeywordField(),
                                     'natural':
                                     StringField(analyzer='english'),
                                     'suggest': fields.CompletionField(),
                                 })

    # To be shown on the detail page
    object_type_detail_en = fields.TextField(
        attr='object_type_detail_en_indexing', index='no')

    material_en = StringField(
        attr='material_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    material_detail_en = fields.TextField(attr='material_detail_en_indexing',
                                          index='no')

    city_en = StringField(
        attr='city_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    country_en = StringField(
        attr='country_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    references_en = fields.TextField(attr='references_en_indexing', index='no')

    # To be shown on the detail page
    acquired_en = fields.TextField(attr='acquired_en_indexing', index='no')

    # To be shown on the detail page
    site_found_en = fields.TextField(attr='site_found_en_indexing', index='no')

    # To be shown on the detail page
    reign_en = fields.TextField(attr='reign_en_indexing', index='no')

    # To be shown on the detail page
    keywords_en = fields.TextField(attr='keywords_en_indexing', index='no')

    # To be shown on the detail page
    dynasty_en = fields.TextField(attr='dynasty_en_indexing', index='no')

    # New fields
    # To be shown on the detail page
    credit_line_en = fields.TextField(attr='credit_line_en_indexing',
                                      index='no')

    # To be shown on the detail page
    region_en = fields.TextField(attr='region_en_indexing', index='no')

    # To be shown on the detail page
    sub_region_en = fields.TextField(attr='sub_region_en_indexing', index='no')

    # To be shown on the detail page
    locale_en = fields.TextField(attr='locale_en_indexing', index='no')

    # To be shown on the detail page
    excavation_en = fields.TextField(attr='excavation_en_indexing', index='no')

    # To be shown on the detail page
    museum_collection_en = fields.TextField(
        attr='museum_collection_en_indexing', index='no')

    # To be shown on the detail page
    style_en = fields.TextField(attr='style_en_indexing', index='no')

    # To be shown on the detail page
    culture_en = fields.TextField(attr='culture_en_indexing', index='no')

    # To be shown on the detail page
    inscriptions_en = fields.TextField(attr='inscriptions_en_indexing',
                                       index='no')

    # To be shown on the detail page
    provenance_en = fields.TextField(attr='provenance_en_indexing', index='no')

    # To be shown on the detail page
    exhibitions_en = fields.TextField(attr='exhibitions_en_indexing',
                                      index='no')

    # ********************************************************************
    # ****************************** Dutch *******************************
    # ********************************************************************

    title_nl = StringField(
        attr='title_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    description_nl = StringField(
        attr='description_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    period_nl = StringField(
        attr='period_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # Period tree, four levels deep: each level has a ``name`` and nests
    # the next level.
    period_1_nl = fields.NestedField(
        attr='period_1_nl_indexing',
        properties={
            'name':
            StringField(analyzer=html_strip_synonyms_nl,
                        fields={
                            'raw': KeywordField(),
                        }),
            'period_2_nl':
            fields.NestedField(
                properties={
                    'name':
                    StringField(analyzer=html_strip_synonyms_nl,
                                fields={
                                    'raw': KeywordField(),
                                }),
                    'period_3_nl':
                    fields.NestedField(
                        properties={
                            'name':
                            StringField(analyzer=html_strip_synonyms_nl,
                                        fields={
                                            'raw': KeywordField(),
                                        }),
                            'period_4_nl':
                            fields.NestedField(
                                properties={
                                    'name':
                                    StringField(
                                        analyzer=html_strip_synonyms_nl,
                                        fields={
                                            'raw': KeywordField(),
                                        })
                                })
                        })
                })
        })

    primary_object_type_nl = StringField(
        attr='primary_object_type_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            'suggest': fields.CompletionField(),
        })

    object_type_nl = StringField(attr='object_type_nl_indexing',
                                 analyzer=html_strip_synonyms_nl,
                                 fields={
                                     'raw': KeywordField(),
                                     'natural': StringField(analyzer='dutch'),
                                     'suggest': fields.CompletionField(),
                                 })

    # To be shown on the detail page
    object_type_detail_nl = fields.TextField(
        attr='object_type_detail_nl_indexing', index='no')

    material_nl = StringField(
        attr='material_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    material_detail_nl = fields.TextField(attr='material_detail_nl_indexing',
                                          index='no')

    city_nl = StringField(
        attr='city_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    country_nl = StringField(
        attr='country_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            # 'suggest': fields.CompletionField(),
        })

    # To be shown on the detail page
    keywords_nl = fields.TextField(attr='keywords_nl_indexing', index='no')

    # To be shown on the detail page
    acquired_nl = fields.TextField(attr='acquired_nl_indexing', index='no')

    # To be shown on the detail page
    site_found_nl = fields.TextField(attr='site_found_nl_indexing', index='no')

    # To be shown on the detail page
    reign_nl = fields.TextField(attr='reign_nl_indexing', index='no')

    # To be shown on the detail page
    references_nl = fields.TextField(attr='references_nl_indexing', index='no')

    # To be shown on the detail page
    dynasty_nl = fields.TextField(attr='dynasty_nl_indexing', index='no')

    # New fields
    # To be shown on the detail page
    credit_line_nl = fields.TextField(attr='credit_line_nl_indexing',
                                      index='no')

    # To be shown on the detail page
    region_nl = fields.TextField(attr='region_nl_indexing', index='no')

    # To be shown on the detail page
    sub_region_nl = fields.TextField(attr='sub_region_nl_indexing', index='no')

    # To be shown on the detail page
    locale_nl = fields.TextField(attr='locale_nl_indexing', index='no')

    # To be shown on the detail page
    excavation_nl = fields.TextField(attr='excavation_nl_indexing', index='no')

    # To be shown on the detail page
    museum_collection_nl = fields.TextField(
        attr='museum_collection_nl_indexing', index='no')

    # To be shown on the detail page
    style_nl = fields.TextField(attr='style_nl_indexing', index='no')

    # To be shown on the detail page
    culture_nl = fields.TextField(attr='culture_nl_indexing', index='no')

    # To be shown on the detail page
    inscriptions_nl = fields.TextField(attr='inscriptions_nl_indexing',
                                       index='no')

    # To be shown on the detail page
    provenance_nl = fields.TextField(attr='provenance_nl_indexing', index='no')

    # To be shown on the detail page
    exhibitions_nl = fields.TextField(attr='exhibitions_nl_indexing',
                                      index='no')

    # ********************************************************************
    # ************************** Language independent ********************
    # ********************************************************************

    dimensions = StringField(
        attr='dimensions_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    object_date_begin = StringField(
        attr='object_date_begin_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    object_date_end = StringField(
        attr='object_date_end_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
            # 'suggest': fields.CompletionField(),
        })

    location = fields.GeoPointField(attr='geo_location_indexing')

    # List of 32x32 PNG versions of the images. Full path to.
    images = fields.ListField(StringField(attr='images_indexing'))

    # List of image URLs.
    images_urls = fields.ListField(
        fields.ObjectField(attr='images_urls_indexing',
                           properties={
                               'th': KeywordField(index="not_analyzed"),
                               'lr': KeywordField(index="not_analyzed"),
                           }))

    # Classified as by our AI
    classified_as = fields.ListField(
        StringField(attr='classified_as_indexing',
                    fields={
                        'raw': KeywordField(),
                    }))

    # Classified as 1st element
    classified_as_1 = StringField(attr='classified_as_1_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # Classified as 2nd element
    classified_as_2 = StringField(attr='classified_as_2_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # Classified as 3rd element
    classified_as_3 = StringField(attr='classified_as_3_indexing',
                                  fields={
                                      'raw': KeywordField(),
                                  })

    # ********************************************************************
    # ************** Nested fields for search and filtering **************
    # ********************************************************************

    # # City object
    # country = fields.NestedField(
    #     properties={
    #         'name': StringField(
    #             analyzer=html_strip,
    #             fields={
    #                 'raw': KeywordField(),
    #                 'suggest': fields.CompletionField(),
    #             }
    #         ),
    #         'info': StringField(analyzer=html_strip),
    #         'location': fields.GeoPointField(attr='location_field_indexing'),
    #     }
    # )
    #
    # location = fields.GeoPointField(attr='location_field_indexing')

    class Meta(object):
        """Meta options."""

        model = Item  # The model associated with this DocType

    def get_queryset(self):
        """Filter out items that are not eligible for indexing.

        An item is left out when its English or Dutch title is NULL or
        empty, or when either of its ``object_type_en_indexing`` /
        ``object_type_nl_indexing`` values is falsy.

        :return: The parent queryset without the ineligible items.
        """
        qs = super(CollectionItemDocument, self).get_queryset()

        # qs = qs.select_related('period_node').prefetch_related('images')

        filters = []
        for field in ['title']:
            for language in ['en', 'nl']:
                filters.extend([
                    Q(**{"{}_{}__isnull".format(field, language): True}),
                    Q(**{"{}_{}__exact".format(field, language): ''}),
                ])

        if filters:
            qs = qs.exclude(six.moves.reduce(operator.or_, filters))

        # We concatenate ``object_type`` and ``classification`` fields, after
        # cleaning them. Therefore, db-only checks don't work here and the
        # remaining queryset is walked in Python.
        ids = [
            item.pk
            for item in qs
            if not (item.object_type_en_indexing
                    and item.object_type_nl_indexing)
        ]

        return qs.exclude(id__in=ids)

    def prepare_department(self, instance):
        """Prepare department."""
        return instance.department_indexing or VALUE_NOT_SPECIFIED

    def prepare_object_date_begin(self, instance):
        """Prepare object date begin."""
        return instance.object_date_begin_indexing

    def prepare_object_date_end(self, instance):
        """Prepare object date end."""
        return instance.object_date_end_indexing

    # ********************************************************************
    # ***************************** English ******************************
    # ********************************************************************

    def prepare_material_en(self, instance):
        """Prepare material."""
        return instance.material_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_en(self, instance):
        """Prepare period."""
        return instance.period_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_en(self, instance):
        """Prepare dynasty."""
        return instance.dynasty_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_en(self, instance):
        """Prepare description."""
        return instance.description_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_en(self, instance):
        """Prepare city."""
        return instance.city_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_en(self, instance):
        """Prepare country."""
        return instance.country_en_indexing or VALUE_NOT_SPECIFIED

    # ********************************************************************
    # ****************************** Dutch *******************************
    # ********************************************************************

    def prepare_material_nl(self, instance):
        """Prepare material."""
        return instance.material_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_nl(self, instance):
        """Prepare period."""
        return instance.period_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_nl(self, instance):
        """Prepare dynasty."""
        return instance.dynasty_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_nl(self, instance):
        """Prepare description."""
        return instance.description_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_nl(self, instance):
        """Prepare city."""
        return instance.city_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_nl(self, instance):
        """Prepare country."""
        return instance.country_nl_indexing or VALUE_NOT_SPECIFIED
예제 #11
0
class EFOTraitDocument(Document):
    """Elasticsearch document built from ``EFOTrait_Ontology`` instances.

    Apart from ``id``, every text field is analysed with ``html_strip`` and
    carries a ``raw`` sub-field using the ``keyword`` analyzer; some of them
    additionally carry a ``suggest`` completion sub-field.
    """

    id = fields.TextField()

    # --- Text attributes with ``raw`` and ``suggest`` sub-fields ---
    label = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword'),
            'suggest': fields.CompletionField(),
        },
    )
    description = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword'),
            'suggest': fields.CompletionField(),
        },
    )

    # --- Text attributes with only a ``raw`` sub-field ---
    synonyms = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    mapped_terms = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    url = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )

    # --- Object fields ---
    traitcategory = fields.ObjectField(properties={
        'label': fields.TextField(
            analyzer=html_strip,
            fields={
                'raw': fields.TextField(analyzer='keyword'),
                'suggest': fields.CompletionField(),
            },
        ),
        'parent': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
    })
    scores_direct_associations = fields.ObjectField(properties={
        'id': fields.TextField(
            analyzer=html_strip,
            fields={
                'raw': fields.TextField(analyzer='keyword'),
                'suggest': fields.CompletionField(),
            },
        ),
        'name': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
        'trait_reported': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
    })
    # Same property layout as ``scores_direct_associations``.
    scores_child_associations = fields.ObjectField(properties={
        'id': fields.TextField(
            analyzer=html_strip,
            fields={
                'raw': fields.TextField(analyzer='keyword'),
                'suggest': fields.CompletionField(),
            },
        ),
        'name': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
        'trait_reported': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
    })
    parent_traits = fields.ObjectField(properties={
        'id': fields.TextField(
            analyzer=html_strip,
            fields={
                'raw': fields.TextField(analyzer='keyword'),
                'suggest': fields.CompletionField(),
            },
        ),
        'label': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
    })

    class Django(object):
        """Binds the document to its model and related models."""

        # Model the document is built from.
        model = EFOTrait_Ontology
        # Models listed as related to this document.
        related_models = [TraitCategory]

    def get_instances_from_related(self, related_instance):
        """Return the traits attached to a related ``TraitCategory``.

        :param related_instance: Instance of one of the related models.
        :return: ``related_instance.efotraits_ontology.all()`` for a
            ``TraitCategory``; ``None`` for any other type.
        """
        if not isinstance(related_instance, TraitCategory):
            return None
        return related_instance.efotraits_ontology.all()
예제 #12
0
class PublicationDocument(Document):
    """Elasticsearch document built from ``Publication`` instances.

    Every text field except ``id`` is analysed with ``html_strip`` and has a
    ``raw`` sub-field using the ``keyword`` analyzer.
    """

    id = fields.TextField()

    # --- Bibliographic text fields ---
    title = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    journal = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    pub_year = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    PMID = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    firstauthor = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    authors = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )
    doi = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.TextField(analyzer='keyword')},
    )

    scores_count = fields.IntegerField()

    # --- Score object, with a nested ``trait_efo`` object ---
    publication_score = fields.ObjectField(properties={
        'id': fields.TextField(
            analyzer=html_strip,
            fields={
                'raw': fields.TextField(analyzer='keyword'),
                'suggest': fields.CompletionField(),
            },
        ),
        'name': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
        'trait_reported': fields.TextField(
            analyzer=html_strip,
            fields={'raw': fields.TextField(analyzer='keyword')},
        ),
        'trait_efo': fields.ObjectField(properties={
            'id': fields.TextField(
                analyzer=html_strip,
                fields={
                    'raw': fields.TextField(analyzer='keyword'),
                    'suggest': fields.CompletionField(),
                },
            ),
            'label': fields.TextField(
                analyzer=html_strip,
                fields={
                    'raw': fields.TextField(analyzer='keyword'),
                    'suggest': fields.CompletionField(),
                },
            ),
        }),
    })

    class Django(object):
        """Binds the document to its model."""

        # Model the document is built from.
        model = Publication
예제 #13
0
class AddressDocument(DocType):
    """Elasticsearch document built from ``Address`` instances."""

    # The document holds the address's own text fields plus three views of
    # its geography: a ``city`` object, a nested ``country`` and a nested
    # ``continent`` (continent -> country -> city).

    # Identifier, read from the ``id`` attribute.
    id = fields.IntegerField(attr='id')

    # --------------------------------------------------------------------
    # Main address fields
    # --------------------------------------------------------------------

    street = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'suggest': fields.CompletionField()},
    )

    house_number = StringField(analyzer=html_strip)

    appendix = StringField(analyzer=html_strip)

    zip_code = StringField(
        analyzer=html_strip,
        fields={'raw': KeywordField(), 'suggest': fields.CompletionField()},
    )

    # --------------------------------------------------------------------
    # Additional fields for search and filtering
    # --------------------------------------------------------------------

    # City object, embedding its country.
    city = fields.ObjectField(properties={
        'name': StringField(
            analyzer=html_strip,
            fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            },
        ),
        'info': StringField(analyzer=html_strip),
        'location': fields.GeoPointField(attr='location_field_indexing'),
        'country': fields.ObjectField(properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'info': StringField(analyzer=html_strip),
            'location': fields.GeoPointField(attr='location_field_indexing'),
        }),
    })

    # Country as a nested field, read from ``country_indexing``.
    country = fields.NestedField(
        attr='country_indexing',
        properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'city': fields.ObjectField(properties={
                'name': StringField(
                    analyzer=html_strip,
                    fields={'raw': KeywordField()},
                ),
            }),
        },
    )

    # Continent as a nested field, read from ``continent_indexing``; it
    # nests country, which in turn nests city.
    continent = fields.NestedField(
        attr='continent_indexing',
        properties={
            'id': fields.IntegerField(),
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'country': fields.NestedField(properties={
                'id': fields.IntegerField(),
                'name': StringField(
                    analyzer=html_strip,
                    fields={'raw': KeywordField()},
                ),
                'city': fields.NestedField(properties={
                    'id': fields.IntegerField(),
                    'name': StringField(
                        analyzer=html_strip,
                        fields={'raw': KeywordField()},
                    ),
                }),
            }),
        },
    )

    # Geo point of the address itself, read from ``location_field_indexing``.
    location = fields.GeoPointField(attr='location_field_indexing')

    class Meta(object):
        """Meta options."""

        # Model associated with this DocType.
        model = Address
예제 #14
0
class DatasetsDoc(DocType):
    """Elasticsearch document built from ``Dataset`` instances.

    ``get_queryset`` restricts the indexed rows to those whose ``status`` is
    ``'published'``.
    """

    # --- Core attributes ---
    id = fields.IntegerField()
    slug = fields.KeywordField()
    title = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    version = fields.KeywordField()
    url = fields.KeywordField()
    notes = fields.TextField(
        analyzer=polish_analyzer,
        fields={'raw': fields.KeywordField()},
    )

    # --- Related objects, each indexed as nested ``id`` + ``title`` ---
    # ``institution`` is read from the ``organization`` attribute.
    institution = fields.NestedField(
        attr='organization',
        properties={
            'id': fields.IntegerField(),
            'title': fields.TextField(
                analyzer=polish_analyzer,
                fields={'raw': fields.KeywordField()},
            ),
        },
    )

    category = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': fields.TextField(
            analyzer=polish_analyzer,
            fields={'raw': fields.KeywordField()},
        ),
    })

    resources = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': fields.TextField(
            analyzer=polish_analyzer,
            fields={'raw': fields.KeywordField()},
        ),
    })

    applications = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': fields.TextField(
            analyzer=polish_analyzer,
            fields={'raw': fields.KeywordField()},
        ),
    })

    articles = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'title': fields.TextField(
            analyzer=polish_analyzer,
            fields={'raw': fields.KeywordField()},
        ),
    })

    # --- Multi-valued keyword attributes ---
    tags = fields.KeywordField(attr='tags_list', multi=True)
    # customfields = fields.TextField()
    formats = fields.KeywordField(attr='formats', multi=True)

    # --- Licence information ---
    license_condition_db_or_copyrighted = fields.TextField()
    license_condition_modification = fields.BooleanField()
    license_condition_original = fields.BooleanField()
    license_condition_responsibilities = fields.TextField()
    license_condition_source = fields.BooleanField()
    license_condition_timestamp = fields.BooleanField()
    license_name = fields.StringField(attr='license_name')
    license_description = fields.StringField(attr='license_description')
    update_frequency = fields.KeywordField()

    # --- Statistics, status and timestamps ---
    openness_scores = fields.IntegerField(attr='openness_scores', multi=True)
    users_following = fields.KeywordField(
        attr='users_following_list',
        multi=True,
    )
    views_count = fields.IntegerField()
    downloads_count = fields.IntegerField()
    status = fields.KeywordField()
    modified = fields.DateField()
    last_modified_resource = fields.DateField(attr='last_modified_resource')
    created = fields.DateField()

    class Meta:
        doc_type = 'dataset'
        model = Dataset
        related_models = [
            Organization,
            Category,
            Application,
            Article,
            Resource,
            UserFollowingDataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Map a related-model instance to the objects this method returns.

        :param related_instance: Instance of one of ``Meta.related_models``.
        :return: A queryset for ``UserFollowingDataset`` and ``Application``
            instances, a single object for ``Resource`` instances, and
            ``None`` for every other type.
        """
        # NOTE(review): this branch returns the follower's
        # ``followed_applications`` although the document indexes datasets -
        # possibly a copy-paste slip; confirm against the models.
        if isinstance(related_instance, UserFollowingDataset):
            follower = related_instance.follower
            return follower.followed_applications.all()
        elif isinstance(related_instance, Application):
            return related_instance.datasets.all()
        elif isinstance(related_instance, Resource):
            return related_instance.dataset
        # NOTE(review): Organization, Category and Article are listed in
        # ``related_models`` but have no branch here, so they yield ``None``.
        return None

    def get_queryset(self):
        """Return the model's objects filtered to ``status='published'``."""
        manager = self._doc_type.model.objects
        return manager.filter(status='published')
예제 #15
0
class BaseCourseDocument(OrganizationsMixin, BaseDocument):
    """
    Base course document index.

    Holds the field declarations and ``prepare_*`` methods shared by the
    Course and CourseRun indexes.
    """

    # --- Organizations ---
    authoring_organizations = fields.TextField(
        multi=True,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    authoring_organization_bodies = fields.TextField(multi=True)

    # --- Identity and descriptive text ---
    key = fields.TextField(fields={'raw': fields.KeywordField()})
    title = fields.TextField(
        analyzer=synonym_text,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(analyzer=edge_ngram_completion),
        },
    )
    first_enrollable_paid_seat_price = fields.IntegerField()
    full_description = fields.TextField(analyzer=html_strip)
    image_url = fields.TextField()
    logo_image_urls = fields.TextField(multi=True)
    level_type = fields.TextField(
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    partner = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    outcome = fields.TextField()
    org = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )

    # --- Subjects, seats and sponsors ---
    subject_uuids = fields.KeywordField(multi=True)
    short_description = fields.TextField(analyzer=html_strip)
    seat_types = fields.KeywordField(multi=True)
    subjects = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField(multi=True)},
        multi=True,
    )
    sponsoring_organizations = fields.TextField(multi=True)

    def prepare_first_enrollable_paid_seat_price(self, obj):
        """Return ``obj.first_enrollable_paid_seat_price`` unchanged."""
        price = obj.first_enrollable_paid_seat_price
        return price

    def prepare_level_type(self, obj):
        """Return the level type's name, or ``None`` when it is not set."""
        if not obj.level_type:
            return None
        return obj.level_type.name

    def prepare_logo_image_urls(self, obj):
        """Return logo URLs of the authoring organizations that have a logo."""
        urls = []
        for organization in obj.authoring_organizations.all():
            if organization.logo_image:
                urls.append(organization.logo_image.url)
        return urls

    def prepare_organizations(self, obj):
        """Return authoring and sponsoring organizations without duplicates."""
        authoring = self.prepare_authoring_organizations(obj)
        sponsoring = self.prepare_sponsoring_organizations(obj)
        return list(set(authoring + sponsoring))

    def prepare_subjects(self, obj):
        """Return the name of every subject of ``obj``."""
        names = []
        for subject in obj.subjects.all():
            names.append(subject.name)
        return names

    def prepare_subject_uuids(self, obj):
        """Return the UUID of every subject of ``obj`` as a string."""
        uuids = []
        for subject in obj.subjects.all():
            uuids.append(str(subject.uuid))
        return uuids

    def prepare_sponsoring_organizations(self, obj):
        """Run ``_prepare_organizations`` over the sponsoring organizations."""
        sponsors = obj.sponsoring_organizations.all()
        return self._prepare_organizations(sponsors)
예제 #16
0
class ProgramDocument(BaseDocument, OrganizationsMixin):
    """
    Elasticsearch document built from ``Program`` instances.
    """

    # --- Organizations ---
    authoring_organization_uuids = fields.KeywordField(multi=True)
    authoring_organizations = fields.TextField(
        multi=True,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(
                analyzer=edge_ngram_completion,
            ),
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    authoring_organization_bodies = fields.TextField(multi=True)
    credit_backing_organizations = fields.TextField(multi=True)

    # --- Presentation and purchase flags ---
    card_image_url = fields.TextField()
    hidden = fields.BooleanField()
    is_program_eligible_for_one_click_purchase = fields.BooleanField()
    language = fields.TextField(multi=True)
    marketing_url = fields.TextField()
    min_hours_effort_per_week = fields.IntegerField()
    max_hours_effort_per_week = fields.IntegerField()
    partner = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    published = fields.BooleanField()
    subtitle = fields.TextField(analyzer=html_strip)
    status = fields.KeywordField()
    search_card_display = fields.TextField(multi=True)

    # --- Subjects, staff, schedule and seats ---
    subject_uuids = fields.KeywordField(multi=True)
    staff_uuids = fields.KeywordField(multi=True)
    start = fields.DateField()
    seat_types = fields.KeywordField(multi=True)
    title = fields.TextField(
        analyzer=synonym_text,
        fields={
            'suggest': fields.CompletionField(),
            'edge_ngram_completion': fields.TextField(
                analyzer=edge_ngram_completion,
            ),
        },
    )
    type = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.KeywordField(),
            'lower': fields.TextField(analyzer=case_insensitive_keyword),
        },
    )
    weeks_to_complete_min = fields.IntegerField()
    weeks_to_complete_max = fields.IntegerField()

    def prepare_aggregation_key(self, obj):
        """Return the aggregation key: ``program:`` followed by the UUID."""
        template = 'program:{}'
        return template.format(obj.uuid)

    def prepare_credit_backing_organizations(self, obj):
        """Run ``_prepare_organizations`` over the credit-backing ones."""
        backers = obj.credit_backing_organizations.all()
        return self._prepare_organizations(backers)

    def prepare_language(self, obj):
        """Return ``_prepare_language`` applied to each of ``obj.languages``."""
        prepared = []
        for language in obj.languages:
            prepared.append(self._prepare_language(language))
        return prepared

    def prepare_organizations(self, obj):
        """Return authoring followed by credit-backing organizations."""
        authoring = self.prepare_authoring_organizations(obj)
        credit_backing = self.prepare_credit_backing_organizations(obj)
        return authoring + credit_backing

    def prepare_partner(self, obj):
        """Return the short code of the program's partner."""
        partner = obj.partner
        return partner.short_code

    def prepare_published(self, obj):
        """Return whether the status equals ``ProgramStatus.Active``."""
        is_active = obj.status == ProgramStatus.Active
        return is_active

    def prepare_seat_types(self, obj):
        """Return the slug of every seat type of ``obj``."""
        slugs = []
        for seat_type in obj.seat_types:
            slugs.append(seat_type.slug)
        return slugs

    def prepare_search_card_display(self, obj):
        """Return the search-card values of the degree sharing ``obj.uuid``.

        Returns an empty list when no ``Degree`` with that UUID exists.
        """
        try:
            degree = Degree.objects.get(uuid=obj.uuid)
        except Degree.DoesNotExist:
            return []
        else:
            return [
                degree.search_card_ranking,
                degree.search_card_cost,
                degree.search_card_courses,
            ]

    def prepare_subject_uuids(self, obj):
        """Return the UUID of every subject of ``obj`` as a string."""
        uuids = []
        for subject in obj.subjects:
            uuids.append(str(subject.uuid))
        return uuids

    def prepare_staff_uuids(self, obj):
        """Return the distinct staff UUIDs across all course runs."""
        # The set comprehension drops staff shared between course runs.
        unique_uuids = {
            str(staff.uuid)
            for course_run in obj.course_runs
            for staff in course_run.staff.all()
        }
        return list(unique_uuids)

    def prepare_type(self, obj):
        """Return the ``name_t`` attribute of the program's type."""
        program_type = obj.type
        return program_type.name_t

    def get_queryset(self):
        """Return the parent queryset with ``type`` and ``partner`` joined."""
        queryset = super().get_queryset()
        queryset = queryset.select_related('type')
        return queryset.select_related('partner')

    class Django:
        """
        Django Elasticsearch DSL ORM Meta.
        """

        model = Program

    class Meta:
        """
        Meta options.
        """

        parallel_indexing = True
        queryset_pagination = settings.ELASTICSEARCH_DSL_QUERYSET_PAGINATION