def setUp(self):
        """Prepare the output buffer, registry, indices and mock documents.

        Two indices (``foo`` and ``bar``) are created.  For every document
        fixture a fresh ``Mock`` queryset is stored as ``self.<name>_qs`` and
        the document built by ``_generate_doc_mock`` as ``self.<name>``.
        ``_mock_setup`` is called last.
        """
        self.out = StringIO()
        self.registry = DocumentRegistry()
        self.index_a = Index('foo')
        self.index_b = Index('bar')

        # (attribute name, model, index) for each mock document to create.
        fixtures = (
            ('doc_a1', self.ModelA, self.index_a),
            ('doc_a2', self.ModelA, self.index_a),
            ('doc_b1', self.ModelB, self.index_a),
            ('doc_c1', self.ModelC, self.index_b),
        )
        for attr_name, model, index in fixtures:
            queryset = Mock()
            setattr(self, attr_name + '_qs', queryset)
            setattr(
                self,
                attr_name,
                self._generate_doc_mock(model, index, queryset),
            )

        self._mock_setup()
Exemple #2
0
def get_index(index_name):
    """Return a configured ``Index`` for the given logical index name.

    The real index name is looked up in ``settings.ELASTICSEARCH_INDEXES``.

    :param index_name: Key into ``settings.ELASTICSEARCH_INDEXES``.
    :return: The ``Index`` with its settings applied.
    """
    es_index = Index(settings.ELASTICSEARCH_INDEXES[index_name])
    index_settings = {
        'number_of_shards': 1,
        'number_of_replicas': 0,
        # allow for one extra for pagination
        'max_result_window': settings.MAX_PAGE_SIZE + 1,
    }
    es_index.settings(**index_settings)
    return es_index
Exemple #3
0
def get_index(name):
    """Return an ``Index`` named ``<ELASTICSEARCH_INDEX_PREFIX>_<name>``.

    :param name: Suffix appended to ``settings.ELASTICSEARCH_INDEX_PREFIX``.
    :return: The ``Index`` with one shard and no replicas configured.
    """
    prefix = settings.ELASTICSEARCH_INDEX_PREFIX
    # NOTE: a per-thread suffix for the 'froide_test' prefix (using
    # threading.get_ident()) was tried here and is currently disabled.
    es_index = Index('%s_%s' % (prefix, name))

    # See Elasticsearch Indices API reference for available settings
    shard_settings = {'number_of_shards': 1, 'number_of_replicas': 0}
    es_index.settings(**shard_settings)
    return es_index
    def setUp(self):
        """Create a fresh registry, two indices and four mock documents.

        ``doc_a1``, ``doc_a2`` and ``doc_c1`` are attached to ``index_1``;
        ``doc_b1`` is attached to ``index_2``.
        """
        self.registry = DocumentRegistry()
        self.index_1 = Index(name='index_1')
        self.index_2 = Index(name='index_2')

        make_doc = self._generate_doc_mock
        self.doc_a1 = make_doc(self.ModelA, self.index_1)
        self.doc_a2 = make_doc(self.ModelA, self.index_1)
        self.doc_b1 = make_doc(self.ModelB, self.index_2)
        self.doc_c1 = make_doc(self.ModelC, self.index_1)
Exemple #5
0
def rebuild_index_old(request):
    """Delete (when it exists) and recreate an Elasticsearch index.

    The index name is read from the ``name`` query parameter and falls back
    to ``"profils"``.  All index calls use the ``"default"`` connection.

    :param request: Request object exposing a ``GET`` mapping.
    :return: A ``Response`` carrying a confirmation message.
    """
    # NOTE(review): this drops an index chosen by a query parameter --
    # confirm access control is enforced where this view is routed.
    index_name = request.GET.get("name", "profils")
    target_index = Index(index_name, using="default")

    if target_index.exists():
        target_index.delete()

    target_index.create("default")
    target_index.save("default")

    message = "Reconstruction de l'index " + index_name + " terminée"
    return Response({"message": message})
    def _get_user_document(self):
        """Build the ``UserDocument`` class used in tests.

        A new ``auth_user`` index object is created on every call and the
        document class is attached to it through ``doc_type``.

        :return: The ``UserDocument`` class (not an instance).
        """
        user_index = Index('auth_user')

        # See Elasticsearch Indices API reference for available settings
        user_index.settings(number_of_shards=1, number_of_replicas=1)

        def text_with_subfields(suggest=True):
            """Return a new ``StringField`` with a ``raw`` keyword sub-field.

            When ``suggest`` is true a ``suggest`` completion sub-field is
            added as well.
            """
            subfields = {'raw': KeywordField()}
            if suggest:
                subfields['suggest'] = fields.CompletionField()
            return StringField(fields=subfields)

        @user_index.doc_type
        class UserDocument(Document):
            """For testing purposes."""

            id = fields.IntegerField(attr='id')

            username = text_with_subfields()
            first_name = text_with_subfields()
            last_name = text_with_subfields()
            # The e-mail field only gets the ``raw`` sub-field.
            email = text_with_subfields(suggest=False)

            is_staff = fields.BooleanField()
            is_active = fields.BooleanField()
            date_joined = fields.DateField()

            class Django(object):
                model = User  # The model associate with this Document

        return UserDocument
Exemple #7
0
def get_index(name):
    """Build the prefixed ``Index`` for ``name``.

    The index is named ``'<prefix>_<name>'`` where the prefix comes from
    ``settings.ELASTICSEARCH_INDEX_PREFIX``.

    :param name: Logical index name (the part after the prefix).
    :return: The ``Index`` configured with one shard and zero replicas.
    """
    name_parts = (settings.ELASTICSEARCH_INDEX_PREFIX, name)
    full_name = '%s_%s' % name_parts
    # NOTE: appending threading.get_ident() for the 'froide_test' prefix was
    # experimented with here and is currently disabled.
    result = Index(full_name)

    # See Elasticsearch Indices API reference for available settings
    result.settings(number_of_shards=1, number_of_replicas=0)
    return result
    def analyze(self, text: str) -> Dict[str, List[Dict]]:
        """Show what Elasticsearch does with the tokens of ``text``.

        Both custom analyzers are attached to the ``<prefix>-file`` index
        object before its ``analyze`` call is made with ``text_analyzer``.

        :param text: The text to analyze.
        :return: Whatever the index's ``analyze`` call returns.
        """
        file_index = Index(settings.ELASTICSEARCH_PREFIX + "-file")
        for custom_analyzer in (autocomplete_analyzer, text_analyzer):
            file_index.analyzer(custom_analyzer)

        request_body = {"analyzer": "text_analyzer", "text": text}
        return file_index.analyze(body=request_body)
Exemple #9
0
    def test_delete_object_in_elasticsearch(self):
        """
        Test that the object is deleted normally when the user is properly authenticated.

        The object is indexed first, then deleted through the API; fetching
        its document afterwards must raise ``NotFoundError``.
        """
        set_current_user(self.user_obj)
        db_obj = self._create_object()
        # Index the object (with refresh) before it is deleted through the API.
        self.es_doc_type().update(db_obj, refresh=True)

        response = self.user.delete(self.get_url(self.detail_url, kwargs={'pk': db_obj.pk}))
        self.assertStatus(response, status.HTTP_204_NO_CONTENT)

        Index(self.es_doc_type._doc_type.index).refresh()

        # The lookup itself is what must raise.  Wrapping it in another
        # assertion only hid that intent: any non-raising lookup already
        # fails the ``assertRaises`` block.
        with self.assertRaises(NotFoundError):
            self.es_doc_type.get(id=db_obj.pk)
Exemple #10
0
    def test_create_object_in_elasticsearch(self):
        """
        Test that the object is created normally when the user is properly authenticated.

        After a successful POST, the created object's document must be
        retrievable from the index.
        """
        set_current_user(self.user_obj)
        payload = self._create_object_stub()

        list_url = self.get_url(self.list_url)
        response = self.user.post(list_url, payload)
        self.assertStatus(response, status.HTTP_201_CREATED, payload)

        body = json.loads(response.content)
        new_id = body.get('id')
        self.assertIsNotNone(new_id)

        created = self.model_cls.objects.get(pk=new_id)

        # Refresh the index before looking the document up.
        Index(self.es_doc_type._doc_type.index).refresh()
        es_doc = self.es_doc_type.get(id=created.pk)
        self.assertIsNotNone(es_doc)
Exemple #11
0
    def test_update_object_elasticsearch_tenant_filter(self):
        """
        Test that users from different tenants can't update each other's data.

        A PUT by a user of another tenant must return 404 and leave the
        indexed document unchanged.
        """
        set_current_user(self.user_obj)
        target = self._create_object()
        payload = self._create_object_stub()

        # Index the object and keep the document as it was before the request.
        self.es_doc_type().update(target, refresh=True)
        doc_before = self.es_doc_type.get(id=target.pk)

        detail_url = self.get_url(self.detail_url, kwargs={'pk': target.pk})
        response = self.other_tenant_user.put(detail_url, payload)
        self.assertStatus(response, status.HTTP_404_NOT_FOUND, payload)

        Index(self.es_doc_type._doc_type.index).refresh()
        doc_after = self.es_doc_type.get(id=target.pk)

        self.assertEqual(doc_before.to_dict(), doc_after.to_dict())
Exemple #12
0
    def test_update_in_elasticsearch(self):
        """
        Test that the object is updated normally when the user is properly authenticated.

        After a successful PUT, the indexed document must differ from the
        one stored before the request.
        """
        set_current_user(self.user_obj)
        original = self._create_object()
        payload = self._create_object_stub()

        # Index the object and keep the document as it was before the request.
        self.es_doc_type().update(original, refresh=True)
        doc_before = self.es_doc_type.get(id=original.pk)

        detail_url = self.get_url(self.detail_url, kwargs={'pk': original.pk})
        response = self.user.put(detail_url, data=payload)
        self.assertStatus(response, status.HTTP_200_OK, payload)

        returned_id = response.data.get('id')
        self.assertIsNotNone(returned_id)

        reloaded = self.model_cls.objects.get(pk=returned_id)
        self._compare_objects(reloaded, response.data)

        Index(self.es_doc_type._doc_type.index).refresh()
        doc_after = self.es_doc_type.get(id=returned_id)

        self.assertNotEqual(doc_before.to_dict(), doc_after.to_dict())
Exemple #13
0
 def __new__(cls, language=None, settings=None):
     """Create a product document class and register it on its own index.

     The index name is the shop app label, the lower-cased language (when
     one is given) and ``products``, joined with dots.  The generated class
     derives from ``_ProductDocument`` and gets a ``body`` text field using
     the analyzer for ``language``, or the default analyzer.

     :param language: Optional language code.
     :param settings: Optional mapping passed to the index's ``settings``.
     :return: The generated document class (not an instance of ``cls``).
     """
     name_parts = [app_settings.SHOP_APP_LABEL]
     if not language:
         class_name = 'ProductDocument'
         body_analyzer = body_analyzers['default']
     else:
         name_parts.append(language.lower())
         class_name = 'ProductDocument{}'.format(language.title())
         body_analyzer = body_analyzers.get(language, body_analyzers['default'])
     name_parts.append('products')

     index = Index('.'.join(name_parts))
     if settings:
         index.settings(**settings)

     namespace = {
         '_language': language,
         'body': fields.TextField(analyzer=body_analyzer)
     }
     document_class = type(class_name, (_ProductDocument, ), namespace)
     index.document(document_class)
     return document_class
Exemple #14
0
from django_elasticsearch_dsl import DocType, Index
# from blog.models import Post
from addresses.models import Address

# Index object the document class below is attached to.
addresses = Index('addresses')


@addresses.doc_type
class AddressDocument(DocType):
    """Document class for the ``Address`` model on the ``addresses`` index."""

    class Meta:
        # Model this document is built from.
        model = Address

        # Model fields listed for this document ('description' is disabled).
        fields = [
            'name',
            'full_address',
            'display_address',
            'url',
            # 'description',
        ]
Exemple #15
0
from django_elasticsearch_dsl import DocType, Index, fields
from books.models import Book
from authors.models import Author
from genres.models import GenreNew

# Name of the Elasticsearch index
book = Index('books')
# See Elasticsearch Indices API reference for available settings
book.settings(
    number_of_shards=1,
    number_of_replicas=0
)


@book.doc_type
class BookDocument(DocType):
    """Document class for the ``Book`` model on the ``books`` index."""

    # Text fields declared with attr="get_isbn" / attr="get_authors".
    isbn = fields.TextField(attr="get_isbn")
    author_text = fields.TextField(attr="get_authors")
    # Nested field with ``id`` and ``name`` properties.
    author = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'name': fields.StringField()
    })

    class Meta:
        model = Book  # The model associated with this DocType
        # queryset_pagination = 50000
        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'id',
            'title'
        ]
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl import DocType, Index, fields

from .models import Ad, Category, Car, Manufacturer


# Index object for car documents, configured with one shard and no replicas.
car = Index('test_cars')
car.settings(
    number_of_shards=1,
    number_of_replicas=0
)


# Custom analyzer: standard tokenizer, lowercase/stop/snowball token filters
# and the html_strip character filter.
html_strip = analyzer(
    'html_strip',
    tokenizer="standard",
    filter=["lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)


@car.doc_type
class CarDocument(DocType):
    manufacturer = fields.ObjectField(properties={
        'name': fields.TextField(),
        'country': fields.TextField(),
        'logo': fields.FileField(),
    })

    ads = fields.NestedField(properties={
        'description': fields.TextField(analyzer=html_strip),
Exemple #17
0
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl import DocType, Index, fields
from pkdb_app.subjects.models import Individual, Characteristica, Group
from pkdb_app.interventions.documents import string_field, ObjectField

# Name of the Elasticsearch index
individuals_index = Index("individuals")

# See Elasticsearch Indices API reference for available settings
individuals_index.settings(number_of_shards=1, number_of_replicas=1)

# Custom analyzer: standard tokenizer, four token filters and the html_strip
# character filter.
# NOTE(review): the "standard" token filter was removed in Elasticsearch 7 --
# confirm the target Elasticsearch version still accepts it.
html_strip = analyzer('html_strip',
                      tokenizer="standard",
                      filter=["standard", "lowercase", "stop", "snowball"],
                      char_filter=["html_strip"])


@individuals_index.doc_type
class IndividualDocument(DocType):
    """Individual elastic search document"""
    pk = fields.IntegerField(attr='pk')

    name = string_field('name')
    group = ObjectField(
        properties={
            'name': string_field('name'),
            'pk': fields.IntegerField('pk'),
            'count': fields.IntegerField('count')
        })

    study = ObjectField(
Exemple #18
0
from django.conf import settings

from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl_drf.compat import KeywordField, StringField

from books.models import Address

from .analyzers import html_strip

__all__ = ('AddressDocument', )

# Index name is looked up in settings.ELASTICSEARCH_INDEX_NAMES under this
# module's dotted name (__name__).
INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])

# See Elasticsearch Indices API reference for available settings
INDEX.settings(number_of_shards=1, number_of_replicas=1)


@INDEX.doc_type
class AddressDocument(DocType):
    """Address Elasticsearch document."""

    # In different parts of the code different fields are used. There are
    # a couple of use cases: (1) more-like-this functionality, where `title`,
    # `description` and `summary` fields are used, (2) search and filtering
    # functionality where all of the fields are used.

    # ID
    id = fields.IntegerField(attr='id')

    # ********************************************************************
    # *********************** Main data fields for search ****************
Exemple #19
0
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer, tokenizer
from genres.models import GenreNew

# Name of the Elasticsearch index
genre = Index('genres')
# See Elasticsearch Indices API reference for available settings
genre.settings(
    number_of_shards=1,
    number_of_replicas=0
)

# Analyzer registered as 'genre': an nGram tokenizer named 'trigram'
# (min_gram=3, max_gram=3) followed by the lowercase token filter.
# NOTE(review): despite the variable name ``html_strip`` no char_filter is
# configured here -- confirm whether the name is intentional.
html_strip = analyzer(
    'genre',
    tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
    filter=["lowercase"]
)

@genre.doc_type
class GenreDocument(DocType):
    name = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )

    class Meta:
        model = GenreNew # The model associated with this DocType
        # queryset_pagination = 50000
        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'id'
Exemple #20
0
from django_elasticsearch_dsl import Document, Index, fields
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl.registries import registry
from .models import Product

# Index object for product documents: one shard, one replica.
products_index = Index("products")
products_index.settings(number_of_shards=1, number_of_replicas=1)

# Custom analyzer: standard tokenizer, four token filters and the html_strip
# character filter.
# NOTE(review): the "standard" token filter was removed in Elasticsearch 7 --
# confirm the target Elasticsearch version still accepts it.
html_strip = analyzer(
    "html_strip",
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"],
)


# @registry.register_document
@products_index.doc_type
class ProductDocument(Document):

    # id = fields.IntegerField(attr='id')
    # title = fields.StringField(
    #     analyzer=html_strip,
    #     fields={
    #         'raw': fields.StringField(analyzer='keyword'),
    #     }
    # )
    # description = fields.TextField(
    #     analyzer=html_strip,
    #     fields={
    #         'raw': fields.TextField(analyzer='keyword'),
Exemple #21
0
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch import Elasticsearch

from readthedocs.projects.models import HTMLFile, Project

# Each entry of settings.ES_INDEXES is read for a 'name' key (the index name)
# and a 'settings' key (keyword arguments for Index.settings).
project_conf = settings.ES_INDEXES['project']
project_index = Index(project_conf['name'])
project_index.settings(**project_conf['settings'])

page_conf = settings.ES_INDEXES['page']
page_index = Index(page_conf['name'])
page_index.settings(**page_conf['settings'])

# Module-level logger.
log = logging.getLogger(__name__)


class RTDDocTypeMixin:
    """Mixin that builds a new Elasticsearch client for every ``update`` call.

    It must come before the document base class in the MRO so that its
    ``update`` runs first and then delegates with ``super()``.
    """

    def update(self, *args, **kwargs):
        """Replace ``self.using`` with a new client, then delegate.

        All arguments are forwarded unchanged to the next ``update`` in the
        MRO.

        :return: Whatever the parent ``update`` returns (previously this
            result was discarded and ``None`` was returned).
        """
        # Hack a fix to our broken connection pooling
        # This creates a new connection on every request,
        # but actually works :)
        log.info('Hacking Elastic indexing to fix connection pooling')
        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
        return super().update(*args, **kwargs)


@project_index.doc_type
class ProjectDocument(RTDDocTypeMixin, DocType):
Exemple #22
0
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer, token_filter

from ..account.models import User
from ..order.models import Order
from ..product.models import Product

# Index object for the documents of this module: one shard, no replicas.
storefront = Index('storefront')
storefront.settings(number_of_shards=1, number_of_replicas=0)


# edge_ngram token filter producing grams of 3 to 15 characters.
partial_words = token_filter(
    'partial_words', 'edge_ngram', min_gram=3, max_gram=15)
# Title analyzer: standard tokenizer, then partial_words, then lowercase.
title_analyzer = analyzer(
    'title_analyzer',
    tokenizer='standard',
    filter=[partial_words, 'lowercase'])
# E-mail analyzer built on the uax_url_email tokenizer.
email_analyzer = analyzer('email_analyzer', tokenizer='uax_url_email')


@storefront.doc_type
class ProductDocument(DocType):
    """Document class for the ``Product`` model on the ``storefront`` index."""

    # Extra ``title`` field analyzed with ``title_analyzer``.
    title = fields.StringField(analyzer=title_analyzer)

    def prepare_title(self, instance):
        """Return ``instance.name`` as the value of the ``title`` field."""
        return instance.name

    class Meta:
        # Model this document is built from and the model fields listed for it.
        model = Product
        fields = ['name', 'description', 'is_published']
Exemple #23
0
from django_elasticsearch_dsl import DocType, Index
from fashion.models import Fashion

# Index object the document class below is attached to.
fashion = Index('fashion')

# One shard, no replicas.
fashion.settings(number_of_shards=1, number_of_replicas=0)


@fashion.doc_type
class FashionDocument(DocType):
    """Document class for the ``Fashion`` model on the ``fashion`` index."""

    class Meta:
        # Model this document is built from.
        model = Fashion

        # Model fields listed for this document.
        fields = [
            'image_id',
            'image_path',
            'embedding',
            'pub_date',
            'enable',
        ]
from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl_drf.compat import KeywordField, StringField
from django_elasticsearch_dsl_drf.analyzers import edge_ngram_completion
from django_elasticsearch_dsl_drf.versions import ELASTICSEARCH_GTE_5_0
from elasticsearch_dsl import analyzer

from books.models import Location

# Name of the Elasticsearch index
# (looked up in settings.ELASTICSEARCH_INDEX_NAMES under this module's name)
INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])

# See Elasticsearch Indices API reference for available settings
# ``blocks.read_only_allow_delete`` is explicitly set to False.
INDEX.settings(
    number_of_shards=1,
    number_of_replicas=1,
    blocks={'read_only_allow_delete': False},
)

# Custom analyzer: standard tokenizer, four token filters and the html_strip
# character filter.
# NOTE(review): the "standard" token filter was removed in Elasticsearch 7 --
# confirm the target Elasticsearch version still accepts it.
html_strip = analyzer(
    "html_strip",
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)


@INDEX.doc_type
class LocationDocument(DocType):
    """
    Location document.
Exemple #25
0
from elasticsearch_dsl import analyzer, analysis
from django_elasticsearch_dsl import Document, fields, Index

from movie.models import MovieModel

# Index object for movie documents.
movie_index = Index('movies')

# Create the token filters as described in the documentation.
russian_stop_filter = analysis.token_filter('russian_stop',
                                            type='stop',
                                            stopwords='_russian_')
russian_stemmer_filter = analysis.token_filter('russian_stemmer',
                                               type='stemmer',
                                               language='russian')
english_stop_filter = analysis.token_filter('english_stop',
                                            type='stop',
                                            stopwords='_english_')
english_stemmer_filter = analysis.token_filter('english_stemmer',
                                               type='stemmer',
                                               language='english')
# This filter needs its own name: it was registered as 'english_stemmer',
# which collides with the filter above even though the definitions differ.
english_possessive_stemmer_filter = analysis.token_filter(
    'english_possessive_stemmer', type='stemmer',
    language='possessive_english')

# Create the analyzers.
ru_analyzer = analyzer(
    'ru_analyzer',
    type='custom',
    tokenizer='standard',
    filter=['lowercase', russian_stop_filter, russian_stemmer_filter],
)
en_analyzer = analyzer('en_analyzer',
from django_elasticsearch_dsl import DocType, Index
from .models import Notes

# Index object the document class below is attached to.
notes = Index('notes')


@notes.doc_type
class PostDocument(DocType):
    """Document class for the ``Notes`` model on the ``notes`` index."""

    class Meta:
        # Model this document is built from.
        model = Notes

        # Model fields listed for this document.
        fields = [
            'title',
            'description',
            'id',
            'image',
        ]
Exemple #27
0
from django_elasticsearch_dsl import DocType, Index
from ingenieria_web.social.models import Grupo

# Index object the document class below is attached to.
grupos = Index('grupos')


@grupos.doc_type
class GrupoDocument(DocType):
    """Document class for the ``Grupo`` model on the ``grupos`` index."""

    class Meta:
        # Model this document is built from.
        model = Grupo

        # Model fields listed for this document.
        fields = [
            'idGrupo',
            'NombreGrupo',
        ]
Exemple #28
0
from django_elasticsearch_dsl import DocType, Index
from .models import Post

# Index object the document class below is attached to.
posts = Index('posts')


@posts.doc_type
class PostDocument(DocType):
    """Document class for the ``Post`` model on the ``posts`` index."""

    class Meta:
        # Model this document is built from.
        model = Post

        # Model fields listed for this document.
        fields = [
            'title',
            'id',
            'slug',
            'image',
            'description',
            'date_posted',
        ]
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields

from readthedocs.projects.models import HTMLFile, Project
from readthedocs.sphinx_domains.models import SphinxDomain


# Each entry of settings.ES_INDEXES is read for a 'name' key (the index name)
# and a 'settings' key (keyword arguments for Index.settings).
project_conf = settings.ES_INDEXES['project']
project_index = Index(project_conf['name'])
project_index.settings(**project_conf['settings'])

page_conf = settings.ES_INDEXES['page']
page_index = Index(page_conf['name'])
page_index.settings(**page_conf['settings'])

domain_conf = settings.ES_INDEXES['domain']
domain_index = Index(domain_conf['name'])
domain_index.settings(**domain_conf['settings'])

# Module-level logger.
log = logging.getLogger(__name__)


@domain_index.doc_type
class SphinxDomainDocument(DocType):
    project = fields.KeywordField(attr='project.slug')
    version = fields.KeywordField(attr='version.slug')
    role_name = fields.KeywordField(attr='role_name')

    # For linking to the URL
Exemple #30
0
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer
from .models import User

# Index object the document class below is attached to: one shard, no replicas.
user = Index('users')
user.settings(number_of_shards=1, number_of_replicas=0)


@user.doc_type
class UserDocument(DocType):
    """Document class for the ``User`` model on the ``users`` index."""

    class Meta:
        # Model this document is built from.
        model = User

        # Model fields listed for this document.
        fields = ['nickname', 'id']
Exemple #31
0
from django_elasticsearch_dsl import DocType, Index
from ecomapp.models import Product

# Index object the document class below is attached to.
products = Index("products")


@products.doc_type
class ProductDocument(DocType):
    """Document class for the ``Product`` model on the ``products`` index."""

    class Meta:
        # Model this document is built from.
        model = Product
        # Model fields listed for this document.
        fields = [
            'title',
            'slug',
            'description',
            'image',
        ]
Exemple #32
0
from django_elasticsearch_dsl import DocType, Index
from .models import Question

# Name of the Elasticsearch index
question = Index('questions')
# One shard, no replicas.
question.settings(number_of_shards=1, number_of_replicas=0)


@question.doc_type
class QuestionDocument(DocType):
    """Document class for the ``Question`` model on the ``questions`` index."""

    class Meta:
        # Model this document is built from.
        model = Question

        # The fields to index in Elasticsearch
        fields = [
            'question_text',
        ]
Exemple #33
0
from elasticsearch_dsl import analyzer

from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl.analysis import token_filter

from data_refinery_common.utils import get_supported_microarray_platforms, get_supported_rnaseq_platforms
from .models import Sample, Experiment, Organism

# Index object for experiment documents: one shard, no replicas, and
# max_result_window set to 9999999.
experiment_index = Index('experiments')
experiment_index.settings(number_of_shards=1,
                          number_of_replicas=0,
                          max_result_window=9999999)

# via https://django-elasticsearch-dsl-drf.readthedocs.io/en/0.17.2/advanced_usage_examples.html?highlight=ngram#id8
# via https://github.com/barseghyanartur/django-elasticsearch-dsl-drf/issues/110
# edge_ngram token filter producing grams of 3 to 12 characters.
edge_ngram_completion_filter = token_filter('edge_ngram_completion_filter',
                                            type="edge_ngram",
                                            min_gram=3,
                                            max_gram=12)
# Whitespace-tokenized analyzer that applies the edge n-gram filter first.
# NOTE(review): the "standard" token filter used in the analyzers below was
# removed in Elasticsearch 7 -- confirm the target version still accepts it.
html_strip = analyzer('html_strip',
                      tokenizer="whitespace",
                      filter=[
                          edge_ngram_completion_filter, "standard",
                          "lowercase", "stop", "snowball"
                      ],
                      char_filter=["html_strip"])
# Standard-tokenized variant without the edge n-gram and snowball filters.
html_strip_no_ngram = analyzer('html_strip_no_ngram',
                               tokenizer="standard",
                               filter=["standard", "lowercase", "stop"],
                               char_filter=["html_strip"])
html_strip_no_stop = analyzer('html_strip_no_stop',