def setUp(self):
    """Create a fresh registry with two indices and four mocked documents."""
    self.out = StringIO()
    self.registry = DocumentRegistry()
    self.index_a = Index('foo')
    self.index_b = Index('bar')
    # Two documents for ModelA on index_a, one for ModelB on index_a,
    # and one for ModelC on index_b — each with its own queryset mock.
    self.doc_a1_qs = Mock()
    self.doc_a1 = self._generate_doc_mock(self.ModelA, self.index_a, self.doc_a1_qs)
    self.doc_a2_qs = Mock()
    self.doc_a2 = self._generate_doc_mock(self.ModelA, self.index_a, self.doc_a2_qs)
    self.doc_b1_qs = Mock()
    self.doc_b1 = self._generate_doc_mock(self.ModelB, self.index_a, self.doc_b1_qs)
    self.doc_c1_qs = Mock()
    self.doc_c1 = self._generate_doc_mock(self.ModelC, self.index_b, self.doc_c1_qs)
    self._mock_setup()
def get_index(index_name):
    """Return an Index for the configured name with single-shard settings."""
    index = Index(settings.ELASTICSEARCH_INDEXES[index_name])
    index.settings(
        number_of_shards=1,
        number_of_replicas=0,
        # One extra slot beyond the page size so pagination can peek ahead.
        max_result_window=settings.MAX_PAGE_SIZE + 1,
    )
    return index
def get_index(name):
    """Return a prefixed Index using the project-wide index prefix."""
    index_name = f'{settings.ELASTICSEARCH_INDEX_PREFIX}_{name}'
    # if settings.ELASTICSEARCH_INDEX_PREFIX == 'froide_test':
    #     index_name += '_%s' % threading.get_ident()
    index = Index(index_name)
    # See Elasticsearch Indices API reference for available settings
    index.settings(number_of_shards=1, number_of_replicas=0)
    return index
def setUp(self):
    """Register four mocked documents across two named indices."""
    self.registry = DocumentRegistry()
    self.index_1 = Index(name='index_1')
    self.index_2 = Index(name='index_2')
    # ModelA twice on index_1, ModelB on index_2, ModelC back on index_1.
    self.doc_a1 = self._generate_doc_mock(self.ModelA, self.index_1)
    self.doc_a2 = self._generate_doc_mock(self.ModelA, self.index_1)
    self.doc_b1 = self._generate_doc_mock(self.ModelB, self.index_2)
    self.doc_c1 = self._generate_doc_mock(self.ModelC, self.index_1)
def rebuild_index_old(request):
    """Drop and recreate the Elasticsearch index named in the query string.

    Defaults to the "profils" index when no ``name`` parameter is given.
    """
    index_name = request.GET.get("name", "profils")
    es = Index(index_name, using="default")
    # Start from a clean slate: remove any pre-existing index first.
    if es.exists():
        es.delete()
    es.create("default")
    es.save("default")
    return Response({"message": "Reconstruction de l'index " + index_name + " terminée"})
def _get_user_document(self):
    """Build and return a UserDocument class bound to the auth_user index."""
    index = Index('auth_user')
    # See Elasticsearch Indices API reference for available settings
    index.settings(number_of_shards=1, number_of_replicas=1)

    def _searchable_name():
        # Analysed text plus exact-match and completion sub-fields;
        # a fresh field instance per call, as each attribute needs its own.
        return StringField(
            fields={
                'raw': KeywordField(),
                'suggest': fields.CompletionField(),
            }
        )

    @index.doc_type
    class UserDocument(Document):
        """For testing purposes."""

        id = fields.IntegerField(attr='id')
        username = _searchable_name()
        first_name = _searchable_name()
        last_name = _searchable_name()
        email = StringField(
            fields={
                'raw': KeywordField(),
            }
        )
        is_staff = fields.BooleanField()
        is_active = fields.BooleanField()
        date_joined = fields.DateField()

        class Django(object):
            # The model associated with this Document.
            model = User

    return UserDocument
def get_index(name):
    """Return an Index whose name carries the configured project prefix."""
    index_name = '{}_{}'.format(settings.ELASTICSEARCH_INDEX_PREFIX, name)
    # if settings.ELASTICSEARCH_INDEX_PREFIX == 'froide_test':
    #     index_name += '_%s' % threading.get_ident()
    index = Index(index_name)
    # See Elasticsearch Indices API reference for available settings
    index.settings(number_of_shards=1, number_of_replicas=0)
    return index
def analyze(self, text: str) -> Dict[str, List[Dict]]:
    """Shows what elasticsearch does with the tokens"""
    file_index = Index(settings.ELASTICSEARCH_PREFIX + "-file")
    # Register both analyzers so either may be referenced by name.
    for configured_analyzer in (autocomplete_analyzer, text_analyzer):
        file_index.analyzer(configured_analyzer)
    return file_index.analyze(body={"analyzer": "text_analyzer", "text": text})
def test_delete_object_in_elasticsearch(self):
    """
    Test that the object is deleted normally when the user is properly
    authenticated.
    """
    set_current_user(self.user_obj)
    db_obj = self._create_object()
    # Index the object first so there is something to delete.
    self.es_doc_type().update(db_obj, refresh=True)

    url = self.get_url(self.detail_url, kwargs={'pk': db_obj.pk})
    response = self.user.delete(url)
    self.assertStatus(response, status.HTTP_204_NO_CONTENT)

    # Make the deletion visible to search, then expect a lookup miss.
    Index(self.es_doc_type._doc_type.index).refresh()
    with self.assertRaises(NotFoundError):
        self.assertIsNotNone(self.es_doc_type.get(id=db_obj.pk))
def test_create_object_in_elasticsearch(self):
    """
    Test that the object is created normally when the user is properly
    authenticated.
    """
    set_current_user(self.user_obj)
    stub_dict = self._create_object_stub()

    response = self.user.post(self.get_url(self.list_url), stub_dict)
    self.assertStatus(response, status.HTTP_201_CREATED, stub_dict)

    created_id = json.loads(response.content).get('id')
    self.assertIsNotNone(created_id)
    db_obj = self.model_cls.objects.get(pk=created_id)

    # Refresh so the new document is searchable, then assert it exists.
    Index(self.es_doc_type._doc_type.index).refresh()
    self.assertIsNotNone(self.es_doc_type.get(id=db_obj.pk))
def test_update_object_elasticsearch_tenant_filter(self):
    """
    Test that users from different tenants can't update each other's data.
    """
    set_current_user(self.user_obj)
    db_obj = self._create_object()
    stub_dict = self._create_object_stub()
    self.es_doc_type().update(db_obj, refresh=True)
    old_es_doc = self.es_doc_type.get(id=db_obj.pk)

    # A PUT from the other tenant must look like a missing object.
    url = self.get_url(self.detail_url, kwargs={'pk': db_obj.pk})
    response = self.other_tenant_user.put(url, stub_dict)
    self.assertStatus(response, status.HTTP_404_NOT_FOUND, stub_dict)

    # The indexed document must remain untouched.
    Index(self.es_doc_type._doc_type.index).refresh()
    updated_es_doc = self.es_doc_type.get(id=db_obj.pk)
    self.assertEqual(old_es_doc.to_dict(), updated_es_doc.to_dict())
def test_update_in_elasticsearch(self):
    """
    Test that the object is updated normally when the user is properly
    authenticated.
    """
    set_current_user(self.user_obj)
    db_obj = self._create_object()
    stub_dict = self._create_object_stub()
    self.es_doc_type().update(db_obj, refresh=True)
    old_es_doc = self.es_doc_type.get(id=db_obj.pk)

    url = self.get_url(self.detail_url, kwargs={'pk': db_obj.pk})
    response = self.user.put(url, data=stub_dict)
    self.assertStatus(response, status.HTTP_200_OK, stub_dict)

    created_id = response.data.get('id')
    self.assertIsNotNone(created_id)
    db_obj = self.model_cls.objects.get(pk=created_id)
    self._compare_objects(db_obj, response.data)

    # The indexed document must differ from the pre-update snapshot.
    Index(self.es_doc_type._doc_type.index).refresh()
    updated_es_doc = self.es_doc_type.get(id=created_id)
    self.assertNotEqual(old_es_doc.to_dict(), updated_es_doc.to_dict())
def __new__(cls, language=None, settings=None):
    """Create a ProductDocument class bound to a (possibly localised) index.

    With a ``language``, the index name and class name are suffixed with it
    and a language-specific body analyzer is selected; otherwise defaults
    are used. Optional ``settings`` are applied to the index.
    """
    name_parts = [app_settings.SHOP_APP_LABEL]
    if language:
        name_parts.append(language.lower())
        doc_name = 'ProductDocument{}'.format(language.title())
        analyzer = body_analyzers.get(language, body_analyzers['default'])
    else:
        doc_name = 'ProductDocument'
        analyzer = body_analyzers['default']
    name_parts.append('products')

    products_index = Index('.'.join(name_parts))
    if settings:
        products_index.settings(**settings)

    # Build the concrete document type dynamically and register it.
    class_attrs = {
        '_language': language,
        'body': fields.TextField(analyzer=analyzer),
    }
    doc_class = type(doc_name, (_ProductDocument, ), class_attrs)
    products_index.document(doc_class)
    return doc_class
from django_elasticsearch_dsl import DocType, Index

# from blog.models import Post
from addresses.models import Address

# Elasticsearch index holding address documents.
addresses = Index('addresses')


@addresses.doc_type
class AddressDocument(DocType):
    """Search document mirroring the Address model."""

    class Meta:
        model = Address
        # Model fields copied verbatim into the index.
        fields = [
            'name',
            'full_address',
            'display_address',
            'url',
            # 'description',
        ]
from django_elasticsearch_dsl import DocType, Index, fields

from books.models import Book
from authors.models import Author
from genres.models import GenreNew

# Name of the Elasticsearch index
book = Index('books')
# See Elasticsearch Indices API reference for available settings
book.settings(number_of_shards=1, number_of_replicas=0)


@book.doc_type
class BookDocument(DocType):
    """Book search document with denormalised author data."""

    # Values computed via model helper methods rather than stored columns.
    isbn = fields.TextField(attr="get_isbn")
    author_text = fields.TextField(attr="get_authors")
    # Nested author objects keep individual authors queryable.
    author = fields.NestedField(properties={
        'id': fields.IntegerField(),
        'name': fields.StringField()
    })

    class Meta:
        model = Book  # The model associated with this DocType
        # queryset_pagination = 50000
        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'id',
            'title',
        ]
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl import DocType, Index, fields

from .models import Ad, Category, Car, Manufacturer

# Test index for car documents.
car = Index('test_cars')
car.settings(
    number_of_shards=1,
    number_of_replicas=0
)

# Strips HTML, then lowercases, removes stop words and stems.
html_strip = analyzer(
    'html_strip',
    tokenizer="standard",
    filter=["lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)


@car.doc_type
class CarDocument(DocType):
    # NOTE(review): snippet truncated below; the remainder of this class is
    # not visible here.
    manufacturer = fields.ObjectField(properties={
        'name': fields.TextField(),
        'country': fields.TextField(),
        'logo': fields.FileField(),
    })
    ads = fields.NestedField(properties={
        'description': fields.TextField(analyzer=html_strip),
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl import DocType, Index, fields

from pkdb_app.subjects.models import Individual, Characteristica, Group
from pkdb_app.interventions.documents import string_field, ObjectField

# Name of the Elasticsearch index
individuals_index = Index("individuals")
# See Elasticsearch Indices API reference for available settings
individuals_index.settings(number_of_shards=1, number_of_replicas=1)

# Strips HTML, then applies standard token handling, lowercasing,
# stop-word removal and snowball stemming.
html_strip = analyzer('html_strip',
                      tokenizer="standard",
                      filter=["standard", "lowercase", "stop", "snowball"],
                      char_filter=["html_strip"])


@individuals_index.doc_type
class IndividualDocument(DocType):
    """Individual elastic search document"""
    pk = fields.IntegerField(attr='pk')
    name = string_field('name')
    group = ObjectField(
        properties={
            'name': string_field('name'),
            'pk': fields.IntegerField('pk'),
            'count': fields.IntegerField('count')
        })
    # NOTE(review): snippet truncated here; the rest of this field and the
    # class definition are not visible.
    study = ObjectField(
from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl_drf.compat import KeywordField, StringField

from books.models import Address

from .analyzers import html_strip

__all__ = ('AddressDocument', )

# Index name is configured per-module via Django settings.
INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])
# See Elasticsearch Indices API reference for available settings
INDEX.settings(number_of_shards=1, number_of_replicas=1)


@INDEX.doc_type
class AddressDocument(DocType):
    """Address Elasticsearch document."""

    # In different parts of the code different fields are used. There are
    # a couple of use cases: (1) more-like-this functionality, where `title`,
    # `description` and `summary` fields are used, (2) search and filtering
    # functionality where all of the fields are used.

    # ID
    id = fields.IntegerField(attr='id')

    # NOTE(review): snippet truncated; the search data fields follow below
    # this banner but are not visible here.
    # ********************************************************************
    # *********************** Main data fields for search ****************
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer, tokenizer

from genres.models import GenreNew

# Name of the Elasticsearch index
genre = Index('genres')
# See Elasticsearch Indices API reference for available settings
genre.settings(
    number_of_shards=1,
    number_of_replicas=0
)

# Trigram analyzer: nGram tokens of exactly 3 characters, lowercased.
html_strip = analyzer(
    'genre',
    tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
    filter=["lowercase"]
)


@genre.doc_type
class GenreDocument(DocType):
    # Analysed for fuzzy matching, with an exact-match `raw` sub-field.
    name = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()}
    )

    class Meta:
        model = GenreNew  # The model associated with this DocType
        # queryset_pagination = 50000
        # The fields of the model you want to be indexed in Elasticsearch
        # NOTE(review): snippet truncated; this list is unterminated in the
        # source seen here.
        fields = [
            'id'
from django_elasticsearch_dsl import Document, Index, fields
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl.registries import registry

from .models import Product

products_index = Index("products")
products_index.settings(number_of_shards=1, number_of_replicas=1)

# Strips HTML, then applies standard handling, lowercasing, stop-word
# removal and snowball stemming.
html_strip = analyzer(
    "html_strip",
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"],
)


# @registry.register_document
@products_index.doc_type
class ProductDocument(Document):
    # NOTE(review): snippet truncated; only commented-out field definitions
    # are visible in this view of the class body.
    # id = fields.IntegerField(attr='id')
    # title = fields.StringField(
    #     analyzer=html_strip,
    #     fields={
    #         'raw': fields.StringField(analyzer='keyword'),
    #     }
    # )
    # description = fields.TextField(
    #     analyzer=html_strip,
    #     fields={
    #         'raw': fields.TextField(analyzer='keyword'),
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch import Elasticsearch

from readthedocs.projects.models import HTMLFile, Project

# Index names and settings are driven by the ES_INDEXES Django setting.
project_conf = settings.ES_INDEXES['project']
project_index = Index(project_conf['name'])
project_index.settings(**project_conf['settings'])

page_conf = settings.ES_INDEXES['page']
page_index = Index(page_conf['name'])
page_index.settings(**page_conf['settings'])

log = logging.getLogger(__name__)


class RTDDocTypeMixin:
    """Mixin that opens a fresh ES connection on every update call."""

    def update(self, *args, **kwargs):
        # Hack a fix to our broken connection pooling
        # This creates a new connection on every request,
        # but actually works :)
        log.info('Hacking Elastic indexing to fix connection pooling')
        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
        super().update(*args, **kwargs)


# NOTE(review): snippet truncated; the body of this class is not visible.
@project_index.doc_type
class ProjectDocument(RTDDocTypeMixin, DocType):
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer, token_filter

from ..account.models import User
from ..order.models import Order
from ..product.models import Product

# Single index backing storefront search.
storefront = Index('storefront')
storefront.settings(number_of_shards=1, number_of_replicas=0)

# Edge n-grams of 3-15 chars give search-as-you-type behaviour on titles.
partial_words = token_filter(
    'partial_words', 'edge_ngram', min_gram=3, max_gram=15)
title_analyzer = analyzer(
    'title_analyzer',
    tokenizer='standard',
    filter=[partial_words, 'lowercase'])
email_analyzer = analyzer('email_analyzer', tokenizer='uax_url_email')


@storefront.doc_type
class ProductDocument(DocType):
    """Product search document whose title mirrors the product name."""

    title = fields.StringField(analyzer=title_analyzer)

    def prepare_title(self, instance):
        # Index the model's `name` under the analysed `title` field.
        return instance.name

    class Meta:
        model = Product
        fields = ['name', 'description', 'is_published']
from django_elasticsearch_dsl import DocType, Index

from fashion.models import Fashion

# Index for fashion image documents.
fashion = Index('fashion')
fashion.settings(number_of_shards=1, number_of_replicas=0)


@fashion.doc_type
class FashionDocument(DocType):
    """Mirrors the Fashion model fields into Elasticsearch."""

    class Meta:
        model = Fashion
        fields = [
            'image_id',
            'image_path',
            'embedding',
            'pub_date',
            'enable',
        ]
from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl_drf.compat import KeywordField, StringField
from django_elasticsearch_dsl_drf.analyzers import edge_ngram_completion
from django_elasticsearch_dsl_drf.versions import ELASTICSEARCH_GTE_5_0
from elasticsearch_dsl import analyzer

from books.models import Location

# Name of the Elasticsearch index
INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])
# See Elasticsearch Indices API reference for available settings
INDEX.settings(
    number_of_shards=1,
    number_of_replicas=1,
    # Keep the index writable even after a disk-watermark read-only block.
    blocks={'read_only_allow_delete': False},
)

# Strips HTML, then applies standard handling, lowercasing, stop-word
# removal and snowball stemming.
html_strip = analyzer(
    "html_strip",
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)


# NOTE(review): snippet truncated; the docstring and class body continue
# beyond this point.
@INDEX.doc_type
class LocationDocument(DocType):
    """
    Location document.
from elasticsearch_dsl import analyzer, analysis
from django_elasticsearch_dsl import Document, fields, Index

from movie.models import MovieModel

movie_index = Index('movies')

# Token filters taken from the Elasticsearch documentation.
russian_stop_filter = analysis.token_filter('russian_stop', type='stop', stopwords='_russian_')
russian_stemmer_filter = analysis.token_filter('russian_stemmer', type='stemmer', language='russian')
english_stop_filter = analysis.token_filter('english_stop', type='stop', stopwords='_english_')
english_stemmer_filter = analysis.token_filter('english_stemmer', type='stemmer', language='english')
# NOTE(review): this filter reuses the name 'english_stemmer' already used by
# english_stemmer_filter above — it likely should be a distinct name such as
# 'english_possessive_stemmer'; confirm before relying on both.
english_possessive_stemmer_filter = analysis.token_filter(
    'english_stemmer', type='stemmer', language='possessive_english')

# Analyzers: lowercase plus language-specific stop words and stemming.
ru_analyzer = analyzer(
    'ru_analyzer',
    type='custom',
    tokenizer='standard',
    filter=['lowercase', russian_stop_filter, russian_stemmer_filter],
)

# NOTE(review): snippet truncated mid-definition.
en_analyzer = analyzer('en_analyzer',
from django_elasticsearch_dsl import DocType, Index

from .models import Notes

# Index holding note documents.
notes = Index('notes')


@notes.doc_type
class PostDocument(DocType):
    """Mirrors the Notes model fields into the search index."""

    class Meta:
        model = Notes
        fields = [
            'title',
            'description',
            'id',
            'image',
        ]
from django_elasticsearch_dsl import DocType, Index

from ingenieria_web.social.models import Grupo

# Index for group documents.
grupos = Index('grupos')


@grupos.doc_type
class GrupoDocument(DocType):
    """Indexes group id and name from the Grupo model."""

    class Meta:
        model = Grupo
        fields = [
            'idGrupo',
            'NombreGrupo',
        ]
from django_elasticsearch_dsl import DocType, Index

from .models import Post

# Index for blog post documents.
posts = Index('posts')


@posts.doc_type
class PostDocument(DocType):
    """Mirrors the Post model fields used on listing pages."""

    class Meta:
        model = Post
        fields = [
            'title',
            'id',
            'slug',
            'image',
            'description',
            'date_posted',
        ]
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields

from readthedocs.projects.models import HTMLFile, Project
from readthedocs.sphinx_domains.models import SphinxDomain

# Index names and settings are driven by the ES_INDEXES Django setting.
project_conf = settings.ES_INDEXES['project']
project_index = Index(project_conf['name'])
project_index.settings(**project_conf['settings'])

page_conf = settings.ES_INDEXES['page']
page_index = Index(page_conf['name'])
page_index.settings(**page_conf['settings'])

domain_conf = settings.ES_INDEXES['domain']
domain_index = Index(domain_conf['name'])
domain_index.settings(**domain_conf['settings'])

log = logging.getLogger(__name__)


# NOTE(review): snippet truncated; the class body continues beyond this point.
@domain_index.doc_type
class SphinxDomainDocument(DocType):
    project = fields.KeywordField(attr='project.slug')
    version = fields.KeywordField(attr='version.slug')
    role_name = fields.KeywordField(attr='role_name')

    # For linking to the URL
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl import analyzer

from .models import User

# Index for user documents.
user = Index('users')
user.settings(number_of_shards=1, number_of_replicas=0)


@user.doc_type
class UserDocument(DocType):
    """Indexes the user's nickname and primary key verbatim."""

    class Meta:
        model = User
        fields = ['nickname', 'id']
from django_elasticsearch_dsl import DocType, Index

from ecomapp.models import Product

# Index for product catalogue documents.
products = Index("products")


@products.doc_type
class ProductDocument(DocType):
    """Mirrors the Product catalogue fields into the search index."""

    class Meta:
        model = Product
        fields = [
            'title',
            'slug',
            'description',
            'image',
        ]
from django_elasticsearch_dsl import DocType, Index

from .models import Question

# Name of the Elasticsearch index
question = Index('questions')
question.settings(number_of_shards=1, number_of_replicas=0)


@question.doc_type
class QuestionDocument(DocType):
    """Indexes only the question text for full-text search."""

    class Meta:
        model = Question
        # The fields to index in Elasticsearch
        fields = [
            'question_text',
        ]
from elasticsearch_dsl import analyzer
from django_elasticsearch_dsl import DocType, Index, fields
from elasticsearch_dsl.analysis import token_filter

from data_refinery_common.utils import get_supported_microarray_platforms, get_supported_rnaseq_platforms

from .models import Sample, Experiment, Organism

experiment_index = Index('experiments')
# Very large max_result_window effectively lifts the deep-paging cap.
experiment_index.settings(number_of_shards=1, number_of_replicas=0, max_result_window=9999999)

# via https://django-elasticsearch-dsl-drf.readthedocs.io/en/0.17.2/advanced_usage_examples.html?highlight=ngram#id8
# via https://github.com/barseghyanartur/django-elasticsearch-dsl-drf/issues/110
edge_ngram_completion_filter = token_filter('edge_ngram_completion_filter',
                                            type="edge_ngram",
                                            min_gram=3,
                                            max_gram=12)

# Prefix-matching (edge n-gram) analyzer that also strips HTML.
html_strip = analyzer('html_strip',
                      tokenizer="whitespace",
                      filter=[
                          edge_ngram_completion_filter, "standard", "lowercase", "stop", "snowball"
                      ],
                      char_filter=["html_strip"])

# Same HTML stripping without the n-gram expansion or stemming.
html_strip_no_ngram = analyzer('html_strip_no_ngram',
                               tokenizer="standard",
                               filter=["standard", "lowercase", "stop"],
                               char_filter=["html_strip"])

# NOTE(review): snippet truncated mid-definition.
html_strip_no_stop = analyzer('html_strip_no_stop',