def test_full_index_rebuild(self):
    """A full rebuild should index every row currently in the table."""
    first_author = Author.objects.create(
        publishing_name='Billy Fakington',
        age=4,
        user=self.user,
    )
    # Wipe and rebuild the whole index from the database, ignoring the
    # documents created in setUp so the test starts fresh.
    Author.rebuild_es_index()
    second_author = Author.objects.create(
        publishing_name=self.publishing_name,
        age=4,
        user=self.user,
    )
    read_alias = Author.get_read_alias_name()
    first_doc = get_es_client().get(id=first_author.pk, index=read_alias)
    second_doc = get_es_client().get(id=second_author.pk, index=read_alias)
    # The entire table should now be indexed with all model data present.
    self.assertEqual(first_author.pk, int(first_doc['_id']))
    self.assertEqual(second_author.pk, int(second_doc['_id']))
def test_rebuild_optionally_drops_old_index(self):
    """drop_old_index=False must keep the superseded index alive."""
    Author.rebuild_es_index()
    read_alias = Author.get_read_alias_name()
    old_index = get_index_names_from_alias(read_alias)[0]
    Author.rebuild_es_index()
    # A default rebuild removes the superseded index entirely.
    with self.assertRaises(NotFoundError):
        get_es_client().indices.get(old_index)
    updated_old_index = get_index_names_from_alias(read_alias)[0]
    Author.rebuild_es_index(drop_old_index=False)
    updated_old_es_index_data = get_es_client().indices.get(
        updated_old_index)
    latest_index = get_index_names_from_alias(read_alias)[0]
    # With drop_old_index disabled the previous index survives the
    # rebuild while the aliases move over to the replacement index.
    self.assertIsNotNone(updated_old_es_index_data)
    self.assertNotEqual(updated_old_index, latest_index)
def test_model_manager_bulk_reindexer(self):
    """reindex_into_es pushes pending DB-only changes into ES."""
    author = Author.objects.create(
        publishing_name=self.publishing_name,
        age=3,
        user=self.user,
    )
    new_publishing_name = 'Bill Fakeington 2'
    author_qs = Author.objects.filter(pk=author.pk)
    # queryset.update() bypasses Model.save(), so the ES document still
    # holds the original name at this point.
    author_qs.update(publishing_name=new_publishing_name)
    stale_doc = get_es_client().get(
        id=author.pk,
        index=Author.get_read_alias_name(),
    )
    self.assertEqual(
        self.publishing_name,
        stale_doc['_source']['publishing_name'],
    )
    author_qs.reindex_into_es()
    fresh_doc = get_es_client().get(
        id=author.pk,
        index=Author.get_read_alias_name(),
    )
    self.assertEqual(
        new_publishing_name,
        fresh_doc['_source']['publishing_name'],
    )
def rebuild_es_index(cls, queryset=None, drop_old_index=True):
    """
    Rebuilds the entire ES index for the model, utilizing aliases to
    preserve access to the old index while the new one is being built.

    By default the whole database table is rebuilt in Elasticsearch;
    pass ``queryset`` to only rebuild a slice of it.

    Set ``drop_old_index`` to False to preserve the old index for future
    use; it will no longer have the aliases tied to it but remains
    accessible through the Elasticsearch API.
    """
    old_indicy = get_index_names_from_alias(cls.get_read_alias_name())[0]
    new_indicy = cls.generate_index()
    # Writes go to the new index immediately; reads keep hitting the old
    # index until the rebuild below completes.
    cls.bind_alias(new_indicy, cls.get_write_alias_name())
    # Compare against None rather than using `queryset or ...`: an empty
    # queryset is falsy, so the old truthiness check would silently fall
    # back to reindexing the entire table (and forced an extra queryset
    # evaluation just to test it).
    if queryset is None:
        queryset = cls.objects.all()
    chunked_qs_generator = queryset_iterator(queryset)
    for qs_chunk in chunked_qs_generator:
        qs_chunk.reindex_into_es()
    # Flip reads over to the freshly built index.
    cls.bind_alias(new_indicy, cls.get_read_alias_name())
    if drop_old_index:
        get_es_client().indices.delete(old_indicy)
def test_model_manager_bulk_deletion(self):
    """delete_from_es removes the queryset's documents from ES."""
    author = Author.objects.create(
        publishing_name=self.publishing_name,
        age=4,
        user=self.user,
    )
    author_qs = Author.objects.filter(pk=author.pk)
    read_alias = Author.get_read_alias_name()
    indexed_doc = get_es_client().get(id=author.pk, index=read_alias)
    self.assertEqual(
        self.publishing_name,
        indexed_doc['_source']['publishing_name'],
    )
    author_qs.delete_from_es()
    # The document should be gone once the bulk delete has run.
    with self.assertRaises(NotFoundError):
        get_es_client().get(id=author.pk, index=read_alias)
def generate_index(cls) -> str:
    """
    Create a fresh, uniquely named Elasticsearch index for the model
    and return its name.
    """
    # A uuid suffix keeps each generated index name unique so rebuilds
    # never collide with an existing index.
    index_name = f'{cls.get_index_base_name()}-{uuid4().hex}'
    get_es_client().indices.create(
        index=index_name,
        body=cls.get_index_mapping(),
    )
    return index_name
def test_alias_binding_to_model(self):
    """bind_alias attaches a read alias to a freshly generated index."""
    # Start from a clean slate, overriding the indices made in setUp.
    get_es_client().indices.delete('*')
    new_index = Author.generate_index()
    read_alias_name = Author.get_read_alias_name()
    alias_is_bound = get_es_client().indices.exists_alias(
        index=new_index, name=read_alias_name)
    self.assertFalse(alias_is_bound)
    Author.bind_alias(index=new_index, alias=read_alias_name)
    alias_is_bound = get_es_client().indices.exists_alias(
        index=new_index, name=read_alias_name)
    self.assertTrue(alias_is_bound)
def test_es_instance_is_removed_on_model_delete(self):
    """Deleting a model instance also deletes its ES document."""
    doc_id = self.author.pk
    read_alias = Author.get_read_alias_name()
    existing_doc = get_es_client().get(id=doc_id, index=read_alias)
    self.assertEqual(str(doc_id), existing_doc['_id'])
    self.author.delete()
    # The cached document must not survive the model deletion.
    with self.assertRaises(NotFoundError):
        get_es_client().get(id=doc_id, index=read_alias)
def filter_by_es_search(self, query, sort_query=None):
    """
    Taking an ES search query return the models that are resolved by
    the search.

    Queryset ordering can be denoted by setting sort_query, otherwise
    sorting will be determined by set model ordering.
    """
    # None replaces the previous mutable default `{}`; both are falsy,
    # so callers relying on the default see identical behaviour.
    sort_query = sort_query or {}
    results = get_es_client().search(
        _source=False,
        index=self.model.get_read_alias_name(),
        body={
            'query': query,
            'sort': sort_query,
        })
    model_pks = [doc['_id'] for doc in results['hits']['hits']]
    if sort_query:
        # Force the queryset to return rows in the order produced by
        # the ES sort, not the model's default ordering.
        preserved_pk_order = Case(
            *[When(pk=pk, then=position)
              for position, pk in enumerate(model_pks)])
        return self.filter(pk__in=model_pks).order_by(preserved_pk_order)
    return self.filter(pk__in=model_pks)
def bind_alias(cls, index: str, alias: str):
    """
    Point ``alias`` at ``index``; by default the alias is detached from
    any other indices it is currently bound to.
    """
    actions = []
    if get_es_client().indices.exists_alias(name=alias):
        # Detach the alias from every index it currently points at so
        # it ends up bound only to the requested index.
        for stale_index in get_index_names_from_alias(alias):
            actions.append(
                {'remove': {'index': stale_index, 'alias': alias}})
    actions.append({'add': {'index': index, 'alias': alias}})
    # A single atomic update swaps the alias over without a gap.
    get_es_client().indices.update_aliases(body={'actions': actions})
def save(self, *args, **kwargs):
    """
    Override model save to index those fields nominated by
    es_cached_model_fields, storing them in Elasticsearch.

    Raises:
        UnableToSaveModelToElasticSearch: if the document could not be
            written to the ES cluster after the database save.
    """
    super().save(*args, **kwargs)
    try:
        get_es_client().index(
            id=self.pk,
            index=self.get_write_alias_name(),
            body=build_document_from_model(self),
        )
    except Exception as error:
        # Chain the original exception so the root cause of the ES
        # failure is preserved in the traceback.
        raise UnableToSaveModelToElasticSearch(
            'Attempted to save/update the {} related es document '
            'from index {}, please check your '
            'connection and status of your ES cluster.'.format(
                str(self), self.get_index_base_name())) from error
def test_custom_fields_can_be_indexed(self):
    """Custom field values are indexed on save and on bulk re-index."""
    read_alias = User.get_read_alias_name()
    initial_doc = get_es_client().get(id=self.user.pk, index=read_alias)
    initial_identifier = initial_doc['_source']['unique_identifer']
    # A save event should have populated the custom field value.
    self.assertIsNotNone(initial_identifier)
    User.objects.filter(pk=self.user.pk).reindex_into_es()
    rebuilt_doc = get_es_client().get(id=self.user.pk, index=read_alias)
    # A full queryset rebuild re-indexes custom fields, producing a
    # fresh value for this one.
    self.assertIsNotNone(rebuilt_doc['_source']['unique_identifer'])
    self.assertNotEqual(
        initial_identifier,
        rebuilt_doc['_source']['unique_identifer'],
    )
def delete_from_es(self):
    """
    Bulk remove every model in the queryset that exists within ES.
    """
    delete_actions = [
        {'_id': pk, '_op_type': 'delete'}
        for pk in self.values_list('pk', flat=True)
    ]
    bulk(
        get_es_client(),
        delete_actions,
        index=self.model.get_write_alias_name(),
        doc_type='_doc',
    )
def reindex_into_es(self):
    """
    Generate and bulk re-index all nominated fields into Elasticsearch.

    Raises:
        UnableToBulkIndexModelsToElasticSearch: if the bulk request to
            the ES cluster fails for any reason.
    """
    try:
        bulk(
            get_es_client(),
            build_documents_from_queryset(self).values(),
            index=self.model.get_write_alias_name(),
            doc_type='_doc',
        )
    except Exception as e:
        # Chain the cause explicitly so the underlying bulk failure is
        # kept in the traceback.
        raise UnableToBulkIndexModelsToElasticSearch(e) from e
def test_nominated_fields_are_saved_in_es(self):
    """Fields nominated for caching end up in the author's ES document."""
    document = get_es_client().get(
        id=self.author.pk,
        index=Author.get_read_alias_name(),
    )
    self.assertEqual(str(self.author.pk), document['_id'])
    self.assertEqual(self.user.pk, document['_source']['user'])
    self.assertEqual(
        self.publishing_name,
        document['_source']['publishing_name'],
    )
    self.assertTrue(document['found'])
def delete(self, *args, **kwargs):
    """
    Same as save but in reverse: remove the model instance's cached
    fields in Elasticsearch after deleting the database row.

    Raises:
        UnableToDeleteModelFromElasticSearch: if the related ES document
            could not be removed from the cluster.
    """
    # Cache the pk before the database delete clears it so we can still
    # address the related ES document afterwards. (Renamed from the
    # misleading `author_document_id` — this mixin is model-agnostic.)
    document_id = self.pk
    super().delete(*args, **kwargs)
    try:
        get_es_client().delete(
            index=self.get_write_alias_name(),
            id=document_id,
        )
    except Exception as error:
        # Catch failure and re-raise with a specific exception, chaining
        # the original error so the root cause is preserved.
        raise UnableToDeleteModelFromElasticSearch(
            'Attempted to remove {} related es document '
            'from index {}, please check your '
            'connection and status of your ES cluster.'.format(
                str(self), self.get_index_base_name())) from error
def retrive_es_fields(self, only_include_fields=True):
    """
    Return the currently indexed fields within ES for the model.
    """
    # NOTE(review): the public name's `retrive` typo is kept — renaming
    # it would break existing callers.
    try:
        document = get_es_client().get(
            id=self.pk,
            index=self.get_read_alias_name(),
        )
    except NotFoundError:
        raise ElasticSearchFailure(
            f'Model {repr(self)} is not found in '
            f'{self.get_index_base_name()}, model requires '
            f'indexing to retrieve fields back.')
    return document['_source'] if only_include_fields else document
def retrieve_es_docs(self, only_include_fields=True):
    """
    Retrieve all ES cached fields for the queryset.

    Set only_include_fields=False to return the verbose response from
    Elasticsearch. (The docstring previously referenced a non-existent
    ``only_include_source`` parameter.)
    """
    results = get_es_client().search(
        index=self.model.get_read_alias_name(),
        body={
            'query': {
                'ids': {
                    'values': list(self.values_list('pk', flat=True))
                }
            }
        })
    if only_include_fields:
        return [doc['_source'] for doc in results['hits']['hits']]
    return results
def tearDown(self):
    """
    Drop every index built during the test, keeping tests free of
    side effects from one another.
    """
    get_es_client().indices.delete('*')