def search(request, locale):
    """
    An API endpoint to return search results as a JSON blob.

    This endpoint makes a relatively simple ElasticSearch query for
    documents matching the value of the q parameter.
    """
    # TODO: I'm betting that a simple search like this will be faster
    # and just as good as the more complex searches implemented by the
    # code in kuma/search/. Peter disagrees and thinks that we might
    # eventually want to make this endpoint use code from kuma/search/.
    # An alternative is to just abandon this API endpoint and have
    # the frontend call wiki.d.m.o/locale/search.json?q=query. On the
    # other hand, if we're ever going to implement any kind of
    # search-as-you-type interface, we'll need a super-fast custom
    # endpoint like this one.
    query_string = request.GET.get('q')
    match_fields = ['title^7', 'summary^2', 'content']

    if locale == 'en-US':
        # English only: a plain multi_match restricted to one locale.
        search = (
            WikiDocumentType.search()
            .filter('term', locale=locale)
            .source(['slug', 'title', 'summary', 'tags'])
            .query('multi_match', query=query_string, fields=match_fields))
    else:
        # Other locales: search the requested locale plus the en-US
        # fallback, boosting hits that are actually translated.
        translated_boost = Q('term', locale={'value': locale, 'boost': 8})
        search = (
            WikiDocumentType.search()
            .filter('terms', locale=[locale, 'en-US'])
            .source(['slug', 'title', 'summary', 'tags', 'locale'])
            .query(query.Bool(
                must=Q('multi_match', query=query_string,
                       fields=match_fields),
                should=[
                    # boost the score if the document is translated
                    translated_boost,
                ])))

    # Add excerpts with search results highlighted
    search = search.highlight('content')
    search = search.highlight_options(
        order='score', pre_tags=['<mark>'], post_tags=['</mark>'])

    # Return as many as 40 matches, since we're not implementing
    # pagination yet
    response = search[0:40].execute()
    return JsonResponse(response.to_dict())
def test_excerpt(self):
    """A highlighted summary is exposed as the serializer's excerpt."""
    hits = (WikiDocumentType.search()
            .query("match", summary="CSS")
            .highlight(*WikiDocumentType.excerpt_fields)
            .execute())
    data = DocumentSerializer(hits, many=True).data
    assert "A <em>CSS</em> article" == data[0]["excerpt"]
def test_current_locale_results(self):
    """Filtering by locale returns only documents in that locale."""
    self.refresh()
    either = query.Match(title='article') | query.Match(content='article')
    search = (WikiDocumentType.search()
              .query(either)
              .filter('term', locale='en-US'))
    for hit in search.execute():
        eq_('en-US', hit.locale)
def test_excerpt(self):
    """A highlighted summary is exposed as the serializer's excerpt."""
    hits = (WikiDocumentType.search()
            .query('match', summary='CSS')
            .highlight(*WikiDocumentType.excerpt_fields)
            .execute())
    serialized = DocumentSerializer(hits, many=True).data
    assert 'A <em>CSS</em> article' == serialized[0]['excerpt']
def test_excerpt(self):
    """A highlighted summary is exposed as the serializer's excerpt."""
    hits = (WikiDocumentType.search()
            .query("match", summary="CSS")
            .highlight(*WikiDocumentType.excerpt_fields)
            .execute())
    data = DocumentSerializer(hits, many=True).data
    eq_(data[0]["excerpt"], u"A <em>CSS</em> article")
def test_excerpt(self):
    """A highlighted summary is exposed as the serializer's excerpt."""
    search = WikiDocumentType.search()
    search = search.query('match', summary='CSS')
    search = search.highlight(*WikiDocumentType.excerpt_fields)
    result = search.execute()
    # BUG FIX: result is a sequence of hits, so the serializer must be
    # built with many=True (as every sibling version of this test does);
    # without it .data is a single-object mapping and data[0] fails.
    data = DocumentSerializer(result, many=True).data
    eq_(data[0]['excerpt'], u'A <em>CSS</em> article')
def test_excerpt(self):
    """A highlighted summary is exposed as the serializer's excerpt."""
    query = (WikiDocumentType.search()
             .query('match', summary='CSS')
             .highlight(*WikiDocumentType.excerpt_fields))
    serialized = DocumentSerializer(query.execute(), many=True).data
    eq_(serialized[0]['excerpt'], u'A <em>CSS</em> article')
def test_get_excerpt_uses_summary(self):
    """get_excerpt() prefers the summary over the full content."""
    self.refresh()
    search = WikiDocumentType.search().query("match", content="audio")
    assert search.count()
    for hit in search.execute():
        snippet = hit.get_excerpt()
        assert "the word for tough things" in snippet
        assert "extra content" not in snippet
def test_get_excerpt_strips_html(self):
    """get_excerpt() removes HTML tags from the excerpt text."""
    self.refresh()
    search = WikiDocumentType.search().query('match', content='audio')
    assert search.count()
    for hit in search.execute():
        snippet = hit.get_excerpt()
        assert 'audio' in snippet
        assert '<strong>' not in snippet
def delete_if_exists(self):
    """Drop this index from Elasticsearch, ignoring a missing index."""
    conn = WikiDocumentType.get_connection()
    try:
        conn.indices.delete(self.prefixed_name)
    except NotFoundError:
        # The index doesn't exist, so there's nothing to delete.
        pass
def test_get_excerpt_strips_html(self):
    """get_excerpt() removes HTML tags from the excerpt text."""
    self.refresh()
    search = WikiDocumentType.search().query("match", content="audio")
    assert search.count()
    for hit in search.execute():
        snippet = hit.get_excerpt()
        assert "audio" in snippet
        assert "<strong>" not in snippet
def test_get_excerpt_uses_summary(self):
    """get_excerpt() prefers the summary over the full content."""
    self.refresh()
    search = WikiDocumentType.search().query('match', content='audio')
    assert search.count()
    for hit in search.execute():
        snippet = hit.get_excerpt()
        assert 'the word for tough things' in snippet
        assert 'extra content' not in snippet
def test_get_excerpt_uses_summary(self):
    """get_excerpt() prefers the summary over the full content."""
    self.refresh()
    search = WikiDocumentType.search().query('match', content='audio')
    ok_(search.count() > 0)
    for hit in search.execute():
        snippet = hit.get_excerpt()
        ok_('the word for tough things' in snippet)
        ok_('extra content' not in snippet)
def test_get_excerpt_strips_html(self):
    """get_excerpt() removes HTML tags from the excerpt text."""
    self.refresh()
    search = WikiDocumentType.search().query('match', content='audio')
    ok_(search.count() > 0)
    for hit in search.execute():
        snippet = hit.get_excerpt()
        ok_('audio' in snippet)
        ok_('<strong>' not in snippet)
def test_current_locale_results(self):
    """Filtering by locale returns only documents in that locale."""
    self.refresh()
    either = query.Match(title='article') | query.Match(content='article')
    search = (WikiDocumentType.search()
              .query(either)
              .filter('term', locale='en-US'))
    for hit in search.execute():
        eq_('en-US', hit.locale)
def test_base_search(db):
    '''WikiDocumentType.search() searches all documents by default.'''
    body = WikiDocumentType.search().to_dict()
    assert body == {'query': {'match_all': {}}}
def test_current_locale_results(self):
    """Filtering by locale returns only documents in that locale."""
    self.refresh()
    either = query.Match(title="article") | query.Match(content="article")
    search = (WikiDocumentType.search()
              .query(either)
              .filter("term", locale="en-US"))
    for hit in search.execute():
        assert "en-US" == hit.locale
def handle(self, *args, **options):
    """Reindex a percentage of wiki documents into Elasticsearch."""
    logging.basicConfig(level=logging.INFO)
    percent = options["percent"]
    if not 1 <= percent <= 100:
        raise CommandError("percent should be between 1 and 100")
    summary = WikiDocumentType.reindex_all(
        options["chunk_size"], percent=percent)
    self.stdout.write(summary + "\n")
def handle(self, *args, **options):
    """Reindex a percentage of wiki documents into Elasticsearch."""
    logging.basicConfig(level=logging.INFO)
    percent = options['percent']
    if not 1 <= percent <= 100:
        raise CommandError('percent should be between 1 and 100')
    summary = WikiDocumentType.reindex_all(
        options['chunk_size'], percent=percent)
    # Python 2: coerce to a unicode object before writing to stdout.
    self.stdout.write(unicode(summary) + '\n')
def pre_delete_handler(instance, **kwargs):
    """Schedule removal of a deleted wiki document from the search index."""
    if not settings.ES_LIVE_INDEX:
        return
    current_index = Index.objects.get_current()
    if WikiDocumentType.should_update(instance):
        unindex_documents.delay([instance.pk], current_index.pk)
    else:
        log.info('Ignoring wiki document %r while updating search index',
                 instance.pk, exc_info=True)
def mock_search(mock_elasticsearch):
    """Mock WikiDocumentType.search() for a fake Elasticsearch and index."""
    conn_patcher = mock.patch(
        "kuma.wiki.search.connections.get_connection",
        return_value=mock_elasticsearch)
    index_patcher = mock.patch(
        "kuma.wiki.search.WikiDocumentType.get_index",
        return_value="mdn-test")
    conn_patcher.start()
    index_patcher.start()
    yield WikiDocumentType.search()
    # Undo in reverse order of starting.
    index_patcher.stop()
    conn_patcher.stop()
def mock_search(mock_elasticsearch):
    '''Mock WikiDocumentType.search() for a fake Elasticsearch and index.'''
    conn_patcher = mock.patch(
        'kuma.wiki.search.connections.get_connection',
        return_value=mock_elasticsearch)
    index_patcher = mock.patch(
        'kuma.wiki.search.WikiDocumentType.get_index',
        return_value='mdn-test')
    conn_patcher.start()
    index_patcher.start()
    yield WikiDocumentType.search()
    # Undo in reverse order of starting.
    index_patcher.stop()
    conn_patcher.stop()
def test_document_serializer(self):
    """The serializer handles both lists of hits and a single hit."""
    result = WikiDocumentType.search().execute()
    # many=True yields a list of serialized documents.
    list_data = DocumentSerializer(result, many=True).data
    eq_(len(list_data), 7)
    ok_(isinstance(list_data, list))
    ok_(1 in [item['id'] for item in list_data])
    # many=False yields a single serialized document.
    dict_data = DocumentSerializer(result[0], many=False).data
    ok_(isinstance(dict_data, dict))
    eq_(dict_data['id'], result[0].id)
def test_document_serializer(self):
    """The serializer handles both lists of hits and a single hit."""
    result = WikiDocumentType.search().execute()
    # many=True yields a list of serialized documents.
    list_data = DocumentSerializer(result, many=True).data
    assert 7 == len(list_data)
    assert isinstance(list_data, list)
    assert 1 in [item["id"] for item in list_data]
    # many=False yields a single serialized document.
    dict_data = DocumentSerializer(result[0], many=False).data
    assert isinstance(dict_data, dict)
    assert dict_data["id"] == result[0].id
def pre_delete_handler(**kwargs):
    """Schedule removal of a deleted wiki document from the search index."""
    if not settings.ES_LIVE_INDEX:
        return
    if 'instance' not in kwargs:
        return
    # Imported here to avoid circular imports at module load time.
    from kuma.wiki.tasks import unindex_documents
    from .models import Index
    doc = kwargs['instance']
    current_index = Index.objects.get_current()
    if WikiDocumentType.should_update(doc):
        unindex_documents.delay([doc.pk], current_index.pk)
    else:
        log.info('Ignoring wiki document %r while updating search index',
                 doc.pk, exc_info=True)
def render_done_handler(instance, **kwargs):
    """Schedule (re)indexing of a wiki document after it is rendered."""
    if not settings.ES_LIVE_INDEX:
        return
    if not WikiDocumentType.should_update(instance):
        log.info('Ignoring wiki document %r while updating search index',
                 instance.id, exc_info=True)
        return
    current_index = Index.objects.get_current()
    if current_index.record_outdated(instance):
        log.info('Found a newer index and scheduled '
                 'indexing it after promotion.')
    # Index the document together with all of its translations.
    pks = set([translation.pk for translation in instance.other_translations])
    pks.add(instance.id)
    try:
        index_documents.delay(list(pks), current_index.pk)
    except Exception:
        log.error('Search indexing task failed', exc_info=True)
def render_done_handler(instance, **kwargs):
    """Schedule (re)indexing of a wiki document after it is rendered."""
    if not settings.ES_LIVE_INDEX:
        return
    if not WikiDocumentType.should_update(instance):
        log.info('Ignoring wiki document %r while updating search index',
                 instance.id, exc_info=True)
        return
    current_index = Index.objects.get_current()
    if current_index.record_outdated(instance):
        log.info('Found a newer index and scheduled '
                 'indexing it after promotion.')
    # Index the document together with all of its translations.
    pks = {translation.pk for translation in instance.other_translations}
    pks.add(instance.id)
    try:
        index_documents.delay(list(pks), current_index.pk)
    except Exception:
        log.error('Search indexing task failed', exc_info=True)
def render_done_handler(**kwargs):
    """Schedule (re)indexing of a wiki document after it is rendered."""
    if not settings.ES_LIVE_INDEX or 'instance' not in kwargs:
        return
    # Imported here to avoid circular imports at module load time.
    from kuma.wiki.tasks import index_documents
    from .models import Index
    doc = kwargs['instance']
    if WikiDocumentType.should_update(doc):
        current_index = Index.objects.get_current()
        outdated = current_index.record_outdated(doc)
        if outdated:
            log.info('Found a newer index and scheduled '
                     'indexing it after promotion.')
        # Index the document together with all of its translations.
        doc_pks = set(doc.other_translations.values_list('pk', flat=True))
        doc_pks.add(doc.id)
        try:
            index_documents.delay(list(doc_pks), current_index.pk)
        # BUG FIX: the bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch only genuine errors.
        except Exception:
            log.error('Search indexing task failed', exc_info=True)
    else:
        log.info('Ignoring wiki document %r while updating search index',
                 doc.id, exc_info=True)
def test_hidden_slugs_should_update(self):
    """Documents with hidden (User:) slugs are excluded from indexing."""
    hidden_doc = Document.objects.get(slug='User:jezdez')
    eq_(WikiDocumentType.should_update(hidden_doc), False)
def populate(self):
    """Run a full reindex into this index, 500 documents at a time."""
    return WikiDocumentType.reindex_all(chunk_size=500, index=self)
def test_hidden_slugs_get_indexable(self):
    """Documents with hidden (User:) slugs are not indexable."""
    self.refresh()
    titles = (WikiDocumentType.get_indexable()
              .values_list('title', flat=True))
    assert 'User:jezdez' not in titles
def status(request):
    """
    Return summary information about this Kuma instance.

    Functional tests can use this to customize the test process.
    """
    # Static request/settings snapshot; the 'services' entries are
    # filled in by the checks below.
    data = {
        'version': 1,
        'request': {
            'url': request.build_absolute_uri(''),
            'host': request.get_host(),
            'is_secure': request.is_secure(),
            'scheme': request.scheme,
        },
        'services': {
            'database': {},
            'kumascript': {},
            'search': {},
            'test_accounts': {},
        },
        'settings': {
            'ALLOWED_HOSTS': settings.ALLOWED_HOSTS,
            'ATTACHMENT_HOST': settings.ATTACHMENT_HOST,
            'ATTACHMENT_ORIGIN': settings.ATTACHMENT_ORIGIN,
            'DEBUG': settings.DEBUG,
            'INTERACTIVE_EXAMPLES_BASE': settings.INTERACTIVE_EXAMPLES_BASE,
            'LEGACY_HOSTS': settings.LEGACY_HOSTS,
            'MAINTENANCE_MODE': settings.MAINTENANCE_MODE,
            'PROTOCOL': settings.PROTOCOL,
            'REVISION_HASH': settings.REVISION_HASH,
            'SITE_URL': settings.SITE_URL,
            'STATIC_URL': settings.STATIC_URL,
        },
    }

    # Check that database is reachable, populated
    doc_data = {'available': True, 'populated': False, 'document_count': 0}
    try:
        doc_count = Document.objects.count()
    except DatabaseError:
        doc_data['available'] = False
    else:
        if doc_count:
            doc_data['populated'] = True
            doc_data['document_count'] = doc_count
    data['services']['database'] = doc_data

    # Check that KumaScript is reachable
    ks_data = {
        'available': True,
        'revision': None,
    }
    try:
        ks_response = request_revision_hash()
    except Requests_ConnectionError:
        ks_response = None
    if not ks_response or ks_response.status_code != 200:
        ks_data['available'] = False
    else:
        ks_data['revision'] = ks_response.text
    data['services']['kumascript'] = ks_data

    # Check that ElasticSearch is reachable, populated
    search_data = {'available': True, 'populated': False, 'count': 0}
    try:
        search_count = WikiDocumentType.search().count()
    except ES_ConnectionError:
        search_data['available'] = False
    except NotFoundError:
        pass  # available but unpopulated (and maybe uncreated)
    else:
        if search_count:
            search_data['populated'] = True
            search_data['count'] = search_count
    data['services']['search'] = search_data

    # Check if the testing accounts are available
    test_account_data = {'available': False}
    test_account_names = [
        'test-super', 'test-moderator', 'test-new', 'test-banned',
        'viagra-test-123'
    ]
    try:
        users = list(
            User.objects.only(
                'id', 'username',
                'password').filter(username__in=test_account_names))
    except DatabaseError:
        users = []
    # Only report available when every expected account exists AND each
    # one still has the shared testing password.
    if len(users) == len(test_account_names):
        for user in users:
            if not user.check_password('test-password'):
                break
        else:
            # All users have the testing password
            test_account_data['available'] = True
    data['services']['test_accounts'] = test_account_data

    return JsonResponse(data)
def setup_indexes(self):
    """Clear and repopulate the current index.

    Delegates the full reindex to WikiDocumentType.reindex_all().
    """
    WikiDocumentType.reindex_all()
def test_should_not_update_excluded_slug(mock_doc, slug):
    """Excluded slugs should not update the search index."""
    mock_doc.slug = slug
    decision = WikiDocumentType.should_update(mock_doc)
    assert not decision
def status(request):
    """
    Return summary information about this Kuma instance.

    Functional tests can use this to customize the test process.
    """
    # Static request/settings snapshot; the 'services' entries are
    # filled in by the checks below.
    data = {
        'version': 1,
        'request': {
            'url': request.build_absolute_uri(''),
            'host': request.get_host(),
            'is_secure': request.is_secure(),
            'scheme': request.scheme,
        },
        'services': {
            'database': {},
            'kumascript': {},
            'search': {},
            'test_accounts': {},
        },
        'settings': {
            'ALLOWED_HOSTS': settings.ALLOWED_HOSTS,
            'ATTACHMENT_HOST': settings.ATTACHMENT_HOST,
            'ATTACHMENT_ORIGIN': settings.ATTACHMENT_ORIGIN,
            'DEBUG': settings.DEBUG,
            'INTERACTIVE_EXAMPLES_BASE': settings.INTERACTIVE_EXAMPLES_BASE,
            'LEGACY_HOSTS': settings.LEGACY_HOSTS,
            'MAINTENANCE_MODE': settings.MAINTENANCE_MODE,
            'PROTOCOL': settings.PROTOCOL,
            'REVISION_HASH': settings.REVISION_HASH,
            'SITE_URL': settings.SITE_URL,
            'STATIC_URL': settings.STATIC_URL,
        },
    }

    # Check that database is reachable, populated
    doc_data = {
        'available': True,
        'populated': False,
        'document_count': 0
    }
    try:
        doc_count = Document.objects.count()
    except DatabaseError:
        doc_data['available'] = False
    else:
        if doc_count:
            doc_data['populated'] = True
            doc_data['document_count'] = doc_count
    data['services']['database'] = doc_data

    # Check that KumaScript is reachable
    ks_data = {
        'available': True,
        'revision': None,
    }
    try:
        ks_response = request_revision_hash()
    except Requests_ConnectionError:
        ks_response = None
    if not ks_response or ks_response.status_code != 200:
        ks_data['available'] = False
    else:
        ks_data['revision'] = ks_response.text
    data['services']['kumascript'] = ks_data

    # Check that ElasticSearch is reachable, populated
    search_data = {
        'available': True,
        'populated': False,
        'count': 0
    }
    try:
        search_count = WikiDocumentType.search().count()
    except ES_ConnectionError:
        search_data['available'] = False
    except NotFoundError:
        pass  # available but unpopulated (and maybe uncreated)
    else:
        if search_count:
            search_data['populated'] = True
            search_data['count'] = search_count
    data['services']['search'] = search_data

    # Check if the testing accounts are available
    test_account_data = {
        'available': False
    }
    test_account_names = ['test-super', 'test-moderator', 'test-new',
                          'test-banned', 'viagra-test-123']
    try:
        users = list(User.objects.only('id', 'username', 'password')
                     .filter(username__in=test_account_names))
    except DatabaseError:
        users = []
    # Only report available when every expected account exists AND each
    # one still has the shared testing password.
    if len(users) == len(test_account_names):
        for user in users:
            if not user.check_password('test-password'):
                break
        else:
            # All users have the testing password
            test_account_data['available'] = True
    data['services']['test_accounts'] = test_account_data

    return JsonResponse(data)
def test_should_update_standard_doc(mock_doc):
    """The mock_doc should update search index."""
    decision = WikiDocumentType.should_update(mock_doc)
    assert decision
def get_queryset(self):
    """Expose the full wiki document search as this view's queryset."""
    search = WikiDocumentType.search()
    return search
def test_base_search(db):
    '''WikiDocumentType.search() searches all documents by default.'''
    body = WikiDocumentType.search().to_dict()
    assert body == {}
def test_hidden_slugs_get_indexable(self):
    """Documents with hidden (User:) slugs are not indexable."""
    self.refresh()
    titles = (WikiDocumentType.get_indexable()
              .values_list('title', flat=True))
    ok_('User:jezdez' not in titles)
def test_should_not_update_excluded_flags(mock_doc, flag):
    """Do not update the search index if some flags are set."""
    setattr(mock_doc, flag, True)
    decision = WikiDocumentType.should_update(mock_doc)
    assert not decision