def elastic_search(request):
    """
    Use Elasticsearch for global search.

    Reads ``q``, ``type``, ``project``, ``version``, ``taxonomy`` and
    ``language`` from ``request.GET``. When a query is given, a ``project``
    search goes through ``ProjectDocument`` and a ``file`` search goes through
    ``PageDocument`` (optionally narrowed by project and version). The query
    is logged and the results are rendered with
    ``search/elastic_search.html``.

    :param request: the incoming HTTP request
    :returns: the rendered search page
    """
    user_input = UserInput(
        query=request.GET.get('q'),
        type=request.GET.get('type', 'project'),
        project=request.GET.get('project'),
        version=request.GET.get('version', LATEST),
        taxonomy=request.GET.get('taxonomy'),
        language=request.GET.get('language'),
    )
    # Defaults handed to the template when no search is performed.
    results = ''
    facets = {}

    if user_input.query:
        if user_input.type == 'project':
            project_search = ProjectDocument.faceted_search(
                query=user_input.query, language=user_input.language)
            results = project_search.execute()
            facets = results.facets
        elif user_input.type == 'file':
            kwargs = {}
            if user_input.project:
                # Resolve the requested project slug into the list of
                # projects to search; the helper's name indicates it 404s
                # when nothing matches.
                projects_list = get_project_list_or_404(
                    project_slug=user_input.project, user=request.user)
                project_slug_list = [project.slug for project in projects_list]
                kwargs['projects_list'] = project_slug_list
            if user_input.version:
                kwargs['versions_list'] = user_input.version
            page_search = PageDocument.faceted_search(
                query=user_input.query, **kwargs)
            results = page_search.execute()
            facets = results.facets

    if settings.DEBUG:
        # ``pprint()`` writes to stdout itself and returns ``None``; wrapping
        # it in ``print()`` only emitted an extra "None" line after each dump.
        pprint(results)
        pprint(facets)

    if user_input.query:
        # Anonymous users are logged with an empty user field.
        user = ''
        # NOTE(review): ``is_authenticated`` became an attribute in
        # Django 1.10 and calling it stops working in Django 2.0 -- confirm
        # the Django version this file targets before upgrading.
        if request.user.is_authenticated():
            user = request.user
        log.info(
            LOG_TEMPLATE.format(
                user=user,
                project=user_input.project or '',
                type=user_input.type or '',
                version=user_input.version or '',
                language=user_input.language or '',
                msg=user_input.query or '',
            ))

    template_vars = user_input._asdict()
    template_vars.update({'results': results, 'facets': facets})
    return render(
        request,
        'search/elastic_search.html',
        template_vars,
    )
def test_facted_page_xss(self, client, project):
    """Check the highlighted section content returned for the ``XSS`` query.

    Exactly one page must match, with no domain inner hits and a single
    section inner hit whose only content highlight contains the expected
    markup.
    """
    expected = """ <h3><span>XSS</span> exploit</h3> """.strip()
    response = PageDocument.faceted_search(query='XSS', user='').execute()

    raw_hits = response.hits.hits
    assert len(raw_hits) == 1  # there should be only one result

    nested = raw_hits[0]['inner_hits']

    # there shouldn't be any results from domains
    domain_matches = nested['domains']['hits']['hits']
    assert len(domain_matches) == 0

    section_matches = nested['sections']['hits']['hits']
    assert len(section_matches) == 1

    content_fragments = section_matches[0]['highlight']['sections.content']
    assert len(content_fragments) == 1
    assert expected in content_fragments[0]
def test_facted_page_xss(self, client, project):
    """Check that the first content highlight for ``XSS`` starts as expected."""
    expected = """ <h3><em>XSS</em> exploit</h3> """.strip()
    response = PageDocument.faceted_search(query='XSS', user='').execute()

    # Only the leading part of the first highlighted fragment is compared.
    first_fragment = response[0].meta.highlight.content[0]
    assert first_fragment[:len(expected)] == expected
def test_search_exact_match(self, client, project, case):
    """Quoted queries match the exact phrase, ignoring letter case.

    A query such as ``"foo bar"`` should match ``foo bar``, ``Foo Bar`` and
    so on. ``case`` is the name of a ``str`` method that is applied to the
    query text before searching.
    """
    # The word `Github` appears in both the `kuma` and `pipeline` files,
    # but the phrase `Github can` only exists in the kuma docs, so that
    # phrase is used to verify exact matching.
    phrase = r'"GitHub can"'
    query = getattr(phrase, case)()

    response = PageDocument.faceted_search(query=query, user='').execute()

    assert len(response) == 1
    only_hit = response[0]
    assert only_hit['project'] == 'kuma'
    assert only_hit['path'] == 'documentation'
def get_queryset(self):
    """
    Build the Elasticsearch DSL ``Search`` used in place of a Django queryset.

    A ``Search`` object follows a pattern similar to a queryset, so it can be
    returned here; ``filter_backends`` and ``pagination_class`` are
    compatible with it.
    """
    # All required query params must be present before searching.
    self.validate_query_params()

    params = self.request.query_params
    search_text = params.get('q', '')
    search_options = {
        'filter_by_user': False,
        'projects_list': [proj.slug for proj in self.get_all_projects()],
        'versions_list': params.get('version'),
    }
    return PageDocument.faceted_search(
        query=search_text,
        user=self.request.user,
        **search_options
    )
def test_search_combined_result(self, client, project):
    """Results combine the `AND` and `OR` interpretations of the query.

    For a query like `Foo Bar` the expected ordering is:

    - pages containing both `Foo` and `Bar`
    - pages containing only `Foo` or only `Bar`
    """
    response = PageDocument.faceted_search(
        query='Official Support', user='').execute()
    assert len(response) == 3

    # ``open-source-philosophy`` contains both ``Official Support`` words,
    # ``docker`` contains ``Support`` and ``installation`` contains
    # ``Official``.
    expected_paths = ['open-source-philosophy', 'docker', 'installation']
    found_paths = [hit.path for hit in response]
    assert found_paths == expected_paths
def test_search_combined_result(self, client, project):
    """Results combine the `AND` and `OR` interpretations of the query.

    For a query like `Foo Bar` the expected ordering is:

    - pages containing both `Foo` and `Bar`
    - pages containing only `Foo` or only `Bar`
    """
    response = PageDocument.faceted_search(
        query='Elasticsearch Query', user='').execute()
    assert len(response) == 3

    # ``guides/wipe-environment`` contains both ``Elasticsearch Query``
    # words, ``docker`` contains ``Elasticsearch`` and ``installation``
    # contains ``Query``.
    expected_paths = ['guides/wipe-environment', 'docker', 'installation']
    found_paths = [hit.path for hit in response]
    assert found_paths == expected_paths
def elastic_project_search(request, project_slug):
    """
    Use elastic search to search in a project.

    The project is looked up by ``project_slug`` among the projects returned
    by ``Project.objects.protected`` for the requesting user (404 otherwise).
    When ``q`` is present in the query string, pages of that project and the
    requested version are searched and the search is logged.

    :param request: the incoming HTTP request
    :param project_slug: slug of the project to search in
    :returns: the rendered ``search/elastic_project_search.html`` page
    """
    visible_projects = Project.objects.protected(request.user)
    project = get_object_or_404(visible_projects, slug=project_slug)
    version_slug = request.GET.get('version', LATEST)
    query = request.GET.get('q', None)
    results = None

    if query:
        search_filters = {
            'projects_list': [project.slug],
            'versions_list': version_slug,
        }
        results = PageDocument.faceted_search(
            query=query, user=request.user, **search_filters
        ).execute()

        log.debug('Search results: %s', pformat(results.to_dict()))
        log.debug('Search facets: %s', pformat(results.facets.to_dict()))

        search_log_line = LOG_TEMPLATE.format(
            user=request.user,
            project=project or '',
            type='inproject',
            version=version_slug or '',
            language='',
            msg=query or '',
        )
        log.info(search_log_line)

    context = {
        'project': project,
        'query': query,
        'results': results,
    }
    return render(request, 'search/elastic_project_search.html', context)
def elastic_search(request):
    """
    Use Elasticsearch for global search.

    Reads ``q``, ``type``, ``project``, ``version``, ``taxonomy`` and
    ``language`` from ``request.GET``. When a query is given, a ``project``
    search goes through ``ProjectDocument`` and a ``file`` search goes through
    ``PageDocument`` (optionally narrowed by project and version), and the
    search is logged. For ``file`` results, newlines inside the highlighted
    content are replaced before rendering.

    :param request: the incoming HTTP request
    :returns: the rendered ``search/elastic_search.html`` page
    """
    user_input = UserInput(
        query=request.GET.get('q'),
        type=request.GET.get('type', 'project'),
        project=request.GET.get('project'),
        version=request.GET.get('version', LATEST),
        taxonomy=request.GET.get('taxonomy'),
        language=request.GET.get('language'),
    )
    # Defaults handed to the template when no search is performed.
    results = ''
    facets = {}

    if user_input.query:
        if user_input.type == 'project':
            project_search = ProjectDocument.faceted_search(
                query=user_input.query,
                user=request.user,
                language=user_input.language,
            )
            results = project_search.execute()
            facets = results.facets
        elif user_input.type == 'file':
            kwargs = {}
            if user_input.project:
                kwargs['projects_list'] = [user_input.project]
            if user_input.version:
                kwargs['versions_list'] = [user_input.version]
            page_search = PageDocument.faceted_search(
                query=user_input.query, user=request.user, **kwargs
            )
            results = page_search.execute()
            facets = results.facets

        log.info(
            LOG_TEMPLATE.format(
                user=request.user,
                project=user_input.project or '',
                type=user_input.type or '',
                version=user_input.version or '',
                language=user_input.language or '',
                msg=user_input.query or '',
            ),
        )

    if results:
        if user_input.type == 'file':
            # Change results to turn newlines in highlight into periods
            # https://github.com/rtfd/readthedocs.org/issues/5168
            for result in results:
                if hasattr(result.meta.highlight, 'content'):
                    # The comprehension uses its own variable name: reusing
                    # ``result`` shadowed the loop variable, and on Python 2
                    # (where comprehension variables leak) rebound it to a
                    # string before the assignment target was evaluated.
                    result.meta.highlight.content = [
                        fragment.replace('\n', '. ')
                        for fragment in result.meta.highlight.content
                    ]

        log.debug('Search results: %s', pformat(results.to_dict()))
        log.debug('Search facets: %s', pformat(results.facets.to_dict()))

    template_vars = user_input._asdict()
    template_vars.update({'results': results, 'facets': facets})
    return render(
        request,
        'search/elastic_search.html',
        template_vars,
    )