def test_file_search_show_versions(self, client, all_projects, es_index, settings):
    """Check the version list on the results page once all versions are indexed."""
    # Index every version instead of only the latest one.
    settings.INDEX_ONLY_LATEST = False

    project = all_projects[0]
    # Three extra versions in addition to the project's ``latest``.
    versions = [G(Version, project=project) for _ in range(3)]
    self._reindex_elasticsearch(es_index=es_index)

    query = get_search_query_from_project_file(project_slug=project.slug)
    search_params = {'q': query, 'type': 'file'}
    result, page = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )

    # By default only the latest version's result should be returned.
    assert len(result) == 1

    content = page.find('.navigable .version-list')
    # ``latest`` plus the 3 versions created above.
    assert len(content) == 4

    expected_slugs = [v.slug for v in versions] + [LATEST]
    # The first line of each entry's stripped text is the version slug.
    listed_slugs = [
        element.text_content().strip().split('\n')[0]
        for element in content
    ]
    assert sorted(expected_slugs) == sorted(listed_slugs)
def test_file_search(self, client, project, data_type, page_num):
    """Check that a file search highlights words taken from the query."""
    # ``data_type`` is either "<field>" or "<type>.<field>".
    parts = data_type.split('.')
    if len(parts) < 2:
        block_type, field = None, parts[0]
    else:
        block_type, field = parts

    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        type=block_type,
        field=field,
    )
    search_params = {'q': query, 'type': 'file'}
    results, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )
    assert len(results) >= 1

    # Only the first result is inspected.
    highlight = self._get_highlight(results[0], field, block_type)
    assert len(highlight) == 1

    highlighted_words = self._get_highlighted_words(highlight[0])
    assert len(highlighted_words) >= 1

    # Search is case insensitive, so compare lowercased text.
    lowered_query = query.lower()
    for word in highlighted_words:
        assert word.lower() in lowered_query
def test_file_search_show_versions(self, client, all_projects, es_index, settings):
    """Check that the ``version`` facet lists every version of the project.

    With ``INDEX_ONLY_LATEST`` disabled, the facet is expected to contain
    ``latest`` plus the three versions created by this test.
    """
    # Override the setting so all versions are indexed.
    settings.INDEX_ONLY_LATEST = False

    project = all_projects[0]
    # Create some versions of the project.
    versions = [G(Version, project=project) for _ in range(3)]

    query = get_search_query_from_project_file(project_slug=project.slug)
    results, facets = self._get_search_result(
        url=self.url,
        client=client,
        search_params={'q': query, 'type': 'file'},
    )

    # Results can come from other projects as well.
    assert len(results) >= 1

    version_facets = facets['version']
    # The first item of each facet entry is the version slug.
    version_facets_str = [facet[0] for facet in version_facets]

    # One is latest, the other 3 were created above.
    assert len(version_facets) == 4

    project_versions = [v.slug for v in versions] + [LATEST]
    # The original compared against the undefined name
    # ``resulted_version_facets``, which raised NameError.
    assert sorted(project_versions) == sorted(version_facets_str)
def test_search_by_file_content(self, client, project, data_type, page_num):
    """Check that a query taken from a project file returns exactly one result."""
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        data_type=data_type,
    )
    search_params = {'q': query, 'type': 'file'}

    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )

    assert len(result) == 1
def test_file_search_subprojects(self, client, all_projects, es_index):
    """
    Check that filtering by a parent project returns no subproject results.

    TODO: File search should return results from subprojects also.
    This is currently disabled because the UX around it is weird:
    you filter by a project, and get results for multiple.
    """
    project, subproject = all_projects[0], all_projects[1]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    results, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={
            'q': query,
            'type': 'file',
            'project': project.slug,
        },
    )

    assert len(results) == 0
def test_doc_search_filter_by_version(self, api_client, project):
    """Check that document search results are filtered by the given version."""
    query = get_search_query_from_project_file(project_slug=project.slug)
    latest_version = project.versions.all()[0]

    # A second, public and active version of the same project.
    dummy_version = G(
        Version,
        project=project,
        active=True,
        privacy_level=PUBLIC,
    )

    # Copy every HTMLFile of the latest version onto the new version.
    files_to_copy = HTMLFile.objects.all().filter(version=latest_version)
    for html_file in files_to_copy:
        html_file.version = dummy_version
        # Clearing the primary key makes Django insert a new row on save.
        html_file.pk = None
        html_file.save()
        PageDocument().update(html_file)

    resp = self.get_search(
        api_client,
        {
            'q': query,
            'project': project.slug,
            'version': dummy_version.slug,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) == 1
    assert data[0]['project'] == project.slug
def test_doc_search_subprojects(self, api_client, all_projects):
    """Check that document search also returns results from subprojects."""
    project, subproject = all_projects[0], all_projects[1]
    version = project.versions.all()[0]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    resp = self.get_search(
        api_client,
        {
            'q': query,
            'project': project.slug,
            'version': version.slug,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    # Other projects may contribute results too.
    assert len(data) >= 1

    # The subproject is expected to come first.
    first_result = data[0]
    assert first_result['project'] == subproject.slug

    # The link must point at the subproject's documentation.
    expected_link = subproject.get_docs_url(version_slug=version.slug)
    assert expected_link in first_result['link']
def test_file_search_show_versions(self, client, all_projects, es_index, settings):
    """Check the version list on the results page once all versions are indexed."""
    # Index every version instead of only the latest one.
    settings.INDEX_ONLY_LATEST = False

    project = all_projects[0]
    # Three extra versions in addition to the project's ``latest``.
    versions = [G(Version, project=project) for _ in range(3)]

    query = get_search_query_from_project_file(project_slug=project.slug)
    search_params = {'q': query, 'type': 'file'}
    result, page = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )

    # By default only the latest version's result should be returned.
    assert len(result) == 1

    content = page.find('.navigable .version-list')
    # ``latest`` plus the 3 versions created above.
    assert len(content) == 4

    expected_slugs = [v.slug for v in versions] + [LATEST]
    # The first line of each entry's stripped text is the version slug.
    listed_slugs = [
        element.text_content().strip().split('\n')[0]
        for element in content
    ]
    assert sorted(expected_slugs) == sorted(listed_slugs)
def test_search_works_with_title_query(self, api_client, project, page_num):
    """Check that a title query matches the project and highlights the title."""
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        field='title',
    )
    version = project.versions.all().first()

    resp = self.get_search(
        api_client,
        {
            'project': project.slug,
            'version': version.slug,
            'q': query,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) >= 1

    # Only the first result is inspected.
    first_result = data[0]
    assert first_result['project'] == project.slug

    # The title highlight must hold one fragment containing the query.
    title_highlight = first_result['highlight']['title']
    assert len(title_highlight) == 1
    assert query.lower() in title_highlight[0].lower()
def test_file_search(self, client, project, data_type, page_num):
    """Check that a file search returns one result whose text contains the query."""
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        data_type=data_type,
    )
    search_params = {'q': query, 'type': 'file'}

    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )

    assert len(result) == 1
    assert query in result.text()
def test_doc_search_hidden_versions(self, api_client, all_projects):
    """
    Check document search against a hidden subproject version.

    The hidden version must not appear when searching through the parent
    project, but searching the subproject version directly must still work.
    """
    project, subproject = all_projects[0], all_projects[1]
    version = project.versions.all()[0]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    hidden_version = subproject.versions.first()
    hidden_version.hidden = True
    hidden_version.save()

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    resp = self.get_search(
        api_client,
        {
            'q': query,
            'project': project.slug,
            'version': version.slug,
        },
    )
    assert resp.status_code == 200

    # The subproject's version is hidden, so nothing is shown.
    assert len(resp.data['results']) == 0

    # Search again, this time directly on the hidden subproject version.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    resp = self.get_search(
        api_client,
        {
            'q': query,
            'project': subproject.slug,
            'version': hidden_version.slug,
        },
    )
    assert resp.status_code == 200

    # Searching inside the hidden version is still possible.
    data = resp.data['results']
    assert len(data) == 1
    assert data[0]['project'] == subproject.slug
def test_search_works_with_sections_and_domains_query(
    self, api_client, project, page_num, data_type
):
    """Check block type and highlights for a "<type>.<field>" query."""
    block_type, field = data_type.split('.')
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        type=block_type,
        field=field,
    )
    version = project.versions.all().first()

    resp = self.get_search(
        api_client,
        {
            'project': project.slug,
            'version': version.slug,
            'q': query,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) >= 1

    # Only the first result is inspected.
    first_result = data[0]
    assert first_result['project'] == project.slug

    # A nested query was issued, so blocks must not be empty.
    blocks = first_result['blocks']
    assert len(blocks) >= 1

    first_block = blocks[0]
    assert first_block['type'] == block_type

    highlights = first_block['highlights'][field]
    assert (
        len(highlights) == 1
    ), 'number_of_fragments is set to 1'

    # Collect every word wrapped in a <span> tag.
    highlighted_words = re.findall('<span>(.*?)</span>', highlights[0])
    assert len(highlighted_words) > 0

    # Search is case insensitive, so compare lowercased text.
    lowered_query = query.lower()
    for word in highlighted_words:
        assert word.lower() in lowered_query
def has_results(self, api_client, project_slug, version_slug):
    """Return whether searching the given project/version yields any result."""
    query = get_search_query_from_project_file(project_slug=project_slug)

    resp = api_client.get(
        self.url,
        {
            'project': project_slug,
            'version': version_slug,
            'q': query,
        },
    )
    assert resp.status_code == 200

    return len(resp.data['results']) > 0
def test_search_works_with_sections_and_domains_query(
    self, api_client, project, page_num, data_type
):
    """Check inner hit type and highlights for a sections/domains query."""
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        data_type=data_type,
    )
    version = project.versions.all().first()

    resp = api_client.get(
        self.url,
        {
            'project': project.slug,
            'version': version.slug,
            'q': query,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) >= 1

    # Only the first result is inspected.
    first_result = data[0]
    assert first_result['project'] == project.slug

    # A nested query was issued, so inner_hits must not be empty.
    inner_hits = first_result['inner_hits']
    assert len(inner_hits) >= 1

    first_inner_hit = inner_hits[0]
    # The part before the dot is "sections" or "domains".
    expected_type = data_type.split('.')[0]
    assert first_inner_hit['type'] == expected_type

    highlight = first_inner_hit['highlight'][data_type]
    assert (
        len(highlight) == 1
    ), 'number_of_fragments is set to 1'

    # Collect every word wrapped in a <span> tag.
    highlighted_words = re.findall('<span>(.*?)</span>', highlight[0])
    assert len(highlighted_words) > 0

    # Search is case insensitive, so compare lowercased text.
    lowered_query = query.lower()
    for word in highlighted_words:
        assert word.lower() in lowered_query
def test_doc_search_subprojects_default_version(self, api_client, all_projects):
    """
    Check subproject results when the parent's version is missing there.

    Subproject results should match the version of the main project,
    or fall back to the subproject's default version.
    """
    project = all_projects[0]
    version = project.versions.all()[0]

    # Enable the fallback-to-default-version feature on the parent project.
    feature, _ = Feature.objects.get_or_create(
        feature_id=Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION,
    )
    project.feature_set.add(feature)

    subproject = all_projects[1]
    subproject_version = subproject.versions.all()[0]

    # Rename the subproject's version and make it the default one.
    subproject_version.slug = 'different'
    subproject_version.save()
    subproject.default_version = subproject_version.slug
    subproject.save()
    subproject.versions.filter(slug=version.slug).delete()

    # Refresh the index for the renamed version's files.
    for html_file in HTMLFile.objects.all().filter(version=subproject_version):
        PageDocument().update(html_file)

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    resp = self.get_search(
        api_client,
        {
            'q': query,
            'project': project.slug,
            'version': version.slug,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    # Other projects may contribute results too.
    assert len(data) >= 1

    # The subproject is expected to come first, on its default version.
    first_result = data[0]
    assert first_result['project'] == subproject.slug
    assert first_result['version'] == 'different'

    # The link must point at the subproject's documentation.
    expected_link = subproject.get_docs_url(
        version_slug=subproject_version.slug,
    )
    actual_link = first_result['domain'] + first_result['path']
    assert expected_link in actual_link
def test_file_search_subprojects(self, client, all_projects, es_index):
    """Check that file search also returns results from subprojects."""
    project, subproject = all_projects[0], all_projects[1]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={
            'q': query,
            'type': 'file',
            'project': project.slug,
        },
    )

    assert len(result) == 1
def test_page_search_not_return_removed_page(self, client, project):
    """Check that removed pages no longer show up in search results."""
    query = get_search_query_from_project_file(project_slug=project.slug)
    search_params = {'q': query, 'type': 'file'}

    # The query returns a result while the page still exists.
    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params=search_params,
    )
    assert len(result) == 1

    # Remove every HTML file that belongs to the project.
    HTMLFile.objects.filter(project=project).delete()

    # The same query must now come back empty.
    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={'q': query, 'type': 'file'},
    )
    assert len(result) == 0
def test_file_search_subprojects(self, client, all_projects, es_index):
    """Check that file search also returns results from subprojects."""
    project, subproject = all_projects[0], all_projects[1]

    # Register the second project as a subproject, then rebuild the index.
    project.add_subproject(subproject)
    self._reindex_elasticsearch(es_index=es_index)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={
            'q': query,
            'type': 'file',
            'project': project.slug,
        },
    )

    assert len(result) == 1
def test_file_search_case_insensitive(self, client, project, case):
    """
    Check that file search is case insensitive.

    ``case`` names the string method used to re-case the query.
    """
    query_text = get_search_query_from_project_file(project_slug=project.slug)
    # Call the string method named by ``case`` on the original text.
    query = getattr(query_text, case)()

    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={'q': query, 'type': 'file'},
    )
    assert len(result) == 1

    # The result must contain the original text, not the re-cased query.
    assert query_text in result.text()
def test_file_search_case_insensitive(self, client, project, case):
    """
    Check that file search is case insensitive.

    ``case`` names the string method used to re-case the query.
    """
    query_text = get_search_query_from_project_file(project_slug=project.slug)
    # Call the string method named by ``case`` on the original text.
    query = getattr(query_text, case)()

    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={'q': query, 'type': 'file'},
    )
    assert len(result) == 1

    # The result must contain the original text, not the re-cased query.
    assert query_text in result.text()
def test_search_works(self, api_client, project, data_type, page_num):
    """Check that the API returns one highlighted result for the project."""
    query = get_search_query_from_project_file(
        project_slug=project.slug,
        page_num=page_num,
        data_type=data_type,
    )
    version = project.versions.all()[0]

    resp = api_client.get(
        self.url,
        {'project': project.slug, 'version': version.slug, 'q': query},
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) == 1

    only_result = data[0]
    assert only_result['project'] == project.slug

    # Every highlight fragment must contain the query, ignoring case.
    lowered_query = query.lower()
    for fragment in only_result['highlight'][data_type]:
        assert lowered_query in fragment.lower()
def test_doc_search_subprojects(self, api_client, all_projects):
    """Check that document search also returns results from subprojects."""
    project, subproject = all_projects[0], all_projects[1]
    version = project.versions.all()[0]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    resp = api_client.get(
        self.url,
        {'q': query, 'project': project.slug, 'version': version.slug},
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) == 1

    only_result = data[0]
    assert only_result['project'] == subproject.slug

    # The link must point at the subproject's documentation.
    expected_link = subproject.get_docs_url(version_slug=version.slug)
    assert expected_link in only_result['link']
def test_file_search_subprojects(self, client, all_projects, es_index):
    """
    Check that filtering by a parent project returns no subproject results.

    TODO: File search should return results from subprojects also.
    This is currently disabled because the UX around it is weird:
    you filter by a project, and get results for multiple.
    """
    project, subproject = all_projects[0], all_projects[1]

    # Register the second project as a subproject of the first.
    project.add_subproject(subproject)

    # Query with subproject content while filtering by the parent project.
    query = get_search_query_from_project_file(project_slug=subproject.slug)
    result, _ = self._get_search_result(
        url=self.url,
        client=client,
        search_params={
            'q': query,
            'type': 'file',
            'project': project.slug,
        },
    )

    assert len(result) == 0
def test_doc_search_filter_by_version(self, api_client, project):
    """Check that document search results are filtered by the given version."""
    query = get_search_query_from_project_file(project_slug=project.slug)
    latest_version = project.versions.all()[0]

    # A second, active version of the same project.
    dummy_version = G(Version, project=project, active=True)

    # Copy every HTMLFile of the latest version onto the new version.
    files_to_copy = HTMLFile.objects.all().filter(version=latest_version)
    for html_file in files_to_copy:
        html_file.version = dummy_version
        # Clearing the primary key makes Django insert a new row on save.
        html_file.pk = None
        html_file.save()
        PageDocument().update(html_file)

    resp = api_client.get(
        self.url,
        {
            'q': query,
            'project': project.slug,
            'version': dummy_version.slug,
        },
    )
    assert resp.status_code == 200

    data = resp.data['results']
    assert len(data) == 1
    assert data[0]['project'] == project.slug
class TestPageSearch(object):
    """Tests for file search through the ``search`` view."""

    url = reverse('search')

    def _get_search_result(self, url, client, search_params):
        """Request ``url`` and return the ``results`` and ``facets`` context values."""
        response = client.get(url, search_params)
        assert response.status_code == 200

        context = response.context
        return context['results'], context['facets']

    def _get_highlight(self, result, data_type):
        """Return the highlight of ``result`` that corresponds to ``data_type``."""
        # A title match is highlighted in ``result.meta.highlight.title``.
        if data_type == 'title':
            return result.meta.highlight.title

        # Any other match is highlighted inside the result's ``inner_hits``.
        inner_hits = result.meta.inner_hits
        assert len(inner_hits) >= 1

        # Only the first inner hit is checked.
        first_inner_hit = inner_hits[0]
        # The part before the dot is either 'sections' or 'domains'.
        expected_type = data_type.split('.')[0]
        assert first_inner_hit['type'] == expected_type

        return first_inner_hit['highlight'][data_type]

    def _get_highlighted_words(self, string):
        """Return the text of every ``<span>`` element found in ``string``."""
        return re.findall('<span>(.*?)</span>', string)

    @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES)
    @pytest.mark.parametrize('page_num', [0, 1])
    def test_file_search(self, client, project, data_type, page_num):
        """Check that a file search highlights words taken from the query."""
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            data_type=data_type,
        )
        results, _ = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': query, 'type': 'file'},
        )
        assert len(results) >= 1

        # Only the first result is inspected.
        highlight = self._get_highlight(results[0], data_type)
        assert len(highlight) == 1

        highlighted_words = self._get_highlighted_words(highlight[0])
        assert len(highlighted_words) >= 1

        # Search is case insensitive, so compare lowercased text.
        lowered_query = query.lower()
        for word in highlighted_words:
            assert word.lower() in lowered_query

    def test_file_search_have_correct_role_name_facets(self, client):
        """Test that searching files should result all role_names."""
        # 'celery' is used as the query to check the displayed role_names.
        results, facets = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': 'celery', 'type': 'file'},
        )
        assert len(results) >= 1

        role_name_facets = facets['role_name']
        found_role_names = [facet[0] for facet in role_name_facets]
        expected_role_names = ['py:class', 'py:function', 'py:method']
        assert sorted(expected_role_names) == sorted(found_role_names)

        # No filter was sent, so no facet may be flagged as applied.
        for facet in role_name_facets:
            assert facet[2] == False

    def test_file_search_filter_role_name(self, client):
        """Test that searching files filtered according to role_names."""
        search_params = {'q': 'celery', 'type': 'file'}

        # First search without any role_name filter.
        results, facets = self._get_search_result(
            url=self.url,
            client=client,
            search_params=search_params,
        )
        # More than one result is expected without the filter.
        assert len(results) >= 2

        role_name_facets = facets['role_name']
        # No filter was sent, so no facet may be flagged as applied.
        for facet in role_name_facets:
            assert facet[2] == False

        confval_facet = 'py:class'
        # The facet used for filtering must be offered in the results.
        assert confval_facet in [facet[0] for facet in role_name_facets]

        # Search again, now filtering with role_name=py:class.
        search_params['role_name'] = confval_facet
        new_results, new_facets = self._get_search_result(
            url=self.url,
            client=client,
            search_params=search_params,
        )
        new_role_names_facets = new_facets['role_name']

        # Only one result has role_name='py:class' (in the `signals` page).
        assert len(new_results) == 1

        inner_hits = new_results[0].meta.inner_hits
        assert len(inner_hits) >= 1

        first_inner_hit = inner_hits[0]
        assert first_inner_hit.type == 'domains'
        assert first_inner_hit.source.role_name == confval_facet

        # Only the facet that was filtered on may be flagged as applied.
        for facet in new_role_names_facets:
            assert facet[2] == (facet[0] == confval_facet)

    @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES)
    @pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
    def test_file_search_case_insensitive(self, client, project, case, data_type):
        """
        Check that file search is case insensitive.

        The query is re-cased with ``str.upper``, ``str.lower`` and
        ``str.title``.
        """
        query_text = get_search_query_from_project_file(
            project_slug=project.slug,
            data_type=data_type,
        )
        # Call the string method named by ``case`` on the original text.
        query = getattr(query_text, case)()

        results, _ = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': query, 'type': 'file'},
        )
        assert len(results) >= 1

        highlight = self._get_highlight(results[0], data_type)
        assert len(highlight) == 1

        highlighted_words = self._get_highlighted_words(highlight[0])
        assert len(highlighted_words) >= 1

        lowered_query = query.lower()
        for word in highlighted_words:
            assert word.lower() in lowered_query