def test_demo_collections_are_added_to_search_index(self):
    """Loading a demo collection should make it discoverable via search."""
    # Before the demo collection is loaded, the query matches nothing.
    matching_ids = search_services.search_collections(
        'Welcome', [], [], 2)[0]
    self.assertEqual(matching_ids, [])

    # After loading, the demo collection must appear in the search results.
    collection_services.load_demo('0')
    matching_ids = search_services.search_collections(
        'Welcome', [], [], 2)[0]
    self.assertEqual(matching_ids, ['0'])
def test_clear_collection_search_index(self):
    """Clearing the search index removes previously indexed collections."""
    # Index a demo collection and confirm it is searchable.
    collection_services.load_demo('0')
    found_ids = search_services.search_collections('Welcome', [], [], 2)[0]
    self.assertEqual(found_ids, ['0'])

    # After clearing the index, the same query must match nothing.
    search_services.clear_collection_search_index()
    found_ids = search_services.search_collections('Welcome', [], [], 2)[0]
    self.assertEqual(found_ids, [])
def test_search_collections(self):
    """search_collections must forward its arguments to
    gae_search_services.search unchanged and relay that call's results.
    """
    query = 'a query string'
    passed_cursor = 'cursor'
    page_size = 30
    returned_cursor = 'rcursor'
    matching_doc_ids = ['id1', 'id2']

    def mock_search(
            query_string, index, cursor=None, size=20, ids_only=False,
            retries=3):
        # Every argument must arrive exactly as supplied by the caller.
        self.assertEqual(query_string, query)
        self.assertEqual(
            index, collection_services.SEARCH_INDEX_COLLECTIONS)
        self.assertEqual(cursor, passed_cursor)
        self.assertEqual(size, page_size)
        self.assertEqual(ids_only, True)
        self.assertEqual(retries, 3)
        return matching_doc_ids, returned_cursor

    with self.swap(gae_search_services, 'search', mock_search):
        result, result_cursor = search_services.search_collections(
            query, page_size, cursor=passed_cursor)

    self.assertEqual(result_cursor, returned_cursor)
    self.assertEqual(result, matching_doc_ids)
def test_search_collections(self):
    """search_collections must pass its query, filters, size and offset
    through to gae_search_services.search and return that call's results.
    """
    query = 'a query string'
    passed_offset = 0
    page_size = 30
    next_offset = 30
    matching_doc_ids = ['id1', 'id2']

    def mock_search(
            query_string, index, categories, language_codes, offset=None,
            size=20, ids_only=False, retries=3):
        # Every argument must arrive exactly as supplied by the caller.
        self.assertEqual(query_string, query)
        self.assertEqual(
            index, collection_services.SEARCH_INDEX_COLLECTIONS)
        self.assertEqual(categories, [])
        self.assertEqual(language_codes, [])
        self.assertEqual(offset, passed_offset)
        self.assertEqual(size, page_size)
        self.assertEqual(ids_only, True)
        self.assertEqual(retries, 3)
        return matching_doc_ids, next_offset

    with self.swap(gae_search_services, 'search', mock_search):
        result, result_offset = search_services.search_collections(
            query, [], [], page_size, offset=passed_offset)

    self.assertEqual(result_offset, next_offset)
    self.assertEqual(result, matching_doc_ids)
def get_collection_ids_matching_query(
        query_string, categories, language_codes, offset=None):
    """Returns a list with all collection ids matching the given search
    query string, as well as a search offset for future fetches.

    Args:
        query_string: str. The search query string.
        categories: list(str). The list of categories to query for. If it
            is empty, no category filter is applied; otherwise a result is
            valid if it matches at least one of these categories.
        language_codes: list(str). The list of language codes to query for.
            If it is empty, no language code filter is applied; otherwise a
            result is valid if it matches at least one of these codes.
        offset: str or None. Offset indicating where, in the list of
            collections, to start the search from.

    Returns:
        2-tuple of (returned_collection_ids, search_offset). Where:
            returned_collection_ids: list(str). Collection ids matching the
                query. Contains exactly feconf.SEARCH_RESULTS_PAGE_SIZE
                results if at least that many exist, otherwise all
                remaining results.
            search_offset: str. Search offset for future fetches.
    """
    returned_collection_ids = []
    search_offset = offset

    # Keep querying until a full page has been assembled or the index is
    # exhausted, capped at MAX_ITERATIONS attempts.
    for _ in python_utils.RANGE(MAX_ITERATIONS):
        still_needed = (
            feconf.SEARCH_RESULTS_PAGE_SIZE - len(returned_collection_ids))

        collection_ids, search_offset = search_services.search_collections(
            query_string, categories, language_codes, still_needed,
            offset=search_offset)

        # Collection model cannot be None as we are fetching the collection
        # ids through query and there cannot be a collection id for which
        # there is no collection, so every fetched id is kept.
        summary_models = collection_models.CollectionSummaryModel.get_multi(
            collection_ids)
        for collection_id, _ in zip(collection_ids, summary_models):
            returned_collection_ids.append(collection_id)

        # The number of collections in a page never exceeds
        # feconf.SEARCH_RESULTS_PAGE_SIZE.
        page_is_full = (
            len(returned_collection_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE)
        if page_is_full or search_offset is None:
            break

    return (returned_collection_ids, search_offset)
def get_collection_ids_matching_query(query_string, cursor=None):
    """Returns a list with all collection ids matching the given search query
    string, as well as a search cursor for future fetches.

    Args:
        query_string: str. The search query string.
        cursor: str or None. Cursor indicating where, in the list of
            collections, to start the search from.

    Returns:
        2-tuple of (returned_collection_ids, search_cursor), where:
            returned_collection_ids: list(str). A list with all collection
                ids matching the given search query string. The list
                contains exactly feconf.SEARCH_RESULTS_PAGE_SIZE results if
                there are at least that many, otherwise it contains all
                remaining results. (If this behaviour does not occur, an
                error will be logged.)
            search_cursor: str. Search cursor for future fetches.
    """
    returned_collection_ids = []
    search_cursor = cursor

    for _ in range(MAX_ITERATIONS):
        remaining_to_fetch = feconf.SEARCH_RESULTS_PAGE_SIZE - len(
            returned_collection_ids)

        collection_ids, search_cursor = search_services.search_collections(
            query_string, remaining_to_fetch, cursor=search_cursor)

        # Partition the fetched ids into those backed by a live summary
        # model and stale ones (still indexed but no longer in storage).
        invalid_collection_ids = []
        for ind, model in enumerate(
                collection_models.CollectionSummaryModel.get_multi(
                    collection_ids)):
            if model is not None:
                returned_collection_ids.append(collection_ids[ind])
            else:
                invalid_collection_ids.append(collection_ids[ind])

        if (len(returned_collection_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE
                or search_cursor is None):
            break

        # Bug fix: only log when stale ids were actually found. Previously
        # this error fired on every retry iteration, even with an empty id
        # list, producing misleading log entries.
        if invalid_collection_ids:
            logging.error(
                'Search index contains stale collection ids: %s' %
                ', '.join(invalid_collection_ids))

    # If the page could not be filled despite more results being available,
    # record that the retry budget was exhausted.
    if (len(returned_collection_ids) < feconf.SEARCH_RESULTS_PAGE_SIZE
            and search_cursor is not None):
        logging.error(
            'Could not fulfill search request for query string %s; at least '
            '%s retries were needed.' % (query_string, MAX_ITERATIONS))

    return (returned_collection_ids, search_cursor)
def test_clear_search_index(self):
    """The admin 'clear_search_index' action empties both the exploration
    and collection search indices.
    """
    # Index one demo exploration and one demo collection, and confirm that
    # both are searchable.
    exp_services.load_demo('0')
    exp_results = search_services.search_explorations('Welcome', 2)[0]
    self.assertEqual(exp_results, ['0'])

    collection_services.load_demo('0')
    collection_results = search_services.search_collections('Welcome', 2)[0]
    self.assertEqual(collection_results, ['0'])

    # Clear both indices through the admin handler.
    self.signup(self.ADMIN_EMAIL, self.ADMIN_USERNAME)
    self.login(self.ADMIN_EMAIL, is_super_admin=True)
    csrf_token = self.get_new_csrf_token()
    clear_index_response = self.post_json(
        '/adminhandler', {'action': 'clear_search_index'},
        csrf_token=csrf_token)
    self.assertEqual(clear_index_response, {})

    # Neither activity should be searchable any more.
    exp_results = search_services.search_explorations('Welcome', 2)[0]
    self.assertEqual(exp_results, [])
    collection_results = search_services.search_collections('Welcome', 2)[0]
    self.assertEqual(collection_results, [])
def get_collection_ids_matching_query(query_string, cursor=None):
    """Returns a list with all collection ids matching the given search
    query string, as well as a search cursor for future fetches.

    Args:
        query_string: str. The search query string.
        cursor: str or None. Cursor indicating where, in the list of
            collections, to start the search from.

    Returns:
        2-tuple of (returned_collection_ids, search_cursor), where:
            returned_collection_ids: list(str). Collection ids matching the
                query. Contains exactly feconf.SEARCH_RESULTS_PAGE_SIZE
                results if at least that many exist, otherwise all
                remaining results. (If this behaviour does not occur, an
                error will be logged.)
            search_cursor: str. Search cursor for future fetches.
    """
    returned_collection_ids = []
    search_cursor = cursor

    # Query repeatedly (up to MAX_ITERATIONS times) until a full page is
    # assembled or no further results remain.
    for _ in range(MAX_ITERATIONS):
        still_needed = (
            feconf.SEARCH_RESULTS_PAGE_SIZE - len(returned_collection_ids))

        collection_ids, search_cursor = search_services.search_collections(
            query_string, still_needed, cursor=search_cursor)

        # Collection model cannot be None as we are fetching the collection
        # ids through query and there cannot be a collection id for which
        # there is no collection, so every fetched id is kept.
        summary_models = collection_models.CollectionSummaryModel.get_multi(
            collection_ids)
        for collection_id, _ in zip(collection_ids, summary_models):
            returned_collection_ids.append(collection_id)

        # A page never holds more than feconf.SEARCH_RESULTS_PAGE_SIZE
        # results.
        page_is_full = (
            len(returned_collection_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE)
        if page_is_full or search_cursor is None:
            break

    return (returned_collection_ids, search_cursor)
def test_search_collections(self) -> None:
    """search_collections must forward all arguments to
    gae_search_services.search and return that call's results verbatim.
    """
    query = 'a query string'
    passed_offset = 0
    page_size = 30
    next_offset = 30
    matching_doc_ids = ['id1', 'id2']

    def mock_search(
            query_string: str,
            index: str,
            categories: List[str],
            language_codes: List[str],
            offset: Optional[int] = None,
            size: int = 20,
            ids_only: bool = False,
            retries: int = 3
    ) -> Tuple[List[str], Optional[int]]:
        # Every argument must arrive exactly as supplied by the caller.
        self.assertEqual(query_string, query)
        self.assertEqual(
            index, collection_services.SEARCH_INDEX_COLLECTIONS)
        self.assertEqual(categories, [])
        self.assertEqual(language_codes, [])
        self.assertEqual(offset, passed_offset)
        self.assertEqual(size, page_size)
        self.assertEqual(ids_only, True)
        self.assertEqual(retries, 3)
        return matching_doc_ids, next_offset

    with self.swap(gae_search_services, 'search', mock_search):
        result, result_offset = search_services.search_collections(
            query, [], [], page_size, offset=passed_offset)

    self.assertEqual(result_offset, next_offset)
    self.assertEqual(result, matching_doc_ids)
def get_library_groups(language_codes):
    """Returns a list of groups for the library index page. Each group has
    a header and a list of dicts representing activity summaries.

    Args:
        language_codes: list(str). A list of language codes. Only
            explorations with these languages will be returned.

    Returns:
        list(dict). A list of groups for the library index page. Each group
        is represented by a dict with the following keys and values:
            - activity_summary_dicts: list(dict). A list of dicts
                representing activity summaries.
            - categories: list(str). The list of group categories.
            - header_i18n_id: str. The i18n id for the header of the
                category.
            - has_full_results_page: bool. Whether the group header links
                to a "full results" page. This is always True for the
                "exploration category" groups.
            - full_results_url: str. The URL to the corresponding "full
                results" page.
    """
    language_codes_suffix = ''
    if language_codes:
        language_codes_suffix = ' language_code=("%s")' % (
            '" OR "'.join(language_codes))

    def _generate_query(categories):
        """Generates query based on the categories and language codes.

        Args:
            categories: list(str). List of categories.

        Returns:
            str. Generated query.
        """
        # This assumes that 'categories' is non-empty.
        return 'category=("%s")%s' % (
            '" OR "'.join(categories), language_codes_suffix)

    # Collect all collection ids so that the summary details can be
    # retrieved with a single get_multi() call.
    all_collection_ids = []
    header_id_to_collection_ids = {}
    for group in _LIBRARY_INDEX_GROUPS:
        collection_ids = search_services.search_collections(
            _generate_query(group['search_categories']), 8)[0]
        header_id_to_collection_ids[group['header_i18n_id']] = collection_ids
        all_collection_ids += collection_ids

    collection_summaries = [
        summary for summary in
        collection_services.get_collection_summaries_matching_ids(
            all_collection_ids)
        if summary is not None
    ]
    collection_summary_dicts = {
        summary_dict['id']: summary_dict
        for summary_dict in _get_displayable_collection_summary_dicts(
            collection_summaries)
    }

    # Collect all exp ids so that the summary details can be retrieved with
    # a single get_multi() call.
    all_exp_ids = []
    header_to_exp_ids = {}
    for group in _LIBRARY_INDEX_GROUPS:
        exp_ids = search_services.search_explorations(
            _generate_query(group['search_categories']), 8)[0]
        header_to_exp_ids[group['header_i18n_id']] = exp_ids
        all_exp_ids += exp_ids

    exp_summaries = [
        summary for summary in
        exp_fetchers.get_exploration_summaries_matching_ids(all_exp_ids)
        if summary is not None
    ]
    exp_summary_dicts = {
        summary_dict['id']: summary_dict
        for summary_dict in get_displayable_exp_summary_dicts(exp_summaries)
    }

    results = []
    for group in _LIBRARY_INDEX_GROUPS:
        # Dead 'summary_dicts = []' initializer removed: the name was
        # immediately rebound by the list comprehension below.
        collection_ids_to_display = (
            header_id_to_collection_ids[group['header_i18n_id']])
        # Collections are listed first, then explorations; ids whose
        # summary model was missing are silently skipped.
        summary_dicts = [
            collection_summary_dicts[collection_id]
            for collection_id in collection_ids_to_display
            if collection_id in collection_summary_dicts
        ]
        exp_ids_to_display = header_to_exp_ids[group['header_i18n_id']]
        summary_dicts += [
            exp_summary_dicts[exp_id] for exp_id in exp_ids_to_display
            if exp_id in exp_summary_dicts
        ]

        # Groups with no matching activities are omitted entirely.
        if not summary_dicts:
            continue

        results.append({
            'header_i18n_id': group['header_i18n_id'],
            'categories': group['search_categories'],
            'activity_summary_dicts': summary_dicts,
            'has_full_results_page': True,
            'full_results_url': None,
        })

    return results