def test_demo_collections_are_added_to_search_index(self):
    """Checks that loading demo collection '0' makes it searchable."""

    def _ids_matching_welcome():
        """Returns element 0 of the search result for 'Welcome'."""
        return search_services.search_collections('Welcome', [], [], 2)[0]

    # Before the demo collection is loaded, the query matches nothing.
    self.assertEqual(_ids_matching_welcome(), [])

    collection_services.load_demo('0')

    # After loading, the same query returns the demo collection's id.
    self.assertEqual(_ids_matching_welcome(), ['0'])
 def test_clear_collection_search_index(self):
     """Checks that clearing the collection search index empties it."""
     collection_services.load_demo('0')

     # The freshly loaded demo collection is found by the query.
     ids_before_clear = search_services.search_collections(
         'Welcome', [], [], 2)[0]
     self.assertEqual(ids_before_clear, ['0'])

     search_services.clear_collection_search_index()

     # The same query finds nothing once the index has been cleared.
     ids_after_clear = search_services.search_collections(
         'Welcome', [], [], 2)[0]
     self.assertEqual(ids_after_clear, [])
Exemple #3
0
    def test_search_collections(self):
        """Checks the arguments search_collections hands to
        gae_search_services.search, and that it returns that function's
        (ids, cursor) result.
        """
        query = 'a query string'
        start_cursor = 'cursor'
        page_size = 30
        next_cursor = 'rcursor'
        matching_ids = ['id1', 'id2']

        def mock_search(
                query_string, index, cursor=None, size=20, ids_only=False,
                retries=3):
            """Replacement for gae_search_services.search that verifies
            every argument it receives before returning canned results.
            """
            self.assertEqual(query_string, query)
            self.assertEqual(
                index, collection_services.SEARCH_INDEX_COLLECTIONS)
            self.assertEqual(cursor, start_cursor)
            self.assertEqual(size, page_size)
            self.assertEqual(ids_only, True)
            self.assertEqual(retries, 3)

            return matching_ids, next_cursor

        with self.swap(gae_search_services, 'search', mock_search):
            result, result_cursor = search_services.search_collections(
                query, page_size, cursor=start_cursor)

        self.assertEqual(result_cursor, next_cursor)
        self.assertEqual(result, matching_ids)
    def test_search_collections(self):
        """Checks the arguments search_collections hands to
        gae_search_services.search, and that it returns that function's
        (ids, offset) result.
        """
        query = 'a query string'
        start_offset = 0
        page_size = 30
        next_offset = 30
        matching_ids = ['id1', 'id2']

        def mock_search(
                query_string,
                index,
                categories,
                language_codes,
                offset=None,
                size=20,
                ids_only=False,
                retries=3):
            """Replacement for gae_search_services.search that verifies
            every argument it receives before returning canned results.
            """
            self.assertEqual(query_string, query)
            self.assertEqual(
                index, collection_services.SEARCH_INDEX_COLLECTIONS)
            # No category or language filters are supplied by this test.
            self.assertEqual(categories, [])
            self.assertEqual(language_codes, [])
            self.assertEqual(offset, start_offset)
            self.assertEqual(size, page_size)
            self.assertEqual(ids_only, True)
            self.assertEqual(retries, 3)

            return matching_ids, next_offset

        with self.swap(gae_search_services, 'search', mock_search):
            result, result_offset = search_services.search_collections(
                query, [], [], page_size, offset=start_offset)

        self.assertEqual(result_offset, next_offset)
        self.assertEqual(result, matching_ids)
Exemple #5
0
def get_collection_ids_matching_query(
        query_string, categories, language_codes, offset=None):
    """Fetches one page of collection ids matching the given search query,
    together with the search offset to use for the next fetch.

    The search is repeated, for at most MAX_ITERATIONS rounds, until either
    feconf.SEARCH_RESULTS_PAGE_SIZE ids have been gathered or the search
    service returns None as the next offset.

    Args:
        query_string: str. The search query string.
        categories: list(str). The list of categories to query for. If it is
            empty, no category filter is applied to the results. If it is not
            empty, then a result is considered valid if it matches at least one
            of these categories.
        language_codes: list(str). The list of language codes to query for. If
            it is empty, no language code filter is applied to the results. If
            it is not empty, then a result is considered valid if it matches at
            least one of these language codes.
        offset: Offset handed to search_services.search_collections for the
            first fetch, or None (the default). Later fetches use the offset
            returned by the previous fetch.

    Returns:
        2-tuple of (returned_collection_ids, search_offset). Where:
            returned_collection_ids : list(str). The collection ids gathered
                across all fetches performed by this call.
            search_offset: The offset returned by the last fetch (None when
                that fetch returned None), or the given offset unchanged if
                no fetch was performed.
    """
    matched_ids = []
    next_offset = offset

    for _ in python_utils.RANGE(MAX_ITERATIONS):
        # Only ask for as many ids as are still missing from the page.
        still_needed = feconf.SEARCH_RESULTS_PAGE_SIZE - len(matched_ids)

        candidate_ids, next_offset = search_services.search_collections(
            query_string, categories, language_codes, still_needed,
            offset=next_offset)

        summary_models = collection_models.CollectionSummaryModel.get_multi(
            candidate_ids)
        # Collection model cannot be None as we are fetching the collection ids
        # through query and there cannot be a collection id for which there is
        # no collection. Hence one id is kept per fetched model, without
        # inspecting the model itself.
        matched_ids.extend(
            candidate_ids[position]
            for position, _model in enumerate(summary_models))

        # The number of collections in a page is always less than or equal to
        # feconf.SEARCH_RESULTS_PAGE_SIZE.
        page_is_full = len(matched_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE
        if page_is_full or next_offset is None:
            break

    return (matched_ids, next_offset)
Exemple #6
0
def get_collection_ids_matching_query(query_string, cursor=None):
    """Returns a list with all collection ids matching the given search query
    string, as well as a search cursor for future fetches.

    The search is repeated, for at most MAX_ITERATIONS rounds, until either
    feconf.SEARCH_RESULTS_PAGE_SIZE valid ids have been gathered or the
    search service returns None as the next cursor. Ids returned by the
    search index for which CollectionSummaryModel.get_multi yields None are
    treated as stale: they are left out of the result and logged as an error.

    Args:
        query_string: str. The search query string.
        cursor: str or None. Cursor indicating where, in the list of
            collections, to start the search from.

    Returns:
        2-tuple of (returned_collection_ids, search_cursor), where:
            returned_collection_ids : list(str). The valid collection ids
                gathered across all fetches performed by this call. If fewer
                than feconf.SEARCH_RESULTS_PAGE_SIZE ids were gathered
                although the search cursor is not None, an error is logged.
            search_cursor: str or None. Search cursor for future fetches, as
                returned by the last fetch.
    """
    returned_collection_ids = []
    search_cursor = cursor

    for _ in range(MAX_ITERATIONS):
        # Only ask for as many ids as are still missing from the page.
        remaining_to_fetch = feconf.SEARCH_RESULTS_PAGE_SIZE - len(
            returned_collection_ids)

        collection_ids, search_cursor = search_services.search_collections(
            query_string, remaining_to_fetch, cursor=search_cursor)

        invalid_collection_ids = []
        summary_models = collection_models.CollectionSummaryModel.get_multi(
            collection_ids)
        for collection_id, model in zip(collection_ids, summary_models):
            if model is not None:
                returned_collection_ids.append(collection_id)
            else:
                invalid_collection_ids.append(collection_id)

        # Report stale index entries exactly when some were found. This is
        # done before the exit check so that stale ids seen on the final
        # iteration are reported too, and so that no message with an empty
        # id list is emitted when another iteration is needed for some
        # other reason.
        if invalid_collection_ids:
            logging.error(
                'Search index contains stale collection ids: %s' %
                ', '.join(invalid_collection_ids))

        if len(returned_collection_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE or (
                search_cursor is None):
            break

    # More results exist (the cursor is not None) but the page could not be
    # filled within the allowed number of iterations.
    if (len(returned_collection_ids) < feconf.SEARCH_RESULTS_PAGE_SIZE
            and search_cursor is not None):
        logging.error(
            'Could not fulfill search request for query string %s; at least '
            '%s retries were needed.' % (query_string, MAX_ITERATIONS))

    return (returned_collection_ids, search_cursor)
Exemple #7
0
 def test_clear_search_index(self):
     """Checks that the 'clear_search_index' action posted to
     /adminhandler empties both the exploration and the collection
     search results.
     """
     # Load a demo exploration and a demo collection, and confirm that
     # each one is found by the 'Welcome' query.
     exp_services.load_demo('0')
     exp_ids_before = search_services.search_explorations('Welcome', 2)[0]
     self.assertEqual(exp_ids_before, ['0'])
     collection_services.load_demo('0')
     collection_ids_before = search_services.search_collections(
         'Welcome', 2)[0]
     self.assertEqual(collection_ids_before, ['0'])

     # Post the action while logged in as a super admin.
     self.signup(self.ADMIN_EMAIL, self.ADMIN_USERNAME)
     self.login(self.ADMIN_EMAIL, is_super_admin=True)
     response = self.post_json(
         '/adminhandler',
         {'action': 'clear_search_index'},
         csrf_token=self.get_new_csrf_token())
     self.assertEqual(response, {})

     # Neither query returns anything any more.
     exp_ids_after = search_services.search_explorations('Welcome', 2)[0]
     self.assertEqual(exp_ids_after, [])
     collection_ids_after = search_services.search_collections(
         'Welcome', 2)[0]
     self.assertEqual(collection_ids_after, [])
Exemple #8
0
def get_collection_ids_matching_query(query_string, cursor=None):
    """Fetches one page of collection ids matching the given search query,
    together with the search cursor to use for the next fetch.

    The search is repeated, for at most MAX_ITERATIONS rounds, until either
    feconf.SEARCH_RESULTS_PAGE_SIZE ids have been gathered or the search
    service returns None as the next cursor.

    Args:
        query_string: str. The search query string.
        cursor: str or None. Cursor indicating where, in the list of
            collections, to start the search from.

    Returns:
        2-tuple of (returned_collection_ids, search_cursor), where:
            returned_collection_ids : list(str). The collection ids gathered
                across all fetches performed by this call.
            search_cursor: str or None. The cursor returned by the last
                fetch, or the given cursor unchanged if no fetch was
                performed.
    """
    gathered_ids = []
    next_cursor = cursor

    for _ in range(MAX_ITERATIONS):
        # Only ask for as many ids as are still missing from the page.
        missing_count = feconf.SEARCH_RESULTS_PAGE_SIZE - len(gathered_ids)

        fetched_ids, next_cursor = search_services.search_collections(
            query_string, missing_count, cursor=next_cursor)

        fetched_models = collection_models.CollectionSummaryModel.get_multi(
            fetched_ids)
        # Collection model cannot be None as we are fetching the collection ids
        # through query and there cannot be a collection id for which there is
        # no collection. Hence one id is kept per fetched model, without
        # inspecting the model itself.
        gathered_ids.extend(
            fetched_ids[position]
            for position, _model in enumerate(fetched_models))

        # The number of collections in a page is always less than or equal to
        # feconf.SEARCH_RESULTS_PAGE_SIZE.
        page_is_full = len(gathered_ids) == feconf.SEARCH_RESULTS_PAGE_SIZE
        if page_is_full or next_cursor is None:
            break

    return (gathered_ids, next_cursor)
Exemple #9
0
    def test_search_collections(self) -> None:
        """Checks the arguments search_collections hands to
        gae_search_services.search, and that it returns that function's
        (ids, offset) result.
        """
        query = 'a query string'
        start_offset = 0
        page_size = 30
        next_offset = 30
        matching_ids = ['id1', 'id2']

        def mock_search(
                query_string: str,
                index: str,
                categories: List[str],
                language_codes: List[str],
                offset: Optional[int] = None,
                size: int = 20,
                ids_only: bool = False,
                retries: int = 3
        ) -> Tuple[List[str], Optional[int]]:
            """Replacement for gae_search_services.search that verifies
            every argument it receives before returning canned results.
            """
            self.assertEqual(query_string, query)
            self.assertEqual(
                index, collection_services.SEARCH_INDEX_COLLECTIONS)
            # No category or language filters are supplied by this test.
            self.assertEqual(categories, [])
            self.assertEqual(language_codes, [])
            self.assertEqual(offset, start_offset)
            self.assertEqual(size, page_size)
            self.assertEqual(ids_only, True)
            self.assertEqual(retries, 3)

            return matching_ids, next_offset

        with self.swap(gae_search_services, 'search', mock_search):
            result, result_offset = search_services.search_collections(
                query, [], [], page_size, offset=start_offset)

        self.assertEqual(result_offset, next_offset)
        self.assertEqual(result, matching_ids)
Exemple #10
0
def get_library_groups(language_codes):
    """Returns a list of groups for the library index page. Each group has a
    header and a list of dicts representing activity summaries.

    For every entry of _LIBRARY_INDEX_GROUPS, collections and explorations
    matching the group's categories (and the given language codes, if any)
    are searched for. Groups for which no displayable summary is found are
    left out of the result.

    Args:
        language_codes: list(str). A list of language codes. If it is
            non-empty, a language_code filter built from it is appended to
            every search query; otherwise no language filter is applied.

    Returns:
        list(dict). A list of groups for the library index page. Each group is
        represented by a dict with the following keys and values:
            - activity_summary_dicts: list(dict). A list of dicts representing
                activity summaries, collections first, then explorations.
            - categories: list(str). The list of group categories.
            - header_i18n_id: str. The i18n id for the header of the category.
            - has_full_results_page: bool. Whether the group header links to
                a "full results" page. This is always True here.
            - full_results_url: None. This function always sets it to None.
    """
    # Second positional argument of the search calls below.
    # NOTE(review): presumably the maximum number of results per group;
    # confirm against search_services.
    group_search_size = 8

    language_codes_suffix = ''
    if language_codes:
        language_codes_suffix = ' language_code=("%s")' % (
            '" OR "'.join(language_codes))

    def _generate_query(categories):
        """Generates query based on the categories and language codes.

        Args:
            categories: list(str). List of categories.

        Returns:
            str. Generated query.
        """
        # This assumes that 'categories' is non-empty.
        return 'category=("%s")%s' % ('" OR "'.join(categories),
                                      language_codes_suffix)

    def _search_ids_per_group(search_fn):
        """Runs search_fn once for every library index group.

        Args:
            search_fn: callable. Called with the group's query string and
                group_search_size; element 0 of its result is used as the
                list of matching ids.

        Returns:
            2-tuple of (all_ids, header_id_to_ids). Where:
                all_ids: list(str). The ids found for all groups, in group
                    order.
                header_id_to_ids: dict(str, list(str)). Maps each group's
                    header_i18n_id to the ids found for that group.
        """
        all_ids = []
        header_id_to_ids = {}
        for group in _LIBRARY_INDEX_GROUPS:
            group_ids = search_fn(
                _generate_query(group['search_categories']),
                group_search_size)[0]
            header_id_to_ids[group['header_i18n_id']] = group_ids
            all_ids += group_ids
        return all_ids, header_id_to_ids

    # Collect all collection ids so that the summary details can be retrieved
    # with a single get_multi() call.
    all_collection_ids, header_id_to_collection_ids = _search_ids_per_group(
        search_services.search_collections)

    collection_summaries = [
        summary for summary in collection_services.
        get_collection_summaries_matching_ids(all_collection_ids)
        if summary is not None
    ]
    collection_summary_dicts = {
        summary_dict['id']: summary_dict
        for summary_dict in _get_displayable_collection_summary_dicts(
            collection_summaries)
    }

    # Collect all exp ids so that the summary details can be retrieved with a
    # single get_multi() call.
    all_exp_ids, header_to_exp_ids = _search_ids_per_group(
        search_services.search_explorations)

    exp_summaries = [
        summary for summary in
        exp_fetchers.get_exploration_summaries_matching_ids(all_exp_ids)
        if summary is not None
    ]

    exp_summary_dicts = {
        summary_dict['id']: summary_dict
        for summary_dict in get_displayable_exp_summary_dicts(exp_summaries)
    }

    results = []
    for group in _LIBRARY_INDEX_GROUPS:
        header_id = group['header_i18n_id']

        # Ids without a displayable summary dict are skipped; collections
        # are listed before explorations.
        summary_dicts = [
            collection_summary_dicts[collection_id]
            for collection_id in header_id_to_collection_ids[header_id]
            if collection_id in collection_summary_dicts
        ]
        summary_dicts += [
            exp_summary_dicts[exp_id]
            for exp_id in header_to_exp_ids[header_id]
            if exp_id in exp_summary_dicts
        ]

        # Groups with nothing to display are omitted entirely.
        if not summary_dicts:
            continue

        results.append({
            'header_i18n_id': header_id,
            'categories': group['search_categories'],
            'activity_summary_dicts': summary_dicts,
            'has_full_results_page': True,
            'full_results_url': None,
        })

    return results