def test_indexers_organizations_build_es_query_with_invalid_params(self):
    """
    Error case: building the ES query from a request whose "limit" parameter
    is not usable must raise a QueryFormatException.
    """
    bad_request = SimpleNamespace(query_params={"limit": "invalid input"})

    with self.assertRaises(QueryFormatException):
        OrganizationsIndexer.build_es_query(bad_request)
def test_get_data_for_es_with_unexpected_organization_shape(self):
    """
    Error case: the organization API answers with an object that lacks one of
    the keys of a regular organization; an IndexerDataException is expected.
    """
    malformed_organization = {
        "id": 62,
        "banner": "example.com/banner_62.png",
        # The "code" key is left out on purpose: its absence is what should
        # make the indexer fail
        "logo": "example.com/logo_62.png",
        "name": {"fr": "Organization N°62"},
    }
    responses.add(
        method="GET",
        url=settings.ORGANIZATION_API_ENDPOINT,
        status=200,
        json={"count": 1, "results": [malformed_organization]},
    )

    with self.assertRaises(IndexerDataException):
        # list() drains the result completely, in case it is produced lazily
        list(
            OrganizationsIndexer.get_data_for_es(
                index="some_index", action="some_action"
            )
        )
def test_indexers_organizations_format_es_object_for_api(self):
    """
    format_es_object_for_api should flatten an ES hit into the API
    representation of the organization, with its name in the requested language.
    """
    source = {
        "banner": "example.com/banner.png",
        "code": "univ-paris-13",
        "logo": "example.com/logo.png",
        "name": {"en": "University of Paris XIII", "fr": "Université Paris 13"},
    }
    es_organization = {"_id": 217, "_source": source}

    expected = {
        "banner": "example.com/banner.png",
        "code": "univ-paris-13",
        "id": 217,
        "logo": "example.com/logo.png",
        "name": "University of Paris XIII",
    }

    formatted = OrganizationsIndexer.format_es_object_for_api(es_organization, "en")
    self.assertEqual(formatted, expected)
def test_indexers_organizations_build_es_query_search_by_name(self):
    """
    Happy path: a request with a text query yields the limit, the offset and
    a "match" query on the french name.
    """
    query_params = {"limit": 12, "offset": 3, "query": "user entered some text"}
    request = SimpleNamespace(query_params=query_params)

    expected_body = {
        "query": {
            "match": {
                "name.fr": {
                    "query": "user entered some text",
                    "analyzer": "french",
                }
            }
        }
    }

    self.assertEqual(
        OrganizationsIndexer.build_es_query(request),
        (12, 3, expected_body),
    )
def test_indexers_organizations_format_es_document_for_autocomplete(self):
    """
    format_es_document_for_autocomplete should turn an ES hit into an
    organization suggestion: id, kind and title in the requested language.
    """
    source = {
        "logo": {"en": "/my_logo.png", "fr": "/mon_logo.png"},
        "title": {
            "en": "University of Paris XIII",
            "fr": "Université Paris 13",
        },
    }
    es_organization = {"_id": 217, "_source": source}

    expected = {
        "id": 217,
        "kind": "organizations",
        "title": "University of Paris XIII",
    }

    suggestion = OrganizationsIndexer.format_es_document_for_autocomplete(
        es_organization, "en"
    )
    self.assertEqual(suggestion, expected)
def test_indexers_organizations_format_es_object_for_api(self):
    """
    format_es_object_for_api should turn an ES hit with multilingual logo and
    title into an organization localized in the requested language.
    """
    source = {
        "logo": {"en": "/my_logo.png", "fr": "/mon_logo.png"},
        "title": {
            "en": "University of Paris XIII",
            "fr": "Université Paris 13",
        },
    }
    es_organization = {"_id": 217, "_source": source}

    expected = {
        "id": 217,
        "logo": "/my_logo.png",
        "title": "University of Paris XIII",
    }

    formatted = OrganizationsIndexer.format_es_object_for_api(es_organization, "en")
    self.assertEqual(formatted, expected)
def test_indexers_courses_related_objects_consistency(self):
    """
    The ids listed under "organizations" and "categories" in a course document
    must be the very ids under which the organization and the category are
    themselves indexed.
    """
    indexing_kwargs = {"index": "some_index", "action": "some_action"}

    # A published organization and category, linked to a course that has one
    # course run and whose page is published in English
    organization = OrganizationFactory(should_publish=True)
    category = CategoryFactory(should_publish=True)
    course = CourseFactory(
        fill_organizations=[organization], fill_categories=[category]
    )
    CourseRunFactory(direct_course=course)
    course.extended_object.publish("en")

    course_document = list(CoursesIndexer.get_es_documents(**indexing_kwargs))[0]

    organization_document = next(
        OrganizationsIndexer.get_es_documents(**indexing_kwargs)
    )
    self.assertEqual(
        course_document["organizations"], [organization_document["_id"]]
    )

    category_document = next(CategoriesIndexer.get_es_documents(**indexing_kwargs))
    self.assertEqual(course_document["categories"], [category_document["_id"]])
def test_build_es_query_search_all_organizations(self):
    """
    Happy path: without a text query, the limit and offset are returned along
    with a "match_all" ES query.
    """
    request = SimpleNamespace(query_params={"limit": 11, "offset": 4})
    expected = (11, 4, {"query": {"match_all": {}}})

    self.assertEqual(OrganizationsIndexer.build_es_query(request), expected)
def prepare_index(self, courses, organizations=None):
    """
    Not a test.
    Shared setup for the tests of this class: it rebuilds the Elasticsearch
    indices from scratch and bulk-inserts the given courses and organizations,
    so that each test only has to run its query.
    """
    if not organizations:
        organizations = []

    def create_analyzed_index(name, indexer):
        """Create index `name` with the analysis settings and the indexer mapping."""
        ES_INDICES_CLIENT.create(index=name)
        # The index is closed while the analysis settings are applied
        ES_INDICES_CLIENT.close(index=name)
        ES_INDICES_CLIENT.put_settings(body=ANALYSIS_SETTINGS, index=name)
        ES_INDICES_CLIENT.open(index=name)
        # Rely on the default mapping declared on the indexer
        ES_INDICES_CLIENT.put_mapping(body=indexer.mapping, index=name)

    self.create_filter_pages()

    # Start from a clean slate: drop whatever indices already exist
    ES_INDICES_CLIENT.delete(index="_all")

    # Index for our organizations
    create_analyzed_index("richie_organizations", OrganizationsIndexer)

    # Categories & persons only need empty indices: they must exist to avoid
    # errors but the tests do not use any result from them.
    ES_INDICES_CLIENT.create(index="richie_categories")
    ES_INDICES_CLIENT.create(index="richie_persons")

    # Index on which the ES features are tested
    create_analyzed_index("test_courses", CoursesIndexer)

    # Register the scripts of the courses indexer
    for script_id in ("score", "state_field"):
        ES_CLIENT.put_script(id=script_id, body=CoursesIndexer.scripts[script_id])

    # Build the bulk actions: organizations first, then courses
    organization_actions = [
        OrganizationsIndexer.get_es_document_for_organization(
            organization.public_extension
        )
        for organization in organizations
    ]
    course_actions = [
        {
            "_id": course["id"],
            "_index": "test_courses",
            "_op_type": "create",
            **course,
        }
        for course in courses
    ]

    bulk_compat(
        actions=organization_actions + course_actions,
        chunk_size=500,
        client=ES_CLIENT,
    )
    ES_INDICES_CLIENT.refresh()
def test_indexers_organizations_get_es_documents_unpublished(self):
    """An organization that was never published must not produce any document."""
    # The factory is called without `should_publish`
    OrganizationFactory()

    documents = list(
        OrganizationsIndexer.get_es_documents(
            index="some_index", action="some_action"
        )
    )

    self.assertEqual(documents, [])
def test_indexers_organizations_get_es_documents_language_fallback(self):
    """
    An organization with only a french title should still get an absolute url
    for each language, all of them built from the french slug.
    """
    OrganizationFactory(
        page_title={"fr": "ma première organisation"},
        should_publish=True,
    )

    documents = list(
        OrganizationsIndexer.get_es_documents(
            index="some_index", action="some_action"
        )
    )
    first_document = documents[0]

    expected_urls = {
        "en": "/en/ma-premiere-organisation/",
        "fr": "/fr/ma-premiere-organisation/",
    }
    self.assertEqual(first_document["absolute_url"], expected_urls)
def test_get_data_for_es(self):
    """
    Happy path: organizations are fetched from the API, one page after the
    other, and each of them is formatted as an ES action.
    """
    # One organization per API page; a count of 51 with 50 results per page
    # means two pages are expected to be requested
    api_pages = [
        (
            "?page=1&rpp=50",
            {
                "id": 1,
                "banner": "example.com/banner_1.png",
                "code": "org-1",
                "logo": "example.com/logo_1.png",
                "name": "Organization N°1",
            },
        ),
        (
            "?page=2&rpp=50",
            {
                "id": 80,
                "banner": "example.com/banner_80.png",
                "code": "org-80",
                "logo": "example.com/logo_80.png",
                "name": "Organization N°80",
            },
        ),
    ]
    for querystring, organization in api_pages:
        responses.add(
            method="GET",
            url=settings.ORGANIZATION_API_ENDPOINT + querystring,
            match_querystring=True,
            json={"count": 51, "results": [organization]},
        )

    expected_actions = [
        {
            "_id": 1,
            "_index": "some_index",
            "_op_type": "some_action",
            "_type": "organization",
            "banner": "example.com/banner_1.png",
            "code": "org-1",
            "logo": "example.com/logo_1.png",
            "name": {"fr": "Organization N°1"},
        },
        {
            "_id": 80,
            "_index": "some_index",
            "_op_type": "some_action",
            "_type": "organization",
            "banner": "example.com/banner_80.png",
            "code": "org-80",
            "logo": "example.com/logo_80.png",
            "name": {"fr": "Organization N°80"},
        },
    ]

    # The results were formatted as expected and handed over to the consumer
    actions = list(
        OrganizationsIndexer.get_data_for_es(
            index="some_index", action="some_action"
        )
    )
    self.assertEqual(actions, expected_actions)
def get_es_document_for_course(cls, course, index=None, action="index"):
    """
    Build an Elasticsearch document from the course instance.

    Args:
        course: the course to index. Its page titles, plugins, course runs,
            licences and related categories, organizations and persons are
            read to build the document.
        index: name of the index set on the document. Falls back to
            `cls.index_name` when not provided.
        action: value set on the `_op_type` key of the document.

    Returns:
        dict: the `_id`, `_index` and `_op_type` keys along with all the
        fields to index for the course.
    """
    index = index or cls.index_name

    def group_titles_by_language(title_objects):
        """Map each language to the list of titles found for it, in order."""
        grouped = defaultdict(list)
        for title in title_objects:
            grouped[title.language].append(title.title)
        # Return a plain dict so that no defaultdict ends up in the document
        return dict(grouped)

    # Prepare published titles
    titles = {
        t.language: t.title
        for t in Title.objects.filter(page=course.extended_object, published=True)
    }

    # Prepare cover images
    cover_images = {}
    for cover in Picture.objects.filter(
        cmsplugin_ptr__placeholder__page=course.extended_object,
        cmsplugin_ptr__placeholder__slot="course_cover",
    ):
        language = cover.cmsplugin_ptr.language
        with translation.override(language):
            picture_info = get_picture_info(cover, "cover")
            if picture_info:
                cover_images[language] = picture_info

    # Prepare the related category icon
    icon_images = {}
    for plugin_model in CategoryPluginModel.objects.filter(
        cmsplugin_ptr__placeholder__page=course.extended_object_id,
        cmsplugin_ptr__placeholder__slot="course_icons",
        cmsplugin_ptr__position=0,
    ):
        language = plugin_model.language
        for icon in Picture.objects.filter(
            cmsplugin_ptr__language=language,
            cmsplugin_ptr__placeholder__page=plugin_model.page_id,
            cmsplugin_ptr__placeholder__slot="icon",
            cmsplugin_ptr__position=0,
        ):
            with translation.override(language):
                picture_info = get_picture_info(icon, "icon") or {}
                icon_images[language] = {
                    **picture_info,
                    "color": plugin_model.page.category.color,
                    "title": plugin_model.page.get_title(),
                }

    # Prepare description texts
    descriptions = defaultdict(list)
    for simple_text in SimpleText.objects.filter(
        cmsplugin_ptr__placeholder__page=course.extended_object,
        cmsplugin_ptr__placeholder__slot="course_description",
    ):
        descriptions[simple_text.cmsplugin_ptr.language].append(simple_text.body)

    # Prepare introduction texts
    introductions = defaultdict(list)
    for plain_text in PlainText.objects.filter(
        cmsplugin_ptr__placeholder__page=course.extended_object,
        cmsplugin_ptr__placeholder__slot="course_introduction",
    ):
        introductions[plain_text.cmsplugin_ptr.language].append(plain_text.body)

    # Prepare localized duration and effort texts: each language is activated
    # once and both displays are computed under it
    duration = {}
    effort = {}
    for language, _ in settings.LANGUAGES:
        with translation.override(language):
            duration[language] = course.get_duration_display()
            effort[language] = course.get_effort_display()

    # Prepare categories, making sure we get title information for categories
    # in the same query
    category_pages = (
        course.get_root_to_leaf_public_category_pages()
        .prefetch_related(
            Prefetch(
                "title_set",
                to_attr="published_titles",
                queryset=Title.objects.filter(published=True),
            )
        )
        .distinct()
    )

    # Prepare organizations, making sure we get title information for organizations
    # in the same query
    organizations = (
        course.get_organizations()
        .prefetch_related(
            Prefetch(
                "extended_object__title_set",
                to_attr="published_titles",
                queryset=Title.objects.filter(published=True),
            )
        )
        .distinct()
    )
    organization_main = course.get_main_organization()
    organization_highlighted = (
        organizations.get(id=organization_main.id) if organization_main else None
    )
    organization_highlighted_cover_image = (
        OrganizationsIndexer.get_logo_images(organization_main)
        if organization_main
        else {}
    )

    # Prepare persons, making sure we get title information for persons
    # in the same query
    persons = (
        course.get_persons()
        .prefetch_related(
            Prefetch(
                "extended_object__title_set",
                to_attr="published_titles",
                queryset=Title.objects.filter(published=True),
            )
        )
        .distinct()
    )

    # Prepare course runs
    # Ordering them by their `end` date is important to optimize sorting and other
    # computations that require looping on the course runs
    # Course runs with no start date or no start of enrollment date are ignored as
    # they are still to be scheduled.
    course_runs = [
        {
            "start": cr["start"],
            "end": cr["end"] or MAX_DATE,
            "enrollment_start": cr["enrollment_start"],
            "enrollment_end": cr["enrollment_end"] or cr["end"] or MAX_DATE,
            "languages": cr["languages"],
        }
        for cr in course.course_runs.filter(
            start__isnull=False,
            enrollment_start__isnull=False,
            catalog_visibility=CourseRunCatalogVisibility.COURSE_AND_SEARCH,
        )
        .order_by("-end")
        .values("start", "end", "enrollment_start", "enrollment_end", "languages")
    ]

    licences = (
        Licence.objects.filter(
            licencepluginmodel__cmsplugin_ptr__placeholder__page__course=course,
            licencepluginmodel__cmsplugin_ptr__placeholder__slot="course_license_content",
        )
        .distinct()
        .order_by("id")
        .values_list("id", flat=True)
    )

    return {
        "_id": course.get_es_id(),
        "_index": index,
        "_op_type": action,
        "absolute_url": {
            lang: course.extended_object.get_absolute_url(lang)
            for lang, _ in settings.LANGUAGES
        },
        "categories": [page.category.get_es_id() for page in category_pages],
        # Index the names of categories to surface them in full text searches
        "categories_names": group_titles_by_language(
            title for page in category_pages for title in page.published_titles
        ),
        "code": course.code,
        "complete": {
            language: slice_string_for_completion(title)
            for language, title in titles.items()
        }
        if course.is_listed
        else None,
        "course_runs": course_runs,
        "cover_image": cover_images,
        "description": {
            language: " ".join(st) for language, st in descriptions.items()
        },
        "duration": duration,
        "effort": effort,
        "icon": icon_images,
        "introduction": {
            language: " ".join(st) for language, st in introductions.items()
        },
        "is_new": len(course_runs) == 1,
        # If titles is an empty dict, it means the course is not published in any language:
        "is_listed": bool(course.is_listed and titles),
        "licences": list(licences),
        # Pick the highlighted organization from the organizations QuerySet to benefit from
        # the prefetch of related title sets
        "organization_highlighted": {
            title.language: title.menu_title or title.title
            for title in organization_highlighted.extended_object.published_titles
        }
        if organization_highlighted
        else None,
        "organization_highlighted_cover_image": organization_highlighted_cover_image,
        "organizations": [
            organization.get_es_id() for organization in organizations
        ],
        # Index the names of organizations to surface them in full text searches
        "organizations_names": group_titles_by_language(
            title
            for organization in organizations
            for title in organization.extended_object.published_titles
        ),
        "persons": [person.get_es_id() for person in persons],
        # Index the names of persons, grouped by language like the names above
        "persons_names": group_titles_by_language(
            title
            for person in persons
            for title in person.extended_object.published_titles
        ),
        # Self-paced courses have no pace
        "pace": None
        if course.is_self_paced
        else get_course_pace(course.effort, course.duration),
        "title": titles,
    }
def test_indexers_organizations_get_es_documents(self, _mock_picture):
    """
    Happy path: published organizations are read from the models and turned
    into fully formatted ES documents.
    """
    organization1 = OrganizationFactory(
        page_title={
            "en": "my first organization",
            "fr": "ma première organisation",
        },
        fill_logo=True,
        should_publish=True,
    )
    OrganizationFactory(
        page_title={
            "en": "my second organization",
            "fr": "ma deuxième organisation",
        },
        should_publish=True,
    )

    # Give the first organization a two-line description in each language
    placeholder = organization1.public_extension.extended_object.placeholders.get(
        slot="description"
    )
    description_lines = [
        ("en", "english description line 1."),
        ("en", "english description line 2."),
        ("fr", "description français ligne 1."),
        ("fr", "description français ligne 2."),
    ]
    for language, body in description_lines:
        add_plugin(
            body=body,
            language=language,
            placeholder=placeholder,
            plugin_type="CKEditorPlugin",
        )

    second_document = {
        "_id": "L-0002",
        "_index": "some_index",
        "_op_type": "some_action",
        "_type": "organization",
        "absolute_url": {
            "en": "/en/my-second-organization/",
            "fr": "/fr/ma-deuxieme-organisation/",
        },
        "complete": {
            "en": [
                "my second organization",
                "second organization",
                "organization",
            ],
            "fr": [
                "ma deuxième organisation",
                "deuxième organisation",
                "organisation",
            ],
        },
        "description": {},
        "logo": {},
        "title": {
            "en": "my second organization",
            "fr": "ma deuxième organisation",
        },
    }
    first_document = {
        "_id": "L-0001",
        "_index": "some_index",
        "_op_type": "some_action",
        "_type": "organization",
        "absolute_url": {
            "en": "/en/my-first-organization/",
            "fr": "/fr/ma-premiere-organisation/",
        },
        "complete": {
            "en": [
                "my first organization",
                "first organization",
                "organization",
            ],
            "fr": [
                "ma première organisation",
                "première organisation",
                "organisation",
            ],
        },
        "description": {
            "en": "english description line 1. english description line 2.",
            "fr": "description français ligne 1. description français ligne 2.",
        },
        "logo": {"en": "logo info", "fr": "logo info"},
        "title": {
            "en": "my first organization",
            "fr": "ma première organisation",
        },
    }

    # The second organization is expected before the first one
    documents = list(
        OrganizationsIndexer.get_es_documents(
            index="some_index", action="some_action"
        )
    )
    self.assertEqual(documents, [second_document, first_document])