Beispiel #1
0
    def test_upercase_and_lowercase_search_give_same_results(self):
        """Pretty self-explanatory function name, isn't it ?"""

        if not self.manager.connected_to_es:
            return

        # 1. Index lowercase stuffs
        text_lc = "test"

        topic_1_lc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_lc)

        tag_lc = TagFactory(title=text_lc)
        topic_1_lc.tags.add(tag_lc)
        topic_1_lc.subtitle = text_lc
        topic_1_lc.save()

        post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1)
        post_1_lc.text = post_1_lc.text_html = text_lc
        post_1_lc.save()

        tuto_lc = PublishableContentFactory(type="TUTORIAL")
        tuto_draft_lc = tuto_lc.load_version()

        tuto_lc.title = text_lc
        tuto_lc.authors.add(self.user)
        subcategory_lc = SubCategoryFactory(title=text_lc)
        tuto_lc.subcategory.add(subcategory_lc)
        tuto_lc.tags.add(tag_lc)
        tuto_lc.save()

        tuto_draft_lc.description = text_lc
        tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc,
                                                text_lc)

        chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc)
        extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc)
        extract_lc.repo_update(text_lc, text_lc)

        published_lc = publish_content(tuto_lc,
                                       tuto_draft_lc,
                                       is_major_update=True)

        tuto_lc.sha_public = tuto_draft_lc.current_version
        tuto_lc.sha_draft = tuto_draft_lc.current_version
        tuto_lc.public_version = published_lc
        tuto_lc.save()

        # 2. Index uppercase stuffs
        text_uc = "TEST"

        topic_1_uc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_uc)

        topic_1_uc.tags.add(
            tag_lc)  # Note: a constraint forces tags title to be unique
        topic_1_uc.subtitle = text_uc
        topic_1_uc.save()

        post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1)
        post_1_uc.text = post_1_uc.text_html = text_uc
        post_1_uc.save()

        tuto_uc = PublishableContentFactory(type="TUTORIAL")
        tuto_draft_uc = tuto_uc.load_version()

        tuto_uc.title = text_uc
        tuto_uc.authors.add(self.user)
        tuto_uc.subcategory.add(subcategory_lc)
        tuto_uc.tags.add(tag_lc)
        tuto_uc.save()

        tuto_draft_uc.description = text_uc
        tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc,
                                                text_uc)

        chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc)
        extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc)
        extract_uc.repo_update(text_uc, text_uc)

        published_uc = publish_content(tuto_uc,
                                       tuto_draft_uc,
                                       is_major_update=True)

        tuto_uc.sha_public = tuto_draft_uc.current_version
        tuto_uc.sha_draft = tuto_draft_uc.current_version
        tuto_uc.public_version = published_uc
        tuto_uc.save()

        # 3. Index and search:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse("search:query") + "?q=" + text_lc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_lc = result.context["object_list"].execute()
        self.assertEqual(response_lc.hits.total, 8)

        result = self.client.get(reverse("search:query") + "?q=" + text_uc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_uc = result.context["object_list"].execute()
        self.assertEqual(response_uc.hits.total, 8)

        for responses in zip(
                response_lc,
                response_uc):  # we should get results in the same order!
            self.assertEqual(responses[0].meta.id, responses[1].meta.id)
Beispiel #2
0
    def test_change_publishedcontents_impacts_chapter(self):

        if not self.manager.connected_to_es:
            return

        # 1. Create middle-size content and index it
        text = "test"

        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()

        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == "chapter"]
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id,
                         published.content_public_slug + "__" + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone !

        another_text = "another thing"
        self.assertTrue(
            text not in another_text
        )  # to prevent a future modification from breaking this test

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = publish_content(tuto, tuto_draft, is_major_update=False)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # 2 objects, not 3 !

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()

        contents = [r for r in response if r.meta.doc_type != "chapter"]
        self.assertEqual(response.hits.total,
                         len(contents))  # no chapter found anymore

        result = self.client.get(reverse("search:query") + "?q=" +
                                 another_text + "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()
        chapters = [r for r in response if r.meta.doc_type == "chapter"]
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug +
                         "__" + chapter2.slug)  # got new chapter
Beispiel #3
0
    def test_category_and_subcategory_impact_search(self):
        """If two contents do not belong to the same (sub)category"""

        if not self.manager.connected_to_es:
            return

        text = "Did you ever hear the tragedy of Darth Plagueis The Wise?"

        # 1. Create two contents with different subcategories
        category_1 = "category 1"
        subcategory_1 = SubCategoryFactory(title=category_1)
        category_2 = "category 2"
        subcategory_2 = SubCategoryFactory(title=category_2)

        tuto_1 = PublishableContentFactory(type="TUTORIAL")
        tuto_1_draft = tuto_1.load_version()

        tuto_1.title = text
        tuto_1.authors.add(self.user)
        tuto_1.subcategory.add(subcategory_1)
        tuto_1.save()

        tuto_1_draft.description = text
        tuto_1_draft.repo_update_top_container(text, tuto_1.slug, text, text)

        chapter_1 = ContainerFactory(parent=tuto_1_draft, db_object=tuto_1)
        extract_1 = ExtractFactory(container=chapter_1, db_object=tuto_1)
        extract_1.repo_update(text, text)

        published_1 = publish_content(tuto_1,
                                      tuto_1_draft,
                                      is_major_update=True)

        tuto_1.sha_public = tuto_1_draft.current_version
        tuto_1.sha_draft = tuto_1_draft.current_version
        tuto_1.public_version = published_1
        tuto_1.save()

        tuto_2 = PublishableContentFactory(type="TUTORIAL")
        tuto_2_draft = tuto_2.load_version()

        tuto_2.title = text
        tuto_2.authors.add(self.user)
        tuto_2.subcategory.add(subcategory_2)
        tuto_2.save()

        tuto_2_draft.description = text
        tuto_2_draft.repo_update_top_container(text, tuto_2.slug, text, text)

        chapter_2 = ContainerFactory(parent=tuto_2_draft, db_object=tuto_2)
        extract_2 = ExtractFactory(container=chapter_2, db_object=tuto_2)
        extract_2.repo_update(text, text)

        published_2 = publish_content(tuto_2,
                                      tuto_2_draft,
                                      is_major_update=True)

        tuto_2.sha_public = tuto_2_draft.current_version
        tuto_2.sha_draft = tuto_2_draft.current_version
        tuto_2.public_version = published_2
        tuto_2.save()

        # 2. Index:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse("search:query") + "?q=" + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test
        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&model=content&subcategory=" +
                                 subcategory_1.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == "publishedcontent"
        ][0], published_1.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == "chapter"
        ][0], tuto_1.slug + "__" + chapter_1.slug)

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&model=content&subcategory=" +
                                 subcategory_2.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == "publishedcontent"
        ][0], published_2.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == "chapter"
        ][0], tuto_2.slug + "__" + chapter_2.slug)
Beispiel #4
0
    def test_basic_search(self):
        """Basic search and filtering"""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        text = "test"

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        # nothing has been indexed yet:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse("search:query") + "?q=" + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context["object_list"].execute()

        self.assertEqual(response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        ids = {
            "topic": [topic_1.es_id],
            "post": [post_1.es_id],
            "content": [
                published.es_id,
                published.content_public_slug + "__" + chapter1.slug
            ],
        }

        search_groups = [
            k for k, v in settings.ZDS_APP["search"]["search_groups"].items()
        ]
        group_to_model = {
            k: v[1]
            for k, v in settings.ZDS_APP["search"]["search_groups"].items()
        }

        for doc_type in search_groups:
            result = self.client.get(reverse("search:query") + "?q=" + text +
                                     "&models=" + doc_type,
                                     follow=False)
            self.assertEqual(result.status_code, 200)

            response = result.context["object_list"].execute()

            self.assertEqual(response.hits.total,
                             len(ids[doc_type]))  # get 1 result of each …
            for i, r in enumerate(response):
                self.assertIn(
                    r.meta.doc_type,
                    group_to_model[doc_type])  # … and only of the right type …
                self.assertEqual(r.meta.id,
                                 ids[doc_type][i])  # … with the right id !
Beispiel #5
0
    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""

        if not self.index_manager.connected_to_es:
            return

        # in the beginning: the void
        self.assertTrue(self.index_manager.index not in
                        self.index_manager.es.cat.indices())

        text = "Ceci est un texte de test"

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command("es_manager", "index_all")
        self.assertTrue(
            self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        s = Search()
        s.query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {
            "post": False,
            "topic": False,
            "publishedcontent": False,
            "chapter": False
        }
        id_must_be = {
            "post": str(post.pk),
            "topic": str(topic.pk),
            "publishedcontent": str(published.pk),
            "chapter": tuto.slug + "__" + chapter1.slug,
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        self.assertTrue(all(must_contain))

        # 2. test "clear"
        self.assertTrue(self.index_manager.index
                        in self.index_manager.es.cat.indices())  # index in

        call_command("es_manager", "clear")
        self.assertFalse(
            self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertTrue(
            self.index_manager.index
            not in self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command("es_manager", "setup")
        self.assertTrue(
            self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertTrue(
            self.index_manager.index
            in self.index_manager.es.cat.indices())  # index back in ...

        s = Search()
        s.query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(
            index=self.index_manager.index)
        settings_index = result[self.index_manager.index]["settings"]["index"]
        self.assertTrue("analysis"
                        in settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command("es_manager", "index_flagged")

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        s = Search()
        s.query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get the 4 results back