Esempio n. 1
0
    def test_shelfmark_partialmatch(self, empty_solr, multifragment):
        """Integration test: shelfmark indexing supports partial matching.

        Uses the ``empty_solr`` fixture so that Solr is empty when the test
        starts and only the documents created here are indexed.
        """
        # the multifragment fixture shelfmark (T-S 16.377) exercises this case
        fixture_doc = Document.objects.create()
        TextBlock.objects.create(document=fixture_doc, fragment=multifragment)

        # second document on an arbitrary fragment with a similar numeric
        # shelfmark
        similar_fragment = Fragment.objects.create(shelfmark="T-S 16.378")
        similar_doc = Document.objects.create()
        TextBlock.objects.create(document=similar_doc, fragment=similar_fragment)

        # update the solr index with both test documents
        test_docs = (fixture_doc, similar_doc)
        SolrClient().update.index(
            [doc.index_data() for doc in test_docs],
            commit=True,
        )

        # search on the shared shelfmark prefix; sort does not matter here
        view = DocumentSearchView()
        request = Mock()
        request.GET = {"q": "T-S 16"}
        view.request = request
        results = view.get_queryset()

        # both documents should come back
        assert results.count() == 2
        found_ids = [hit["pgpid"] for hit in results]
        for doc in test_docs:
            assert doc.id in found_ids
Esempio n. 2
0
 def test_get_paginate_by(self):
     """A ``per_page`` request parameter of "2" should give 2 results per page."""
     view = DocumentSearchView(kwargs={})
     request = Mock()
     request.GET = {"per_page": "2"}
     view.request = request
     results = view.get_queryset()
     page_size = view.get_paginate_by(results)
     assert page_size == 2
Esempio n. 3
0
    def test_doctype_filter(self, document, join, empty_solr):
        """Integration test: the document type filter narrows search results."""
        # fixture types: `document` is a Legal document, `join` is a Letter
        indexed = [fixture.index_data() for fixture in (document, join)]
        SolrClient().update.index(indexed, commit=True)

        view = DocumentSearchView()
        view.request = Mock()

        # with no filter applied, both documents are returned
        view.request.GET = {}
        unfiltered = view.get_queryset()
        assert unfiltered.count() == 2

        # filtering on doctype "Legal document" leaves a single result
        view.request.GET = {"doctype": ["Legal document"]}
        legal_only = view.get_queryset()
        assert legal_only.count() == 1
        first_hit = legal_only[0]
        assert first_hit["pgpid"] == document.id, "Only legal document returned"
Esempio n. 4
0
    def test_shelfmark_boost(self, empty_solr, document, multifragment):
        """Integration test for shelfmark field boosting in the Solr configuration.

        Uses the ``empty_solr`` fixture so that Solr is empty when the test
        starts.
        """
        shelfmark = document.shelfmark

        # second document, on a different fragment, whose description cites
        # the shelfmark of the first
        citing_doc = Document.objects.create(description="See also %s" % shelfmark)
        TextBlock.objects.create(document=citing_doc, fragment=multifragment)

        # third document with a similar shelfmark
        # (the fixture has shelfmark CUL Add.2586)
        similar_fragment = Fragment.objects.create(shelfmark="CUL Add.300")
        similar_doc = Document.objects.create()
        TextBlock.objects.create(document=similar_doc, fragment=similar_fragment)

        # update the solr index with all three test documents
        index_order = (document, similar_doc, citing_doc)
        SolrClient().update.index(
            [doc.index_data() for doc in index_order],
            commit=True,
        )

        view = DocumentSearchView()
        view.request = Mock()
        # relevance is assumed to be the default sort; update if that changes
        view.request.GET = {"q": shelfmark, "sort": "relevance"}
        results = view.get_queryset()

        # all three documents should be returned
        assert results.count() == 3

        # the exact shelfmark match ranks first
        top_hit = results[0]
        assert top_hit["pgpid"] == document.id, (
            "document with matching shelfmark returned first"
        )
        # the document citing the full shelfmark in its description ranks second
        second_hit = results[1]
        assert second_hit["pgpid"] == citing_doc.id, (
            "document with shelfmark in description returned second"
        )
Esempio n. 5
0
    def test_ignore_suppressed_documents(self, document, empty_solr):
        """Integration test: suppressed documents are left out of search results.

        Indexes the ``document`` fixture together with a newly created
        suppressed document, then checks that an empty keyword search
        returns only the fixture document.
        """
        suppressed_document = Document.objects.create(status=Document.SUPPRESSED)
        Document.index_items([document, suppressed_document])
        # NOTE(review): indexing an empty list with commit=True presumably
        # forces a commit so the items sent by index_items are searchable
        # right away — confirm against the SolrClient / index_items behavior
        SolrClient().update.index([], commit=True)

        docsearch_view = DocumentSearchView()
        # mock request with empty keyword search
        docsearch_view.request = Mock()
        docsearch_view.request.GET = {"q": ""}
        qs = docsearch_view.get_queryset()
        result_pgpids = [obj["pgpid"] for obj in qs]

        # only the non-suppressed document should be returned
        assert qs.count() == 1
        assert document.id in result_pgpids
        assert suppressed_document.id not in result_pgpids
Esempio n. 6
0
    def test_scholarship_sort(
        self,
        document,
        join,
        empty_solr,
        source,
        twoauthor_source,
        multiauthor_untitledsource,
    ):
        """Integration test: sort by scholarship count, ascending and descending."""
        # give the `join` fixture a single scholarship record
        Footnote.objects.create(
            content_object=join,
            source=source,
            doc_relation=Footnote.EDITION,
        )

        # new document with three scholarship records, one per source fixture
        most_cited = Document.objects.create(description="testing description")
        three_sources = (source, twoauthor_source, multiauthor_untitledsource)
        for cited_source in three_sources:
            Footnote.objects.create(
                content_object=most_cited,
                source=cited_source,
                doc_relation=Footnote.EDITION,
            )

        # update the solr index with all three test documents, which have
        # zero, one and three scholarship records respectively
        SolrClient().update.index(
            [doc.index_data() for doc in (document, join, most_cited)],
            commit=True,
        )

        view = DocumentSearchView()
        view.request = Mock()

        def search(params):
            """Set the mock request's GET params and return the view queryset."""
            view.request.GET = params
            return view.get_queryset()

        # no sort, no query: all three documents, most records first by default
        results = search({})
        assert results.count() == 3
        assert results[0]["pgpid"] == most_cited.id, (
            "document with most scholarship records returned first"
        )

        # scholarship descending: most records first
        results = search({"sort": "scholarship_desc"})
        assert results[0]["pgpid"] == most_cited.id, (
            "document with most scholarship records returned first"
        )

        # scholarship ascending: fewest records first
        results = search({"sort": "scholarship_asc"})
        assert results[0]["pgpid"] == document.id, (
            "document with fewest scholarship records returned first"
        )

        # scholarship ascending combined with a keyword query: two documents
        # match, and the one with fewest records comes first
        results = search({"sort": "scholarship_asc", "q": "testing"})
        assert results.count() == 2
        assert results[0]["pgpid"] == join.id, (
            "document with matching description and fewest scholarship records returned first"
        )
Esempio n. 7
0
    def test_get_queryset(self, mock_solr_queryset):
        """Unit test for ``DocumentSearchView.get_queryset`` against a mocked queryset.

        Patches ``DocumentSolrQuerySet`` in ``geniza.corpus.views`` with a mock
        and checks which queryset methods are called (keyword search,
        highlighting, ordering, status filter) for several combinations of
        request parameters.
        """
        # NOTE(review): the mock factory is reached through
        # ``self.mock_solr_queryset`` rather than the fixture argument —
        # presumably the fixture also attaches it to the test class; confirm
        with patch(
            "geniza.corpus.views.DocumentSolrQuerySet",
            new=self.mock_solr_queryset(
                DocumentSolrQuerySet, extra_methods=["admin_search", "keyword_search"]
            ),
        ) as mock_queryset_cls:

            docsearch_view = DocumentSearchView()
            docsearch_view.request = Mock()

            # keyword search param
            docsearch_view.request.GET = {"q": "six apartments"}
            qs = docsearch_view.get_queryset()

            # queryset class is instantiated without arguments
            mock_queryset_cls.assert_called_with()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_called_with("six apartments")
            # description highlighting is requested on the keyword search result
            mock_sqs.keyword_search.return_value.highlight.assert_any_call(
                "description", snippets=3, method="unified", requireFieldMatch=True
            )
            # score is requested and results are ordered by descending score
            mock_sqs.also.assert_called_with("score")
            mock_sqs.also.return_value.order_by.assert_called_with("-score")

            # sort search param
            # (reset recorded calls so assertions only see this scenario)
            mock_sqs.reset_mock()
            docsearch_view.request = Mock()
            docsearch_view.request.GET = {"sort": "relevance"}
            qs = docsearch_view.get_queryset()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_not_called()
            # filter called once to limit by status
            assert mock_sqs.filter.call_count == 1
            mock_sqs.filter.assert_called_with(status=Document.STATUS_PUBLIC)
            # order_by should not be called when relevance sort is requested
            # without a search query
            mock_sqs.order_by.assert_not_called()

            # sort and keyword search params
            mock_sqs.reset_mock()
            docsearch_view.request = Mock()
            docsearch_view.request.GET = {"q": "six apartments", "sort": "relevance"}
            qs = docsearch_view.get_queryset()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_called_with("six apartments")
            # status filter is applied at the end of the
            # keyword_search -> also -> order_by chain
            mock_sqs.keyword_search.return_value.also.return_value.order_by.return_value.filter.assert_called_with(
                status=Document.STATUS_PUBLIC
            )
            mock_sqs.keyword_search.return_value.also.return_value.order_by.assert_called_with(
                "-score"
            )

            # keyword, sort, and doctype filter search params
            mock_sqs.reset_mock()
            docsearch_view.request = Mock()
            docsearch_view.request.GET = {
                "q": "six apartments",
                "sort": "scholarship_desc",
                "doctype": ["Legal"],
            }
            qs = docsearch_view.get_queryset()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_called_with("six apartments")
            # only checks that filter was called; its arguments are not asserted here
            mock_sqs.keyword_search.return_value.also.return_value.order_by.return_value.filter.assert_called()
            # scholarship_desc sort orders by descending scholarship count field
            mock_sqs.keyword_search.return_value.also.return_value.order_by.assert_called_with(
                "-scholarship_count_i"
            )

            # empty params
            # (empty strings for q and sort: no keyword search, scholarship count ordering)
            mock_sqs.reset_mock()
            docsearch_view.request = Mock()
            docsearch_view.request.GET = {"q": "", "sort": ""}
            qs = docsearch_view.get_queryset()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_not_called()
            mock_sqs.order_by.assert_called_with("-scholarship_count_i")

            # no params
            # (same expectations as the empty-string case)
            mock_sqs.reset_mock()
            docsearch_view.request = Mock()
            docsearch_view.request.GET = {}
            qs = docsearch_view.get_queryset()
            mock_sqs = mock_queryset_cls.return_value
            mock_sqs.keyword_search.assert_not_called()
            mock_sqs.order_by.assert_called_with("-scholarship_count_i")