Exemple #1
0
    def test_stats(self):
        """Check the per-collection count and date range from Collection.stats()."""
        grabbag = Collection.objects.create(name='Random Grabbag')
        foo_time = Collection.objects.create(
            name='Foo through Time',
            description="A <em>very</em> useful collection.")

        # every work goes into the first collection ...
        all_works = DigitizedWork.objects.all()
        for work in all_works:
            work.collections.add(grabbag)
        # ... and exactly one work also goes into the second
        all_works.get(title__icontains='Wintry').collections.add(foo_time)

        # send fresh index data to Solr, then wait past the commitWithin
        # window before asking for stats
        solr, solr_collection = get_solr_connection()
        index_docs = [work.index_data() for work in all_works]
        solr.index(solr_collection, index_docs,
                   params={"commitWithin": 100})
        sleep(2)

        stats = Collection.stats()
        grabbag_stats = stats[grabbag.name]
        assert grabbag_stats['count'] == all_works.count()
        assert grabbag_stats['dates'] == '1880–1904'
        foo_stats = stats[foo_time.name]
        assert foo_stats['count'] == 1
        assert foo_stats['dates'] == '1903'
Exemple #2
0
    def index(self):
        '''Index newly imported content, both metadata and full text.

        Does nothing when :attr:`imported_works` is empty. Otherwise the
        works are indexed first, then the page data for each work, and a
        single commit is issued at the end.
        '''
        if not self.imported_works:
            return

        # work-level index data for everything that was imported
        DigitizedWork.index_items(self.imported_works)
        # page index data comes back as a generator, so pages are
        # handed to index_items lazily rather than as one big list
        for imported in self.imported_works:
            DigitizedWork.index_items(imported.page_index_data())

        # one explicit commit (with openSearcher=True) after all indexing
        solr, solr_collection = get_solr_connection()
        solr.commit(solr_collection, openSearcher=True)
    def test_digitizedwork_detailview_nonhathi(self):
        """Check the detail page for a work whose source is not HathiTrust."""
        # create a work with source OTHER rather than the Hathi default
        record = DigitizedWork.objects.create(
            source=DigitizedWork.OTHER,
            source_id='788423659',
            source_url='http://www.worldcat.org/title/study-of-the-accentual-structure-of-caesural-phrases-in-the-lady-of-the-lake/oclc/788423659',
            title='A study of the accentual structure of caesural phrases in The lady of the lake',
            sort_title='study of the accentual structure of caesural phrases in The lady of the lake',
            author='Farley, Odessa',
            publisher='University of Iowa',
            pub_date=1924,
            page_count=81)

        # the work must be in solr so last modified can be set on the header
        solr, solr_collection = get_solr_connection()
        solr.index(solr_collection, [record.index_data()],
                   params={"commitWithin": 100})
        sleep(1)

        detail_url = record.get_absolute_url()
        response = self.client.get(detail_url)
        # item details and the external record link are displayed
        expected_content = (
            record.title,
            record.author,
            record.source_id,
            'View external record',
            record.source_url,
            record.pub_date,
            record.page_count,
        )
        for expected in expected_content:
            self.assertContains(response, expected)
        # hathi-specific text must not appear
        for unexpected in ('HathiTrust', 'Search within the Volume'):
            self.assertNotContains(response, unexpected)

        # without a source url the external link is not displayed
        record.source_url = ''
        record.save()
        response = self.client.get(record.get_absolute_url())
        self.assertNotContains(response, 'View external record')

        # a search term is ignored for items without fulltext
        with patch('ppa.archive.views.PagedSolrQuery') as mock_psq:
            mock_query = mock_psq.return_value
            mock_query.count.return_value = 0
            mock_query.__getitem__.side_effect = IndexError
            self.client.get(record.get_absolute_url(), {'query': 'lady'})
            # one call for last modified, none for the keyword search
            assert mock_psq.call_count == 1
Exemple #4
0
    def test_update_solr(self):
        """Run the solr_schema command twice and check its reported output.

        The first run should report updates and no additions. After an
        extra field is created directly in Solr, a second run should
        report that one field was removed.
        """
        first_run = StringIO()
        call_command('solr_schema', stdout=first_run)
        output = first_run.getvalue()
        assert 'Updated ' in output
        assert 'Added ' not in output

        # create field to be removed
        solr, coll = get_solr_connection()
        solr.schema.create_field(coll, {
            'name': 'bogus',
            'type': 'string',
            'required': False
        })
        # capture the second run in a fresh buffer so the assertion below
        # cannot be satisfied by output left over from the first run
        second_run = StringIO()
        call_command('solr_schema', stdout=second_run)
        assert 'Removed 1 field' in second_run.getvalue()
Exemple #5
0
    def post(self, request, *args, **kwargs):
        '''
        Add :class:`ppa.archive.models.DigitizedWork` instances passed in form
        data to selected instances of :class:`ppa.archive.models.Collection`,
        then return to change_list view.

        Expects a list of DigitizedWork ids to be set in the request session.
        When the form is invalid, or the session holds no ids (key missing
        or empty), nothing is modified and the form is re-rendered.

        :param request: the POST request; its session is read for
            ``collection-add-ids``
        :returns: redirect to the success url after works were added,
            otherwise the rendered list view including the form and
            any errors on it
        '''
        form = AddToCollectionForm(request.POST)
        # use .get so a missing session key is treated like an empty list
        # instead of raising KeyError
        if form.is_valid() and request.session.get('collection-add-ids'):
            data = form.cleaned_data
            # NOTE(review): get_queryset() is defined outside this block and
            # presumably filters on the session ids, so it is called before
            # the ids are removed from the session - confirm
            digitized_works = self.get_queryset()
            del request.session['collection-add-ids']
            for collection in data['collections']:
                # add rather than set to ensure add does not replace
                # previous digitized works in set.
                collection.digitizedwork_set.add(*digitized_works)
            # reindex solr with the new collection data
            solr_docs = [work.index_data() for work in digitized_works]
            solr, solr_collection = get_solr_connection()
            solr.index(solr_collection, solr_docs,
                       params={'commitWithin': 2000})
            # create a success message to add to message framework stating
            # what happened
            num_works = digitized_works.count()
            collections = ', '.join(collection.name for
                                    collection in data['collections'])
            messages.success(request, 'Successfully added %d works to: %s.'
                             % (num_works, collections))
            # redirect to the change list with the message intact
            return redirect(self.get_success_url())
        # make form error more descriptive, default to an error re: pks
        if 'collections' in form.errors:
            del form.errors['collections']
            form.add_error(
                'collections',
                ValidationError('Please select at least one Collection')
            )
        # Provide an object list for ListView and emulate CBV calling
        # render_to_response to pass form with errors; just calling super
        # doesn't pass the form with error set
        self.object_list = self.get_queryset()
        return self.render_to_response(self.get_context_data(form=form))
Exemple #6
0
    def test_index(self):
        """Check that index() only yields a query result once commitWithin is passed."""
        with open(self.bibdata_brief) as bibdata:
            brief_bibdata = hathi.HathiBibliographicRecord(json.load(bibdata))

        work = DigitizedWork(source_id='njp.32101013082597')
        work.populate_from_bibdata(brief_bibdata)
        work.save()
        solr, solr_collection = get_solr_connection()

        def total_indexed():
            # number of documents a match-all query currently returns
            result = solr.query(solr_collection, {'q': '*:*'})
            return result.get_results_count()

        # saving alone leaves the work unindexed
        assert total_indexed() == 0
        # index a saved object; without commitWithin nothing is
        # returned by a query yet
        work.index()
        assert total_indexed() == 0
        # index again with commitWithin and wait for it to elapse
        work.index(params={'commitWithin': 500})
        sleep(1)
        # the work is now returned by a query
        res = solr.query(solr_collection, {'q': '*:*'})
        assert res.get_results_count() == 1
        assert res.docs[0]['id'] == 'njp.32101013082597'
Exemple #7
0
    def test_template(self):
        """Check templates, content, stats and search links on the collection page."""
        site = Site.objects.first()
        grabbag = Collection.objects.create(name='Random Grabbag')
        foo_time = Collection.objects.create(
            name='Foo through Time',
            description="A <em>very</em> useful collection.")
        empty_box = Collection.objects.create(name='Empty Box')

        # populate collections so stats and links can be checked:
        # all works in the first collection, a single work in the second,
        # and nothing in the third
        all_works = DigitizedWork.objects.all()
        for work in all_works:
            work.collections.add(grabbag)
        all_works.get(title__icontains='Wintry').collections.add(foo_time)

        # reindex, then wait past the commitWithin window
        solr, solr_collection = get_solr_connection()
        index_docs = [work.index_data() for work in all_works]
        solr.index(solr_collection, index_docs,
                   params={"commitWithin": 100})
        sleep(2)

        response = self.client.get(self.collection_page.relative_url(site))

        # expected templates are used
        for template_name in ('base.html',
                              'pages/content_page.html',
                              'pages/collection_page.html'):
            self.assertTemplateUsed(response, template_name)
        # user-editable page content is displayed
        self.assertContains(response, self.collection_page.body)
        # collections are listed by name
        self.assertContains(
            response, grabbag.name,
            msg_prefix='should list a collection called Random Grabbag')
        self.assertContains(
            response, foo_time.name,
            msg_prefix='should list a collection called Foo through Time')
        # description is compared as HTML
        self.assertContains(
            response, foo_time.description, html=True,
            msg_prefix='should render the description with HTML intact.')

        # collection stats: counts (with singular form) and date ranges
        self.assertContains(response,
                            '%d digitized works' % all_works.count())
        self.assertContains(response, '1 digitized work')
        self.assertNotContains(response, '1 digitized works')
        self.assertContains(response, '1880–1904')
        self.assertContains(response, '1903')

        # collections with content link to an archive search
        archive_url = reverse('archive:list')
        link_format = 'href="%s?collections=%s"'
        for linked in (grabbag, foo_time):
            self.assertContains(
                response, link_format % (archive_url, linked.pk))
        # the empty collection does not
        self.assertNotContains(
            response, link_format % (archive_url, empty_box.pk))
    def test_digitizedwork_detailview(self):
        """Check context, headers, metadata display, notes and unAPI markup on the detail page."""
        # fixture work used throughout, and the url of its detail page
        dial = DigitizedWork.objects.get(source_id='chi.78013704')
        detail_url = reverse('archive:detail',
                             kwargs={'source_id': dial.source_id})

        # the work must be in solr so last modified can be set on the header
        solr, solr_collection = get_solr_connection()
        solr.index(solr_collection, [dial.index_data()],
                   params={"commitWithin": 100})
        sleep(1)

        response = self.client.get(detail_url)
        assert response.status_code == 200
        # no keyword search was made: no pagination in context and
        # no message about keyword results
        assert 'page_obj' not in response.context
        self.assertNotContains(response, 'No keyword results.')

        # detail template was rendered
        template_names = [tpl.name for tpl in response.templates]
        assert 'archive/digitizedwork_detail.html' in template_names

        # the requested work is the object in context
        assert 'object' in response.context
        assert response.context['object'] == dial

        # last modified header is set
        assert response.has_header('last-modified')

        # displayed metadata
        # TODO: Make these HTML when the page is styled
        # (enumcron is deliberately not checked: not in the design spec)
        self.assertContains(response, dial.title, msg_prefix='Missing title')
        self.assertContains(
            response, dial.source_id,
            msg_prefix='Missing HathiTrust ID (source_id)')
        self.assertContains(
            response, dial.source_url, msg_prefix='Missing source_url')
        self.assertContains(response, dial.author, msg_prefix='Missing author')
        self.assertContains(
            response, dial.pub_place,
            msg_prefix='Missing place of publication (pub_place)')
        self.assertContains(
            response, dial.publisher, msg_prefix='Missing publisher')
        self.assertContains(
            response, dial.pub_date,
            msg_prefix='Missing publication date (pub_date)')
        # (added / updated dates are not checked here: they are currently
        # only displayed when logged in)

        # no notes set yet, so the notes label is absent
        self.assertNotContains(
            response, 'Note on edition',
            msg_prefix='Notes field should not be visible without notes')

        # with notes set, an anonymous user sees only the public note
        dial.public_notes = 'Nota bene'
        dial.notes = 'Secret note'
        dial.save()
        response = self.client.get(detail_url)
        self.assertContains(
            response, 'Note on edition',
            msg_prefix='Notes field should be visible if notes is set')
        self.assertContains(
            response, dial.public_notes,
            msg_prefix='The actual value of the notes field should be displayed'
        )
        self.assertNotContains(
            response, dial.notes,
            msg_prefix='The private notes field should not be displayed')

        # after logging in, the private note is shown as well
        user = get_user_model().objects.create(username='******')
        self.client.force_login(user)
        response = self.client.get(detail_url)
        self.assertContains(
            response, dial.notes,
            msg_prefix='The private notes field should be displayed')

        # unapi server link (compared as HTML)
        unapi_link = '''<link rel="unapi-server" type="application/xml"
            title="unAPI" href="%s" />''' % reverse('unapi')
        self.assertContains(
            response, unapi_link,
            msg_prefix='unapi server link should be set',
            html=True)
        # unapi identifier for this work
        unapi_id = '<abbr class="unapi-id" title="%s"></abbr>' % dial.source_id
        self.assertContains(
            response, unapi_id,
            msg_prefix='unapi id should be embedded for each work')
    def test_digitizedwork_listview(self):
        '''Exercise the archive list view end to end against Solr.

        Covers the default listing, keyword and field searches, sort
        options, collection and date filters, the ajax partial response,
        an empty index, and a simulated Solr error.
        '''
        url = reverse('archive:list')

        # sample page content associated with one of the fixture works
        sample_page_content = [
            'something about winter and wintry and wintriness',
            'something else delightful',
            'an alternate thing with words like blood and bone not in the title'
        ]
        htid = 'chi.13880510'
        solr_page_docs = [{
            'content': content,
            'order': i,
            'item_type': 'page',
            'source_id': htid,
            'id': '%s.%s' % (htid, i)
        } for i, content in enumerate(sample_page_content)]
        # Contrive a sort title such that tests below for title_asc will fail
        # if case insensitive sorting is not working
        dial = DigitizedWork.objects.filter(title__icontains='Dial').first()
        dial.sort_title = 'The deal'
        dial.save()
        # add a collection to use in testing the view
        collection = Collection.objects.create(name='Test Collection')
        digitized_works = DigitizedWork.objects.all()
        wintry = digitized_works.filter(title__icontains='Wintry')[0]
        wintry.collections.add(collection)
        solr_work_docs = [digwork.index_data() for digwork in digitized_works]
        solr, solr_collection = get_solr_connection()
        index_data = solr_work_docs + solr_page_docs
        solr.index(solr_collection, index_data, params={"commitWithin": 100})
        sleep(2)

        # also get dial for use with author and title searching
        dial = digitized_works.filter(title__icontains='Dial')[0]

        # no query - should find all
        response = self.client.get(url)
        assert response.status_code == 200
        self.assertContains(response,
                            '%d digitized works' % len(digitized_works))
        self.assertContains(response,
                            '<p class="result-number">1</p>',
                            msg_prefix='results have numbers')
        self.assertContains(response,
                            '<p class="result-number">2</p>',
                            msg_prefix='results have multiple numbers')

        # unapi server link present
        self.assertContains(response,
                            '''<link rel="unapi-server" type="application/xml"
            title="unAPI" href="%s" />''' % reverse('unapi'),
                            msg_prefix='unapi server link should be set',
                            html=True)

        # last modified header should be set on response
        assert response.has_header('last-modified')

        # should not have scores for all results, as not logged in
        self.assertNotContains(response, 'score')
        # log in a user and then should have them displayed
        self.client.force_login(
            get_user_model().objects.create(username='******'))
        response = self.client.get(url)
        self.assertContains(response, 'score')

        # search form should be set in context for display
        assert isinstance(response.context['search_form'], SearchForm)
        # page group details from expanded part of collapsed query
        assert 'page_groups' in response.context
        # facet range information from publication date range facet
        assert 'facet_ranges' in response.context

        for digwork in digitized_works:

            # temporarily skip until uncategorized collection support is added
            if not digwork.collections.count():
                continue

            # basic metadata for each work
            self.assertContains(response, digwork.title)
            self.assertContains(response, digwork.subtitle)
            self.assertContains(response, digwork.source_id)
            self.assertContains(response, digwork.author)
            self.assertContains(response, digwork.enumcron)
            # at least one publisher includes an ampersand, so escape text
            self.assertContains(response, escape(digwork.publisher))
            # self.assertContains(response, digwork.pub_place)
            self.assertContains(response, digwork.pub_date)
            # link to detail page
            self.assertContains(response, digwork.get_absolute_url())
            # unapi identifier for each work
            self.assertContains(
                response,
                '<abbr class="unapi-id" title="%s"' % digwork.source_id,
                msg_prefix='unapi id should be embedded for each work')

        # no page images or highlights displayed without search term
        self.assertNotContains(
            response,
            'babel.hathitrust.org/cgi/imgsrv/image',
            msg_prefix='no page images displayed without keyword search')

        # no collection label should only display once
        # (for collection selection badge, not for result display)
        self.assertContains(response, NO_COLLECTION_LABEL, count=1)

        # search term in title
        response = self.client.get(url, {'query': 'wintry'})
        # relevance sort for keyword search
        assert len(response.context['object_list']) == 1
        self.assertContains(response, '1 digitized work')
        self.assertContains(response, wintry.source_id)
        # page image & text highlight displayed for matching page
        self.assertContains(
            response,
            'babel.hathitrust.org/cgi/imgsrv/image?id=%s;seq=0' % htid,
            msg_prefix=
            'page image displayed for matching pages on keyword search')
        self.assertContains(
            response,
            'winter and <em>wintry</em> and',
            msg_prefix='highlight snippet from page content displayed')

        # page image and text highlight should still display with year filter
        response = self.client.get(url, {
            'query': 'wintry',
            'pub_date_0': 1800
        })
        assert response.context['page_highlights']

        self.assertContains(
            response,
            'winter and <em>wintry</em> and',
            msg_prefix='highlight snippet from page content displayed')
        self.assertContains(
            response,
            'babel.hathitrust.org/cgi/imgsrv/image?id=%s;seq=0' % htid,
            msg_prefix=
            'page image displayed for matching pages on keyword search')

        # match in page content but not in book metadata should pull back title
        response = self.client.get(url, {'query': 'blood'})
        self.assertContains(response, '1 digitized work')

        self.assertContains(response, wintry.source_id)
        self.assertContains(response, wintry.title)

        # search text in author name
        response = self.client.get(url, {'query': 'Robert Bridges'})
        self.assertContains(response, wintry.source_id)

        # search author as author field only
        response = self.client.get(url, {'author': 'Robert Bridges'})
        self.assertContains(response, wintry.source_id)
        self.assertNotContains(response, dial.source_id)

        # search title using the title field
        response = self.client.get(url, {'title': 'The Dial'})
        self.assertContains(response, dial.source_id)
        self.assertNotContains(response, wintry.source_id)

        # search on subtitle using the title query field
        response = self.client.get(url, {'title': 'valuable'})
        self.assertNotContains(response, dial.source_id)
        self.assertNotContains(response, wintry.source_id)
        self.assertContains(response, '135000 words')

        # search text in publisher name
        response = self.client.get(url, {'query': 'McClurg'})
        for digwork in DigitizedWork.objects.filter(
                publisher__icontains='mcclurg'):
            self.assertContains(response, digwork.source_id)

        # search text in publication place - matches wintry
        response = self.client.get(url, {'query': 'Oxford'})
        self.assertContains(response, wintry.source_id)

        # exact phrase
        response = self.client.get(url, {'query': '"wintry delights"'})
        self.assertContains(response, '1 digitized work')
        self.assertContains(response, wintry.source_id)

        # boolean
        response = self.client.get(url,
                                   {'query': 'blood AND bone AND alternate'})
        self.assertContains(response, '1 digitized work')
        self.assertContains(response, wintry.source_id)
        response = self.client.get(url, {'query': 'blood NOT bone'})
        self.assertContains(response, 'No matching works.')

        # bad syntax
        # NOTE: According to Solr docs, edismax query parser
        # "includes improved smart partial escaping in the case of syntax
        # errors"; not sure how to trigger this error anymore!
        # response = self.client.get(url, {'query': '"incomplete phrase'})
        # self.assertContains(response, 'Unable to parse search query')

        # add a sort term - pub date
        response = self.client.get(url, {'query': '', 'sort': 'pub_date_asc'})
        # explicitly sort by pub_date manually
        sorted_object_list = sorted(response.context['object_list'],
                                    key=operator.itemgetter('pub_date'))
        # the two context lists should match exactly
        assert sorted_object_list == response.context['object_list']
        # test sort date in reverse
        response = self.client.get(url, {'query': '', 'sort': 'pub_date_desc'})
        # explicitly sort by pub_date manually in descending order
        sorted_object_list = sorted(response.context['object_list'],
                                    key=operator.itemgetter('pub_date'),
                                    reverse=True)
        # the two context lists should match exactly
        assert sorted_object_list == response.context['object_list']
        # one last test using title
        response = self.client.get(url, {'query': '', 'sort': 'title_asc'})
        sorted_work_ids = DigitizedWork.objects.order_by(Lower('sort_title')) \
                                       .values_list('source_id', flat=True)
        # the list of ids should match exactly
        assert list(sorted_work_ids) == \
            [work['source_id'] for work in response.context['object_list']]

        # - check that a query allows relevance as sort order toggle in form
        response = self.client.get(url, {'query': 'foo', 'sort': 'title_asc'})
        enabled_input = \
            '<div class="item " data-value="relevance">Relevance</div>'
        self.assertContains(response, enabled_input, html=True)
        response = self.client.get(url, {'title': 'foo', 'sort': 'title_asc'})
        self.assertContains(response, enabled_input, html=True)
        response = self.client.get(url, {'author': 'foo', 'sort': 'title_asc'})
        self.assertContains(response, enabled_input, html=True)
        # check that a search that does not have a query disables
        # relevance as a sort order option
        response = self.client.get(url, {'sort': 'title_asc'})
        self.assertContains(
            response,
            '<div class="item disabled" data-value="relevance">Relevance</div>',
            html=True)
        # default sort should be title if no keyword search and no sort specified
        response = self.client.get(url)
        assert response.context['search_form'].cleaned_data[
            'sort'] == 'title_asc'
        # default collections should be set based on exclude option
        default_collections = {NO_COLLECTION_LABEL} | \
            set(Collection.objects.filter(exclude=False))
        assert set(response.context['search_form'].cleaned_data['collections']) == \
            default_collections

        # if relevance sort is requested but no keyword, switch to default sort
        response = self.client.get(url, {'sort': 'relevance'})
        assert response.context['search_form'].cleaned_data[
            'sort'] == 'title_asc'

        # collection search
        # restrict to test collection by id
        response = self.client.get(url, {'collections': collection.pk})
        assert len(response.context['object_list']) == 1
        self.assertContains(response, wintry.source_id)

        # basic date range request
        response = self.client.get(url, {
            'pub_date_0': 1900,
            'pub_date_1': 1922
        })
        # in fixture data, only wintry and 135000 words are after 1900
        assert len(response.context['object_list']) == 2
        self.assertContains(response, wintry.source_id)

        # invalid date range request / invalid form - not an exception
        response = self.client.get(url, {
            'pub_date_0': 1900,
            'pub_date_1': 1800
        })
        assert not response.context['object_list'].count()
        self.assertContains(response, 'Invalid range')

        # no collections = no items (but not an error)
        response = self.client.get(url, {'collections': ''})
        assert response.status_code == 200
        assert not response.context['object_list']

        # special 'uncategorized' collection
        response = self.client.get(
            url, {'collections': ModelMultipleChoiceFieldWithEmpty.EMPTY_ID})

        assert len(response.context['object_list']) == \
            DigitizedWork.objects.filter(collections__isnull=True).count()

        # ajax request for search results
        response = self.client.get(url, HTTP_X_REQUESTED_WITH='XMLHttpRequest')
        assert response.status_code == 200
        # should render the results list partial and single result partial
        # (the response must be passed: called with only a template name,
        # these assertions return an unused context manager and check nothing)
        self.assertTemplateUsed(response, 'archive/snippets/results_list.html')
        # NOTE(review): path corrected from 'archive/snippest/...', which
        # looks like a typo - confirm the partial lives under
        # archive/snippets/
        self.assertTemplateUsed(response, 'archive/snippets/search_result.html')
        # shouldn't render the search form or whole list
        self.assertTemplateNotUsed(response,
                                   'archive/snippets/search_form.html')
        self.assertTemplateNotUsed(response, 'archive/digitizedwork_list.html')
        # should have all the results
        assert len(response.context['object_list']) == len(digitized_works)
        # should have the results count
        self.assertContains(response, " digitized works")
        # should have the histogram data
        self.assertContains(response, "<pre class=\"count\">")
        # should have pagination
        self.assertContains(response, "<div class=\"page-controls")
        # test a query
        response = self.client.get(url,
                                   {'query': 'blood AND bone AND alternate'},
                                   HTTP_X_REQUESTED_WITH='XMLHttpRequest')
        self.assertContains(response, '1 digitized work')
        self.assertContains(response, wintry.source_id)

        # nothing indexed - should not error
        solr.delete_doc_by_query(solr_collection,
                                 '*:*',
                                 params={"commitWithin": 100})
        sleep(2)
        response = self.client.get(url)
        assert response.status_code == 200

        # simulate solr exception (other than query syntax)
        with patch('ppa.archive.views.PagedSolrQuery') as mockpsq:
            mockpsq.return_value.get_expanded.side_effect = SolrError
            # count needed for paginator
            mockpsq.return_value.count.return_value = 0
            # simulate empty result doc for last modified check
            mockpsq.return_value.__getitem__.return_value = {}
            response = self.client.get(url, {'query': 'something'})
            # paginator variables should still be set
            assert 'object_list' in response.context
            assert 'paginator' in response.context
            self.assertContains(response, 'Something went wrong.')
Exemple #10
0
    def test_digitizedwork_detailview_query(self):
        '''Test the digitized work detail page with a search query.

        Indexes one fixture work and two sample page documents in Solr,
        then exercises the detail view for:

        - a query with no matches (empty highlights and results)
        - a query matching one page (highlight snippet, page label,
          image urls, and HathiTrust links are rendered)
        - a Solr error raised while fetching results
        - an ajax request, which should render only the results partial
        '''

        # get a work and its detail page to test with
        dial = DigitizedWork.objects.get(source_id='chi.78013704')
        url = reverse('archive:detail', kwargs={'source_id': dial.source_id})

        # sample page content associated with the fixture work
        sample_page_content = [
            'something about dials and clocks',
            'knobs and buttons',
        ]
        htid = dial.source_id
        solr_page_docs = [{
            'content': content,
            'order': i + 1,
            'item_type': 'page',
            'source_id': htid,
            'id': '%s.%s' % (htid, i),
            'label': i
        } for i, content in enumerate(sample_page_content)]

        # index the work (adds last modified for header) together with
        # its pages in a single request, then wait for the commit
        solr, solr_collection = get_solr_connection()
        solr.index(solr_collection, [dial.index_data()] + solr_page_docs,
                   params={"commitWithin": 100})
        sleep(2)

        # search with no matches - test empty search result
        response = self.client.get(url, {'query': 'thermodynamics'})
        assert response.status_code == 200
        # test that the search form is rendered
        assert 'search_form' in response.context
        assert 'query' in response.context['search_form'].fields
        # query string passed into context for form
        assert 'query' in response.context
        assert response.context['query'] == 'thermodynamics'
        # solr highlight results in context
        assert 'page_highlights' in response.context
        # should be an empty dict
        assert response.context['page_highlights'] == {}
        # solr results in context
        assert 'solr_results' in response.context
        # should be an empty list
        assert response.context['solr_results'] == []

        # test with a word that will produce some snippets
        response = self.client.get(url, {'query': 'knobs'})
        assert response.status_code == 200
        # paginator should be in context
        assert 'page_obj' in response.context
        page_obj = response.context['page_obj']
        solr_results = response.context['solr_results']
        # it should be one (because we have one result)
        assert page_obj.number == 1
        # it should have an object list equal in length to the page solr query
        assert len(page_obj.object_list) == len(solr_results)
        # get the solr results (should be one)
        result = solr_results[0]
        # grab the highlight object that's rendered with our one match
        highlights = response.context['page_highlights'][result['id']]
        # template has the expected information rendered
        self.assertContains(response, highlights['content'][0])
        # page number that corresponds to label field should be present
        self.assertContains(
            response,
            'p. %s' % result['label'],
            count=1,
            msg_prefix='has page label for the print page numb.')
        # image url should appear in each src and srcset
        # (one for lazy load image and one for noscript image)
        self.assertContains(response,
                            page_image_url(result['source_id'],
                                           result['order'], 225),
                            count=4,
                            msg_prefix='has img src url')
        # 2x image url should appear in srcset for img and noscript img
        self.assertContains(response,
                            page_image_url(result['source_id'],
                                           result['order'], 450),
                            count=2,
                            msg_prefix='has imgset src url')
        self.assertContains(response, '1 occurrence')
        # image should have a link to hathitrust as should the page number
        self.assertContains(response,
                            page_url(result['source_id'], result['order']),
                            count=2,
                            msg_prefix='should include a link to HathiTrust')

        # NOTE: bad query syntax (e.g. an incomplete phrase) is not tested
        # here; it is no longer a problem with edismax

        # test raising a generic solr error
        with patch('ppa.archive.views.PagedSolrQuery') as mockpsq:
            mockpsq.return_value.get_results.side_effect = SolrError
            # count needed for paginator
            mockpsq.return_value.count.return_value = 0
            # error for last-modified
            mockpsq.return_value.__getitem__.side_effect = SolrError
            response = self.client.get(url, {'query': 'knobs'})
            self.assertContains(response, 'Something went wrong.')

        # ajax request for search results
        response = self.client.get(url, {'query': 'knobs'},
                                   HTTP_X_REQUESTED_WITH='XMLHttpRequest')
        assert response.status_code == 200
        # response must be passed explicitly: called with only a template
        # name, these assertions return an unused context manager and
        # check nothing
        # should render the results list partial
        self.assertTemplateUsed(
            response, 'archive/snippets/results_within_list.html')
        # shouldn't render the whole detail page
        self.assertTemplateNotUsed(
            response, 'archive/digitizedwork_detail.html')
        # should have all the results
        assert len(response.context['page_highlights']) == 1
        # should have the results count
        self.assertContains(response, "1 occurrence")
        # should have pagination
        self.assertContains(response, "<div class=\"page-controls")
Exemple #11
0
def test_get_solr_connection():
    '''Check that the solr client and collection match the default
    entry of the test solr connection settings.'''
    default_config = TEST_SOLR_CONNECTIONS['default']
    solr, collection = get_solr_connection()
    # connection should be a solr client pointed at the configured url
    assert isinstance(solr, SolrClient)
    assert solr.host == default_config['URL']
    # collection name should come from the same config entry
    assert collection == default_config['COLLECTION']