Example #1
0
def collection_suggest(request):
    '''Suggest view for collections, for use with the `JQuery UI
    Autocomplete`_ widget.  Searches for collections on all of the
    terms passed in (as multiple keywords), similar to the way the
    combined search works.

    .. _JQuery UI Autocomplete: http://jqueryui.com/demos/autocomplete/

    :param request: the http request passed to the original view
        method (used to retrieve the search term from the ``term``
        GET parameter)
    :returns: an HttpResponse with content type ``application/json``
        containing the encoded list of suggestions (at most 15); each
        suggestion has ``label``, ``value``, ``category`` and ``desc``
        keys.  The list is empty when no search term was supplied.
    '''
    term = request.GET.get('term', '')

    suggestions = []

    if term:
        # If the search term doesn't end in space, add a wildcard to
        # the last word to allow for partial word matching.
        if term[-1] != ' ':
            term += '*'
        terms = search_terms(term)

        solr = solr_interface()
        # common query parameters and options
        base_query = solr.query() \
                    .filter(content_model=CollectionObject.COLLECTION_CONTENT_MODEL) \
                    .field_limit(['pid', 'source_id', 'title', 'archive_short_name',
                                  'creator', 'archive_id']) \
                    .sort_by('-score')

        q = base_query.query(terms)

        # NOTE: there seems to be a Lucene/Solr bug/quirk where adding
        # a wildcard at the end of a word causes Solr not to match the
        # exact word (even though docs indicate this should work).
        # As a work-around, if we added a * and got 0 results,
        # try the search again without the wildcard.
        if term[-1] == '*' and q.count() == 0:
            q = base_query.query(search_terms(term[:-1]))

        # Exclude archival collection (Top-level library).  This filter
        # is applied to whichever query was selected above, not only to
        # the retry without the wildcard.
        q = q.filter(archive_id__any=True)

        # Only the first 15 matches are offered as suggestions.
        suggestions = [{'label': '%s %s' % (c.get('source_id', ''),
                                            c.get('title', '(no title)')),
                        'value': c['pid'],  # FIXME: do we need URI here?
                        'category': c.get('archive_short_name', ''),
                        'desc': c.get('creator', '')}
                       for c in q[:15]]

    return HttpResponse(json_serializer.encode(suggestions),
                         content_type='application/json')
 def clean_keywords(self):
     '''Validate and normalize the ``keywords`` field.

     The raw value is split into terms with ``search_terms``; any term
     that begins with a ``*`` or ``?`` wildcard is rejected.

     :returns: the terms re-joined with single spaces, where any term
         that contains a space is wrapped in double quotes
     :raises forms.ValidationError: with code ``invalid`` if a term
         starts with a wildcard
     '''
     # doesn't care about mis-matched quotes, just strips them out
     terms = search_terms(self.cleaned_data['keywords'])
     if any(word.startswith(('*', '?')) for word in terms):
         raise forms.ValidationError(
             mark_safe('Search terms may not begin with wildcards <b>*</b> or <b>?</b>'),
             code='invalid')
     # NOTE: this cleans up mismatched quotes and converts them to terms
     normalized = []
     for word in terms:
         # multi-word terms are re-quoted so they stay together as a phrase
         normalized.append('"%s"' % word if ' ' in word else word)
     return ' '.join(normalized)
Example #3
0
    def search_terms(self):
        '''Return the keywords and phrases entered in the ``keyword`` field
        as a list, split using :meth:`eulcommon.searchutil.search_terms`.

        Requires that the form has already been validated, since the
        value is read from ``cleaned_data``.'''
        raw_keywords = self.cleaned_data['keyword']
        # NOTE: searchutil.search_terms is used to separate single words
        # from exact phrases, but it also looks for fielded search terms
        # (like title:something or title:"another thing"), so it can't
        # handle searching on an ARK URI.  As a workaround, the known
        # colons that should be preserved are swapped for a placeholder
        # before splitting, and swapped back in each resulting term.
        protected = re.sub(r'(http|ark):', r'\1;;', raw_keywords)

        restored = []
        for token in search_terms(protected):
            restored.append(re.sub(r'(http|ark);;', r'\1:', token))
        return restored
    def test_phrases(self):
        '''Check how search_terms splits input containing quoted phrases.'''
        # each case is (search string, expected list of terms)
        cases = [
            # quoted phrases
            ('"exact phrase"', ['exact phrase']),
            ("'single quotes'", ["'single", "quotes'"]),
            ('"exact phrase" with keyword', ['exact phrase', 'with', 'keyword']),
            # phrase with internal apostrophe
            ('"I don\'t" know', ["I don't", "know"]),
            # non-matching quotes ignored
            ('"non phrase\'', ["non", "phrase'"]),
            # single quotes inside a double-quoted phrase are preserved
            ('"\'hello\'"', ["'hello'"]),
            ('"\'Tis a beautiful day"', ["'Tis a beautiful day"]),
        ]
        for query, expected in cases:
            self.assertEqual(expected, search_terms(query))
Example #5
0
    def test_phrases(self):
        '''Exercise search_terms with double-quoted phrases, single
        quotes, and unbalanced quotes.'''
        expect = self.assertEqual

        # double quotes group words into a single phrase term
        expect(['exact phrase'], search_terms('"exact phrase"'))
        # single quotes do not delimit a phrase
        single_quoted = search_terms("'single quotes'")
        expect(["'single", "quotes'"], single_quoted)
        mixed = search_terms('"exact phrase" with keyword')
        expect(['exact phrase', 'with', 'keyword'], mixed)

        # an apostrophe inside a phrase is kept
        expect(["I don't", "know"], search_terms('"I don\'t" know'))

        # quotes that do not match each other are ignored
        expect(["non", "phrase'"], search_terms('"non phrase\''))

        # single quotes nested inside a double-quoted phrase are kept
        expect(["'hello'"], search_terms('"\'hello\'"'))
        leading_apostrophe = search_terms('"\'Tis a beautiful day"')
        expect(["'Tis a beautiful day"], leading_apostrophe)
 def test_wildcards(self):
     '''Check that search_terms keeps * and ? wildcards wherever they
     appear within a word.'''
     # each case is (search string, expected list of terms)
     cases = [
         # beginning of a word
         ('*nd to mouth', ['*nd', 'to', 'mouth']),
         (' ?nd or', ['?nd', 'or']),
         # middle of a word
         ('w*ther or not', ['w*ther', 'or', 'not']),
         ('wh?ther thou goest', ['wh?ther', 'thou', 'goest']),
         # end of a word
         ('th*', ['th*']),
         ('th?', ['th?']),
     ]
     for query, expected in cases:
         self.assertEqual(expected, search_terms(query))
Example #7
0
 def test_wildcards(self):
     '''Exercise search_terms with * and ? wildcards at the start,
     middle and end of a word.'''
     expect = self.assertEqual

     # wildcard as the first character of a word
     expect(['*nd', 'to', 'mouth'], search_terms('*nd to mouth'))
     expect(['?nd', 'or'], search_terms(' ?nd or'))

     # wildcard inside a word
     star_inside = search_terms('w*ther or not')
     expect(['w*ther', 'or', 'not'], star_inside)
     question_inside = search_terms('wh?ther thou goest')
     expect(['wh?ther', 'thou', 'goest'], question_inside)

     # wildcard as the last character of a word
     expect(['th*'], search_terms('th*'))
     expect(['th?'], search_terms('th?'))
Example #8
0
    def test_words(self):
        '''Check how search_terms splits plain, unquoted words.'''
        # each case is (search string, expected list of terms)
        cases = [
            # search strings with single words
            ('word', ['word']),
            ('multiple words', ['multiple', 'words']),
            ("don't", ["don't"]),
            # a decimal number stays together as one term
            ('one 2.5', ['one', '2.5']),
            # leading, trailing and repeated whitespace is dropped
            ('  extraneous      whitespace ', ['extraneous', 'whitespace']),
            # search_terms should ignore colons: they stay part of the term
            (' one two: three', ['one', 'two:', 'three']),
            (' one two:three four', ['one', 'two:three', 'four']),
            (' one two:"three\tfour" five',
             ['one', 'two:"three\tfour"', 'five']),
        ]
        for query, expected in cases:
            self.assertEqual(expected, search_terms(query))
Example #9
0
    def search_terms(self):
        '''Split the cleaned ``keyword`` field value into a list of
        keywords and phrases via :meth:`eulcommon.searchutil.search_terms`.

        The form must already be validated so that cleaned_data is
        available.'''

        def restore_colons(term):
            # put back the colons that were swapped out before splitting
            return re.sub(r'(http|ark);;', r'\1:', term)

        # NOTE: searchutil.search_terms separates single words from
        # exact phrases, but because it also looks for fielded search
        # terms (like title:something or title:"another thing") it can't
        # handle searching on an ARK URI.  As a workaround, known colons
        # that should be preserved are encoded before running search
        # terms and restored afterwards.
        encoded = re.sub(r'(http|ark):', r'\1;;', self.cleaned_data['keyword'])
        return list(map(restore_colons, search_terms(encoded)))
    def test_words(self):
        '''Exercise search_terms with unquoted words, numbers, extra
        whitespace and embedded colons.'''
        expect = self.assertEqual

        # one or more plain words
        expect(['word'], search_terms('word'))
        two_words = search_terms('multiple words')
        expect(['multiple', 'words'], two_words)
        expect(["don't"], search_terms("don't"))

        # a decimal number is kept as a single term
        expect(['one', '2.5'], search_terms('one 2.5'))

        # surrounding and repeated whitespace is discarded
        padded = search_terms('  extraneous      whitespace ')
        expect(['extraneous', 'whitespace'], padded)

        # search_terms should ignore colons: a trailing colon, a
        # field-style word pair and a field-style quoted value are all
        # left intact as part of the term
        trailing_colon = search_terms(' one two: three')
        expect(['one', 'two:', 'three'], trailing_colon)
        field_word = search_terms(' one two:three four')
        expect(['one', 'two:three', 'four'], field_word)
        field_phrase = search_terms(' one two:"three\tfour" five')
        expect(['one', 'two:"three\tfour"', 'five'], field_phrase)