Example #1
 def test_utf8_excerpt(self):
     """Characters should stay in UTF-8."""
     wc = WikiClient()
     page = Document.objects.get(pk=4)
     q = u'fa\xe7on'
     excerpt = wc.excerpt(page.html, q)
     assert q in excerpt, u'%s not in %s' % (q, excerpt)
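A note on the API these tests exercise: across this collection, excerpt(content, query) returns an HTML fragment with the matched terms wrapped in <b> tags and any other markup escaped. A minimal usage sketch, assuming the search.clients module path that a mock in a later example points at; the expected output is taken from one of the excerpt tests below:

    from search.clients import WikiClient  # path as mocked in a later example

    wc = WikiClient()
    # Matched terms are bolded; stray HTML in the content is escaped:
    print wc.excerpt('test</style>', 'test')  # '<b>test</b>&lt;/style&gt;'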
Example #2
    def test_no_syntax_error(self):
        """Test that special chars cannot cause a syntax error."""
        wc = WikiClient()
        results = wc.query('video^$')
        eq_(1, len(results))

        results = wc.query('video^^^$$$^')
        eq_(1, len(results))
Example #3
    def test_exclude_words(self):
        """Excluding words with -word works."""
        wc = WikiClient()
        results = wc.query('spanish')
        eq_(1, len(results))

        results = wc.query('spanish -content')
        eq_(0, len(results))
Example #4
 def test_unicode_excerpt(self):
     """Unicode characters in the excerpt should not be a problem."""
     wc = WikiClient()
     page = Document.objects.get(pk=2)
     try:
         excerpt = wc.excerpt(page.html, u'\u3068')
         render('{{ c }}', {'c': excerpt})
     except UnicodeDecodeError:
         self.fail('Raised UnicodeDecodeError.')
Example #5
 def test_range_filter(self):
     """Test filtering on a range."""
     wc = WikiClient()
     filter_ = ({'filter': 'updated',
                 'max': 1285765791,
                 'min': 1284664176,
                 'range': True},)
     results = wc.query('', filter_)
     eq_(2, len(results))
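Throughout these examples the second argument to query() is an iterable of plain filter dicts. The shapes below are inferred from usage in this collection, not from a documented API, and the import paths are assumptions:

    from search.clients import WikiClient  # path as mocked in a later example
    from search.utils import crc32         # assumed home of the hashing helper

    wc = WikiClient()

    # Attribute filter: match any of the listed values.
    locale_fr = {'filter': 'locale', 'value': (crc32('fr'),)}

    # Exclusion filter: drop results whose attribute matches a value.
    no_cat_30 = {'filter': 'category', 'value': [30], 'exclude': True}

    # Range filter: match attribute values between min and max (inclusive).
    updated = {'filter': 'updated', 'range': True,
               'min': 1284664176, 'max': 1285765791}

    results = wc.query('', (locale_fr, no_cat_30, updated))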
Example #6
 def test_range_filter(self):
     """Test filtering on a range."""
     wc = WikiClient()
     filter_ = ({'filter': 'updated',
                 'max': 1244355125,
                 'min': 1244355115,
                 'range': True},)
     results = wc.query('', filter_)
     eq_(1, len(results))
Example #7
 def test_clean_excerpt(self):
     """SearchClient.excerpt() should not allow disallowed HTML through."""
     wc = WikiClient()  # Index strips HTML
     qc = QuestionsClient()  # Index does not strip HTML
     input = 'test <div>the start of something</div>'
     output_strip = '<b>test</b>  the start of something'
     output_nostrip = ('<b>test</b> &lt;div&gt;the start of '
                       'something&lt;/div&gt;')
     eq_(output_strip, wc.excerpt(input, 'test'))
     eq_(output_nostrip, qc.excerpt(input, 'test'))
Example #8
    def test_translations_inherit_os_values(self):
        wc = WikiClient()
        filters = [{'filter': 'locale', 'value': (crc32('fr'),)},
                   {'filter': 'os', 'value': (1,)}]
        results = wc.query('', filters)
        eq_(1, len(results))
        eq_(4, results[0]['id'])

        filters[1]['value'] = (4,)
        results = wc.query('', filters)
        eq_(0, len(results))
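Note that string attributes (locales, tags, usernames) are filtered by CRC32 hash rather than by raw string, which is why values pass through crc32() above. None of these snippets show that helper; a plausible sketch, assuming it wraps zlib.crc32 over the UTF-8 bytes:

    from zlib import crc32 as _crc32

    def crc32(s):
        """Signed 32-bit CRC of a byte or unicode string (sketch)."""
        if isinstance(s, unicode):
            s = s.encode('utf-8')
        return _crc32(s)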
Example #9
 def test_unicode_excerpt(self):
     """Unicode characters in the excerpt should not be a problem."""
     wc = WikiClient()
     q = 'contribute'
     results = wc.query(q)
     eq_(1, len(results))
     page = WikiPage.objects.get(pk=results[0]['id'])
     try:
         excerpt = wc.excerpt(page.content, q)
         render('{{ c }}', {'c': excerpt})
     except UnicodeDecodeError:
         self.fail('Raised UnicodeDecodeError.')
Example #10
def test_sphinx_down():
    """
    Tests that the client times out when Sphinx is down.
    """
    wc = WikiClient()
    wc.sphinx.SetServer('localhost', 65535)
    assert_raises(SearchError, wc.query, 'test')
Example #11
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""

    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')

    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    wc = WikiClient()
    qc = QuestionsClient()
    site = Site.objects.get_current()
    locale = sphinx_locale(locale_or_default(request.locale))

    results = []
    filters_w = [{'filter': 'locale', 'value': (locale, )}]
    filters_q = [{'filter': 'has_helpful', 'value': (True, )}]

    for client, filter, cls in [(wc, filters_w, Document),
                                (qc, filters_q, Question)]:
        for result in client.query(term, filter, limit=5):
            try:
                result = cls.objects.get(pk=result['id'])
            except cls.DoesNotExist:
                continue
            results.append(result)

    urlize = lambda obj: u'https://%s%s' % (site, obj.get_absolute_url())
    data = [term, [r.title for r in results], [], [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)
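The four-element list built above follows the OpenSearch suggestions JSON format: the echoed term, a list of completion titles, an (unused) descriptions list, and the matching URLs. For a term like 'video' the serialized body would look roughly like this (the title and URL are illustrative, not real data):

    expected = ['video',
                ['Fix common audio and video issues'],
                [],
                ['https://support.mozilla.com/en-US/kb/fix-video']]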
Example #12
 def test_category_exclude_nothing(self):
     """Excluding no categories should return results."""
     clients = ((WikiClient(), 'category'),
                (QuestionsClient(), 'replies'),
                (DiscussionClient(), 'author_ord'))
     for client, filter in clients:
         results = client.query('', ({'filter': filter, 'exclude': True,
                                      'value': []},))
         self.assertNotEquals(0, len(results))
Example #13
    def test_no_filter(self):
        """Test searching with no filters."""
        wc = WikiClient()

        results = wc.query('')
        eq_(6, len(results))
Example #14
 def test_none_content_excerpt(self):
     """SearchClient.excerpt() returns empty string for None type."""
     wc = WikiClient()
     eq_('', wc.excerpt(None, 'test'))
Example #15
 def test_wiki_index_summary(self):
     """The summary field of a revision is indexed."""
     wc = WikiClient()
     results = wc.query('whatever')
     eq_(1, len(results))
     eq_(3, results[0]['id'])
Example #16
 def test_clean_excerpt(self):
     """SearchClient.excerpt() should not allow disallowed HTML through."""
     wc = WikiClient()
     eq_('<b>test</b>&lt;/style&gt;', wc.excerpt('test</style>', 'test'))
Example #17
 def test_indexer(self):
     wc = WikiClient()
     results = wc.query('audio')
     eq_(2, len(results))
Example #18
def _search_suggestions(query, locale):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to

    Items are dicts of:
        {
            'type':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    TODO: ZOMFG this needs to be refactored and the search app should
          provide an internal API. Seriously.

    """

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3

    # Search wiki pages:
    wiki_searcher = WikiClient()
    filters = [{'filter': 'locale',
                'value': (sphinx_locale(locale),)},
               {'filter': 'category',
                'value': [x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x >= 0]},
               {'filter': 'category',
                'exclude': True,
                'value': [-x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x < 0]}]
    raw_results = wiki_searcher.query(query, filters=filters,
                                      limit=WIKI_RESULTS)
    # Lazily build excerpts from results. Stop when we have enough:
    results = []
    for r in raw_results:
        try:
            doc = Document.objects.select_related('current_revision').\
                get(pk=r['id'])
            results.append({
                'type': 'document',
                'object': doc,
            })
        except Document.DoesNotExist:
            pass

    question_searcher = QuestionsClient()
    # questions app is en-US only.
    raw_results = question_searcher.query(query,
                                          limit=QUESTIONS_RESULTS)
    for r in raw_results:
        try:
            q = Question.objects.get(pk=r['attrs']['question_id'])
            results.append({
                'type': 'question',
                'object': q
            })
        except Question.DoesNotExist:
            pass

    return results
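Callers of this version get bare model objects tagged with a type string and render them themselves. Illustrative usage only; both models expose title and get_absolute_url(), as the suggestions view above relies on:

    for item in _search_suggestions('video', 'en-US'):
        obj = item['object']
        print item['type'], obj.title, obj.get_absolute_url()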
Example #19
 def test_ngram_chars(self):
     """Ideographs are handled correctly."""
     wc = WikiClient()
     results = wc.query(u'\u30c1')
     eq_(1, len(results))
     eq_(2, results[0]['id'])
Example #20
 def test_wiki_index_strip_html(self):
     """HTML should be stripped, not indexed."""
     wc = WikiClient()
     results = wc.query('strong')
     eq_(0, len(results))
Example #21
 def test_wiki_index_content(self):
     """Obviously the content should be indexed."""
     wc = WikiClient()
     results = wc.query('video')
     eq_(1, len(results))
     eq_(1, results[0]['id'])
Example #22
 def test_category(self):
     wc = WikiClient()
     results = wc.query('', ({'filter': 'category', 'value': [10]},))
     eq_(5, len(results))
     results = wc.query('', ({'filter': 'category', 'value': [30]},))
     eq_(1, len(results))
Example #23
 def test_clean_excerpt(self):
     """SearchClient.excerpt() should not allow disallowed HTML through."""
     wc = WikiClient()  # Index strips HTML
     input = 'test <div>the start of something</div>'
     output_strip = '<b>test</b>  the start of something'
     eq_(output_strip, wc.excerpt(input, 'test'))
Example #24
    def test_archived(self):
        """Ensure archived articles show only when requested."""
        qs = {'q': 'impalas', 'a': 1, 'w': 1, 'format': 'json',
              'include_archived': 'on'}
        response = self.client.get(reverse('search'), qs)
        results = json.loads(response.content)['results']
        eq_(1, len(results))
        assert results[0]['url'].endswith('archived-article')

        qs = {'q': 'impalas', 'a': 0, 'w': 1, 'format': 'json'}
        response = self.client.get(reverse('search'), qs)
        results = json.loads(response.content)['results']
        eq_([], results)


query = lambda *args, **kwargs: WikiClient().query(*args, **kwargs)


@mock.patch('search.clients.WikiClient')
def test_excerpt_timeout(sphinx_mock):
    def sphinx_error(cls):
        raise cls

    sphinx_mock.query.side_effect = lambda *a: sphinx_error(socket.timeout)
    assert_raises(SearchError, query, 'xxx')

    sphinx_mock.query.side_effect = lambda *a: sphinx_error(Exception)
    assert_raises(SearchError, query, 'xxx')
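test_sphinx_down and test_excerpt_timeout together pin down the client's error contract: whatever the underlying Sphinx call raises, including socket timeouts, must surface as SearchError. A minimal sketch of that wrapping; the method and helper names here are assumptions, not the client's actual source:

    import socket

    class SearchError(Exception):
        pass

    class SearchClient(object):
        def query(self, *args, **kwargs):
            try:
                return self._run_query(*args, **kwargs)  # hypothetical internal call
            except socket.timeout:
                raise SearchError('Query timed out.')
            except Exception, e:
                raise SearchError('Sphinx raised an unknown exception: %s' % e)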
Example #25
def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(json.dumps(
            {'error': _('Invalid callback function.')}),
                            mimetype=mimetype,
                            status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', [x for x in category if x > 0])
    exclude_category = [abs(x) for x in category if x < 0]

    try:
        fx = map(int, r.getlist('fx')) or [v.id for v in FIREFOX_VERSIONS]
    except ValueError:
        fx = [v.id for v in FIREFOX_VERSIONS]
    r.setlist('fx', fx)

    try:
        os = map(int, r.getlist('os')) or [o.id for o in OPERATING_SYSTEMS]
    except ValueError:
        os = [o.id for o in OPERATING_SYSTEMS]
    r.setlist('os', os)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(json.dumps(
                {'error': _('Invalid search data.')}),
                                mimetype=mimetype,
                                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t, {
            'advanced': a,
            'request': request,
            'search_form': search_form
        })
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data
    search_locale = (sphinx_locale(language), )

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    documents = []
    filters_w = []
    filters_q = []
    filters_f = []

    # wiki filters
    # Version and OS filters
    if cleaned['fx']:
        filters_w.append({
            'filter': 'fx',
            'value': cleaned['fx'],
        })

    if cleaned['os']:
        filters_w.append({
            'filter': 'os',
            'value': cleaned['os'],
        })

    # Category filter
    if cleaned['category']:
        filters_w.append({
            'filter': 'category',
            'value': cleaned['category'],
        })

    if exclude_category:
        filters_w.append({
            'filter': 'category',
            'value': exclude_category,
            'exclude': True,
        })

    # Locale filter
    filters_w.append({
        'filter': 'locale',
        'value': search_locale,
    })

    # Tags filter
    tags = [crc32(t.strip()) for t in cleaned['tags'].split()]
    if tags:
        for t in tags:
            filters_w.append({
                'filter': 'tag',
                'value': (t, ),
            })
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        toggle_filters = ('is_locked', 'is_solved', 'has_answers',
                          'has_helpful')
        for filter_name in toggle_filters:
            if cleaned[filter_name] == constants.TERNARY_YES:
                filters_q.append({
                    'filter': filter_name,
                    'value': (True, ),
                })
            if cleaned[filter_name] == constants.TERNARY_NO:
                filters_q.append({
                    'filter': filter_name,
                    'value': (False, ),
                })

        if cleaned['asked_by']:
            filters_q.append({
                'filter': 'question_creator',
                'value': (crc32(cleaned['asked_by']), ),
            })

        if cleaned['answered_by']:
            filters_q.append({
                'filter': 'answer_creator',
                'value': (crc32(cleaned['answered_by']), ),
            })

        q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()]
        if q_tags:
            for t in q_tags:
                filters_q.append({
                    'filter': 'tag',
                    'value': (t, ),
                })

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            filters_f.append({
                'filter': 'author_ord',
                'value': (crc32(cleaned['author']), ),
            })

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                filters_f.append({
                    'filter': 'is_sticky',
                    'value': (1, ),
                })

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                filters_f.append({
                    'filter': 'is_locked',
                    'value': (1, ),
                })

        if cleaned['forum']:
            filters_f.append({
                'filter': 'forum_id',
                'value': cleaned['forum'],
            })

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (('created', cleaned['created'],
                         cleaned['created_date']),
                        ('updated', cleaned['updated'],
                         cleaned['updated_date']), ('question_votes',
                                                    cleaned['num_voted'],
                                                    cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {
                'range': True,
                'filter': filter_name,
                'min': 0,
                'max': max(filter_date, 0),
            }
            if filter_name != 'question_votes':
                filters_f.append(before)
            filters_q.append(before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {
                'range': True,
                'filter': filter_name,
                'min': min(filter_date, unix_now),
                'max': unix_now,
            }
            if filter_name != 'question_votes':
                filters_f.append(after)
            filters_q.append(after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        if cleaned['w'] & constants.WHERE_WIKI:
            wc = WikiClient()  # Wiki SearchClient instance
            # Execute the query and append to documents
            documents += wc.query(cleaned['q'], filters_w)

        if cleaned['w'] & constants.WHERE_SUPPORT:
            qc = QuestionsClient()  # Support question SearchClient instance

            # Sort results by
            try:
                qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0],
                                 constants.SORT_QUESTIONS[sortby][1])
            except IndexError:
                pass

            documents += qc.query(cleaned['q'], filters_q)

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            dc = DiscussionClient()  # Discussion forums SearchClient instance

            # Sort results by
            try:
                dc.groupsort = constants.GROUPSORT[sortby]
            except IndexError:
                pass

            documents += dc.query(cleaned['q'], filters_f)

    except SearchError:
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                mimetype=mimetype,
                                status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

    results = []
    for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE):
        try:
            if documents[i]['attrs'].get('category', False) != False:
                wiki_page = Document.objects.get(pk=documents[i]['id'])
                summary = wiki_page.current_revision.summary

                result = {
                    'search_summary': summary,
                    'url': wiki_page.get_absolute_url(),
                    'title': wiki_page.title,
                    'type': 'document',
                }
                results.append(result)
            elif documents[i]['attrs'].get('question_creator', False) != False:
                question = Question.objects.get(
                    pk=documents[i]['attrs']['question_id'])

                excerpt = qc.excerpt(question.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {
                    'search_summary': summary,
                    'url': question.get_absolute_url(),
                    'title': question.title,
                    'type': 'question',
                }
                results.append(result)
            else:
                thread = Thread.objects.get(
                    pk=documents[i]['attrs']['thread_id'])
                post = Post.objects.get(pk=documents[i]['id'])

                excerpt = dc.excerpt(post.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {
                    'search_summary': summary,
                    'url': thread.get_absolute_url(),
                    'title': thread.title,
                    'type': 'thread',
                }
                results.append(result)
        except IndexError:
            break
        except ObjectDoesNotExist:
            continue

    items = [(k, v) for k in search_form.fields for v in r.getlist(k)
             if v and k != 'a']
    items.append(('a', '2'))

    refine_query = u'?%s' % urlencode(items)

    if is_json:
        data = {}
        data['results'] = results
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    results_ = jingo.render(
        request, template, {
            'num_results': len(documents),
            'results': results,
            'q': cleaned['q'],
            'pages': pages,
            'w': cleaned['w'],
            'refine_query': refine_query,
            'search_form': search_form,
            'lang_name': lang_name,
        })
    results_['Cache-Control'] = 'max-age=%s' % \
                                (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (datetime.utcnow() +
                           timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
                           .strftime(expires_fmt)
    return results_
Example #26
 def test_empty_content_excerpt(self):
     """SearchClient.excerpt() returns empty string for empty content."""
     wc = WikiClient()
     eq_('', wc.excerpt('', 'test'))
Example #27
 def test_wiki_index_keywords(self):
     """The keywords field of a revision is indexed."""
     wc = WikiClient()
     results = wc.query('foobar')
     eq_(1, len(results))
     eq_(3, results[0]['id'])
Example #28
 def test_indexer(self):
     wc = WikiClient()
     results = wc.query('practice')
     eq_(2, len(results))
Example #29
def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    mimetype = "application/x-javascript" if callback else "application/json"

    # Search "Expires" header format
    expires_fmt = "%A, %d %B %Y %H:%M:%S GMT"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(json.dumps({"error": _("Invalid callback function.")}), mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get("language", request.locale))
    r = request.GET.copy()
    a = request.GET.get("a", "0")

    # Search default values
    try:
        category = map(int, r.getlist("category")) or settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist("category", [x for x in category if x > 0])
    exclude_category = [abs(x) for x in category if x < 0]

    # Basic form
    if a == "0":
        r["w"] = r.get("w", constants.WHERE_BASIC)
    # Advanced form
    if a == "2":
        r["language"] = language
        r["a"] = "1"

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if "include_archived" not in r:
        r["include_archived"] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == "2":
        if is_json:
            return HttpResponse(json.dumps({"error": _("Invalid search data.")}), mimetype=mimetype, status=400)

        t = template if request.MOBILE else "search/form.html"
        search_ = jingo.render(request, t, {"advanced": a, "request": request, "search_form": search_form})
        search_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data
    search_locale = (sphinx_locale(language),)

    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ""

    documents = []
    filters_w = []
    filters_q = []
    filters_f = []

    # wiki filters
    # Category filter
    if cleaned["category"]:
        filters_w.append({"filter": "category", "value": cleaned["category"]})

    if exclude_category:
        filters_w.append({"filter": "category", "value": exclude_category, "exclude": True})

    # Locale filter
    filters_w.append({"filter": "locale", "value": search_locale})

    # Product filter
    products = cleaned["product"]
    if products:
        for p in products:
            filters_w.append({"filter": "tag", "value": (crc32(p),)})

    # Tags filter
    tags = [crc32(t.strip()) for t in cleaned["tags"].split()]
    if tags:
        for t in tags:
            filters_w.append({"filter": "tag", "value": (t,)})

    # Archived bit
    if a == "0" and not cleaned["include_archived"]:
        # Default to NO for basic search:
        cleaned["include_archived"] = False
    if not cleaned["include_archived"]:
        filters_w.append({"filter": "is_archived", "value": (False,)})
    # End of wiki filters

    # Support questions specific filters
    if cleaned["w"] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == "0" and not cleaned["has_helpful"]:
            cleaned["has_helpful"] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful")
        filters_q.extend(
            _ternary_filter(filter_name, cleaned[filter_name])
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )

        if cleaned["asked_by"]:
            filters_q.append({"filter": "question_creator", "value": (crc32(cleaned["asked_by"]),)})

        if cleaned["answered_by"]:
            filters_q.append({"filter": "answer_creator", "value": (crc32(cleaned["answered_by"]),)})

        q_tags = [crc32(t.strip()) for t in cleaned["q_tags"].split()]
        if q_tags:
            for t in q_tags:
                filters_q.append({"filter": "tag", "value": (t,)})

    # Discussion forum specific filters
    if cleaned["w"] & constants.WHERE_DISCUSSION:
        if cleaned["author"]:
            filters_f.append({"filter": "author_ord", "value": (crc32(cleaned["author"]),)})

        if cleaned["thread_type"]:
            if constants.DISCUSSION_STICKY in cleaned["thread_type"]:
                filters_f.append({"filter": "is_sticky", "value": (1,)})

            if constants.DISCUSSION_LOCKED in cleaned["thread_type"]:
                filters_f.append({"filter": "is_locked", "value": (1,)})

        if cleaned["forum"]:
            filters_f.append({"filter": "forum_id", "value": cleaned["forum"]})

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ("created", cleaned["created"], cleaned["created_date"]),
        ("updated", cleaned["updated"], cleaned["updated_date"]),
        ("question_votes", cleaned["num_voted"], cleaned["num_votes"]),
    )
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {"range": True, "filter": filter_name, "min": 0, "max": max(filter_date, 0)}
            if filter_name != "question_votes":
                filters_f.append(before)
            filters_q.append(before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {"range": True, "filter": filter_name, "min": min(filter_date, unix_now), "max": unix_now}
            if filter_name != "question_votes":
                filters_f.append(after)
            filters_q.append(after)

    sortby = smart_int(request.GET.get("sortby"))
    try:
        if cleaned["w"] & constants.WHERE_WIKI:
            wc = WikiClient()  # Wiki SearchClient instance
            # Execute the query and append to documents
            documents += wc.query(cleaned["q"], filters_w)

        if cleaned["w"] & constants.WHERE_SUPPORT:
            qc = QuestionsClient()  # Support question SearchClient instance

            # Sort results by
            try:
                qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0], constants.SORT_QUESTIONS[sortby][1])
            except IndexError:
                pass

            documents += qc.query(cleaned["q"], filters_q)

        if cleaned["w"] & constants.WHERE_DISCUSSION:
            dc = DiscussionClient()  # Discussion forums SearchClient instance

            # Sort results by
            try:
                dc.groupsort = constants.GROUPSORT[sortby]
            except IndexError:
                pass

            documents += dc.query(cleaned["q"], filters_f)

    except SearchError:
        if is_json:
            return HttpResponse(json.dumps({"error": _("Search Unavailable")}), mimetype=mimetype, status=503)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return jingo.render(request, t, {"q": cleaned["q"]}, status=503)

    pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

    results = []
    for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE):
        try:
            if documents[i]["attrs"].get("category", False) != False:
                wiki_page = Document.objects.get(pk=documents[i]["id"])
                summary = wiki_page.current_revision.summary

                result = {
                    "search_summary": summary,
                    "url": wiki_page.get_absolute_url(),
                    "title": wiki_page.title,
                    "type": "document",
                    "rank": i,
                    "object": wiki_page,
                }
                results.append(result)
            elif documents[i]["attrs"].get("question_creator", False) != False:
                question = Question.objects.get(pk=documents[i]["attrs"]["question_id"])

                excerpt = qc.excerpt(question.content, cleaned["q"])
                summary = jinja2.Markup(excerpt)

                result = {
                    "search_summary": summary,
                    "url": question.get_absolute_url(),
                    "title": question.title,
                    "type": "question",
                    "rank": i,
                    "object": question,
                }
                results.append(result)
            else:
                thread = Thread.objects.get(pk=documents[i]["attrs"]["thread_id"])
                post = Post.objects.get(pk=documents[i]["id"])

                excerpt = dc.excerpt(post.content, cleaned["q"])
                summary = jinja2.Markup(excerpt)

                result = {
                    "search_summary": summary,
                    "url": thread.get_absolute_url(),
                    "title": thread.title,
                    "type": "thread",
                    "rank": i,
                    "object": thread,
                }
                results.append(result)
        except IndexError:
            break
        except ObjectDoesNotExist:
            continue

    items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"]
    items.append(("a", "2"))

    refine_query = u"?%s" % urlencode(items)

    if is_json:
        # Models are not json serializable.
        for r in results:
            del r["object"]
        data = {}
        data["results"] = results
        data["total"] = len(results)
        data["query"] = cleaned["q"]
        if not results:
            data["message"] = _("No pages matched the search criteria")
        json_data = json.dumps(data)
        if callback:
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, mimetype=mimetype)

    results_ = jingo.render(
        request,
        template,
        {
            "num_results": len(documents),
            "results": results,
            "q": cleaned["q"],
            "pages": pages,
            "w": cleaned["w"],
            "refine_query": refine_query,
            "search_form": search_form,
            "lang_name": lang_name,
        },
    )
    results_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt)
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, cleaned["q"], max_age=3600, secure=False, httponly=False)
    return results_
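This newer view calls a _ternary_filter() helper that the snippet does not include. Reconstructed from the equivalent inline loop in the older search view earlier in this collection (TERNARY_YES maps to a (True,) value, TERNARY_NO to (False,)), it presumably looks like:

    def _ternary_filter(filter_name, ternary_value):
        """Return a filter dict for a YES/NO ternary field (sketch)."""
        return {'filter': filter_name,
                'value': (ternary_value == constants.TERNARY_YES,)}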
Example #30
 def test_clean_hyphens(self):
     """Hyphens in words aren't special characters."""
     wc = WikiClient()
     results = wc.query('marque-page')
     eq_(1, len(results))
Example #31
 def test_no_redirects(self):
     """Redirect articles should never appear in search results."""
     wc = WikiClient()
     results = wc.query('ghosts')
     eq_(1, len(results))
Example #32
File: views.py Project: tantek/kuma
def _search_suggestions(query, locale):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to

    Items returned are dicts:
        { 'url': URL where the article can be viewed,
          'title': Title of the article,
          'excerpt_html': Excerpt of the article with search terms highlighted,
                          formatted in HTML }

    Weights wiki pages infinitely higher than questions at the moment.

    TODO: ZOMFG this needs to be refactored and the search app should
          provide an internal API. Seriously.

    """
    def prepare(result, model, attr, searcher, result_to_id):
        """Turn a search result from a Sphinx client into a dict for templates.

        Return {} if an object corresponding to the result cannot be found.

        """
        try:
            obj = model.objects.get(pk=result_to_id(result))
        except ObjectDoesNotExist:
            return {}
        return {'url': obj.get_absolute_url(),
                'title': obj.title,
                'excerpt_html': searcher.excerpt(getattr(obj, attr), query)}

    max_suggestions = settings.QUESTIONS_MAX_SUGGESTIONS
    query_limit = max_suggestions + settings.QUESTIONS_SUGGESTION_SLOP

    # Search wiki pages:
    wiki_searcher = WikiClient()
    filters = [{'filter': 'locale',
                'value': (sphinx_locale(locale),)},
               {'filter': 'category',
                'value': [x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x >= 0]},
               {'filter': 'category',
                'exclude': True,
                'value': [-x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x < 0]}]
    raw_results = wiki_searcher.query(query, filters=filters,
                                      limit=query_limit)
    # Lazily build excerpts from results. Stop when we have enough:
    results = islice((p for p in
                       (prepare(r, Document, 'html', wiki_searcher,
                                lambda x: x['id'])
                        for r in raw_results) if p),
                     max_suggestions)
    results = list(results)

    # If we didn't find enough wiki pages to fill the page, pad it out with
    # other questions:
    if len(results) < max_suggestions:
        question_searcher = QuestionsClient()
        # questions app is en-US only.
        raw_results = question_searcher.query(query,
                                              limit=query_limit - len(results))
        results.extend(islice((p for p in
                               (prepare(r, Question, 'content',
                                        question_searcher,
                                        lambda x: x['attrs']['question_id'])
                                for r in raw_results) if p),
                              max_suggestions - len(results)))

    return results
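The generator-plus-islice arrangement above is lazy filtering: excerpts are built only for results whose prepare() lookup succeeds, and iteration stops as soon as enough suggestions are collected. A stripped-down equivalent of the pattern:

    from itertools import islice

    def take_nonempty(prepared, n):
        """Yield at most n truthy items, consuming the input lazily."""
        return islice((p for p in prepared if p), n)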
Example #33
def search(request):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', [x for x in category if x > 0])
    exclude_category = [abs(x) for x in category if x < 0]

    try:
        fx = map(int, r.getlist('fx')) or [v.id for v in FIREFOX_VERSIONS]
    except ValueError:
        fx = [v.id for v in FIREFOX_VERSIONS]
    #r.setlist('fx', fx)

    try:
        os = map(int, r.getlist('os')) or [o.id for o in OPERATING_SYSTEMS]
    except ValueError:
        os = [o.id for o in OPERATING_SYSTEMS]
    #r.setlist('os', os)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        search_ = jingo.render(request, 'search/form.html',
                            {'advanced': a, 'request': request,
                             'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data
    search_locale = (sphinx_locale(language),)

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    documents = []
    filters_w = []
    filters_q = []
    filters_f = []

    # wiki filters
    # Version and OS filters
    if cleaned['fx']:
        filters_w.append({
            'filter': 'fx',
            'value': cleaned['fx'],
        })

    if cleaned['os']:
        filters_w.append({
            'filter': 'os',
            'value': cleaned['os'],
        })

    # Category filter
    if cleaned['category']:
        filters_w.append({
            'filter': 'category',
            'value': cleaned['category'],
        })

    if exclude_category:
        filters_w.append({
            'filter': 'category',
            'value': exclude_category,
            'exclude': True,
        })

    # Locale filter
    filters_w.append({
        'filter': 'locale',
        'value': search_locale,
    })

    # Tags filter
    tags = [crc32(t.strip()) for t in cleaned['tags'].split()]
    if tags:
        for t in tags:
            filters_w.append({
                'filter': 'tag',
                'value': (t,),
                })
    # End of wiki filters

    """
    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['is_solved']:
            cleaned['is_solved'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        toggle_filters = ('is_locked', 'is_solved', 'has_answers',
                          'has_helpful')
        for filter_name in toggle_filters:
            if cleaned[filter_name] == constants.TERNARY_YES:
                filters_q.append({
                    'filter': filter_name,
                    'value': (True,),
                })
            if cleaned[filter_name] == constants.TERNARY_NO:
                filters_q.append({
                    'filter': filter_name,
                    'value': (False,),
                })

        if cleaned['asked_by']:
            filters_q.append({
                'filter': 'question_creator',
                'value': (crc32(cleaned['asked_by']),),
            })

        if cleaned['answered_by']:
            filters_q.append({
                'filter': 'answer_creator',
                'value': (crc32(cleaned['answered_by']),),
            })

        q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()]
        if q_tags:
            for t in q_tags:
                filters_q.append({
                    'filter': 'tag',
                    'value': (t,),
                    })

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            filters_f.append({
                'filter': 'author_ord',
                'value': (crc32(cleaned['author']),),
            })

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                filters_f.append({
                    'filter': 'is_sticky',
                    'value': (1,),
                })

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                filters_f.append({
                    'filter': 'is_locked',
                    'value': (1,),
                })

        if cleaned['forum']:
            filters_f.append({
                'filter': 'forum_id',
                'value': cleaned['forum'],
            })
    """
    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
#        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
#        ('question_votes', cleaned['num_voted'], cleaned['num_votes'])
    )
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {
                'range': True,
                'filter': filter_name,
                'min': 0,
                'max': max(filter_date, 0),
            }
            if filter_name != 'question_votes':
                filters_f.append(before)
            filters_q.append(before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {
                'range': True,
                'filter': filter_name,
                'min': min(filter_date, unix_now),
                'max': unix_now,
            }
            if filter_name != 'question_votes':
                filters_f.append(after)
            filters_q.append(after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        if cleaned['w'] & constants.WHERE_WIKI:
            wc = WikiClient()  # Wiki SearchClient instance
            # Execute the query and append to documents
            documents += wc.query(cleaned['q'], filters_w)

        if cleaned['w'] & constants.WHERE_SUPPORT:
            qc = QuestionsClient()  # Support question SearchClient instance

            # Sort results by
            try:
                qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0],
                                 constants.SORT_QUESTIONS[sortby][1])
            except IndexError:
                pass

            documents += qc.query(cleaned['q'], filters_q)

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            dc = DiscussionClient()  # Discussion forums SearchClient instance

            # Sort results by
            try:
                dc.groupsort = constants.GROUPSORT[sortby]
            except IndexError:
                pass

            documents += dc.query(cleaned['q'], filters_f)

    except SearchError:
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        return jingo.render(request, 'search/down.html', {}, status=503)

    pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

    results = []
    for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE):
        try:
            if documents[i]['attrs'].get('category', False) != False:
                wiki_page = Document.objects.get(pk=documents[i]['id'])
                summary = wiki_page.current_revision.summary

                result = {'search_summary': summary,
                          'url': wiki_page.get_absolute_url(),
                          'title': wiki_page.title,
                          'type': 'document', }
                results.append(result)
            elif documents[i]['attrs'].get('question_creator', False) != False:
                question = Question.objects.get(
                    pk=documents[i]['attrs']['question_id'])

                excerpt = qc.excerpt(question.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {'search_summary': summary,
                          'url': question.get_absolute_url(),
                          'title': question.title,
                          'type': 'question', }
                results.append(result)
            else:
                thread = Thread.objects.get(
                    pk=documents[i]['attrs']['thread_id'])
                post = Post.objects.get(pk=documents[i]['id'])

                excerpt = dc.excerpt(post.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {'search_summary': summary,
                          'url': thread.get_absolute_url(),
                          'title': thread.title,
                          'type': 'thread', }
                results.append(result)
        except IndexError:
            break
        except ObjectDoesNotExist:
            continue

    items = [(k, v) for k in search_form.fields for
             v in r.getlist(k) if v and k != 'a']
    items.append(('a', '2'))

    refine_query = u'?%s' % urlencode(items)

    if is_json:
        data = {}
        data['results'] = results
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    results_ = jingo.render(request, 'search/results.html',
        {'num_results': len(documents), 'results': results, 'q': cleaned['q'],
         'pages': pages, 'w': cleaned['w'], 'refine_query': refine_query,
         'search_form': search_form, 'lang_name': lang_name, })
    results_['Cache-Control'] = 'max-age=%s' % \
                                (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (datetime.utcnow() +
                           timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
                           .strftime(expires_fmt)
    return results_