Example #1
def _build_es_excerpt(result):
    """Return concatenated search excerpts.

    :arg result: The result object from the queryset results

    """
    excerpt = EXCERPT_JOINER.join([m.strip() for m in chain(*result._highlighted.values()) if m])

    return jinja2.Markup(clean_excerpt(excerpt))
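
For reference, a minimal, self-contained sketch of what this helper produces. The EXCERPT_JOINER value and the FakeResult class are assumptions here: in the real project the joiner is a localizable separator string and the result object comes back from an elasticutils search.

from itertools import chain

EXCERPT_JOINER = u'...'  # assumption: separator placed between fragments


class FakeResult(object):
    # Hypothetical stand-in for an elasticutils result: _highlighted maps
    # field names to lists of highlighted fragments.
    _highlighted = {
        'document_content': [u'  clearing <b>cookies</b> in Firefox ', u''],
    }


excerpt = EXCERPT_JOINER.join(
    m.strip() for m in chain(*FakeResult._highlighted.values()) if m)
print(excerpt)  # clearing <b>cookies</b> in Firefox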
Example #2
    def test_utf8_excerpt(self):
        """Characters should stay in UTF-8."""
        q = u'fa\xe7on'
        ws = (wiki_searcher().highlight('html').query(u'fa\xe7on').values_dict(
            'html'))

        results = list(ws)
        # page = Document.objects.get(pk=4)
        excerpt = clean_excerpt(ws.excerpt(results[0])[0][0])
        assert q in excerpt, u'%s not in %s' % (q, excerpt)
Example #3
    def test_utf8_excerpt(self):
        """Characters should stay in UTF-8."""
        q = u'fa\xe7on'
        ws = (wiki_search.highlight('html')
                         .query(u'fa\xe7on')
                         .values_dict('html'))

        results = list(ws)
        # page = Document.objects.get(pk=4)
        excerpt = clean_excerpt(ws.excerpt(results[0])[0])
        assert q in excerpt, u'%s not in %s' % (q, excerpt)
Example #4
def _build_es_excerpt(result):
    """Return concatenated search excerpts.

    :arg result: The result object from the queryset results

    """
    excerpt = EXCERPT_JOINER.join(
        [m.strip() for m in
         chain(*result._highlight.values()) if m])

    return jinja2.Markup(clean_excerpt(excerpt))
Example #5
def _build_excerpt(searcher, model_obj):
    """Return concatenated search excerpts for Sphinx.

    :arg searcher: The ``S`` object that did the search
    :arg model_obj: The model object returned by the search

    """
    try:
        excerpt = EXCERPT_JOINER.join([m.strip() for m in chain(*searcher.excerpt(model_obj)) if m])
    except ExcerptTimeoutError:
        statsd.incr("search.excerpt.timeout")
        excerpt = u""
    except ExcerptSocketError:
        statsd.incr("search.excerpt.socketerror")
        excerpt = u""

    return jinja2.Markup(clean_excerpt(excerpt))
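
The Sphinx variant degrades gracefully: excerpting failures increment a statsd counter and the user gets an empty excerpt instead of an error page. A sketch of that fallback pattern, with hypothetical stand-ins for the project's exception classes and statsd client:

class ExcerptTimeoutError(Exception):
    """Hypothetical stand-in for the project's timeout exception."""


class ExcerptSocketError(Exception):
    """Hypothetical stand-in for the project's socket exception."""


class FakeStatsd(object):
    # Stand-in for the statsd client used in the examples.
    def incr(self, key):
        print('statsd incr: %s' % key)


statsd = FakeStatsd()


def build_excerpt_safely(build_fn):
    # Run the excerpt builder; on known transient failures, record a
    # metric and fall back to an empty excerpt.
    try:
        return build_fn()
    except ExcerptTimeoutError:
        statsd.incr('search.excerpt.timeout')
    except ExcerptSocketError:
        statsd.incr('search.excerpt.socketerror')
    return u''


print(build_excerpt_safely(lambda: u'a <b>highlighted</b> excerpt'))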
Example #6
def _search_suggestions(request, text, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    :arg text: full text to search on
    :arg locale: locale to limit to
    :arg tags: list of tags to filter questions on
    :arg product_slugs: list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of::

        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    :returns: up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    results = []
    try:
        query = dict(('%s__text' % field, text)
                      for field in Document.get_query_fields())
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(or_=query)
                  .values_dict('id')[:WIKI_RESULTS])
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        query = dict(('%s__text' % field, text)
                      for field in Question.get_query_fields())
        raw_results = (question_s.query(or_=query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])
        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')
        log.debug(exc)

    return results
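
The query construction in this example fans a single search string out over every query field and lets elasticutils OR the clauses together via the or_ keyword. A tiny runnable sketch of the dict being built, with hypothetical field names in place of Document.get_query_fields():

text = u'how do I clear cookies'
query_fields = ['document_title', 'document_content']  # assumption

query = dict(('%s__text' % field, text) for field in query_fields)
print(query)
# {'document_title__text': u'how do I clear cookies',
#  'document_content__text': u'how do I clear cookies'}
# wiki_s.query(or_=query) then matches documents satisfying any clause.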
Example #7
    def test_clean_excerpt(self):
        """clean_excerpt() should not allow disallowed HTML through."""
        in_ = '<b>test</b> <div>the start of something</div>'
        out_ = '<b>test</b> &lt;div&gt;the start of something&lt;/div&gt;'
        eq_(out_, clean_excerpt(in_))
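
The test pins down the contract: highlighter markup (<b>) passes through while everything else is escaped. One way to implement a clean_excerpt with that behavior is via the bleach library; this is a sketch, not necessarily the project's actual implementation:

import bleach


def clean_excerpt(excerpt):
    # bleach escapes (rather than strips) disallowed tags by default, so
    # <div> becomes &lt;div&gt; while whitelisted <b> survives.
    return bleach.clean(excerpt, tags=['b'])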
Example #8
def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = wiki_search
    question_s = question_search
    discussion_s = discussion_search

    documents = []

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_s = question_s.filter(tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            wiki_s = wiki_s.query(cleaned_q)[:max_results]
            # Execute the query and append to documents
            documents += [('wiki', (pair[0], pair[1]))
                          for pair in enumerate(wiki_s.object_ids())]

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            question_s = question_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            question_s = question_s.query(cleaned_q)[:max_results]
            documents += [('question', (pair[0], pair[1]))
                          for pair in enumerate(question_s.object_ids())]

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            # Sort results by
            try:
                # Note that the first attribute needs to be the same
                # here and in forums/models.py discussion_search.
                discussion_s = discussion_s.group_by(
                    'thread_id', constants.GROUPSORT[sortby])
            except IndexError:
                pass

            discussion_s = discussion_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            discussion_s = discussion_s.query(cleaned_q)[:max_results]
            documents += [('discussion', (pair[0], pair[1]))
                          for pair in enumerate(discussion_s.object_ids())]

    except SearchError:
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

    # Build a dict of { type_ -> list of indexes } for the specific
    # docs that we're going to display on this page.  This makes it
    # easy for us to slice the appropriate search Ss so we're limiting
    # our db hits to just the items we're showing.
    documents_dict = {}
    for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
        documents_dict.setdefault(doc[0], []).append(doc[1][0])

    docs_for_page = []
    for type_, search_s in [('wiki', wiki_s),
                            ('question', question_s),
                            ('discussion', discussion_s)]:
        if type_ not in documents_dict:
            continue

        # documents_dict[type_] is a list of indexes--one for each
        # object id search result for that type_.  We use the values
        # at the beginning and end of the list for slice boundaries.
        begin = documents_dict[type_][0]
        end = documents_dict[type_][-1] + 1
        docs_for_page += [(type_, doc) for doc in search_s[begin:end]]

    results = []
    for i, docinfo in enumerate(docs_for_page):
        rank = i + offset
        type_, doc = docinfo
        try:
            if type_ == 'wiki':
                summary = doc.current_revision.summary

                result = {
                    'search_summary': summary,
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'rank': rank,
                    'object': doc,
                }
                results.append(result)

            elif type_ == 'question':
                try:
                    excerpt = question_s.excerpt(doc)[0]
                except ExcerptTimeoutError:
                    statsd.incr('search.excerpt.timeout')
                    excerpt = u''
                except ExcerptSocketErrorError:
                    statsd.incr('search.excerpt.socketerror')
                    excerpt = u''

                summary = jinja2.Markup(clean_excerpt(excerpt))

                result = {
                    'search_summary': summary,
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'question',
                    'rank': rank,
                    'object': doc,
                }
                results.append(result)

            else:
                # discussion_s is based on Post--not Thread, so we have
                # to get this manually.
                thread = Thread.objects.get(pk=doc.thread_id)

                try:
                    excerpt = discussion_s.excerpt(doc)[0]
                except ExcerptTimeoutError:
                    statsd.incr('search.excerpt.timeout')
                    excerpt = u''
                except ExcerptSocketErrorError:
                    statsd.incr('search.excerpt.socketerror')
                    excerpt = u''

                summary = jinja2.Markup(clean_excerpt(excerpt))

                result = {
                    'search_summary': summary,
                    'url': thread.get_absolute_url(),
                    'title': thread.title,
                    'type': 'thread',
                    'rank': rank,
                    'object': thread,
                }
                results.append(result)
        except IndexError:
            break
        except ObjectDoesNotExist:
            continue

    items = [(k, v) for k in search_form.fields for
             v in r.getlist(k) if v and k != 'a']
    items.append(('a', '2'))

    if is_json:
        # Models are not json serializable.
        for r in results:
            del r['object']
        data = {}
        data['results'] = results
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    results_ = jingo.render(request, template,
        {'num_results': len(documents), 'results': results, 'q': cleaned['q'],
         'pages': pages, 'w': cleaned['w'],
         'search_form': search_form, 'lang_name': lang_name, })
    results_['Cache-Control'] = 'max-age=%s' % \
                                (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (datetime.utcnow() +
                           timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
                           .strftime(expires_fmt)
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)
    return results_
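
For JSONP requests the view wraps the JSON payload in the caller-supplied callback, so validating the callback name first is what keeps the response from becoming a script-injection vector. A sketch of a validator like jsonp_is_valid (hypothetical rules; the project's version may differ):

import re


def jsonp_is_valid(func):
    # Allow identifier-style names, optionally dotted (e.g. 'foo.bar'),
    # and nothing else -- no parentheses, quotes, or whitespace.
    return bool(re.match(
        r'^[a-zA-Z_$][a-zA-Z0-9_$]*(\.[a-zA-Z_$][a-zA-Z0-9_$]*)*$', func))


print(jsonp_is_valid('jQuery17_success'))  # True
print(jsonp_is_valid('alert(1);//'))       # False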
Example #9
def _search_suggestions(request, query, locale, category_tags):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
        question_s = Question.search()
        wiki_s = Document.search()
    else:
        engine = 'sphinx'
        question_s = question_searcher(request)
        wiki_s = wiki_searcher(request)

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(tag__in=category_tags)
        wiki_s = wiki_s.filter(tag__in=category_tags)

    try:
        raw_results = (
            wiki_s.filter(locale=locale,
                          category__in=settings.SEARCH_DEFAULT_CATEGORIES)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (question_s.query(query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q
                })
            except Question.DoesNotExist:
                pass

    except (SearchError, ESTimeoutError,
            ESMaxRetryError, ESException) as exc:
        if isinstance(exc, SearchError):
            statsd.incr('questions.suggestions.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.%s.elasticsearchexception' %
                        engine)

        return []

    return results
Example #10
def _search_suggestions(request, text, locale, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    :arg text: full text to search on
    :arg locale: locale to limit to
    :arg product_slugs: list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of::

        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    :returns: up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(product__in=product_slugs)
        question_s = question_s.filter(product__in=product_slugs)

    results = []
    try:
        # Search for relevant KB documents.
        query = dict(('%s__text' % field, text)
                      for field in Document.get_query_fields())
        query.update(dict(('%s__text_phrase' % field, text)
                      for field in Document.get_query_fields()))
        filter = F()
        filter |= F(document_locale=locale)
        filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
        filter &= F(document_category__in=default_categories)
        filter &= F(document_is_archived=False)

        raw_results = (
            wiki_s.filter(filter)
                  .query(or_=query)
                  .values_dict('id')[:WIKI_RESULTS])
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Search for relevant questions.
        query = dict(('%s__text' % field, text)
                      for field in Question.get_query_fields())
        query.update(dict(('%s__text_phrase' % field, text)
                      for field in Question.get_query_fields()))

        max_age = int(time.time()) - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        # Filter questions by language. Questions should be either in English
        # or in the locale's language. This is because we have some questions
        # marked English that are really in other languages. The assumption
        # being that if a native speakers submits a query in given language,
        # the items that are written in that language will automatically match
        # better, so questions incorrectly marked as english can be found too.
        question_filter = F(question_locale=locale)
        question_filter |= F(question_locale=settings.WIKI_DEFAULT_LANGUAGE)
        question_filter &= F(updated__gte=max_age)

        raw_results = (question_s
            .query(or_=query)
            .filter(question_filter)
            .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')
        log.debug(exc)

    return results
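
The F objects in this example compose like boolean expressions: | is OR, & is AND. The wiki filter built above is therefore "(locale matches OR locale is the default language) AND category is allowed AND not archived". A sketch of the same construction, assuming elasticutils is installed (values hypothetical):

from elasticutils import F

locale = 'de'                   # hypothetical
default_categories = [10, 20]   # hypothetical

f = F(document_locale=locale) | F(document_locale='en-US')
f &= F(document_category__in=default_categories)
f &= F(document_is_archived=False)
# wiki_s.filter(f) then applies the whole combined filter in one pass.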
Example #11
def _search_suggestions_es(request, query, locale, category_tags):
    """See _search_suggestions

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    engine = 'elastic'
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(question_tag__in=category_tags)
        wiki_s = wiki_s.filter(document_tag__in=category_tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (question_s.query(query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (SearchError, ESTimeoutError,
            ESMaxRetryError, ESException) as exc:
        if isinstance(exc, SearchError):
            statsd.incr('questions.suggestions.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.%s.elasticsearchexception' %
                        engine)

        return []

    return results
Example #12
def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
    else:
        engine = 'sphinx'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = wiki_searcher(request)
    question_s = question_searcher(request)
    discussion_s = discussion_searcher(request)

    documents = []

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_s = question_s.filter(tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            if cleaned_q:
                wiki_s = wiki_s.query(cleaned_q)
            wiki_s = wiki_s[:max_results]
            # Execute the query and append to documents
            documents += [('wiki', (pair[0], pair[1]))
                          for pair in enumerate(wiki_s.object_ids())]

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            if engine == 'elastic':
                highlight_fields = ['title', 'question_content',
                                    'answer_content']
            else:
                highlight_fields = ['content']

            question_s = question_s.highlight(
                *highlight_fields,
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                question_s = question_s.query(cleaned_q)
            question_s = question_s[:max_results]
            documents += [('question', (pair[0], pair[1]))
                          for pair in enumerate(question_s.object_ids())]

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            # Sort results by
            try:
                # Note that the first attribute needs to be the same
                # here and in forums/models.py discussion_search.
                discussion_s = discussion_s.group_by(
                    'thread_id', constants.GROUPSORT[sortby])
            except IndexError:
                pass

            discussion_s = discussion_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                discussion_s = discussion_s.query(cleaned_q)
            discussion_s = discussion_s[:max_results]
            documents += [('discussion', (pair[0], pair[1]))
                          for pair in enumerate(discussion_s.object_ids())]

        pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

        # Build a dict of { type_ -> list of indexes } for the specific
        # docs that we're going to display on this page.  This makes it
        # easy for us to slice the appropriate search Ss so we're limiting
        # our db hits to just the items we're showing.
        documents_dict = {}
        for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
            documents_dict.setdefault(doc[0], []).append(doc[1][0])

        docs_for_page = []
        for kind, search_s in [('wiki', wiki_s),
                               ('question', question_s),
                               ('discussion', discussion_s)]:
            if kind not in documents_dict:
                continue

            # documents_dict[kind] is a list of indexes--one for each
            # object id search result for that kind.  We use the values
            # at the beginning and end of the list for slice boundaries.
            begin = documents_dict[kind][0]
            end = documents_dict[kind][-1] + 1

            search_s = search_s[begin:end]

            if engine == 'elastic':
                # If we're doing elasticsearch, then we need to update
                # the _s variables to point to the sliced versions of
                # S so that, when we iterate over them in the
                # following list comp, we hang onto the version that
                # does the query, so we can call excerpt() on it
                # later.
                #
                # We only need to do this with elasticsearch.  For Sphinx,
                # search_s at this point is an ObjectResults and not an S
                # because we've already acquired object_ids on it.  Thus
                # if we update the _s variables, we'd be pointing to the
                # ObjectResults and not the S and then excerpting breaks.
                #
                # Ugh.
                if kind == 'wiki':
                    wiki_s = search_s
                elif kind == 'question':
                    question_s = search_s
                elif kind == 'discussion':
                    discussion_s = search_s

            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            type_, doc = docinfo
            try:
                if type_ == 'wiki':
                    summary = doc.current_revision.summary

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'document',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                elif type_ == 'question':
                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*question_s.excerpt(doc)) if m])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'question',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                else:
                    if engine == 'elastic':
                        thread = doc
                    else:
                        thread = Thread.objects.get(pk=doc.thread_id)

                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*discussion_s.excerpt(doc))])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': thread.get_absolute_url(),
                        'title': thread.title,
                        'type': 'thread',
                        'rank': rank,
                        'object': thread,
                    }
                    results.append(result)
            except IndexError:
                break
            except ObjectDoesNotExist:
                continue

    except (SearchError, ESTimeoutError, ESMaxRetryError) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, SearchError):
            statsd.incr('search.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('search.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.%s.maxretryerror' % engine)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)
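
The interval filters in the view above build range lookups by string concatenation and splat them into filter() as keyword arguments. A small runnable sketch of the dicts involved (timestamps hypothetical):

filter_name = 'created'
filter_date = 1300000000   # hypothetical cutoff timestamp
unix_now = 1300500000      # hypothetical "now"

before = {filter_name + '__gte': 0,
          filter_name + '__lte': max(filter_date, 0)}
after = {filter_name + '__gte': min(filter_date, unix_now),
         filter_name + '__lte': unix_now}

print(before)  # {'created__gte': 0, 'created__lte': 1300000000}
print(after)   # {'created__gte': 1300000000, 'created__lte': 1300500000}
# question_s.filter(**before) expands these into keyword lookups.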
Example #13
def _search_suggestions(request, query, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to
    tags -- list of tags to filter questions on
    product_slugs -- list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale, document_category__in=default_categories)
            .query(query)
            .values_dict("id")[:WIKI_RESULTS]
        )

        results = []
        for r in raw_results:
            try:
                doc = Document.objects.select_related("current_revision").get(pk=r["id"])
                results.append(
                    {
                        "search_summary": clean_excerpt(doc.current_revision.summary),
                        "url": doc.get_absolute_url(),
                        "title": doc.title,
                        "type": "document",
                        "object": doc,
                    }
                )
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = question_s.query(query).values_dict("id")[:QUESTIONS_RESULTS]

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r["id"])
                results.append(
                    {
                        "search_summary": clean_excerpt(q.content[0:500]),
                        "url": q.get_absolute_url(),
                        "title": q.title,
                        "type": "question",
                        "object": q,
                        "is_solved": q.is_solved,
                        "num_answers": q.num_answers,
                        "num_votes": q.num_votes,
                        "num_votes_past_week": q.num_votes_past_week,
                    }
                )
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr("questions.suggestions.timeouterror")
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr("questions.suggestions.maxretryerror")
        elif isinstance(exc, ESException):
            statsd.incr("questions.suggestions.elasticsearchexception")

        return []

    return results
Example #14
def _search_suggestions(request, query, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to
    tags -- list of tags to filter questions on
    product_slugs -- list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                        doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (
            question_s.query(query).values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')

        return []

    return results
Example #15
def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
    else:
        engine = 'sphinx'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = wiki_searcher(request)
    question_s = question_searcher(request)
    discussion_s = discussion_searcher(request)

    documents = []

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_s = question_s.filter(tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)
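    # Net effect: BEFORE keeps values in [0, filter_date] and AFTER keeps
    # values in [filter_date, unix_now]. E.g. created=INTERVAL_BEFORE with
    # filter_date=1300000000 matches only items created at or before that
    # Unix timestamp (the timestamp is illustrative).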

    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            wiki_s = wiki_s.query(cleaned_q)[:max_results]
            # Execute the query and append to documents
            documents += [('wiki', pair)
                          for pair in enumerate(wiki_s.object_ids())]
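            # documents is now a list of (kind, (index, object_id)) pairs,
            # e.g. ('wiki', (0, 42)) for the top-ranked wiki hit; the id 42
            # is illustrative.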

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by the user-selected order, if valid.
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            if engine == 'elastic':
                highlight_fields = ['title', 'question_content',
                                    'answer_content']
            else:
                highlight_fields = ['content']

            question_s = question_s.highlight(
                *highlight_fields,
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)
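            # Matched query terms come back wrapped in the markers above,
            # e.g. u'How do I <b>sync</b> my bookmarks?' (text illustrative).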

            question_s = question_s.query(cleaned_q)[:max_results]
            documents += [('question', pair)
                          for pair in enumerate(question_s.object_ids())]

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            # Sort results by the user-selected order, if valid.
            try:
                # Note that the first attribute needs to be the same
                # here and in forums/models.py discussion_search.
                discussion_s = discussion_s.group_by(
                    'thread_id', constants.GROUPSORT[sortby])
            except IndexError:
                pass

            discussion_s = discussion_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            discussion_s = discussion_s.query(cleaned_q)[:max_results]
            documents += [('discussion', pair)
                          for pair in enumerate(discussion_s.object_ids())]

        pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

        # Build a dict of { kind -> list of indexes } for the specific
        # docs that we're going to display on this page.  This makes it
        # easy for us to slice the appropriate search Ss so we're limiting
        # our db hits to just the items we're showing.
        documents_dict = {}
        for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
            documents_dict.setdefault(doc[0], []).append(doc[1][0])
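        # e.g. a page showing three wiki hits and two question hits might
        # produce {'wiki': [0, 1, 2], 'question': [0, 1]} (indexes
        # illustrative).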

        docs_for_page = []
        for kind, search_s in [('wiki', wiki_s),
                               ('question', question_s),
                               ('discussion', discussion_s)]:
            if kind not in documents_dict:
                continue

            # documents_dict[kind] is a list of indexes--one for each
            # object id search result for that kind.  We use the values
            # at the beginning and end of the list for slice boundaries.
            begin = documents_dict[kind][0]
            end = documents_dict[kind][-1] + 1
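            # e.g. indexes [3, 4, 5] give begin=3, end=6, so the slice below
            # re-fetches exactly those hits; this assumes the indexes for a
            # kind are contiguous (example indexes illustrative).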

            search_s = search_s[begin:end]

            if engine == 'elastic':
                # If we're doing elasticsearch, then we need to update
                # the _s variables to point to the sliced versions of
                # S so that, when we iterate over them in the
                # following list comp, we hang onto the version that
                # does the query, so we can call excerpt() on it
                # later.
                #
                # We only need to do this with elasticsearch.  For Sphinx,
                # search_s at this point is an ObjectResults and not an S
                # because we've already acquired object_ids on it.  Thus
                # if we update the _s variables, we'd be pointing to the
                # ObjectResults and not the S and then excerpting breaks.
                #
                # Ugh.
                if kind == 'wiki':
                    wiki_s = search_s
                elif kind == 'question':
                    question_s = search_s
                elif kind == 'discussion':
                    discussion_s = search_s

            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
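            # rank is the absolute position of this hit across all pages,
            # e.g. the first result on page 2 is rank 10 with 10 results
            # per page (counts illustrative).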
            type_, doc = docinfo
            try:
                if type_ == 'wiki':
                    summary = doc.current_revision.summary

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'document',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                elif type_ == 'question':
                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*question_s.excerpt(doc)) if m])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'question',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                else:
                    if engine == 'elastic':
                        thread = doc
                    else:
                        thread = Thread.objects.get(pk=doc.thread_id)

                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*discussion_s.excerpt(doc))
                             if m])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': thread.get_absolute_url(),
                        'title': thread.title,
                        'type': 'thread',
                        'rank': rank,
                        'object': thread,
                    }
                    results.append(result)
            except IndexError:
                break
            except ObjectDoesNotExist:
                continue

    except (SearchError, ESTimeoutError, ESMaxRetryError), exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, SearchError):
            statsd.incr('search.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('search.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.%s.maxretryerror' % engine)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)