Example #1
    def test_with_highlights(self):
        form = SimpleSearchForm({'q': 'foo'})
        ok_(form.is_valid())

        s = generate_simple_search(form, 'en-US', with_highlights=True)
        ok_('highlight' in s.build_search())

        s = generate_simple_search(form, 'en-US', with_highlights=False)
        ok_('highlight' not in s.build_search())
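These assertions only check for the presence or absence of a 'highlight' key in the serialized search. The `generate_simple_search` helper itself is not shown on this page; as a minimal sketch (plain dicts rather than kitsune's actual search objects, and all names below are assumptions), the flag could work like this:

# Sketch only, not kitsune's implementation: build a plain Elasticsearch
# request body in which 'highlight' appears only when requested, which is
# exactly what the ok_() assertions above look for.
def build_search_sketch(query_text, locale, with_highlights=False):
    body = {
        'query': {'match': {'document_content': query_text}},
        'filter': {'term': {'document_locale': locale}},
    }
    if with_highlights:
        # Adding the key only on demand makes the with_highlights=False
        # case trivially 'highlight'-free.
        body['highlight'] = {'fields': {'document_content': {}}}
    return body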
Example #2
    def test_language_zh_cn(self):
        form = SimpleSearchForm({'q': 'foo'})
        ok_(form.is_valid())

        s = generate_simple_search(form, 'zh-CN', with_highlights=False)

        s_string = str(s.build_search())
        # Verify locale
        ok_("{'term': {'document_locale': 'zh-CN'}}" in s_string)
        # Verify the Chinese analyzer is used
        ok_("'analyzer': 'chinese'" in s_string)
Example #3
    def test_language_fr(self):
        form = SimpleSearchForm({'q': 'foo'})
        ok_(form.is_valid())

        s = generate_simple_search(form, 'fr', with_highlights=False)

        s_string = str(s.build_search())
        # Verify locale
        ok_("{'term': {'document_locale': 'fr'}}" in s_string)
        # Verify fr has the right synonym-less analyzer
        ok_("'analyzer': 'snowball-french'" in s_string)
Example #4
    def test_language_en_us(self):
        form = SimpleSearchForm({'q': 'foo'})
        ok_(form.is_valid())

        s = generate_simple_search(form, 'en-US', with_highlights=False)

        # NB: Comparing bits of big trees is hard, so we serialize it
        # and look for strings.
        s_string = str(s.build_search())
        # Verify locale
        ok_("{'term': {'document_locale': 'en-US'}}" in s_string)
        # Verify en-US has the right synonym-enhanced analyzer
        ok_("'analyzer': 'snowball-english-synonyms'" in s_string)
Example #5
File: views.py Project: rivaxel/kitsune
def opensearch_suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    content_type = 'application/x-suggestions+json'

    search_form = SimpleSearchForm(request.GET, auto_id=False)
    if not search_form.is_valid():
        return HttpResponseBadRequest(content_type=content_type)

    cleaned = search_form.cleaned_data
    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    searcher = generate_simple_search(search_form, language, with_highlights=False)
    searcher = searcher.values_dict('document_title', 'question_title', 'url')
    results = searcher[:10]

    def urlize(r):
        return u'%s://%s%s' % (
            'https' if request.is_secure() else 'http',
            request.get_host(),
            r['url'][0]
        )

    def titleize(r):
        # NB: Elasticsearch returns an array of strings as the value, so we mimic that and
        # then pull out the first (and only) string.
        return r.get('document_title', r.get('question_title', [_('No title')]))[0]

    try:
        data = [
            cleaned['q'],
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]
        ]
    except ES_EXCEPTIONS:
        # If we have Elasticsearch problems, we just send back an empty set of results.
        data = []

    return HttpResponse(json.dumps(data), content_type=content_type)
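The four-element `data` list built above is the OpenSearch suggestions JSON format: the echoed query, a list of suggestion titles, an (unused, hence empty) list of descriptions, and a parallel list of result URLs. An illustrative value (all titles and URLs below are made up) would be:

# Illustrative only: the shape of the suggestions payload for ?q=fire.
# Slots: [query echo, titles, descriptions (always [] here), URLs].
example_payload = [
    'fire',
    ['Firefox crashes', 'Install Firefox on Linux'],
    [],
    ['https://support.mozilla.org/en-US/kb/firefox-crashes',
     'https://support.mozilla.org/en-US/kb/install-firefox-linux'],
]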
Example #6
File: views.py Project: rivaxel/kitsune
def simple_search(request, template=None):
    """Elasticsearch-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    """
    # Redirect old Advanced Search URLs (?a={1,2}) to the new URL.
    if request.GET.get('a') in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = 'application/x-javascript' if callback else 'application/json'

    # Check that the callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type,
            status=400)

    search_form = SimpleSearchForm(request.GET, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': False,
            'request': request,
            'search_form': search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
        search_['Expires'] = (
            (datetime.utcnow() + timedelta(minutes=cache_period))
            .strftime(EXPIRES_FMT))
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    searcher = generate_simple_search(search_form, language, with_highlights=True)
    searcher = searcher[:settings.SEARCH_MAX_RESULTS]
    fallback_results = None

    try:
        pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)
        offset = pages.start_index()

        results = []
        if pages.paginator.count == 0:
            fallback_results = _fallback_results(language, cleaned['product'])

        else:
            for i, doc in enumerate(pages):
                rank = i + offset

                if doc['model'] == 'wiki_document':
                    summary = _build_es_excerpt(doc)
                    if not summary:
                        summary = doc['document_summary']
                    result = {
                        'title': doc['document_title'],
                        'type': 'document'}

                elif doc['model'] == 'questions_question':
                    summary = _build_es_excerpt(doc)
                    if not summary:
                        # We're excerpting only question_content, so if the query matched
                        # question_title or question_answer_content, then there won't be any
                        # question_content excerpts. In that case, just show the question--but
                        # only the first 500 characters.
                        summary = bleach.clean(doc['question_content'], strip=True)[:500]

                    result = {
                        'title': doc['question_title'],
                        'type': 'question',
                        'is_solved': doc['question_is_solved'],
                        'num_answers': doc['question_num_answers'],
                        'num_votes': doc['question_num_votes'],
                        'num_votes_past_week': doc['question_num_votes_past_week']}

                result['url'] = doc['url']
                result['object'] = doc
                result['search_summary'] = summary
                result['rank'] = rank
                result['score'] = doc.es_meta.score
                result['explanation'] = escape(format_explanation(
                    doc.es_meta.explanation))
                result['id'] = doc['id']
                results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title') for p in product]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name}

    if is_json:
        # Models are not json serializable.
        for r in data['results']:
            del r['object']
        data['total'] = len(data['results'])

        data['products'] = [{'slug': p.slug, 'title': p.title}
                            for p in Product.objects.filter(visible=True)]

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
    results_['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period))
        .strftime(EXPIRES_FMT))
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_
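As a usage sketch, the JSON branch of this view can be exercised with Django's test client. The URL name 'search' and the import path are assumptions based on the era of this codebase (it still uses request.MOBILE and urlquote); the asserted keys come from the `data` dict built in the view above.

# Sketch: hitting the JSON branch of simple_search. The URL name 'search'
# is an assumption; the keys checked are the ones the view puts in `data`.
import json

from django.core.urlresolvers import reverse  # old-style Django, matching this code
from django.test import Client


def check_simple_search_json():
    response = Client().get(reverse('search'), {'q': 'firefox', 'format': 'json'})
    assert response.status_code == 200
    payload = json.loads(response.content)
    for key in ('num_results', 'results', 'q', 'w', 'total', 'pagination'):
        assert key in payload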