def test_with_highlights(self):
    form = SimpleSearchForm({'q': 'foo'})
    ok_(form.is_valid())

    s = generate_simple_search(form, 'en-US', with_highlights=True)
    ok_('highlight' in s.build_search())

    s = generate_simple_search(form, 'en-US', with_highlights=False)
    ok_('highlight' not in s.build_search())

def test_language_zh_cn(self):
    form = SimpleSearchForm({'q': 'foo'})
    ok_(form.is_valid())

    s = generate_simple_search(form, 'zh-CN', with_highlights=False)
    s_string = str(s.build_search())
    # Verify locale
    ok_("{'term': {'document_locale': 'zh-CN'}}" in s_string)
    # Verify the Chinese analyzer is used
    ok_("'analyzer': 'chinese'" in s_string)

def test_language_fr(self):
    form = SimpleSearchForm({'q': 'foo'})
    ok_(form.is_valid())

    s = generate_simple_search(form, 'fr', with_highlights=False)
    s_string = str(s.build_search())
    # Verify locale
    ok_("{'term': {'document_locale': 'fr'}}" in s_string)
    # Verify fr has the right synonym-less analyzer
    ok_("'analyzer': 'snowball-french'" in s_string)

def test_language_en_us(self):
    form = SimpleSearchForm({'q': 'foo'})
    ok_(form.is_valid())

    s = generate_simple_search(form, 'en-US', with_highlights=False)

    # NB: Comparing bits of big trees is hard, so we serialize it
    # and look for strings.
    s_string = str(s.build_search())
    # Verify locale
    ok_("{'term': {'document_locale': 'en-US'}}" in s_string)
    # Verify en-US has the right synonym-enhanced analyzer
    ok_("'analyzer': 'snowball-english-synonyms'" in s_string)

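# For orientation: str(s.build_search()) serializes the whole query body,
# which is why the tests above match on substrings instead of comparing
# trees. The fragments being matched look roughly like this (an abridged
# sketch, not the full body):
#
#     ... {'term': {'document_locale': 'en-US'}} ...
#     ... {'query': 'foo', 'analyzer': 'snowball-english-synonyms'} ...
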
def opensearch_suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    content_type = 'application/x-suggestions+json'

    search_form = SimpleSearchForm(request.GET, auto_id=False)
    if not search_form.is_valid():
        return HttpResponseBadRequest(content_type=content_type)

    cleaned = search_form.cleaned_data
    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    searcher = generate_simple_search(search_form, language, with_highlights=False)
    searcher = searcher.values_dict('document_title', 'question_title', 'url')
    results = searcher[:10]

    def urlize(r):
        return u'%s://%s%s' % (
            'https' if request.is_secure() else 'http',
            request.get_host(),
            r['url'][0]
        )

    def titleize(r):
        # NB: Elasticsearch returns an array of strings as the value, so we
        # mimic that and then pull out the first (and only) string.
        return r.get('document_title', r.get('question_title', [_('No title')]))[0]

    try:
        data = [
            cleaned['q'],
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]
        ]
    except ES_EXCEPTIONS:
        # If we have Elasticsearch problems, we just send back an empty set
        # of results.
        data = []

    return HttpResponse(json.dumps(data), content_type=content_type)

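# For reference, the data list built above follows the OpenSearch
# suggestions shape: [query, [completions], [descriptions], [urls]].
# A sample payload for q='crash' might look like this (titles and URLs
# are hypothetical, for illustration only):
#
#     ["crash",
#      ["Firefox crashes", "Troubleshoot Firefox crashes"],
#      [],
#      ["https://support.mozilla.org/en-US/kb/firefox-crashes",
#       "https://support.mozilla.org/en-US/kb/troubleshoot-firefox-crashes"]]
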
def simple_search(request, template=None):
    """Elasticsearch-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    """
    # Redirect old Advanced Search URLs (?a={1,2}) to the new URL.
    if request.GET.get('a') in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = 'application/x-javascript' if callback else 'application/json'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type, status=400)

    search_form = SimpleSearchForm(request.GET, auto_id=False)
    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': False,
            'request': request,
            'search_form': search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
        search_['Expires'] = (
            (datetime.utcnow() + timedelta(minutes=cache_period))
            .strftime(EXPIRES_FMT))
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    searcher = generate_simple_search(search_form, language, with_highlights=True)
    searcher = searcher[:settings.SEARCH_MAX_RESULTS]

    fallback_results = None

    try:
        pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)
        offset = pages.start_index()

        results = []
        if pages.paginator.count == 0:
            fallback_results = _fallback_results(language, cleaned['product'])
        else:
            for i, doc in enumerate(pages):
                rank = i + offset

                if doc['model'] == 'wiki_document':
                    summary = _build_es_excerpt(doc)
                    if not summary:
                        summary = doc['document_summary']
                    result = {
                        'title': doc['document_title'],
                        'type': 'document'}

                elif doc['model'] == 'questions_question':
                    summary = _build_es_excerpt(doc)
                    if not summary:
                        # We're excerpting only question_content, so if the
                        # query matched question_title or
                        # question_answer_content, then there won't be any
                        # question_content excerpts. In that case, just show
                        # the question--but only the first 500 characters.
                        summary = bleach.clean(doc['question_content'], strip=True)[:500]

                    result = {
                        'title': doc['question_title'],
                        'type': 'question',
                        'is_solved': doc['question_is_solved'],
                        'num_answers': doc['question_num_answers'],
                        'num_votes': doc['question_num_votes'],
                        'num_votes_past_week': doc['question_num_votes_past_week']}

                result['url'] = doc['url']
                result['object'] = doc
                result['search_summary'] = summary
                result['rank'] = rank
                result['score'] = doc.es_meta.score
                result['explanation'] = escape(format_explanation(
                    doc.es_meta.explanation))
                result['id'] = doc['id']
                results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))
        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title') for p in product]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name}

    if is_json:
        # Models are not json serializable.
        for r in data['results']:
            del r['object']
        data['total'] = len(data['results'])

        data['products'] = [{'slug': p.slug, 'title': p.title}
                            for p in Product.objects.filter(visible=True)]

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )

        if not results:
            data['message'] = _('No pages matched the search criteria')

        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
    results_['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period))
        .strftime(EXPIRES_FMT))
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_
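
# A minimal sketch of exercising the JSON format of this view with Django's
# test client. The 'search' URL name and the query values are assumptions
# for illustration; they aren't defined in this module:
#
#     from django.test.client import Client
#
#     response = Client().get(reverse('search'),  # hypothetical URL name
#                             {'q': 'crash', 'format': 'json'})
#     data = json.loads(response.content)
#     assert response.status_code in (200, 503)
#     assert 'results' in data or 'error' in data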