Exemple #1
0
def opensearch_suggestions(request):
    """Serve OpenSearch suggestions for the query carried in ``request.GET``.

    Returns a 400 response when the search form does not validate.
    Otherwise responds with the JSON array ``[query, titles, [], urls]``
    built from at most ten hits, or with an empty array when one of
    ``ES_EXCEPTIONS`` is raised while the hits are collected.
    """
    mimetype = 'application/x-suggestions+json'

    form = SimpleSearchForm(request.GET, auto_id=False)
    if not form.is_valid():
        return HttpResponseBadRequest(content_type=mimetype)

    params = form.cleaned_data
    locale = locale_or_default(params['language'] or request.LANGUAGE_CODE)

    hits = generate_simple_search(form, locale, with_highlights=False)
    hits = hits.values_dict('document_title', 'question_title', 'url')[:10]

    def absolute_url(hit):
        # Prefix the hit's first URL value with the scheme and host of the
        # current request.
        scheme = 'https' if request.is_secure() else 'http'
        return '%s://%s%s' % (scheme, request.get_host(), hit['url'][0])

    def display_title(hit):
        # NB: Elasticsearch returns an array of strings as the value, so the
        # placeholder is wrapped in a list as well and the first (and only)
        # string is pulled out.
        fallback = hit.get('question_title', [_('No title')])
        return hit.get('document_title', fallback)[0]

    try:
        # The hits are first iterated here, inside the try block.
        query = params['q']
        titles = [display_title(hit) for hit in hits]
        urls = [absolute_url(hit) for hit in hits]
        payload = [query, titles, [], urls]
    except ES_EXCEPTIONS:
        # Elasticsearch problems: send back an empty set of results.
        payload = []

    return HttpResponse(json.dumps(payload), content_type=mimetype)
    def test_with_highlights(self):
        """``with_highlights`` decides whether the built search has a 'highlight' key."""
        search_form = SimpleSearchForm({'q': 'foo'})
        ok_(search_form.is_valid())

        # Highlighting requested: the key must be present.
        highlighted = generate_simple_search(
            search_form, 'en-US', with_highlights=True)
        ok_('highlight' in highlighted.build_search())

        # Highlighting disabled: the key must be absent.
        plain = generate_simple_search(
            search_form, 'en-US', with_highlights=False)
        ok_('highlight' not in plain.build_search())
Exemple #3
0
    def test_with_highlights(self):
        """The 'highlight' key is present exactly when ``with_highlights`` is true."""
        form = SimpleSearchForm({'q': 'foo'})
        ok_(form.is_valid())

        # Check the enabled case first, then the disabled one.
        for flag in (True, False):
            built = generate_simple_search(
                form, 'en-US', with_highlights=flag).build_search()
            ok_(('highlight' in built) == flag)
Exemple #4
0
    def test_with_highlights(self):
        """Toggling ``with_highlights`` adds or removes "highlight" in the built search."""
        search_form = SimpleSearchForm({"q": "foo"})
        ok_(search_form.is_valid())

        def built_search(highlights):
            # Build the en-US search body with the given highlight setting.
            searcher = generate_simple_search(
                search_form, "en-US", with_highlights=highlights
            )
            return searcher.build_search()

        ok_("highlight" in built_search(True))
        ok_("highlight" not in built_search(False))
Exemple #5
0
    def test_language_zh_cn(self):
        """A zh-CN search filters on that locale and uses the 'chinese' analyzer."""
        search_form = SimpleSearchForm({"q": "foo"})
        ok_(search_form.is_valid())

        searcher = generate_simple_search(search_form, "zh-CN", with_highlights=False)
        serialized = str(searcher.build_search())

        expected_fragments = (
            # Locale filter.
            "{'term': {'document_locale': 'zh-CN'}}",
            # Analyzer chosen for the locale.
            "'analyzer': 'chinese'",
        )
        for fragment in expected_fragments:
            ok_(fragment in serialized)
Exemple #6
0
    def test_language_fr(self):
        """A French search filters on 'fr' and uses the synonym-less French analyzer."""
        search_form = SimpleSearchForm({"q": "foo"})
        ok_(search_form.is_valid())

        built = generate_simple_search(
            search_form, "fr", with_highlights=False
        ).build_search()
        as_text = str(built)

        has_locale_filter = "{'term': {'document_locale': 'fr'}}" in as_text
        ok_(has_locale_filter)

        # fr must get the analyzer without synonyms.
        has_french_analyzer = "'analyzer': 'snowball-french'" in as_text
        ok_(has_french_analyzer)
Exemple #7
0
    def test_language_en_us(self):
        """An en-US search filters on that locale and uses the synonym analyzer."""
        search_form = SimpleSearchForm({"q": "foo"})
        ok_(search_form.is_valid())

        searcher = generate_simple_search(search_form, "en-US", with_highlights=False)

        # NB: Comparing pieces of a large tree is awkward, so the built
        # search is serialized once and probed for substrings.
        flattened = str(searcher.build_search())

        checks = [
            # The locale filter.
            "{'term': {'document_locale': 'en-US'}}",
            # The synonym-enhanced analyzer en-US is expected to use.
            "'analyzer': 'snowball-english-synonyms'",
        ]
        for needle in checks:
            ok_(needle in flattened)
Exemple #8
0
def opensearch_suggestions(request):
    """Answer an OpenSearch suggestions request.

    Invalid form data produces an HTTP 400. A valid request produces the
    JSON array ``[query, titles, [], urls]`` for up to ten matches; when
    one of ``ES_EXCEPTIONS`` is raised the body is an empty JSON array.
    """
    suggestions_type = 'application/x-suggestions+json'

    search_form = SimpleSearchForm(request.GET, auto_id=False)
    if not search_form.is_valid():
        return HttpResponseBadRequest(content_type=suggestions_type)

    form_data = search_form.cleaned_data
    lang = locale_or_default(form_data['language'] or request.LANGUAGE_CODE)

    matches = generate_simple_search(
        search_form, lang, with_highlights=False
    ).values_dict('document_title', 'question_title', 'url')[:10]

    def full_url(match):
        # Scheme and host are taken from the current request.
        protocol = 'https' if request.is_secure() else 'http'
        host = request.get_host()
        return u'%s://%s%s' % (protocol, host, match['url'][0])

    def best_title(match):
        # NB: Elasticsearch returns an array of strings as the value, so the
        # default mimics that shape and the first (and only) string is used.
        question_or_default = match.get('question_title', [_('No title')])
        return match.get('document_title', question_or_default)[0]

    try:
        # Walking the matches happens inside the try block so that
        # Elasticsearch errors raised at that point are caught below.
        query = form_data['q']
        titles = list(map(best_title, matches))
        links = list(map(full_url, matches))
        body = [query, titles, [], links]
    except ES_EXCEPTIONS:
        # Elasticsearch problems: just send back an empty set of results.
        body = []

    return HttpResponse(json.dumps(body), content_type=suggestions_type)
Exemple #9
0
def simple_search(request):
    """Elasticsearch-specific simple search view.

    Serves end-user searches of the Knowledge Base and Support Forum, as an
    HTML page or, when ``request.IS_JSON`` is set, as JSON (wrapped in
    ``request.JSON_CALLBACK`` when one is present). Filtering options are
    limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    """
    # 1. Old Advanced Search URLs (?a={1,2}) are redirected to the new URL,
    # keeping the query string.
    if request.GET.get("a") in ["1", "2"]:
        return HttpResponseRedirect(
            reverse("search.advanced") + "?" + request.GET.urlencode()
        )

    # 2. Build and validate the form.
    search_form = SimpleSearchForm(request.GET, auto_id=False)

    if not search_form.is_valid():
        # JSON clients get a 400 payload; everyone else gets the search
        # form page back, with cache headers.
        if request.IS_JSON:
            error_body = json.dumps({"error": _("Invalid search data.")})
            return HttpResponse(
                error_body, content_type=request.CONTENT_TYPE, status=400
            )

        form_context = {
            "advanced": False,
            "request": request,
            "search_form": search_form,
        }
        form_page = render(request, "search/form.html", form_context)
        return cache_control(form_page, settings.SEARCH_CACHE_PERIOD)

    # 3. Generate the search, capped at the configured maximum.
    cleaned = search_form.cleaned_data
    language = locale_or_default(cleaned["language"] or request.LANGUAGE_CODE)
    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ""

    searcher = generate_simple_search(search_form, language, with_highlights=True)
    pages = paginate(
        request,
        searcher[: settings.SEARCH_MAX_RESULTS],
        settings.SEARCH_RESULTS_PER_PAGE,
    )

    # 4. Collect results, or fallback suggestions when nothing matched.
    num_results = pages.paginator.count
    if num_results:
        fallback_results = None
        results = build_results_list(pages, request.IS_JSON)
    else:
        fallback_results = _fallback_results(language, cleaned["product"])
        results = []

    product = Product.objects.filter(slug__in=cleaned["product"])
    titles = (
        [pgettext("DB: products.Product.title", p.title) for p in product]
        if product
        else [_("All Products")]
    )

    visible_products = Product.objects.filter(visible=True)
    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        # FIXME: This is probably bad l10n.
        "product_titles": ", ".join(titles),
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
        "products": visible_products,
    }

    # 5a. HTML output, with cache headers and the last-search cookie.
    if not request.IS_JSON:
        data.update({
            "product": product,
            "pages": pages,
            "search_form": search_form,
            "advanced": False,
        })
        resp = cache_control(
            render(request, "search/results.html", data),
            settings.SEARCH_CACHE_PERIOD,
        )
        resp.set_cookie(
            settings.LAST_SEARCH_COOKIE,
            urlquote(cleaned["q"]),
            max_age=3600,
            secure=False,
            httponly=False,
        )
        return resp

    # 5b. JSON output: the querysets are replaced by plain serializable
    # data and the pagination state is added.
    data["total"] = len(results)
    data["products"] = [
        {"slug": p.slug, "title": p.title} for p in visible_products
    ]

    if product:
        data["product"] = product[0].slug

    pager_view = Paginator(pages)
    pager = pager_view.pager
    data["pagination"] = {
        "number": pager.number,
        "num_pages": pager.paginator.num_pages,
        "has_next": pager.has_next(),
        "has_previous": pager.has_previous(),
        "max": pager_view.max,
        "span": pager_view.span,
        "dotted_upper": pager.dotted_upper,
        "dotted_lower": pager.dotted_lower,
        "page_range": pager.page_range,
        "url": pager.url,
    }
    if not results:
        data["message"] = constants.NO_MATCH

    json_data = JSONRenderer().render(data)
    if request.JSON_CALLBACK:
        # JSONP: wrap the payload in the requested callback.
        json_data = request.JSON_CALLBACK + "(" + json_data + ");"
    return HttpResponse(json_data, content_type=request.CONTENT_TYPE)
Exemple #10
0
def simple_search(request):
    """Elasticsearch-specific simple search view.

    End users search the Knowledge Base and Support Forum through this
    view. It answers with an HTML page, or with JSON / JSONP when
    ``request.IS_JSON`` is set. Filtering options are limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    """
    # 1. Prep request: old Advanced Search URLs (?a={1,2}) go to the new URL.
    legacy_flag = request.GET.get('a')
    if legacy_flag in ['1', '2']:
        query_string = request.GET.urlencode()
        return HttpResponseRedirect(
            reverse('search.advanced') + '?' + query_string)

    # 2. Build form.
    search_form = SimpleSearchForm(request.GET, auto_id=False)

    # 3. Validate request.
    if not search_form.is_valid():
        if request.IS_JSON:
            payload = json.dumps({'error': _('Invalid search data.')})
            return HttpResponse(payload,
                                content_type=request.CONTENT_TYPE,
                                status=400)

        context = {
            'advanced': False,
            'request': request,
            'search_form': search_form,
        }
        return cache_control(render(request, 'search/form.html', context),
                             settings.SEARCH_CACHE_PERIOD)

    # 4. Generate search.
    cleaned = search_form.cleaned_data
    selected_slugs = cleaned['product']

    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''

    searcher = generate_simple_search(
        search_form, language, with_highlights=True)
    capped = searcher[:settings.SEARCH_MAX_RESULTS]

    # 5. Generate output.
    pages = paginate(request, capped, settings.SEARCH_RESULTS_PER_PAGE)
    hit_count = pages.paginator.count

    fallback_results = None
    if hit_count == 0:
        # Nothing matched: offer fallback results instead.
        fallback_results = _fallback_results(language, selected_slugs)
        results = []
    else:
        results = build_results_list(pages, request.IS_JSON)

    product = Product.objects.filter(slug__in=selected_slugs)
    if not product:
        title_parts = [_('All Products')]
    else:
        title_parts = [
            pgettext('DB: products.Product.title', p.title) for p in product
        ]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(title_parts)

    data = {
        'num_results': hit_count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
        'products': Product.objects.filter(visible=True)
    }

    if request.IS_JSON:
        # Swap the queryset for plain dicts and add JSON-only metadata.
        data['total'] = len(results)
        data['products'] = [
            {'slug': p.slug, 'title': p.title} for p in data['products']
        ]

        if product:
            data['product'] = product[0].slug

        wrapper = Paginator(pages)
        current = wrapper.pager
        data['pagination'] = {
            'number': current.number,
            'num_pages': current.paginator.num_pages,
            'has_next': current.has_next(),
            'has_previous': current.has_previous(),
            'max': wrapper.max,
            'span': wrapper.span,
            'dotted_upper': current.dotted_upper,
            'dotted_lower': current.dotted_lower,
            'page_range': current.page_range,
            'url': current.url,
        }
        if not results:
            data['message'] = _('No pages matched the search criteria')

        json_data = JSONRenderer().render(data)
        if request.JSON_CALLBACK:
            # JSONP: wrap the payload in the requested callback.
            json_data = request.JSON_CALLBACK + '(' + json_data + ');'
        return HttpResponse(json_data, content_type=request.CONTENT_TYPE)

    # HTML output: add the template-only context, then set cache headers
    # and the last-search cookie.
    data.update({
        'product': product,
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    page_html = render(request, 'search/results.html', data)
    resp = cache_control(page_html, settings.SEARCH_CACHE_PERIOD)
    resp.set_cookie(settings.LAST_SEARCH_COOKIE,
                    urlquote(cleaned['q']),
                    max_age=3600,
                    secure=False,
                    httponly=False)
    return resp
Exemple #11
0
def simple_search(request):
    """Answer a simple search request with a JSON document.

    ``request.GET`` is validated with ``SimpleSearchForm``; invalid data
    yields a 400 JSON error. Otherwise the Knowledge Base and/or Support
    Forum are searched (selected by the ``w`` bit mask) for the requested
    page, and the results are returned together with product, language and
    pagination metadata.

    The ``page`` query parameter falls back to 1 when it is missing, not an
    integer, or smaller than 1.
    """
    search_form = SimpleSearchForm(request.GET, auto_id=False)

    if not search_form.is_valid():
        return HttpResponse(
            json.dumps({"error": _("Invalid search data.")}),
            content_type="application/json",
            status=400,
        )

    cleaned = search_form.cleaned_data

    # get language
    language = locale_or_default(cleaned["language"] or request.LANGUAGE_CODE)
    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ""

    # get product and product titles
    product, product_titles = _get_product_title(cleaned["product"])

    # get page
    try:
        # Clamp to 1 so that ?page=0 or a negative page never reaches the
        # search or the pagination helper (presumably they derive an offset
        # from it, which would then be negative).
        page = max(int(request.GET.get("page", 1)), 1)
    except ValueError:
        page = 1

    # create search object
    search = CompoundSearch(locale=language, product=product)

    # apply aaq/kb configs
    if cleaned["w"] & constants.WHERE_WIKI:
        search.add(WikiSearch)
    if cleaned["w"] & constants.WHERE_SUPPORT:
        search.add(QuestionSearch)

    # execute search
    search.run(cleaned["q"], page=page)
    total = search.total
    results = search.results

    # generate fallback results if necessary
    fallback_results = None
    if total == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    # create results dictionary for instant search
    data = {
        "num_results": total,
        "total": total,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
        "products": [
            {"slug": p.slug, "title": pgettext("DB: products.Product.title", p.title)}
            for p in Product.objects.filter(visible=True)
        ],
        "pagination": _make_pagination(page, total),
    }
    if product:
        data["product"] = product.slug
    if not results:
        data["message"] = constants.NO_MATCH

    json_data = JSONRenderer().render(data)
    return HttpResponse(json_data, content_type="application/json")
Exemple #12
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    `template` is the template used to render the results page; on mobile
    requests it is also used for the invalid-form page.

    Responses produced here:
    * a redirect to the advanced search for legacy `?a=1` / `?a=2` URLs
    * a 400 JSON error for an invalid JSONP callback or invalid form data
      (the latter only for `format=json`; otherwise the form page is shown)
    * a 503 (JSON error or "down" page) when one of ES_EXCEPTIONS is raised
    * otherwise the results as JSON / JSONP, or as an HTML page with cache
      headers and the last-search cookie set
    """

    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get("a")
    if a in ["1", "2"]:
        new_url = reverse("search.advanced") + "?" + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    content_type = "application/x-javascript" if callback else "application/json"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400
        )

    language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE))
    # Mutable copy of the query parameters, so a default `w` can be added.
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r["w"] = r.get("w", constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400)

        t = template if request.MOBILE else "search/form.html"
        search_ = render(request, t, {"advanced": False, "request": request, "search_form": search_form})
        # SEARCH_CACHE_PERIOD is used as minutes below: max-age is given in
        # seconds, hence the * 60.
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC:
        cleaned["w"] = constants.WHERE_WIKI

    # Requested page (at least 1) and the offset of its first result.
    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ""

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    wiki_f = F(model="wiki_document")
    question_f = F(model="questions_question")

    cleaned_q = cleaned["q"]
    products = cleaned["product"]

    # No product selected and `all_products` not in the query string: guess
    # one product from keywords in the query. These are substring checks and
    # only the first matching branch applies.
    if not products and "all_products" not in request.GET:
        lowered_q = cleaned_q.lower()

        if "thunderbird" in lowered_q:
            products.append("thunderbird")
        elif "android" in lowered_q:
            products.append("mobile")
        elif "ios" in lowered_q or "ipad" in lowered_q or "ipod" in lowered_q or "iphone" in lowered_q:
            products.append("ios")
        elif "firefox os" in lowered_q:
            products.append("firefox-os")
        elif "firefox" in lowered_q:
            products.append("firefox")

    # Start - wiki filters

    if cleaned["w"] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned["w"] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned["has_helpful"] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned["is_archived"] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("has_helpful", "is_archived")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned["w"] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned["w"] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if "explain" in request.GET and request.GET["explain"] == "1":
        searcher = searcher.explain()

    # NOTE(review): this value is replaced by a new ComposedList inside the
    # try block before it is read.
    documents = ComposedList()

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,
            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0,
        )

        # Build the query
        query_fields = chain(*[cls.get_query_fields() for cls in [DocumentMappingType, QuestionMappingType]])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ["match", "match_phrase"]:
                query["%s__%s" % (field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        # The reported number of results is capped at SEARCH_MAX_RESULTS.
        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(("results", searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset : offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                # The second element of the first entry holds the
                # (start, stop) pair used to slice the searcher.
                bounds = documents[0][1]
                searcher = searcher[bounds[0] : bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            # Rank of this hit across all pages, not just within this one.
            rank = i + offset

            # NOTE(review): a doc whose model is neither of the two handled
            # below leaves `result` and `summary` unset on the first
            # iteration, or stale from the previous one.
            if doc["model"] == "wiki_document":
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc["document_summary"]
                result = {"title": doc["document_title"], "type": "document"}

            elif doc["model"] == "questions_question":
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc["question_content"], strip=True)[:500]

                result = {
                    "title": doc["question_title"],
                    "type": "question",
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }

            result["url"] = doc["url"]
            result["object"] = doc
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc.es_meta.score
            result["explanation"] = escape(format_explanation(doc.es_meta.explanation))
            result["id"] = doc["id"]
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip("()")
        statsd.incr("search.esunified.{0}".format(exc_bucket))

        log.exception(exc)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return render(request, t, {"q": cleaned["q"]}, status=503)

    # NOTE(review): `items` is not read again anywhere in this function.
    items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"]
    items.append(("a", "2"))

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    product = Product.objects.filter(slug__in=cleaned["product"])
    if product:
        product_titles = [_(p.title, "DB: products.Product.title") for p in product]
    else:
        product_titles = [_("All Products")]

    product_titles = ", ".join(product_titles)

    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
    }

    if is_json:
        # Models are not json serializable.
        # (`r` is reused as the loop variable here; the query-parameter copy
        # it held before is not needed past this point.)
        for r in data["results"]:
            del r["object"]
        data["total"] = len(data["results"])

        data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)]

        if product:
            data["product"] = product[0].slug

        # Expose the pagination state as plain values.
        pages = Paginator(pages)
        data["pagination"] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data["message"] = _("No pages matched the search criteria")
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the (already validated) callback.
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, content_type=content_type)

    # HTML output: add the template-only context.
    data.update(
        {
            "product": product,
            "products": Product.objects.filter(visible=True),
            "pages": pages,
            "search_form": search_form,
            "advanced": False,
        }
    )
    results_ = render(request, template, data)
    # Same cache headers as for the form page: SEARCH_CACHE_PERIOD is used
    # as minutes, max-age is in seconds.
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
    # Store the URL-quoted query in the last-search cookie for one hour.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False)

    return results_
Exemple #13
0
def simple_search(request):
    """Search the Knowledge Base and/or Support Forum; answer in HTML or JSON.

    ``?format=json`` selects the JSON variant (the results dictionary for
    instant search); any other request renders the HTML templates. Invalid
    form data yields the search form page, or a 400 JSON error for JSON
    requests.
    """
    wants_json = request.GET.get("format") == "json"
    search_form = SimpleSearchForm(request.GET, auto_id=False)

    if not search_form.is_valid():
        if wants_json:
            return HttpResponse(
                json.dumps({"error": _("Invalid search data.")}),
                content_type="application/json",
                status=400,
            )
        return render(request, "search/form.html", {"search_form": search_form})

    cleaned = search_form.cleaned_data

    # Language, with its display name ("" when it is not in LANGUAGES_DICT).
    language = locale_or_default(cleaned["language"] or request.LANGUAGE_CODE)
    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ""

    # Product and product titles.
    product, product_titles = _get_product_title(cleaned["product"])

    # Compose the search from the sources selected by the `w` bit mask.
    search = CompoundSearch()
    sources = (
        (constants.WHERE_WIKI, WikiSearch),
        (constants.WHERE_SUPPORT, QuestionSearch),
    )
    for flag, search_cls in sources:
        if cleaned["w"] & flag:
            search.add(
                search_cls(query=cleaned["q"], locale=language, product=product)
            )

    # Paginating the compound search is what executes it.
    page = paginate(
        request,
        search,
        per_page=settings.SEARCH_RESULTS_PER_PAGE,
        paginator_cls=SumoSearchPaginator,
    )
    total = search.total
    results = search.results

    # Fallback results are only generated when nothing matched.
    fallback_results = (
        _fallback_results(language, cleaned["product"]) if total == 0 else None
    )

    visible_products = Product.objects.filter(visible=True)
    data = {
        "num_results": total,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": ", ".join(product_titles),
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
        "products": visible_products,
    }

    if wants_json:
        # Results dictionary for instant search: plain product dicts plus
        # pagination metadata.
        data["total"] = total
        data["products"] = [
            {"slug": p.slug, "title": pgettext("DB: products.Product.title", p.title)}
            for p in visible_products
        ]
        data["pagination"] = _make_pagination(page)

        if product:
            data["product"] = product.slug
        if not results:
            data["message"] = constants.NO_MATCH

        return HttpResponse(
            JSONRenderer().render(data), content_type="application/json"
        )

    # HTML page: add the template-only context.
    data["product"] = product
    data["pages"] = page
    data["search_form"] = search_form
    return render(request, "search/results.html", data)
Exemple #14
0
def simple_search(request, template=None):
    """Serve the simple (end-user) Elasticsearch search page.

    Searches the Knowledge Base and the Support Forum. Only two
    filters are honoured:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    Answers with JSON (optionally wrapped in a JSONP callback) when
    ``request.IS_JSON`` is truthy; otherwise renders ``template``.

    """
    render_json = JSONRenderer().render

    # Step 1: legacy Advanced Search URLs (?a={1,2}) are bounced to the
    # advanced search view with the query string left intact.
    if request.GET.get('a') in ['1', '2']:
        return HttpResponseRedirect(
            reverse('search.advanced') + '?' + request.GET.urlencode())

    # Step 2: bind the form to the query string and validate it.
    search_form = SimpleSearchForm(request.GET, auto_id=False)
    if not search_form.is_valid():
        if request.IS_JSON:
            error_body = json.dumps({'error': _('Invalid search data.')})
            return HttpResponse(error_body,
                                content_type=request.CONTENT_TYPE,
                                status=400)

        if request.MOBILE:
            form_template = template
        else:
            form_template = 'search/form.html'
        form_context = {
            'advanced': False,
            'request': request,
            'search_form': search_form,
        }
        form_response = render(request, form_template, form_context)
        return cache_control(form_response, settings.SEARCH_CACHE_PERIOD)

    # Step 3: build the search from the cleaned form data.
    cleaned = search_form.cleaned_data

    # On mobile, a basic-scope search is narrowed to wiki results only.
    # This has to happen before the search is generated below.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''

    searcher = generate_simple_search(
        search_form, language, with_highlights=True
    )[:settings.SEARCH_MAX_RESULTS]

    # Step 4: paginate and assemble the output.
    pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)

    nothing_found = pages.paginator.count == 0
    fallback_results = (
        _fallback_results(language, cleaned['product'])
        if nothing_found else None)
    results = (
        [] if nothing_found else build_results_list(pages, request.IS_JSON))

    matched_products = Product.objects.filter(slug__in=cleaned['product'])
    if not matched_products:
        titles = [_('All Products')]
    else:
        titles = [pgettext('DB: products.Product.title', p.title)
                  for p in matched_products]

    # FIXME: This is probably bad l10n.
    joined_titles = ', '.join(titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': joined_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
        'products': Product.objects.filter(visible=True),
    }

    if not request.IS_JSON:
        # HTML response: add the template-only context, cache the page
        # and remember the query in a cookie.
        data['product'] = matched_products
        data['pages'] = pages
        data['search_form'] = search_form
        data['advanced'] = False

        resp = cache_control(render(request, template, data),
                             settings.SEARCH_CACHE_PERIOD)
        resp.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)
        return resp

    # JSON response: replace model instances with plain serializable data.
    data['total'] = len(results)
    data['products'] = [{'slug': p.slug, 'title': p.title}
                        for p in data['products']]

    if matched_products:
        data['product'] = matched_products[0].slug

    pager_info = Paginator(pages)
    data['pagination'] = {
        'number': pager_info.pager.number,
        'num_pages': pager_info.pager.paginator.num_pages,
        'has_next': pager_info.pager.has_next(),
        'has_previous': pager_info.pager.has_previous(),
        'max': pager_info.max,
        'span': pager_info.span,
        'dotted_upper': pager_info.pager.dotted_upper,
        'dotted_lower': pager_info.pager.dotted_lower,
        'page_range': pager_info.pager.page_range,
        'url': pager_info.pager.url,
    }
    if not results:
        data['message'] = _('No pages matched the search criteria')

    json_data = render_json(data)
    if request.JSON_CALLBACK:
        # JSONP: wrap the payload in a call to the requested callback.
        json_data = request.JSON_CALLBACK + '(' + json_data + ');'
    return HttpResponse(json_data, content_type=request.CONTENT_TYPE)
Exemple #15
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    When no product is requested and ``all_products`` is absent from the
    query string, a product is guessed from keywords in the query text.

    :arg request: the current HTTP request; all search options are read
        from ``request.GET``
    :arg template: template used for the results page (and, on mobile,
        for the invalid-form page)

    :returns: a redirect for legacy advanced-search URLs; a JSON/JSONP
        response when ``format=json``; otherwise a rendered HTML
        response. Status is 400 for an invalid callback or form and 503
        when one of ``ES_EXCEPTIONS`` is raised while searching.
    """

    def _with_cache_headers(response):
        """Stamp ``response`` with the search cache headers and return it."""
        # SEARCH_CACHE_PERIOD is used as minutes: max-age wants seconds.
        cache_period = settings.SEARCH_CACHE_PERIOD
        response['Cache-Control'] = 'max-age={0!s}'.format((cache_period * 60))
        response['Expires'] = (
            (datetime.utcnow() + timedelta(minutes=cache_period))
            .strftime(EXPIRES_FMT))
        return response

    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get('a')
    if a in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = (
        'application/x-javascript' if callback else 'application/json')

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type, status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r['w'] = r.get('w', constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        return _with_cache_headers(render(request, t, {
            'advanced': False, 'request': request,
            'search_form': search_form}))

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS)
                .indexes(es_utils.read_index('default')))

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')

    cleaned_q = cleaned['q']
    # NOTE: this is the very list stored in cleaned['product'], so the
    # guesses appended below are also seen by the fallback-results and
    # Product lookups further down.
    products = cleaned['product']

    if not products and 'all_products' not in request.GET:
        # No explicit product: guess one from keywords in the query.
        # Order matters ('firefox os' must be tested before 'firefox').
        lowered_q = cleaned_q.lower()

        if 'thunderbird' in lowered_q:
            products.append('thunderbird')
        elif 'android' in lowered_q:
            products.append('mobile')
        elif ('ios' in lowered_q or 'ipad' in lowered_q or 'ipod' in lowered_q or
              'iphone' in lowered_q):
            products.append('ios')
        elif 'firefox os' in lowered_q:
            products.append('firefox-os')
        elif 'firefox' in lowered_q:
            products.append('firefox')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned['has_helpful'] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned['is_archived'] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('has_helpful', 'is_archived')
        d = dict(('question_{0!s}'.format(filter_name),
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0)

        # Build the query
        query_fields = chain(*[
            cls.get_query_fields() for cls in [
                DocumentMappingType,
                QuestionMappingType
            ]
        ])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ['match', 'match_phrase']:
                query['{0!s}__{1!s}'.format(field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Narrow down to the slice of documents for the requested page.
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(
                        doc['question_content'], strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                # Unknown model: skip it. Falling through would either
                # hit unbound `result`/`summary` (first hit) or mutate
                # and re-append the previous hit's dict (later hits).
                continue

            result['url'] = doc['url']
            result['object'] = doc
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc.es_meta.score
            result['explanation'] = escape(format_explanation(
                doc.es_meta.explanation))
            result['id'] = doc['id']
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title')
                          for p in product]
    else:
        product_titles = [_('All Products')]

    product_titles = ', '.join(product_titles)

    data = {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name, }

    if is_json:
        # Models are not json serializable.
        for entry in data['results']:
            del entry['object']
        data['total'] = len(data['results'])

        data['products'] = ([{'slug': p.slug, 'title': p.title}
                             for p in Product.objects.filter(visible=True)])

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in a call to the validated callback.
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = _with_cache_headers(render(request, template, data))
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_
Exemple #16
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    :arg request: the current HTTP request; all search options are read
        from ``request.GET``
    :arg template: template used for the results page (and, on mobile,
        for the invalid-form page)

    :returns: a redirect for legacy advanced-search URLs; a JSON/JSONP
        response when ``format=json``; otherwise a rendered HTML
        response. Status is 400 for an invalid callback or form and 503
        when one of ``ES_EXCEPTIONS`` is raised while searching.
    """

    def _with_cache_headers(response):
        """Stamp ``response`` with the search cache headers and return it."""
        # SEARCH_CACHE_PERIOD is used as minutes: max-age wants seconds.
        cache_period = settings.SEARCH_CACHE_PERIOD
        response['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
        response['Expires'] = (
            (datetime.utcnow() +
             timedelta(minutes=cache_period)).strftime(EXPIRES_FMT))
        return response

    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get('a')
    if a in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = ('application/x-javascript'
                    if callback else 'application/json')

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(json.dumps(
            {'error': _('Invalid callback function.')}),
                            content_type=content_type,
                            status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r['w'] = r.get('w', constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(json.dumps(
                {'error': _('Invalid search data.')}),
                                content_type=content_type,
                                status=400)

        t = template if request.MOBILE else 'search/form.html'
        return _with_cache_headers(render(request, t, {
            'advanced': False,
            'request': request,
            'search_form': search_form
        }))

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS).indexes(
        es_utils.read_index('default')))

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')

    # Shared by the wiki and support-question product filters below.
    products = cleaned['product']

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned['has_helpful'] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned['is_archived'] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    # Plain dict lookup; kept out of the try so the ES error handler only
    # wraps calls that build or run the search.
    cleaned_q = cleaned['q']

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0)

        # Build the query
        query_fields = chain(*[
            cls.get_query_fields()
            for cls in [DocumentMappingType, QuestionMappingType]
        ])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ['match', 'match_phrase']:
                query['%s__%s' % (field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Narrow down to the slice of documents for the requested page.
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {'title': doc['document_title'], 'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc['question_content'],
                                           strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']
                }

            else:
                # Unknown model: skip it. Falling through would either
                # hit unbound `result`/`summary` (first hit) or mutate
                # and re-append the previous hit's dict (later hits).
                continue

            result['url'] = doc['url']
            result['object'] = doc
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc.es_meta.score
            result['explanation'] = escape(
                format_explanation(doc.es_meta.explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type,
                                status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [
            _(p.title, 'DB: products.Product.title') for p in product
        ]
    else:
        product_titles = [_('All Products')]

    product_titles = ', '.join(product_titles)

    data = {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
    }

    if is_json:
        # Models are not json serializable.
        for entry in data['results']:
            del entry['object']
        data['total'] = len(data['results'])

        data['products'] = ([{
            'slug': p.slug,
            'title': p.title
        } for p in Product.objects.filter(visible=True)])

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in a call to the validated callback.
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = _with_cache_headers(render(request, template, data))
    results_.set_cookie(settings.LAST_SEARCH_COOKIE,
                        urlquote(cleaned['q']),
                        max_age=3600,
                        secure=False,
                        httponly=False)

    return results_
Exemple #17
0
def _set_search_cache_headers(response):
    """Set the `Cache-Control` and `Expires` headers on a search response.

    The lifetime comes from `settings.SEARCH_CACHE_PERIOD`, which is used as a number
    of minutes (converted to seconds for `max-age`).

    Returns the same response object so the call can be chained.
    """
    cache_period = settings.SEARCH_CACHE_PERIOD
    response['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
    response['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period))
        .strftime(EXPIRES_FMT))
    return response


def _build_simple_search_result(doc, rank):
    """Convert one search hit into the result dict used by the templates and JSON output.

    `doc` is a single item yielded by the paginated searcher and `rank` is its position
    in the overall result list.

    Returns None when the hit's `model` is neither `wiki_document` nor
    `questions_question`, so the caller can skip it instead of reusing stale data.
    """
    model = doc['model']

    if model == 'wiki_document':
        summary = _build_es_excerpt(doc)
        if not summary:
            summary = doc['document_summary']
        result = {
            'title': doc['document_title'],
            'type': 'document'}

    elif model == 'questions_question':
        summary = _build_es_excerpt(doc)
        if not summary:
            # We're excerpting only question_content, so if the query matched
            # question_title or question_answer_content, then there won't be any
            # question_content excerpts. In that case, just show the question--but
            # only the first 500 characters.
            summary = bleach.clean(doc['question_content'], strip=True)[:500]

        result = {
            'title': doc['question_title'],
            'type': 'question',
            'is_solved': doc['question_is_solved'],
            'num_answers': doc['question_num_answers'],
            'num_votes': doc['question_num_votes'],
            'num_votes_past_week': doc['question_num_votes_past_week']}

    else:
        return None

    result['url'] = doc['url']
    result['object'] = doc
    result['search_summary'] = summary
    result['rank'] = rank
    result['score'] = doc.es_meta.score
    result['explanation'] = escape(format_explanation(
        doc.es_meta.explanation))
    result['id'] = doc['id']
    return result


def simple_search(request, template=None):
    """Elasticsearch-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    Behaviour by request:

    * `a=1` or `a=2` redirects to the `search.advanced` URL, keeping the query string.
    * `format=json` returns JSON; if `callback` is also given and valid, the JSON is
      wrapped as JSONP. An invalid callback or invalid form data yields a 400.
    * Invalid form data on a non-JSON request renders the search form.
    * Elasticsearch errors yield a 503 ("Search Unavailable" JSON, or a "down" page).
    * Otherwise `template` is rendered with the results, cache headers are set and the
      query is stored in the `settings.LAST_SEARCH_COOKIE` cookie.

    :arg request: the incoming request; `request.GET` carries the search parameters
    :arg template: template used for the HTML results (and for the form on mobile)

    :returns: an HTTP response object

    """
    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    if request.GET.get('a') in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = 'application/x-javascript' if callback else 'application/json'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type,
            status=400)

    search_form = SimpleSearchForm(request.GET, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': False,
            'request': request,
            'search_form': search_form})
        return _set_search_cache_headers(search_)

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    language = locale_or_default(cleaned['language'] or request.LANGUAGE_CODE)
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    searcher = generate_simple_search(search_form, language, with_highlights=True)
    searcher = searcher[:settings.SEARCH_MAX_RESULTS]
    fallback_results = None

    try:
        pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)
        offset = pages.start_index()

        results = []
        if pages.paginator.count == 0:
            fallback_results = _fallback_results(language, cleaned['product'])

        else:
            for rank, doc in enumerate(pages, offset):
                result = _build_simple_search_result(doc, rank)
                if result is None:
                    # Without this guard an unexpected model would either raise a
                    # NameError or re-append (and overwrite) the previous hit's dict.
                    log.warning('Skipping search hit with unexpected model: %r',
                                doc['model'])
                    continue
                results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title') for p in product]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name}

    if is_json:
        # Models are not json serializable.
        for r in data['results']:
            del r['object']
        # Number of results on this page; the overall count is in 'num_results'.
        data['total'] = len(data['results'])

        data['products'] = [{'slug': p.slug, 'title': p.title}
                            for p in Product.objects.filter(visible=True)]

        if product:
            data['product'] = product[0].slug

        pager_info = Paginator(pages)
        data['pagination'] = dict(
            number=pager_info.pager.number,
            num_pages=pager_info.pager.paginator.num_pages,
            has_next=pager_info.pager.has_next(),
            has_previous=pager_info.pager.has_previous(),
            max=pager_info.max,
            span=pager_info.span,
            dotted_upper=pager_info.pager.dotted_upper,
            dotted_lower=pager_info.pager.dotted_lower,
            page_range=pager_info.pager.page_range,
            url=pager_info.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in a call to the (already validated) callback.
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = _set_search_cache_headers(render(request, template, data))
    # Remember the last query in a cookie for one hour.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_