def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:

    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    :arg request: the HttpRequest; query parameters drive everything
        (``q``, ``w``, ``product``, ``page``, ``format``, ``callback``,
        ``language``, ``explain``, ``a``).
    :arg template: template name used for the HTML results page (and,
        on mobile, for the search form page).

    :returns: an HttpResponse — a redirect to advanced search for old
        ``?a=`` URLs, a JSON/JSONP payload when ``format=json``, or a
        rendered HTML page otherwise. Transient ES errors yield a 503
        "Search Unavailable" response rather than a Django error page.
    """
    # Redirect old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get("a")
    if a in ["1", "2"]:
        new_url = reverse("search.advanced") + "?" + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    content_type = "application/x-javascript" if callback else "application/json"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400
        )

    language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r["w"] = r.get("w", constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400
            )

        t = template if request.MOBILE else "search/form.html"
        search_ = render(request, t, {"advanced": False, "request": request, "search_form": search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC:
        cleaned["w"] = constants.WHERE_WIKI

    # Clamp the requested page to >= 1; offset is 0-based into the
    # full result list.
    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ""

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    wiki_f = F(model="wiki_document")
    question_f = F(model="questions_question")

    cleaned_q = cleaned["q"]
    products = cleaned["product"]

    # If no product was specified and the user didn't opt out with
    # "all_products", guess one from keywords in the query. Note this
    # mutates the form's cleaned "product" list in place.
    if not products and "all_products" not in request.GET:
        lowered_q = cleaned_q.lower()

        if "thunderbird" in lowered_q:
            products.append("thunderbird")
        elif "android" in lowered_q:
            products.append("mobile")
        elif "ios" in lowered_q or "ipad" in lowered_q or "ipod" in lowered_q or "iphone" in lowered_q:
            products.append("ios")
        elif "firefox os" in lowered_q:
            products.append("firefox-os")
        elif "firefox" in lowered_q:
            products.append("firefox")

    # Start - wiki filters

    if cleaned["w"] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned["w"] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned["has_helpful"] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned["is_archived"] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("has_helpful", "is_archived")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned["w"] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned["w"] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if "explain" in request.GET and request.GET["explain"] == "1":
        searcher = searcher.explain()

    documents = ComposedList()

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,
            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0,
        )

        # Build the query
        query_fields = chain(*[cls.get_query_fields() for cls in [DocumentMappingType, QuestionMappingType]])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ["match", "match_phrase"]:
                query["%s__%s" % (field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(("results", searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset : offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0] : bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc["model"] == "wiki_document":
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc["document_summary"]
                result = {"title": doc["document_title"], "type": "document"}

            elif doc["model"] == "questions_question":
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc["question_content"], strip=True)[:500]

                result = {
                    "title": doc["question_title"],
                    "type": "question",
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }

            result["url"] = doc["url"]
            result["object"] = doc
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc.es_meta.score
            result["explanation"] = escape(format_explanation(doc.es_meta.explanation))
            result["id"] = doc["id"]
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(
                json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503
            )

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip("()")
        statsd.incr("search.esunified.{0}".format(exc_bucket))

        log.exception(exc)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return render(request, t, {"q": cleaned["q"]}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    product = Product.objects.filter(slug__in=cleaned["product"])
    if product:
        product_titles = [_(p.title, "DB: products.Product.title") for p in product]
    else:
        product_titles = [_("All Products")]

    product_titles = ", ".join(product_titles)

    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
    }

    if is_json:
        # Models are not json serializable, so strip them out of each
        # result. (Loop variable deliberately does not reuse the name
        # `r`, which still holds the copied QueryDict above.)
        for result_dict in data["results"]:
            del result_dict["object"]
        data["total"] = len(data["results"])

        data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)]

        if product:
            data["product"] = product[0].slug

        pages = Paginator(pages)
        data["pagination"] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )

        if not results:
            data["message"] = _("No pages matched the search criteria")

        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the validated callback.
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, content_type=content_type)

    data.update(
        {
            "product": product,
            "products": Product.objects.filter(visible=True),
            "pages": pages,
            "search_form": search_form,
            "advanced": False,
        }
    )
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
    results_.set_cookie(
        settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False
    )

    return results_
def advanced_search(request, template=None):
    """ES-specific Advanced search view.

    Searches the Knowledge Base, Support Forum questions, and
    contributor discussion forums with the full set of advanced
    filters (category, topics, ternary question flags, authors, tags,
    date intervals, vote counts, forum access, sort order).

    :arg request: the HttpRequest; behavior is driven by GET params.
        ``a`` switches between showing the form (``a=2``, the default)
        and showing results (``a=1``).
    :arg template: template name used for the HTML results page (and,
        on mobile, for the search form page).

    :returns: an HttpResponse — JSON/JSONP when ``format=json``, a
        rendered HTML form or results page otherwise. Transient ES
        errors yield a 503 "Search Unavailable" response.
    """
    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    content_type = "application/x-javascript" if callback else "application/json"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400
        )

    language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Figure out how to get rid of 'a' and do it.
    # It basically is used to switch between showing the form or results.
    a = request.GET.get("a", "2")

    # TODO: This is so the 'a=1' stays in the URL for pagination.
    r["a"] = 1

    # Search default values. NOTE: map() must be materialized with
    # list() — on Python 3 it returns a lazy, always-truthy iterator,
    # so without list() the `or` default would never apply and a
    # ValueError from a bad category value would escape this try
    # (it would only surface later, inside setlist()).
    try:
        category = list(map(int, r.getlist("category"))) or settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist("category", category)

    r["language"] = language

    search_form = AdvancedSearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # This is all we use a for now I think.
    if not search_form.is_valid() or a == "2":
        if is_json:
            return HttpResponse(
                json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400
            )

        t = template if request.MOBILE else "search/form.html"
        search_ = render(request, t, {"advanced": True, "request": request, "search_form": search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC:
        cleaned["w"] = constants.WHERE_WIKI

    # Clamp the requested page to >= 1; offset is 0-based into the
    # full result list.
    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ""

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    wiki_f = F(model="wiki_document")
    question_f = F(model="questions_question")
    discussion_f = F(model="forums_thread")

    # Start - wiki filters

    if cleaned["w"] & constants.WHERE_WIKI:
        # Category filter
        if cleaned["category"]:
            wiki_f &= F(document_category__in=cleaned["category"])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned["product"]
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned["topics"]
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if not cleaned["include_archived"]:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned["w"] & constants.WHERE_SUPPORT:
        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful", "is_archived")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_f &= F(**d)

        if cleaned["asked_by"]:
            question_f &= F(question_creator=cleaned["asked_by"])

        if cleaned["answered_by"]:
            question_f &= F(question_answer_creator=cleaned["answered_by"])

        # Tags come in as a comma-separated string; blanks are skipped.
        q_tags = [t.strip() for t in cleaned["q_tags"].split(",")]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned["product"]
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned["topics"]
        for t in topics:
            question_f &= F(topic=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned["w"] & constants.WHERE_DISCUSSION:
        if cleaned["author"]:
            discussion_f &= F(post_author_ord=cleaned["author"])

        if cleaned["thread_type"]:
            if constants.DISCUSSION_STICKY in cleaned["thread_type"]:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned["thread_type"]:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [f.id for f in Forum.authorized_forums_for_user(request.user)]

        forum_ids = None
        if cleaned["forum"]:
            forum_ids = [f for f in cleaned["forum"] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

    # End - discussion forum filters

    # Created filter — both intervals are expressed as unix-timestamp
    # ranges applied to questions and discussion threads only.
    unix_now = int(time.time())
    interval_filters = (
        ("created", cleaned["created"], cleaned["created_date"]),
        ("updated", cleaned["updated"], cleaned["updated_date"]),
    )
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + "__gte": 0, filter_name + "__lte": max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + "__gte": min(filter_date, unix_now), filter_name + "__lte": unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned["num_voted"] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned["num_votes"], 0))
    elif cleaned["num_voted"] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned["num_votes"])

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned["w"] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned["w"] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    if cleaned["w"] & constants.WHERE_DISCUSSION:
        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if "explain" in request.GET and request.GET["explain"] == "1":
        searcher = searcher.explain()

    documents = ComposedList()

    try:
        cleaned_q = cleaned["q"]

        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            "post_content",  # contributor forum
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,
            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0,
        )

        # Apply sortby for advanced search of questions
        if cleaned["w"] == constants.WHERE_SUPPORT:
            sortby = cleaned["sortby"]
            try:
                searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned["w"] == constants.WHERE_WIKI:
            sortby = cleaned["sortby_documents"]
            try:
                searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(
                *[cls.get_query_fields() for cls in [DocumentMappingType, ThreadMappingType, QuestionMappingType]]
            )
            query = {}
            # Create a simple_query_search query for every field
            # we want to search.
            for field in query_fields:
                query["%s__sqs" % field] = cleaned_q

            # Transform the query to use locale aware analyzers.
            query = es_utils.es_query_with_analyzer(query, language)

            searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(("results", searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset : offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0] : bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc["model"] == "wiki_document":
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc["document_summary"]
                result = {"title": doc["document_title"], "type": "document"}

            elif doc["model"] == "questions_question":
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc["question_content"], strip=True)[:500]

                result = {
                    "title": doc["question_title"],
                    "type": "question",
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }

            else:
                # Discussion forum thread (forums_thread).
                summary = _build_es_excerpt(doc, first_only=True)
                result = {"title": doc["post_title"], "type": "thread"}

            result["url"] = doc["url"]
            result["object"] = doc
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc.es_meta.score
            result["explanation"] = escape(format_explanation(doc.es_meta.explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(
                json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503
            )

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip("()")
        statsd.incr("search.esunified.{0}".format(exc_bucket))

        log.exception(exc)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return render(request, t, {"q": cleaned["q"]}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    product = Product.objects.filter(slug__in=cleaned["product"])
    if product:
        product_titles = [_(p.title, "DB: products.Product.title") for p in product]
    else:
        product_titles = [_("All Products")]

    product_titles = ", ".join(product_titles)

    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
        "advanced": True,
    }

    if is_json:
        # Models are not json serializable, so strip them out of each
        # result. (Loop variable deliberately does not reuse the name
        # `r`, which still holds the copied QueryDict above.)
        for result_dict in data["results"]:
            del result_dict["object"]
        data["total"] = len(data["results"])

        data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)]

        if product:
            data["product"] = product[0].slug

        pages = Paginator(pages)
        data["pagination"] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )

        if not results:
            data["message"] = _("No pages matched the search criteria")

        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the validated callback.
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, content_type=content_type)

    data.update(
        {
            "product": product,
            "products": Product.objects.filter(visible=True),
            "pages": pages,
            "search_form": search_form,
        }
    )
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
    results_.set_cookie(
        settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False
    )

    return results_