def compare_revisions(request, document_slug): """Compare two wiki document revisions. The ids are passed as query string parameters (to and from). """ locale = request.GET.get('locale', request.locale) doc = get_object_or_404(Document, locale=locale, slug=document_slug) if 'from' not in request.GET or 'to' not in request.GET: raise Http404 from_id = smart_int(request.GET.get('from')) to_id = smart_int(request.GET.get('to')) revision_from = get_object_or_404(Revision, document=doc, id=from_id) revision_to = get_object_or_404(Revision, document=doc, id=to_id) if request.is_ajax(): template = 'wiki/includes/revision_diff.html' else: template = 'wiki/compare_revisions.html' return jingo.render( request, template, { 'document': doc, 'revision_from': revision_from, 'revision_to': revision_to })
def wiki_rows(request, readout_slug): """Return the table contents HTML for the given readout and mode.""" readout = _kb_readout(request, readout_slug, READOUTS, locale=request.GET.get('locale'), mode=smart_int(request.GET.get('mode'), None)) max_rows = smart_int(request.GET.get('max'), fallback=None) return HttpResponse(readout.render(max_rows=max_rows))
def compare_revisions(request, document_slug): """Compare two wiki document revisions. The ids are passed as query string parameters (to and from). """ locale = request.GET.get('locale', request.locale) doc = get_object_or_404( Document, locale=locale, slug=document_slug) if 'from' not in request.GET or 'to' not in request.GET: raise Http404 from_id = smart_int(request.GET.get('from')) to_id = smart_int(request.GET.get('to')) revision_from = get_object_or_404(Revision, document=doc, id=from_id) revision_to = get_object_or_404(Revision, document=doc, id=to_id) if request.is_ajax(): template = 'wiki/includes/revision_diff.html' else: template = 'wiki/compare_revisions.html' return jingo.render(request, template, {'document': doc, 'revision_from': revision_from, 'revision_to': revision_to})
def wiki_rows(request, readout_slug): """Return the table contents HTML for the given readout and mode.""" readout = _kb_readout( request, readout_slug, READOUTS, locale=request.GET.get("locale"), mode=smart_int(request.GET.get("mode"), None) ) max_rows = smart_int(request.GET.get("max"), fallback=None) return HttpResponse(readout.render(max_rows=max_rows))
def wiki_rows(request, readout_slug): """Return the table contents HTML for the given readout and mode.""" product = _get_product(request) readout = _kb_readout(request, readout_slug, READOUTS, locale=request.GET.get('locale'), mode=smart_int(request.GET.get('mode'), None), product=product) max_rows = smart_int(request.GET.get('max'), fallback=None) return HttpResponse(readout.render(max_rows=max_rows))
def helpful_vote(request, document_slug): """Vote for Helpful/Not Helpful document""" revision = get_object_or_404( Revision, id=smart_int(request.POST['revision_id'])) if not revision.has_voted(request): ua = request.META.get('HTTP_USER_AGENT', '')[:1000] # 1000 max_length vote = HelpfulVote(revision=revision, user_agent=ua) if 'helpful' in request.POST: vote.helpful = True message = _('Glad to hear it — thanks for the feedback!') else: message = _('Sorry to hear that. Try searching for solutions ' 'below.') if request.user.is_authenticated(): vote.creator = request.user else: vote.anonymous_id = request.anonymous.anonymous_id vote.save() statsd.incr('wiki.vote') else: message = _('You already voted on this Article.') if request.is_ajax(): return HttpResponse(json.dumps({'message': message})) return HttpResponseRedirect(revision.document.get_absolute_url())
def helpful_vote(request, document_slug): """Vote for Helpful/Not Helpful document""" if 'revision_id' not in request.POST: return HttpResponseBadRequest() revision = get_object_or_404( Revision, id=smart_int(request.POST['revision_id'])) survey = None if revision.document.category == TEMPLATES_CATEGORY: return HttpResponseBadRequest() if not revision.has_voted(request): ua = request.META.get('HTTP_USER_AGENT', '')[:1000] # 1000 max_length vote = HelpfulVote(revision=revision, user_agent=ua) if 'helpful' in request.POST: vote.helpful = True message = _('Glad to hear it — thanks for the feedback!') else: message = _('Sorry to hear that.') # If user is over the limit, don't save but pretend everything is ok. if not request.limited: if request.user.is_authenticated(): vote.creator = request.user else: vote.anonymous_id = request.anonymous.anonymous_id vote.save() statsd.incr('wiki.vote') # Send a survey if flag is enabled and vote wasn't helpful. if 'helpful' not in request.POST: survey = jingo.render_to_string( request, 'wiki/includes/unhelpful_survey.html', {'vote_id': vote.id}) # Save vote metadata: referrer and search query (if available) for name in ['referrer', 'query', 'source']: val = request.POST.get(name) if val: vote.add_metadata(name, val) else: message = _('You already voted on this Article.') if request.is_ajax(): r = {'message': message} if survey: r.update(survey=survey) return HttpResponse(json.dumps(r)) return HttpResponseRedirect(revision.document.get_absolute_url())
def helpful_vote(request, document_slug): """Vote for Helpful/Not Helpful document""" if 'revision_id' not in request.POST: return HttpResponseBadRequest() revision = get_object_or_404( Revision, id=smart_int(request.POST['revision_id'])) survey = None if revision.document.category == TEMPLATES_CATEGORY: return HttpResponseBadRequest() if not revision.has_voted(request): ua = request.META.get('HTTP_USER_AGENT', '')[:1000] # 1000 max_length vote = HelpfulVote(revision=revision, user_agent=ua) if 'helpful' in request.POST: vote.helpful = True message = _('Glad to hear it — thanks for the feedback!') else: message = _('Sorry to hear that.') # If user is over the limit, don't save but pretend everything is ok. if not request.limited: if request.user.is_authenticated(): vote.creator = request.user else: vote.anonymous_id = request.anonymous.anonymous_id vote.save() statsd.incr('wiki.vote') # Send a survey if flag is enabled and vote wasn't helpful. if 'helpful' not in request.POST: survey = jingo.render_to_string( request, 'wiki/includes/unhelpful_survey.html', {'vote_id': vote.id}) # Save vote metadata: referrer and search query (if available) for name in ['referrer', 'query', 'source']: val = request.POST.get(name) if val: vote.add_metadata(name, val) else: message = _('You already voted on this Article.') if request.is_ajax(): r = {'message': message} if survey: r.update(survey=survey) return HttpResponse(json.dumps(r)) return HttpResponseRedirect(revision.document.get_absolute_url())
def unhelpful_survey(request): """Ajax only view: Unhelpful vote survey processing.""" vote = get_object_or_404(HelpfulVote, id=smart_int(request.POST.get('vote_id'))) # The survey is the posted data, minus the vote_id and button value. survey = request.POST.copy() survey.pop('vote_id') survey.pop('button') # Save the survey in JSON format, taking care not to exceed 1000 chars. vote.add_metadata('survey', truncated_json_dumps(survey, 1000, 'comment')) return HttpResponse( json.dumps({'message': _('Thanks for making us better!')}))
def compare_revisions(request, document_slug): """Compare two wiki document revisions. The ids are passed as query string parameters (to and from). """ locale = request.GET.get("locale", request.locale) doc = get_object_or_404(Document, locale=locale, slug=document_slug) if "from" not in request.GET or "to" not in request.GET: raise Http404 from_id = smart_int(request.GET.get("from")) to_id = smart_int(request.GET.get("to")) revision_from = get_object_or_404(Revision, document=doc, id=from_id) revision_to = get_object_or_404(Revision, document=doc, id=to_id) if request.is_ajax(): template = "wiki/includes/revision_diff.html" else: template = "wiki/compare_revisions.html" return jingo.render( request, template, {"document": doc, "revision_from": revision_from, "revision_to": revision_to} )
def unhelpful_survey(request): """Ajax only view: Unhelpful vote survey processing.""" vote = get_object_or_404(HelpfulVote, id=smart_int(request.POST.get('vote_id'))) # The survey is the posted data, minus the vote_id and button value. survey = request.POST.copy() survey.pop('vote_id') survey.pop('button') # Save the survey in JSON format, taking care not to exceed 1000 chars. vote.add_metadata('survey', truncated_json_dumps(survey, 1000, 'comment')) return HttpResponse( json.dumps({'message': _('Thanks for making us better!')}))
def helpful_vote(request, document_slug): """Vote for Helpful/Not Helpful document""" if "revision_id" not in request.POST: return HttpResponseBadRequest() revision = get_object_or_404(Revision, id=smart_int(request.POST["revision_id"])) if not revision.has_voted(request): ua = request.META.get("HTTP_USER_AGENT", "")[:1000] # 1000 max_length vote = HelpfulVote(revision=revision, user_agent=ua) if "helpful" in request.POST: vote.helpful = True message = _("Glad to hear it — thanks for the feedback!") else: message = _("Sorry to hear that. Try searching for solutions " "below.") if request.user.is_authenticated(): vote.creator = request.user else: vote.anonymous_id = request.anonymous.anonymous_id vote.save() statsd.incr("wiki.vote") # Save vote metadata: referrer and search query (if available) for name in ["referrer", "query"]: val = request.POST.get(name) if val: vote.add_metadata(name, val) else: message = _("You already voted on this Article.") if request.is_ajax(): return HttpResponse(json.dumps({"message": message})) return HttpResponseRedirect(revision.document.get_absolute_url())
def search(request, template=None): """Performs search or displays the search form.""" # JSON-specific variables is_json = request.GET.get("format") == "json" callback = request.GET.get("callback", "").strip() mimetype = "application/x-javascript" if callback else "application/json" # Search "Expires" header format expires_fmt = "%A, %d %B %Y %H:%M:%S GMT" # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse(json.dumps({"error": _("Invalid callback function.")}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get("language", request.locale)) r = request.GET.copy() a = request.GET.get("a", "0") # Search default values try: category = map(int, r.getlist("category")) or settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist("category", [x for x in category if x > 0]) exclude_category = [abs(x) for x in category if x < 0] # Basic form if a == "0": r["w"] = r.get("w", constants.WHERE_BASIC) # Advanced form if a == "2": r["language"] = language r["a"] = "1" # TODO: Rewrite so SearchForm is unbound initially and we can use `initial` # on the form fields. if "include_archived" not in r: r["include_archived"] = False search_form = SearchForm(r) if not search_form.is_valid() or a == "2": if is_json: return HttpResponse(json.dumps({"error": _("Invalid search data.")}), mimetype=mimetype, status=400) t = template if request.MOBILE else "search/form.html" search_ = jingo.render(request, t, {"advanced": a, "request": request, "search_form": search_form}) search_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60) search_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data search_locale = (sphinx_locale(language),) page = max(smart_int(request.GET.get("page")), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = "" documents = [] filters_w = [] filters_q = [] filters_f = [] # wiki filters # Category filter if cleaned["category"]: filters_w.append({"filter": "category", "value": cleaned["category"]}) if exclude_category: filters_w.append({"filter": "category", "value": exclude_category, "exclude": True}) # Locale filter filters_w.append({"filter": "locale", "value": search_locale}) # Product filter products = cleaned["product"] if products: for p in products: filters_w.append({"filter": "tag", "value": (crc32(p),)}) # Tags filter tags = [crc32(t.strip()) for t in cleaned["tags"].split()] if tags: for t in tags: filters_w.append({"filter": "tag", "value": (t,)}) # Archived bit if a == "0" and not cleaned["include_archived"]: # Default to NO for basic search: cleaned["include_archived"] = False if not cleaned["include_archived"]: filters_w.append({"filter": "is_archived", "value": (False,)}) # End of wiki filters # Support questions specific filters if cleaned["w"] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == "0" and not cleaned["has_helpful"]: cleaned["has_helpful"] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful") filters_q.extend( _ternary_filter(filter_name, cleaned[filter_name]) for filter_name in ternary_filters if cleaned[filter_name] ) if cleaned["asked_by"]: filters_q.append({"filter": "question_creator", "value": (crc32(cleaned["asked_by"]),)}) if cleaned["answered_by"]: filters_q.append({"filter": "answer_creator", "value": (crc32(cleaned["answered_by"]),)}) q_tags = [crc32(t.strip()) for t in cleaned["q_tags"].split()] if q_tags: for t in q_tags: filters_q.append({"filter": "tag", "value": (t,)}) # Discussion forum specific filters if cleaned["w"] & constants.WHERE_DISCUSSION: if cleaned["author"]: filters_f.append({"filter": "author_ord", "value": (crc32(cleaned["author"]),)}) if cleaned["thread_type"]: if constants.DISCUSSION_STICKY in cleaned["thread_type"]: filters_f.append({"filter": "is_sticky", "value": (1,)}) if constants.DISCUSSION_LOCKED in cleaned["thread_type"]: filters_f.append({"filter": "is_locked", "value": (1,)}) if cleaned["forum"]: filters_f.append({"filter": "forum_id", "value": cleaned["forum"]}) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ("created", cleaned["created"], cleaned["created_date"]), ("updated", cleaned["updated"], cleaned["updated_date"]), ("question_votes", cleaned["num_voted"], cleaned["num_votes"]), ) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {"range": True, "filter": filter_name, "min": 0, "max": max(filter_date, 0)} if filter_name != "question_votes": filters_f.append(before) filters_q.append(before) elif filter_option == constants.INTERVAL_AFTER: after = {"range": True, "filter": filter_name, "min": min(filter_date, unix_now), "max": unix_now} if filter_name != "question_votes": filters_f.append(after) filters_q.append(after) sortby = smart_int(request.GET.get("sortby")) try: if cleaned["w"] & constants.WHERE_WIKI: wc = WikiClient() # Wiki SearchClient instance # Execute the query and append to documents documents += wc.query(cleaned["q"], filters_w) if cleaned["w"] & constants.WHERE_SUPPORT: qc = QuestionsClient() # Support question SearchClient instance # Sort results by try: qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0], constants.SORT_QUESTIONS[sortby][1]) except IndexError: pass documents += qc.query(cleaned["q"], filters_q) if cleaned["w"] & constants.WHERE_DISCUSSION: dc = DiscussionClient() # Discussion forums SearchClient instance # Sort results by try: dc.groupsort = constants.GROUPSORT[sortby] except IndexError: pass documents += dc.query(cleaned["q"], filters_f) except SearchError: if is_json: return HttpResponse(json.dumps({"error": _("Search Unavailable")}), mimetype=mimetype, status=503) t = "search/mobile/down.html" if request.MOBILE else "search/down.html" return jingo.render(request, t, {"q": cleaned["q"]}, status=503) pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) results = [] for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE): try: if documents[i]["attrs"].get("category", False) != False: wiki_page = Document.objects.get(pk=documents[i]["id"]) summary = wiki_page.current_revision.summary result = { "search_summary": summary, "url": wiki_page.get_absolute_url(), "title": wiki_page.title, "type": "document", "rank": i, "object": wiki_page, } results.append(result) elif documents[i]["attrs"].get("question_creator", False) != False: question = Question.objects.get(pk=documents[i]["attrs"]["question_id"]) excerpt = qc.excerpt(question.content, cleaned["q"]) summary = jinja2.Markup(excerpt) result = { "search_summary": summary, "url": question.get_absolute_url(), "title": question.title, "type": "question", "rank": i, "object": question, } results.append(result) else: thread = Thread.objects.get(pk=documents[i]["attrs"]["thread_id"]) post = Post.objects.get(pk=documents[i]["id"]) excerpt = dc.excerpt(post.content, cleaned["q"]) summary = jinja2.Markup(excerpt) result = { "search_summary": summary, "url": thread.get_absolute_url(), "title": thread.title, "type": "thread", "rank": i, "object": thread, } results.append(result) except IndexError: break except ObjectDoesNotExist: continue items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"] items.append(("a", "2")) refine_query = u"?%s" % urlencode(items) if is_json: # Models are not json serializable. for r in results: del r["object"] data = {} data["results"] = results data["total"] = len(results) data["query"] = cleaned["q"] if not results: data["message"] = _("No pages matched the search criteria") json_data = json.dumps(data) if callback: json_data = callback + "(" + json_data + ");" return HttpResponse(json_data, mimetype=mimetype) results_ = jingo.render( request, template, { "num_results": len(documents), "results": results, "q": cleaned["q"], "pages": pages, "w": cleaned["w"], "refine_query": refine_query, "search_form": search_form, "lang_name": lang_name, }, ) results_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60) results_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt) results_.set_cookie(settings.LAST_SEARCH_COOKIE, cleaned["q"], max_age=3600, secure=False, httponly=False) return results_
def search(request, template=None): """Performs search or displays the search form.""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse(json.dumps( {'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = map(int, r.getlist('category')) or \ settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', [x for x in category if x > 0]) exclude_category = [abs(x) for x in category if x < 0] try: fx = map(int, r.getlist('fx')) or [v.id for v in FIREFOX_VERSIONS] except ValueError: fx = [v.id for v in FIREFOX_VERSIONS] r.setlist('fx', fx) try: os = map(int, r.getlist('os')) or [o.id for o in OPERATING_SYSTEMS] except ValueError: os = [o.id for o in OPERATING_SYSTEMS] r.setlist('os', os) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse(json.dumps( {'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, { 'advanced': a, 'request': request, 'search_form': search_form }) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data search_locale = (sphinx_locale(language), ) page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' documents = [] filters_w = [] filters_q = [] filters_f = [] # wiki filters # Version and OS filters if cleaned['fx']: filters_w.append({ 'filter': 'fx', 'value': cleaned['fx'], }) if cleaned['os']: filters_w.append({ 'filter': 'os', 'value': cleaned['os'], }) # Category filter if cleaned['category']: filters_w.append({ 'filter': 'category', 'value': cleaned['category'], }) if exclude_category: filters_w.append({ 'filter': 'category', 'value': exclude_category, 'exclude': True, }) # Locale filter filters_w.append({ 'filter': 'locale', 'value': search_locale, }) # Tags filter tags = [crc32(t.strip()) for t in cleaned['tags'].split()] if tags: for t in tags: filters_w.append({ 'filter': 'tag', 'value': (t, ), }) # End of wiki filters # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF toggle_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') for filter_name in toggle_filters: if cleaned[filter_name] == constants.TERNARY_YES: filters_q.append({ 'filter': filter_name, 'value': (True, ), }) if cleaned[filter_name] == constants.TERNARY_NO: filters_q.append({ 'filter': filter_name, 'value': (False, ), }) if cleaned['asked_by']: filters_q.append({ 'filter': 'question_creator', 'value': (crc32(cleaned['asked_by']), ), }) if cleaned['answered_by']: filters_q.append({ 'filter': 'answer_creator', 'value': (crc32(cleaned['answered_by']), ), }) q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()] if q_tags: for t in q_tags: filters_q.append({ 'filter': 'tag', 'value': (t, ), }) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: filters_f.append({ 'filter': 'author_ord', 'value': (crc32(cleaned['author']), ), }) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: filters_f.append({ 'filter': 'is_sticky', 'value': (1, ), }) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: filters_f.append({ 'filter': 'is_locked', 'value': (1, ), }) if cleaned['forum']: filters_f.append({ 'filter': 'forum_id', 'value': cleaned['forum'], }) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = (('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']), ('question_votes', cleaned['num_voted'], cleaned['num_votes'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = { 'range': True, 'filter': filter_name, 'min': 0, 'max': max(filter_date, 0), } if filter_name != 'question_votes': filters_f.append(before) filters_q.append(before) elif filter_option == constants.INTERVAL_AFTER: after = { 'range': True, 'filter': filter_name, 'min': min(filter_date, unix_now), 'max': unix_now, } if filter_name != 'question_votes': filters_f.append(after) filters_q.append(after) sortby = smart_int(request.GET.get('sortby')) try: if cleaned['w'] & constants.WHERE_WIKI: wc = WikiClient() # Wiki SearchClient instance # Execute the query and append to documents documents += wc.query(cleaned['q'], filters_w) if cleaned['w'] & constants.WHERE_SUPPORT: qc = QuestionsClient() # Support question SearchClient instance # Sort results by try: qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0], constants.SORT_QUESTIONS[sortby][1]) except IndexError: pass documents += qc.query(cleaned['q'], filters_q) if cleaned['w'] & constants.WHERE_DISCUSSION: dc = DiscussionClient() # Discussion forums SearchClient instance # Sort results by try: dc.groupsort = constants.GROUPSORT[sortby] except IndexError: pass documents += dc.query(cleaned['q'], filters_f) except SearchError: if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503) pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) results = [] for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE): try: if documents[i]['attrs'].get('category', False) != False: wiki_page = Document.objects.get(pk=documents[i]['id']) summary = wiki_page.current_revision.summary result = { 'search_summary': summary, 'url': wiki_page.get_absolute_url(), 'title': wiki_page.title, 'type': 'document', } results.append(result) elif documents[i]['attrs'].get('question_creator', False) != False: question = Question.objects.get( pk=documents[i]['attrs']['question_id']) excerpt = qc.excerpt(question.content, cleaned['q']) summary = jinja2.Markup(excerpt) result = { 'search_summary': summary, 'url': question.get_absolute_url(), 'title': question.title, 'type': 'question', } results.append(result) else: thread = Thread.objects.get( pk=documents[i]['attrs']['thread_id']) post = Post.objects.get(pk=documents[i]['id']) excerpt = dc.excerpt(post.content, cleaned['q']) summary = jinja2.Markup(excerpt) result = { 'search_summary': summary, 'url': thread.get_absolute_url(), 'title': thread.title, 'type': 'thread', } results.append(result) except IndexError: break except ObjectDoesNotExist: continue items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) refine_query = u'?%s' % urlencode(items) if is_json: data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) results_ = jingo.render( request, template, { 'num_results': len(documents), 'results': results, 'q': cleaned['q'], 'pages': pages, 'w': cleaned['w'], 'refine_query': refine_query, 'search_form': search_form, 'lang_name': lang_name, }) results_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) results_['Expires'] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return results_
def search(request, template=None): """Performs search or displays the search form.""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = map(int, r.getlist('category')) or \ settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use `initial` # on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' wiki_s = wiki_search question_s = question_search discussion_s = discussion_search documents = [] # wiki filters # Category filter if cleaned['category']: wiki_s = wiki_s.filter(category__in=cleaned['category']) # Locale filter wiki_s = wiki_s.filter(locale=language) # Product filter products = cleaned['product'] for p in products: wiki_s = wiki_s.filter(tag=p) # Tags filter tags = [t.strip() for t in cleaned['tags'].split()] for t in tags: wiki_s = wiki_s.filter(tag=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_s = wiki_s.filter(is_archived=False) # End of wiki filters # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict((filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_s = question_s.filter(**d) if cleaned['asked_by']: question_s = question_s.filter( question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_s = question_s.filter( answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split()] for t in q_tags: question_s = question_s.filter(tag=t) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_s = discussion_s.filter(author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_s = discussion_s.filter(is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_s = discussion_s.filter(is_locked=1) if cleaned['forum']: discussion_s = discussion_s.filter(forum_id__in=cleaned['forum']) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']), ('question_votes', cleaned['num_voted'], cleaned['num_votes'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**before) question_s = question_s.filter(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**after) question_s = question_s.filter(**after) sortby = smart_int(request.GET.get('sortby')) try: max_results = settings.SEARCH_MAX_RESULTS cleaned_q = cleaned['q'] if cleaned['w'] & constants.WHERE_WIKI: wiki_s = wiki_s.query(cleaned_q)[:max_results] # Execute the query and append to documents documents += [('wiki', (pair[0], pair[1])) for pair in enumerate(wiki_s.object_ids())] if cleaned['w'] & constants.WHERE_SUPPORT: # Sort results by try: question_s = question_s.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: pass question_s = question_s.highlight( 'content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) question_s = question_s.query(cleaned_q)[:max_results] documents += [('question', (pair[0], pair[1])) for pair in enumerate(question_s.object_ids())] if cleaned['w'] & constants.WHERE_DISCUSSION: # Sort results by try: # Note that the first attribute needs to be the same # here and in forums/models.py discussion_search. discussion_s = discussion_s.group_by( 'thread_id', constants.GROUPSORT[sortby]) except IndexError: pass discussion_s = discussion_s.highlight( 'content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) discussion_s = discussion_s.query(cleaned_q)[:max_results] documents += [('discussion', (pair[0], pair[1])) for pair in enumerate(discussion_s.object_ids())] except SearchError: if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503) pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) # Build a dict of { type_ -> list of indexes } for the specific # docs that we're going to display on this page. This makes it # easy for us to slice the appropriate search Ss so we're limiting # our db hits to just the items we're showing. documents_dict = {} for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]: documents_dict.setdefault(doc[0], []).append(doc[1][0]) docs_for_page = [] for type_, search_s in [('wiki', wiki_s), ('question', question_s), ('discussion', discussion_s)]: if type_ not in documents_dict: continue # documents_dict[type_] is a list of indexes--one for each # object id search result for that type_. We use the values # at the beginning and end of the list for slice boundaries. begin = documents_dict[type_][0] end = documents_dict[type_][-1] + 1 docs_for_page += [(type_, doc) for doc in search_s[begin:end]] results = [] for i, docinfo in enumerate(docs_for_page): rank = i + offset type_, doc = docinfo try: if type_ == 'wiki': summary = doc.current_revision.summary result = { 'search_summary': summary, 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'document', 'rank': rank, 'object': doc, } results.append(result) elif type_ == 'question': try: excerpt = question_s.excerpt(doc)[0] except ExcerptTimeoutError: statsd.incr('search.excerpt.timeout') excerpt = u'' except ExcerptSocketErrorError: statsd.incr('search.excerpt.socketerror') excerpt = u'' summary = jinja2.Markup(clean_excerpt(excerpt)) result = { 'search_summary': summary, 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'question', 'rank': rank, 'object': doc, } results.append(result) else: # discussion_s is based on Post--not Thread, so we have # to get this manually. thread = Thread.objects.get(pk=doc.thread_id) try: excerpt = discussion_s.excerpt(doc)[0] except ExcerptTimeoutError: statsd.incr('search.excerpt.timeout') excerpt = u'' except ExcerptSocketErrorError: statsd.incr('search.excerpt.socketerror') excerpt = u'' summary = jinja2.Markup(clean_excerpt(excerpt)) result = { 'search_summary': summary, 'url': thread.get_absolute_url(), 'title': thread.title, 'type': 'thread', 'rank': rank, 'object': thread, } results.append(result) except IndexError: break except ObjectDoesNotExist: continue items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) if is_json: # Models are not json serializable. for r in results: del r['object'] data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) results_ = jingo.render(request, template, {'num_results': len(documents), 'results': results, 'q': cleaned['q'], 'pages': pages, 'w': cleaned['w'], 'search_form': search_form, 'lang_name': lang_name, }) results_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) results_['Expires'] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
def search(request, template=None): """ES-specific search view""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' # Woah! object?! Yeah, so what happens is that Sphilastic is # really an elasticutils.S and that requires a Django ORM model # argument. That argument only gets used if you want object # results--for every hit it gets back from ES, it creates an # object of the type of the Django ORM model you passed in. We use # object here to satisfy the need for a type in the constructor # and make sure we don't ever ask for object results. searcher = Sphilastic(object) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') discussion_f = F(model='forums_thread') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(document_product=p) # Topics filter topics = cleaned['topics'] for t in topics: wiki_f &= F(document_topic=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # End - support questions filters # Start - discussion forum filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) if cleaned['forum']: discussion_f &= F(post_forum_id__in=cleaned['forum']) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_f &= F(**after) question_f &= F(**after) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: final_filter |= question_f if cleaned['w'] & constants.WHERE_DISCUSSION: final_filter |= discussion_f searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights searcher = searcher.highlight( 'question_title', 'question_content', 'question_answer_content', 'discussion_content', pre_tags=['<b>'], post_tags=['</b>'], fragment_size=settings.SEARCH_SUMMARY_LENGTH) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, post_title=2.0, post_content=1.0, document_title=6.0, document_content=1.0, document_keywords=4.0, document_summary=2.0) # Apply sortby, but only for advanced search for questions if a == '1' and cleaned['w'] & constants.WHERE_SUPPORT: sortby = smart_int(request.GET.get('sortby')) try: searcher = searcher.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain(*[cls.get_query_fields() for cls in get_search_models()]) query = dict((field, cleaned_q) for field in query_fields) searcher = searcher.query(or_=query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher.values_dict()[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = doc['document_summary'] result = { 'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} else: summary = _build_es_excerpt(doc) result = { 'title': doc['post_title'], 'type': 'thread'} result['url'] = doc['url'] result['object'] = ObjectDict(doc) result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score result['explanation'] = escape(format_explanation( doc._explanation)) results.append(result) except (ESTimeoutError, ESMaxRetryError, ESException), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) if isinstance(exc, ESTimeoutError): statsd.incr('search.esunified.timeouterror') elif isinstance(exc, ESMaxRetryError): statsd.incr('search.esunified.maxretryerror') elif isinstance(exc, ESException): statsd.incr('search.esunified.elasticsearchexception') import logging logging.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503)
def search_with_es_unified(request, template=None): """ES-specific search view""" # TODO: Remove this once elastic search bucketed code is gone. start = time.time() # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' # Woah! object?! Yeah, so what happens is that Sphilastic is # really an elasticutils.S and that requires a Django ORM model # argument. That argument only gets used if you want object # results--for every hit it gets back from ES, it creates an # object of the type of the Django ORM model you passed in. We use # object here to satisfy the need for a type in the constructor # and make sure we don't ever ask for object results. searcher = Sphilastic(object) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') discussion_f = F(model='forums_thread') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(document_tag=p) # Tags filter tags = [t.strip() for t in cleaned['tags'].split()] for t in tags: wiki_f &= F(document_tag=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # End - support questions filters # Start - discussion forum filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) if cleaned['forum']: discussion_f &= F(post_forum_id__in=cleaned['forum']) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_f &= F(**after) question_f &= F(**after) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: final_filter |= question_f if cleaned['w'] & constants.WHERE_DISCUSSION: final_filter |= discussion_f searcher = searcher.filter(final_filter) documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights searcher = searcher.highlight( 'question_title', 'question_content', 'question_answer_content', 'discussion_content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) # Set up weights searcher = searcher.weight( question_title__text=4, question_content__text=3, question_answer_content__text=3, post_title__text=2, post_content__text=1, document_title__text=6, document_content__text=1, document_keywords__text=4, document_summary__text=2) # Apply sortby, but only for advanced search for questions if a == '1' and cleaned['w'] & constants.WHERE_SUPPORT: sortby = smart_int(request.GET.get('sortby')) try: searcher = searcher.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain(*[cls.get_query_fields() for cls in get_search_models()]) query = dict((field, cleaned_q) for field in query_fields) searcher = searcher.query(or_=query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher.values_dict()[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = doc['document_summary'] result = { 'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} else: summary = _build_es_excerpt(doc) result = { 'title': doc['post_title'], 'type': 'thread'} result['url'] = doc['url'] result['object'] = ObjectDict(doc) result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score results.append(result) except (ESTimeoutError, ESMaxRetryError, ESException), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) if isinstance(exc, ESTimeoutError): statsd.incr('search.esunified.timeouterror') elif isinstance(exc, ESMaxRetryError): statsd.incr('search.esunified.maxretryerror') elif isinstance(exc, ESException): statsd.incr('search.esunified.elasticsearchexception') import logging logging.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503)
def test_sanity(self): eq_(10, smart_int('10')) eq_(10, smart_int('10.5'))
def test_empty_string(self): eq_(0, smart_int(''))
def test_invalid_string(self): eq_(0, smart_int('invalid'))
def search_with_es(request, template=None): """ES-specific search view""" engine = "elastic" # Time ES and Sphinx separate. See bug 723930. # TODO: Remove this once Sphinx is gone. start = time.time() # JSON-specific variables is_json = request.GET.get("format") == "json" callback = request.GET.get("callback", "").strip() mimetype = "application/x-javascript" if callback else "application/json" # Search "Expires" header format expires_fmt = "%A, %d %B %Y %H:%M:%S GMT" # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse(json.dumps({"error": _("Invalid callback function.")}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get("language", request.locale)) r = request.GET.copy() a = request.GET.get("a", "0") # Search default values try: category = map(int, r.getlist("category")) or settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist("category", category) # Basic form if a == "0": r["w"] = r.get("w", constants.WHERE_BASIC) # Advanced form if a == "2": r["language"] = language r["a"] = "1" # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if "include_archived" not in r: r["include_archived"] = False search_form = SearchForm(r) if not search_form.is_valid() or a == "2": if is_json: return HttpResponse(json.dumps({"error": _("Invalid search data.")}), mimetype=mimetype, status=400) t = template if request.MOBILE else "search/form.html" search_ = jingo.render(request, t, {"advanced": a, "request": request, "search_form": search_form}) search_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60) search_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get("page")), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # TODO: This is fishy--why does it have to be coded this way? # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = "" wiki_s = Document.search() question_s = Question.search() discussion_s = Thread.search() # wiki filters # Category filter if cleaned["category"]: wiki_s = wiki_s.filter(document_category__in=cleaned["category"]) # Locale filter wiki_s = wiki_s.filter(document_locale=language) # Product filter products = cleaned["product"] for p in products: wiki_s = wiki_s.filter(document_tag=p) # Tags filter tags = [t.strip() for t in cleaned["tags"].split()] for t in tags: wiki_s = wiki_s.filter(document_tag=t) # Archived bit if a == "0" and not cleaned["include_archived"]: # Default to NO for basic search: cleaned["include_archived"] = False if not cleaned["include_archived"]: wiki_s = wiki_s.filter(document_is_archived=False) # End of wiki filters # Support questions specific filters if cleaned["w"] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == "0" and not cleaned["has_helpful"]: cleaned["has_helpful"] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful") d = dict( ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name] ) if d: question_s = question_s.filter(**d) if cleaned["asked_by"]: question_s = question_s.filter(question_creator=cleaned["asked_by"]) if cleaned["answered_by"]: question_s = question_s.filter(question_answer_creator=cleaned["answered_by"]) q_tags = [t.strip() for t in cleaned["q_tags"].split()] for t in q_tags: question_s = question_s.filter(question_tag=t) # Discussion forum specific filters if cleaned["w"] & constants.WHERE_DISCUSSION: if cleaned["author"]: discussion_s = discussion_s.filter(post_author_ord=cleaned["author"]) if cleaned["thread_type"]: if constants.DISCUSSION_STICKY in cleaned["thread_type"]: discussion_s = discussion_s.filter(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned["thread_type"]: discussion_s = discussion_s.filter(post_is_locked=1) if cleaned["forum"]: discussion_s = discussion_s.filter(post_forum_id__in=cleaned["forum"]) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ("created", cleaned["created"], cleaned["created_date"]), ("updated", cleaned["updated"], cleaned["updated_date"]), ) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + "__gte": 0, filter_name + "__lte": max(filter_date, 0)} discussion_s = discussion_s.filter(**before) question_s = question_s.filter(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + "__gte": min(filter_date, unix_now), filter_name + "__lte": unix_now} discussion_s = discussion_s.filter(**after) question_s = question_s.filter(**after) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned["num_voted"] == constants.INTERVAL_BEFORE: question_s.filter(question_num_votes__lte=max(cleaned["num_votes"], 0)) elif cleaned["num_voted"] == constants.INTERVAL_AFTER: question_s.filter(question_num_votes__gte=cleaned["num_votes"]) # Done with all the filtery stuff--time to generate results documents = ComposedList() sortby = smart_int(request.GET.get("sortby")) try: max_results = settings.SEARCH_MAX_RESULTS cleaned_q = cleaned["q"] if cleaned["w"] & constants.WHERE_WIKI: if cleaned_q: wiki_s = wiki_s.query(cleaned_q) # For a front-page non-advanced search, we want to cap the kb # at 10 results. if a == "0": wiki_max_results = 10 else: wiki_max_results = max_results documents.set_count(("wiki", wiki_s), min(wiki_s.count(), wiki_max_results)) if cleaned["w"] & constants.WHERE_SUPPORT: # Sort results by try: question_s = question_s.order_by(*constants.SORT_QUESTIONS[sortby]) except IndexError: pass question_s = question_s.highlight( "question_title", "question_content", "question_answer_content", before_match="<b>", after_match="</b>", limit=settings.SEARCH_SUMMARY_LENGTH, ) if cleaned_q: question_s = question_s.query(cleaned_q) documents.set_count(("question", question_s), min(question_s.count(), max_results)) if cleaned["w"] & constants.WHERE_DISCUSSION: discussion_s = discussion_s.highlight( "discussion_content", before_match="<b>", after_match="</b>", limit=settings.SEARCH_SUMMARY_LENGTH ) if cleaned_q: discussion_s = discussion_s.query(cleaned_q) documents.set_count(("forum", discussion_s), min(discussion_s.count(), max_results)) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) num_results = len(documents) # Get the documents we want to show and add them to # docs_for_page. documents = documents[offset : offset + results_per_page] docs_for_page = [] for (kind, search_s), bounds in documents: search_s = search_s.values_dict()[bounds[0] : bounds[1]] docs_for_page += [(kind, doc) for doc in search_s] results = [] for i, docinfo in enumerate(docs_for_page): rank = i + offset # Type here is something like 'wiki', ... while doc here # is an ES result document. type_, doc = docinfo if type_ == "wiki": summary = doc["document_summary"] result = { "url": doc["url"], "title": doc["document_title"], "type": "document", "object": ObjectDict(doc), } elif type_ == "question": summary = _build_es_excerpt(doc) result = { "url": doc["url"], "title": doc["question_title"], "type": "question", "object": ObjectDict(doc), "is_solved": doc["question_is_solved"], "num_answers": doc["question_num_answers"], "num_votes": doc["question_num_votes"], "num_votes_past_week": doc["question_num_votes_past_week"], } else: summary = _build_es_excerpt(doc) result = {"url": doc["url"], "title": doc["post_title"], "type": "thread", "object": ObjectDict(doc)} result["search_summary"] = summary result["rank"] = rank result["score"] = doc._score results.append(result) except (ESTimeoutError, ESMaxRetryError, ESException), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({"error": _("Search Unavailable")}), mimetype=mimetype, status=503) if isinstance(exc, ESTimeoutError): statsd.incr("search.%s.timeouterror" % engine) elif isinstance(exc, ESMaxRetryError): statsd.incr("search.%s.maxretryerror" % engine) elif isinstance(exc, ESException): statsd.incr("search.%s.elasticsearchexception" % engine) t = "search/mobile/down.html" if request.MOBILE else "search/down.html" return jingo.render(request, t, {"q": cleaned["q"]}, status=503)
def test_invalid_string(self): eq_(0, smart_int('invalid'))
def test_sanity(self): eq_(10, smart_int('10')) eq_(10, smart_int('10.5'))
def test_wrong_type(self): eq_(0, smart_int(None)) eq_(10, smart_int([], 10))
def search_with_sphinx(request, template=None): """Sphinx-specific search view""" # Time ES and Sphinx separate. See bug 723930. # TODO: Remove this once Sphinx is gone. start = time.time() # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' if waffle.flag_is_active(request, 'elasticsearch'): engine = 'elastic' else: engine = 'sphinx' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = map(int, r.getlist('category')) or \ settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use `initial` # on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' wiki_s = wiki_searcher(request) question_s = question_searcher(request) discussion_s = discussion_searcher(request) documents = [] # wiki filters # Category filter if cleaned['category']: wiki_s = wiki_s.filter(category__in=cleaned['category']) # Locale filter wiki_s = wiki_s.filter(locale=language) # Product filter products = cleaned['product'] for p in products: wiki_s = wiki_s.filter(tag=p) # Tags filter tags = [t.strip() for t in cleaned['tags'].split()] for t in tags: wiki_s = wiki_s.filter(tag=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_s = wiki_s.filter(is_archived=False) # End of wiki filters # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict((filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_s = question_s.filter(**d) if cleaned['asked_by']: question_s = question_s.filter( question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_s = question_s.filter( answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split()] for t in q_tags: question_s = question_s.filter(tag=t) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_s = discussion_s.filter(author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_s = discussion_s.filter(is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_s = discussion_s.filter(is_locked=1) if cleaned['forum']: discussion_s = discussion_s.filter(forum_id__in=cleaned['forum']) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']), ('question_votes', cleaned['num_voted'], cleaned['num_votes'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**before) question_s = question_s.filter(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**after) question_s = question_s.filter(**after) sortby = smart_int(request.GET.get('sortby')) try: max_results = settings.SEARCH_MAX_RESULTS cleaned_q = cleaned['q'] if cleaned['w'] & constants.WHERE_WIKI: if cleaned_q: wiki_s = wiki_s.query(cleaned_q) wiki_s = wiki_s[:max_results] # Execute the query and append to documents documents += [('wiki', (pair[0], pair[1])) for pair in enumerate(wiki_s.object_ids())] if cleaned['w'] & constants.WHERE_SUPPORT: # Sort results by try: question_s = question_s.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: pass if engine == 'elastic': highlight_fields = ['title', 'question_content', 'answer_content'] else: highlight_fields = ['content'] question_s = question_s.highlight( *highlight_fields, before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) if cleaned_q: question_s = question_s.query(cleaned_q) question_s = question_s[:max_results] documents += [('question', (pair[0], pair[1])) for pair in enumerate(question_s.object_ids())] if cleaned['w'] & constants.WHERE_DISCUSSION: # Sort results by try: # Note that the first attribute needs to be the same # here and in forums/models.py discussion_search. discussion_s = discussion_s.group_by( 'thread_id', constants.GROUPSORT[sortby]) except IndexError: pass discussion_s = discussion_s.highlight( 'content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) if cleaned_q: discussion_s = discussion_s.query(cleaned_q) discussion_s = discussion_s[:max_results] documents += [('discussion', (pair[0], pair[1])) for pair in enumerate(discussion_s.object_ids())] pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) # Build a dict of { type_ -> list of indexes } for the specific # docs that we're going to display on this page. This makes it # easy for us to slice the appropriate search Ss so we're limiting # our db hits to just the items we're showing. documents_dict = {} for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]: documents_dict.setdefault(doc[0], []).append(doc[1][0]) docs_for_page = [] for kind, search_s in [('wiki', wiki_s), ('question', question_s), ('discussion', discussion_s)]: if kind not in documents_dict: continue # documents_dict[type_] is a list of indexes--one for each # object id search result for that type_. We use the values # at the beginning and end of the list for slice boundaries. begin = documents_dict[kind][0] end = documents_dict[kind][-1] + 1 search_s = search_s[begin:end] if engine == 'elastic': # If we're doing elasticsearch, then we need to update # the _s variables to point to the sliced versions of # S so that, when we iterate over them in the # following list comp, we hang onto the version that # does the query, so we can call excerpt() on it # later. # # We only need to do this with elasticsearch. For Sphinx, # search_s at this point is an ObjectResults and not an S # because we've already acquired object_ids on it. Thus # if we update the _s variables, we'd be pointing to the # ObjectResults and not the S and then excerpting breaks. # # Ugh. if kind == 'wiki': wiki_s = search_s elif kind == 'question': question_s = search_s elif kind == 'discussion': discussion_s = search_s docs_for_page += [(kind, doc) for doc in search_s] results = [] for i, docinfo in enumerate(docs_for_page): rank = i + offset type_, doc = docinfo try: if type_ == 'wiki': summary = doc.current_revision.summary result = { 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'document', 'object': doc} elif type_ == 'question': summary = _build_excerpt(question_s, doc) result = { 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'question', 'object': doc, 'is_solved': doc.is_solved, 'num_answers': doc.num_answers, 'num_votes': doc.num_votes, 'num_votes_past_week': doc.num_votes_past_week} else: if engine == 'elastic': thread = doc else: thread = Thread.objects.get(pk=doc.thread_id) summary = _build_excerpt(discussion_s, doc) result = { 'url': thread.get_absolute_url(), 'title': thread.title, 'type': 'thread', 'object': thread} result['search_summary'] = summary result['rank'] = rank results.append(result) except IndexError: break except ObjectDoesNotExist: continue except (SearchError, ESTimeoutError, ESMaxRetryError, ESException), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) if isinstance(exc, SearchError): statsd.incr('search.%s.searcherror' % engine) elif isinstance(exc, ESTimeoutError): statsd.incr('search.%s.timeouterror' % engine) elif isinstance(exc, ESMaxRetryError): statsd.incr('search.%s.maxretryerror' % engine) elif isinstance(exc, ESException): statsd.incr('search.%s.elasticsearchexception' % engine) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503)
def search(request): """Performs search or displays the search form.""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = map(int, r.getlist('category')) or \ settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', [x for x in category if x > 0]) exclude_category = [abs(x) for x in category if x < 0] try: fx = map(int, r.getlist('fx')) or [v.id for v in FIREFOX_VERSIONS] except ValueError: fx = [v.id for v in FIREFOX_VERSIONS] #r.setlist('fx', fx) try: os = map(int, r.getlist('os')) or [o.id for o in OPERATING_SYSTEMS] except ValueError: os = [o.id for o in OPERATING_SYSTEMS] #r.setlist('os', os) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) search_ = jingo.render(request, 'search/form.html', {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data search_locale = (sphinx_locale(language),) page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' documents = [] filters_w = [] filters_q = [] filters_f = [] # wiki filters # Version and OS filters if cleaned['fx']: filters_w.append({ 'filter': 'fx', 'value': cleaned['fx'], }) if cleaned['os']: filters_w.append({ 'filter': 'os', 'value': cleaned['os'], }) # Category filter if cleaned['category']: filters_w.append({ 'filter': 'category', 'value': cleaned['category'], }) if exclude_category: filters_w.append({ 'filter': 'category', 'value': exclude_category, 'exclude': True, }) # Locale filter filters_w.append({ 'filter': 'locale', 'value': search_locale, }) # Tags filter tags = [crc32(t.strip()) for t in cleaned['tags'].split()] if tags: for t in tags: filters_w.append({ 'filter': 'tag', 'value': (t,), }) # End of wiki filters """ # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['is_solved']: cleaned['is_solved'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF toggle_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') for filter_name in toggle_filters: if cleaned[filter_name] == constants.TERNARY_YES: filters_q.append({ 'filter': filter_name, 'value': (True,), }) if cleaned[filter_name] == constants.TERNARY_NO: filters_q.append({ 'filter': filter_name, 'value': (False,), }) if cleaned['asked_by']: filters_q.append({ 'filter': 'question_creator', 'value': (crc32(cleaned['asked_by']),), }) if cleaned['answered_by']: filters_q.append({ 'filter': 'answer_creator', 'value': (crc32(cleaned['answered_by']),), }) q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()] if q_tags: for t in q_tags: filters_q.append({ 'filter': 'tag', 'value': (t,), }) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: filters_f.append({ 'filter': 'author_ord', 'value': (crc32(cleaned['author']),), }) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: filters_f.append({ 'filter': 'is_sticky', 'value': (1,), }) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: filters_f.append({ 'filter': 'is_locked', 'value': (1,), }) if cleaned['forum']: filters_f.append({ 'filter': 'forum_id', 'value': cleaned['forum'], }) """ # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( # ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']), # ('question_votes', cleaned['num_voted'], cleaned['num_votes']) ) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = { 'range': True, 'filter': filter_name, 'min': 0, 'max': max(filter_date, 0), } if filter_name != 'question_votes': filters_f.append(before) filters_q.append(before) elif filter_option == constants.INTERVAL_AFTER: after = { 'range': True, 'filter': filter_name, 'min': min(filter_date, unix_now), 'max': unix_now, } if filter_name != 'question_votes': filters_f.append(after) filters_q.append(after) sortby = smart_int(request.GET.get('sortby')) try: if cleaned['w'] & constants.WHERE_WIKI: wc = WikiClient() # Wiki SearchClient instance # Execute the query and append to documents documents += wc.query(cleaned['q'], filters_w) if cleaned['w'] & constants.WHERE_SUPPORT: qc = QuestionsClient() # Support question SearchClient instance # Sort results by try: qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0], constants.SORT_QUESTIONS[sortby][1]) except IndexError: pass documents += qc.query(cleaned['q'], filters_q) if cleaned['w'] & constants.WHERE_DISCUSSION: dc = DiscussionClient() # Discussion forums SearchClient instance # Sort results by try: dc.groupsort = constants.GROUPSORT[sortby] except IndexError: pass documents += dc.query(cleaned['q'], filters_f) except SearchError: if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) return jingo.render(request, 'search/down.html', {}, status=503) pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) results = [] for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE): try: if documents[i]['attrs'].get('category', False) != False: wiki_page = Document.objects.get(pk=documents[i]['id']) summary = wiki_page.current_revision.summary result = {'search_summary': summary, 'url': wiki_page.get_absolute_url(), 'title': wiki_page.title, 'type': 'document', } results.append(result) elif documents[i]['attrs'].get('question_creator', False) != False: question = Question.objects.get( pk=documents[i]['attrs']['question_id']) excerpt = qc.excerpt(question.content, cleaned['q']) summary = jinja2.Markup(excerpt) result = {'search_summary': summary, 'url': question.get_absolute_url(), 'title': question.title, 'type': 'question', } results.append(result) else: thread = Thread.objects.get( pk=documents[i]['attrs']['thread_id']) post = Post.objects.get(pk=documents[i]['id']) excerpt = dc.excerpt(post.content, cleaned['q']) summary = jinja2.Markup(excerpt) result = {'search_summary': summary, 'url': thread.get_absolute_url(), 'title': thread.title, 'type': 'thread', } results.append(result) except IndexError: break except ObjectDoesNotExist: continue items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) refine_query = u'?%s' % urlencode(items) if is_json: data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) results_ = jingo.render(request, 'search/results.html', {'num_results': len(documents), 'results': results, 'q': cleaned['q'], 'pages': pages, 'w': cleaned['w'], 'refine_query': refine_query, 'search_form': search_form, 'lang_name': lang_name, }) results_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) results_['Expires'] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return results_
def test_wrong_type(self): eq_(0, smart_int(None)) eq_(10, smart_int([], 10))
def test_int(self): eq_(10, smart_int(10))
def test_large_values(self): """Makes sure ints that would cause an overflow result in fallback.""" eq_(0, smart_int('1' * 1000))
def test_empty_string(self): eq_(0, smart_int(''))
def search(request, template=None): """ES-specific search view""" if (waffle.flag_is_active(request, 'esunified') or request.GET.get('esunified')): return search_with_es_unified(request, template) start = time.time() # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' wiki_s = Document.search() question_s = Question.search() discussion_s = Thread.search() # wiki filters # Category filter if cleaned['category']: wiki_s = wiki_s.filter(document_category__in=cleaned['category']) # Locale filter wiki_s = wiki_s.filter(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_s = wiki_s.filter(document_tag=p) # Tags filter tags = [t.strip() for t in cleaned['tags'].split()] for t in tags: wiki_s = wiki_s.filter(document_tag=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_s = wiki_s.filter(document_is_archived=False) # End of wiki filters # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_s = question_s.filter(**d) if cleaned['asked_by']: question_s = question_s.filter( question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_s = question_s.filter( question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_s = question_s.filter(question_tag=t) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_s = discussion_s.filter( post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_s = discussion_s.filter(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_s = discussion_s.filter(post_is_locked=1) if cleaned['forum']: discussion_s = discussion_s.filter( post_forum_id__in=cleaned['forum']) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_s = discussion_s.filter(**before) question_s = question_s.filter(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_s = discussion_s.filter(**after) question_s = question_s.filter(**after) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_s = question_s.filter( question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_s = question_s.filter( question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results documents = ComposedList() sortby = smart_int(request.GET.get('sortby')) try: max_results = settings.SEARCH_MAX_RESULTS cleaned_q = cleaned['q'] if cleaned['w'] & constants.WHERE_WIKI: if cleaned_q: wiki_s = wiki_s.query(cleaned_q) # For a front-page non-advanced search, we want to cap the kb # at 10 results. if a == '0': wiki_max_results = 10 else: wiki_max_results = max_results documents.set_count(('wiki', wiki_s), min(wiki_s.count(), wiki_max_results)) if cleaned['w'] & constants.WHERE_SUPPORT: # Sort results by try: question_s = question_s.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: pass question_s = question_s.highlight( 'question_title', 'question_content', 'question_answer_content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) if cleaned_q: question_s = question_s.query(cleaned_q) documents.set_count(('question', question_s), min(question_s.count(), max_results)) if cleaned['w'] & constants.WHERE_DISCUSSION: discussion_s = discussion_s.highlight( 'discussion_content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) if cleaned_q: discussion_s = discussion_s.query(cleaned_q) documents.set_count(('forum', discussion_s), min(discussion_s.count(), max_results)) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) num_results = len(documents) # Get the documents we want to show and add them to # docs_for_page. documents = documents[offset:offset + results_per_page] docs_for_page = [] for (kind, search_s), bounds in documents: search_s = search_s.values_dict()[bounds[0]:bounds[1]] docs_for_page += [(kind, doc) for doc in search_s] results = [] for i, docinfo in enumerate(docs_for_page): rank = i + offset # Type here is something like 'wiki', ... while doc here # is an ES result document. type_, doc = docinfo if type_ == 'wiki': summary = doc['document_summary'] result = { 'url': doc['url'], 'title': doc['document_title'], 'type': 'document', 'object': ObjectDict(doc)} elif type_ == 'question': summary = _build_es_excerpt(doc) result = { 'url': doc['url'], 'title': doc['question_title'], 'type': 'question', 'object': ObjectDict(doc), 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} else: summary = _build_es_excerpt(doc) result = { 'url': doc['url'], 'title': doc['post_title'], 'type': 'thread', 'object': ObjectDict(doc)} result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score results.append(result) except (ESTimeoutError, ESMaxRetryError, ESException), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) if isinstance(exc, ESTimeoutError): statsd.incr('search.es.timeouterror') elif isinstance(exc, ESMaxRetryError): statsd.incr('search.es.maxretryerror') elif isinstance(exc, ESException): statsd.incr('search.es.elasticsearchexception') t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503)
def test_large_values(self): """Makes sure ints that would cause an overflow result in fallback.""" eq_(0, smart_int('1' * 1000))
def search(request, template=None): """ES-specific search view""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = render(request, t, { 'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' # We use a regular S here because we want to search across # multiple doctypes. searcher = (UntypedS().es(urls=settings.ES_URLS) .indexes(es_utils.READ_INDEX)) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') discussion_f = F(model='forums_thread') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: wiki_f &= F(topic=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # Product filter products = cleaned['product'] for p in products: question_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: question_f &= F(topic=t) # End - support questions filters # Start - discussion forum filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) if cleaned['forum']: discussion_f &= F(post_forum_id__in=cleaned['forum']) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_f &= F(**after) question_f &= F(**after) # In basic search, we limit questions from the last # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds. if a == '0': start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE question_f &= F(created__gte=start_date) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f if cleaned['w'] & constants.WHERE_DISCUSSION: doctypes.append(ThreadMappingType.get_mapping_type_name()) final_filter |= discussion_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights # First 500 characters of content in one big fragment searcher = searcher.highlight( 'question_content', 'discussion_content', 'document_summary', pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0, fragment_size=500) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, post_title=2.0, post_content=1.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__text_phrase=10.0, document_content__text_phrase=8.0) # Apply sortby for advanced search of questions if cleaned['w'] == constants.WHERE_SUPPORT: sortby = cleaned['sortby'] try: searcher = searcher.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Apply sortby for advanced search of kb documents if cleaned['w'] == constants.WHERE_WIKI: sortby = cleaned['sortby_documents'] try: searcher = searcher.order_by( *constants.SORT_DOCUMENTS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain(*[cls.get_query_fields() for cls in get_mapping_types()]) query = {} # Create text and text_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['text', 'text_phrase']: query['%s__%s' % (field, query_type)] = cleaned_q searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # Facets product_facets = {} # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher.values_dict()[bounds[0]:bounds[1]] # If we are doing basic search, we show product facets. if a == '0': pfc = searcher.facet( 'product', filtered=True).facet_counts() product_facets = dict( [(p['term'], p['count']) for p in pfc['product']]) results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = _build_es_excerpt(doc) if not summary: summary = doc['document_summary'] result = { 'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. summary = bleach.clean( doc['question_content'], strip=True)[:500] result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} else: summary = _build_es_excerpt(doc) result = { 'title': doc['post_title'], 'type': 'thread'} result['url'] = doc['url'] result['object'] = ObjectDict(doc) result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score result['explanation'] = escape(format_explanation( doc._explanation)) results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip('()') statsd.incr('search.esunified.{0}'.format(exc_bucket)) import logging logging.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return render(request, t, {'q': cleaned['q']}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) if is_json: # Models are not json serializable. for r in results: del r['object'] data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned['product']) results_ = render(request, template, { 'num_results': num_results, 'results': results, 'fallback_results': fallback_results, 'q': cleaned['q'], 'w': cleaned['w'], 'product': cleaned['product'], 'products': Product.objects.filter(visible=True), 'product_facets': product_facets, 'pages': pages, 'search_form': search_form, 'lang_name': lang_name, }) results_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) results_['Expires'] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
def test_int(self): eq_(10, smart_int(10))
def search(request, template=None): """Performs search or displays the search form.""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' if waffle.flag_is_active(request, 'elasticsearch'): engine = 'elastic' else: engine = 'sphinx' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default(request.GET.get('language', request.locale)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = map(int, r.getlist('category')) or \ settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use `initial` # on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = jingo.render(request, t, {'advanced': a, 'request': request, 'search_form': search_form}) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE # get language name for display in template lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' wiki_s = wiki_searcher(request) question_s = question_searcher(request) discussion_s = discussion_searcher(request) documents = [] # wiki filters # Category filter if cleaned['category']: wiki_s = wiki_s.filter(category__in=cleaned['category']) # Locale filter wiki_s = wiki_s.filter(locale=language) # Product filter products = cleaned['product'] for p in products: wiki_s = wiki_s.filter(tag=p) # Tags filter tags = [t.strip() for t in cleaned['tags'].split()] for t in tags: wiki_s = wiki_s.filter(tag=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_s = wiki_s.filter(is_archived=False) # End of wiki filters # Support questions specific filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict((filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_s = question_s.filter(**d) if cleaned['asked_by']: question_s = question_s.filter( question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_s = question_s.filter( answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split()] for t in q_tags: question_s = question_s.filter(tag=t) # Discussion forum specific filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_s = discussion_s.filter(author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_s = discussion_s.filter(is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_s = discussion_s.filter(is_locked=1) if cleaned['forum']: discussion_s = discussion_s.filter(forum_id__in=cleaned['forum']) # Filters common to support and discussion forums # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']), ('question_votes', cleaned['num_voted'], cleaned['num_votes'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**before) question_s = question_s.filter(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} if filter_name != 'question_votes': discussion_s = discussion_s.filter(**after) question_s = question_s.filter(**after) sortby = smart_int(request.GET.get('sortby')) try: max_results = settings.SEARCH_MAX_RESULTS cleaned_q = cleaned['q'] if cleaned['w'] & constants.WHERE_WIKI: wiki_s = wiki_s.query(cleaned_q)[:max_results] # Execute the query and append to documents documents += [('wiki', (pair[0], pair[1])) for pair in enumerate(wiki_s.object_ids())] if cleaned['w'] & constants.WHERE_SUPPORT: # Sort results by try: question_s = question_s.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: pass if engine == 'elastic': highlight_fields = ['title', 'question_content', 'answer_content'] else: highlight_fields = ['content'] question_s = question_s.highlight( *highlight_fields, before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) question_s = question_s.query(cleaned_q)[:max_results] documents += [('question', (pair[0], pair[1])) for pair in enumerate(question_s.object_ids())] if cleaned['w'] & constants.WHERE_DISCUSSION: # Sort results by try: # Note that the first attribute needs to be the same # here and in forums/models.py discussion_search. discussion_s = discussion_s.group_by( 'thread_id', constants.GROUPSORT[sortby]) except IndexError: pass discussion_s = discussion_s.highlight( 'content', before_match='<b>', after_match='</b>', limit=settings.SEARCH_SUMMARY_LENGTH) discussion_s = discussion_s.query(cleaned_q)[:max_results] documents += [('discussion', (pair[0], pair[1])) for pair in enumerate(discussion_s.object_ids())] pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE) # Build a dict of { type_ -> list of indexes } for the specific # docs that we're going to display on this page. This makes it # easy for us to slice the appropriate search Ss so we're limiting # our db hits to just the items we're showing. documents_dict = {} for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]: documents_dict.setdefault(doc[0], []).append(doc[1][0]) docs_for_page = [] for kind, search_s in [('wiki', wiki_s), ('question', question_s), ('discussion', discussion_s)]: if kind not in documents_dict: continue # documents_dict[type_] is a list of indexes--one for each # object id search result for that type_. We use the values # at the beginning and end of the list for slice boundaries. begin = documents_dict[kind][0] end = documents_dict[kind][-1] + 1 search_s = search_s[begin:end] if engine == 'elastic': # If we're doing elasticsearch, then we need to update # the _s variables to point to the sliced versions of # S so that, when we iterate over them in the # following list comp, we hang onto the version that # does the query, so we can call excerpt() on it # later. # # We only need to do this with elasticsearch. For Sphinx, # search_s at this point is an ObjectResults and not an S # because we've already acquired object_ids on it. Thus # if we update the _s variables, we'd be pointing to the # ObjectResults and not the S and then excerpting breaks. # # Ugh. if kind == 'wiki': wiki_s = search_s elif kind == 'question': question_s = search_s elif kind == 'discussion': discussion_s = search_s docs_for_page += [(kind, doc) for doc in search_s] results = [] for i, docinfo in enumerate(docs_for_page): rank = i + offset type_, doc = docinfo try: if type_ == 'wiki': summary = doc.current_revision.summary result = { 'search_summary': summary, 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'document', 'rank': rank, 'object': doc, } results.append(result) elif type_ == 'question': try: excerpt = excerpt_joiner.join( [m for m in chain(*question_s.excerpt(doc)) if m]) except ExcerptTimeoutError: statsd.incr('search.excerpt.timeout') excerpt = u'' except ExcerptSocketError: statsd.incr('search.excerpt.socketerror') excerpt = u'' summary = jinja2.Markup(clean_excerpt(excerpt)) result = { 'search_summary': summary, 'url': doc.get_absolute_url(), 'title': doc.title, 'type': 'question', 'rank': rank, 'object': doc, } results.append(result) else: if engine == 'elastic': thread = doc else: thread = Thread.objects.get(pk=doc.thread_id) try: excerpt = excerpt_joiner.join( [m for m in chain(*discussion_s.excerpt(doc))]) except ExcerptTimeoutError: statsd.incr('search.excerpt.timeout') excerpt = u'' except ExcerptSocketError: statsd.incr('search.excerpt.socketerror') excerpt = u'' summary = jinja2.Markup(clean_excerpt(excerpt)) result = { 'search_summary': summary, 'url': thread.get_absolute_url(), 'title': thread.title, 'type': 'thread', 'rank': rank, 'object': thread, } results.append(result) except IndexError: break except ObjectDoesNotExist: continue except (SearchError, ESTimeoutError, ESMaxRetryError), exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) if isinstance(exc, SearchError): statsd.incr('search.%s.searcherror' % engine) elif isinstance(exc, ESTimeoutError): statsd.incr('search.%s.timeouterror' % engine) elif isinstance(exc, ESMaxRetryError): statsd.incr('search.%s.maxretryerror' % engine) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return jingo.render(request, t, {'q': cleaned['q']}, status=503)