def search(request, template=None):
    """ES-specific search view.

    Validates the optional JSONP callback and the search form, builds one
    filter per searched model (wiki documents, support questions, forum
    threads) from the cleaned form data, runs the query and collects one
    result dict per hit for the requested page.

    :arg request: the HTTP request; all search options come from
        ``request.GET``
    :arg template: template used to render the search form for mobile
        requests

    :returns: a 400 response for an invalid JSONP callback or (JSON
        requests only) invalid search data; the rendered search form when
        the form is invalid or ``a == '2'``; a 503 response when the
        search backend raises one of the handled ES errors
    """
    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        # A list (rather than map()) so that an empty selection is falsy
        # and falls back to the defaults on every Python version.
        category = ([int(c) for c in r.getlist('category')] or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                  minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    # Woah! object?! Yeah, so what happens is that Sphilastic is
    # really an elasticutils.S and that requires a Django ORM model
    # argument. That argument only gets used if you want object
    # results--for every hit it gets back from ES, it creates an
    # object of the type of the Django ORM model you passed in. We use
    # object here to satisfy the need for a type in the constructor
    # and make sure we don't ever ask for object results.
    searcher = Sphilastic(object)

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')
    discussion_f = F(model='forums_thread')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(document_product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(document_topic=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            # Splitting on ',' yields empty strings for blank input or
            # doubled commas; skip those.
            if t:
                question_f &= F(question_tag=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        if cleaned['forum']:
            discussion_f &= F(post_forum_id__in=cleaned['forum'])

    # End - discussion forum filters

    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # In basic search, we limit questions from the last
    # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds.
    if a == '0':
        start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        question_f &= F(created__gte=start_date)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        final_filter |= question_f

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        final_filter |= discussion_f

    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    documents = ComposedList()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights
        # First 500 characters of content in one big fragment
        searcher = searcher.highlight(
            'question_content', 'discussion_content', 'document_summary',
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0,
            fragment_size=500)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=4.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__text_phrase=10.0,
            document_content__text_phrase=8.0)

        # Apply sortby for advanced search of questions
        if cleaned['w'] == constants.WHERE_SUPPORT:
            sortby = cleaned['sortby']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned['w'] == constants.WHERE_WIKI:
            sortby = cleaned['sortby_documents']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(*[cls.get_query_fields()
                                   for cls in get_search_models()])
            query = {}
            # Create text and text_phrase queries for every field
            # we want to search.
            for field in query_fields:
                for query_type in ['text', 'text_phrase']:
                    query['%s__%s' % (field, query_type)] = cleaned_q

            searcher = searcher.query(or_=query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher.values_dict()[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(
                        doc['question_content'], strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['post_title'],
                    'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            result['explanation'] = escape(format_explanation(
                doc._explanation))
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.

        # Count and log the failure before branching on the response
        # format, so failures on JSON requests are recorded too.
        if isinstance(exc, ESTimeoutError):
            statsd.incr('search.esunified.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.esunified.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('search.esunified.elasticsearchexception')

        import logging
        logging.exception(exc)

        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    # NOTE(review): the visible code ends here without returning a
    # response for the success path (results, pages, num_results and
    # lang_name are built but unused) -- confirm whether the remainder of
    # this view was lost.
def search(request, template=None):
    """ES-specific search view.

    Validates the optional JSONP callback and the search form, builds one
    filter per searched model (wiki documents, support questions, forum
    threads) from the cleaned form data, runs the query and collects one
    result dict per hit for the requested page.

    :arg request: the HTTP request; all search options come from
        ``request.GET``
    :arg template: template used to render the search form for mobile
        requests

    :returns: a 400 response for an invalid JSONP callback or (JSON
        requests only) invalid search data; the rendered search form when
        the form is invalid or ``a == '2'``; a 503 response when the
        search backend raises one of the handled ES errors
    """
    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        # A list (rather than map()) so that an empty selection is falsy
        # and falls back to the defaults on every Python version.
        category = ([int(c) for c in r.getlist('category')] or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                  minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    # Woah! object?! Yeah, so what happens is that Sphilastic is
    # really an elasticutils.S and that requires a Django ORM model
    # argument. That argument only gets used if you want object
    # results--for every hit it gets back from ES, it creates an
    # object of the type of the Django ORM model you passed in. We use
    # object here to satisfy the need for a type in the constructor
    # and make sure we don't ever ask for object results.
    searcher = Sphilastic(object)

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')
    discussion_f = F(model='forums_thread')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(document_product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(document_topic=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            # Splitting on ',' yields empty strings for blank input or
            # doubled commas; skip those.
            if t:
                question_f &= F(question_tag=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        if cleaned['forum']:
            discussion_f &= F(post_forum_id__in=cleaned['forum'])

    # End - discussion forum filters

    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        final_filter |= question_f

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        final_filter |= discussion_f

    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    documents = ComposedList()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights
        searcher = searcher.highlight(
            'question_title', 'question_content', 'question_answer_content',
            'discussion_content',
            pre_tags=['<b>'],
            post_tags=['</b>'],
            fragment_size=settings.SEARCH_SUMMARY_LENGTH)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=4.0,
            document_summary=2.0)

        # Apply sortby, but only for advanced search for questions
        if a == '1' and cleaned['w'] & constants.WHERE_SUPPORT:
            sortby = smart_int(request.GET.get('sortby'))
            try:
                searcher = searcher.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(*[cls.get_query_fields()
                                   for cls in get_search_models()])
            query = dict((field, cleaned_q) for field in query_fields)
            searcher = searcher.query(or_=query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher.values_dict()[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['post_title'],
                    'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            result['explanation'] = escape(format_explanation(
                doc._explanation))
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.

        # Count and log the failure before branching on the response
        # format, so failures on JSON requests are recorded too.
        if isinstance(exc, ESTimeoutError):
            statsd.incr('search.esunified.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.esunified.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('search.esunified.elasticsearchexception')

        import logging
        logging.exception(exc)

        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    # NOTE(review): the visible code ends here without returning a
    # response for the success path (results, pages, num_results and
    # lang_name are built but unused) -- confirm whether the remainder of
    # this view was lost.
def search_with_es_unified(request, template=None):
    """ES-specific search view.

    Validates the optional JSONP callback and the search form, builds
    filters for wiki documents, support questions and forum threads from
    the cleaned form data, runs the query and collects one result dict
    per hit for the requested page.

    :arg request: the HTTP request; all search options come from
        ``request.GET``
    :arg template: template used to render the search form for mobile
        requests

    :returns: a 400 response for an invalid JSONP callback or (JSON
        requests only) invalid search data; the rendered search form when
        the form is invalid or ``a == '2'``; a 503 response when the
        search backend raises one of the handled ES errors
    """
    # Time ES and Sphinx separate. See bug 723930.
    # TODO: Remove this once Sphinx is gone.
    start = time.time()

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        # A list (rather than map()) so that an empty selection is falsy
        # and falls back to the defaults on every Python version.
        category = ([int(c) for c in r.getlist('category')] or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                  minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    # Woah! object?! Yeah, so what happens is that Sphilastic is
    # really an elasticutils.S and that requires a Django ORM model
    # argument. That argument only gets used if you want object
    # results--for every hit it gets back from ES, it creates an
    # object of the type of the Django ORM model you passed in. We use
    # object here to satisfy the need for a type in the constructor
    # and make sure we don't ever ask for object results.
    searcher = Sphilastic(object)

    wiki_f = F()
    question_f = F()
    discussion_f = F()

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(document_tag=p)

        # Tags filter
        tags = [t.strip() for t in cleaned['tags'].split()]
        for t in tags:
            wiki_f &= F(document_tag=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_f &= F(question_tag=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        if cleaned['forum']:
            # Field is post_forum_id (was misspelled "post_form_id").
            discussion_f &= F(post_forum_id__in=cleaned['forum'])

    # End - discussion forum filters

    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    documents = ComposedList()

    try:
        cleaned_q = cleaned['q']

        # Add all the filters
        searcher = searcher.filter(question_f | wiki_f | discussion_f)

        # Set up the highlights
        searcher = searcher.highlight(
            'question_title', 'question_content', 'question_answer_content',
            'discussion_content',
            before_match='<b>',
            after_match='</b>',
            limit=settings.SEARCH_SUMMARY_LENGTH)

        # Set up weights
        searcher = searcher.weight(
            question_title__text=4,
            question_content__text=3,
            question_answer_content__text=3,
            post_title__text=2,
            post_content__text=1,
            document_title__text=6,
            document_content__text=1,
            document_keywords__text=4,
            document_summary__text=2)

        # Apply sortby, but only for advanced search for questions
        if a == '1' and cleaned['w'] & constants.WHERE_SUPPORT:
            sortby = smart_int(request.GET.get('sortby'))
            try:
                searcher = searcher.order_by(
                    *constants.SORT_QUESTIONS_ES[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(*[cls.get_query_fields()
                                   for cls in get_search_models()])
            query = dict((field, cleaned_q) for field in query_fields)
            searcher = searcher.query(or_=query)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher),
                            min(searcher.count(),
                                settings.SEARCH_MAX_RESULTS))

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)
        num_results = len(documents)

        # Get the documents we want to show and add them to
        # docs_for_page
        documents = documents[offset:offset + results_per_page]

        if len(documents) == 0:
            # No hits at all, or the requested page is beyond the
            # pagination: there is nothing to index into, so show no
            # results instead of raising IndexError.
            searcher = []
        else:
            bounds = documents[0][1]
            searcher = searcher.values_dict()[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['post_title'],
                    'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.

        # Count and log the failure before branching on the response
        # format, so failures on JSON requests are recorded too.
        # The previous keys contained a literal, never-interpolated "%s".
        # NOTE(review): key names below follow the other search views in
        # this file -- confirm against existing dashboards.
        if isinstance(exc, ESTimeoutError):
            statsd.incr('search.esunified.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.esunified.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('search.esunified.elasticsearchexception')

        import logging
        logging.exception(exc)

        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    # NOTE(review): the visible code ends here without returning a
    # response for the success path (results, pages, num_results, start
    # and lang_name are built but unused) -- confirm whether the
    # remainder of this view was lost.
def search(request, template=None):
    """ES-specific search view.

    Builds an Elasticsearch query from the GET parameters (validated
    through ``SearchForm``), runs it, and renders the hits.

    :arg request: the HTTP request. Reads ``request.GET``,
        ``request.LANGUAGE_CODE`` and ``request.MOBILE``.
    :arg template: template used for the results page (and, for mobile
        requests, for the search form page as well).

    :returns: an HTTP response, one of:

        * status 400 JSON error when ``format=json`` and either the
          ``callback`` is rejected by ``jsonp_is_valid`` or the form
          data is invalid;
        * the rendered search form when the form is invalid or the
          advanced form was requested (``a=2``);
        * status 503 (JSON error or the "down" template) when one of
          ``ES_EXCEPTIONS`` is raised while querying;
        * the results as JSON (wrapped in ``callback(...)`` when a
          callback was given) when ``format=json``;
        * otherwise the rendered results page, with caching headers and
          the last-search cookie set.
    """
    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format. HTTP dates use the abbreviated
    # weekday and month names (e.g. "Sun, 06 Nov 1994 08:49:37 GMT");
    # the full names produced by %A/%B are not a valid HTTP-date.
    expires_fmt = '%a, %d %b %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values. A list comprehension (rather than map())
    # keeps the "empty list falls back to the defaults" behaviour even
    # where map() returns a lazy, always-truthy iterator.
    try:
        category = ([int(c) for c in r.getlist('category')] or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': a, 'request': request,
            'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
            (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (
            datetime.utcnow() +
            timedelta(minutes=settings.SEARCH_CACHE_PERIOD)
        ).strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    lang_name = settings.LANGUAGES.get(lang) or ''

    # Woah! object?! Yeah, so what happens is that Sphilastic is
    # really an elasticutils.S and that requires a Django ORM model
    # argument. That argument only gets used if you want object
    # results--for every hit it gets back from ES, it creates an
    # object of the type of the Django ORM model you passed in. We use
    # object here to satisfy the need for a type in the constructor
    # and make sure we don't ever ask for object results.
    searcher = Sphilastic(object)

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')
    discussion_f = F(model='forums_thread')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        if cleaned['forum']:
            discussion_f &= F(post_forum_id__in=cleaned['forum'])

    # End - discussion forum filters

    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # In basic search, we limit questions from the last
    # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds.
    if a == '0':
        start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        question_f &= F(created__gte=start_date)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        final_filter |= question_f

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        final_filter |= discussion_f

    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights
        # First 500 characters of content in one big fragment
        searcher = searcher.highlight(
            'question_content', 'discussion_content', 'document_summary',
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0,
            fragment_size=500)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__text_phrase=10.0,
            document_content__text_phrase=8.0)

        # Apply sortby for advanced search of questions
        if cleaned['w'] == constants.WHERE_SUPPORT:
            sortby = cleaned['sortby']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned['w'] == constants.WHERE_WIKI:
            sortby = cleaned['sortby_documents']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(*[cls.get_query_fields()
                                   for cls in get_search_models()])
            query = {}
            # Create text and text_phrase queries for every field
            # we want to search.
            for field in query_fields:
                for query_type in ['text', 'text_phrase']:
                    query['%s__%s' % (field, query_type)] = cleaned_q

            searcher = searcher.query(or_=query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # Facets
        product_facets = {}

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher.values_dict()[bounds[0]:bounds[1]]

                # If we are doing basic search, we show product facets.
                # This stays inside the branch where `searcher` is
                # still a query object: the empty-list stand-ins used
                # above have no .facet().
                if a == '0':
                    pfc = searcher.facet(
                        'product', filtered=True).facet_counts()
                    product_facets = dict(
                        (p['term'], p['count']) for p in pfc['product'])

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(
                        doc['question_content'], strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'title': doc['post_title'],
                    'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            result['explanation'] = escape(format_explanation(
                doc._explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        import logging
        logging.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    if is_json:
        # Models are not json serializable. Use a dedicated loop name
        # so the query dict `r` is not rebound to a result.
        for result in results:
            del result['object']
        data = {}
        data['results'] = results
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    results_ = render(request, template, {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'product': cleaned['product'],
        'products': Product.objects.filter(visible=True),
        'product_facets': product_facets,
        'pages': pages,
        'search_form': search_form,
        'lang_name': lang_name,
    })
    results_['Cache-Control'] = 'max-age=%s' % \
        (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (
        datetime.utcnow() +
        timedelta(minutes=settings.SEARCH_CACHE_PERIOD)
    ).strftime(expires_fmt)
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_