def suggest(request):
    text = request.body or request.GET.get('q')
    locale = request.GET.get('locale', settings.WIKI_DEFAULT_LANGUAGE)
    product = request.GET.get('product')
    max_questions = request.GET.get('max_questions', 10)
    max_documents = request.GET.get('max_documents', 10)

    errors = {}

    try:
        max_questions = int(max_questions)
    except ValueError:
        errors['max_questions'] = 'This field must be an integer.'

    try:
        max_documents = int(max_documents)
    except ValueError:
        errors['max_documents'] = 'This field must be an integer.'

    if text is None:
        errors['q'] = 'This field is required.'

    if product is not None and not Product.objects.filter(slug=product).exists():
        errors['product'] = 'Could not find product with slug "{0}".'.format(product)

    if errors:
        raise GenericAPIException(400, errors)

    searcher = (
        es_utils.AnalyzerS()
        .es(urls=settings.ES_URLS)
        .indexes(es_utils.read_index('default')))

    return Response({
        'questions': _question_suggestions(searcher, text, locale, product, max_questions),
        'documents': _document_suggestions(searcher, text, locale, product, max_documents),
    })
def suggest(request):
    if request.data and request.GET:
        raise GenericAPIException(
            400, 'Put all parameters either in the querystring or the HTTP request body.')

    serializer = SuggestSerializer(data=(request.data or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    searcher = (
        es_utils.AnalyzerS()
        .es(urls=settings.ES_URLS,
            timeout=settings.ES_TIMEOUT,
            use_ssl=settings.ES_USE_SSL,
            http_auth=settings.ES_HTTP_AUTH)
        .indexes(es_utils.read_index('default')))

    data = serializer.validated_data
    return Response({
        'questions': _question_suggestions(
            searcher, data['q'], data['locale'], data['product'],
            data['max_questions']),
        'documents': _document_suggestions(
            searcher, data['q'], data['locale'], data['product'],
            data['max_documents']),
    })
def suggest(request):
    text = request.body or request.GET.get('q')
    locale = request.GET.get('locale', settings.WIKI_DEFAULT_LANGUAGE)
    product = request.GET.get('product')
    max_questions = request.GET.get('max_questions', 10)
    max_documents = request.GET.get('max_documents', 10)

    errors = {}

    try:
        max_questions = int(max_questions)
    except ValueError:
        errors['max_questions'] = 'This field must be an integer.'

    try:
        max_documents = int(max_documents)
    except ValueError:
        errors['max_documents'] = 'This field must be an integer.'

    if text is None:
        errors['q'] = 'This field is required.'

    if product is not None and not Product.objects.filter(
            slug=product).exists():
        errors['product'] = 'Could not find product with slug "{0}".'.format(
            product)

    if errors:
        raise GenericAPIException(400, errors)

    searcher = (es_utils.AnalyzerS().es(urls=settings.ES_URLS).indexes(
        es_utils.read_index('default')))

    return Response({
        'questions':
        _question_suggestions(searcher, text, locale, product, max_questions),
        'documents':
        _document_suggestions(searcher, text, locale, product, max_documents),
    })
def suggest(request):
    if request.data and request.GET:
        raise GenericAPIException(
            400,
            'Put all parameters either in the querystring or the HTTP request body.'
        )

    serializer = SuggestSerializer(data=(request.data or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    searcher = (es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection).indexes(
            es_utils.read_index('default')))

    data = serializer.validated_data
    return Response({
        'questions':
        _question_suggestions(searcher, data['q'], data['locale'],
                              data['product'], data['max_questions']),
        'documents':
        _document_suggestions(searcher, data['q'], data['locale'],
                              data['product'], data['max_documents']),
    })
def suggest(request):
    text = request.body or request.GET.get("q")
    locale = request.GET.get("locale", settings.WIKI_DEFAULT_LANGUAGE)
    product = request.GET.get("product")
    max_questions = request.GET.get("max_questions", 10)
    max_documents = request.GET.get("max_documents", 10)

    errors = {}

    try:
        max_questions = int(max_questions)
    except ValueError:
        errors["max_questions"] = "This field must be an integer."

    try:
        max_documents = int(max_documents)
    except ValueError:
        errors["max_documents"] = "This field must be an integer."

    if text is None:
        errors["q"] = "This field is required."

    if product is not None and not Product.objects.filter(slug=product).exists():
        errors["product"] = 'Could not find product with slug "{0}".'.format(product)

    if errors:
        raise GenericAPIException(400, errors)

    searcher = es_utils.AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    return Response(
        {
            "questions": _question_suggestions(searcher, text, locale, product, max_questions),
            "documents": _document_suggestions(searcher, text, locale, product, max_documents),
        }
    )
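# Illustrative sketch, not from the original source: how a client might call a
# suggest endpoint like the ones above. The host and URL path are assumptions;
# the parameter names (q, locale, product, max_questions, max_documents) and
# the 'questions'/'documents' response keys mirror the view code.
import requests

resp = requests.get(
    'https://support.example.com/api/2/search/suggest/',  # assumed URL
    params={
        'q': 'crash on startup',   # required free-text query
        'locale': 'en-US',         # server falls back to WIKI_DEFAULT_LANGUAGE
        'product': 'firefox',      # must match an existing Product slug
        'max_questions': 5,
        'max_documents': 5,
    },
    timeout=10,
)
resp.raise_for_status()
payload = resp.json()
print(len(payload['questions']), 'question suggestions')
print(len(payload['documents']), 'document suggestions')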
def handle_delete(request):
    """Deletes an index"""
    index_to_delete = request.POST['delete_index']

    # Rule 1: Has to start with the ES_INDEX_PREFIX.
    if not index_to_delete.startswith(settings.ES_INDEX_PREFIX):
        raise DeleteError('"%s" is not a valid index name.' % index_to_delete)

    # Rule 2: Must be an existing index.
    indexes = [name for name, count in get_indexes()]
    if index_to_delete not in indexes:
        raise DeleteError('"%s" does not exist.' % index_to_delete)

    # Rule 3: Don't delete the read index.
    if index_to_delete == read_index():
        raise DeleteError('"%s" is the read index.' % index_to_delete)

    delete_index(index_to_delete)

    return HttpResponseRedirect(request.path)
def suggest(request):
    if request.DATA and request.GET:
        raise GenericAPIException(
            400, 'Put all parameters either in the querystring or the HTTP request body.')

    serializer = SuggestSerializer(data=(request.DATA or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    searcher = (
        es_utils.AnalyzerS()
        .es(urls=settings.ES_URLS)
        .indexes(es_utils.read_index('default')))

    data = serializer.object
    return Response({
        'questions': _question_suggestions(
            searcher, data['q'], data['locale'], data['product'],
            data['max_questions']),
        'documents': _document_suggestions(
            searcher, data['q'], data['locale'], data['product'],
            data['max_documents']),
    })
def handle_delete(request):
    """Deletes an index"""
    index_to_delete = request.POST.get('delete_index')
    es_indexes = [name for (name, count) in get_indexes()]

    # Rule 1: Has to start with the ES_INDEX_PREFIX.
    if not index_to_delete.startswith(settings.ES_INDEX_PREFIX):
        raise DeleteError('"%s" is not a valid index name.' % index_to_delete)

    # Rule 2: Must be an existing index.
    if index_to_delete not in es_indexes:
        raise DeleteError('"%s" does not exist.' % index_to_delete)

    # Rule 3: Don't delete the default read index.
    # TODO: When the critical index exists, this should be "Don't
    # delete the critical read index."
    if index_to_delete == read_index('default'):
        raise DeleteError('"%s" is the default read index.' % index_to_delete)

    # The index is ok to delete
    delete_index(index_to_delete)

    return HttpResponseRedirect(request.path)
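# Illustrative sketch, not from the original source: exercising the three
# deletion rules above from a test. RequestFactory is standard Django; the
# posted path is an assumption.
from django.test import RequestFactory


def _post_delete(index_name):
    request = RequestFactory().post('/admin/search',
                                    {'delete_index': index_name})
    return handle_delete(request)

# Expected behaviour per the rules above: an index name without
# ES_INDEX_PREFIX, a nonexistent index, or the default read index raises
# DeleteError; any other existing index is deleted and the view redirects.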
def suggest(request):
    if request.DATA and request.GET:
        raise GenericAPIException(
            400,
            'Put all parameters either in the querystring or the HTTP request body.'
        )

    serializer = SuggestSerializer(data=(request.DATA or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    searcher = (es_utils.AnalyzerS().es(urls=settings.ES_URLS).indexes(
        es_utils.read_index('default')))

    data = serializer.object
    return Response({
        'questions':
        _question_suggestions(searcher, data['q'], data['locale'],
                              data['product'], data['max_questions']),
        'documents':
        _document_suggestions(searcher, data['q'], data['locale'],
                              data['product'], data['max_documents']),
    })
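# Illustrative sketch, not from the original source: the serializer-based
# versions of suggest() rely on a SuggestSerializer that is not shown here.
# Field names and defaults below simply mirror the hand-rolled validation in
# the earlier versions; Product and settings are assumed to come from the
# surrounding project, and the validate hook uses the DRF 3-style signature
# (older DRF releases used a different one).
from rest_framework import serializers


class SuggestSerializer(serializers.Serializer):
    q = serializers.CharField(required=True)
    locale = serializers.CharField(
        required=False, default=settings.WIKI_DEFAULT_LANGUAGE)
    product = serializers.CharField(required=False, default='')
    max_questions = serializers.IntegerField(required=False, default=10)
    max_documents = serializers.IntegerField(required=False, default=10)

    def validate_product(self, value):
        if value and not Product.objects.filter(slug=value).exists():
            raise serializers.ValidationError(
                'Could not find product with slug "{0}".'.format(value))
        return value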
def search(request):
    """Render the admin view containing search tools"""
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []
    stats = {}

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'recreate_index' in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append(u'Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append(u'Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    stats = None
    write_stats = None
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
        client = redis_client('default')
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        pass

    recent_records = Record.uncached.order_by('-starttime')[:100]
    outstanding_records = (Record.uncached.filter(endtime__isnull=True)
                           .order_by('-starttime'))

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)]
                        for group in index_groups]

    return render(
        request,
        'admin/search_maintenance.html',
        {'title': 'Search',
         'es_deets': es_deets,
         'doctype_stats': stats,
         'doctype_write_stats': write_stats,
         'indexes': indexes,
         'index_groups': index_groups,
         'index_group_data': index_group_data,
         'read_indexes': all_read_indexes,
         'write_indexes': all_write_indexes,
         'error_messages': error_messages,
         'recent_records': recent_records,
         'outstanding_records': outstanding_records,
         'outstanding_chunks': outstanding_chunks,
         'now': datetime.now(),
         'read_index': read_index,
         'write_index': write_index,
         })
def search(request):
    """Render the admin view containing search tools"""
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []
    stats = {}

    if 'reset' in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'reindex' in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'recreate_index' in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append('Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append('Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    stats = None
    write_stats = None
    es_deets = None
    indexes = []

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    recent_records = Record.objects.all()[:100]
    outstanding_records = Record.objects.outstanding()

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)]
                        for group in index_groups]

    return render(
        request, 'admin/search_maintenance.html', {
            'title': 'Search',
            'es_deets': es_deets,
            'doctype_stats': stats,
            'doctype_write_stats': write_stats,
            'indexes': indexes,
            'index_groups': index_groups,
            'index_group_data': index_group_data,
            'read_indexes': all_read_indexes,
            'write_indexes': all_write_indexes,
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
            'read_index': read_index,
            'write_index': write_index,
        })
def simple_search(request, template=None): """ES-specific simple search view. This view is for end user searching of the Knowledge Base and Support Forum. Filtering options are limited to: * product (`product=firefox`, for example, for only Firefox results) * document type (`w=2`, for esample, for Support Forum questions only) """ # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL. a = request.GET.get('a') if a in ['1', '2']: new_url = reverse('search.advanced') + '?' + request.GET.urlencode() return HttpResponseRedirect(new_url) # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() content_type = ('application/x-javascript' if callback else 'application/json') # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse(json.dumps( {'error': _('Invalid callback function.')}), content_type=content_type, status=400) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) r = request.GET.copy() # TODO: Do we really need to add this to the URL if it isn't already there? r['w'] = r.get('w', constants.WHERE_BASIC) # TODO: Break out a separate simple search form. search_form = SimpleSearchForm(r, auto_id=False) if not search_form.is_valid(): if is_json: return HttpResponse(json.dumps( {'error': _('Invalid search data.')}), content_type=content_type, status=400) t = template if request.MOBILE else 'search/form.html' search_ = render(request, t, { 'advanced': False, 'request': request, 'search_form': search_form }) cache_period = settings.SEARCH_CACHE_PERIOD search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60) search_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)) return search_ cleaned = search_form.cleaned_data # On mobile, we default to just wiki results. if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES_DICT.get(lang): lang_name = settings.LANGUAGES_DICT[lang] else: lang_name = '' # We use a regular S here because we want to search across # multiple doctypes. searcher = (AnalyzerS().es(urls=settings.ES_URLS).indexes( es_utils.read_index('default'))) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(product=p) # Archived bit wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Has helpful answers is set by default if using basic search cleaned['has_helpful'] = constants.TERNARY_YES # No archived questions in default search. 
cleaned['is_archived'] = constants.TERNARY_NO # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('has_helpful', 'is_archived') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) # Product filter products = cleaned['product'] for p in products: question_f &= F(product=p) # End - support questions filters # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__match_phrase=10.0, document_content__match_phrase=8.0) # Build the query query_fields = chain(*[ cls.get_query_fields() for cls in [DocumentMappingType, QuestionMappingType] ]) query = {} # Create match and match_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['match', 'match_phrase']: query['%s__%s' % (field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = _build_es_excerpt(doc) if not summary: summary = doc['document_summary'] result = {'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. 
In that case, just # show the question--but only the first 500 # characters. summary = bleach.clean(doc['question_content'], strip=True)[:500] result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week'] } result['url'] = doc['url'] result['object'] = doc result['search_summary'] = summary result['rank'] = rank result['score'] = doc.es_meta.score result['explanation'] = escape( format_explanation(doc.es_meta.explanation)) results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), content_type=content_type, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip('()') statsd.incr('search.esunified.{0}'.format(exc_bucket)) log.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return render(request, t, {'q': cleaned['q']}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned['product']) product = Product.objects.filter(slug__in=cleaned['product']) if product: product_titles = [ _(p.title, 'DB: products.Product.title') for p in product ] else: product_titles = [_('All Products')] product_titles = ', '.join(product_titles) data = { 'num_results': num_results, 'results': results, 'fallback_results': fallback_results, 'product_titles': product_titles, 'q': cleaned['q'], 'w': cleaned['w'], 'lang_name': lang_name, } if is_json: # Models are not json serializable. for r in data['results']: del r['object'] data['total'] = len(data['results']) data['products'] = ([{ 'slug': p.slug, 'title': p.title } for p in Product.objects.filter(visible=True)]) if product: data['product'] = product[0].slug pages = Paginator(pages) data['pagination'] = dict( number=pages.pager.number, num_pages=pages.pager.paginator.num_pages, has_next=pages.pager.has_next(), has_previous=pages.pager.has_previous(), max=pages.max, span=pages.span, dotted_upper=pages.pager.dotted_upper, dotted_lower=pages.pager.dotted_lower, page_range=pages.pager.page_range, url=pages.pager.url, ) if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, content_type=content_type) data.update({ 'product': product, 'products': Product.objects.filter(visible=True), 'pages': pages, 'search_form': search_form, 'advanced': False, }) results_ = render(request, template, data) cache_period = settings.SEARCH_CACHE_PERIOD results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60) results_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
def generate_simple_search(search_form, language, with_highlights=False):
    """Generates an S given a form

    :arg search_form: a validated SimpleSearch form
    :arg language: the language code
    :arg with_highlights: whether or not to ask for highlights

    :returns: a fully formed S

    """
    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection,
    ).indexes(es_utils.read_index("default")))

    cleaned = search_form.cleaned_data

    doctypes = []
    final_filter = es_utils.F()
    cleaned_q = cleaned["q"]
    products = cleaned["product"]

    # Handle wiki filters
    if cleaned["w"] & constants.WHERE_WIKI:
        wiki_f = es_utils.F(
            model="wiki_document",
            document_category__in=settings.SEARCH_DEFAULT_CATEGORIES,
            document_locale=language,
            document_is_archived=False,
        )
        for p in products:
            wiki_f &= es_utils.F(product=p)
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    # Handle question filters
    if cleaned["w"] & constants.WHERE_SUPPORT:
        question_f = es_utils.F(model="questions_question",
                                question_is_archived=False,
                                question_has_helpful=True)
        for p in products:
            question_f &= es_utils.F(product=p)
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    # Build a filter for those filters and add the other bits to
    # finish the search
    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if cleaned["explain"]:
        searcher = searcher.explain()

    if with_highlights:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

    searcher = apply_boosts(searcher)

    # Build the query
    query_fields = chain(*[
        cls.get_query_fields()
        for cls in [DocumentMappingType, QuestionMappingType]
    ])
    query = {}
    # Create match and match_phrase queries for every field
    # we want to search.
    for field in query_fields:
        for query_type in ["match", "match_phrase"]:
            query["%s__%s" % (field, query_type)] = cleaned_q

    # Transform the query to use locale aware analyzers.
    query = es_utils.es_query_with_analyzer(query, language)

    searcher = searcher.query(should=True, **query)
    return searcher
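# Illustrative usage sketch, not from the original source, assuming a bound
# and validated SimpleSearchForm. The form keys ('q', 'w', 'product',
# 'explain') and the document fields accessed below come from the function
# body above; the sample query text and locale are arbitrary.
form = SimpleSearchForm(
    {'q': 'sync bookmarks',
     'w': constants.WHERE_WIKI,
     'product': ['firefox'],
     'explain': False},
    auto_id=False)

if form.is_valid():
    searcher = generate_simple_search(form, 'en-US', with_highlights=True)
    # The S is lazy; slicing it executes the Elasticsearch query.
    for hit in searcher[:10]:
        print(hit['model'], hit.es_meta.score)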
def test_delete(self, _out):
    # Note: read_index() == write_index() in the tests, so we only
    # have to do one.
    for index in [es_utils.read_index(), 'cupcakerainbow_index']:
        call_command('esdelete', index, noinput=True)
stats = None
write_stats = None
es_deets = None
indexes = []
outstanding_chunks = None

try:
    # TODO: SUMO has a single ES_URL and that's the ZLB and does
    # the balancing. If that ever changes and we have multiple
    # ES_URLs, then this should get fixed.
    es_deets = requests.get(settings.ES_URLS[0]).json()
except requests.exceptions.RequestException:
    pass

try:
    stats = get_doctype_stats(read_index())
except ES_EXCEPTIONS:
    pass

try:
    write_stats = get_doctype_stats(write_index())
except ES_EXCEPTIONS:
    pass

try:
    indexes = get_indexes()
    indexes.sort(key=lambda m: m[0])
except ES_EXCEPTIONS as e:
    error_messages.append('Error: {0}'.format(repr(e)))

try:
def advanced_search(request, template=None): """ES-specific Advanced search view""" # JSON-specific variables is_json = request.GET.get("format") == "json" callback = request.GET.get("callback", "").strip() content_type = "application/x-javascript" if callback else "application/json" # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400 ) language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE)) r = request.GET.copy() # TODO: Figure out how to get rid of 'a' and do it. # It basically is used to switch between showing the form or results. a = request.GET.get("a", "2") # TODO: This is so the 'a=1' stays in the URL for pagination. r["a"] = 1 # Search default values try: category = map(int, r.getlist("category")) or settings.SEARCH_DEFAULT_CATEGORIES except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist("category", category) r["language"] = language search_form = AdvancedSearchForm(r, auto_id=False) search_form.set_allowed_forums(request.user) # This is all we use a for now I think. if not search_form.is_valid() or a == "2": if is_json: return HttpResponse(json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400) t = template if request.MOBILE else "search/form.html" search_ = render(request, t, {"advanced": True, "request": request, "search_form": search_form}) cache_period = settings.SEARCH_CACHE_PERIOD search_["Cache-Control"] = "max-age=%s" % (cache_period * 60) search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT) return search_ cleaned = search_form.cleaned_data if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC: cleaned["w"] = constants.WHERE_WIKI page = max(smart_int(request.GET.get("page")), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES_DICT.get(lang): lang_name = settings.LANGUAGES_DICT[lang] else: lang_name = "" # We use a regular S here because we want to search across # multiple doctypes. 
searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default")) wiki_f = F(model="wiki_document") question_f = F(model="questions_question") discussion_f = F(model="forums_thread") # Start - wiki filters if cleaned["w"] & constants.WHERE_WIKI: # Category filter if cleaned["category"]: wiki_f &= F(document_category__in=cleaned["category"]) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned["product"] for p in products: wiki_f &= F(product=p) # Topics filter topics = cleaned["topics"] for t in topics: wiki_f &= F(topic=t) # Archived bit if not cleaned["include_archived"]: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned["w"] & constants.WHERE_SUPPORT: # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful", "is_archived") d = dict( ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name] ) if d: question_f &= F(**d) if cleaned["asked_by"]: question_f &= F(question_creator=cleaned["asked_by"]) if cleaned["answered_by"]: question_f &= F(question_answer_creator=cleaned["answered_by"]) q_tags = [t.strip() for t in cleaned["q_tags"].split(",")] for t in q_tags: if t: question_f &= F(question_tag=t) # Product filter products = cleaned["product"] for p in products: question_f &= F(product=p) # Topics filter topics = cleaned["topics"] for t in topics: question_f &= F(topic=t) # End - support questions filters # Start - discussion forum filters if cleaned["w"] & constants.WHERE_DISCUSSION: if cleaned["author"]: discussion_f &= F(post_author_ord=cleaned["author"]) if cleaned["thread_type"]: if constants.DISCUSSION_STICKY in cleaned["thread_type"]: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned["thread_type"]: discussion_f &= F(post_is_locked=1) valid_forum_ids = [f.id for f in Forum.authorized_forums_for_user(request.user)] forum_ids = None if cleaned["forum"]: forum_ids = [f for f in cleaned["forum"] if f in valid_forum_ids] # If we removed all the forums they wanted to look at or if # they didn't specify, then we filter on the list of all # forums they're authorized to look at. if not forum_ids: forum_ids = valid_forum_ids discussion_f &= F(post_forum_id__in=forum_ids) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = ( ("created", cleaned["created"], cleaned["created_date"]), ("updated", cleaned["updated"], cleaned["updated_date"]), ) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + "__gte": 0, filter_name + "__lte": max(filter_date, 0)} discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + "__gte": min(filter_date, unix_now), filter_name + "__lte": unix_now} discussion_f &= F(**after) question_f &= F(**after) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. 
if cleaned["num_voted"] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned["num_votes"], 0)) elif cleaned["num_voted"] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned["num_votes"]) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned["w"] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned["w"] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f if cleaned["w"] & constants.WHERE_DISCUSSION: doctypes.append(ThreadMappingType.get_mapping_type_name()) final_filter |= discussion_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if "explain" in request.GET and request.GET["explain"] == "1": searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned["q"] # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( "question_content", # support forum "document_summary", # kb "post_content", # contributor forum pre_tags=["<b>"], post_tags=["</b>"], number_of_fragments=0, ) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, post_title=2.0, post_content=1.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__match_phrase=10.0, document_content__match_phrase=8.0, ) # Apply sortby for advanced search of questions if cleaned["w"] == constants.WHERE_SUPPORT: sortby = cleaned["sortby"] try: searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Apply sortby for advanced search of kb documents if cleaned["w"] == constants.WHERE_WIKI: sortby = cleaned["sortby_documents"] try: searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain( *[cls.get_query_fields() for cls in [DocumentMappingType, ThreadMappingType, QuestionMappingType]] ) query = {} # Create a simple_query_search query for every field # we want to search. for field in query_fields: query["%s__sqs" % field] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(("results", searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset : offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. 
searcher = [] else: bounds = documents[0][1] searcher = searcher[bounds[0] : bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc["model"] == "wiki_document": summary = _build_es_excerpt(doc) if not summary: summary = doc["document_summary"] result = {"title": doc["document_title"], "type": "document"} elif doc["model"] == "questions_question": summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. summary = bleach.clean(doc["question_content"], strip=True)[:500] result = { "title": doc["question_title"], "type": "question", "is_solved": doc["question_is_solved"], "num_answers": doc["question_num_answers"], "num_votes": doc["question_num_votes"], "num_votes_past_week": doc["question_num_votes_past_week"], } else: summary = _build_es_excerpt(doc, first_only=True) result = {"title": doc["post_title"], "type": "thread"} result["url"] = doc["url"] result["object"] = doc result["search_summary"] = summary result["rank"] = rank result["score"] = doc.es_meta.score result["explanation"] = escape(format_explanation(doc.es_meta.explanation)) results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip("()") statsd.incr("search.esunified.{0}".format(exc_bucket)) log.exception(exc) t = "search/mobile/down.html" if request.MOBILE else "search/down.html" return render(request, t, {"q": cleaned["q"]}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"] items.append(("a", "2")) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned["product"]) product = Product.objects.filter(slug__in=cleaned["product"]) if product: product_titles = [_(p.title, "DB: products.Product.title") for p in product] else: product_titles = [_("All Products")] product_titles = ", ".join(product_titles) data = { "num_results": num_results, "results": results, "fallback_results": fallback_results, "product_titles": product_titles, "q": cleaned["q"], "w": cleaned["w"], "lang_name": lang_name, "advanced": True, } if is_json: # Models are not json serializable. 
for r in data["results"]: del r["object"] data["total"] = len(data["results"]) data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)] if product: data["product"] = product[0].slug pages = Paginator(pages) data["pagination"] = dict( number=pages.pager.number, num_pages=pages.pager.paginator.num_pages, has_next=pages.pager.has_next(), has_previous=pages.pager.has_previous(), max=pages.max, span=pages.span, dotted_upper=pages.pager.dotted_upper, dotted_lower=pages.pager.dotted_lower, page_range=pages.pager.page_range, url=pages.pager.url, ) if not results: data["message"] = _("No pages matched the search criteria") json_data = json.dumps(data) if callback: json_data = callback + "(" + json_data + ");" return HttpResponse(json_data, content_type=content_type) data.update( { "product": product, "products": Product.objects.filter(visible=True), "pages": pages, "search_form": search_form, } ) results_ = render(request, template, data) cache_period = settings.SEARCH_CACHE_PERIOD results_["Cache-Control"] = "max-age=%s" % (cache_period * 60) results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False) return results_
def monitor(request):
    """View for services monitor."""
    status = {}

    # Note: To add a new component to the services monitor, do your
    # testing and then add a name -> list of output tuples map to
    # status.

    # Check memcached.
    memcache_results = []
    try:
        for cache_name, cache_props in settings.CACHES.items():
            result = True
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    memcache_results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        if not memcache_results:
            memcache_results.append((ERROR, 'memcache is not configured.'))

        elif len(memcache_results) < 2:
            memcache_results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(memcache_results))))

        else:
            memcache_results.append((INFO, 'memcached servers look good.'))

    except Exception as exc:
        memcache_results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    status['memcached'] = memcache_results

    # Check Libraries and versions
    libraries_results = []
    try:
        Image.new('RGB', (16, 16)).save(StringIO.StringIO(), 'JPEG')
        libraries_results.append((INFO, 'PIL+JPEG: Got it!'))
    except Exception as exc:
        libraries_results.append(
            (ERROR, 'PIL+JPEG: Probably missing: '
                    'Failed to create a jpeg image: %s' % exc))

    status['libraries'] = libraries_results

    # Check file paths.
    msg = 'We want read + write.'
    filepaths = (
        (settings.USER_AVATAR_PATH, os.R_OK | os.W_OK, msg),
        (settings.IMAGE_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.THUMBNAIL_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GROUP_AVATAR_PATH, os.R_OK | os.W_OK, msg),
    )

    filepath_results = []
    for path, perms, notes in filepaths:
        path = os.path.join(settings.MEDIA_ROOT, path)
        path_exists = os.path.isdir(path)
        path_perms = os.access(path, perms)

        if path_exists and path_perms:
            filepath_results.append(
                (INFO, '%s: %s %s %s' % (path, path_exists, path_perms,
                                         notes)))

    status['filepaths'] = filepath_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))
    except (socket.error, IOError) as exc:
        rabbitmq_results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))
    except Exception as exc:
        rabbitmq_results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    status['RabbitMQ'] = rabbitmq_results

    # Check ES.
    es_results = []
    try:
        es_utils.get_doctype_stats(es_utils.read_index())
        es_results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))
    except es_utils.ES_EXCEPTIONS as exc:
        es_results.append(
            (ERROR, 'ElasticSearch problem: %s' % str(exc)))
    except Exception as exc:
        es_results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    status['ElasticSearch'] = es_results

    # Check Celery.
    # start = time.time()
    # pong = celery.task.ping()
    # rabbit_results = r = {'duration': time.time() - start}
    # status_summary['rabbit'] = pong == 'pong' and r['duration'] < 1

    # Check Redis.
    redis_results = []
    if hasattr(settings, 'REDIS_BACKENDS'):
        for backend in settings.REDIS_BACKENDS:
            try:
                redis_client(backend)
                redis_results.append((INFO, '%s: Pass!' % backend))
            except RedisError:
                redis_results.append((ERROR, '%s: Fail!' % backend))
    status['Redis'] = redis_results

    status_code = 200

    status_summary = {}
    for component, output in status.items():
        if ERROR in [item[0] for item in output]:
            status_code = 500
            status_summary[component] = False
        else:
            status_summary[component] = True

    return render(request, 'services/monitor.html',
                  {'component_status': status,
                   'status_summary': status_summary},
                  status=status_code)
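# Illustrative sketch, not from the original source: test_memcached() is
# referenced in monitor() but not shown here. A minimal version of such a
# check could open a TCP connection and issue the memcached text-protocol
# "version" command; this is an assumption for illustration, not the
# project's actual implementation.
import socket


def test_memcached(host, port, timeout=2):
    try:
        sock = socket.create_connection((host, port), timeout=timeout)
        try:
            sock.sendall(b'version\r\n')
            # A healthy memcached replies with "VERSION x.y.z\r\n".
            return sock.recv(128).startswith(b'VERSION')
        finally:
            sock.close()
    except (socket.error, socket.timeout):
        return False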
def get_data(self, request):
    search_form = self.form_class(request.GET)
    if not search_form.is_valid():
        raise GenericAPIException(
            status.HTTP_400_BAD_REQUEST, _('Invalid search data.'))

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ''

    self.language = language

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    searcher = (
        es_utils.AnalyzerS()
        .es(urls=settings.ES_URLS)
        .indexes(es_utils.read_index('default'))
    )

    doctypes = self.get_doctypes()
    searcher = searcher.doctypes(*doctypes)

    filters = self.get_filters(search_form.cleaned_data)
    searcher = searcher.filter(filters)

    # Add the simple string query.
    cleaned_q = search_form.cleaned_data.get('query')

    if cleaned_q:
        query_fields = self.get_query_fields()
        query = {}
        # Create a simple_query_search query for every field
        # we want to search.
        for field in query_fields:
            query['%s__sqs' % field] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

    try:
        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # TODO - Can ditch the ComposedList here, but we need
            # something that paginate can use to figure out the paging.
            documents = ComposedList()
            documents.set_count(('results', searcher), num_results)

            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            result = self.format_result(doc)

            result['url'] = doc['url']
            result['rank'] = rank
            result['score'] = doc.es_meta.score
            result['explanation'] = escape(
                format_explanation(doc.es_meta.explanation))
            result['id'] = doc['id']
            results.append(result)

    except es_utils.ES_EXCEPTIONS:
        raise GenericAPIException(
            status.HTTP_503_SERVICE_UNAVAILABLE, _('Search Unavailable'))

    data = {
        'num_results': num_results,
        'results': results,
        'lang_name': lang_name,
    }

    if not results:
        data['message'] = _('No pages matched the search criteria')

    return data
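# Illustrative sketch, not from the original source: get_data() above
# delegates to hooks (form_class, get_doctypes, get_filters, get_query_fields,
# format_result) that a concrete view is expected to supply. A minimal
# wiki-only implementation might look like this; the specific fields chosen
# are assumptions for illustration.
class WikiSearchView(object):
    form_class = SimpleSearchForm

    def get_doctypes(self):
        return [DocumentMappingType.get_mapping_type_name()]

    def get_filters(self, cleaned_data):
        # self.language is set by get_data() before this hook is called.
        return es_utils.F(model='wiki_document',
                          document_is_archived=False,
                          document_locale=self.language)

    def get_query_fields(self):
        return DocumentMappingType.get_query_fields()

    def format_result(self, doc):
        return {'title': doc['document_title'],
                'type': 'document',
                'summary': doc['document_summary']}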
def search(request, template=None): """ES-specific search view""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse(json.dumps( {'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r) search_form.set_allowed_forums(request.user) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse(json.dumps( {'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = render(request, t, { 'advanced': a, 'request': request, 'search_form': search_form }) search_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) search_['Expires'] = (datetime.utcnow() + timedelta( minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) return search_ cleaned = search_form.cleaned_data if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' # We use a regular S here because we want to search across # multiple doctypes. 
searcher = (AnalyzerS().es(urls=settings.ES_URLS).indexes( es_utils.read_index())) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') discussion_f = F(model='forums_thread') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: wiki_f &= F(topic=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # Product filter products = cleaned['product'] for p in products: question_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: question_f &= F(topic=t) # End - support questions filters # Start - discussion forum filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) valid_forum_ids = [ f.id for f in Forum.authorized_forums_for_user(request.user) ] forum_ids = None if cleaned['forum']: forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids] # If we removed all the forums they wanted to look at or if # they didn't specify, then we filter on the list of all # forums they're authorized to look at. if not forum_ids: forum_ids = valid_forum_ids discussion_f &= F(post_forum_id__in=forum_ids) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = (('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = { filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0) } discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = { filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now } discussion_f &= F(**after) question_f &= F(**after) # In basic search, we limit questions from the last # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds. 
if a == '0': start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE question_f &= F(created__gte=start_date) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f if cleaned['w'] & constants.WHERE_DISCUSSION: doctypes.append(ThreadMappingType.get_mapping_type_name()) final_filter |= discussion_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb 'post_content', # contributor forum pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, post_title=2.0, post_content=1.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__text_phrase=10.0, document_content__text_phrase=8.0) # Apply sortby for advanced search of questions if cleaned['w'] == constants.WHERE_SUPPORT: sortby = cleaned['sortby'] try: searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Apply sortby for advanced search of kb documents if cleaned['w'] == constants.WHERE_WIKI: sortby = cleaned['sortby_documents'] try: searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain( *[cls.get_query_fields() for cls in get_mapping_types()]) query = {} # Create text and text_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['text', 'text_phrase']: query['%s__%s' % (field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. 
if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher.values_dict()[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = _build_es_excerpt(doc) if not summary: summary = doc['document_summary'] result = {'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. summary = bleach.clean(doc['question_content'], strip=True)[:500] result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week'] } else: summary = _build_es_excerpt(doc, first_only=True) result = {'title': doc['post_title'], 'type': 'thread'} result['url'] = doc['url'] result['object'] = ObjectDict(doc) result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score result['explanation'] = escape(format_explanation( doc._explanation)) results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip('()') statsd.incr('search.esunified.{0}'.format(exc_bucket)) import logging logging.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return render(request, t, {'q': cleaned['q']}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) if is_json: # Models are not json serializable. 
for r in results: del r['object'] data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned['product']) results_ = render( request, template, { 'num_results': num_results, 'results': results, 'fallback_results': fallback_results, 'q': cleaned['q'], 'w': cleaned['w'], 'product': Product.objects.filter(slug__in=cleaned['product']), 'products': Product.objects.filter(visible=True), 'pages': pages, 'search_form': search_form, 'lang_name': lang_name, }) results_['Cache-Control'] = 'max-age=%s' % \ (settings.SEARCH_CACHE_PERIOD * 60) results_['Expires'] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \ .strftime(expires_fmt) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
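# --- Editor's illustration (not part of the original view code) ---
# A minimal sketch isolating the Cache-Control/Expires handling used just above,
# assuming expires_fmt is the RFC 1123-style date string defined in search(). The
# helper name is hypothetical.
from datetime import datetime, timedelta

def _set_search_cache_headers(response, cache_period_minutes):
    """Mark a search response as cacheable for `cache_period_minutes` minutes."""
    response['Cache-Control'] = 'max-age=%s' % (cache_period_minutes * 60)
    response['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period_minutes))
        .strftime('%A, %d %B %Y %H:%M:%S GMT'))
    return response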
def simple_search(request, template=None): """ES-specific simple search view. This view is for end user searching of the Knowledge Base and Support Forum. Filtering options are limited to: * product (`product=firefox`, for example, for only Firefox results) * document type (`w=2`, for example, for Support Forum questions only) """ # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL. a = request.GET.get('a') if a in ['1', '2']: new_url = reverse('search.advanced') + '?' + request.GET.urlencode() return HttpResponseRedirect(new_url) # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() content_type = ( 'application/x-javascript' if callback else 'application/json') # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), content_type=content_type, status=400) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) r = request.GET.copy() # TODO: Do we really need to add this to the URL if it isn't already there? r['w'] = r.get('w', constants.WHERE_BASIC) # TODO: Break out a separate simple search form. search_form = SimpleSearchForm(r, auto_id=False) if not search_form.is_valid(): if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), content_type=content_type, status=400) t = template if request.MOBILE else 'search/form.html' search_ = render(request, t, { 'advanced': False, 'request': request, 'search_form': search_form}) cache_period = settings.SEARCH_CACHE_PERIOD search_['Cache-Control'] = 'max-age={0!s}'.format((cache_period * 60)) search_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)) .strftime(EXPIRES_FMT)) return search_ cleaned = search_form.cleaned_data # On mobile, we default to just wiki results. if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES_DICT.get(lang): lang_name = settings.LANGUAGES_DICT[lang] else: lang_name = '' # We use a regular S here because we want to search across # multiple doctypes. searcher = (AnalyzerS().es(urls=settings.ES_URLS) .indexes(es_utils.read_index('default'))) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') cleaned_q = cleaned['q'] products = cleaned['product'] if not products and 'all_products' not in request.GET: lowered_q = cleaned_q.lower() if 'thunderbird' in lowered_q: products.append('thunderbird') elif 'android' in lowered_q: products.append('mobile') elif ('ios' in lowered_q or 'ipad' in lowered_q or 'ipod' in lowered_q or 'iphone' in lowered_q): products.append('ios') elif 'firefox os' in lowered_q: products.append('firefox-os') elif 'firefox' in lowered_q: products.append('firefox') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES) # Locale filter wiki_f &= F(document_locale=language) # Product filter for p in products: wiki_f &= F(product=p) # Archived bit wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Has helpful answers is set by default if using basic search cleaned['has_helpful'] = constants.TERNARY_YES # No archived questions in default search.
cleaned['is_archived'] = constants.TERNARY_NO # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('has_helpful', 'is_archived') d = dict(('question_{0!s}'.format(filter_name), _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) # Product filter for p in products: question_f &= F(product=p) # End - support questions filters # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__match_phrase=10.0, document_content__match_phrase=8.0) # Build the query query_fields = chain(*[ cls.get_query_fields() for cls in [ DocumentMappingType, QuestionMappingType ] ]) query = {} # Create match and match_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['match', 'match_phrase']: query['{0!s}__{1!s}'.format(field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = _build_es_excerpt(doc) if not summary: summary = doc['document_summary'] result = { 'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. 
summary = bleach.clean( doc['question_content'], strip=True)[:500] result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} result['url'] = doc['url'] result['object'] = doc result['search_summary'] = summary result['rank'] = rank result['score'] = doc.es_meta.score result['explanation'] = escape(format_explanation( doc.es_meta.explanation)) result['id'] = doc['id'] results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), content_type=content_type, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip('()') statsd.incr('search.esunified.{0}'.format(exc_bucket)) log.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return render(request, t, {'q': cleaned['q']}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned['product']) product = Product.objects.filter(slug__in=cleaned['product']) if product: product_titles = [_(p.title, 'DB: products.Product.title') for p in product] else: product_titles = [_('All Products')] product_titles = ', '.join(product_titles) data = { 'num_results': num_results, 'results': results, 'fallback_results': fallback_results, 'product_titles': product_titles, 'q': cleaned['q'], 'w': cleaned['w'], 'lang_name': lang_name, } if is_json: # Models are not json serializable. for r in data['results']: del r['object'] data['total'] = len(data['results']) data['products'] = ([{'slug': p.slug, 'title': p.title} for p in Product.objects.filter(visible=True)]) if product: data['product'] = product[0].slug pages = Paginator(pages) data['pagination'] = dict( number=pages.pager.number, num_pages=pages.pager.paginator.num_pages, has_next=pages.pager.has_next(), has_previous=pages.pager.has_previous(), max=pages.max, span=pages.span, dotted_upper=pages.pager.dotted_upper, dotted_lower=pages.pager.dotted_lower, page_range=pages.pager.page_range, url=pages.pager.url, ) if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, content_type=content_type) data.update({ 'product': product, 'products': Product.objects.filter(visible=True), 'pages': pages, 'search_form': search_form, 'advanced': False, }) results_ = render(request, template, data) cache_period = settings.SEARCH_CACHE_PERIOD results_['Cache-Control'] = 'max-age={0!s}'.format((cache_period * 60)) results_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)) .strftime(EXPIRES_FMT)) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
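# --- Editor's illustration (not part of the original module) ---
# Hedged sketch of what the _ternary_filter helper used above plausibly does,
# assuming constants.TERNARY_YES / constants.TERNARY_NO are the "on" states of the
# ternary dropdowns and the indexed question_* fields hold booleans. The name is
# suffixed _sketch because the real implementation is not shown in this excerpt.
def _ternary_filter_sketch(ternary_value):
    """Map a ternary form value to the boolean an F() filter expects."""
    return ternary_value == constants.TERNARY_YES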
def get_data(self, request): search_form = self.form_class(request.GET) if not search_form.is_valid(): raise GenericAPIException(status.HTTP_400_BAD_REQUEST, _('Invalid search data.')) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) lang = language.lower() if settings.LANGUAGES_DICT.get(lang): lang_name = settings.LANGUAGES_DICT[lang] else: lang_name = '' page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE searcher = (es_utils.AnalyzerS().es(urls=settings.ES_URLS).indexes( es_utils.read_index('default'))) doctypes = self.get_doctypes() searcher = searcher.doctypes(*doctypes) filters = self.get_filters() searcher = searcher.filter(filters) # Add the simple string query. cleaned_q = search_form.cleaned_data.get('query') if cleaned_q: query_fields = self.get_query_fields() query = {} # Create a simple_query_search query for every field # we want to search. for field in query_fields: query['%s__sqs' % field] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) try: num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) results_per_page = settings.SEARCH_RESULTS_PER_PAGE # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset result = self.format_result(doc) result['url'] = doc['url'] result['rank'] = rank result['score'] = doc.es_meta.score result['explanation'] = escape( format_explanation(doc.es_meta.explanation)) result['id'] = doc['id'] results.append(result) except es_utils.ES_EXCEPTIONS: raise GenericAPIException(status.HTTP_503_SERVICE_UNAVAILABLE, _('Search Unavailable')) data = { 'num_results': num_results, 'results': results, 'lang_name': lang_name, } if not results: data['message'] = _('No pages matched the search criteria') return data
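# --- Editor's illustration (not part of the original module) ---
# get_data() above relies on hook methods provided by the concrete API view class.
# This hypothetical subclass only shows the expected shape of those hooks; the real
# views in the codebase supply their own form, doctypes, filters and result format.
class DocumentSearchViewSketch(object):
    form_class = None  # assumed: a Django form with a 'query' field

    def get_doctypes(self):
        return [DocumentMappingType.get_mapping_type_name()]

    def get_filters(self):
        return es_utils.F(model='wiki_document', document_is_archived=False)

    def get_query_fields(self):
        return DocumentMappingType.get_query_fields()

    def format_result(self, doc):
        return {'title': doc['document_title'], 'slug': doc['document_slug']}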
def search(request, template=None): """ES-specific search view""" # JSON-specific variables is_json = (request.GET.get('format') == 'json') callback = request.GET.get('callback', '').strip() mimetype = 'application/x-javascript' if callback else 'application/json' # Search "Expires" header format expires_fmt = '%A, %d %B %Y %H:%M:%S GMT' # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({'error': _('Invalid callback function.')}), mimetype=mimetype, status=400) language = locale_or_default( request.GET.get('language', request.LANGUAGE_CODE)) r = request.GET.copy() a = request.GET.get('a', '0') # Search default values try: category = (map(int, r.getlist('category')) or settings.SEARCH_DEFAULT_CATEGORIES) except ValueError: category = settings.SEARCH_DEFAULT_CATEGORIES r.setlist('category', category) # Basic form if a == '0': r['w'] = r.get('w', constants.WHERE_BASIC) # Advanced form if a == '2': r['language'] = language r['a'] = '1' # TODO: Rewrite so SearchForm is unbound initially and we can use # `initial` on the form fields. if 'include_archived' not in r: r['include_archived'] = False search_form = SearchForm(r, auto_id=False) search_form.set_allowed_forums(request.user) if not search_form.is_valid() or a == '2': if is_json: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), mimetype=mimetype, status=400) t = template if request.MOBILE else 'search/form.html' search_ = render(request, t, { 'advanced': a, 'request': request, 'search_form': search_form}) cache_period = settings.SEARCH_CACHE_PERIOD search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60) search_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)) .strftime(expires_fmt)) return search_ cleaned = search_form.cleaned_data if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI page = max(smart_int(request.GET.get('page')), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES.get(lang): lang_name = settings.LANGUAGES[lang] else: lang_name = '' # We use a regular S here because we want to search across # multiple doctypes. 
searcher = (AnalyzerS().es(urls=settings.ES_URLS) .indexes(es_utils.read_index('default'))) wiki_f = F(model='wiki_document') question_f = F(model='questions_question') discussion_f = F(model='forums_thread') # Start - wiki filters if cleaned['w'] & constants.WHERE_WIKI: # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: wiki_f &= F(topic=t) # Archived bit if a == '0' and not cleaned['include_archived']: # Default to NO for basic search: cleaned['include_archived'] = False if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned['w'] & constants.WHERE_SUPPORT: # Solved is set by default if using basic search if a == '0' and not cleaned['has_helpful']: cleaned['has_helpful'] = constants.TERNARY_YES # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful', 'is_archived') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # Product filter products = cleaned['product'] for p in products: question_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: question_f &= F(topic=t) # End - support questions filters # Start - discussion forum filters if cleaned['w'] & constants.WHERE_DISCUSSION: if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) valid_forum_ids = [ f.id for f in Forum.authorized_forums_for_user(request.user)] forum_ids = None if cleaned['forum']: forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids] # If we removed all the forums they wanted to look at or if # they didn't specify, then we filter on the list of all # forums they're authorized to look at. if not forum_ids: forum_ids = valid_forum_ids discussion_f &= F(post_forum_id__in=forum_ids) # End - discussion forum filters # Created filter unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date'])) for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_f &= F(**before) question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_f &= F(**after) question_f &= F(**after) # In basic search, we limit questions from the last # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds. 
if a == '0': start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE question_f &= F(created__gte=start_date) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned['w'] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned['w'] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f if cleaned['w'] & constants.WHERE_DISCUSSION: doctypes.append(ThreadMappingType.get_mapping_type_name()) final_filter |= discussion_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() documents = ComposedList() try: cleaned_q = cleaned['q'] # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb 'post_content', # contributor forum pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, post_title=2.0, post_content=1.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__text_phrase=10.0, document_content__text_phrase=8.0) # Apply sortby for advanced search of questions if cleaned['w'] == constants.WHERE_SUPPORT: sortby = cleaned['sortby'] try: searcher = searcher.order_by( *constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Apply sortby for advanced search of kb documents if cleaned['w'] == constants.WHERE_WIKI: sortby = cleaned['sortby_documents'] try: searcher = searcher.order_by( *constants.SORT_DOCUMENTS[sortby]) except IndexError: # Skip index errors because they imply the user is # sending us sortby values that aren't valid. pass # Build the query if cleaned_q: query_fields = chain(*[cls.get_query_fields() for cls in get_mapping_types()]) query = {} # Create text and text_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['text', 'text_phrase']: query['%s__%s' % (field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(('results', searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. 
if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset:offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher.values_dict()[bounds[0]:bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc['model'] == 'wiki_document': summary = _build_es_excerpt(doc) if not summary: summary = doc['document_summary'] result = { 'title': doc['document_title'], 'type': 'document'} elif doc['model'] == 'questions_question': summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. summary = bleach.clean( doc['question_content'], strip=True)[:500] result = { 'title': doc['question_title'], 'type': 'question', 'is_solved': doc['question_is_solved'], 'num_answers': doc['question_num_answers'], 'num_votes': doc['question_num_votes'], 'num_votes_past_week': doc['question_num_votes_past_week']} else: summary = _build_es_excerpt(doc, first_only=True) result = { 'title': doc['post_title'], 'type': 'thread'} result['url'] = doc['url'] result['object'] = ObjectDict(doc) result['search_summary'] = summary result['rank'] = rank result['score'] = doc._score result['explanation'] = escape(format_explanation( doc._explanation)) results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({'error': _('Search Unavailable')}), mimetype=mimetype, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip('()') statsd.incr('search.esunified.{0}'.format(exc_bucket)) import logging logging.exception(exc) t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html' return render(request, t, {'q': cleaned['q']}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) if is_json: # Models are not json serializable. 
for r in results: del r['object'] data = {} data['results'] = results data['total'] = len(results) data['query'] = cleaned['q'] if not results: data['message'] = _('No pages matched the search criteria') json_data = json.dumps(data) if callback: json_data = callback + '(' + json_data + ');' return HttpResponse(json_data, mimetype=mimetype) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned['product']) product = Product.objects.filter(slug__in=cleaned['product']) if product: product_titles = [_(p.title, 'DB: products.Product.title') for p in product] else: product_titles = [_('All Products')] product_titles = ', '.join(product_titles) results_ = render(request, template, { 'num_results': num_results, 'results': results, 'fallback_results': fallback_results, 'q': cleaned['q'], 'w': cleaned['w'], 'product': product, 'products': Product.objects.filter(visible=True), 'product_titles': product_titles, 'pages': pages, 'search_form': search_form, 'lang_name': lang_name, }) cache_period = settings.SEARCH_CACHE_PERIOD results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60) results_['Expires'] = ( (datetime.utcnow() + timedelta(minutes=cache_period)) .strftime(expires_fmt)) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return results_
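# --- Editor's illustration (not part of the original module) ---
# Self-contained demo of the exception "bucketing" in the error handler above:
# repr() of a transient Elasticsearch error such as Timeout() is normalized into a
# statsd-friendly key. The Timeout class below is a stand-in, not the real exception.
def _demo_exc_bucket():
    class Timeout(Exception):
        pass
    return repr(Timeout()).lower().strip('()')  # -> 'timeout'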
def advanced_search(request, template=None): """Elasticsearch-specific Advanced search view""" to_json = JSONRenderer().render # 1. Prep request. r = request.GET.copy() # TODO: Figure out how to get rid of 'a' and do it. # It basically is used to switch between showing the form or results. a = request.GET.get('a', '2') # TODO: This is so the 'a=1' stays in the URL for pagination. r['a'] = 1 language = locale_or_default(request.GET.get('language', request.LANGUAGE_CODE)) r['language'] = language lang = language.lower() lang_name = settings.LANGUAGES_DICT.get(lang) or '' # 2. Build form. search_form = AdvancedSearchForm(r, auto_id=False) search_form.set_allowed_forums(request.user) # 3. Validate request. # Note: a == 2 means "show the form"--that's all we use it for now. if a == '2' or not search_form.is_valid(): if request.IS_JSON: return HttpResponse( json.dumps({'error': _('Invalid search data.')}), content_type=request.CONTENT_TYPE, status=400) t = template if request.MOBILE else 'search/form.html' data = {'advanced': True, 'request': request, 'search_form': search_form} # get value for search input from last search term. last_search = request.COOKIES.get(settings.LAST_SEARCH_COOKIE) # If there is any cached input from last search, pass it to template if last_search and 'q' not in r: cached_field = urlquote(last_search) data.update({'cached_field': cached_field}) return cache_control( render(request, t, data), settings.SEARCH_CACHE_PERIOD) # 4. Generate search. cleaned = search_form.cleaned_data # On mobile, we default to just wiki results. if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC: cleaned['w'] = constants.WHERE_WIKI # We use a regular S here because we want to search across # multiple doctypes. searcher = (AnalyzerS().es(urls=settings.ES_URLS) .indexes(es_utils.read_index('default'))) doctypes = [] final_filter = F() unix_now = int(time.time()) interval_filters = ( ('created', cleaned['created'], cleaned['created_date']), ('updated', cleaned['updated'], cleaned['updated_date']) ) # Start - wiki search configuration if cleaned['w'] & constants.WHERE_WIKI: wiki_f = F(model='wiki_document') # Category filter if cleaned['category']: wiki_f &= F(document_category__in=cleaned['category']) # Locale filter wiki_f &= F(document_locale=language) # Product filter products = cleaned['product'] for p in products: wiki_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: wiki_f &= F(topic=t) # Archived bit if not cleaned['include_archived']: wiki_f &= F(document_is_archived=False) # Apply sortby sortby = cleaned['sortby_documents'] try: searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby]) except IndexError: # Skip index errors because they imply the user is sending us sortby values # that aren't valid. 
pass doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f # End - wiki search configuration # Start - support questions configuration if cleaned['w'] & constants.WHERE_SUPPORT: question_f = F(model='questions_question') # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ('is_locked', 'is_solved', 'has_answers', 'has_helpful', 'is_archived') d = dict(('question_%s' % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name]) if d: question_f &= F(**d) if cleaned['asked_by']: question_f &= F(question_creator=cleaned['asked_by']) if cleaned['answered_by']: question_f &= F(question_answer_creator=cleaned['answered_by']) q_tags = [t.strip() for t in cleaned['q_tags'].split(',')] for t in q_tags: if t: question_f &= F(question_tag=t) # Product filter products = cleaned['product'] for p in products: question_f &= F(product=p) # Topics filter topics = cleaned['topics'] for t in topics: question_f &= F(topic=t) # Note: num_voted (with a d) is a different field than num_votes # (with an s). The former is a dropdown and the latter is an # integer value. if cleaned['num_voted'] == constants.INTERVAL_BEFORE: question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0)) elif cleaned['num_voted'] == constants.INTERVAL_AFTER: question_f &= F(question_num_votes__gte=cleaned['num_votes']) # Apply sortby sortby = cleaned['sortby'] try: searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby]) except IndexError: # Skip index errors because they imply the user is sending us sortby values # that aren't valid. pass # Apply created and updated filters for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} question_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} question_f &= F(**after) doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f # End - support questions configuration # Start - discussion forum configuration if cleaned['w'] & constants.WHERE_DISCUSSION: discussion_f = F(model='forums_thread') if cleaned['author']: discussion_f &= F(post_author_ord=cleaned['author']) if cleaned['thread_type']: if constants.DISCUSSION_STICKY in cleaned['thread_type']: discussion_f &= F(post_is_sticky=1) if constants.DISCUSSION_LOCKED in cleaned['thread_type']: discussion_f &= F(post_is_locked=1) valid_forum_ids = [f.id for f in Forum.authorized_forums_for_user(request.user)] forum_ids = None if cleaned['forum']: forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids] # If we removed all the forums they wanted to look at or if # they didn't specify, then we filter on the list of all # forums they're authorized to look at. 
if not forum_ids: forum_ids = valid_forum_ids discussion_f &= F(post_forum_id__in=forum_ids) # Apply created and updated filters for filter_name, filter_option, filter_date in interval_filters: if filter_option == constants.INTERVAL_BEFORE: before = {filter_name + '__gte': 0, filter_name + '__lte': max(filter_date, 0)} discussion_f &= F(**before) elif filter_option == constants.INTERVAL_AFTER: after = {filter_name + '__gte': min(filter_date, unix_now), filter_name + '__lte': unix_now} discussion_f &= F(**after) doctypes.append(ThreadMappingType.get_mapping_type_name()) final_filter |= discussion_f # End - discussion forum configuration # Done with all the filtery stuff--time to generate results searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if 'explain' in request.GET and request.GET['explain'] == '1': searcher = searcher.explain() cleaned_q = cleaned['q'] # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb 'post_content', # contributor forum pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0) searcher = apply_boosts(searcher) # Build the query if cleaned_q: query_fields = chain(*[ cls.get_query_fields() for cls in [ DocumentMappingType, ThreadMappingType, QuestionMappingType ] ]) query = {} # Create a simple_query_search query for every field we want to search. for field in query_fields: query['%s__sqs' % field] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) searcher = searcher[:settings.SEARCH_MAX_RESULTS] # 5. Generate output pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE) if pages.paginator.count == 0: # If we know there aren't any results, show fallback_results. fallback_results = _fallback_results(language, cleaned['product']) results = [] else: fallback_results = None results = build_results_list(pages, request.IS_JSON) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != 'a'] items.append(('a', '2')) product = Product.objects.filter(slug__in=cleaned['product']) if product: product_titles = [pgettext('DB: products.Product.title', p.title) for p in product] else: product_titles = [_('All Products')] # FIXME: This is probably bad l10n. 
product_titles = ', '.join(product_titles) data = { 'num_results': pages.paginator.count, 'results': results, 'fallback_results': fallback_results, 'product_titles': product_titles, 'q': cleaned['q'], 'w': cleaned['w'], 'lang_name': lang_name, 'advanced': True, 'products': Product.objects.filter(visible=True) } if request.IS_JSON: data['total'] = len(data['results']) data['products'] = [{'slug': p.slug, 'title': p.title} for p in data['products']] if product: data['product'] = product[0].slug pages = Paginator(pages) data['pagination'] = dict( number=pages.pager.number, num_pages=pages.pager.paginator.num_pages, has_next=pages.pager.has_next(), has_previous=pages.pager.has_previous(), max=pages.max, span=pages.span, dotted_upper=pages.pager.dotted_upper, dotted_lower=pages.pager.dotted_lower, page_range=pages.pager.page_range, url=pages.pager.url, ) if not results: data['message'] = _('No pages matched the search criteria') json_data = to_json(data) if request.JSON_CALLBACK: json_data = request.JSON_CALLBACK + '(' + json_data + ');' return HttpResponse(json_data, content_type=request.CONTENT_TYPE) data.update({ 'product': product, 'pages': pages, 'search_form': search_form }) resp = cache_control(render(request, template, data), settings.SEARCH_CACHE_PERIOD) resp.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']), max_age=3600, secure=False, httponly=False) return resp
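# --- Editor's illustration (not part of the original module) ---
# apply_boosts() is called above but not defined in this excerpt. A plausible
# sketch, assuming it simply re-applies the field weights that the older inline
# views pass to searcher.boost() elsewhere in this file:
def apply_boosts_sketch(searcher):
    return searcher.boost(
        question_title=4.0,
        question_content=3.0,
        question_answer_content=3.0,
        post_title=2.0,
        post_content=1.0,
        document_title=6.0,
        document_content=1.0,
        document_keywords=8.0,
        document_summary=2.0,
        # Text phrases in document titles and content get an extra boost.
        document_title__match_phrase=10.0,
        document_content__match_phrase=8.0)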
def suggest(request): text = request.body or request.GET.get('q') locale = request.GET.get('locale', settings.WIKI_DEFAULT_LANGUAGE) product = request.GET.get('product') max_questions = request.GET.get('max_questions', 10) max_documents = request.GET.get('max_documents', 10) errors = {} try: max_questions = int(max_questions) except ValueError: errors['max_questions'] = 'This field must be an integer.' try: max_documents = int(max_documents) except ValueError: errors['max_documents'] = 'This field must be an integer.' if text is None: errors['q'] = 'This field is required.' if product is not None and not Product.objects.filter( slug=product).exists(): errors['product'] = 'Could not find product with slug "{0}".'.format( product) if errors: raise GenericAPIException(400, errors) wiki_f = es_utils.F( model='wiki_document', document_category__in=settings.SEARCH_DEFAULT_CATEGORIES, document_locale=locale, document_is_archived=False) questions_f = es_utils.F(model='questions_question', question_is_archived=False, question_is_locked=False, question_has_helpful=True) if product is not None: wiki_f &= es_utils.F(product=product) questions_f &= es_utils.F(product=product) mapping_types = [QuestionMappingType, DocumentMappingType] query_fields = itertools.chain( *[cls.get_query_fields() for cls in mapping_types]) query = {} for field in query_fields: for query_type in ['match', 'match_phrase']: key = '{0}__{1}'.format(field, query_type) query[key] = text # Transform query to be locale aware. query = es_utils.es_query_with_analyzer(query, locale) searcher = (es_utils.AnalyzerS().es(urls=settings.ES_URLS).indexes( es_utils.read_index('default')).doctypes( *[cls.get_mapping_type_name() for cls in mapping_types]).filter(wiki_f | questions_f).query( should=True, **query)) documents = [] questions = [] for result in searcher[:(max_documents + max_questions) * 2]: if result['model'] == 'wiki_document': documents.append({ 'title': result['document_title'], 'slug': result['document_slug'], 'summary': result['document_summary'], }) elif result['model'] == 'questions_question': questions.append({ 'id': result['id'], 'title': result['question_title'], }) if len(documents) >= max_documents and len(questions) >= max_questions: break return Response({ 'questions': questions[:max_questions], 'documents': documents[:max_documents], })
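# --- Editor's illustration (not part of the original module) ---
# Hedged usage sketch: exercising the suggest() view above through Django's test
# RequestFactory. The URL path and parameter values are illustrative, not the real
# routing for this endpoint.
from django.test import RequestFactory

def _demo_suggest():
    request = RequestFactory().get('/search/suggest', {
        'q': 'firefox crashes on startup',
        'product': 'firefox',
        'max_documents': '3',
        'max_questions': '3',
    })
    return suggest(request)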
def search(request): """Render the admin view containing search tools""" if not request.user.has_perm("search.reindex"): raise PermissionDenied error_messages = [] stats = {} if "reset" in request.POST: try: return handle_reset(request) except ReindexError as e: error_messages.append(u"Error: %s" % e.message) if "reindex" in request.POST: try: return handle_reindex(request) except ReindexError as e: error_messages.append(u"Error: %s" % e.message) if "recreate_index" in request.POST: try: return handle_recreate_index(request) except ReindexError as e: error_messages.append(u"Error: %s" % e.message) if "delete_index" in request.POST: try: return handle_delete(request) except DeleteError as e: error_messages.append(u"Error: %s" % e.message) except ES_EXCEPTIONS as e: error_messages.append("Error: {0}".format(repr(e))) stats = None write_stats = None es_deets = None indexes = [] outstanding_chunks = None try: # TODO: SUMO has a single ES_URL and that's the ZLB and does # the balancing. If that ever changes and we have multiple # ES_URLs, then this should get fixed. es_deets = requests.get(settings.ES_URLS[0]).json() except requests.exceptions.RequestException: pass stats = {} for index in all_read_indexes(): try: stats[index] = get_doctype_stats(index) except ES_EXCEPTIONS: stats[index] = None write_stats = {} for index in all_write_indexes(): try: write_stats[index] = get_doctype_stats(index) except ES_EXCEPTIONS: write_stats[index] = None try: indexes = get_indexes() indexes.sort(key=lambda m: m[0]) except ES_EXCEPTIONS as e: error_messages.append("Error: {0}".format(repr(e))) try: client = redis_client("default") outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS)) except (RedisError, TypeError): pass recent_records = Record.objects.order_by("-starttime")[:100] outstanding_records = Record.objects.filter(endtime__isnull=True).order_by("-starttime") index_groups = set(settings.ES_INDEXES.keys()) index_groups |= set(settings.ES_WRITE_INDEXES.keys()) index_group_data = [[group, read_index(group), write_index(group)] for group in index_groups] return render( request, "admin/search_maintenance.html", { "title": "Search", "es_deets": es_deets, "doctype_stats": stats, "doctype_write_stats": write_stats, "indexes": indexes, "index_groups": index_groups, "index_group_data": index_group_data, "read_indexes": all_read_indexes, "write_indexes": all_write_indexes, "error_messages": error_messages, "recent_records": recent_records, "outstanding_records": outstanding_records, "outstanding_chunks": outstanding_chunks, "now": datetime.now(), "read_index": read_index, "write_index": write_index, }, )
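# --- Editor's illustration (not part of the original module) ---
# The outstanding-chunks lookup above tolerates two failure modes: RedisError when
# the backend is unreachable, and TypeError, which int(None) raises when the
# OUTSTANDING_INDEX_CHUNKS key does not exist yet. Isolated as a hypothetical helper:
def _outstanding_chunks_sketch():
    try:
        client = redis_client("default")
        return int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError):
        return None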
def generate_simple_search(search_form, language, with_highlights=False): """Generates an S given a form :arg search_form: a validated SimpleSearch form :arg language: the language code :arg with_highlights: whether or not to ask for highlights :returns: a fully formed S """ # We use a regular S here because we want to search across # multiple doctypes. searcher = ( es_utils.AnalyzerS().es( urls=settings.ES_URLS, timeout=settings.ES_TIMEOUT, use_ssl=settings.ES_USE_SSL, http_auth=settings.ES_HTTP_AUTH, ) .indexes(es_utils.read_index('default')) ) cleaned = search_form.cleaned_data doctypes = [] final_filter = es_utils.F() cleaned_q = cleaned['q'] products = cleaned['product'] # Handle wiki filters if cleaned['w'] & constants.WHERE_WIKI: wiki_f = es_utils.F(model='wiki_document', document_category__in=settings.SEARCH_DEFAULT_CATEGORIES, document_locale=language, document_is_archived=False) for p in products: wiki_f &= es_utils.F(product=p) doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f # Handle question filters if cleaned['w'] & constants.WHERE_SUPPORT: question_f = es_utils.F(model='questions_question', question_is_archived=False, question_has_helpful=True) for p in products: question_f &= es_utils.F(product=p) doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f # Build a filter for those filters and add the other bits to # finish the search searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if cleaned['explain']: searcher = searcher.explain() if with_highlights: # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( 'question_content', # support forum 'document_summary', # kb pre_tags=['<b>'], post_tags=['</b>'], number_of_fragments=0 ) searcher = apply_boosts(searcher) # Build the query query_fields = chain(*[ cls.get_query_fields() for cls in [ DocumentMappingType, QuestionMappingType ] ]) query = {} # Create match and match_phrase queries for every field # we want to search. for field in query_fields: for query_type in ['match', 'match_phrase']: query['%s__%s' % (field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) return searcher
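# --- Editor's illustration (not part of the original module) ---
# Hedged usage sketch for generate_simple_search(): bind and validate a
# SimpleSearchForm the way the views above do, then slice the returned S to run the
# search. The query string and language are illustrative values only.
def _demo_generate_simple_search():
    form = SimpleSearchForm({'q': 'import bookmarks', 'w': constants.WHERE_BASIC},
                            auto_id=False)
    if not form.is_valid():
        return []
    searcher = generate_simple_search(form, 'en-US', with_highlights=False)
    return list(searcher[:settings.SEARCH_RESULTS_PER_PAGE])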
def simple_search(request, template=None): """ES-specific simple search view. This view is for end user searching of the Knowledge Base and Support Forum. Filtering options are limited to: * product (`product=firefox`, for example, for only Firefox results) * document type (`w=2`, for example, for Support Forum questions only) """ # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL. a = request.GET.get("a") if a in ["1", "2"]: new_url = reverse("search.advanced") + "?" + request.GET.urlencode() return HttpResponseRedirect(new_url) # JSON-specific variables is_json = request.GET.get("format") == "json" callback = request.GET.get("callback", "").strip() content_type = "application/x-javascript" if callback else "application/json" # Check callback is valid if is_json and callback and not jsonp_is_valid(callback): return HttpResponse( json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400 ) language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE)) r = request.GET.copy() # TODO: Do we really need to add this to the URL if it isn't already there? r["w"] = r.get("w", constants.WHERE_BASIC) # TODO: Break out a separate simple search form. search_form = SimpleSearchForm(r, auto_id=False) if not search_form.is_valid(): if is_json: return HttpResponse(json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400) t = template if request.MOBILE else "search/form.html" search_ = render(request, t, {"advanced": False, "request": request, "search_form": search_form}) cache_period = settings.SEARCH_CACHE_PERIOD search_["Cache-Control"] = "max-age=%s" % (cache_period * 60) search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT) return search_ cleaned = search_form.cleaned_data # On mobile, we default to just wiki results. if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC: cleaned["w"] = constants.WHERE_WIKI page = max(smart_int(request.GET.get("page")), 1) offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE lang = language.lower() if settings.LANGUAGES_DICT.get(lang): lang_name = settings.LANGUAGES_DICT[lang] else: lang_name = "" # We use a regular S here because we want to search across # multiple doctypes. searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default")) wiki_f = F(model="wiki_document") question_f = F(model="questions_question") cleaned_q = cleaned["q"] products = cleaned["product"] if not products and "all_products" not in request.GET: lowered_q = cleaned_q.lower() if "thunderbird" in lowered_q: products.append("thunderbird") elif "android" in lowered_q: products.append("mobile") elif "ios" in lowered_q or "ipad" in lowered_q or "ipod" in lowered_q or "iphone" in lowered_q: products.append("ios") elif "firefox os" in lowered_q: products.append("firefox-os") elif "firefox" in lowered_q: products.append("firefox") # Start - wiki filters if cleaned["w"] & constants.WHERE_WIKI: # Category filter wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES) # Locale filter wiki_f &= F(document_locale=language) # Product filter for p in products: wiki_f &= F(product=p) # Archived bit wiki_f &= F(document_is_archived=False) # End - wiki filters # Start - support questions filters if cleaned["w"] & constants.WHERE_SUPPORT: # Has helpful answers is set by default if using basic search cleaned["has_helpful"] = constants.TERNARY_YES # No archived questions in default search.
cleaned["is_archived"] = constants.TERNARY_NO # These filters are ternary, they can be either YES, NO, or OFF ternary_filters = ("has_helpful", "is_archived") d = dict( ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name])) for filter_name in ternary_filters if cleaned[filter_name] ) if d: question_f &= F(**d) # Product filter for p in products: question_f &= F(product=p) # End - support questions filters # Done with all the filtery stuff--time to generate results # Combine all the filters and add to the searcher doctypes = [] final_filter = F() if cleaned["w"] & constants.WHERE_WIKI: doctypes.append(DocumentMappingType.get_mapping_type_name()) final_filter |= wiki_f if cleaned["w"] & constants.WHERE_SUPPORT: doctypes.append(QuestionMappingType.get_mapping_type_name()) final_filter |= question_f searcher = searcher.doctypes(*doctypes) searcher = searcher.filter(final_filter) if "explain" in request.GET and request.GET["explain"] == "1": searcher = searcher.explain() documents = ComposedList() try: # Set up the highlights. Show the entire field highlighted. searcher = searcher.highlight( "question_content", # support forum "document_summary", # kb pre_tags=["<b>"], post_tags=["</b>"], number_of_fragments=0, ) # Set up boosts searcher = searcher.boost( question_title=4.0, question_content=3.0, question_answer_content=3.0, document_title=6.0, document_content=1.0, document_keywords=8.0, document_summary=2.0, # Text phrases in document titles and content get an extra # boost. document_title__match_phrase=10.0, document_content__match_phrase=8.0, ) # Build the query query_fields = chain(*[cls.get_query_fields() for cls in [DocumentMappingType, QuestionMappingType]]) query = {} # Create match and match_phrase queries for every field # we want to search. for field in query_fields: for query_type in ["match", "match_phrase"]: query["%s__%s" % (field, query_type)] = cleaned_q # Transform the query to use locale aware analyzers. query = es_utils.es_query_with_analyzer(query, language) searcher = searcher.query(should=True, **query) num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS) # TODO - Can ditch the ComposedList here, but we need # something that paginate can use to figure out the paging. documents = ComposedList() documents.set_count(("results", searcher), num_results) results_per_page = settings.SEARCH_RESULTS_PER_PAGE pages = paginate(request, documents, results_per_page) # If we know there aren't any results, let's cheat and in # doing that, not hit ES again. if num_results == 0: searcher = [] else: # Get the documents we want to show and add them to # docs_for_page documents = documents[offset : offset + results_per_page] if len(documents) == 0: # If the user requested a page that's beyond the # pagination, then documents is an empty list and # there are no results to show. searcher = [] else: bounds = documents[0][1] searcher = searcher[bounds[0] : bounds[1]] results = [] for i, doc in enumerate(searcher): rank = i + offset if doc["model"] == "wiki_document": summary = _build_es_excerpt(doc) if not summary: summary = doc["document_summary"] result = {"title": doc["document_title"], "type": "document"} elif doc["model"] == "questions_question": summary = _build_es_excerpt(doc) if not summary: # We're excerpting only question_content, so if # the query matched question_title or # question_answer_content, then there won't be any # question_content excerpts. In that case, just # show the question--but only the first 500 # characters. 
summary = bleach.clean(doc["question_content"], strip=True)[:500] result = { "title": doc["question_title"], "type": "question", "is_solved": doc["question_is_solved"], "num_answers": doc["question_num_answers"], "num_votes": doc["question_num_votes"], "num_votes_past_week": doc["question_num_votes_past_week"], } result["url"] = doc["url"] result["object"] = doc result["search_summary"] = summary result["rank"] = rank result["score"] = doc.es_meta.score result["explanation"] = escape(format_explanation(doc.es_meta.explanation)) result["id"] = doc["id"] results.append(result) except ES_EXCEPTIONS as exc: # Handle timeout and all those other transient errors with a # "Search Unavailable" rather than a Django error page. if is_json: return HttpResponse(json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503) # Cheating here: Convert from 'Timeout()' to 'timeout' so # we have less code, but still have good stats. exc_bucket = repr(exc).lower().strip("()") statsd.incr("search.esunified.{0}".format(exc_bucket)) log.exception(exc) t = "search/mobile/down.html" if request.MOBILE else "search/down.html" return render(request, t, {"q": cleaned["q"]}, status=503) items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"] items.append(("a", "2")) fallback_results = None if num_results == 0: fallback_results = _fallback_results(language, cleaned["product"]) product = Product.objects.filter(slug__in=cleaned["product"]) if product: product_titles = [_(p.title, "DB: products.Product.title") for p in product] else: product_titles = [_("All Products")] product_titles = ", ".join(product_titles) data = { "num_results": num_results, "results": results, "fallback_results": fallback_results, "product_titles": product_titles, "q": cleaned["q"], "w": cleaned["w"], "lang_name": lang_name, } if is_json: # Models are not json serializable. for r in data["results"]: del r["object"] data["total"] = len(data["results"]) data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)] if product: data["product"] = product[0].slug pages = Paginator(pages) data["pagination"] = dict( number=pages.pager.number, num_pages=pages.pager.paginator.num_pages, has_next=pages.pager.has_next(), has_previous=pages.pager.has_previous(), max=pages.max, span=pages.span, dotted_upper=pages.pager.dotted_upper, dotted_lower=pages.pager.dotted_lower, page_range=pages.pager.page_range, url=pages.pager.url, ) if not results: data["message"] = _("No pages matched the search criteria") json_data = json.dumps(data) if callback: json_data = callback + "(" + json_data + ");" return HttpResponse(json_data, content_type=content_type) data.update( { "product": product, "products": Product.objects.filter(visible=True), "pages": pages, "search_form": search_form, "advanced": False, } ) results_ = render(request, template, data) cache_period = settings.SEARCH_CACHE_PERIOD results_["Cache-Control"] = "max-age=%s" % (cache_period * 60) results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT) results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False) return results_
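# --- Editor's illustration (not part of the original module) ---
# The keyword-to-product guessing in simple_search() above is order sensitive
# ("firefox os" must match before "firefox"). Pulled out as a sketch so that the
# precedence is explicit; the helper name is hypothetical.
def _infer_products_sketch(query):
    lowered_q = query.lower()
    if "thunderbird" in lowered_q:
        return ["thunderbird"]
    if "android" in lowered_q:
        return ["mobile"]
    if any(term in lowered_q for term in ("ios", "ipad", "ipod", "iphone")):
        return ["ios"]
    if "firefox os" in lowered_q:
        return ["firefox-os"]
    if "firefox" in lowered_q:
        return ["firefox"]
    return []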
def advanced_search(request):
    """Elasticsearch-specific Advanced search view"""
    to_json = JSONRenderer().render
    template = 'search/results.html'

    # 1. Prep request.
    r = request.GET.copy()
    # TODO: Figure out how to get rid of 'a' and do it.
    # It basically is used to switch between showing the form or results.
    a = request.GET.get('a', '2')
    # TODO: This is so the 'a=1' stays in the URL for pagination.
    r['a'] = 1

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r['language'] = language
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    # 2. Build form.
    search_form = AdvancedSearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # 3. Validate request.
    # Note: a == 2 means "show the form"--that's all we use it for now.
    if a == '2' or not search_form.is_valid():
        if request.IS_JSON:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=request.CONTENT_TYPE,
                status=400)

        t = 'search/form.html'
        data = {
            'advanced': True,
            'request': request,
            'search_form': search_form
        }

        # Get value for search input from last search term.
        last_search = request.COOKIES.get(settings.LAST_SEARCH_COOKIE)
        # If there is any cached input from last search, pass it to template.
        if last_search and 'q' not in r:
            cached_field = urlquote(last_search)
            data.update({'cached_field': cached_field})

        return cache_control(
            render(request, t, data), settings.SEARCH_CACHE_PERIOD)

    # 4. Generate search.
    cleaned = search_form.cleaned_data

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection).indexes(
            es_utils.read_index('default')))

    doctypes = []
    final_filter = F()

    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))

    # Start - wiki search configuration
    if cleaned['w'] & constants.WHERE_WIKI:
        wiki_f = F(model='wiki_document')

        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

        # Apply sortby
        sortby = cleaned['sortby_documents']
        try:
            searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending
            # us sortby values that aren't valid.
            pass

        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f
    # End - wiki search configuration

    # Start - support questions configuration
    if cleaned['w'] & constants.WHERE_SUPPORT:
        question_f = F(model='questions_question')

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

        # Note: num_voted (with a d) is a different field than num_votes
        # (with an s). The former is a dropdown and the latter is an
        # integer value.
        if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
            question_f &= F(
                question_num_votes__lte=max(cleaned['num_votes'], 0))
        elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
            question_f &= F(question_num_votes__gte=cleaned['num_votes'])

        # Apply sortby
        sortby = cleaned['sortby']
        try:
            searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending
            # us sortby values that aren't valid.
            pass

        # Apply created and updated filters
        for filter_name, filter_option, filter_date in interval_filters:
            if filter_option == constants.INTERVAL_BEFORE:
                before = {
                    filter_name + '__gte': 0,
                    filter_name + '__lte': max(filter_date, 0)
                }
                question_f &= F(**before)
            elif filter_option == constants.INTERVAL_AFTER:
                after = {
                    filter_name + '__gte': min(filter_date, unix_now),
                    filter_name + '__lte': unix_now
                }
                question_f &= F(**after)

        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f
    # End - support questions configuration

    # Start - discussion forum configuration
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        discussion_f = F(model='forums_thread')

        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [
            f.id for f in Forum.authorized_forums_for_user(request.user)
        ]

        forum_ids = None
        if cleaned['forum']:
            forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

        # Apply created and updated filters
        for filter_name, filter_option, filter_date in interval_filters:
            if filter_option == constants.INTERVAL_BEFORE:
                before = {
                    filter_name + '__gte': 0,
                    filter_name + '__lte': max(filter_date, 0)
                }
                discussion_f &= F(**before)
            elif filter_option == constants.INTERVAL_AFTER:
                after = {
                    filter_name + '__gte': min(filter_date, unix_now),
                    filter_name + '__lte': unix_now
                }
                discussion_f &= F(**after)

        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f
    # End - discussion forum configuration

    # Done with all the filtery stuff--time to generate results.
    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if 'explain' in request.GET and request.GET['explain'] == '1':
        searcher = searcher.explain()

    cleaned_q = cleaned['q']

    # Set up the highlights. Show the entire field highlighted.
    searcher = searcher.highlight(
        'question_content',  # support forum
        'document_summary',  # kb
        'post_content',  # contributor forum
        pre_tags=['<b>'],
        post_tags=['</b>'],
        number_of_fragments=0)

    searcher = apply_boosts(searcher)

    # Build the query
    if cleaned_q:
        query_fields = chain(*[
            cls.get_query_fields() for cls in
            [DocumentMappingType, ThreadMappingType, QuestionMappingType]
        ])
        query = {}
        # Create a simple_query_search query for every field we want to
        # search.
        for field in query_fields:
            query['%s__sqs' % field] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

    searcher = searcher[:settings.SEARCH_MAX_RESULTS]

    # 5. Generate output
    pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)

    if pages.paginator.count == 0:
        # If we know there aren't any results, show fallback_results.
        fallback_results = _fallback_results(language, cleaned['product'])
        results = []
    else:
        fallback_results = None
        results = build_results_list(pages, request.IS_JSON)

    items = [(k, v) for k in search_form.fields for v in r.getlist(k)
             if v and k != 'a']
    items.append(('a', '2'))

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [
            pgettext('DB: products.Product.title', p.title) for p in product
        ]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
        'advanced': True,
        'products': Product.objects.filter(visible=True)
    }

    if request.IS_JSON:
        data['total'] = len(data['results'])
        data['products'] = [{
            'slug': p.slug,
            'title': p.title
        } for p in data['products']]

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )

        if not results:
            data['message'] = _('No pages matched the search criteria')

        json_data = to_json(data)
        if request.JSON_CALLBACK:
            json_data = request.JSON_CALLBACK + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=request.CONTENT_TYPE)

    data.update({
        'product': product,
        'pages': pages,
        'search_form': search_form
    })

    resp = cache_control(
        render(request, template, data), settings.SEARCH_CACHE_PERIOD)
    resp.set_cookie(
        settings.LAST_SEARCH_COOKIE,
        urlquote(cleaned['q']),
        max_age=3600,
        secure=False,
        httponly=False)

    return resp
def monitor(request):
    """View for services monitor."""
    status = {}

    # Note: To add a new component to the services monitor, do your
    # testing and then add a name -> list of output tuples map to
    # status.

    # Check memcached.
    memcache_results = []
    try:
        for cache_name, cache_props in settings.CACHES.items():
            result = True
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    memcache_results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        if not memcache_results:
            memcache_results.append((ERROR, 'memcache is not configured.'))

        elif len(memcache_results) < 2:
            memcache_results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(memcache_results))))

        else:
            memcache_results.append((INFO, 'memcached servers look good.'))

    except Exception as exc:
        memcache_results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    status['memcached'] = memcache_results

    # Check Libraries and versions
    libraries_results = []
    try:
        Image.new('RGB', (16, 16)).save(StringIO.StringIO(), 'JPEG')
        libraries_results.append((INFO, 'PIL+JPEG: Got it!'))
    except Exception as exc:
        libraries_results.append(
            (ERROR, 'PIL+JPEG: Probably missing: '
                    'Failed to create a jpeg image: %s' % exc))

    status['libraries'] = libraries_results

    # Check file paths.
    msg = 'We want read + write.'
    filepaths = (
        (settings.USER_AVATAR_PATH, os.R_OK | os.W_OK, msg),
        (settings.IMAGE_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.THUMBNAIL_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GROUP_AVATAR_PATH, os.R_OK | os.W_OK, msg),
    )

    filepath_results = []
    for path, perms, notes in filepaths:
        path = os.path.join(settings.MEDIA_ROOT, path)
        path_exists = os.path.isdir(path)
        path_perms = os.access(path, perms)

        if path_exists and path_perms:
            filepath_results.append(
                (INFO, '%s: %s %s %s' % (path, path_exists, path_perms,
                                         notes)))

    status['filepaths'] = filepath_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))
    except (socket.error, IOError) as exc:
        rabbitmq_results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))
    except Exception as exc:
        rabbitmq_results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    status['RabbitMQ'] = rabbitmq_results

    # Check ES.
    es_results = []
    try:
        es_utils.get_doctype_stats(es_utils.read_index())
        es_results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))
    except es_utils.ES_EXCEPTIONS as exc:
        es_results.append(
            (ERROR, 'ElasticSearch problem: %s' % str(exc)))
    except Exception as exc:
        es_results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    status['ElasticSearch'] = es_results

    # Check Celery.
    # start = time.time()
    # pong = celery.task.ping()
    # rabbit_results = r = {'duration': time.time() - start}
    # status_summary['rabbit'] = pong == 'pong' and r['duration'] < 1

    # Check Redis.
    redis_results = []
    if hasattr(settings, 'REDIS_BACKENDS'):
        for backend in settings.REDIS_BACKENDS:
            try:
                redis_client(backend)
                redis_results.append((INFO, '%s: Pass!' % backend))
            except RedisError:
                redis_results.append((ERROR, '%s: Fail!' % backend))

    status['Redis'] = redis_results

    status_code = 200

    status_summary = {}
    for component, output in status.items():
        if ERROR in [item[0] for item in output]:
            status_code = 500
            status_summary[component] = False
        else:
            status_summary[component] = True

    return render(
        request,
        'services/monitor.html', {
            'component_status': status,
            'status_summary': status_summary
        },
        status=status_code)