Example #1
0
def suggest(request):
    """Return question and document suggestions for a piece of text.

    The text is the raw request body or, when the body is empty, the ``q``
    querystring parameter. ``locale``, ``product``, ``max_questions`` and
    ``max_documents`` are read from the querystring.

    Raises GenericAPIException with status 400 and a dict of per-field
    messages when any parameter fails validation.
    """
    text = request.body or request.GET.get('q')
    query = request.GET
    locale = query.get('locale', settings.WIKI_DEFAULT_LANGUAGE)
    product = query.get('product')
    raw_question_limit = query.get('max_questions', 10)
    raw_document_limit = query.get('max_documents', 10)

    errors = {}
    not_an_integer = 'This field must be an integer.'

    try:
        question_limit = int(raw_question_limit)
    except ValueError:
        errors['max_questions'] = not_an_integer

    try:
        document_limit = int(raw_document_limit)
    except ValueError:
        errors['max_documents'] = not_an_integer

    if text is None:
        errors['q'] = 'This field is required.'

    if product is not None:
        product_exists = Product.objects.filter(slug=product).exists()
        if not product_exists:
            errors['product'] = 'Could not find product with slug "{0}".'.format(product)

    # Report every problem at once rather than failing on the first one.
    if errors:
        raise GenericAPIException(400, errors)

    es_search = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = es_search.indexes(es_utils.read_index('default'))

    questions = _question_suggestions(searcher, text, locale, product, question_limit)
    documents = _document_suggestions(searcher, text, locale, product, document_limit)
    return Response({'questions': questions, 'documents': documents})
Example #2
0
def suggest(request):
    """Return question and document suggestions for the submitted query.

    Parameters are taken from the request body or from the querystring, but
    not both; they are validated by ``SuggestSerializer``.

    Raises GenericAPIException with status 400 when parameters are supplied
    in both places or when the serializer rejects them.
    """
    body_params = request.data
    if body_params and request.GET:
        message = 'Put all parameters either in the querystring or the HTTP request body.'
        raise GenericAPIException(400, message)

    serializer = SuggestSerializer(data=(body_params or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    es_search = es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH)
    searcher = es_search.indexes(es_utils.read_index('default'))

    data = serializer.validated_data
    # Arguments shared by both suggestion helpers.
    common = (searcher, data['q'], data['locale'], data['product'])

    questions = _question_suggestions(*(common + (data['max_questions'],)))
    documents = _document_suggestions(*(common + (data['max_documents'],)))
    return Response({'questions': questions, 'documents': documents})
Example #3
0
def suggest(request):
    """Suggest questions and documents matching the supplied text.

    The text is taken from the request body, falling back to the ``q``
    querystring parameter when the body is empty. The remaining options
    (``locale``, ``product``, ``max_questions``, ``max_documents``) come from
    the querystring.

    Raises GenericAPIException with status 400 and a field-to-message dict
    when validation fails.
    """
    text = request.body or request.GET.get('q')
    locale = request.GET.get('locale', settings.WIKI_DEFAULT_LANGUAGE)
    product = request.GET.get('product')

    errors = {}
    limits = {}
    # Both limits follow the same rule: default to 10, must parse as int.
    for field in ('max_questions', 'max_documents'):
        raw_value = request.GET.get(field, 10)
        try:
            limits[field] = int(raw_value)
        except ValueError:
            errors[field] = 'This field must be an integer.'

    if text is None:
        errors['q'] = 'This field is required.'

    unknown_product = (
        product is not None and
        not Product.objects.filter(slug=product).exists())
    if unknown_product:
        errors['product'] = 'Could not find product with slug "{0}".'.format(
            product)

    if errors:
        raise GenericAPIException(400, errors)

    searcher = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = searcher.indexes(es_utils.read_index('default'))

    payload = {}
    payload['questions'] = _question_suggestions(
        searcher, text, locale, product, limits['max_questions'])
    payload['documents'] = _document_suggestions(
        searcher, text, locale, product, limits['max_documents'])
    return Response(payload)
Example #4
0
def suggest(request):
    """Build question and document suggestions from validated parameters.

    Parameters may be sent in the request body or in the querystring, never
    both. ``SuggestSerializer`` performs the validation.

    Raises GenericAPIException with status 400 for mixed parameter sources
    or for serializer validation errors.
    """
    if request.data and request.GET:
        raise GenericAPIException(
            400,
            'Put all parameters either in the querystring or the HTTP request body.'
        )

    raw_params = request.data or request.GET
    serializer = SuggestSerializer(data=raw_params)
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    connection = es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection)
    searcher = connection.indexes(es_utils.read_index('default'))

    data = serializer.validated_data

    questions = _question_suggestions(
        searcher, data['q'], data['locale'], data['product'],
        data['max_questions'])
    documents = _document_suggestions(
        searcher, data['q'], data['locale'], data['product'],
        data['max_documents'])

    return Response({
        'questions': questions,
        'documents': documents,
    })
Example #5
0
def suggest(request):
    """Return suggested questions and documents for the given text.

    The text is the request body or, if that is empty, the ``q`` querystring
    parameter. ``locale``, ``product``, ``max_questions`` and
    ``max_documents`` are querystring parameters.

    Raises GenericAPIException with status 400 and a dict of field errors
    when any parameter is invalid.
    """
    text = request.body or request.GET.get("q")
    locale = request.GET.get("locale", settings.WIKI_DEFAULT_LANGUAGE)
    product = request.GET.get("product")
    raw_limits = (
        ("max_questions", request.GET.get("max_questions", 10)),
        ("max_documents", request.GET.get("max_documents", 10)),
    )

    errors = {}
    parsed = {}
    for field, raw in raw_limits:
        try:
            parsed[field] = int(raw)
        except ValueError:
            errors[field] = "This field must be an integer."

    if text is None:
        errors["q"] = "This field is required."

    if product is not None:
        if not Product.objects.filter(slug=product).exists():
            errors["product"] = 'Could not find product with slug "{0}".'.format(product)

    # All validation problems are reported together in a single 400.
    if errors:
        raise GenericAPIException(400, errors)

    analyzer = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = analyzer.indexes(es_utils.read_index("default"))

    questions = _question_suggestions(searcher, text, locale, product, parsed["max_questions"])
    documents = _document_suggestions(searcher, text, locale, product, parsed["max_documents"])
    return Response({"questions": questions, "documents": documents})
Example #6
0
def handle_delete(request):
    """Delete the index named by the ``delete_index`` POST field.

    Raises DeleteError when the name does not start with
    ``settings.ES_INDEX_PREFIX``, is not among the existing indexes, or is
    the current read index. Redirects back to the request path on success.
    """
    target = request.POST['delete_index']

    # Only indexes carrying our prefix may be deleted.
    has_prefix = target.startswith(settings.ES_INDEX_PREFIX)
    if not has_prefix:
        raise DeleteError('"%s" is not a valid index name.' % target)

    # The index has to exist.
    existing = [name for name, _count in get_indexes()]
    if target not in existing:
        raise DeleteError('"%s" does not exist.' % target)

    # The read index is off limits.
    if read_index() == target:
        raise DeleteError('"%s" is the read index.' % target)

    delete_index(target)
    return HttpResponseRedirect(request.path)
Example #7
0
def handle_delete(request):
    """Validate and delete the index requested via POST ``delete_index``.

    Three checks run in order, each raising DeleteError on failure: the name
    must start with ``settings.ES_INDEX_PREFIX``, it must be an existing
    index, and it must not be the read index. On success the index is
    deleted and the client is redirected to the same path.
    """
    name = request.POST['delete_index']
    quoted = '"%s"' % name

    if not name.startswith(settings.ES_INDEX_PREFIX):
        # Rule 1: wrong prefix.
        raise DeleteError(quoted + ' is not a valid index name.')

    known_names = []
    for index_name, _count in get_indexes():
        known_names.append(index_name)
    if name not in known_names:
        # Rule 2: unknown index.
        raise DeleteError(quoted + ' does not exist.')

    if name == read_index():
        # Rule 3: never delete the read index.
        raise DeleteError(quoted + ' is the read index.')

    delete_index(name)

    return HttpResponseRedirect(request.path)
Example #8
0
def suggest(request):
    """Return question and document suggestions for the submitted query.

    Parameters must come entirely from the request body or entirely from the
    querystring; ``SuggestSerializer`` validates them.

    Raises GenericAPIException with status 400 when both sources carry
    parameters or when validation fails.
    """
    posted = request.DATA
    if posted and request.GET:
        message = 'Put all parameters either in the querystring or the HTTP request body.'
        raise GenericAPIException(400, message)

    serializer = SuggestSerializer(data=(posted or request.GET))
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    es_search = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = es_search.indexes(es_utils.read_index('default'))

    data = serializer.object

    questions = _question_suggestions(
        searcher, data['q'], data['locale'], data['product'], data['max_questions'])
    documents = _document_suggestions(
        searcher, data['q'], data['locale'], data['product'], data['max_documents'])
    return Response({'questions': questions, 'documents': documents})
Example #9
0
def handle_delete(request):
    """Deletes an index

    The index name is read from the ``delete_index`` POST field.

    :raises DeleteError: if no index name was posted, the name does not
        start with ``settings.ES_INDEX_PREFIX``, the index does not exist,
        or the index is the default read index.

    :returns: a redirect back to the request path after deletion.

    """
    index_to_delete = request.POST.get('delete_index')

    # .get() returns None when the field is absent; fail with the error type
    # callers already handle instead of an AttributeError on .startswith().
    if index_to_delete is None:
        raise DeleteError('No index was specified.')

    # Rule 1: Has to start with the ES_INDEX_PREFIX.
    if not index_to_delete.startswith(settings.ES_INDEX_PREFIX):
        raise DeleteError('"%s" is not a valid index name.' % index_to_delete)

    # Rule 2: Must be an existing index. The index list is only fetched once
    # the name has passed the cheap prefix check.
    es_indexes = [name for (name, count) in get_indexes()]
    if index_to_delete not in es_indexes:
        raise DeleteError('"%s" does not exist.' % index_to_delete)

    # Rule 3: Don't delete the default read index.
    # TODO: When the critical index exists, this should be "Don't
    # delete the critical read index."
    if index_to_delete == read_index('default'):
        raise DeleteError('"%s" is the default read index.' % index_to_delete)

    # The index is ok to delete
    delete_index(index_to_delete)

    return HttpResponseRedirect(request.path)
Example #10
0
def handle_delete(request):
    """Deletes an index

    Reads the index name from the ``delete_index`` POST field, validates it
    against the rules below and deletes it.

    :raises DeleteError: when the field is missing or any rule is violated.

    :returns: a redirect to the current request path.

    """
    index_to_delete = request.POST.get('delete_index')

    # A missing field yields None from .get(); report it as a DeleteError
    # rather than letting None.startswith() raise AttributeError.
    if index_to_delete is None:
        raise DeleteError('No index was specified.')

    # Rule 1: Has to start with the ES_INDEX_PREFIX.
    if not index_to_delete.startswith(settings.ES_INDEX_PREFIX):
        raise DeleteError('"%s" is not a valid index name.' % index_to_delete)

    # Rule 2: Must be an existing index. Fetched after rule 1 so an invalid
    # name is rejected without asking for the index list.
    es_indexes = [name for (name, count) in get_indexes()]
    if index_to_delete not in es_indexes:
        raise DeleteError('"%s" does not exist.' % index_to_delete)

    # Rule 3: Don't delete the default read index.
    # TODO: When the critical index exists, this should be "Don't
    # delete the critical read index."
    if index_to_delete == read_index('default'):
        raise DeleteError('"%s" is the default read index.' % index_to_delete)

    # The index is ok to delete
    delete_index(index_to_delete)

    return HttpResponseRedirect(request.path)
Example #11
0
def suggest(request):
    """Build question and document suggestions from validated parameters.

    Parameters are accepted from the request body or the querystring, but
    not from both at once. ``SuggestSerializer`` validates them.

    Raises GenericAPIException with status 400 on mixed parameter sources
    or on serializer errors.
    """
    if request.DATA and request.GET:
        raise GenericAPIException(
            400,
            'Put all parameters either in the querystring or the HTTP request body.'
        )

    raw_params = request.DATA or request.GET
    serializer = SuggestSerializer(data=raw_params)
    if not serializer.is_valid():
        raise GenericAPIException(400, serializer.errors)

    connection = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = connection.indexes(es_utils.read_index('default'))

    data = serializer.object

    questions = _question_suggestions(
        searcher, data['q'], data['locale'], data['product'],
        data['max_questions'])
    documents = _document_suggestions(
        searcher, data['q'], data['locale'], data['product'],
        data['max_documents'])

    return Response({
        'questions': questions,
        'documents': documents,
    })
Example #12
0
def search(request):
    """Render the admin view containing search tools

    Requires the ``search.reindex`` permission (PermissionDenied otherwise).

    If the POST data contains ``reset``, ``reindex``, ``recreate_index`` or
    ``delete_index``, the matching handler is invoked and its response is
    returned. When a handler fails with a known error, the message is
    collected and the maintenance page is rendered with it instead.
    """
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []

    # These three actions share the same failure handling.
    reindex_actions = (
        ('reset', handle_reset),
        ('reindex', handle_reindex),
        ('recreate_index', handle_recreate_index),
    )
    for field, handler in reindex_actions:
        if field in request.POST:
            try:
                return handler(request)
            except ReindexError as e:
                error_messages.append(u'Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append(u'Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        # NOTE(review): no timeout is passed to requests.get here.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except (requests.exceptions.RequestException, ValueError):
        # Best effort: .json() raises ValueError when the body is not valid
        # JSON, which should not take the whole admin page down.
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
        client = redis_client('default')
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError, ValueError):
        # TypeError: the key is unset (int(None)). ValueError: the stored
        # value is not numeric. Either way the count is simply not shown.
        pass

    recent_records = Record.uncached.order_by('-starttime')[:100]

    outstanding_records = (Record.uncached.filter(endtime__isnull=True)
                                          .order_by('-starttime'))

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)]
                        for group in index_groups]

    return render(
        request,
        'admin/search_maintenance.html',
        {'title': 'Search',
         'es_deets': es_deets,
         'doctype_stats': stats,
         'doctype_write_stats': write_stats,
         'indexes': indexes,
         'index_groups': index_groups,
         'index_group_data': index_group_data,
         'read_indexes': all_read_indexes,
         'write_indexes': all_write_indexes,
         'error_messages': error_messages,
         'recent_records': recent_records,
         'outstanding_records': outstanding_records,
         'outstanding_chunks': outstanding_chunks,
         'now': datetime.now(),
         'read_index': read_index,
         'write_index': write_index,
         })
Example #13
0
def search(request):
    """Render the admin view containing search tools

    Requires the ``search.reindex`` permission (PermissionDenied otherwise).

    A POST containing ``reset``, ``reindex``, ``recreate_index`` or
    ``delete_index`` dispatches to the matching handler and returns its
    response. If the handler fails with a known error, the message is
    collected and shown on the rendered maintenance page.
    """
    if not request.user.has_perm('search.reindex'):
        raise PermissionDenied

    error_messages = []

    # These three actions share the same failure handling.
    reindex_actions = (
        ('reset', handle_reset),
        ('reindex', handle_reindex),
        ('recreate_index', handle_recreate_index),
    )
    for field, handler in reindex_actions:
        if field in request.POST:
            try:
                return handler(request)
            except ReindexError as e:
                error_messages.append('Error: %s' % e.message)

    if 'delete_index' in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append('Error: %s' % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append('Error: {0}'.format(repr(e)))

    es_deets = None
    indexes = []

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        # NOTE(review): no timeout is passed to requests.get here.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except (requests.exceptions.RequestException, ValueError):
        # Best effort: .json() raises ValueError when the body is not valid
        # JSON, which should not take the whole admin page down.
        pass

    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    recent_records = Record.objects.all()[:100]
    outstanding_records = Record.objects.outstanding()

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group),
                         write_index(group)] for group in index_groups]

    return render(
        request, 'admin/search_maintenance.html', {
            'title': 'Search',
            'es_deets': es_deets,
            'doctype_stats': stats,
            'doctype_write_stats': write_stats,
            'indexes': indexes,
            'index_groups': index_groups,
            'index_group_data': index_group_data,
            'read_indexes': all_read_indexes,
            'write_indexes': all_write_indexes,
            'error_messages': error_messages,
            'recent_records': recent_records,
            'outstanding_records': outstanding_records,
            'now': datetime.now(),
            'read_index': read_index,
            'write_index': write_index,
        })
Example #14
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    Requests with `a=1` or `a=2` are redirected to the advanced search
    URL with the same querystring.

    With `format=json` the response is JSON, wrapped in the `callback`
    function when one is given. An invalid callback or invalid form data
    yields a 400 JSON error; without `format=json` an invalid form
    renders the search form instead.

    Elasticsearch errors (ES_EXCEPTIONS) produce a 503 "Search
    Unavailable" response rather than an error page.

    Successful HTML responses carry Cache-Control and Expires headers
    derived from settings.SEARCH_CACHE_PERIOD and set the
    settings.LAST_SEARCH_COOKIE cookie to the quoted query.
    """

    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get('a')
    if a in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = ('application/x-javascript'
                    if callback else 'application/json')

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(json.dumps(
            {'error': _('Invalid callback function.')}),
                            content_type=content_type,
                            status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r['w'] = r.get('w', constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(json.dumps(
                {'error': _('Invalid search data.')}),
                                content_type=content_type,
                                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': False,
            'request': request,
            'search_form': search_form
        })
        # SEARCH_CACHE_PERIOD is used as minutes for Expires and converted
        # to seconds for max-age.
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
        search_['Expires'] = (
            (datetime.utcnow() +
             timedelta(minutes=cache_period)).strftime(EXPIRES_FMT))
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS).indexes(
        es_utils.read_index('default')))

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned['has_helpful'] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned['is_archived'] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if 'explain' in request.GET and request.GET['explain'] == '1':
        searcher = searcher.explain()

    # NOTE(review): this value is replaced by a fresh ComposedList inside
    # the try block below before it is read.
    documents = ComposedList()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0)

        # Build the query
        query_fields = chain(*[
            cls.get_query_fields()
            for cls in [DocumentMappingType, QuestionMappingType]
        ])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ['match', 'match_phrase']:
                query['%s__%s' % (field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        # Cap the reported total at SEARCH_MAX_RESULTS.
        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            # NOTE(review): a doc whose 'model' matches neither branch
            # below would leave `result`/`summary` unset or carried over
            # from the previous iteration -- presumably the doctype filter
            # above rules that out; confirm.
            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {'title': doc['document_title'], 'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc['question_content'],
                                           strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']
                }

            result['url'] = doc['url']
            result['object'] = doc
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc.es_meta.score
            result['explanation'] = escape(
                format_explanation(doc.es_meta.explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        # NOTE: the JSON branch returns before the statsd counter and the
        # log call below, so those only run for non-JSON requests.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type,
                                status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    # NOTE(review): `items` is not referenced again in this function.
    items = [(k, v) for k in search_form.fields for v in r.getlist(k)
             if v and k != 'a']
    items.append(('a', '2'))

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [
            _(p.title, 'DB: products.Product.title') for p in product
        ]
    else:
        product_titles = [_('All Products')]

    product_titles = ', '.join(product_titles)

    data = {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
    }

    if is_json:
        # Models are not json serializable.
        # NOTE: `r` is rebound here; the querystring copy it named earlier
        # is not read again after this point.
        for r in data['results']:
            del r['object']
        data['total'] = len(data['results'])

        data['products'] = ([{
            'slug': p.slug,
            'title': p.title
        } for p in Product.objects.filter(visible=True)])

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        # JSONP: wrap the payload in the (already validated) callback.
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = render(request, template, data)
    # SEARCH_CACHE_PERIOD is used as minutes for Expires and converted to
    # seconds for max-age.
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
    results_['Expires'] = (
        (datetime.utcnow() +
         timedelta(minutes=cache_period)).strftime(EXPIRES_FMT))
    results_.set_cookie(settings.LAST_SEARCH_COOKIE,
                        urlquote(cleaned['q']),
                        max_age=3600,
                        secure=False,
                        httponly=False)

    return results_
Example #15
0
def generate_simple_search(search_form, language, with_highlights=False):
    """Build the search object for a validated simple-search form.

    :arg search_form: a validated SimpleSearch form
    :arg language: the language code
    :arg with_highlights: whether or not to ask for highlights

    :returns: a fully formed S

    """
    # One searcher is used for all doctypes so a single query can cover
    # both the kb and the support forum.
    connection = es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection,
    )
    searcher = connection.indexes(es_utils.read_index("default"))

    form_data = search_form.cleaned_data
    search_text = form_data["q"]
    selected_products = form_data["product"]

    def _limit_to_products(base_filter):
        # AND one product clause per selected product onto the filter.
        for product in selected_products:
            base_filter &= es_utils.F(product=product)
        return base_filter

    mapping_names = []
    combined_filter = es_utils.F()

    # Knowledge base documents
    if form_data["w"] & constants.WHERE_WIKI:
        kb_filter = _limit_to_products(
            es_utils.F(
                model="wiki_document",
                document_category__in=settings.SEARCH_DEFAULT_CATEGORIES,
                document_locale=language,
                document_is_archived=False,
            )
        )
        mapping_names.append(DocumentMappingType.get_mapping_type_name())
        combined_filter |= kb_filter

    # Support forum questions
    if form_data["w"] & constants.WHERE_SUPPORT:
        support_filter = _limit_to_products(
            es_utils.F(
                model="questions_question",
                question_is_archived=False,
                question_has_helpful=True,
            )
        )
        mapping_names.append(QuestionMappingType.get_mapping_type_name())
        combined_filter |= support_filter

    # Restrict the searcher to the selected doctypes and the OR of their
    # per-doctype filters.
    searcher = searcher.doctypes(*mapping_names).filter(combined_filter)

    if form_data["explain"]:
        searcher = searcher.explain()

    if with_highlights:
        # number_of_fragments=0 asks for the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

    searcher = apply_boosts(searcher)

    # Every searchable field gets both a match and a match_phrase clause.
    searchable_fields = chain.from_iterable(
        [mapping.get_query_fields() for mapping in (DocumentMappingType, QuestionMappingType)]
    )
    raw_query = {
        "%s__%s" % (field_name, clause): search_text
        for field_name in searchable_fields
        for clause in ("match", "match_phrase")
    }

    # Swap in the locale aware analyzers before attaching the query.
    localized_query = es_utils.es_query_with_analyzer(raw_query, language)
    return searcher.query(should=True, **localized_query)
Example #16
0
 def test_delete(self, _out):
     # Runs the esdelete command once for the read index and once for
     # 'cupcakerainbow_index'.
     # Note: read_index() == write_index() in the tests, so only one
     # of the two needs to be deleted.
     index_names = (es_utils.read_index(), 'cupcakerainbow_index')
     for index_name in index_names:
         call_command('esdelete', index_name, noinput=True)
Example #17
0
    stats = None
    write_stats = None
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    try:
        stats = get_doctype_stats(read_index())
    except ES_EXCEPTIONS:
        pass

    try:
        write_stats = get_doctype_stats(write_index())
    except ES_EXCEPTIONS:
        pass

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
Example #18
0
def advanced_search(request, template=None):
    """ES-specific Advanced search view.

    Renders the advanced search form when the submitted data is invalid
    or when the ``a`` query parameter is ``"2"`` (its default).
    Otherwise it builds per-doctype filters (wiki documents, support
    questions, discussion threads) from the cleaned form data, runs the
    query and returns one page of results.

    With ``format=json`` the response is JSON, or JSONP when a valid
    ``callback`` is supplied.

    :arg request: the incoming request; search parameters are read from
        ``request.GET``
    :arg template: template used for the results page (and for the form
        on mobile requests)

    :returns: an HTTP response; status 400 for an invalid JSONP callback
        or invalid search data in JSON mode, 503 when one of
        ``ES_EXCEPTIONS`` is raised while searching

    """

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    content_type = "application/x-javascript" if callback else "application/json"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400
        )

    language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE))
    r = request.GET.copy()
    # TODO: Figure out how to get rid of 'a' and do it.
    # It basically is used to switch between showing the form or results.
    a = request.GET.get("a", "2")
    # TODO: This is so the 'a=1' stays in the URL for pagination.
    r["a"] = 1

    # Search default values
    try:
        # Build a real list here: a lazy ``map`` object (Python 3) is
        # always truthy, so the default would never apply, and the
        # ValueError for a non-integer value would escape this block.
        category = [int(c) for c in r.getlist("category")] or settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist("category", category)

    r["language"] = language

    search_form = AdvancedSearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # This is all we use a for now I think.
    if not search_form.is_valid() or a == "2":
        if is_json:
            return HttpResponse(json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400)

        t = template if request.MOBILE else "search/form.html"
        search_ = render(request, t, {"advanced": True, "request": request, "search_form": search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
        return search_

    cleaned = search_form.cleaned_data

    if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC:
        cleaned["w"] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ""

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    wiki_f = F(model="wiki_document")
    question_f = F(model="questions_question")
    discussion_f = F(model="forums_thread")

    # Start - wiki filters

    if cleaned["w"] & constants.WHERE_WIKI:
        # Category filter
        if cleaned["category"]:
            wiki_f &= F(document_category__in=cleaned["category"])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned["product"]
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned["topics"]
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if not cleaned["include_archived"]:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned["w"] & constants.WHERE_SUPPORT:
        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful", "is_archived")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_f &= F(**d)

        if cleaned["asked_by"]:
            question_f &= F(question_creator=cleaned["asked_by"])

        if cleaned["answered_by"]:
            question_f &= F(question_answer_creator=cleaned["answered_by"])

        q_tags = [t.strip() for t in cleaned["q_tags"].split(",")]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned["product"]
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned["topics"]
        for t in topics:
            question_f &= F(topic=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned["w"] & constants.WHERE_DISCUSSION:
        if cleaned["author"]:
            discussion_f &= F(post_author_ord=cleaned["author"])

        if cleaned["thread_type"]:
            if constants.DISCUSSION_STICKY in cleaned["thread_type"]:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned["thread_type"]:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [f.id for f in Forum.authorized_forums_for_user(request.user)]

        forum_ids = None
        if cleaned["forum"]:
            forum_ids = [f for f in cleaned["forum"] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

    # End - discussion forum filters

    # Created filter
    # Note: the created/updated interval filters are applied to the
    # discussion and question filters only, not to the wiki filter.
    unix_now = int(time.time())
    interval_filters = (
        ("created", cleaned["created"], cleaned["created_date"]),
        ("updated", cleaned["updated"], cleaned["updated_date"]),
    )
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + "__gte": 0, filter_name + "__lte": max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + "__gte": min(filter_date, unix_now), filter_name + "__lte": unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned["num_voted"] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned["num_votes"], 0))
    elif cleaned["num_voted"] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned["num_votes"])

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned["w"] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned["w"] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    if cleaned["w"] & constants.WHERE_DISCUSSION:
        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if "explain" in request.GET and request.GET["explain"] == "1":
        searcher = searcher.explain()

    documents = ComposedList()

    # Everything that talks to ES lives inside this try block so that the
    # errors in ES_EXCEPTIONS become a 503 response below.
    try:
        cleaned_q = cleaned["q"]

        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            "post_content",  # contributor forum
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,
            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0,
        )

        # Apply sortby for advanced search of questions
        if cleaned["w"] == constants.WHERE_SUPPORT:
            sortby = cleaned["sortby"]
            try:
                searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned["w"] == constants.WHERE_WIKI:
            sortby = cleaned["sortby_documents"]
            try:
                searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(
                *[cls.get_query_fields() for cls in [DocumentMappingType, ThreadMappingType, QuestionMappingType]]
            )
            query = {}
            # Create a simple_query_search query for every field
            # we want to search.
            for field in query_fields:
                query["%s__sqs" % field] = cleaned_q

            # Transform the query to use locale aware analyzers.
            query = es_utils.es_query_with_analyzer(query, language)

            searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(("results", searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset : offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0] : bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc["model"] == "wiki_document":
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc["document_summary"]
                result = {"title": doc["document_title"], "type": "document"}

            elif doc["model"] == "questions_question":
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc["question_content"], strip=True)[:500]

                result = {
                    "title": doc["question_title"],
                    "type": "question",
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }

            else:
                summary = _build_es_excerpt(doc, first_only=True)
                result = {"title": doc["post_title"], "type": "thread"}

            result["url"] = doc["url"]
            result["object"] = doc
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc.es_meta.score
            result["explanation"] = escape(format_explanation(doc.es_meta.explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip("()")
        statsd.incr("search.esunified.{0}".format(exc_bucket))

        log.exception(exc)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return render(request, t, {"q": cleaned["q"]}, status=503)

    # NOTE(review): ``items`` is not used again in this function.
    items = [(k, v) for k in search_form.fields for v in r.getlist(k) if v and k != "a"]
    items.append(("a", "2"))

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    product = Product.objects.filter(slug__in=cleaned["product"])
    if product:
        product_titles = [_(p.title, "DB: products.Product.title") for p in product]
    else:
        product_titles = [_("All Products")]

    product_titles = ", ".join(product_titles)

    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
        "advanced": True,
    }

    if is_json:
        # Models are not json serializable.
        # (Use a dedicated loop name so the QueryDict ``r`` is not rebound.)
        for entry in data["results"]:
            del entry["object"]
        data["total"] = len(data["results"])

        data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)]

        if product:
            data["product"] = product[0].slug

        pages = Paginator(pages)
        data["pagination"] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data["message"] = _("No pages matched the search criteria")
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the (already validated) callback.
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, content_type=content_type)

    data.update(
        {
            "product": product,
            "products": Product.objects.filter(visible=True),
            "pages": pages,
            "search_form": search_form,
        }
    )
    results_ = render(request, template, data)
    # Let clients cache the results page for SEARCH_CACHE_PERIOD minutes.
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False)

    return results_
Example #19
0
def monitor(request):
    """View for services monitor.

    Probes memcached, PIL JPEG support, the media directories, RabbitMQ,
    ElasticSearch and Redis. Each component maps to a list of
    ``(level, message)`` tuples in ``status``.

    :arg request: the incoming request

    :returns: the rendered ``services/monitor.html`` page; the HTTP
        status is 500 when any component recorded an ``ERROR`` entry and
        200 otherwise

    """
    status = {}

    # Note: To add a new component to the services monitor, do your
    # testing and then add a name -> list of output tuples map to
    # status.

    # Check memcached.
    memcache_results = []
    try:
        for cache_name, cache_props in settings.CACHES.items():
            result = True
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    # NOTE(review): the probe result is always recorded
                    # as INFO here -- confirm what test_memcached
                    # returns on failure.
                    memcache_results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        if not memcache_results:
            memcache_results.append((ERROR, 'memcache is not configured.'))

        elif len(memcache_results) < 2:
            memcache_results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(memcache_results))))

        else:
            memcache_results.append((INFO, 'memcached servers look good.'))

    except Exception as exc:
        memcache_results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    status['memcached'] = memcache_results

    # Check Libraries and versions
    libraries_results = []
    try:
        # Saving a tiny image to an in-memory buffer checks that JPEG
        # encoding works.
        Image.new('RGB', (16, 16)).save(StringIO.StringIO(), 'JPEG')
        libraries_results.append((INFO, 'PIL+JPEG: Got it!'))
    except Exception as exc:
        libraries_results.append(
            (ERROR,
             'PIL+JPEG: Probably missing: '
             'Failed to create a jpeg image: %s' % exc))

    status['libraries'] = libraries_results

    # Check file paths.
    msg = 'We want read + write.'
    filepaths = (
        (settings.USER_AVATAR_PATH, os.R_OK | os.W_OK, msg),
        (settings.IMAGE_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.THUMBNAIL_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GROUP_AVATAR_PATH, os.R_OK | os.W_OK, msg),
    )

    filepath_results = []
    for path, perms, notes in filepaths:
        path = os.path.join(settings.MEDIA_ROOT, path)
        path_exists = os.path.isdir(path)
        path_perms = os.access(path, perms)

        # Record failures too: a missing or inaccessible directory must
        # show up as an ERROR instead of silently vanishing from the
        # report.
        level = INFO if path_exists and path_perms else ERROR
        filepath_results.append(
            (level, '%s: %s %s %s' % (path, path_exists, path_perms,
                                      notes)))

    status['filepaths'] = filepath_results

    # Check RabbitMQ.
    rabbitmq_results = []
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        rabbitmq_results.append(
            (INFO, 'Successfully connected to RabbitMQ.'))

    except (socket.error, IOError) as exc:
        rabbitmq_results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))

    except Exception as exc:
        rabbitmq_results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    status['RabbitMQ'] = rabbitmq_results

    # Check ES.
    es_results = []
    try:
        es_utils.get_doctype_stats(es_utils.read_index())
        es_results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))

    except es_utils.ES_EXCEPTIONS as exc:
        es_results.append(
            (ERROR, 'ElasticSearch problem: %s' % str(exc)))

    except Exception as exc:
        es_results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    status['ElasticSearch'] = es_results

    # Check Celery.
    # start = time.time()
    # pong = celery.task.ping()
    # rabbit_results = r = {'duration': time.time() - start}
    # status_summary['rabbit'] = pong == 'pong' and r['duration'] < 1

    # Check Redis.
    redis_results = []
    if hasattr(settings, 'REDIS_BACKENDS'):
        for backend in settings.REDIS_BACKENDS:
            try:
                redis_client(backend)
                redis_results.append((INFO, '%s: Pass!' % backend))
            except RedisError:
                redis_results.append((ERROR, '%s: Fail!' % backend))
    status['Redis'] = redis_results

    status_code = 200

    # A component is healthy only if none of its entries is an ERROR; a
    # single unhealthy component turns the whole response into a 500.
    status_summary = {}
    for component, output in status.items():
        if ERROR in [item[0] for item in output]:
            status_code = 500
            status_summary[component] = False
        else:
            status_summary[component] = True

    return render(request, 'services/monitor.html', {
        'component_status': status,
        'status_summary': status_summary},
        status=status_code)
Example #20
0
    def get_data(self, request):
        """Run the search described by ``request.GET`` and return its data.

        Validates the query string with ``self.form_class``, searches the
        default read index and formats the requested page of hits with
        ``self.format_result``. The resolved locale is also stored on
        ``self.language``.

        :arg request: the incoming request; ``GET`` carries the search
            parameters plus the optional ``language`` and ``page`` values

        :returns: dict with ``num_results``, ``results`` and
            ``lang_name``, plus ``message`` when there are no results

        :raises GenericAPIException: 400 when the form is invalid, 503
            when one of ``es_utils.ES_EXCEPTIONS`` is raised while
            fetching results

        """
        form = self.form_class(request.GET)
        if not form.is_valid():
            raise GenericAPIException(
                status.HTTP_400_BAD_REQUEST, _('Invalid search data.'))

        requested_locale = request.GET.get('language', request.LANGUAGE_CODE)
        language = locale_or_default(requested_locale)
        # Unknown (or empty-named) locales fall back to an empty name.
        lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''
        self.language = language

        per_page = settings.SEARCH_RESULTS_PER_PAGE
        page_number = max(smart_int(request.GET.get('page')), 1)
        offset = (page_number - 1) * per_page

        searcher = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
        searcher = searcher.indexes(es_utils.read_index('default'))
        searcher = searcher.doctypes(*self.get_doctypes())
        searcher = searcher.filter(self.get_filters(form.cleaned_data))

        # Add the simple string query, if one was given.
        query_text = form.cleaned_data.get('query')
        if query_text:
            # One simple_query_search clause per searchable field.
            per_field = {
                '%s__sqs' % field_name: query_text
                for field_name in self.get_query_fields()
            }
            # Swap in the locale aware analyzers.
            localized = es_utils.es_query_with_analyzer(per_field, language)
            searcher = searcher.query(should=True, **localized)

        try:
            num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

            # Stays empty when there is nothing to show, so ES is not hit
            # again for a result set we already know is empty.
            hits = []
            if num_results != 0:
                # TODO - Can ditch the ComposedList here, but we need
                # something that paginate can use to figure out the paging.
                composed = ComposedList()
                composed.set_count(('results', searcher), num_results)

                # An empty window means the requested page lies beyond
                # the pagination.
                window = composed[offset:offset + per_page]
                if len(window) != 0:
                    bounds = window[0][1]
                    hits = searcher[bounds[0]:bounds[1]]

            results = []
            for rank, doc in enumerate(hits, offset):
                entry = self.format_result(doc)
                entry['url'] = doc['url']
                entry['rank'] = rank
                entry['score'] = doc.es_meta.score
                entry['explanation'] = escape(
                    format_explanation(doc.es_meta.explanation))
                entry['id'] = doc['id']
                results.append(entry)

        except es_utils.ES_EXCEPTIONS:
            raise GenericAPIException(
                status.HTTP_503_SERVICE_UNAVAILABLE, _('Search Unavailable'))

        data = {
            'num_results': num_results,
            'results': results,
            'lang_name': lang_name,
        }
        if not results:
            data['message'] = _('No pages matched the search criteria')
        return data
Example #21
0
def search(request, template=None):
    """ES-specific search view.

    Searches KB documents, support questions and discussion forum
    threads; which of them are searched depends on the ``w`` bitmask
    of the submitted form.

    GET parameters read directly by this view (everything else goes
    through ``SearchForm``):

    * ``format`` -- ``json`` switches to a JSON response
    * ``callback`` -- JSONP callback name (only used with JSON)
    * ``language`` -- locale to search; defaults to the request's
    * ``a`` -- ``0`` basic search (default), ``2`` show the advanced form
    * ``page`` -- 1-based page number
    * ``explain`` -- ``1`` asks ES to explain the scoring

    :arg request: the current request
    :arg template: template for the results page (also used for the
        search form on mobile)

    :returns: an ``HttpResponse``. When the form is invalid or
        ``a == '2'`` this is the search form (or a 400 JSON error);
        when ES raises one of ``ES_EXCEPTIONS`` it is a 503;
        otherwise it is the results as HTML or JSON/JSONP.
    """

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype,
            status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        # Build a real list so that both the ``or`` fallback and the
        # ValueError handling work even where map() is lazy.
        category = ([int(c) for c in r.getlist('category')]
                    or settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)
    search_form.set_allowed_forums(request.user)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': a,
            'request': request,
            'search_form': search_form
        })
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, a basic search only covers the wiki.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS).indexes(
        es_utils.read_index()))

    # One filter per document model; they are OR-ed together below.
    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')
    discussion_f = F(model='forums_thread')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [
            f.id for f in Forum.authorized_forums_for_user(request.user)
        ]

        forum_ids = None
        if cleaned['forum']:
            forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

    # End - discussion forum filters

    # Created / updated filters. These apply to questions and
    # discussion threads only, not to wiki documents.
    unix_now = int(time.time())
    interval_filters = (('created', cleaned['created'],
                         cleaned['created_date']),
                        ('updated', cleaned['updated'],
                         cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {
                filter_name + '__gte': 0,
                filter_name + '__lte': max(filter_date, 0)
            }

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {
                filter_name + '__gte': min(filter_date, unix_now),
                filter_name + '__lte': unix_now
            }

            discussion_f &= F(**after)
            question_f &= F(**after)

    # In basic search, we limit questions from the last
    # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds.
    if a == '0':
        start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        question_f &= F(created__gte=start_date)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            'post_content',  # contributor forum
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__text_phrase=10.0,
            document_content__text_phrase=8.0)

        # Apply sortby for advanced search of questions
        if cleaned['w'] == constants.WHERE_SUPPORT:
            sortby = cleaned['sortby']
            try:
                searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned['w'] == constants.WHERE_WIKI:
            sortby = cleaned['sortby_documents']
            try:
                searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(
                *[cls.get_query_fields() for cls in get_mapping_types()])
            query = {}
            # Create text and text_phrase queries for every field
            # we want to search.
            for field in query_fields:
                for query_type in ['text', 'text_phrase']:
                    query['%s__%s' % (field, query_type)] = cleaned_q

            # Transform the query to use locale aware analyzers.
            query = es_utils.es_query_with_analyzer(query, language)

            searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher.values_dict()[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {'title': doc['document_title'], 'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc['question_content'],
                                           strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']
                }

            else:
                summary = _build_es_excerpt(doc, first_only=True)
                result = {'title': doc['post_title'], 'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            result['explanation'] = escape(format_explanation(
                doc._explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.

        # Record and log the failure before branching on the response
        # format so that JSON requests are not silently swallowed.
        #
        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        import logging
        logging.exception(exc)

        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                mimetype=mimetype,
                                status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    if is_json:
        # Models are not json serializable.
        for result in results:
            del result['object']
        # NOTE(review): 'total' is the number of results on this page,
        # not num_results -- confirm that is what API clients expect.
        data = {
            'results': results,
            'total': len(results),
            'query': cleaned['q'],
        }
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    results_ = render(
        request, template, {
            'num_results': num_results,
            'results': results,
            'fallback_results': fallback_results,
            'q': cleaned['q'],
            'w': cleaned['w'],
            'product': Product.objects.filter(slug__in=cleaned['product']),
            'products': Product.objects.filter(visible=True),
            'pages': pages,
            'search_form': search_form,
            'lang_name': lang_name,
        })
    results_['Cache-Control'] = 'max-age=%s' % \
                                (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (datetime.utcnow() +
                           timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
                           .strftime(expires_fmt)
    # Remember the query in a cookie for one hour.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE,
                        urlquote(cleaned['q']),
                        max_age=3600,
                        secure=False,
                        httponly=False)

    return results_
Example #22
0
    stats = None
    write_stats = None
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    try:
        stats = get_doctype_stats(read_index())
    except ES_EXCEPTIONS:
        pass

    try:
        write_stats = get_doctype_stats(write_index())
    except ES_EXCEPTIONS:
        pass

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append('Error: {0}'.format(repr(e)))

    try:
Example #23
0
 def test_delete(self, _out):
     # read_index() == write_index() in the tests, so deleting the
     # read index covers both. The second name is presumably an index
     # that does not exist -- TODO confirm.
     index_names = (es_utils.read_index(), 'cupcakerainbow_index')
     for name in index_names:
         call_command('esdelete', name, noinput=True)
Example #24
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    :arg request: the current request
    :arg template: template for the results page (also used for the
        search form on mobile)

    :returns: an ``HttpResponse``. Old advanced search URLs
        (``a=1``/``a=2``) get a redirect to the advanced search view;
        an invalid form gets the search form (or a 400 JSON error);
        when ES raises one of ``ES_EXCEPTIONS`` the response is a 503;
        otherwise it is the results as HTML or JSON/JSONP.
    """

    # Redirect old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get('a')
    if a in ['1', '2']:
        new_url = reverse('search.advanced') + '?' + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = (
        'application/x-javascript' if callback else 'application/json')

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type, status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r['w'] = r.get('w', constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': False, 'request': request,
            'search_form': search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_['Cache-Control'] = 'max-age={0!s}'.format((cache_period * 60))
        search_['Expires'] = (
            (datetime.utcnow() + timedelta(minutes=cache_period))
            .strftime(EXPIRES_FMT))
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES_DICT.get(lang):
        lang_name = settings.LANGUAGES_DICT[lang]
    else:
        lang_name = ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS)
                .indexes(es_utils.read_index('default')))

    # One filter per document model; they are OR-ed together below.
    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')

    cleaned_q = cleaned['q']
    # NOTE: this is the same list object as cleaned['product'], so the
    # product guessed from the query below is also seen by the
    # Product lookups further down.
    products = cleaned['product']

    # No product was picked: guess one from the query text unless the
    # request carries 'all_products'.
    if not products and 'all_products' not in request.GET:
        lowered_q = cleaned_q.lower()

        if 'thunderbird' in lowered_q:
            products.append('thunderbird')
        elif 'android' in lowered_q:
            products.append('mobile')
        elif ('ios' in lowered_q or 'ipad' in lowered_q or 'ipod' in lowered_q or
              'iphone' in lowered_q):
            products.append('ios')
        elif 'firefox os' in lowered_q:
            products.append('firefox-os')
        elif 'firefox' in lowered_q:
            products.append('firefox')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned['has_helpful'] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned['is_archived'] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('has_helpful', 'is_archived')
        d = dict(('question_{0!s}'.format(filter_name),
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time  to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0)

        # Build the query
        query_fields = chain(*[
            cls.get_query_fields() for cls in [
                DocumentMappingType,
                QuestionMappingType
            ]
        ])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ['match', 'match_phrase']:
                query['{0!s}__{1!s}'.format(field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(
                        doc['question_content'], strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                # This view only knows how to present KB documents and
                # questions. Without this guard a hit of any other
                # model would either hit an unbound `result` or
                # re-append the previous iteration's dict.
                continue

            result['url'] = doc['url']
            result['object'] = doc
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc.es_meta.score
            result['explanation'] = escape(format_explanation(
                doc.es_meta.explanation))
            result['id'] = doc['id']
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.

        # Record and log the failure before branching on the response
        # format so that JSON requests are not silently swallowed.
        #
        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        log.exception(exc)

        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title')
                          for p in product]
    else:
        product_titles = [_('All Products')]

    product_titles = ', '.join(product_titles)

    data = {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name, }

    if is_json:
        # Models are not json serializable.
        for result in results:
            del result['object']
        data['total'] = len(results)

        data['products'] = ([{'slug': p.slug, 'title': p.title}
                             for p in Product.objects.filter(visible=True)])

        if product:
            data['product'] = product[0].slug

        # Flatten the pagination state into plain values for the
        # JSON payload.
        paginator = Paginator(pages)
        data['pagination'] = dict(
            number=paginator.pager.number,
            num_pages=paginator.pager.paginator.num_pages,
            has_next=paginator.pager.has_next(),
            has_previous=paginator.pager.has_previous(),
            max=paginator.max,
            span=paginator.span,
            dotted_upper=paginator.pager.dotted_upper,
            dotted_lower=paginator.pager.dotted_lower,
            page_range=paginator.pager.page_range,
            url=paginator.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    data.update({
        'product': product,
        'products': Product.objects.filter(visible=True),
        'pages': pages,
        'search_form': search_form,
        'advanced': False,
    })
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_['Cache-Control'] = 'max-age={0!s}'.format((cache_period * 60))
    results_['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period))
        .strftime(EXPIRES_FMT))
    # Remember the query in a cookie for one hour.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_
Example #25
0
    def get_data(self, request):
        """Run the search described by ``request.GET``.

        The query string is validated with ``self.form_class``; the
        doctypes, filters, query fields and per-hit formatting come
        from ``get_doctypes``, ``get_filters``, ``get_query_fields``
        and ``format_result`` on ``self``.

        :arg request: the current request

        :returns: dict with ``num_results``, ``results`` and
            ``lang_name``, plus ``message`` when there are no results

        :raises GenericAPIException: 400 when the form is invalid, 503
            when ES raises one of ``es_utils.ES_EXCEPTIONS``
        """
        form = self.form_class(request.GET)
        if not form.is_valid():
            raise GenericAPIException(
                status.HTTP_400_BAD_REQUEST, _('Invalid search data.'))

        language = locale_or_default(
            request.GET.get('language', request.LANGUAGE_CODE))
        # Display name of the language, or '' when it is not known.
        lang_name = settings.LANGUAGES_DICT.get(language.lower()) or ''

        page_number = max(smart_int(request.GET.get('page')), 1)
        per_page = settings.SEARCH_RESULTS_PER_PAGE
        offset = (page_number - 1) * per_page

        # Build the searcher step by step: index, doctypes, filters.
        searcher = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
        searcher = searcher.indexes(es_utils.read_index('default'))
        searcher = searcher.doctypes(*self.get_doctypes())
        searcher = searcher.filter(self.get_filters())

        # Add the simple string query.
        cleaned_q = form.cleaned_data.get('query')
        if cleaned_q:
            # One simple_query_search clause per searchable field.
            field_queries = dict(
                ('%s__sqs' % field, cleaned_q)
                for field in self.get_query_fields())

            # Transform the query to use locale aware analyzers.
            field_queries = es_utils.es_query_with_analyzer(
                field_queries, language)

            searcher = searcher.query(should=True, **field_queries)

        try:
            num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

            # Stays empty when there is nothing to show, which also
            # avoids hitting ES a second time.
            hits = []
            if num_results != 0:
                # TODO - Can ditch the ComposedList here, but we need
                # something that paginate can use to figure out the paging.
                documents = ComposedList()
                documents.set_count(('results', searcher), num_results)

                # Slice out the window for the requested page. It is
                # empty when the page lies beyond the pagination.
                window = documents[offset:offset + per_page]
                if len(window) != 0:
                    bounds = window[0][1]
                    hits = searcher[bounds[0]:bounds[1]]

            results = []
            for rank, doc in enumerate(hits, offset):
                entry = self.format_result(doc)

                entry['url'] = doc['url']
                entry['rank'] = rank
                entry['score'] = doc.es_meta.score
                entry['explanation'] = escape(
                    format_explanation(doc.es_meta.explanation))
                entry['id'] = doc['id']
                results.append(entry)

        except es_utils.ES_EXCEPTIONS:
            raise GenericAPIException(
                status.HTTP_503_SERVICE_UNAVAILABLE, _('Search Unavailable'))

        data = {
            'num_results': num_results,
            'results': results,
            'lang_name': lang_name,
        }
        if not results:
            data['message'] = _('No pages matched the search criteria')

        return data
Example #26
0
def search(request, template=None):
    """ES-specific search view.

    Serves the basic search (``a=0``), the advanced search results
    (``a=1``) and the advanced search form (``a=2``). When the
    querystring contains ``format=json`` the results are returned as
    JSON, or as JSONP when a valid ``callback`` is also given.

    :arg request: the current request; all search parameters are read
        from ``request.GET``
    :arg template: template used for the results page (and for the
        form page on mobile requests)

    :returns: an HTTP response, one of:

        * 400 JSON error for an invalid JSONP callback or invalid
          search data on a JSON request
        * the rendered search form when the form is invalid or ``a``
          is ``'2'``
        * 503 (JSON error or "search down" page) when Elasticsearch
          raises one of ``ES_EXCEPTIONS``
        * the JSON/JSONP results for JSON requests
        * the rendered results page otherwise, with caching headers
          and the last-search cookie set

    """

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    content_type = (
        'application/x-javascript' if callback else 'application/json')

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            content_type=content_type, status=400)

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values. A list is built explicitly (rather than
    # with map()) so that an empty selection is falsy and falls back
    # to the defaults regardless of whether map() is lazy.
    try:
        category = ([int(c) for c in r.getlist('category')] or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # a == '2' means "show the form", so it is handled together with
    # the invalid-form case.
    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=content_type,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = render(request, t, {
            'advanced': a, 'request': request,
            'search_form': search_form})
        # SEARCH_CACHE_PERIOD is used as minutes for Expires and
        # converted to seconds for max-age.
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
        search_['Expires'] = (
            (datetime.utcnow() + timedelta(minutes=cache_period))
            .strftime(expires_fmt))
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, a basic search only looks at the wiki.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS)
                .indexes(es_utils.read_index('default')))

    wiki_f = F(model='wiki_document')
    question_f = F(model='questions_question')
    discussion_f = F(model='forums_thread')

    # Start - wiki filters

    if cleaned['w'] & constants.WHERE_WIKI:
        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if a == '0' and not cleaned['include_archived']:
            # Default to NO for basic search:
            cleaned['include_archived'] = False
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

    # End - support questions filters

    # Start - discussion forum filters

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [
            f.id for f in Forum.authorized_forums_for_user(request.user)]

        forum_ids = None
        if cleaned['forum']:
            forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

    # End - discussion forum filters

    # Created/updated filters. These only narrow the question and
    # discussion filters; the wiki filter is left alone.
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_f &= F(**before)
            question_f &= F(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_f &= F(**after)
            question_f &= F(**after)

    # In basic search, we limit questions from the last
    # SEARCH_DEFAULT_MAX_QUESTION_AGE seconds.
    if a == '0':
        start_date = unix_now - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        question_f &= F(created__gte=start_date)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_f &= F(question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher. Each selected
    # area contributes its doctype and is OR-ed into the final filter.
    doctypes = []
    final_filter = F()
    if cleaned['w'] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned['w'] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if 'explain' in request.GET and request.GET['explain'] == '1':
        searcher = searcher.explain()

    try:
        cleaned_q = cleaned['q']

        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            'post_content',  # contributor forum
            pre_tags=['<b>'],
            post_tags=['</b>'],
            number_of_fragments=0)

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            post_title=2.0,
            post_content=1.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,

            # Text phrases in document titles and content get an extra
            # boost.
            document_title__text_phrase=10.0,
            document_content__text_phrase=8.0)

        # Apply sortby for advanced search of questions
        if cleaned['w'] == constants.WHERE_SUPPORT:
            sortby = cleaned['sortby']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Apply sortby for advanced search of kb documents
        if cleaned['w'] == constants.WHERE_WIKI:
            sortby = cleaned['sortby_documents']
            try:
                searcher = searcher.order_by(
                    *constants.SORT_DOCUMENTS[sortby])
            except IndexError:
                # Skip index errors because they imply the user is
                # sending us sortby values that aren't valid.
                pass

        # Build the query
        if cleaned_q:
            query_fields = chain(*[cls.get_query_fields()
                                   for cls in get_mapping_types()])
            query = {}
            # Create text and text_phrase queries for every field
            # we want to search.
            for field in query_fields:
                for query_type in ['text', 'text_phrase']:
                    query['%s__%s' % (field, query_type)] = cleaned_q

            # Transform the query to use locale aware analyzers.
            query = es_utils.es_query_with_analyzer(query, language)

            searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(('results', searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset:offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher.values_dict()[bounds[0]:bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc['model'] == 'wiki_document':
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc['document_summary']
                result = {
                    'title': doc['document_title'],
                    'type': 'document'}

            elif doc['model'] == 'questions_question':
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(
                        doc['question_content'], strip=True)[:500]

                result = {
                    'title': doc['question_title'],
                    'type': 'question',
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}

            else:
                # Anything else is treated as a forum thread.
                summary = _build_es_excerpt(doc, first_only=True)
                result = {
                    'title': doc['post_title'],
                    'type': 'thread'}

            result['url'] = doc['url']
            result['object'] = ObjectDict(doc)
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            result['explanation'] = escape(format_explanation(
                doc._explanation))
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error': _('Search Unavailable')}),
                                content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip('()')
        statsd.incr('search.esunified.{0}'.format(exc_bucket))

        import logging
        logging.exception(exc)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return render(request, t, {'q': cleaned['q']}, status=503)

    if is_json:
        # Models are not json serializable.
        for result in results:
            del result['object']
        data = {}
        data['results'] = results
        # NOTE: 'total' is the number of results on this page, not
        # num_results.
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the (already validated) callback.
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, content_type=content_type)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned['product'])

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [_(p.title, 'DB: products.Product.title')
                          for p in product]
    else:
        product_titles = [_('All Products')]

    product_titles = ', '.join(product_titles)

    results_ = render(request, template, {
        'num_results': num_results,
        'results': results,
        'fallback_results': fallback_results,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'product': product,
        'products': Product.objects.filter(visible=True),
        'product_titles': product_titles,
        'pages': pages,
        'search_form': search_form,
        'lang_name': lang_name, })
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_['Cache-Control'] = 'max-age=%s' % (cache_period * 60)
    results_['Expires'] = (
        (datetime.utcnow() + timedelta(minutes=cache_period))
        .strftime(expires_fmt))
    # Remember the query so the advanced search form can prefill it.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                        max_age=3600, secure=False, httponly=False)

    return results_
Example #27
0
def _apply_interval_filters(base_f, interval_filters, unix_now):
    """Return ``base_f`` narrowed by the created/updated interval filters.

    :arg base_f: the filter to narrow
    :arg interval_filters: iterable of
        ``(field name, interval option, unix timestamp)`` tuples
    :arg unix_now: the current time as a unix timestamp; used as the
        upper bound for "after" ranges

    :returns: the narrowed filter

    """
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            # Range from the epoch up to the given date.
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}
            base_f &= F(**before)

        elif filter_option == constants.INTERVAL_AFTER:
            # Range from the given date (capped at now) up to now.
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}
            base_f &= F(**after)

    return base_f


def advanced_search(request, template=None):
    """Elasticsearch-specific Advanced search view.

    Shows the advanced search form when ``a`` is ``'2'`` (the default)
    or the submitted form is invalid; otherwise runs the search across
    the selected areas (wiki documents, support questions, discussion
    threads) and renders the results.

    :arg request: the current request; search parameters are read from
        ``request.GET``. The view also reads ``request.IS_JSON``,
        ``request.JSON_CALLBACK`` and ``request.CONTENT_TYPE``, which
        are expected to have been set before the view runs.
    :arg template: template used for the results page (and for the
        form page on mobile requests)

    :returns: an HTTP response: a 400 JSON error for invalid search
        data on JSON requests, the rendered form, the JSON/JSONP
        results for JSON requests, or the rendered results page with
        the last-search cookie set.

    """

    to_json = JSONRenderer().render

    # 1. Prep request.
    r = request.GET.copy()
    # TODO: Figure out how to get rid of 'a' and do it.
    # It basically is used to switch between showing the form or results.
    a = request.GET.get('a', '2')
    # TODO: This is so the 'a=1' stays in the URL for pagination.
    r['a'] = 1

    language = locale_or_default(request.GET.get('language', request.LANGUAGE_CODE))
    r['language'] = language
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    # 2. Build form.
    search_form = AdvancedSearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # 3. Validate request.
    # Note: a == 2 means "show the form"--that's all we use it for now.
    if a == '2' or not search_form.is_valid():
        if request.IS_JSON:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=request.CONTENT_TYPE,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        data = {'advanced': True,
                'request': request,
                'search_form': search_form}
        # get value for search input from last search term.
        last_search = request.COOKIES.get(settings.LAST_SEARCH_COOKIE)
        # If there is any cached input from last search, pass it to template
        if last_search and 'q' not in r:
            cached_field = urlquote(last_search)
            data.update({'cached_field': cached_field})

        return cache_control(
            render(request, t, data),
            settings.SEARCH_CACHE_PERIOD)

    # 4. Generate search.
    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned['w'] == constants.WHERE_BASIC:
        cleaned['w'] = constants.WHERE_WIKI

    # We use a regular S here because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(urls=settings.ES_URLS)
                .indexes(es_utils.read_index('default')))

    doctypes = []
    final_filter = F()
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date'])
    )

    # Start - wiki search configuration

    if cleaned['w'] & constants.WHERE_WIKI:
        wiki_f = F(model='wiki_document')

        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

        # Apply sortby
        sortby = cleaned['sortby_documents']
        try:
            searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending us sortby values
            # that aren't valid.
            pass

        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    # End - wiki search configuration

    # Start - support questions configuration

    if cleaned['w'] & constants.WHERE_SUPPORT:
        question_f = F(model='questions_question')

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

        # Note: num_voted (with a d) is a different field than num_votes
        # (with an s). The former is a dropdown and the latter is an
        # integer value.
        if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
            question_f &= F(question_num_votes__lte=max(cleaned['num_votes'], 0))
        elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
            question_f &= F(question_num_votes__gte=cleaned['num_votes'])

        # Apply sortby
        sortby = cleaned['sortby']
        try:
            searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending us sortby values
            # that aren't valid.
            pass

        # Apply created and updated filters
        question_f = _apply_interval_filters(
            question_f, interval_filters, unix_now)

        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    # End - support questions configuration

    # Start - discussion forum configuration

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        discussion_f = F(model='forums_thread')

        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [f.id for f in Forum.authorized_forums_for_user(request.user)]

        forum_ids = None
        if cleaned['forum']:
            forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

        # Apply created and updated filters
        discussion_f = _apply_interval_filters(
            discussion_f, interval_filters, unix_now)

        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    # End - discussion forum configuration

    # Done with all the filtery stuff--time to generate results

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if request.GET.get('explain') == '1':
        searcher = searcher.explain()

    cleaned_q = cleaned['q']

    # Set up the highlights. Show the entire field highlighted.
    searcher = searcher.highlight(
        'question_content',  # support forum
        'document_summary',  # kb
        'post_content',  # contributor forum
        pre_tags=['<b>'],
        post_tags=['</b>'],
        number_of_fragments=0)

    searcher = apply_boosts(searcher)

    # Build the query
    if cleaned_q:
        query_fields = chain(*[
            cls.get_query_fields() for cls in [
                DocumentMappingType,
                ThreadMappingType,
                QuestionMappingType
            ]
        ])
        query = {}
        # Create a simple_query_search query for every field we want to search.
        for field in query_fields:
            query['%s__sqs' % field] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

    # Never page past SEARCH_MAX_RESULTS.
    searcher = searcher[:settings.SEARCH_MAX_RESULTS]

    # 5. Generate output
    pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)

    if pages.paginator.count == 0:
        # If we know there aren't any results, show fallback_results.
        fallback_results = _fallback_results(language, cleaned['product'])
        results = []
    else:
        fallback_results = None
        results = build_results_list(pages, request.IS_JSON)

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [pgettext('DB: products.Product.title', p.title) for p in product]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
        'advanced': True,
        'products': Product.objects.filter(visible=True)
    }

    if request.IS_JSON:
        # NOTE: 'total' is the number of results on this page;
        # 'num_results' carries the overall count.
        data['total'] = len(data['results'])
        # Querysets are replaced with plain dicts for the JSON payload.
        data['products'] = [{'slug': p.slug, 'title': p.title}
                            for p in data['products']]

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = to_json(data)
        if request.JSON_CALLBACK:
            # JSONP: wrap the payload in the callback.
            json_data = request.JSON_CALLBACK + '(' + json_data + ');'
        return HttpResponse(json_data, content_type=request.CONTENT_TYPE)

    data.update({
        'product': product,
        'pages': pages,
        'search_form': search_form
    })
    resp = cache_control(render(request, template, data), settings.SEARCH_CACHE_PERIOD)
    # Remember the query so the form can prefill it next time.
    resp.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned['q']),
                    max_age=3600, secure=False, httponly=False)
    return resp
Example #28
0
def suggest(request):
    """Suggest wiki documents and support questions matching some text.

    The text comes from the request body or, failing that, the ``q``
    querystring parameter. ``locale``, ``product``, ``max_questions``
    and ``max_documents`` are read from the querystring.

    :arg request: the current request

    :returns: a ``Response`` with ``questions`` (id and title) and
        ``documents`` (title, slug and summary) lists, each capped at
        the requested maximum

    :raises GenericAPIException: with status 400 and a dict of
        per-field messages when any parameter is invalid

    """
    params = request.GET
    text = request.body or params.get('q')
    locale = params.get('locale', settings.WIKI_DEFAULT_LANGUAGE)
    product = params.get('product')
    max_questions = params.get('max_questions', 10)
    max_documents = params.get('max_documents', 10)

    # Collect every validation problem before failing, so the caller
    # sees all of them in a single response.
    errors = {}

    try:
        max_questions = int(max_questions)
    except ValueError:
        errors['max_questions'] = 'This field must be an integer.'

    try:
        max_documents = int(max_documents)
    except ValueError:
        errors['max_documents'] = 'This field must be an integer.'

    if text is None:
        errors['q'] = 'This field is required.'

    if product is not None:
        product_exists = Product.objects.filter(slug=product).exists()
        if not product_exists:
            errors['product'] = (
                'Could not find product with slug "{0}".'.format(product))

    if errors:
        raise GenericAPIException(400, errors)

    # Only unarchived documents in the default categories for the locale.
    wiki_f = es_utils.F(
        model='wiki_document',
        document_category__in=settings.SEARCH_DEFAULT_CATEGORIES,
        document_locale=locale,
        document_is_archived=False)

    # Only unarchived, unlocked questions that have a helpful answer.
    questions_f = es_utils.F(
        model='questions_question',
        question_is_archived=False,
        question_is_locked=False,
        question_has_helpful=True)

    if product is not None:
        wiki_f &= es_utils.F(product=product)
        questions_f &= es_utils.F(product=product)

    mapping_types = [QuestionMappingType, DocumentMappingType]
    query_fields = itertools.chain(
        *[cls.get_query_fields() for cls in mapping_types])

    # One match and one match_phrase clause per searchable field.
    query = dict(
        ('{0}__{1}'.format(field, query_type), text)
        for field in query_fields
        for query_type in ['match', 'match_phrase'])

    # Transform query to be locale aware.
    query = es_utils.es_query_with_analyzer(query, locale)

    searcher = es_utils.AnalyzerS().es(urls=settings.ES_URLS)
    searcher = searcher.indexes(es_utils.read_index('default'))
    searcher = searcher.doctypes(
        *[cls.get_mapping_type_name() for cls in mapping_types])
    searcher = searcher.filter(wiki_f | questions_f)
    searcher = searcher.query(should=True, **query)

    documents = []
    questions = []

    # Fetch twice the combined maximum, since the hits are a mix of
    # both kinds.
    fetch_limit = (max_documents + max_questions) * 2
    for hit in searcher[:fetch_limit]:
        model = hit['model']
        if model == 'wiki_document':
            documents.append({
                'title': hit['document_title'],
                'slug': hit['document_slug'],
                'summary': hit['document_summary'],
            })
        elif model == 'questions_question':
            questions.append({
                'id': hit['id'],
                'title': hit['question_title'],
            })

        # Stop as soon as both lists are full.
        have_enough_documents = len(documents) >= max_documents
        have_enough_questions = len(questions) >= max_questions
        if have_enough_documents and have_enough_questions:
            break

    # Either list may have overshot while the other was still filling.
    return Response({
        'questions': questions[:max_questions],
        'documents': documents[:max_documents],
    })
Example #29
0
def search(request):
    """Render the admin view containing search tools.

    Requires the ``search.reindex`` permission. A POST containing one of
    ``reset``, ``reindex``, ``recreate_index`` or ``delete_index`` is
    dispatched to the matching handler and that handler's response is
    returned directly. If the handler raises one of the errors caught
    below, the message is collected and the maintenance page is rendered
    with it instead.

    :arg request: the current HttpRequest

    :returns: the handler's response for a successful POST action,
        otherwise the rendered ``admin/search_maintenance.html`` page

    :raises PermissionDenied: if the user lacks ``search.reindex``

    """
    if not request.user.has_perm("search.reindex"):
        raise PermissionDenied

    error_messages = []

    if "reset" in request.POST:
        try:
            return handle_reset(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "reindex" in request.POST:
        try:
            return handle_reindex(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "recreate_index" in request.POST:
        try:
            return handle_recreate_index(request)
        except ReindexError as e:
            error_messages.append(u"Error: %s" % e.message)

    if "delete_index" in request.POST:
        try:
            return handle_delete(request)
        except DeleteError as e:
            error_messages.append(u"Error: %s" % e.message)
        except ES_EXCEPTIONS as e:
            error_messages.append("Error: {0}".format(repr(e)))

    # Defaults for the values that are only filled in on a best-effort
    # basis below; the page still renders when a backend is unreachable.
    es_deets = None
    indexes = []
    outstanding_chunks = None

    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    # Per-index doctype stats; None marks an index whose stats could not
    # be fetched.
    stats = {}
    for index in all_read_indexes():
        try:
            stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            stats[index] = None

    write_stats = {}
    for index in all_write_indexes():
        try:
            write_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            write_stats[index] = None

    try:
        indexes = get_indexes()
        indexes.sort(key=lambda m: m[0])
    except ES_EXCEPTIONS as e:
        error_messages.append("Error: {0}".format(repr(e)))

    try:
        client = redis_client("default")
        outstanding_chunks = int(client.get(OUTSTANDING_INDEX_CHUNKS))
    except (RedisError, TypeError, ValueError):
        # TypeError: int() was handed a non-string/non-number (e.g. None).
        # ValueError: the stored value is not numeric. Either way the
        # count is simply left unknown.
        pass

    recent_records = Record.objects.order_by("-starttime")[:100]

    outstanding_records = Record.objects.filter(endtime__isnull=True).order_by("-starttime")

    index_groups = set(settings.ES_INDEXES.keys())
    index_groups |= set(settings.ES_WRITE_INDEXES.keys())

    index_group_data = [[group, read_index(group), write_index(group)] for group in index_groups]

    return render(
        request,
        "admin/search_maintenance.html",
        {
            "title": "Search",
            "es_deets": es_deets,
            "doctype_stats": stats,
            "doctype_write_stats": write_stats,
            "indexes": indexes,
            "index_groups": index_groups,
            "index_group_data": index_group_data,
            "read_indexes": all_read_indexes,
            "write_indexes": all_write_indexes,
            "error_messages": error_messages,
            "recent_records": recent_records,
            "outstanding_records": outstanding_records,
            "outstanding_chunks": outstanding_chunks,
            "now": datetime.now(),
            "read_index": read_index,
            "write_index": write_index,
        },
    )
Example #30
0
def generate_simple_search(search_form, language, with_highlights=False):
    """Build the S for a simple search from a validated form.

    :arg search_form: a validated SimpleSearch form
    :arg language: the language code
    :arg with_highlights: whether or not to ask for highlights

    :returns: a fully formed S

    """
    # One searcher is shared by every doctype selected below.
    searcher = es_utils.AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
    ).indexes(es_utils.read_index('default'))

    form_data = search_form.cleaned_data

    mapping_names = []
    combined_filter = es_utils.F()
    search_text = form_data['q']
    product_slugs = form_data['product']
    where = form_data['w']

    def restrict_to_products(base_filter):
        # AND one product clause per requested product onto the filter.
        for slug in product_slugs:
            base_filter &= es_utils.F(product=slug)
        return base_filter

    # Knowledge base documents
    if where & constants.WHERE_WIKI:
        kb_filter = restrict_to_products(es_utils.F(
            model='wiki_document',
            document_category__in=settings.SEARCH_DEFAULT_CATEGORIES,
            document_locale=language,
            document_is_archived=False,
        ))
        mapping_names.append(DocumentMappingType.get_mapping_type_name())
        combined_filter |= kb_filter

    # Support forum questions
    if where & constants.WHERE_SUPPORT:
        forum_filter = restrict_to_products(es_utils.F(
            model='questions_question',
            question_is_archived=False,
            question_has_helpful=True,
        ))
        mapping_names.append(QuestionMappingType.get_mapping_type_name())
        combined_filter |= forum_filter

    # Limit the searcher to the selected doctypes and their filters.
    searcher = searcher.doctypes(*mapping_names).filter(combined_filter)

    if form_data['explain']:
        searcher = searcher.explain()

    if with_highlights:
        # number_of_fragments=0 asks for the entire field, highlighted.
        highlight_options = {
            'pre_tags': ['<b>'],
            'post_tags': ['</b>'],
            'number_of_fragments': 0,
        }
        searcher = searcher.highlight(
            'question_content',  # support forum
            'document_summary',  # kb
            **highlight_options
        )

    searcher = apply_boosts(searcher)

    # A match and a match_phrase clause for every searchable field of
    # both mapping types.
    searchable_fields = chain(
        DocumentMappingType.get_query_fields(),
        QuestionMappingType.get_query_fields(),
    )
    clauses = {
        '%s__%s' % (field_name, match_kind): search_text
        for field_name in searchable_fields
        for match_kind in ('match', 'match_phrase')
    }

    # Swap in the analyzers appropriate for the language.
    clauses = es_utils.es_query_with_analyzer(clauses, language)

    return searcher.query(should=True, **clauses)
Example #31
0
def simple_search(request, template=None):
    """ES-specific simple search view.

    This view is for end user searching of the Knowledge Base and
    Support Forum. Filtering options are limited to:
    * product (`product=firefox`, for example, for only Firefox results)
    * document type (`w=2`, for example, for Support Forum questions only)

    :arg request: the current HttpRequest; search parameters are read
        from ``request.GET``
    :arg template: template used to render the HTML results (and the
        search form on mobile when the submitted data is invalid)

    :returns: an HttpResponse -- a redirect for old advanced-search URLs
        (``a=1`` / ``a=2``), a 400 for an invalid JSONP callback or for
        invalid search data when JSON was requested, the search form for
        invalid data otherwise, a 503 when Elasticsearch raises, and
        otherwise the results as JSON/JSONP or rendered HTML
    """

    # Redirect to old Advanced Search URLs (?a={1,2}) to the new URL.
    a = request.GET.get("a")
    if a in ["1", "2"]:
        new_url = reverse("search.advanced") + "?" + request.GET.urlencode()
        return HttpResponseRedirect(new_url)

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    content_type = "application/x-javascript" if callback else "application/json"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({"error": _("Invalid callback function.")}), content_type=content_type, status=400
        )

    language = locale_or_default(request.GET.get("language", request.LANGUAGE_CODE))
    r = request.GET.copy()

    # TODO: Do we really need to add this to the URL if it isn't already there?
    r["w"] = r.get("w", constants.WHERE_BASIC)

    # TODO: Break out a separate simple search form.
    search_form = SimpleSearchForm(r, auto_id=False)

    if not search_form.is_valid():
        if is_json:
            return HttpResponse(json.dumps({"error": _("Invalid search data.")}), content_type=content_type, status=400)

        t = template if request.MOBILE else "search/form.html"
        search_ = render(request, t, {"advanced": False, "request": request, "search_form": search_form})
        cache_period = settings.SEARCH_CACHE_PERIOD
        search_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
        return search_

    cleaned = search_form.cleaned_data

    # On mobile, we default to just wiki results.
    if request.MOBILE and cleaned["w"] == constants.WHERE_BASIC:
        cleaned["w"] = constants.WHERE_WIKI

    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # Display name of the language; empty when it is not a known one.
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ""

    # A single searcher is used because we want to search across
    # multiple doctypes.
    searcher = AnalyzerS().es(urls=settings.ES_URLS).indexes(es_utils.read_index("default"))

    wiki_f = F(model="wiki_document")
    question_f = F(model="questions_question")

    cleaned_q = cleaned["q"]
    products = cleaned["product"]

    # No explicit product and no "all_products" opt-out: guess a single
    # product from the query text. The first matching branch wins, so
    # "firefox os" must be tested before plain "firefox".
    if not products and "all_products" not in request.GET:
        lowered_q = cleaned_q.lower()

        if "thunderbird" in lowered_q:
            products.append("thunderbird")
        elif "android" in lowered_q:
            products.append("mobile")
        elif "ios" in lowered_q or "ipad" in lowered_q or "ipod" in lowered_q or "iphone" in lowered_q:
            products.append("ios")
        elif "firefox os" in lowered_q:
            products.append("firefox-os")
        elif "firefox" in lowered_q:
            products.append("firefox")

    # Start - wiki filters

    if cleaned["w"] & constants.WHERE_WIKI:
        # Category filter
        wiki_f &= F(document_category__in=settings.SEARCH_DEFAULT_CATEGORIES)

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        for p in products:
            wiki_f &= F(product=p)

        # Archived bit
        wiki_f &= F(document_is_archived=False)

    # End - wiki filters

    # Start - support questions filters

    if cleaned["w"] & constants.WHERE_SUPPORT:
        # Has helpful answers is set by default if using basic search
        cleaned["has_helpful"] = constants.TERNARY_YES

        # No archived questions in default search.
        cleaned["is_archived"] = constants.TERNARY_NO

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("has_helpful", "is_archived")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_f &= F(**d)

        # Product filter
        for p in products:
            question_f &= F(product=p)

    # End - support questions filters

    # Done with all the filtery stuff--time to generate results

    # Combine all the filters and add to the searcher
    doctypes = []
    final_filter = F()
    if cleaned["w"] & constants.WHERE_WIKI:
        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    if cleaned["w"] & constants.WHERE_SUPPORT:
        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if "explain" in request.GET and request.GET["explain"] == "1":
        searcher = searcher.explain()

    try:
        # Set up the highlights. Show the entire field highlighted.
        searcher = searcher.highlight(
            "question_content",  # support forum
            "document_summary",  # kb
            pre_tags=["<b>"],
            post_tags=["</b>"],
            number_of_fragments=0,
        )

        # Set up boosts
        searcher = searcher.boost(
            question_title=4.0,
            question_content=3.0,
            question_answer_content=3.0,
            document_title=6.0,
            document_content=1.0,
            document_keywords=8.0,
            document_summary=2.0,
            # Text phrases in document titles and content get an extra
            # boost.
            document_title__match_phrase=10.0,
            document_content__match_phrase=8.0,
        )

        # Build the query
        query_fields = chain(*[cls.get_query_fields() for cls in [DocumentMappingType, QuestionMappingType]])
        query = {}
        # Create match and match_phrase queries for every field
        # we want to search.
        for field in query_fields:
            for query_type in ["match", "match_phrase"]:
                query["%s__%s" % (field, query_type)] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

        num_results = min(searcher.count(), settings.SEARCH_MAX_RESULTS)

        # TODO - Can ditch the ComposedList here, but we need
        # something that paginate can use to figure out the paging.
        documents = ComposedList()
        documents.set_count(("results", searcher), num_results)

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)

        # If we know there aren't any results, let's cheat and in
        # doing that, not hit ES again.
        if num_results == 0:
            searcher = []
        else:
            # Get the documents we want to show and add them to
            # docs_for_page
            documents = documents[offset : offset + results_per_page]

            if len(documents) == 0:
                # If the user requested a page that's beyond the
                # pagination, then documents is an empty list and
                # there are no results to show.
                searcher = []
            else:
                bounds = documents[0][1]
                searcher = searcher[bounds[0] : bounds[1]]

        results = []
        for i, doc in enumerate(searcher):
            rank = i + offset

            if doc["model"] == "wiki_document":
                summary = _build_es_excerpt(doc)
                if not summary:
                    summary = doc["document_summary"]
                result = {"title": doc["document_title"], "type": "document"}

            elif doc["model"] == "questions_question":
                summary = _build_es_excerpt(doc)
                if not summary:
                    # We're excerpting only question_content, so if
                    # the query matched question_title or
                    # question_answer_content, then there won't be any
                    # question_content excerpts. In that case, just
                    # show the question--but only the first 500
                    # characters.
                    summary = bleach.clean(doc["question_content"], strip=True)[:500]

                result = {
                    "title": doc["question_title"],
                    "type": "question",
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }

            else:
                # Unknown model: skip it rather than re-use (and mutate)
                # the previous iteration's ``result``/``summary`` or fail
                # on unbound names for the very first hit.
                continue

            result["url"] = doc["url"]
            result["object"] = doc
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc.es_meta.score
            result["explanation"] = escape(format_explanation(doc.es_meta.explanation))
            result["id"] = doc["id"]
            results.append(result)

    except ES_EXCEPTIONS as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({"error": _("Search Unavailable")}), content_type=content_type, status=503)

        # Cheating here: Convert from 'Timeout()' to 'timeout' so
        # we have less code, but still have good stats.
        exc_bucket = repr(exc).lower().strip("()")
        statsd.incr("search.esunified.{0}".format(exc_bucket))

        log.exception(exc)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return render(request, t, {"q": cleaned["q"]}, status=503)

    fallback_results = None
    if num_results == 0:
        fallback_results = _fallback_results(language, cleaned["product"])

    product = Product.objects.filter(slug__in=cleaned["product"])
    if product:
        product_titles = [_(p.title, "DB: products.Product.title") for p in product]
    else:
        product_titles = [_("All Products")]

    product_titles = ", ".join(product_titles)

    data = {
        "num_results": num_results,
        "results": results,
        "fallback_results": fallback_results,
        "product_titles": product_titles,
        "q": cleaned["q"],
        "w": cleaned["w"],
        "lang_name": lang_name,
    }

    if is_json:
        # Models are not json serializable.
        for entry in data["results"]:
            del entry["object"]
        data["total"] = len(data["results"])

        data["products"] = [{"slug": p.slug, "title": p.title} for p in Product.objects.filter(visible=True)]

        if product:
            data["product"] = product[0].slug

        pages = Paginator(pages)
        data["pagination"] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data["message"] = _("No pages matched the search criteria")
        json_data = json.dumps(data)
        if callback:
            # JSONP: wrap the payload in the (already validated) callback.
            json_data = callback + "(" + json_data + ");"

        return HttpResponse(json_data, content_type=content_type)

    data.update(
        {
            "product": product,
            "products": Product.objects.filter(visible=True),
            "pages": pages,
            "search_form": search_form,
            "advanced": False,
        }
    )
    results_ = render(request, template, data)
    cache_period = settings.SEARCH_CACHE_PERIOD
    results_["Cache-Control"] = "max-age=%s" % (cache_period * 60)
    results_["Expires"] = (datetime.utcnow() + timedelta(minutes=cache_period)).strftime(EXPIRES_FMT)
    # Remember the query in a cookie for one hour.
    results_.set_cookie(settings.LAST_SEARCH_COOKIE, urlquote(cleaned["q"]), max_age=3600, secure=False, httponly=False)

    return results_
Example #32
0
def advanced_search(request):
    """Elasticsearch-specific Advanced search view.

    Builds one filter per selected document type (wiki documents,
    support questions, discussion threads), ORs them together, runs the
    query and renders the paginated results.

    :arg request: the current HttpRequest; search parameters are read
        from ``request.GET``

    :returns: an HttpResponse -- the search form when ``a`` is ``'2'``
        (the default) or the form data is invalid (a 400 JSON error
        instead when ``request.IS_JSON``), otherwise the results as
        JSON/JSONP or rendered ``search/results.html``
    """

    to_json = JSONRenderer().render
    template = 'search/results.html'

    # 1. Prep request.
    r = request.GET.copy()
    # TODO: Figure out how to get rid of 'a' and do it.
    # It basically is used to switch between showing the form or results.
    a = request.GET.get('a', '2')
    # TODO: This is so the 'a=1' stays in the URL for pagination.
    r['a'] = 1

    language = locale_or_default(
        request.GET.get('language', request.LANGUAGE_CODE))
    r['language'] = language
    lang = language.lower()
    lang_name = settings.LANGUAGES_DICT.get(lang) or ''

    # 2. Build form.
    search_form = AdvancedSearchForm(r, auto_id=False)
    search_form.set_allowed_forums(request.user)

    # 3. Validate request.
    # Note: a == 2 means "show the form"--that's all we use it for now.
    if a == '2' or not search_form.is_valid():
        if request.IS_JSON:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                content_type=request.CONTENT_TYPE,
                status=400)

        t = 'search/form.html'
        data = {
            'advanced': True,
            'request': request,
            'search_form': search_form
        }
        # get value for search input from last search term.
        last_search = request.COOKIES.get(settings.LAST_SEARCH_COOKIE)
        # If there is any cached input from last search, pass it to template
        if last_search and 'q' not in r:
            cached_field = urlquote(last_search)
            data.update({'cached_field': cached_field})

        return cache_control(render(request, t, data),
                             settings.SEARCH_CACHE_PERIOD)

    # 4. Generate search.
    cleaned = search_form.cleaned_data

    # A single searcher is used because we want to search across
    # multiple doctypes.
    searcher = (AnalyzerS().es(
        urls=settings.ES_URLS,
        timeout=settings.ES_TIMEOUT,
        use_ssl=settings.ES_USE_SSL,
        http_auth=settings.ES_HTTP_AUTH,
        connection_class=RequestsHttpConnection).indexes(
            es_utils.read_index('default')))

    doctypes = []
    final_filter = F()
    unix_now = int(time.time())
    # (field name, before/after option, date value) for each date range
    # filter shared by questions and discussions.
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
    )

    # Start - wiki search configuration

    if cleaned['w'] & constants.WHERE_WIKI:
        wiki_f = F(model='wiki_document')

        # Category filter
        if cleaned['category']:
            wiki_f &= F(document_category__in=cleaned['category'])

        # Locale filter
        wiki_f &= F(document_locale=language)

        # Product filter
        products = cleaned['product']
        for p in products:
            wiki_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            wiki_f &= F(topic=t)

        # Archived bit
        if not cleaned['include_archived']:
            wiki_f &= F(document_is_archived=False)

        # Apply sortby
        sortby = cleaned['sortby_documents']
        try:
            searcher = searcher.order_by(*constants.SORT_DOCUMENTS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending us
            # sortby values that aren't valid.
            pass

        doctypes.append(DocumentMappingType.get_mapping_type_name())
        final_filter |= wiki_f

    # End - wiki search configuration

    # Start - support questions configuration

    if cleaned['w'] & constants.WHERE_SUPPORT:
        question_f = F(model='questions_question')

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful', 'is_archived')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_f &= F(**d)

        if cleaned['asked_by']:
            question_f &= F(question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_f &= F(question_answer_creator=cleaned['answered_by'])

        # Comma-separated tags; blank entries are ignored.
        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_f &= F(question_tag=t)

        # Product filter
        products = cleaned['product']
        for p in products:
            question_f &= F(product=p)

        # Topics filter
        topics = cleaned['topics']
        for t in topics:
            question_f &= F(topic=t)

        # Note: num_voted (with a d) is a different field than num_votes
        # (with an s). The former is a dropdown and the latter is an
        # integer value.
        if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
            question_f &= F(
                question_num_votes__lte=max(cleaned['num_votes'], 0))
        elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
            question_f &= F(question_num_votes__gte=cleaned['num_votes'])

        # Apply sortby
        sortby = cleaned['sortby']
        try:
            searcher = searcher.order_by(*constants.SORT_QUESTIONS[sortby])
        except IndexError:
            # Skip index errors because they imply the user is sending us
            # sortby values that aren't valid.
            pass

        # Apply created and updated filters
        question_f = _apply_interval_filters(
            question_f, interval_filters, unix_now)

        doctypes.append(QuestionMappingType.get_mapping_type_name())
        final_filter |= question_f

    # End - support questions configuration

    # Start - discussion forum configuration

    if cleaned['w'] & constants.WHERE_DISCUSSION:
        discussion_f = F(model='forums_thread')

        if cleaned['author']:
            discussion_f &= F(post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_f &= F(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_f &= F(post_is_locked=1)

        valid_forum_ids = [
            f.id for f in Forum.authorized_forums_for_user(request.user)
        ]

        forum_ids = None
        if cleaned['forum']:
            forum_ids = [f for f in cleaned['forum'] if f in valid_forum_ids]

        # If we removed all the forums they wanted to look at or if
        # they didn't specify, then we filter on the list of all
        # forums they're authorized to look at.
        if not forum_ids:
            forum_ids = valid_forum_ids

        discussion_f &= F(post_forum_id__in=forum_ids)

        # Apply created and updated filters
        discussion_f = _apply_interval_filters(
            discussion_f, interval_filters, unix_now)

        doctypes.append(ThreadMappingType.get_mapping_type_name())
        final_filter |= discussion_f

    # End - discussion forum configuration

    # Done with all the filtery stuff--time to generate results

    searcher = searcher.doctypes(*doctypes)
    searcher = searcher.filter(final_filter)

    if 'explain' in request.GET and request.GET['explain'] == '1':
        searcher = searcher.explain()

    cleaned_q = cleaned['q']

    # Set up the highlights. Show the entire field highlighted.
    searcher = searcher.highlight(
        'question_content',  # support forum
        'document_summary',  # kb
        'post_content',  # contributor forum
        pre_tags=['<b>'],
        post_tags=['</b>'],
        number_of_fragments=0)

    searcher = apply_boosts(searcher)

    # Build the query
    if cleaned_q:
        query_fields = chain(*[
            cls.get_query_fields() for cls in
            [DocumentMappingType, ThreadMappingType, QuestionMappingType]
        ])
        query = {}
        # Create a simple_query_search query for every field we want to search.
        for field in query_fields:
            query['%s__sqs' % field] = cleaned_q

        # Transform the query to use locale aware analyzers.
        query = es_utils.es_query_with_analyzer(query, language)

        searcher = searcher.query(should=True, **query)

    searcher = searcher[:settings.SEARCH_MAX_RESULTS]

    # 5. Generate output
    pages = paginate(request, searcher, settings.SEARCH_RESULTS_PER_PAGE)

    if pages.paginator.count == 0:
        # If we know there aren't any results, show fallback_results.
        fallback_results = _fallback_results(language, cleaned['product'])
        results = []
    else:
        fallback_results = None
        results = build_results_list(pages, request.IS_JSON)

    product = Product.objects.filter(slug__in=cleaned['product'])
    if product:
        product_titles = [
            pgettext('DB: products.Product.title', p.title) for p in product
        ]
    else:
        product_titles = [_('All Products')]

    # FIXME: This is probably bad l10n.
    product_titles = ', '.join(product_titles)

    data = {
        'num_results': pages.paginator.count,
        'results': results,
        'fallback_results': fallback_results,
        'product_titles': product_titles,
        'q': cleaned['q'],
        'w': cleaned['w'],
        'lang_name': lang_name,
        'advanced': True,
        'products': Product.objects.filter(visible=True)
    }

    if request.IS_JSON:
        data['total'] = len(data['results'])
        # Replace the queryset with plain dicts for the JSON payload.
        data['products'] = [{
            'slug': p.slug,
            'title': p.title
        } for p in data['products']]

        if product:
            data['product'] = product[0].slug

        pages = Paginator(pages)
        data['pagination'] = dict(
            number=pages.pager.number,
            num_pages=pages.pager.paginator.num_pages,
            has_next=pages.pager.has_next(),
            has_previous=pages.pager.has_previous(),
            max=pages.max,
            span=pages.span,
            dotted_upper=pages.pager.dotted_upper,
            dotted_lower=pages.pager.dotted_lower,
            page_range=pages.pager.page_range,
            url=pages.pager.url,
        )
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = to_json(data)
        if request.JSON_CALLBACK:
            # JSONP: wrap the payload in the requested callback.
            json_data = request.JSON_CALLBACK + '(' + json_data + ');'
        return HttpResponse(json_data, content_type=request.CONTENT_TYPE)

    data.update({
        'product': product,
        'pages': pages,
        'search_form': search_form
    })
    resp = cache_control(render(request, template, data),
                         settings.SEARCH_CACHE_PERIOD)
    # Remember the query in a cookie for one hour.
    resp.set_cookie(settings.LAST_SEARCH_COOKIE,
                    urlquote(cleaned['q']),
                    max_age=3600,
                    secure=False,
                    httponly=False)
    return resp


def _apply_interval_filters(base_f, interval_filters, unix_now):
    """AND the created/updated range constraints onto ``base_f``.

    :arg base_f: the F to narrow
    :arg interval_filters: iterable of (field name, interval option,
        date value) tuples
    :arg unix_now: upper bound used for "after" ranges

    :returns: the narrowed F
    """
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            # From 0 up to the given date (clamped to be non-negative).
            base_f &= F(**{
                filter_name + '__gte': 0,
                filter_name + '__lte': max(filter_date, 0)
            })
        elif filter_option == constants.INTERVAL_AFTER:
            # From the given date (clamped to now) up to now.
            base_f &= F(**{
                filter_name + '__gte': min(filter_date, unix_now),
                filter_name + '__lte': unix_now
            })
    return base_f
Example #33
0
def _check_memcached():
    """Probe every memcached server listed in ``settings.CACHES``.

    Returns a list of ``(level, message)`` tuples: one INFO entry per
    probed server, followed by one summary entry.  Any exception raised
    while inspecting the configuration or probing is reported as a single
    ERROR entry instead of propagating.
    """
    results = []
    try:
        for cache_props in settings.CACHES.values():
            backend = cache_props['BACKEND']
            location = cache_props['LOCATION']

            # LOCATION can be a string or a list of strings
            if isinstance(location, basestring):
                location = location.split(';')

            if 'memcache' in backend:
                for loc in location:
                    # TODO: this doesn't handle unix: variant
                    ip, port = loc.split(':')
                    result = test_memcached(ip, int(port))
                    results.append(
                        (INFO, '%s:%s %s' % (ip, port, result)))

        # At this point ``results`` holds exactly one entry per probed
        # server, so its length is the number of memcache servers.
        if not results:
            results.append((ERROR, 'memcache is not configured.'))
        elif len(results) < 2:
            results.append(
                (ERROR, ('You should have at least 2 memcache servers. '
                         'You have %s.' % len(results))))
        else:
            results.append((INFO, 'memcached servers look good.'))

    except Exception as exc:
        results.append(
            (ERROR, 'Exception while looking at memcached: %s' % str(exc)))

    return results


def _check_libraries():
    """Check that PIL can encode a JPEG by saving a tiny in-memory image.

    Returns a list holding a single ``(level, message)`` tuple.
    """
    results = []
    try:
        Image.new('RGB', (16, 16)).save(StringIO.StringIO(), 'JPEG')
        results.append((INFO, 'PIL+JPEG: Got it!'))
    except Exception as exc:
        results.append((ERROR, 'PIL+JPEG: Probably missing: '
                        'Failed to create a jpeg image: %s' % exc))
    return results


def _check_filepaths():
    """Check that the media directories exist with read + write access.

    Each configured path is resolved relative to ``settings.MEDIA_ROOT``.
    Returns one INFO ``(level, message)`` tuple per directory that exists
    and is accessible with the wanted permissions.
    """
    msg = 'We want read + write.'
    filepaths = (
        (settings.USER_AVATAR_PATH, os.R_OK | os.W_OK, msg),
        (settings.IMAGE_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.THUMBNAIL_UPLOAD_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_IMAGE_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_PATH, os.R_OK | os.W_OK, msg),
        (settings.GALLERY_VIDEO_THUMBNAIL_PATH, os.R_OK | os.W_OK, msg),
        (settings.GROUP_AVATAR_PATH, os.R_OK | os.W_OK, msg),
    )

    results = []
    for path, perms, notes in filepaths:
        path = os.path.join(settings.MEDIA_ROOT, path)
        path_exists = os.path.isdir(path)
        path_perms = os.access(path, perms)

        # NOTE(review): a missing or inaccessible directory is skipped
        # rather than reported as an ERROR, so this check can never fail
        # the monitor.  Presumably deliberate -- confirm before changing.
        if path_exists and path_perms:
            results.append(
                (INFO,
                 '%s: %s %s %s' % (path, path_exists, path_perms, notes)))

    return results


def _check_rabbitmq():
    """Check that a connection to RabbitMQ can be established.

    Returns a list holding a single ``(level, message)`` tuple.  The
    probe connection is closed again before returning so that repeated
    hits on the monitor do not accumulate open broker connections.
    """
    results = []
    rabbit_conn = None
    try:
        rabbit_conn = establish_connection(connect_timeout=2)
        rabbit_conn.connect()
        results.append((INFO, 'Successfully connected to RabbitMQ.'))

    except (socket.error, IOError) as exc:
        results.append(
            (ERROR, 'Error connecting to RabbitMQ: %s' % str(exc)))

    except Exception as exc:
        results.append(
            (ERROR, 'Exception while looking at RabbitMQ: %s' % str(exc)))

    finally:
        if rabbit_conn is not None:
            try:
                rabbit_conn.close()
            except Exception:
                # Best effort: the outcome of the probe is already
                # recorded and a failed close must not break the view.
                pass

    return results


def _check_elasticsearch():
    """Check that ElasticSearch answers a doctype-stats query on the read index.

    Returns a list holding a single ``(level, message)`` tuple.
    """
    results = []
    try:
        es_utils.get_doctype_stats(es_utils.read_index())
        results.append(
            (INFO, ('Successfully connected to ElasticSearch and index '
                    'exists.')))

    except es_utils.ES_EXCEPTIONS as exc:
        results.append((ERROR, 'ElasticSearch problem: %s' % str(exc)))

    except Exception as exc:
        results.append(
            (ERROR, 'Exception while looking at ElasticSearch: %s' % str(exc)))

    return results


def _check_redis():
    """Check each backend named in ``settings.REDIS_BACKENDS``, if defined.

    Returns one ``(level, message)`` tuple per backend; the list is empty
    when the setting does not exist.
    """
    results = []
    if hasattr(settings, 'REDIS_BACKENDS'):
        for backend in settings.REDIS_BACKENDS:
            try:
                redis_client(backend)
                results.append((INFO, '%s: Pass!' % backend))
            except RedisError:
                results.append((ERROR, '%s: Fail!' % backend))
    return results


def monitor(request):
    """View for services monitor.

    Runs every component check, then renders ``services/monitor.html``
    with two context variables:

    * ``component_status``: component name -> list of
      ``(level, message)`` tuples produced by that component's check.
    * ``status_summary``: component name -> ``True`` when none of its
      tuples has level ERROR, ``False`` otherwise.

    The response status is 500 if any component reported an ERROR and
    200 otherwise.
    """
    # Note: To add a new component to the services monitor, do your
    # testing and then add a name -> list of output tuples map to
    # status.
    status = {
        'memcached': _check_memcached(),
        'libraries': _check_libraries(),
        'filepaths': _check_filepaths(),
        'RabbitMQ': _check_rabbitmq(),
        'ElasticSearch': _check_elasticsearch(),
        'Redis': _check_redis(),
    }

    # Check Celery.
    # start = time.time()
    # pong = celery.task.ping()
    # rabbit_results = r = {'duration': time.time() - start}
    # status_summary['rabbit'] = pong == 'pong' and r['duration'] < 1

    status_code = 200

    status_summary = {}
    for component, output in status.items():
        healthy = not any(item[0] == ERROR for item in output)
        status_summary[component] = healthy
        if not healthy:
            # A single failing component fails the whole monitor.
            status_code = 500

    return render(request,
                  'services/monitor.html', {
                      'component_status': status,
                      'status_summary': status_summary
                  },
                  status=status_code)