Python wiki_searcherの例、wiki.models.wiki_searcher Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

    def test_exclude_words(self):
        """A term prefixed with '-' is excluded from the query."""
        # 'spanish' alone gives one result; adding '-content' gives none.
        for text, expected in (('spanish', 1), ('spanish -content', 0)):
            hits = list(wiki_searcher().query(text))
            eq_(expected, len(hits))

コード例 #2

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

    def test_exclude_words(self):
        """A term prefixed with '-' is excluded from the query."""
        # 'spanish' alone gives one result; adding '-content' gives none.
        for text, expected in (('spanish', 1), ('spanish -content', 0)):
            hits = list(wiki_searcher().query(text))
            eq_(expected, len(hits))

コード例 #3

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

    def test_no_syntax_error(self):
        """Special characters in a query must not cause a syntax error."""
        cases = (
            ('video^$', 1),
            ('video^^^$$$^', 1),
            ('google.com/ig', 0),
        )
        for text, expected in cases:
            hits = list(wiki_searcher().query(text))
            eq_(expected, len(hits))

コード例 #4

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

    def test_no_syntax_error(self):
        """Special characters in a query must not cause a syntax error."""
        cases = (
            ('video^$', 1),
            ('video^^^$$$^', 1),
            ('google.com/ig', 0),
        )
        for text, expected in cases:
            hits = list(wiki_searcher().query(text))
            eq_(expected, len(hits))

コード例 #5

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_no_filter(self):
     """An empty query with no filters yields six results."""
     # Note: query('') is kept on purpose. It forces a new S, so the
     # test never runs against an S that is not in its original state.
     searcher = wiki_searcher().query('')
     hits = list(searcher)
     eq_(6, len(hits))

コード例 #6

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_no_filter(self):
     """An empty query with no filters yields six results."""
     # Note: query('') is kept on purpose. It forces a new S, so the
     # test never runs against an S that is not in its original state.
     searcher = wiki_searcher().query('')
     hits = list(searcher)
     eq_(6, len(hits))

コード例 #7

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_unicode_excerpt(self):
     """Building and rendering an excerpt with non-ASCII text works."""
     searcher = wiki_searcher().highlight('html')
     searcher = searcher.query(u'\u30c1').values_dict('html')
     hits = list(searcher)
     try:
         context = {'c': searcher.excerpt(hits[0])}
         render('{{ c }}', context)
     except UnicodeDecodeError:
         self.fail('Raised UnicodeDecodeError.')

コード例 #8

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

    def test_utf8_excerpt(self):
        """The cleaned excerpt for a non-ASCII query contains the query."""
        term = u'fa\xe7on'
        searcher = wiki_searcher().highlight('html')
        searcher = searcher.query(term).values_dict('html')

        hits = list(searcher)
        raw_excerpt = searcher.excerpt(hits[0])
        excerpt = clean_excerpt(raw_excerpt[0][0])
        assert term in excerpt, u'%s not in %s' % (term, excerpt)

コード例 #9

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

    def test_utf8_excerpt(self):
        """The cleaned excerpt for a non-ASCII query contains the query."""
        term = u'fa\xe7on'
        searcher = wiki_searcher().highlight('html')
        searcher = searcher.query(term).values_dict('html')

        hits = list(searcher)
        raw_excerpt = searcher.excerpt(hits[0])
        excerpt = clean_excerpt(raw_excerpt[0][0])
        assert term in excerpt, u'%s not in %s' % (term, excerpt)

コード例 #10

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_unicode_excerpt(self):
     """Building and rendering an excerpt with non-ASCII text works."""
     searcher = wiki_searcher().highlight('html')
     searcher = searcher.query(u'\u30c1').values_dict('html')
     hits = list(searcher)
     try:
         context = {'c': searcher.excerpt(hits[0])}
         render('{{ c }}', context)
     except UnicodeDecodeError:
         self.fail('Raised UnicodeDecodeError.')

コード例 #11

0

ファイルを表示

ファイル: views.py プロジェクト: jasonthomas/kitsune

def suggestions(request):
    """Return OpenSearch suggestions for the ``q`` GET parameter.

    Responds with a bad-request response when ``q`` is missing or empty.
    Otherwise the body is a JSON list of: the term, the result titles, an
    empty list, and the result URLs.  Results are up to 5 wiki hits
    followed by up to 5 question hits.
    """
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.locale)

    # Assumption: wiki_search sets filter(is_archived=False).
    wiki_hits = (wiki_searcher(request).filter(is_archived=False)
                                       .filter(locale=locale)
                                       .query(term)[:5])
    question_hits = (question_searcher(request).filter(has_helpful=True)
                                               .query(term)[:5])
    results = list(chain(wiki_hits, question_hits))

    def urlize(obj):
        # Absolute URL for a result, built from the current site.
        return u'https://%s%s' % (site, obj.get_absolute_url())

    titles = [r.title for r in results]
    urls = [urlize(r) for r in results]
    return HttpResponse(json.dumps([term, titles, [], urls]),
                        mimetype=mimetype)

コード例 #12

0

ファイルを表示

ファイル: views.py プロジェクト: ccarruitero/kitsune

def suggestions(request):
    """Return OpenSearch suggestions for the ``q`` GET parameter.

    Responds with a bad-request response when ``q`` is missing or empty.
    Otherwise the body is a JSON list of: the term, the result titles, an
    empty list, and the result URLs.  Results are up to 5 wiki hits
    followed by up to 5 question hits.
    """
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.locale)

    # Assumption: wiki_search sets filter(is_archived=False).
    wiki_hits = (wiki_searcher(request).filter(is_archived=False)
                                       .filter(locale=locale)
                                       .query(term)[:5])
    question_hits = (question_searcher(request).filter(has_helpful=True)
                                               .query(term)[:5])
    results = list(chain(wiki_hits, question_hits))

    def urlize(obj):
        # Absolute URL for a result, built from the current site.
        return u'https://%s%s' % (site, obj.get_absolute_url())

    titles = [r.title for r in results]
    urls = [urlize(r) for r in results]
    return HttpResponse(json.dumps([term, titles, [], urls]),
                        mimetype=mimetype)

コード例 #13

0

ファイルを表示

ファイル: views.py プロジェクト: readevalprint/kitsune

def _search_suggestions(request, query, locale, category_tags):
    """Collect the most relevant wiki pages and questions for a query.

    query -- full text to search on
    locale -- locale the wiki results are limited to
    category_tags -- when truthy, both searches are filtered to these tags

    Each item of the returned list is a dict with the keys 'type',
    'search_summary', 'title', 'url' and 'object'.

    The list holds up to 3 wiki pages, followed by up to 3 questions.

    """
    question_s = question_searcher(request)
    wiki_s = wiki_searcher(request)

    # Max number of search results per type.
    max_wiki = max_questions = 3

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(tag__in=category_tags)
        wiki_s = wiki_s.filter(tag__in=category_tags)

    wiki_s = wiki_s.filter(locale=locale,
                           category__in=settings.SEARCH_DEFAULT_CATEGORIES)
    wiki_hits = wiki_s.query(query).values_dict('id')[:max_wiki]

    results = []
    for hit in wiki_hits:
        try:
            documents = Document.objects.select_related('current_revision')
            doc = documents.get(pk=hit['id'])
            item = {
                'search_summary': doc.current_revision.summary,
                'url': doc.get_absolute_url(),
                'title': doc.title,
                'type': 'document',
                'object': doc,
            }
            results.append(item)
        except Document.DoesNotExist:
            # The search hit has no matching Document; leave it out.
            pass

    # Questions app is en-US only.
    question_hits = question_s.query(query).values_dict('id')[:max_questions]

    for hit in question_hits:
        try:
            question = Question.objects.get(pk=hit['id'])
            item = {
                'search_summary': question.content[0:500],
                'url': question.get_absolute_url(),
                'title': question.title,
                'type': 'question',
                'object': question
            }
            results.append(item)
        except Question.DoesNotExist:
            # The search hit has no matching Question; leave it out.
            pass

    return results

コード例 #14

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_category(self):
     """Filtering by category gives the expected number of results."""
     for category, expected in ((10, 5), (30, 1)):
         hits = wiki_searcher().filter(category__in=[category])
         eq_(expected, len(hits))

コード例 #15

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_range_filter(self):
     """Filtering ``updated`` with gte and lte bounds yields two results."""
     bounds = {'updated__gte': 1284664176, 'updated__lte': 1285765791}
     hits = wiki_searcher().filter(**bounds)
     eq_(2, len(hits))

コード例 #16

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_range_filter(self):
     """Filtering ``updated`` with gte and lte bounds yields two results."""
     bounds = {'updated__gte': 1284664176, 'updated__lte': 1285765791}
     hits = wiki_searcher().filter(**bounds)
     eq_(2, len(hits))

コード例 #17

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_wiki_index_content(self):
     """Document content is indexed: 'video' finds only document 1."""
     searcher = wiki_searcher().query('video')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(1, first.id)

コード例 #18

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_ngram_chars(self):
     """A single-character U+30C1 query finds only document 2."""
     searcher = wiki_searcher().query(u'\u30c1')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(2, first.id)

コード例 #19

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_wiki_index_content(self):
     """Document content is indexed: 'video' finds only document 1."""
     searcher = wiki_searcher().query('video')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(1, first.id)

コード例 #20

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_indexer(self):
     """Querying for 'audio' yields two results."""
     searcher = wiki_searcher()
     hits = searcher.query('audio')
     eq_(2, len(hits))

コード例 #21

0

ファイルを表示

ファイル: views.py プロジェクト: james4388/kitsune

def _search_suggestions(request, query, locale, category_tags):
    """Return a list of the most relevant wiki pages and questions.

    request -- used for the 'elasticsearch' waffle flag check and, on the
               Sphinx path, passed to the searcher factories
    query -- full text to search on
    locale -- locale to limit wiki results to
    category_tags -- when truthy, both searches are filtered to these tags

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.  If the search
    backend raises one of the handled search errors, a statsd counter is
    incremented and an empty list is returned instead.

    """
    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
        question_s = Question.search()
        wiki_s = Document.search()
    else:
        engine = 'sphinx'
        question_s = question_searcher(request)
        wiki_s = wiki_searcher(request)

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(tag__in=category_tags)
        wiki_s = wiki_s.filter(tag__in=category_tags)

    try:
        raw_results = (
            wiki_s.filter(locale=locale,
                          category__in=settings.SEARCH_DEFAULT_CATEGORIES)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                # The search hit has no matching Document; leave it out.
                pass

        # Note: Questions app is en-US only.
        raw_results = (question_s.query(query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q
                })
            except Question.DoesNotExist:
                # The search hit has no matching Question; leave it out.
                pass

    except (SearchError, ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Count the failure per engine and error type, then degrade to
        # "no suggestions" rather than propagating the error.
        if isinstance(exc, SearchError):
            statsd.incr('questions.suggestions.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.%s.elasticsearchexception' %
                        engine)

        return []

    # Without this the successful path implicitly returned None.
    return results

コード例 #22

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_clean_hyphens(self):
     """A hyphen inside a word is not treated as a special character."""
     searcher = wiki_searcher().query('marque-page')
     hits = list(searcher)
     eq_(1, len(hits))

コード例 #23

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_clean_hyphens(self):
     """A hyphen inside a word is not treated as a special character."""
     searcher = wiki_searcher().query('marque-page')
     hits = list(searcher)
     eq_(1, len(hits))

コード例 #24

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_wiki_index_keywords(self):
     """Revision keywords are indexed: 'foobar' finds only document 3."""
     searcher = wiki_searcher().query('foobar')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(3, first.id)

コード例 #25

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_ngram_chars(self):
     """A single-character U+30C1 query finds only document 2."""
     searcher = wiki_searcher().query(u'\u30c1')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(2, first.id)

コード例 #26

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_wiki_index_strip_html(self):
     """HTML is stripped before indexing: 'strong' finds nothing."""
     searcher = wiki_searcher().query('strong')
     hits = list(searcher)
     eq_(0, len(hits))

コード例 #27

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_wiki_index_keywords(self):
     """Revision keywords are indexed: 'foobar' finds only document 3."""
     searcher = wiki_searcher().query('foobar')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(3, first.id)

コード例 #28

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_wiki_index_summary(self):
     """Revision summaries are indexed: 'whatever' finds only document 3."""
     searcher = wiki_searcher().query('whatever')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(3, first.id)

コード例 #29

0

ファイルを表示

ファイル: views.py プロジェクト: ccarruitero/kitsune

def search_with_sphinx(request, template=None):
    """Sphinx-specific search view.

    request -- the incoming request; search options are read from
               request.GET
    template -- template used to render the search form for mobile
                requests (other requests use 'search/form.html')

    Returns a 400 JSON response for an invalid JSONP callback or, for JSON
    requests, invalid form data; the rendered search form (with cache
    headers) when the form is invalid or ``a == '2'``; and a 503 response
    when the search backend raises one of the handled search errors.

    NOTE(review): as it stands here, nothing is returned after a
    successful search (``start``, ``lang_name``, ``pages`` and ``results``
    are computed but never used) -- the tail of the view appears to be
    missing; confirm against the full source.

    """

    # Time ES and Sphinx separate. See bug 723930.
    # TODO: Remove this once Sphinx is gone.
    start = time.time()

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # The engine name is used for the elastic-only branches below and in
    # the statsd counter names.
    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
    else:
        engine = 'sphinx'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        # Render the (possibly advanced) search form with cache headers.
        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = wiki_searcher(request)
    question_s = question_searcher(request)
    discussion_s = discussion_searcher(request)

    documents = []

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_s = question_s.filter(tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            # question_votes is applied to the question search only.
            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            # question_votes is applied to the question search only.
            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            if cleaned_q:
                wiki_s = wiki_s.query(cleaned_q)
            wiki_s = wiki_s[:max_results]
            # Execute the query and append to documents
            documents += [('wiki', (pair[0], pair[1]))
                          for pair in enumerate(wiki_s.object_ids())]

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            if engine == 'elastic':
                highlight_fields = ['title', 'question_content',
                                    'answer_content']
            else:
                highlight_fields = ['content']

            question_s = question_s.highlight(
                *highlight_fields,
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                question_s = question_s.query(cleaned_q)
            question_s = question_s[:max_results]
            documents += [('question', (pair[0], pair[1]))
                          for pair in enumerate(question_s.object_ids())]

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            # Sort results by
            try:
                # Note that the first attribute needs to be the same
                # here and in forums/models.py discussion_search.
                discussion_s = discussion_s.group_by(
                    'thread_id', constants.GROUPSORT[sortby])
            except IndexError:
                pass

            discussion_s = discussion_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                discussion_s = discussion_s.query(cleaned_q)
            discussion_s = discussion_s[:max_results]
            documents += [('discussion', (pair[0], pair[1]))
                          for pair in enumerate(discussion_s.object_ids())]

        pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

        # Build a dict of { type_ -> list of indexes } for the specific
        # docs that we're going to display on this page.  This makes it
        # easy for us to slice the appropriate search Ss so we're limiting
        # our db hits to just the items we're showing.
        documents_dict = {}
        for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
            documents_dict.setdefault(doc[0], []).append(doc[1][0])

        docs_for_page = []
        for kind, search_s in [('wiki', wiki_s),
                                ('question', question_s),
                                ('discussion', discussion_s)]:
            if kind not in documents_dict:
                continue

            # documents_dict[type_] is a list of indexes--one for each
            # object id search result for that type_.  We use the values
            # at the beginning and end of the list for slice boundaries.
            begin = documents_dict[kind][0]
            end = documents_dict[kind][-1] + 1

            search_s = search_s[begin:end]

            if engine == 'elastic':
                # If we're doing elasticsearch, then we need to update
                # the _s variables to point to the sliced versions of
                # S so that, when we iterate over them in the
                # following list comp, we hang onto the version that
                # does the query, so we can call excerpt() on it
                # later.
                #
                # We only need to do this with elasticsearch.  For Sphinx,
                # search_s at this point is an ObjectResults and not an S
                # because we've already acquired object_ids on it.  Thus
                # if we update the _s variables, we'd be pointing to the
                # ObjectResults and not the S and then excerpting breaks.
                #
                # Ugh.
                if kind == 'wiki':
                    wiki_s = search_s
                elif kind == 'question':
                    question_s = search_s
                elif kind == 'discussion':
                    discussion_s = search_s

            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            type_, doc = docinfo
            try:
                if type_ == 'wiki':
                    summary = doc.current_revision.summary
                    result = {
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'document',
                        'object': doc}
                elif type_ == 'question':
                    summary = _build_excerpt(question_s, doc)
                    result = {
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'question',
                        'object': doc,
                        'is_solved': doc.is_solved,
                        'num_answers': doc.num_answers,
                        'num_votes': doc.num_votes,
                        'num_votes_past_week': doc.num_votes_past_week}
                else:
                    if engine == 'elastic':
                        thread = doc
                    else:
                        thread = Thread.objects.get(pk=doc.thread_id)

                    summary = _build_excerpt(discussion_s, doc)
                    result = {
                        'url': thread.get_absolute_url(),
                        'title': thread.title,
                        'type': 'thread',
                        'object': thread}
                result['search_summary'] = summary
                result['rank'] = rank
                results.append(result)
            except IndexError:
                break
            except ObjectDoesNotExist:
                continue

    except (SearchError, ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            # JSON requests return here, before any statsd counter is
            # incremented.
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, SearchError):
            statsd.incr('search.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('search.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('search.%s.elasticsearchexception' % engine)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

コード例 #30

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_wiki_index_strip_html(self):
     """HTML is stripped before indexing: 'strong' finds nothing."""
     searcher = wiki_searcher().query('strong')
     hits = list(searcher)
     eq_(0, len(hits))

コード例 #31

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_category(self):
     """Filtering by category gives the expected number of results."""
     for category, expected in ((10, 5), (30, 1)):
         hits = wiki_searcher().filter(category__in=[category])
         eq_(expected, len(hits))

コード例 #32

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_no_redirects(self):
     """Redirect articles are left out: 'ghosts' yields one result."""
     searcher = wiki_searcher().query('ghosts')
     hits = list(searcher)
     eq_(1, len(hits))

コード例 #33

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_wiki_index_summary(self):
     """Revision summaries are indexed: 'whatever' finds only document 3."""
     searcher = wiki_searcher().query('whatever')
     hits = list(searcher)
     eq_(1, len(hits))
     first = hits[0]
     eq_(3, first.id)

コード例 #34

0

ファイルを表示

ファイル: test_search.py プロジェクト: erikrose/kitsune

 def test_no_redirects(self):
     """Redirect articles are left out: 'ghosts' yields one result."""
     searcher = wiki_searcher().query('ghosts')
     hits = list(searcher)
     eq_(1, len(hits))

コード例 #35

0

ファイルを表示

def _search_suggestions(request, query, locale, category_tags):
    """Collect the most relevant wiki pages and questions for a query.

    query -- full text to search on
    locale -- locale the wiki results are limited to
    category_tags -- when truthy, both searches are filtered to these tags

    Each item of the returned list is a dict with the keys 'type',
    'search_summary', 'title', 'url' and 'object'.

    The list holds up to 3 wiki pages, followed by up to 3 questions.

    """
    question_s = question_searcher(request)
    wiki_s = wiki_searcher(request)

    # Max number of search results per type.
    max_wiki = max_questions = 3

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(tag__in=category_tags)
        wiki_s = wiki_s.filter(tag__in=category_tags)

    wiki_s = wiki_s.filter(locale=locale,
                           category__in=settings.SEARCH_DEFAULT_CATEGORIES)
    wiki_hits = wiki_s.query(query).values_dict('id')[:max_wiki]

    results = []
    for hit in wiki_hits:
        try:
            documents = Document.objects.select_related('current_revision')
            doc = documents.get(pk=hit['id'])
            item = {
                'search_summary': doc.current_revision.summary,
                'url': doc.get_absolute_url(),
                'title': doc.title,
                'type': 'document',
                'object': doc,
            }
            results.append(item)
        except Document.DoesNotExist:
            # The search hit has no matching Document; leave it out.
            pass

    # Questions app is en-US only.
    question_hits = question_s.query(query).values_dict('id')[:max_questions]

    for hit in question_hits:
        try:
            question = Question.objects.get(pk=hit['id'])
            item = {
                'search_summary': question.content[0:500],
                'url': question.get_absolute_url(),
                'title': question.title,
                'type': 'question',
                'object': question
            }
            results.append(item)
        except Question.DoesNotExist:
            # The search hit has no matching Question; leave it out.
            pass

    return results

コード例 #36

0

ファイルを表示

ファイル: test_search.py プロジェクト: jasonthomas/kitsune

 def test_indexer(self):
     """Querying for 'audio' yields two results."""
     searcher = wiki_searcher()
     hits = searcher.query('audio')
     eq_(2, len(hits))

コード例 #37

0

ファイルを表示

ファイル: views.py プロジェクト: jasonthomas/kitsune

def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
    else:
        engine = 'sphinx'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = map(int, r.getlist('category')) or \
                   settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use `initial`
    # on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = wiki_searcher(request)
    question_s = question_searcher(request)
    discussion_s = discussion_searcher(request)

    documents = []

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict((filter_name, _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters
                 if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split()]
        for t in q_tags:
            question_s = question_s.filter(tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            if filter_name != 'question_votes':
                discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            wiki_s = wiki_s.query(cleaned_q)[:max_results]
            # Execute the query and append to documents
            documents += [('wiki', (pair[0], pair[1]))
                          for pair in enumerate(wiki_s.object_ids())]

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            if engine == 'elastic':
                highlight_fields = ['title', 'question_content',
                                    'answer_content']
            else:
                highlight_fields = ['content']

            question_s = question_s.highlight(
                *highlight_fields,
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            question_s = question_s.query(cleaned_q)[:max_results]
            documents += [('question', (pair[0], pair[1]))
                          for pair in enumerate(question_s.object_ids())]

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            # Sort results by
            try:
                # Note that the first attribute needs to be the same
                # here and in forums/models.py discussion_search.
                discussion_s = discussion_s.group_by(
                    'thread_id', constants.GROUPSORT[sortby])
            except IndexError:
                pass

            discussion_s = discussion_s.highlight(
                'content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            discussion_s = discussion_s.query(cleaned_q)[:max_results]
            documents += [('discussion', (pair[0], pair[1]))
                          for pair in enumerate(discussion_s.object_ids())]

        pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

        # Build a dict of { type_ -> list of indexes } for the specific
        # docs that we're going to display on this page.  This makes it
        # easy for us to slice the appropriate search Ss so we're limiting
        # our db hits to just the items we're showing.
        documents_dict = {}
        for doc in documents[offset:offset + settings.SEARCH_RESULTS_PER_PAGE]:
            documents_dict.setdefault(doc[0], []).append(doc[1][0])

        docs_for_page = []
        for kind, search_s in [('wiki', wiki_s),
                                ('question', question_s),
                                ('discussion', discussion_s)]:
            if kind not in documents_dict:
                continue

            # documents_dict[type_] is a list of indexes--one for each
            # object id search result for that type_.  We use the values
            # at the beginning and end of the list for slice boundaries.
            begin = documents_dict[kind][0]
            end = documents_dict[kind][-1] + 1

            search_s = search_s[begin:end]

            if engine == 'elastic':
                # If we're doing elasticsearch, then we need to update
                # the _s variables to point to the sliced versions of
                # S so that, when we iterate over them in the
                # following list comp, we hang onto the version that
                # does the query, so we can call excerpt() on it
                # later.
                #
                # We only need to do this with elasticsearch.  For Sphinx,
                # search_s at this point is an ObjectResults and not an S
                # because we've already acquired object_ids on it.  Thus
                # if we update the _s variables, we'd be pointing to the
                # ObjectResults and not the S and then excerpting breaks.
                #
                # Ugh.
                if kind == 'wiki':
                    wiki_s = search_s
                elif kind == 'question':
                    question_s = search_s
                elif kind == 'discussion':
                    discussion_s = search_s

            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            type_, doc = docinfo
            try:
                if type_ == 'wiki':
                    summary = doc.current_revision.summary

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'document',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                elif type_ == 'question':
                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*question_s.excerpt(doc)) if m])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': doc.get_absolute_url(),
                        'title': doc.title,
                        'type': 'question',
                        'rank': rank,
                        'object': doc,
                    }
                    results.append(result)

                else:
                    if engine == 'elastic':
                        thread = doc
                    else:
                        thread = Thread.objects.get(pk=doc.thread_id)

                    try:
                        excerpt = excerpt_joiner.join(
                            [m for m in chain(*discussion_s.excerpt(doc))])
                    except ExcerptTimeoutError:
                        statsd.incr('search.excerpt.timeout')
                        excerpt = u''
                    except ExcerptSocketError:
                        statsd.incr('search.excerpt.socketerror')
                        excerpt = u''

                    summary = jinja2.Markup(clean_excerpt(excerpt))

                    result = {
                        'search_summary': summary,
                        'url': thread.get_absolute_url(),
                        'title': thread.title,
                        'type': 'thread',
                        'rank': rank,
                        'object': thread,
                    }
                    results.append(result)
            except IndexError:
                break
            except ObjectDoesNotExist:
                continue

    except (SearchError, ESTimeoutError, ESMaxRetryError), exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, SearchError):
            statsd.incr('search.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('search.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.%s.maxretryerror' % engine)

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)