Exemple #1
0
def search(request, locale):
    """Return Elasticsearch matches for the ``q`` parameter as JSON.

    Documents in ``locale`` are searched. For any locale other than
    'en-US' the English documents are searched too, and documents in the
    requested locale get a score boost. At most 40 hits are returned,
    with matches in the content field wrapped in ``<mark>`` tags.
    """
    # TODO: I'm betting that a simple search like this will be faster
    # and just as good as the more complex searches implemented by the
    # code in kuma/search/. Peter disagrees and thinks that we might
    # eventually want to make this endpoint use code from kuma/search/.
    # An alternative is to just abandon this API endpoint and have
    # the frontend call wiki.d.m.o/locale/search.json?q=query. On the
    # other hand, if we're ever going to implement any kind of
    # search-as-you-type interface, we'll need a super-fast custom
    # endpoint like this one.
    terms = request.GET.get('q')

    # Both branches use the same weighted full-text match.
    match_kwargs = {
        'query': terms,
        'fields': ['title^7', 'summary^2', 'content'],
    }

    documents = WikiDocumentType.search()
    if locale == 'en-US':
        documents = documents.filter('term', locale=locale)
        documents = documents.source(['slug', 'title', 'summary', 'tags'])
        documents = documents.query('multi_match', **match_kwargs)
    else:
        documents = documents.filter('terms', locale=[locale, 'en-US'])
        documents = documents.source(
            ['slug', 'title', 'summary', 'tags', 'locale'])
        # boost the score if the document is translated
        translated_boost = Q('term', locale={'value': locale, 'boost': 8})
        documents = documents.query(
            query.Bool(must=Q('multi_match', **match_kwargs),
                       should=[translated_boost]))

    # Add excerpts with search results highlighted
    documents = documents.highlight('content')
    documents = documents.highlight_options(order='score',
                                            pre_tags=['<mark>'],
                                            post_tags=['</mark>'])

    # Return as many as 40 matches, since we're not implementing pagination yet
    first_page = documents[0:40]
    return JsonResponse(first_page.execute().to_dict())
 def test_excerpt(self):
     """The serialized excerpt wraps the matched summary term in <em>."""
     highlighted = (
         WikiDocumentType.search()
         .query("match", summary="CSS")
         .highlight(*WikiDocumentType.excerpt_fields)
     )
     response = highlighted.execute()
     serialized = DocumentSerializer(response, many=True).data
     assert "A <em>CSS</em> article" == serialized[0]["excerpt"]
Exemple #3
0
 def test_current_locale_results(self):
     """Every hit of a search filtered on 'en-US' has the en-US locale."""
     self.refresh()
     title_match = query.Match(title='article')
     content_match = query.Match(content='article')
     search = WikiDocumentType.search().query(title_match | content_match)
     search = search.filter('term', locale='en-US')
     for hit in search.execute():
         eq_('en-US', hit.locale)
Exemple #4
0
 def test_excerpt(self):
     """The serialized excerpt wraps the matched summary term in <em>."""
     highlighted = (
         WikiDocumentType.search()
         .query('match', summary='CSS')
         .highlight(*WikiDocumentType.excerpt_fields)
     )
     response = highlighted.execute()
     serialized = DocumentSerializer(response, many=True).data
     assert 'A <em>CSS</em> article' == serialized[0]['excerpt']
Exemple #5
0
 def test_excerpt(self):
     """The serialized excerpt wraps the matched summary term in <em>."""
     highlighted = (
         WikiDocumentType.search()
         .query("match", summary="CSS")
         .highlight(*WikiDocumentType.excerpt_fields)
     )
     response = highlighted.execute()
     serialized = DocumentSerializer(response, many=True).data
     first_excerpt = serialized[0]["excerpt"]
     eq_(first_excerpt, u"A <em>CSS</em> article")
Exemple #6
0
 def test_excerpt(self):
     """The serialized excerpt wraps the matched summary term in <em>."""
     highlighted = (
         WikiDocumentType.search()
         .query('match', summary='CSS')
         .highlight(*WikiDocumentType.excerpt_fields)
     )
     response = highlighted.execute()
     serialized = DocumentSerializer(response).data
     first_excerpt = serialized[0]['excerpt']
     eq_(first_excerpt, u'A <em>CSS</em> article')
Exemple #7
0
 def test_excerpt(self):
     """The serialized excerpt wraps the matched summary term in <em>."""
     highlighted = (
         WikiDocumentType.search()
         .query('match', summary='CSS')
         .highlight(*WikiDocumentType.excerpt_fields)
     )
     response = highlighted.execute()
     serialized = DocumentSerializer(response, many=True).data
     first_excerpt = serialized[0]['excerpt']
     eq_(first_excerpt, u'A <em>CSS</em> article')
Exemple #8
0
 def test_get_excerpt_uses_summary(self):
     """Excerpts of "audio" matches contain the summary text only."""
     self.refresh()
     matches = WikiDocumentType.search().query("match", content="audio")
     # Guard against a vacuous pass when nothing matches.
     assert matches.count()
     for hit in matches.execute():
         text = hit.get_excerpt()
         assert "the word for tough things" in text
         assert "extra content" not in text
Exemple #9
0
 def test_get_excerpt_strips_html(self):
     """Excerpts of "audio" matches keep the word but no <strong> tag."""
     self.refresh()
     matches = WikiDocumentType.search().query('match', content='audio')
     # Guard against a vacuous pass when nothing matches.
     assert matches.count()
     for hit in matches.execute():
         text = hit.get_excerpt()
         assert 'audio' in text
         assert '<strong>' not in text
Exemple #10
0
 def delete_if_exists(self):
     """Delete the index named ``self.prefixed_name``, if there is one."""
     connection = WikiDocumentType.get_connection()
     try:
         connection.indices.delete(self.prefixed_name)
     except NotFoundError:
         # The index is already absent, so there is nothing left to do.
         return
Exemple #11
0
 def delete_if_exists(self):
     """Delete the index named ``self.prefixed_name``, if there is one."""
     connection = WikiDocumentType.get_connection()
     try:
         connection.indices.delete(self.prefixed_name)
     except NotFoundError:
         # The index is already absent, so there is nothing left to do.
         return
Exemple #12
0
 def test_get_excerpt_strips_html(self):
     """Excerpts of "audio" matches keep the word but no <strong> tag."""
     self.refresh()
     matches = WikiDocumentType.search().query("match", content="audio")
     # Guard against a vacuous pass when nothing matches.
     assert matches.count()
     for hit in matches.execute():
         text = hit.get_excerpt()
         assert "audio" in text
         assert "<strong>" not in text
Exemple #13
0
 def test_get_excerpt_uses_summary(self):
     """Excerpts of "audio" matches contain the summary text only."""
     self.refresh()
     matches = WikiDocumentType.search().query('match', content='audio')
     # Guard against a vacuous pass when nothing matches.
     assert matches.count()
     for hit in matches.execute():
         text = hit.get_excerpt()
         assert 'the word for tough things' in text
         assert 'extra content' not in text
Exemple #14
0
 def test_get_excerpt_uses_summary(self):
     """Excerpts of "audio" matches contain the summary text only."""
     self.refresh()
     matches = WikiDocumentType.search().query('match', content='audio')
     # Guard against a vacuous pass when nothing matches.
     ok_(matches.count() > 0)
     for hit in matches.execute():
         text = hit.get_excerpt()
         ok_('the word for tough things' in text)
         ok_('extra content' not in text)
Exemple #15
0
 def test_get_excerpt_strips_html(self):
     """Excerpts of "audio" matches keep the word but no <strong> tag."""
     self.refresh()
     matches = WikiDocumentType.search().query('match', content='audio')
     # Guard against a vacuous pass when nothing matches.
     ok_(matches.count() > 0)
     for hit in matches.execute():
         text = hit.get_excerpt()
         ok_('audio' in text)
         ok_('<strong>' not in text)
Exemple #16
0
 def test_current_locale_results(self):
     """Every hit of a search filtered on 'en-US' has the en-US locale."""
     self.refresh()
     title_match = query.Match(title='article')
     content_match = query.Match(content='article')
     search = WikiDocumentType.search().query(title_match | content_match)
     search = search.filter('term', locale='en-US')
     for hit in search.execute():
         eq_('en-US', hit.locale)
Exemple #17
0
def test_base_search(db):
    """An unmodified WikiDocumentType.search() is a match_all query."""
    base_query = WikiDocumentType.search().to_dict()
    assert base_query == {'query': {'match_all': {}}}
Exemple #18
0
def test_base_search(db):
    """An unmodified WikiDocumentType.search() is a match_all query."""
    base_query = WikiDocumentType.search().to_dict()
    assert base_query == {'query': {'match_all': {}}}
Exemple #19
0
 def test_current_locale_results(self):
     """Every hit of a search filtered on "en-US" has the en-US locale."""
     self.refresh()
     title_match = query.Match(title="article")
     content_match = query.Match(content="article")
     search = WikiDocumentType.search().query(title_match | content_match)
     search = search.filter("term", locale="en-US")
     for hit in search.execute():
         assert "en-US" == hit.locale
Exemple #20
0
    def handle(self, *args, **options):
        """Reindex the wiki documents and write the resulting message.

        Reads ``percent`` and ``chunk_size`` from ``options``. Raises
        CommandError when ``percent`` is outside 1..100.
        """
        logging.basicConfig(level=logging.INFO)

        percent = options["percent"]
        percent_is_valid = 1 <= percent <= 100
        if not percent_is_valid:
            raise CommandError("percent should be between 1 and 100")

        chunk_size = options["chunk_size"]
        message = WikiDocumentType.reindex_all(chunk_size, percent=percent)
        self.stdout.write(message + "\n")
Exemple #21
0
    def handle(self, *args, **options):
        """Reindex the wiki documents and write the resulting message.

        Reads ``percent`` and ``chunk_size`` from ``options``. Raises
        CommandError when ``percent`` is outside 1..100.
        """
        logging.basicConfig(level=logging.INFO)

        percent = options['percent']
        percent_is_valid = 1 <= percent <= 100
        if not percent_is_valid:
            raise CommandError('percent should be between 1 and 100')

        chunk_size = options['chunk_size']
        message = WikiDocumentType.reindex_all(chunk_size, percent=percent)
        self.stdout.write(unicode(message) + '\n')
Exemple #22
0
def pre_delete_handler(instance, **kwargs):
    """Queue removal of ``instance`` from the current search index.

    Does nothing unless ``settings.ES_LIVE_INDEX`` is truthy. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged and no task is queued.
    """
    if not settings.ES_LIVE_INDEX:
        return

    doc = instance
    current_index = Index.objects.get_current()

    if WikiDocumentType.should_update(doc):
        unindex_documents.delay([doc.pk], current_index.pk)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.pk)
Exemple #23
0
def mock_search(mock_elasticsearch):
    """Mock WikiDocumentType.search() for a fake Elasticsearch and index.

    Yields a search object created while ``get_connection`` is patched to
    return ``mock_elasticsearch`` and ``get_index`` to return "mdn-test".
    The patches are context managers, so they are undone even if creating
    the search fails or the generator is closed early.
    """
    with mock.patch(
            "kuma.wiki.search.connections.get_connection",
            return_value=mock_elasticsearch):
        with mock.patch(
                "kuma.wiki.search.WikiDocumentType.get_index",
                return_value="mdn-test"):
            yield WikiDocumentType.search()
Exemple #24
0
def mock_search(mock_elasticsearch):
    '''Mock WikiDocumentType.search() for a fake Elasticsearch and index.

    Yields a search object created while ``get_connection`` is patched to
    return ``mock_elasticsearch`` and ``get_index`` to return 'mdn-test'.
    The patches are context managers, so they are undone even if creating
    the search fails or the generator is closed early.
    '''
    with mock.patch(
            'kuma.wiki.search.connections.get_connection',
            return_value=mock_elasticsearch):
        with mock.patch(
                'kuma.wiki.search.WikiDocumentType.get_index',
                return_value='mdn-test'):
            yield WikiDocumentType.search()
Exemple #25
0
def pre_delete_handler(instance, **kwargs):
    """Queue removal of ``instance`` from the current search index.

    Does nothing unless ``settings.ES_LIVE_INDEX`` is truthy. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged and no task is queued.
    """
    if not settings.ES_LIVE_INDEX:
        return

    doc = instance
    current_index = Index.objects.get_current()

    if WikiDocumentType.should_update(doc):
        unindex_documents.delay([doc.pk], current_index.pk)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.pk)
Exemple #26
0
    def test_document_serializer(self):
        """DocumentSerializer handles a whole result set and a single hit."""
        response = WikiDocumentType.search().execute()

        # many=True: the serialized data is a list with one entry per hit.
        many_data = DocumentSerializer(response, many=True).data
        eq_(len(many_data), 7)
        ok_(isinstance(many_data, list))
        serialized_ids = [entry['id'] for entry in many_data]
        ok_(1 in serialized_ids)

        # many=False: a single hit serializes to a dict.
        first_hit = response[0]
        single_data = DocumentSerializer(first_hit, many=False).data
        ok_(isinstance(single_data, dict))
        eq_(single_data['id'], first_hit.id)
Exemple #27
0
def mock_search(mock_elasticsearch):
    '''Mock WikiDocumentType.search() for a fake Elasticsearch and index.

    Yields a search object created while ``get_connection`` is patched to
    return ``mock_elasticsearch`` and ``get_index`` to return 'mdn-test'.
    The patches are context managers, so they are undone even if creating
    the search fails or the generator is closed early.
    '''
    with mock.patch(
            'kuma.wiki.search.connections.get_connection',
            return_value=mock_elasticsearch):
        with mock.patch(
                'kuma.wiki.search.WikiDocumentType.get_index',
                return_value='mdn-test'):
            yield WikiDocumentType.search()
Exemple #28
0
    def test_document_serializer(self):
        """DocumentSerializer handles a whole result set and a single hit."""
        response = WikiDocumentType.search().execute()

        # many=True: the serialized data is a list with one entry per hit.
        many_data = DocumentSerializer(response, many=True).data
        eq_(len(many_data), 7)
        ok_(isinstance(many_data, list))
        serialized_ids = [entry['id'] for entry in many_data]
        ok_(1 in serialized_ids)

        # many=False: a single hit serializes to a dict.
        first_hit = response[0]
        single_data = DocumentSerializer(first_hit, many=False).data
        ok_(isinstance(single_data, dict))
        eq_(single_data['id'], first_hit.id)
    def test_document_serializer(self):
        """DocumentSerializer handles a whole result set and a single hit."""
        response = WikiDocumentType.search().execute()

        # many=True: the serialized data is a list with one entry per hit.
        many_data = DocumentSerializer(response, many=True).data
        assert 7 == len(many_data)
        assert isinstance(many_data, list)
        serialized_ids = [entry["id"] for entry in many_data]
        assert 1 in serialized_ids

        # many=False: a single hit serializes to a dict.
        first_hit = response[0]
        single_data = DocumentSerializer(first_hit, many=False).data
        assert isinstance(single_data, dict)
        assert single_data["id"] == first_hit.id
Exemple #30
0
def pre_delete_handler(**kwargs):
    """Queue removal of ``kwargs['instance']`` from the current search index.

    Does nothing when ``settings.ES_LIVE_INDEX`` is falsy or no
    ``instance`` keyword was passed. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged and no task is queued.
    """
    if not settings.ES_LIVE_INDEX or 'instance' not in kwargs:
        return

    # Imported inside the function, as in the original code.
    from kuma.wiki.tasks import unindex_documents
    from .models import Index

    doc = kwargs['instance']
    current_index = Index.objects.get_current()

    if WikiDocumentType.should_update(doc):
        unindex_documents.delay([doc.pk], current_index.pk)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.pk)
Exemple #31
0
def pre_delete_handler(**kwargs):
    """Queue removal of ``kwargs['instance']`` from the current search index.

    Does nothing when ``settings.ES_LIVE_INDEX`` is falsy or no
    ``instance`` keyword was passed. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged and no task is queued.
    """
    if not settings.ES_LIVE_INDEX or 'instance' not in kwargs:
        return

    # Imported inside the function, as in the original code.
    from kuma.wiki.tasks import unindex_documents
    from .models import Index

    doc = kwargs['instance']
    current_index = Index.objects.get_current()

    if WikiDocumentType.should_update(doc):
        unindex_documents.delay([doc.pk], current_index.pk)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.pk)
Exemple #32
0
def render_done_handler(instance, **kwargs):
    """Queue indexing of ``instance`` and its other translations.

    Does nothing unless ``settings.ES_LIVE_INDEX`` is truthy. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged. A failure to queue the indexing task is logged with its
    traceback and not re-raised.
    """
    if not settings.ES_LIVE_INDEX:
        return

    doc = instance
    if WikiDocumentType.should_update(doc):
        current_index = Index.objects.get_current()
        outdated = current_index.record_outdated(doc)
        if outdated:
            log.info('Found a newer index and scheduled '
                     'indexing it after promotion.')
        # A set removes duplicate primary keys before the task is queued.
        doc_pks = {item.pk for item in doc.other_translations}
        doc_pks.add(doc.id)
        try:
            index_documents.delay(list(doc_pks), current_index.pk)
        except Exception:
            log.error('Search indexing task failed', exc_info=True)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.id)
Exemple #33
0
def render_done_handler(instance, **kwargs):
    """Queue indexing of ``instance`` and its other translations.

    Does nothing unless ``settings.ES_LIVE_INDEX`` is truthy. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged. A failure to queue the indexing task is logged with its
    traceback and not re-raised.
    """
    if not settings.ES_LIVE_INDEX:
        return

    doc = instance
    if WikiDocumentType.should_update(doc):
        current_index = Index.objects.get_current()
        outdated = current_index.record_outdated(doc)
        if outdated:
            log.info('Found a newer index and scheduled '
                     'indexing it after promotion.')
        # A set removes duplicate primary keys before the task is queued.
        doc_pks = {item.pk for item in doc.other_translations}
        doc_pks.add(doc.id)
        try:
            index_documents.delay(list(doc_pks), current_index.pk)
        except Exception:
            log.error('Search indexing task failed', exc_info=True)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.id)
Exemple #34
0
def render_done_handler(**kwargs):
    """Queue indexing of ``kwargs['instance']`` and its other translations.

    Does nothing when ``settings.ES_LIVE_INDEX`` is falsy or no
    ``instance`` keyword was passed. When
    ``WikiDocumentType.should_update`` rejects the document, it is only
    logged. A failure to queue the indexing task is logged with its
    traceback and not re-raised.
    """
    if not settings.ES_LIVE_INDEX or 'instance' not in kwargs:
        return

    # Imported inside the function, as in the original code.
    from kuma.wiki.tasks import index_documents
    from .models import Index

    doc = kwargs['instance']
    if WikiDocumentType.should_update(doc):
        current_index = Index.objects.get_current()
        outdated = current_index.record_outdated(doc)
        if outdated:
            log.info('Found a newer index and scheduled '
                     'indexing it after promotion.')
        # A set removes duplicate primary keys before the task is queued.
        doc_pks = set(doc.other_translations.values_list('pk', flat=True))
        doc_pks.add(doc.id)
        try:
            index_documents.delay(list(doc_pks), current_index.pk)
        except Exception:
            # A bare "except:" would also swallow SystemExit and
            # KeyboardInterrupt; only ordinary errors are best-effort here.
            log.error('Search indexing task failed', exc_info=True)
    else:
        # No exception is being handled here, so exc_info=True would only
        # append an empty "NoneType: None" traceback to the log record.
        log.info('Ignoring wiki document %r while updating search index',
                 doc.id)
Exemple #35
0
 def test_hidden_slugs_should_update(self):
     """should_update() is False for the 'User:jezdez' document."""
     hidden_doc = Document.objects.get(slug='User:jezdez')
     decision = WikiDocumentType.should_update(hidden_doc)
     eq_(decision, False)
Exemple #36
0
 def populate(self):
     """Call WikiDocumentType.reindex_all for this index, 500 per chunk.

     Returns whatever ``reindex_all`` returns.
     """
     outcome = WikiDocumentType.reindex_all(index=self, chunk_size=500)
     return outcome
Exemple #37
0
 def test_hidden_slugs_get_indexable(self):
     """get_indexable() does not return the 'User:jezdez' document."""
     self.refresh()
     indexable = WikiDocumentType.get_indexable()
     titles = indexable.values_list('title', flat=True)
     assert 'User:jezdez' not in titles
Exemple #38
0
def status(request):
    """
    Report the state of this Kuma instance as a JSON response.

    The payload describes the incoming request, a selection of settings,
    and whether the database, KumaScript, ElasticSearch and the testing
    accounts are available. Functional tests can use it to customize the
    test process.
    """
    request_info = {
        'url': request.build_absolute_uri(''),
        'host': request.get_host(),
        'is_secure': request.is_secure(),
        'scheme': request.scheme,
    }
    exposed_settings = {
        'ALLOWED_HOSTS': settings.ALLOWED_HOSTS,
        'ATTACHMENT_HOST': settings.ATTACHMENT_HOST,
        'ATTACHMENT_ORIGIN': settings.ATTACHMENT_ORIGIN,
        'DEBUG': settings.DEBUG,
        'INTERACTIVE_EXAMPLES_BASE': settings.INTERACTIVE_EXAMPLES_BASE,
        'LEGACY_HOSTS': settings.LEGACY_HOSTS,
        'MAINTENANCE_MODE': settings.MAINTENANCE_MODE,
        'PROTOCOL': settings.PROTOCOL,
        'REVISION_HASH': settings.REVISION_HASH,
        'SITE_URL': settings.SITE_URL,
        'STATIC_URL': settings.STATIC_URL,
    }

    # Database: reachable, and does it hold any documents?
    database = {'available': True, 'populated': False, 'document_count': 0}
    try:
        total_docs = Document.objects.count()
    except DatabaseError:
        database['available'] = False
    else:
        if total_docs:
            database['populated'] = True
            database['document_count'] = total_docs

    # KumaScript: reachable only if the revision request returns a 200.
    try:
        ks_response = request_revision_hash()
    except Requests_ConnectionError:
        ks_response = None
    if ks_response and ks_response.status_code == 200:
        kumascript = {'available': True, 'revision': ks_response.text}
    else:
        kumascript = {'available': False, 'revision': None}

    # ElasticSearch: reachable, and does the index hold any documents?
    search_status = {'available': True, 'populated': False, 'count': 0}
    try:
        indexed_count = WikiDocumentType.search().count()
    except ES_ConnectionError:
        search_status['available'] = False
    except NotFoundError:
        pass  # available but unpopulated (and maybe uncreated)
    else:
        if indexed_count:
            search_status['populated'] = True
            search_status['count'] = indexed_count

    # Testing accounts: all must exist and accept the testing password.
    expected_names = [
        'test-super', 'test-moderator', 'test-new', 'test-banned',
        'viagra-test-123'
    ]
    try:
        lookup = User.objects.only('id', 'username', 'password')
        users = list(lookup.filter(username__in=expected_names))
    except DatabaseError:
        users = []
    accounts_ready = (
        len(users) == len(expected_names) and
        all(user.check_password('test-password') for user in users))

    return JsonResponse({
        'version': 1,
        'request': request_info,
        'services': {
            'database': database,
            'kumascript': kumascript,
            'search': search_status,
            'test_accounts': {'available': accounts_ready},
        },
        'settings': exposed_settings,
    })
Exemple #39
0
 def populate(self):
     """Call WikiDocumentType.reindex_all for this index, 500 per chunk.

     Returns whatever ``reindex_all`` returns.
     """
     outcome = WikiDocumentType.reindex_all(index=self, chunk_size=500)
     return outcome
Exemple #40
0
 def setup_indexes(self):
     """Rebuild the current index by reindexing all documents."""
     # Called with its defaults; the return value is not used.
     WikiDocumentType.reindex_all()
Exemple #41
0
 def setup_indexes(self):
     """Rebuild the current index by reindexing all documents."""
     # Called with its defaults; the return value is not used.
     WikiDocumentType.reindex_all()
Exemple #42
0
def test_should_not_update_excluded_slug(mock_doc, slug):
    """A document with an excluded slug must not update the search index."""
    mock_doc.slug = slug
    should_index = WikiDocumentType.should_update(mock_doc)
    assert not should_index
Exemple #43
0
def test_should_not_update_excluded_slug(mock_doc, slug):
    """A document with an excluded slug must not update the search index."""
    mock_doc.slug = slug
    should_index = WikiDocumentType.should_update(mock_doc)
    assert not should_index
Exemple #44
0
def status(request):
    """
    Report the state of this Kuma instance as a JSON response.

    The payload describes the incoming request, a selection of settings,
    and whether the database, KumaScript, ElasticSearch and the testing
    accounts are available. Functional tests can use it to customize the
    test process.
    """
    request_info = {
        'url': request.build_absolute_uri(''),
        'host': request.get_host(),
        'is_secure': request.is_secure(),
        'scheme': request.scheme,
    }
    exposed_settings = {
        'ALLOWED_HOSTS': settings.ALLOWED_HOSTS,
        'ATTACHMENT_HOST': settings.ATTACHMENT_HOST,
        'ATTACHMENT_ORIGIN': settings.ATTACHMENT_ORIGIN,
        'DEBUG': settings.DEBUG,
        'INTERACTIVE_EXAMPLES_BASE': settings.INTERACTIVE_EXAMPLES_BASE,
        'LEGACY_HOSTS': settings.LEGACY_HOSTS,
        'MAINTENANCE_MODE': settings.MAINTENANCE_MODE,
        'PROTOCOL': settings.PROTOCOL,
        'REVISION_HASH': settings.REVISION_HASH,
        'SITE_URL': settings.SITE_URL,
        'STATIC_URL': settings.STATIC_URL,
    }

    # Database: reachable, and does it hold any documents?
    database = {'available': True, 'populated': False, 'document_count': 0}
    try:
        total_docs = Document.objects.count()
    except DatabaseError:
        database['available'] = False
    else:
        if total_docs:
            database['populated'] = True
            database['document_count'] = total_docs

    # KumaScript: reachable only if the revision request returns a 200.
    try:
        ks_response = request_revision_hash()
    except Requests_ConnectionError:
        ks_response = None
    if ks_response and ks_response.status_code == 200:
        kumascript = {'available': True, 'revision': ks_response.text}
    else:
        kumascript = {'available': False, 'revision': None}

    # ElasticSearch: reachable, and does the index hold any documents?
    search_status = {'available': True, 'populated': False, 'count': 0}
    try:
        indexed_count = WikiDocumentType.search().count()
    except ES_ConnectionError:
        search_status['available'] = False
    except NotFoundError:
        pass  # available but unpopulated (and maybe uncreated)
    else:
        if indexed_count:
            search_status['populated'] = True
            search_status['count'] = indexed_count

    # Testing accounts: all must exist and accept the testing password.
    expected_names = ['test-super', 'test-moderator', 'test-new',
                      'test-banned', 'viagra-test-123']
    try:
        lookup = User.objects.only('id', 'username', 'password')
        users = list(lookup.filter(username__in=expected_names))
    except DatabaseError:
        users = []
    accounts_ready = (
        len(users) == len(expected_names) and
        all(user.check_password('test-password') for user in users))

    return JsonResponse({
        'version': 1,
        'request': request_info,
        'services': {
            'database': database,
            'kumascript': kumascript,
            'search': search_status,
            'test_accounts': {'available': accounts_ready},
        },
        'settings': exposed_settings,
    })
Exemple #45
0
def test_should_update_standard_doc(mock_doc):
    """An unmodified mock_doc is accepted by should_update()."""
    should_index = WikiDocumentType.should_update(mock_doc)
    assert should_index
Exemple #46
0
 def get_queryset(self):
     """Return a fresh WikiDocumentType search object."""
     base_search = WikiDocumentType.search()
     return base_search
Exemple #47
0
def test_base_search(db):
    """An unmodified WikiDocumentType.search() serializes to an empty dict."""
    base_query = WikiDocumentType.search().to_dict()
    assert base_query == {}
Exemple #48
0
 def test_hidden_slugs_get_indexable(self):
     """get_indexable() does not return the 'User:jezdez' document."""
     self.refresh()
     indexable = WikiDocumentType.get_indexable()
     titles = indexable.values_list('title', flat=True)
     ok_('User:jezdez' not in titles)
Exemple #49
0
def test_should_update_standard_doc(mock_doc):
    """An unmodified mock_doc is accepted by should_update()."""
    should_index = WikiDocumentType.should_update(mock_doc)
    assert should_index
Exemple #50
0
 def get_queryset(self):
     """Return a fresh WikiDocumentType search object."""
     base_search = WikiDocumentType.search()
     return base_search
Exemple #51
0
def test_should_not_update_excluded_flags(mock_doc, flag):
    """Setting the given flag on a document stops search index updates."""
    setattr(mock_doc, flag, True)
    should_index = WikiDocumentType.should_update(mock_doc)
    assert not should_index
Exemple #52
0
def test_should_not_update_excluded_flags(mock_doc, flag):
    """Setting the given flag on a document stops search index updates."""
    setattr(mock_doc, flag, True)
    should_index = WikiDocumentType.should_update(mock_doc)
    assert not should_index