def test_recent_helpful_votes(self):
    """Check that only recent helpful votes make a document match.

    The document must match ``document_recent_helpful_votes__gt=0`` only
    after a helpful vote created 29 days ago is added; an unhelpful vote
    and a helpful vote created 31 days ago must leave the count at zero.
    """
    def docs_with_recent_helpful_votes():
        # A new search is built for every check.
        return DocumentMappingType.search().filter(
            document_recent_helpful_votes__gt=0).count()

    # A document whose approved revision has no votes must not match.
    rev = revision(is_approved=True, save=True)
    self.refresh()
    eq_(docs_with_recent_helpful_votes(), 0)

    # An unhelpful vote must not make it match either.
    helpful_vote(revision=rev, helpful=False, save=True)
    rev.document.save()  # Votes don't trigger a reindex.
    self.refresh()
    eq_(docs_with_recent_helpful_votes(), 0)

    # A helpful vote created 31 days ago still must not count.
    stale = datetime.now() - timedelta(days=31)
    helpful_vote(revision=rev, helpful=True, created=stale, save=True)
    rev.document.save()  # Votes don't trigger a reindex.
    self.refresh()
    eq_(docs_with_recent_helpful_votes(), 0)

    # A helpful vote created 29 days ago makes the document show up.
    fresh = datetime.now() - timedelta(days=29)
    helpful_vote(revision=rev, helpful=True, created=fresh, save=True)
    rev.document.save()  # Votes don't trigger a reindex.
    self.refresh()
    eq_(docs_with_recent_helpful_votes(), 1)
def test_recent_helpful_votes(self):
    """Verify the indexing of the recent helpful vote count.

    Walks through four states of one document (no votes, an unhelpful
    vote, a helpful vote created 31 days ago, a helpful vote created 29
    days ago) and asserts after each how many documents match
    ``document_recent_helpful_votes__gt=0``.
    """
    rev = revision(is_approved=True, save=True)

    def cast_vote(**vote_kwargs):
        helpful_vote(revision=rev, save=True, **vote_kwargs)
        # Votes don't trigger a reindex.
        rev.document.save()

    def assert_matching_docs(expected):
        self.refresh()
        matching = DocumentMappingType.search().filter(
            document_recent_helpful_votes__gt=0).count()
        eq_(matching, expected)

    # Freshly created document: it doesn't show up in the query.
    assert_matching_docs(0)

    # With an unhelpful vote it still shouldn't show up.
    cast_vote(helpful=False)
    assert_matching_docs(0)

    # With a helpful vote created 31 days ago it still shouldn't show up.
    thirty_one_days_ago = datetime.now() - timedelta(days=31)
    cast_vote(helpful=True, created=thirty_one_days_ago)
    assert_matching_docs(0)

    # With a helpful vote created 29 days ago it should show up now.
    twenty_nine_days_ago = datetime.now() - timedelta(days=29)
    cast_vote(helpful=True, created=twenty_nine_days_ago)
    assert_matching_docs(1)
def test_add_and_delete(self):
    """A document with an approved revision is indexed; deleting the
    document removes it from the index again."""
    wiki_doc = document(save=True)
    revision(document=wiki_doc, is_approved=True, save=True)
    self.refresh()
    indexed_after_add = DocumentMappingType.search().count()
    eq_(indexed_after_add, 1)

    wiki_doc.delete()
    self.refresh()
    indexed_after_delete = DocumentMappingType.search().count()
    eq_(indexed_after_delete, 0)
def test_wiki_no_revisions(self):
    """Documents lacking an approved revision must stay out of the index."""
    def indexed_docs():
        return DocumentMappingType.search().count()

    # A document with no revisions at all is not indexed.
    bare_doc = document(save=True)
    self.refresh()
    eq_(indexed_docs(), 0)

    # Adding a revision that is not approved changes nothing.
    revision(document=bare_doc, is_approved=False, save=True)
    self.refresh()
    eq_(indexed_docs(), 0)
def test_wiki_redirects(self):
    """Redirect documents must not be present in the index."""
    def wool_title_hits():
        return DocumentMappingType.search().query(
            document_title__match='wool').count()

    # With an ordinary approved revision the document is indexed.
    wool_doc = DocumentFactory(title=u'wool hats')
    RevisionFactory(document=wool_doc, is_approved=True)
    self.refresh()
    eq_(wool_title_hits(), 1)

    # Once a redirect revision is created the document is removed from
    # the index.
    RedirectRevisionFactory(document=wool_doc)
    self.refresh()
    eq_(wool_title_hits(), 0)
def test_wiki_keywords(self):
    """Updating a document's keywords must update the index."""
    def wool_keyword_hits():
        return DocumentMappingType.search().query(
            document_keywords='wool').count()

    # The first revision has no keywords, so a document_keywords query
    # for 'wool' finds nothing even though the title contains the word.
    wool_doc = DocumentFactory(title=u'wool hats')
    RevisionFactory(document=wool_doc, is_approved=True)
    self.refresh()
    eq_(wool_keyword_hits(), 0)

    # A new approved revision carrying the keyword makes the query match.
    RevisionFactory(document=wool_doc, is_approved=True, keywords='wool')
    self.refresh()
    eq_(wool_keyword_hits(), 1)
def test_wiki_keywords(self):
    """Check that keyword changes on a document reach the search index."""
    titled_doc = DocumentFactory(title='wool hats')

    # An approved revision without keywords: a document_keywords query
    # for 'wool' must not find the document.
    RevisionFactory(document=titled_doc, is_approved=True)
    self.refresh()
    hits_without_keyword = DocumentMappingType.search().query(
        document_keywords='wool').count()
    eq_(hits_without_keyword, 0)

    # An approved revision with keywords='wool': the same query now
    # finds exactly one document.
    RevisionFactory(document=titled_doc, is_approved=True, keywords='wool')
    self.refresh()
    hits_with_keyword = DocumentMappingType.search().query(
        document_keywords='wool').count()
    eq_(hits_with_keyword, 1)
def test_wiki_redirects(self):
    """Check that a document turned into a redirect leaves the index."""
    hat_doc = DocumentFactory(title='wool hats')

    # A revision that doesn't have a redirect: the document is indexed.
    RevisionFactory(document=hat_doc, is_approved=True)
    self.refresh()
    hits_before_redirect = DocumentMappingType.search().query(
        document_title__match='wool').count()
    eq_(hits_before_redirect, 1)

    # A revision that is a redirect: the document is gone from the index.
    RedirectRevisionFactory(document=hat_doc)
    self.refresh()
    hits_after_redirect = DocumentMappingType.search().query(
        document_title__match='wool').count()
    eq_(hits_after_redirect, 0)
def test_wiki_redirects(self):
    """Redirect documents must not be present in the index."""
    def wool_title_hits():
        return DocumentMappingType.search().query(
            document_title__match='wool').count()

    # A document whose approved revision isn't a redirect is indexed.
    wool_doc = document(title=u'wool hats')
    wool_doc.save()
    revision(document=wool_doc, is_approved=True, save=True)
    self.refresh()
    eq_(wool_title_hits(), 1)

    # An approved revision with redirect content removes the document
    # from the index.
    revision(
        document=wool_doc,
        content=REDIRECT_CONTENT,
        is_approved=True,
        save=True)
    self.refresh()
    eq_(wool_title_hits(), 0)
def troubleshooting_view(request):
    """Render the index troubleshooting admin page.

    Diffs the 50 most recently indexed wiki documents against the 50
    documents whose approved current revision was most recently reviewed
    and hands the result to the template as ``diffs``.
    """
    # The most recently indexed 50 wiki documents.
    indexed_search = DocumentMappingType.search().order_by("-indexed_on")[:50]
    last_50_indexed = list(_fix_results(indexed_search))

    # The 50 documents with the most recently reviewed approved revision.
    reviewed_docs = Document.objects.filter(
        current_revision__is_approved=True)
    reviewed_docs = reviewed_docs.order_by("-current_revision__reviewed")
    last_50_reviewed = list(reviewed_docs[:50])

    context = {
        "title": "Index Troubleshooting",
        "diffs": diff_it_for_realz(last_50_indexed, last_50_reviewed),
    }
    return render(request, "admin/search_troubleshooting.html", context)
def test_wiki_topics(self):
    """Adding topics to, or clearing them from, a Document must refresh
    the index."""
    def docs_in_topic(slug):
        return DocumentMappingType.search().filter(topic=slug).count()

    hiphop = topic(slug=u'hiphop', save=True)
    eq_(docs_in_topic(hiphop.slug), 0)

    # An indexed document without topics doesn't match the topic filter.
    wiki_doc = document(save=True)
    revision(document=wiki_doc, is_approved=True, save=True)
    self.refresh()
    eq_(docs_in_topic(hiphop.slug), 0)

    # After the topic is added, the document matches.
    wiki_doc.topics.add(hiphop)
    self.refresh()
    eq_(docs_in_topic(hiphop.slug), 1)

    wiki_doc.topics.clear()
    self.refresh()

    # The document itself must still be there; clearing topics must not
    # accidentally delete it through broken signal handling.
    eq_(DocumentMappingType.search().filter().count(), 1)
    eq_(docs_in_topic(hiphop.slug), 0)
def test_wiki_products(self):
    """Adding products to, or removing them from, a Document must
    refresh the index."""
    def docs_for_product(slug):
        return DocumentMappingType.search().filter(product=slug).count()

    desktop = product(slug=u'desktop', save=True)
    eq_(docs_for_product(desktop.slug), 0)

    # An indexed document without products doesn't match the filter.
    wiki_doc = document(save=True)
    revision(document=wiki_doc, is_approved=True, save=True)
    self.refresh()
    eq_(docs_for_product(desktop.slug), 0)

    # After the product is added, the document matches.
    wiki_doc.products.add(desktop)
    self.refresh()
    eq_(docs_for_product(desktop.slug), 1)

    wiki_doc.products.remove(desktop)
    self.refresh()

    # The document itself must still be there; removing the product must
    # not accidentally delete it through broken signal handling.
    eq_(DocumentMappingType.search().filter().count(), 1)
    eq_(docs_for_product(desktop.slug), 0)
def test_wiki_redirects(self):
    """Check that a document turned into a redirect leaves the index."""
    hat_doc = document(title=u'wool hats')
    hat_doc.save()

    # A revision that doesn't have a redirect: the document is indexed.
    revision(document=hat_doc, is_approved=True, save=True)
    self.refresh()
    hits_before_redirect = DocumentMappingType.search().query(
        document_title__match='wool').count()
    eq_(hits_before_redirect, 1)

    # A revision that is a redirect: the document is gone from the index.
    revision(
        document=hat_doc,
        content=REDIRECT_CONTENT,
        is_approved=True,
        save=True)
    self.refresh()
    hits_after_redirect = DocumentMappingType.search().query(
        document_title__match='wool').count()
    eq_(hits_after_redirect, 0)
def opensearch_suggestions(request):
    """Return OpenSearch suggestions for the ``q`` query-string term.

    Responds with HTTP 400 when ``q`` is missing or empty. Otherwise the
    body is the JSON list ``[term, titles, [], urls]`` built from up to
    five question hits followed by up to five wiki document hits. If
    Elasticsearch raises one of ``ES_EXCEPTIONS`` the hit list is empty.
    """
    content_type = "application/x-suggestions+json"

    term = request.GET.get("q")
    if not term:
        return HttpResponseBadRequest(content_type=content_type)

    locale = locale_or_default(request.LANGUAGE_CODE)

    # FIXME: Rewrite this using the simple search search business
    # logic. This currently returns templates (amongst other things)
    # which is totally wrong.
    try:
        wiki_query = {
            "%s__match" % field: term
            for field in DocumentMappingType.get_query_fields()
        }
        # Upgrade the query to an analyzer-aware one.
        wiki_query = es_utils.es_query_with_analyzer(wiki_query, locale)
        wiki_s = DocumentMappingType.search()
        wiki_s = wiki_s.filter(document_is_archived=False)
        wiki_s = wiki_s.filter(document_locale=locale)
        wiki_s = wiki_s.values_dict("document_title", "url")
        wiki_s = wiki_s.query(or_=wiki_query)[:5]

        question_query = {
            "%s__match" % field: term
            for field in QuestionMappingType.get_query_fields()
        }
        question_s = QuestionMappingType.search()
        question_s = question_s.filter(question_has_helpful=True)
        question_s = question_s.values_dict("question_title", "url")
        question_s = question_s.query(or_=question_query)[:5]

        hits = list(chain(question_s, wiki_s))
    except ES_EXCEPTIONS:
        # If we have ES problems, we just send back an empty result
        # set.
        hits = []

    def absolute_url(hit):
        if request.is_secure():
            scheme = "https"
        else:
            scheme = "http"
        return u"%s://%s%s" % (scheme, request.get_host(), hit["url"][0])

    def title_of(hit):
        # NB: Elasticsearch returns an array of strings as the value,
        # so the fallback mimics that and the first (and only) string
        # is pulled out.
        fallback = [_("No title")]
        question_title = hit.get("question_title", fallback)
        return hit.get("document_title", question_title)[0]

    titles = [title_of(hit) for hit in hits]
    urls = [absolute_url(hit) for hit in hits]
    return HttpResponse(
        json.dumps([term, titles, [], urls]), content_type=content_type)
def opensearch_suggestions(request):
    """Serve OpenSearch suggestions for the term given as ``q``.

    A missing or empty ``q`` yields an HTTP 400 response. Otherwise the
    response body is JSON of the form ``[term, titles, [], urls]``,
    listing at most five question hits and then at most five wiki
    document hits. Any of ``ES_EXCEPTIONS`` results in empty lists.
    """
    content_type = 'application/x-suggestions+json'
    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(content_type=content_type)

    locale = locale_or_default(request.LANGUAGE_CODE)

    # FIXME: Rewrite this using the simple search search business
    # logic. This currently returns templates (amongst other things)
    # which is totally wrong.
    try:
        doc_fields = DocumentMappingType.get_query_fields()
        query = dict(('%s__match' % name, term) for name in doc_fields)
        # Upgrade the query to an analyzer-aware one.
        query = es_utils.es_query_with_analyzer(query, locale)
        wiki_s = (
            DocumentMappingType.search()
            .filter(document_is_archived=False)
            .filter(document_locale=locale)
            .values_dict('document_title', 'url')
            .query(or_=query)[:5]
        )

        question_fields = QuestionMappingType.get_query_fields()
        query = dict(('%s__match' % name, term) for name in question_fields)
        question_s = (
            QuestionMappingType.search()
            .filter(question_has_helpful=True)
            .values_dict('question_title', 'url')
            .query(or_=query)[:5]
        )

        found = list(chain(question_s, wiki_s))
    except ES_EXCEPTIONS:
        # If we have ES problems, we just send back an empty result
        # set.
        found = []

    def to_url(item):
        scheme = 'https' if request.is_secure() else 'http'
        host = request.get_host()
        return u'%s://%s%s' % (scheme, host, item['url'][0])

    def to_title(item):
        # NB: Elasticsearch returns an array of strings as the value,
        # so the default mimics that; the first (and only) string is
        # then pulled out.
        default_title = item.get('question_title', [_('No title')])
        return item.get('document_title', default_title)[0]

    suggestion_titles = [to_title(item) for item in found]
    suggestion_urls = [to_url(item) for item in found]
    data = [term, suggestion_titles, [], suggestion_urls]
    return HttpResponse(json.dumps(data), content_type=content_type)
def _es_documents_for(locale, topics=None, products=None):
    """ES implementation of documents_for.

    Searches non-archived documents in ``locale`` whose category is in
    ``settings.IA_DEFAULT_CATEGORIES``, adds one filter per given topic
    slug and product slug, and returns a list of at most 100 hits
    ordered by ``-document_recent_helpful_votes``.
    """
    wanted_fields = (
        'id',
        'document_title',
        'url',
        'document_parent_id',
        'document_summary',
    )
    search = DocumentMappingType.search().values_dict(*wanted_fields)
    search = search.filter(
        document_locale=locale,
        document_is_archived=False,
        document_category__in=settings.IA_DEFAULT_CATEGORIES)

    # Each topic and product adds its own filter to the search.
    for wanted_topic in (topics or []):
        search = search.filter(topic=wanted_topic.slug)
    for wanted_product in (products or []):
        search = search.filter(product=wanted_product.slug)

    ordered = search.order_by('-document_recent_helpful_votes')
    return list(ordered[:100])
def troubleshooting_view(request):
    """Show the admin page diffing recently indexed and reviewed docs.

    The 50 most recently indexed wiki documents (as value dicts) are
    compared with the 50 documents whose approved current revision was
    reviewed most recently; the diff is passed to the template.
    """
    # The most recently indexed 50 wiki documents.
    indexed_hits = DocumentMappingType.search().values_dict()
    indexed_hits = indexed_hits.order_by('-indexed_on')[:50]
    last_50_indexed = list(_fix_value_dicts(indexed_hits))

    # The 50 documents with the most recently reviewed approved revision.
    approved_docs = Document.uncached.filter(
        current_revision__is_approved=True)
    newest_reviewed = approved_docs.order_by('-current_revision__reviewed')
    last_50_reviewed = list(newest_reviewed[:50])

    diffs = diff_it_for_realz(last_50_indexed, last_50_reviewed)
    template_context = {'title': 'Index Troubleshooting', 'diffs': diffs}
    return render(
        request, 'admin/search_troubleshooting.html', template_context)
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions.

    Responds with HTTP 400 when the ``q`` query-string parameter is
    missing or empty. Otherwise returns the JSON list
    ``[term, titles, [], urls]`` built from up to five question hits
    followed by up to five wiki document hits. If Elasticsearch raises
    one of ``ES_EXCEPTIONS`` the title and URL lists are empty.
    """
    content_type = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(content_type=content_type)

    site = Site.objects.get_current()
    locale = locale_or_default(request.LANGUAGE_CODE)

    try:
        query = dict(('%s__match' % field, term)
                     for field in DocumentMappingType.get_query_fields())
        # Upgrade the query to an analyzer-aware one.
        query = es_utils.es_query_with_analyzer(query, locale)

        wiki_s = (DocumentMappingType.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__match' % field, term)
                     for field in QuestionMappingType.get_query_fields())
        question_s = (QuestionMappingType.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except ES_EXCEPTIONS:
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    def urlize(r):
        return u'https://%s%s' % (site, r['url'])

    def titleize(r):
        # Wiki hits are fetched with 'document_title' and question hits
        # with 'question_title' (see the values_dict() calls above).
        # Falling back to 'document_title' a second time gave every
        # question hit a None title, so fall back to 'question_title'.
        return r.get('document_title', r.get('question_title'))

    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), content_type=content_type)
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions.

    Responds with HTTP 400 when the ``q`` query-string parameter is
    missing or empty. Otherwise returns the JSON list
    ``[term, titles, [], urls]`` built from up to five question hits
    followed by up to five wiki document hits. If Elasticsearch raises
    one of ``ES_EXCEPTIONS`` the title and URL lists are empty.
    """
    content_type = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(content_type=content_type)

    site = Site.objects.get_current()
    locale = locale_or_default(request.LANGUAGE_CODE)

    try:
        query = dict(('{0!s}__match'.format(field), term)
                     for field in DocumentMappingType.get_query_fields())
        # Upgrade the query to an analyzer-aware one.
        query = es_utils.es_query_with_analyzer(query, locale)

        wiki_s = (DocumentMappingType.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('{0!s}__match'.format(field), term)
                     for field in QuestionMappingType.get_query_fields())
        question_s = (QuestionMappingType.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except ES_EXCEPTIONS:
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    def urlize(r):
        return u'https://{0!s}{1!s}'.format(site, r['url'])

    def titleize(r):
        # Wiki hits are fetched with 'document_title' and question hits
        # with 'question_title' (see the values_dict() calls above).
        # Falling back to 'document_title' a second time gave every
        # question hit a None title, so fall back to 'question_title'.
        return r.get('document_title', r.get('question_title'))

    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), content_type=content_type)
def troubleshooting_view(request):
    """Render the index troubleshooting admin page.

    Compares the 50 most recently indexed wiki documents with the 50
    documents whose approved current revision was most recently
    reviewed, and passes the resulting diff list to the template.
    """
    # The most recently indexed 50 wiki documents.
    newest_indexed = DocumentMappingType.search().order_by('-indexed_on')
    last_50_indexed = list(_fix_results(newest_indexed[:50]))

    # The 50 documents with the most recently reviewed approved revision.
    approved = Document.uncached.filter(current_revision__is_approved=True)
    last_50_reviewed = list(
        approved.order_by('-current_revision__reviewed')[:50])

    page_context = {
        'title': 'Index Troubleshooting',
        'diffs': diff_it_for_realz(last_50_indexed, last_50_reviewed),
    }
    return render(request, 'admin/search_troubleshooting.html', page_context)
def _es_documents_for(locale, topics=None, products=None):
    """ES implementation of documents_for.

    Searches non-archived documents in ``locale`` whose category is in
    ``settings.IA_DEFAULT_CATEGORIES``, adds one filter per given topic
    slug and product slug, takes at most 100 hits ordered by
    ``document_display_order`` and then ``-document_recent_helpful_votes``,
    and returns them after passing through ``DocumentMappingType.reshape``.
    """
    search = DocumentMappingType.search()
    search = search.values_dict(
        "id",
        "document_title",
        "url",
        "document_parent_id",
        "document_summary",
    )
    search = search.filter(
        document_locale=locale,
        document_is_archived=False,
        document_category__in=settings.IA_DEFAULT_CATEGORIES,
    )

    # Each topic and product adds its own filter to the search.
    if topics:
        for wanted_topic in topics:
            search = search.filter(topic=wanted_topic.slug)
    if products:
        for wanted_product in products:
            search = search.filter(product=wanted_product.slug)

    ordered = search.order_by(
        "document_display_order", "-document_recent_helpful_votes")
    return DocumentMappingType.reshape(ordered[:100])
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions.

    Responds with HTTP 400 when the ``q`` query-string parameter is
    missing or empty. Otherwise returns the JSON list
    ``[term, titles, [], urls]`` built from up to five question hits
    followed by up to five wiki document hits. If Elasticsearch raises
    one of ``ES_EXCEPTIONS`` the title and URL lists are empty.
    """
    # Passed to the responses as ``content_type``: the ``mimetype``
    # keyword is the deprecated spelling and is rejected by newer Django
    # releases, whereas ``content_type`` is accepted by old and new ones.
    content_type = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(content_type=content_type)

    site = Site.objects.get_current()
    locale = locale_or_default(request.LANGUAGE_CODE)

    try:
        query = dict(('%s__text' % field, term)
                     for field in DocumentMappingType.get_query_fields())
        wiki_s = (DocumentMappingType.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__text' % field, term)
                     for field in QuestionMappingType.get_query_fields())
        question_s = (QuestionMappingType.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except ES_EXCEPTIONS:
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    def urlize(r):
        return u'https://%s%s' % (site, r['url'])

    def titleize(r):
        # Wiki hits carry 'document_title'; question hits carry
        # 'question_title' instead.
        if 'document_title' in r:
            return r['document_title']
        return r['question_title']

    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), content_type=content_type)