def _do_search(self, request, model):
    """
    Run a full-text search restricted to ``model`` and return one page of it.

    The query is read from the ``q`` GET parameter and the page number from
    ``page`` (default 1); pages hold 20 results. Every truthy result is
    dehydrated into a bundle whose ``data['text']`` holds the result text
    with the query terms highlighted.

    Returns the value of ``self.create_response`` for ``{'objects': [...]}``.
    Raises ``Http404`` when the page does not exist or ``page`` is not an
    integer.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).load_all().auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (InvalidPage, ValueError):
        # ValueError: ``page`` was not numeric (e.g. ``?page=abc``); treat it
        # like any other invalid page instead of failing with a server error.
        raise Http404("Sorry, no results on that page.")

    # The highlighter depends only on the query, so build it once.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        if not result:
            continue
        text = highlighter.highlight(result.text)
        bundle = self.build_bundle(obj=result.object, request=request)
        bundle = self.full_dehydrate(bundle)
        bundle.data['text'] = text
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
def get_results(self):
    """
    Override get_results to add the value of the field where the query was
    found. Also takes care of highlighting the query.

    For every result, each stored field other than ``text`` whose string
    value contains the lower-cased query sets ``r.context`` to a dict with
    the field's display name (``field``) and its highlighted value
    (``value``). When several fields match, the last one wins.
    """
    results = super(FindView, self).get_results()
    query = self.query.lower()
    highlight = Highlighter(query)
    for r in results:
        for field in r.get_stored_fields():
            value = getattr(r, field)
            # assume search index field 'text' is the document field
            if (field == 'text'
                    or not isinstance(value, string_types)
                    or query not in value.lower()):
                continue
            # assume search index field name == model field name
            try:
                name = r.object._meta.get_field(field).verbose_name
            except Exception:
                # Best effort: fall back to the raw index field name when
                # the verbose name cannot be looked up. ``Exception`` rather
                # than a bare ``except`` so interpreter-exit signals are
                # not swallowed.
                name = field
            r.context = {
                'field': name,
                'value': highlight.highlight(value),
            }
    return results
def homeroom(request):
    """
    Render the homeroom page for the requesting user.

    On POST, an autocomplete search over ``Course`` is run for the submitted
    ``course-search`` term (first 10 hits); each hit is put in the context
    under ``courses`` as a dict with the hit's ``object`` and its highlighted
    text (``highlight``). The user's sections, ordered by descending section
    start date, and an ``AddCourseForm`` are always added.
    """
    user = request.user
    context = RequestContext(request)

    if request.method == 'POST':
        term = request.POST['course-search']
        hits = SearchQuerySet().autocomplete(text=term).models(Course)[:10]
        marker = Highlighter(term, html_tag='span', css_class='keyword')
        context['courses'] = [
            {'object': hit.object, 'highlight': marker.highlight(hit.text)}
            for hit in hits
        ]
        # Spelling suggestions are switched off for now.
        context['suggestion'] = None

    assignments = SectionAssign.objects.filter(user=user).order_by(
        '-section__start_date')
    sections = [assignment.section for assignment in assignments]
    form = AddCourseForm(request=request)

    context['sections'] = sections
    context['form'] = form
    return render_to_response('homeroom/index.html', context)
def _do_search(self, request, model):
    """
    Search ``model`` for the ``q`` GET parameter and return one result page.

    Pages hold 20 results; the page number comes from the ``page`` GET
    parameter (default 1). Each result's object is dehydrated and the
    highlighted result text is stored under ``data['text']``.

    Returns the value of ``self.create_response`` for ``{'objects': [...]}``.
    Raises ``Http404`` when the page does not exist or ``page`` is not an
    integer.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (InvalidPage, ValueError):
        # A non-numeric ``page`` makes ``int()`` raise ValueError; report it
        # as a missing page rather than letting it become a server error.
        raise Http404("Sorry, no results on that page.")

    # Built once: the highlighter only depends on the query.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        text = highlighter.highlight(result.text)
        bundle = self.full_dehydrate(result.object)
        bundle.data['text'] = text
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
def _search(self, request, model, facets=None, page_size=20,
            highlight=True):
    """
    Run a faceted search limited to ``model`` and return one page of it.

    `request`
        The request; ``q`` and ``page`` are read from ``request.GET``
    `model`
        Limit the search to this model
    `facets`
        A list of facets to include with the results
    `page_size`
        Number of results per page
    `highlight`
        When true, highlight the query terms in each result's text

    Returns a dict with ``page`` (pagination data, including a URL template
    and next/previous URLs where they exist) and ``objects`` (dehydrated
    bundles), plus ``facets`` when facets were requested. When the form is
    invalid, the value of ``self.error_response`` is returned instead.
    Raises ``Http404`` when the page does not exist or is not an integer.
    """
    form = FacetedSearchForm(request.GET, facets=facets or [],
                             models=(model,), load_all=True)
    if not form.is_valid():
        return self.error_response({'errors': form.errors}, request)
    results = form.search()

    paginator = Paginator(results, page_size)
    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (InvalidPage, ValueError):
        # ValueError covers a non-numeric ``page`` parameter, which would
        # otherwise surface as a server error.
        raise Http404(ugettext("Sorry, no results on that page."))

    objects = []
    query = request.GET.get('q', '')
    highlighter = Highlighter(query)
    for result in page.object_list:
        if not result:
            continue
        text = result.text
        if highlight:
            text = highlighter.highlight(text)
        bundle = self.build_bundle(obj=result.object, request=request)
        bundle = self.full_dehydrate(bundle)
        bundle.data['text'] = text
        objects.append(bundle)

    url_template = self._url_template(query,
                                      form['selected_facets'].value())
    page_data = {
        'number': page.number,
        'per_page': paginator.per_page,
        'num_pages': paginator.num_pages,
        'page_range': paginator.page_range,
        'object_count': paginator.count,
        'url_template': url_template,
    }
    if page.has_next():
        page_data['url_next'] = url_template.format(
            page.next_page_number())
    if page.has_previous():
        page_data['url_prev'] = url_template.format(
            page.previous_page_number())

    object_list = {
        'page': page_data,
        'objects': objects,
    }
    if facets:
        object_list.update({'facets': results.facet_counts()})
    return object_list
def execute_highlighter(query, text_key, results):
    """
    Attach the character spans of every query-term match to each result.

    For each result, the text stored under ``text_key`` in its additional
    fields (empty string when absent) is scanned for the query terms, and
    ``result.highlight_locations`` is set to a list of ``[start, end]``
    pairs, where ``end`` is ``start`` plus the length of the matched term.
    The results are modified in place; nothing is returned.
    """
    highlight = Highlighter(query)
    for result in results:
        highlight.text_block = result.get_additional_fields().get(text_key, "")
        found = highlight.find_highlightable_words()
        # ``items()`` rather than the Python-2-only ``iteritems()`` so the
        # function runs on both Python 2 and Python 3.
        result.highlight_locations = [
            [location, location + len(term)]
            for term, locations in found.items()
            for location in locations
        ]
def no_query_found(self):
    """
    Print the highlighted text of the first hit of a highlighted query.

    NOTE(review): ``search_query`` is not defined inside this function; it
    is presumably a module-level name -- confirm, otherwise this raises
    ``NameError``.
    """
    all_results = SearchQuerySet(self).all()
    sqs = SearchQuerySet().filter(content=all_results).highlight()
    highlighter = Highlighter(search_query)
    result = sqs[0]
    # Bare lookup kept from the original: it has no effect beyond raising
    # when the hit carries no highlighted 'text' entry.
    result.highlighted['text'][0]
    # Function-call form prints the same on Python 2 and is valid on
    # Python 3, where the ``print`` statement is a syntax error.
    print(highlighter.highlight(result.text))
def build_results_for_page(games, query):
    """
    Describe each game as a dict ready for display.

    The ``url`` is passed through unchanged; ``title``, ``intro`` and the
    location's ``city`` and ``state`` are run through a highlighter built
    from ``query``. Returns the dicts as a list, in the order of ``games``.
    """
    marker = Highlighter(query)
    rows = []
    for game in games:
        rows.append({
            'url': game.url,
            'title': marker.highlight(game.title),
            'intro': marker.highlight(game.intro),
            'city': marker.highlight(game.location.city),
            'state': marker.highlight(game.location.state),
        })
    return rows
def _search(self, request, model, facets=None, page_size=20,
            highlight=True):
    '''
    Search ``model`` through a faceted search form and return one page.

    `request`
        The request; ``q`` and ``page`` are read from ``request.GET``
    `model`
        Limit the search to this model
    `facets`
        A list of facets to include with the results
    `page_size`
        Number of results per page
    `highlight`
        When true, highlight the query terms in each result's text

    Returns a dict holding ``page`` (pagination data) and ``objects``
    (dehydrated bundles), and ``facets`` when facets were requested. An
    invalid form returns the value of ``self.error_response`` instead.
    Raises ``Http404`` for a page that does not exist or is not an integer.
    '''
    form = FacetedSearchForm(request.GET, facets=facets or [],
                             models=(model,), load_all=True)
    if not form.is_valid():
        return self.error_response({'errors': form.errors}, request)
    results = form.search()

    paginator = Paginator(results, page_size)
    try:
        page_number = int(request.GET.get('page', 1))
        page = paginator.page(page_number)
    except (InvalidPage, ValueError):
        # ``int()`` raises ValueError for input such as ``?page=abc``;
        # answer with a 404 like any other page that cannot be served.
        raise Http404(ugettext("Sorry, no results on that page."))

    query = request.GET.get('q', '')
    highlighter = Highlighter(query)
    objects = []
    for result in page.object_list:
        if not result:
            continue
        text = result.text
        if highlight:
            text = highlighter.highlight(text)
        bundle = self.build_bundle(obj=result.object, request=request)
        bundle = self.full_dehydrate(bundle)
        bundle.data['text'] = text
        objects.append(bundle)

    url_template = self._url_template(query,
                                      form['selected_facets'].value())
    page_data = {
        'number': page.number,
        'per_page': paginator.per_page,
        'num_pages': paginator.num_pages,
        'page_range': paginator.page_range,
        'object_count': paginator.count,
        'url_template': url_template,
    }
    if page.has_next():
        page_data['url_next'] = url_template.format(
            page.next_page_number())
    if page.has_previous():
        page_data['url_prev'] = url_template.format(
            page.previous_page_number())

    object_list = {
        'page': page_data,
        'objects': objects,
    }
    if facets:
        object_list.update({'facets': results.facet_counts()})
    return object_list
def autocomplete(request):
    """
    Return up to five autocomplete suggestions for the ``q`` GET parameter.

    The ``Search`` model is queried on its ``content_auto`` field and each
    hit's ``search`` value is run through a highlighter that uses the
    ``no-highlight`` CSS class. The response is JSON of the form
    ``{"results": [...]}``.
    """
    term = request.GET.get('q', '')
    hits = SearchQuerySet().models(Search).autocomplete(content_auto=term)[:5]
    marker = Highlighter(term, css_class='no-highlight')

    suggestions = []
    for hit in hits:
        suggestions.append(marker.highlight(hit.search))

    payload = json.dumps({'results': suggestions})
    return HttpResponse(payload, content_type='application/json')
def autocomplete(request):
    """
    Return up to five autocomplete suggestions for the ``q`` GET parameter.

    The index is queried on its ``model_auto`` field and each hit's text is
    highlighted, with the highlighter limited to ``max_length=35``. The
    response is JSON of the form ``{"results": [...]}``.
    """
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().autocomplete(model_auto=query)[:5]
    highlight = Highlighter(query, max_length=35)
    suggestions = [highlight.highlight(result.text) for result in sqs]

    # Make sure you return a JSON object, not a bare list.
    # Otherwise, you could be vulnerable to an XSS attack.
    the_data = json.dumps({'results': suggestions})
    return HttpResponse(the_data, content_type='application/json')
def search_view(request):
    """
    Search the index for the ``q`` GET parameter and answer with JSON.

    Every hit contributes its object's ``title``, its ``content`` with the
    keyword highlighted, and its ``abs_uri``. The payload also carries
    ``success: true`` and ``by: "search"``.
    """
    keyword = request.GET['q']
    hits = SearchQuerySet().filter(content=keyword)
    marker = Highlighter(keyword)

    entries = []
    for hit in hits:
        entries.append({
            'title': hit.object.title,
            'content': marker.highlight(hit.object.content),
            'uri': hit.object.abs_uri,
        })

    payload = {
        'success': True,
        'by': 'search',
        'list': entries,
    }
    return HttpResponse(json.dumps(payload),
                        content_type="application/json")
def index(request):
    """
    Search by name for the ``key`` GET parameter and render the result page.

    Each result's ``name`` is replaced by its highlighted form before the
    result set and the total hit count are handed to the
    ``search/search.html`` template as ``data`` and ``counts``.
    """
    # Search term.
    term = request.GET['key']
    matches = SearchQuerySet().filter(name__contains=(term))

    # Signature for reference:
    # Highlighter(my_query, html_tag='', css_class='', max_length=100)
    marker = Highlighter(term, max_length=100)

    # Total number of hits.
    total = matches.count()

    # Apply the highlighting.
    for match in matches:
        match.name = marker.highlight(match.name)

    return render(request, 'search/search.html',
                  {'data': matches, 'counts': total})
def __init__(self, scholarship_key=None, search_result=None, to_highlight=''):
    """
    Build a display record for one scholarship search hit.

    `scholarship_key`
        Identifier used in the generated ``vs_href`` link
    `search_result`
        Search hit whose ``object`` is the scholarship model
    `to_highlight`
        Query text to highlight in the description snippet

    When there is no search result, or its model object is ``None``, only
    ``scholarship_key`` and an empty ``snippet`` are set.
    """
    self.scholarship_key = scholarship_key
    self.snippet = ''

    scholarship_model = (search_result.object
                         if search_result is not None else None)
    if scholarship_model is None:
        # Nothing to describe; previously this path failed with an
        # AttributeError before the ``None`` check was ever reached.
        return

    highlight = Highlighter(to_highlight, max_length=300)
    self.snippet = highlight.highlight(scholarship_model.description)

    self.deadline = scholarship_model.deadline
    self.source = scholarship_model.organization
    self.href = scholarship_model.third_party_url
    self.title = scholarship_model.title
    self.essay_required = scholarship_model.essay_required
    self.gender_restriction = scholarship_model.gender_restriction

    # Drop non-ASCII characters but keep a text string: formatting the raw
    # encoded bytes on Python 3 would embed "b'...'" in the URL.
    safe_title = scholarship_model.title[:100].encode(
        'ascii', 'ignore').decode('ascii')
    self.vs_href = u'/scholarship/{}?title={}'.format(self.scholarship_key,
                                                      safe_title)
def test_find_highlightable_words(self):
    """Query terms map to the offsets where they occur in the document."""
    marker = Highlighter("this test")
    marker.text_block = self.document_1
    expected = {"this": [0, 53, 79], "test": [10, 68]}
    self.assertEqual(marker.find_highlightable_words(), expected)

    # We don't stem for now.
    marker = Highlighter("highlight tests")
    marker.text_block = self.document_1
    expected = {"highlight": [22], "tests": []}
    self.assertEqual(marker.find_highlightable_words(), expected)

    # Ignore negated bits.
    marker = Highlighter("highlight -test")
    marker.text_block = self.document_1
    expected = {"highlight": [22]}
    self.assertEqual(marker.find_highlightable_words(), expected)
def test_find_highlightable_words(self):
    """Each query yields the expected term -> offsets map for document_1."""
    cases = (
        ('this test', {'this': [0, 53, 79], 'test': [10, 68]}),
        # We don't stem for now.
        ('highlight tests', {'highlight': [22], 'tests': []}),
        # Ignore negated bits.
        ('highlight -test', {'highlight': [22]}),
    )
    for query, expected in cases:
        highlighter = Highlighter(query)
        highlighter.text_block = self.document_1
        self.assertEqual(highlighter.find_highlightable_words(), expected)
def index(request):
    """
    Render the search page for the term given in the ``key`` GET parameter.

    Results are filtered on ``name__contains``; every result's ``name`` is
    overwritten with its highlighted version. The template receives the
    result set as ``data`` and the hit count as ``counts``.
    """
    # The search term.
    keyword = request.GET['key']
    found = SearchQuerySet().filter(name__contains=(keyword))

    # Highlighter(my_query, html_tag='', css_class='', max_length=100)
    lighter = Highlighter(keyword, max_length=100)

    # Total number of results.
    counts = found.count()

    # Set the highlighting on each result.
    for item in found:
        item.name = lighter.highlight(item.name)

    context = {
        'data': found,
        'counts': counts,
    }
    return render(request, 'search/search.html', context)
def autocomplete(request):
    """
    Return up to seven autocomplete suggestions for the ``q`` GET parameter.

    The query is escaped and stripped; queries shorter than two characters
    produce an empty list without touching the index. Each suggestion holds
    a ``name`` (the highlighted ``auto`` field with tags removed, outer dots
    stripped and whitespace collapsed) and a ``url`` (the object's absolute
    URL, or ``None`` when it has no ``get_absolute_url``).
    """
    query = escape(request.GET.get('q', '')).strip()
    suggestions = []

    if len(query) >= 2:
        lighter = Highlighter(query, max_length=64)
        for hit in SearchQuerySet().autocomplete(auto=query)[:7]:
            plain = striptags(lighter.highlight(hit.auto))
            name = ' '.join(plain.strip('.').split())

            if hasattr(hit.object, 'get_absolute_url'):
                url = hit.object.get_absolute_url()
            else:
                url = None

            suggestions.append({'name': name, 'url': url})

    return JSONResponse(request, suggestions)
def homeroom(request):
    """
    Show the homeroom page, optionally with course search results.

    A POST triggers an autocomplete search over ``Course`` for the
    ``course-search`` form value, capped at 10 hits; the context then gets
    ``courses`` (dicts of ``object`` and highlighted ``highlight`` text) and
    a ``suggestion`` of ``None``. In every case the context receives the
    user's ``sections`` (by descending section start date) and an
    ``AddCourseForm`` as ``form``.
    """
    user = request.user
    context = RequestContext(request)

    if request.method == 'POST':
        search_term = request.POST['course-search']
        found = SearchQuerySet().autocomplete(text=search_term)
        found = found.models(Course)[:10]
        keyword_marker = Highlighter(search_term, html_tag='span',
                                     css_class='keyword')

        courses = []
        for entry in found:
            courses.append(dict(
                object=entry.object,
                highlight=keyword_marker.highlight(entry.text),
            ))

        context['courses'] = courses
        # No spelling suggestion is produced at the moment.
        context['suggestion'] = None

    sections = []
    for assign in SectionAssign.objects.filter(user=user).order_by(
            '-section__start_date'):
        sections.append(assign.section)
    form = AddCourseForm(request=request)

    context['sections'] = sections
    context['form'] = form
    return render_to_response('homeroom/index.html', context)
def get_response(project_uri, query_string, include_n3=True):
    """
    Search the texts of one project and describe the matching resources.

    A hit is kept when it has an annotation link in the project graph or is
    a top-level resource of the project. Returns a dict with ``results``
    (one entry per kept hit, built by ``search_result_to_dict``),
    ``spelling_suggestion`` and, when ``include_n3`` is true, ``n3``: the
    collected metadata triples of the kept hits serialized as N3.
    """
    project_graph = projects.get_project_graph(project_uri)
    metadata = Graph()

    matches = SearchQuerySet().models(Text).filter(
        content=AutoQuery(query_string), project__exact=project_uri)

    # Body and title highlighters share the same markup settings.
    markup = {'html_tag': 'span', 'css_class': CSS_RESULT_MATCH_CLASS}
    highlighter = Highlighter(query_string, **markup)
    title_highlighter = TitleHighlighter(query_string, **markup)

    response = {
        'results': list(),
        'spelling_suggestion': matches.spelling_suggestion(),
    }

    for match in matches:
        text_uri = URIRef(match.get_stored_fields()['identifier'])

        relevant = (
            annotations.has_annotation_link(project_graph, text_uri)
            or projects.is_top_level_project_resource(project_uri, text_uri)
        )
        if not relevant:
            continue

        response['results'].append(
            search_result_to_dict(match, project_uri, highlighter,
                                  title_highlighter))
        if include_n3:
            metadata += utils.metadata_triples(project_graph, text_uri)

    if include_n3:
        response['n3'] = metadata.serialize(format='n3')

    return response
def highlight(text_block, query, **kwargs):
    """
    Highlight the terms of ``query`` inside ``text_block``.

    Extra keyword arguments are forwarded to ``Highlighter``. The
    highlighted text is returned wrapped in ``mark_safe``.
    """
    return mark_safe(Highlighter(query, **kwargs).highlight(text_block))
def test_find_window(self):
    """find_window picks the expected (start, end) window for each input."""
    # The query doesn't matter for this method, so ignore it.
    highlighter = Highlighter('')
    highlighter.text_block = self.document_1

    cases = [
        # No query.
        ({}, (0, 200)),

        # Nothing found.
        ({'highlight': [], 'tests': []}, (0, 200)),

        # Simple cases.
        ({'highlight': [0], 'tests': [100]}, (0, 200)),
        ({'highlight': [99], 'tests': [199]}, (99, 299)),
        ({'highlight': [0], 'tests': [201]}, (0, 200)),
        ({'highlight': [203], 'tests': [120]}, (120, 320)),
        ({'highlight': [], 'tests': [100]}, (100, 300)),
        ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),

        # Simple cases, with an outlier far outside the window.
        ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
        ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
        ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
        ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),

        # Density checks.
        ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]},
         (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220],
          'moof': [294, 299, 450]}, (100, 300)),
    ]

    for locations, expected_window in cases:
        self.assertEqual(highlighter.find_window(locations),
                         expected_window)
def test_find_highlightable_words(self):
    """Term offsets are reported for document_1 for several queries."""

    def words_found(query):
        # Fresh highlighter per query, always scanning document_1.
        marker = Highlighter(query)
        marker.text_block = self.document_1
        return marker.find_highlightable_words()

    self.assertEqual(words_found('this test'),
                     {'this': [0, 53, 79], 'test': [10, 68]})

    # We don't stem for now.
    self.assertEqual(words_found('highlight tests'),
                     {'highlight': [22], 'tests': []})

    # Ignore negated bits.
    self.assertEqual(words_found('highlight -test'), {'highlight': [22]})
def test_highlight(self):
    """highlight() renders the three sample documents as expected."""
    documents = (self.document_1, self.document_2, self.document_3)

    # Each scenario: a highlighter and the expected output per document.
    scenarios = (
        (
            Highlighter('this test'),
            (
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        ),
        (
            Highlighter('this test', html_tag='div', css_class=None),
            (
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        ),
        (
            Highlighter('content detection'),
            (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            ),
        ),
        (
            Highlighter('content detection', max_length=100),
            (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...',
            ),
        ),
    )

    for marker, expectations in scenarios:
        for document, expected in zip(documents, expectations):
            self.assertEqual(marker.highlight(document), expected)
def test_render_html(self):
    """render_html() marks up the given locations inside the window."""
    marker = Highlighter('this test')

    marker.text_block = self.document_1
    rendered = marker.render_html(
        {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
    self.assertEqual(
        rendered,
        '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
    )

    marker.text_block = self.document_2
    rendered = marker.render_html(
        {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
    self.assertEqual(
        rendered,
        'The content of words in no particular order causes nothing to occur.'
    )

    marker.text_block = self.document_3
    rendered = marker.render_html(
        {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
    self.assertEqual(
        rendered,
        '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'
    )

    marker = Highlighter('content detection')
    marker.text_block = self.document_3
    rendered = marker.render_html(
        {'content': [151], 'detection': [42]}, 42, 242)
    self.assertEqual(
        rendered,
        '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.'
    )
    rendered = marker.render_html(
        {'content': [151], 'detection': [42]}, 42, 200)
    self.assertEqual(
        rendered,
        '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...'
    )

    # One term found within another term.
    marker = Highlighter('this is')
    marker.text_block = self.document_1
    rendered = marker.render_html(
        {'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200)
    self.assertEqual(
        rendered,
        '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
    )

    # Single-term cases: (query, text block, locations, expected output).
    single_term_cases = (
        # Regression for repetition in the regular expression.
        ('i++', 'Foo is i++ in most cases.', {'i++': [7]},
         'Foo is <span class="highlighted">i++</span> in most cases.'),
        ('i**', 'Foo is i** in most cases.', {'i**': [7]},
         'Foo is <span class="highlighted">i**</span> in most cases.'),
        ('i..', 'Foo is i.. in most cases.', {'i..': [7]},
         'Foo is <span class="highlighted">i..</span> in most cases.'),
        ('i??', 'Foo is i?? in most cases.', {'i??': [7]},
         'Foo is <span class="highlighted">i??</span> in most cases.'),
        # Regression for highlighting already highlighted HTML terms.
        ('span', 'A span in spam makes html in a can.', {'span': [2]},
         'A <span class="highlighted">span</span> in spam makes html in a can.'),
        ('highlight', 'A span in spam makes highlighted html in a can.',
         {'highlight': [21]},
         'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.'),
    )
    for query, text_block, locations, expected in single_term_cases:
        marker = Highlighter(query)
        marker.text_block = text_block
        self.assertEqual(marker.render_html(locations, 0, 200), expected)
def test_render_html(self):
    """Check the HTML produced by render_html() for known locations."""
    check = self.assertEqual

    hl = Highlighter('this test')
    hl.text_block = self.document_1
    check(hl.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200),
          '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.')

    hl.text_block = self.document_2
    check(hl.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200),
          'The content of words in no particular order causes nothing to occur.')

    hl.text_block = self.document_3
    check(hl.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200),
          '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...')

    hl = Highlighter('content detection')
    hl.text_block = self.document_3
    check(hl.render_html({'content': [151], 'detection': [42]}, 42, 242),
          '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.')
    check(hl.render_html({'content': [151], 'detection': [42]}, 42, 200),
          '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...')

    # One term found within another term.
    hl = Highlighter('this is')
    hl.text_block = self.document_1
    check(hl.render_html({'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200),
          '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.')

    # Regression for repetition in the regular expression.
    hl = Highlighter('i++')
    hl.text_block = 'Foo is i++ in most cases.'
    check(hl.render_html({'i++': [7]}, 0, 200),
          'Foo is <span class="highlighted">i++</span> in most cases.')

    hl = Highlighter('i**')
    hl.text_block = 'Foo is i** in most cases.'
    check(hl.render_html({'i**': [7]}, 0, 200),
          'Foo is <span class="highlighted">i**</span> in most cases.')

    hl = Highlighter('i..')
    hl.text_block = 'Foo is i.. in most cases.'
    check(hl.render_html({'i..': [7]}, 0, 200),
          'Foo is <span class="highlighted">i..</span> in most cases.')

    hl = Highlighter('i??')
    hl.text_block = 'Foo is i?? in most cases.'
    check(hl.render_html({'i??': [7]}, 0, 200),
          'Foo is <span class="highlighted">i??</span> in most cases.')

    # Regression for highlighting already highlighted HTML terms.
    hl = Highlighter('span')
    hl.text_block = 'A span in spam makes html in a can.'
    check(hl.render_html({'span': [2]}, 0, 200),
          'A <span class="highlighted">span</span> in spam makes html in a can.')

    hl = Highlighter('highlight')
    hl.text_block = 'A span in spam makes highlighted html in a can.'
    check(hl.render_html({'highlight': [21]}, 0, 200),
          'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.')
def highlight_result(text, query, length=500):
    """
    Return ``text`` with the terms of ``query`` wrapped in ``<strong>`` tags.

    ``length`` is passed to the highlighter as its ``max_length``.
    """
    marker = Highlighter(query, html_tag='strong', max_length=length)
    return marker.highlight(text)
def return_search_results_ajax(request):
    """
    Process queries issued from the haystack search form and validate the
    form. If the form is valid, highlight the queried terms and return the
    top search results as highlighted snippets, encoded as JSON.

    The response object carries ``results_total``, ``results_shown``,
    ``webpage_urls`` and ``highlighted_snippets``; it is empty when the form
    is invalid.

    NOTE: Due to performance issues, at most ``max_results`` (taken from the
    form) search results are returned, no matter how many search results
    have been found. This shortcoming can be improved by providing more
    efficient database queries and adding a sophisticated caching
    functionality.
    """
    haystack_search_form = HaystackSearchForm(request.GET)
    response = {}

    if haystack_search_form.is_valid():
        cleaned_data = haystack_search_form.cleaned_data
        search_query = cleaned_data['search_query']
        search_source = cleaned_data['search_source']
        max_results = cleaned_data['max_results']

        search_source_to_model_name = {
            'venyoo_events': 'event',
            'crawled_webpages': 'crawledwebpage'}
        # ``None`` means the source does not restrict the result model.
        wanted_model = search_source_to_model_name.get(search_source)

        highlighter = Highlighter(
            search_query,
            html_tag='strong',
            css_class='highlighted',
            max_length=250)

        search_results = SearchQuerySet().filter(
            content=AutoQuery(search_query))

        # Walk the result set in batches. Stepping by the batch size also
        # covers the final partial batch, which ``ceil(count / 1000)``
        # dropped under Python 2 integer division.
        batch_size = 1000
        results = []
        for start in range(0, search_results.count(), batch_size):
            for result in search_results[start:start + batch_size]:
                if not isinstance(result, SearchResult):
                    continue
                if (wanted_model is not None
                        and result.model_name != wanted_model):
                    continue
                results.append(result)

        results_total = len(results)
        # URLs and snippets are derived from the filtered list itself, so
        # they always line up with ``results``; only the entries that are
        # returned are processed.
        shown = results[:max_results]

        response['results_total'] = results_total
        response['results_shown'] = (
            max_results if max_results <= results_total else results_total)
        response['webpage_urls'] = [
            result.get_stored_fields()['url'] for result in shown]
        response['highlighted_snippets'] = [
            highlighter.highlight(result.text) for result in shown]

    # ``content_type`` is the supported keyword; ``mimetype`` was removed
    # from ``HttpResponse`` in Django 1.7.
    return HttpResponse(json.dumps(response),
                        content_type='application/json')
def test_highlight(self):
    """
    Check ``Highlighter.highlight`` against the three fixture documents.

    Each scenario builds one highlighter (query plus keyword options) and
    lists the expected output for ``document_1``, ``document_2`` and
    ``document_3``, in that order.
    """
    documents = (self.document_1, self.document_2, self.document_3)

    scenarios = (
        # Default tag and CSS class.
        (
            'this test',
            {},
            (
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        ),
        # Custom tag without a CSS class.
        (
            'this test',
            {'html_tag': 'div', 'css_class': None},
            (
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        ),
        # Query words that first occur away from the start of the text.
        (
            'content detection',
            {},
            (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            ),
        ),
        # Same query with a shorter maximum length.
        (
            'content detection',
            {'max_length': 100},
            (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...',
            ),
        ),
    )

    for query, options, expected_outputs in scenarios:
        highlighter = Highlighter(query, **options)
        for document, expected in zip(documents, expected_outputs):
            self.assertEqual(highlighter.highlight(document), expected)
def slow_highlight(query, text):
    """
    Highlight ``query`` inside ``text`` with a default ``Highlighter``.

    Invoked only if the search backend does not support highlighting.
    """
    return Highlighter(query).highlight(text)
def highlighted_persref(text, query, **kwargs):
    """
    Highlight ``query`` in ``text`` and add person-reference links.

    The text is highlighted with ``<strong class="found">`` tags (window of
    at most 120 characters), post-processed by ``add_persref_links`` and
    returned marked as safe HTML.

    ``kwargs`` is accepted for call compatibility and is not used.
    """
    # Local import so the block does not depend on the file's import header.
    from django.utils.safestring import mark_safe

    highlighter = Highlighter(
        query, html_tag='strong', css_class='found', max_length=120)
    highlighted_text = highlighter.highlight(text)
    linked_text = add_persref_links(highlighted_text)

    # The processed text must not be used as a *format string*: any "{" or
    # "}" in the document would make str.format() raise, and calling
    # format_html() without arguments is deprecated in Django 5.0 and removed
    # in 6.0. Marking the string safe directly gives the same result for
    # brace-free text.
    # NOTE(review): this trusts Highlighter/add_persref_links to emit safe
    # markup, exactly as the previous format_html() call did -- confirm.
    return mark_safe(linked_text)
def test_render_html(self):
    """
    Check ``Highlighter.render_html`` with hand-picked word locations.

    The text block is assigned directly on the highlighter and
    ``render_html`` is called with explicit locations and window offsets.
    """

    def this_test_locations():
        # Fresh mapping for every call, as in a literal at the call site.
        return {"this": [0, 53, 79], "test": [10, 68]}

    def content_detection_locations():
        return {"content": [151], "detection": [42]}

    highlighter = Highlighter("this test")
    full_window_cases = (
        (
            self.document_1,
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
        ),
        (
            self.document_2,
            "The content of words in no particular order causes nothing to occur.",
        ),
        (
            self.document_3,
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
        ),
    )
    for document, expected in full_window_cases:
        highlighter.text_block = document
        rendered = highlighter.render_html(this_test_locations(), 0, 200)
        self.assertEqual(rendered, expected)

    highlighter = Highlighter("content detection")
    highlighter.text_block = self.document_3
    offset_window_cases = (
        (
            242,
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
        ),
        (
            200,
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
        ),
    )
    for end_offset, expected in offset_window_cases:
        rendered = highlighter.render_html(
            content_detection_locations(), 42, end_offset
        )
        self.assertEqual(rendered, expected)
def test_find_window(self):
    """
    Check ``Highlighter.find_window`` for a table of word locations.

    Every case maps the locations passed to ``find_window`` to the
    ``(start, end)`` window it is expected to return.
    """
    # The query doesn't matter for this method, so ignore it.
    highlighter = Highlighter('')
    highlighter.text_block = self.document_1

    cases = (
        # No query.
        ({}, (0, 200)),
        # Nothing found.
        ({'highlight': [], 'tests': []}, (0, 200)),
        # Simple cases.
        ({'highlight': [0], 'tests': [100]}, (0, 200)),
        ({'highlight': [99], 'tests': [199]}, (99, 299)),
        ({'highlight': [0], 'tests': [201]}, (0, 200)),
        ({'highlight': [203], 'tests': [120]}, (120, 320)),
        ({'highlight': [], 'tests': [100]}, (100, 300)),
        ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),
        # Simple cases, with an outlier far outside the window.
        ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
        ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
        ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
        ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),
        # Density checks.
        ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}, (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}, (100, 300)),
    )

    for locations, expected_window in cases:
        self.assertEqual(highlighter.find_window(locations), expected_window)
def custom_resumen(self, xfield):
    """
    Return ``xfield.resumen`` with the current search query highlighted.

    The query is read from the ``q`` query parameter of the request stored
    in ``self.context['request']``; the highlighted excerpt is limited to
    150 characters via ``max_length``.
    """
    request = self.context['request']
    # A request without "q" would otherwise hand None to Highlighter, which
    # expects a query string to split into words; fall back to an empty
    # query so that nothing is highlighted instead of failing.
    query = request.query_params.get('q') or ''
    highlighter = Highlighter(query, max_length=150)
    return highlighter.highlight(xfield.resumen)