Ejemplo n.º 1
0
def _do_search(self, request, model):
    """Search ``model`` and return one page of highlighted results.

    The search terms are read from the ``q`` GET parameter and the page
    number from ``page`` (default 1). Results are paginated 20 per page.

    Args:
        request: Incoming request; only the GET method is accepted.
        model: Model class the search is restricted to.

    Returns:
        The result of ``self.create_response`` for ``{'objects': [...]}``,
        where every entry is a dehydrated bundle whose ``text`` key holds
        the highlighted result text.

    Raises:
        Http404: If ``page`` is not an integer or is not a valid page.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).load_all().auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (InvalidPage, ValueError):
        # ValueError: int() rejected a non-numeric ``page`` value before the
        # paginator ever saw it.
        raise Http404("Sorry, no results on that page.")

    # The highlighter depends only on the query, so build it once.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        if not result:
            # Skip falsy entries in the page.
            continue
        text = highlighter.highlight(result.text)
        bundle = self.build_bundle(obj=result.object, request=request)
        bundle = self.full_dehydrate(bundle)
        bundle.data['text'] = text
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
Ejemplo n.º 2
0
 def get_results(self):
     """
     Override get_results to add the value of the field where query was found
     Also takes care of highlighting the query.

     Every stored string field other than 'text' whose lowercased value
     contains the lowercased query sets ``r.context`` to a dict with the
     field's display name ('field') and the highlighted value ('value').
     There is no early exit, so when several fields match the last one wins.
     """
     results = super(FindView, self).get_results()
     query = self.query.lower()
     highlight = Highlighter(query)
     for r in results:
         for field in r.get_stored_fields():
             value = getattr(r, field)
             # assume search index field 'text' is document field
             if field == 'text' or not isinstance(value, string_types):
                 continue
             if query not in value.lower():
                 continue
             # assume search index field name == model field name
             try:
                 name = r.object._meta.get_field(field).verbose_name
             except Exception:
                 # Fall back to the raw index field name; unlike a bare
                 # ``except:`` this lets SystemExit/KeyboardInterrupt through.
                 name = field
             r.context = {
                 'field': name,
                 'value': highlight.highlight(value)
             }
     return results
Ejemplo n.º 3
0
def homeroom(request):
    """Render the homeroom page, adding course search results on POST.

    On a POST request the ``course-search`` field is used as an autocomplete
    query over ``Course`` (first 10 hits); each hit is put in the context as
    a dict with the matched object and its highlighted text. The user's
    sections (newest start date first) and an ``AddCourseForm`` are always
    added to the context.
    """
    user = request.user
    context = RequestContext(request)

    if request.method == 'POST':
        query = request.POST['course-search']
        matches = SearchQuerySet().autocomplete(text=query).models(Course)[:10]
        marker = Highlighter(query, html_tag='span', css_class='keyword')
        context['courses'] = [
            {'object': match.object, 'highlight': marker.highlight(match.text)}
            for match in matches
        ]
        # Spelling suggestions are currently disabled.
        context['suggestion'] = None

    assignments = SectionAssign.objects.filter(user=user)
    context['sections'] = [
        assignment.section
        for assignment in assignments.order_by('-section__start_date')
    ]
    context['form'] = AddCourseForm(request=request)
    return render_to_response('homeroom/index.html', context)
Ejemplo n.º 4
0
def _do_search(self, request, model):
    """Search ``model`` and return one page of highlighted results.

    The search terms are read from the ``q`` GET parameter and the page
    number from ``page`` (default 1). Results are paginated 20 per page.

    Args:
        request: Incoming request; only the GET method is accepted.
        model: Model class the search is restricted to.

    Returns:
        The result of ``self.create_response`` for ``{'objects': [...]}``,
        where every entry is a dehydrated bundle whose ``text`` key holds
        the highlighted result text.

    Raises:
        Http404: If ``page`` is not an integer or is not a valid page.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (InvalidPage, ValueError):
        # ValueError: int() rejected a non-numeric ``page`` value before the
        # paginator ever saw it.
        raise Http404("Sorry, no results on that page.")

    # The highlighter depends only on the query, so build it once.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        text = highlighter.highlight(result.text)
        bundle = self.full_dehydrate(result.object)
        bundle.data['text'] = text
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
Ejemplo n.º 5
0
 def get_results(self):
     """
     Override get_results to add the value of the field where query was found
     Also takes care of highlighting the query.

     Every stored string field other than 'text' whose lowercased value
     contains the lowercased query sets ``r.context`` to a dict with the
     field's display name ('field') and the highlighted value ('value').
     There is no early exit, so when several fields match the last one wins.
     """
     results = super(FindView, self).get_results()
     query = self.query.lower()
     highlight = Highlighter(query)
     for r in results:
         for field in r.get_stored_fields():
             value = getattr(r, field)
             # assume search index field 'text' is document field
             if field == 'text' or not isinstance(value, string_types):
                 continue
             if query not in value.lower():
                 continue
             # assume search index field name == model field name
             try:
                 name = r.object._meta.get_field(field).verbose_name
             except Exception:
                 # Fall back to the raw index field name; unlike a bare
                 # ``except:`` this lets SystemExit/KeyboardInterrupt through.
                 name = field
             r.context = {
                 'field': name,
                 'value': highlight.highlight(value)
             }
     return results
Ejemplo n.º 6
0
    def _search(self, request, model, facets=None, page_size=20,
                highlight=True):
        """
        Run a faceted search restricted to `model` and return one page of
        results as a dict.

        `request`
            Request whose GET data feeds the search form and supplies the
            `q` and `page` values
        `model`
            Limit the search to this model
        `facets`
            A list of facets to include with the results
        `page_size`
            Number of results per page
        `highlight`
            When true, each result's text is run through the highlighter

        Returns a dict with `page` and `objects` keys (plus `facets` when
        facets were requested), or the value of `self.error_response` when
        the form does not validate.

        Raises Http404 when `page` is not an integer or not a valid page.
        """
        form = FacetedSearchForm(request.GET, facets=facets or [],
                                 models=(model,), load_all=True)
        if not form.is_valid():
            return self.error_response({'errors': form.errors}, request)
        results = form.search()

        paginator = Paginator(results, page_size)
        try:
            page = paginator.page(int(request.GET.get('page', 1)))
        except (InvalidPage, ValueError):
            # ValueError covers a non-numeric `page`, which int() rejects
            # before the paginator ever sees it.
            raise Http404(ugettext("Sorry, no results on that page."))

        objects = []
        query = request.GET.get('q', '')
        highlighter = Highlighter(query)
        for result in page.object_list:
            if not result:
                continue
            text = result.text
            if highlight:
                text = highlighter.highlight(text)
            bundle = self.build_bundle(obj=result.object, request=request)
            bundle = self.full_dehydrate(bundle)
            bundle.data['text'] = text
            objects.append(bundle)

        url_template = self._url_template(query,
                                          form['selected_facets'].value())
        page_data = {
            'number': page.number,
            'per_page': paginator.per_page,
            'num_pages': paginator.num_pages,
            'page_range': paginator.page_range,
            'object_count': paginator.count,
            'url_template': url_template,
        }
        # Neighbouring page links are only present when those pages exist.
        if page.has_next():
            page_data['url_next'] = url_template.format(
                page.next_page_number())
        if page.has_previous():
            page_data['url_prev'] = url_template.format(
                page.previous_page_number())

        object_list = {
            'page': page_data,
            'objects': objects,
        }
        if facets:
            object_list['facets'] = results.facet_counts()
        return object_list
Ejemplo n.º 7
0
def execute_highlighter(query, text_key, results):
    """Attach highlight spans for ``query`` to every result in ``results``.

    For each result, the text stored under ``text_key`` in its additional
    fields (an empty string when the key is absent) is scanned for the query
    words, and ``result.highlight_locations`` is set to a list of
    ``[start, end]`` pairs with ``end = start + len(word)``.

    Args:
        query: Search query handed to ``Highlighter``.
        text_key: Key of the text to scan in ``get_additional_fields()``.
        results: Iterable of search results; each one is modified in place.

    Returns:
        None.
    """
    highlight = Highlighter(query)
    for result in results:
        highlight.text_block = result.get_additional_fields().get(text_key, "")
        word_locations = highlight.find_highlightable_words()
        spans = []
        # dict.items() exists on both Python 2 and 3; iteritems() is
        # Python 2 only and raises AttributeError on Python 3.
        for word, starts in word_locations.items():
            spans.extend([start, start + len(word)] for start in starts)
        result.highlight_locations = spans
Ejemplo n.º 8
0
 def no_query_found(self):
     """Print the highlighted text of the first hit of a highlighted search.

     NOTE(review): ``search_query`` is not defined in this method; it is
     presumably a module-level name -- confirm it exists.
     """
     all_results = SearchQuerySet(self).all()
     #sqs = SearchQuerySet().filter(content='foo').highlight()
     sqs = SearchQuerySet().filter(content=all_results).highlight()
     highlighter = Highlighter(search_query)
     result = sqs[0]
     # The value is discarded; this line only has an effect if the lookup
     # itself raises.
     result.highlighted['text'][0]
     # print() with a single argument behaves the same on Python 2 and 3,
     # whereas the print statement is a SyntaxError on Python 3.
     print(highlighter.highlight(sqs[0].text))
Ejemplo n.º 9
0
 def no_query_found(self):
     """Print the highlighted text of the first hit of a highlighted search.

     NOTE(review): ``search_query`` is not defined in this method; it is
     presumably a module-level name -- confirm it exists.
     """
     all_results = SearchQuerySet(self).all()
     #sqs = SearchQuerySet().filter(content='foo').highlight()
     sqs = SearchQuerySet().filter(content=all_results).highlight()
     highlighter = Highlighter(search_query)
     result = sqs[0]
     # The value is discarded; this line only has an effect if the lookup
     # itself raises.
     result.highlighted['text'][0]
     # print() with a single argument behaves the same on Python 2 and 3,
     # whereas the print statement is a SyntaxError on Python 3.
     print(highlighter.highlight(sqs[0].text))
Ejemplo n.º 10
0
	def build_results_for_page(games, query):
		"""Return one dict per game with its URL and highlighted display fields.

		The title, intro, city and state of every game are passed through a
		``Highlighter`` built from ``query``; the URL is copied unchanged.
		"""
		marker = Highlighter(query)
		rows = []
		for game in games:
			rows.append({
				'url': game.url,
				'title': marker.highlight(game.title),
				'intro': marker.highlight(game.intro),
				'city': marker.highlight(game.location.city),
				'state': marker.highlight(game.location.state),
			})
		return rows
Ejemplo n.º 11
0
    def _search(self, request, model, facets=None, page_size=20,
                highlight=True):
        '''
        Run a faceted search restricted to `model` and return one page of
        results as a dict.

        `request`
            Request whose GET data feeds the search form and supplies the
            `q` and `page` values
        `model`
            Limit the search to this model
        `facets`
            A list of facets to include with the results
        `page_size`
            Number of results per page
        `highlight`
            When true, each result's text is run through the highlighter

        Returns a dict with `page` and `objects` keys (plus `facets` when
        facets were requested), or the value of `self.error_response` when
        the form does not validate.

        Raises Http404 when `page` is not an integer or not a valid page.
        '''
        form = FacetedSearchForm(request.GET, facets=facets or [],
                                 models=(model,), load_all=True)
        if not form.is_valid():
            return self.error_response({'errors': form.errors}, request)
        results = form.search()

        paginator = Paginator(results, page_size)
        try:
            page = paginator.page(int(request.GET.get('page', 1)))
        except (InvalidPage, ValueError):
            # ValueError covers a non-numeric `page`, which int() rejects
            # before the paginator ever sees it.
            raise Http404(ugettext("Sorry, no results on that page."))

        objects = []
        query = request.GET.get('q', '')
        highlighter = Highlighter(query)
        for result in page.object_list:
            if not result:
                continue
            text = result.text
            if highlight:
                text = highlighter.highlight(text)
            bundle = self.build_bundle(obj=result.object, request=request)
            bundle = self.full_dehydrate(bundle)
            bundle.data['text'] = text
            objects.append(bundle)

        url_template = self._url_template(query,
                                          form['selected_facets'].value())
        page_data = {
            'number': page.number,
            'per_page': paginator.per_page,
            'num_pages': paginator.num_pages,
            'page_range': paginator.page_range,
            'object_count': paginator.count,
            'url_template': url_template,
        }
        # Neighbouring page links are only present when those pages exist.
        if page.has_next():
            page_data['url_next'] = url_template.format(
                page.next_page_number())
        if page.has_previous():
            page_data['url_prev'] = url_template.format(
                page.previous_page_number())

        object_list = {
            'page': page_data,
            'objects': objects,
        }
        if facets:
            object_list['facets'] = results.facet_counts()
        return object_list
Ejemplo n.º 12
0
def autocomplete(request):
    """Return up to five highlighted autocomplete suggestions as JSON.

    The ``q`` GET parameter (default empty) is matched against the
    ``content_auto`` field of ``Search``; the response body is a JSON object
    with a single ``results`` list.
    """
    term = request.GET.get('q', '')
    matches = SearchQuerySet().models(Search).autocomplete(content_auto=term)[:5]
    marker = Highlighter(term, css_class='no-highlight')
    suggestions = []
    for match in matches:
        suggestions.append(marker.highlight(match.search))
    payload = json.dumps({'results': suggestions})
    return HttpResponse(payload, content_type='application/json')
Ejemplo n.º 13
0
def autocomplete(request):
    """Return up to five highlighted autocomplete suggestions as JSON.

    The ``q`` GET parameter (default empty) is matched against the
    ``model_auto`` field; each hit's text is highlighted with a maximum
    length of 35. The response body is a JSON object with a single
    ``results`` list.
    """
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().autocomplete(model_auto=query)[:5]
    highlight = Highlighter(query, max_length=35)
    suggestions = [highlight.highlight(result.text) for result in sqs]
    # Debug output of the hit count; print() with one argument behaves the
    # same on Python 2 and 3, unlike the print statement.
    print(len(sqs))
    # Make sure you return a JSON object, not a bare list.
    # Otherwise, you could be vulnerable to an XSS attack.
    the_data = json.dumps({'results': suggestions})
    return HttpResponse(the_data, content_type='application/json')
Ejemplo n.º 14
0
def search_view(request):
    """Search content for the ``q`` GET parameter and return the hits as JSON.

    Each hit contributes its object's title, highlighted content and
    ``abs_uri``. The ``q`` parameter is required.
    """
    keyword = request.GET['q']
    matches = SearchQuerySet().filter(content=keyword)
    marker = Highlighter(keyword)

    entries = []
    for match in matches:
        entries.append({
            'title': match.object.title,
            'content': marker.highlight(match.object.content),
            'uri': match.object.abs_uri,
        })

    payload = {'success': True, 'by': 'search', 'list': entries}
    return HttpResponse(json.dumps(payload), content_type="application/json")
Ejemplo n.º 15
0
def index(request):
	"""Search by name for the ``key`` GET parameter and render the hits.

	The name of every hit is replaced by its highlighted form before the
	results and their total count are handed to the template.
	"""
	# Search term.
	term = request.GET['key']
	matches = SearchQuerySet().filter(name__contains=term)
	# Signature: Highlighter(my_query, html_tag='', css_class='', max_length=100)
	marker = Highlighter(term, max_length=100)
	# Total number of hits.
	total = matches.count()
	for match in matches:
		# Apply the highlighting.
		match.name = marker.highlight(match.name)
	# Encoding notes: unicode -> str is unicodestring.encode('utf-8');
	# str -> unicode is unicode(utf8string, 'utf-8').
	template_context = {'data': matches, 'counts': total}
	return render(request, 'search/search.html', template_context)
Ejemplo n.º 16
0
 def __init__(self, scholarship_key=None, search_result=None, to_highlight=''):
     """Build a display record from a scholarship search result.

     Args:
         scholarship_key: Identifier placed in the ``vs_href`` link.
         search_result: Search result whose ``object`` is the scholarship
             model. Despite the ``None`` default it is required: every
             field below is read from it.
         to_highlight: Query text to highlight in the description snippet
             (snippet length is capped at 300).
     """
     scholarship_model = search_result.object
     self.scholarship_key = scholarship_key
     highlight = Highlighter(to_highlight, max_length=300)
     self.snippet = highlight.highlight(scholarship_model.description)
     # No None check here: ``description`` was already read above, so a
     # missing model has failed before this point.
     self.deadline = scholarship_model.deadline
     self.source = scholarship_model.organization
     self.href = scholarship_model.third_party_url
     self.title = scholarship_model.title
     self.essay_required = scholarship_model.essay_required
     self.gender_restriction = scholarship_model.gender_restriction
     # Drop non-ASCII characters, then decode back to text so format() does
     # not embed a bytes repr (b'...') in the URL on Python 3.
     safe_title = scholarship_model.title[:100].encode('ascii', 'ignore').decode('ascii')
     self.vs_href = u'/scholarship/{}?title={}'.format(self.scholarship_key, safe_title)
Ejemplo n.º 17
0
    def test_find_highlightable_words(self):
        # Each case pairs a query with the word -> offsets mapping expected
        # when scanning document_1.
        cases = (
            ("this test", {"this": [0, 53, 79], "test": [10, 68]}),
            # We don't stem for now.
            ("highlight tests", {"highlight": [22], "tests": []}),
            # Ignore negated bits.
            ("highlight -test", {"highlight": [22]}),
        )
        for query, expected in cases:
            highlighter = Highlighter(query)
            highlighter.text_block = self.document_1
            self.assertEqual(highlighter.find_highlightable_words(), expected)
Ejemplo n.º 18
0
    def test_find_highlightable_words(self):
        # Each case pairs a query with the word -> offsets mapping expected
        # when scanning document_1.
        cases = (
            ('this test', {'this': [0, 53, 79], 'test': [10, 68]}),
            # We don't stem for now.
            ('highlight tests', {'highlight': [22], 'tests': []}),
            # Ignore negated bits.
            ('highlight -test', {'highlight': [22]}),
        )
        for query, expected in cases:
            highlighter = Highlighter(query)
            highlighter.text_block = self.document_1
            self.assertEqual(highlighter.find_highlightable_words(), expected)
Ejemplo n.º 19
0
def index(request):
    """Search by name for the ``key`` GET parameter and render the hits.

    The name of every hit is replaced by its highlighted form before the
    results and their total count are handed to the template.
    """
    # Search term.
    term = request.GET['key']
    matches = SearchQuerySet().filter(name__contains=term)
    # Signature: Highlighter(my_query, html_tag='', css_class='', max_length=100)
    marker = Highlighter(term, max_length=100)
    # Total number of hits.
    total = matches.count()
    for match in matches:
        # Apply the highlighting.
        match.name = marker.highlight(match.name)
    # Encoding notes: unicode -> str is unicodestring.encode('utf-8');
    # str -> unicode is unicode(utf8string, 'utf-8').
    template_context = {'data': matches, 'counts': total}
    return render(request, 'search/search.html', template_context)
Ejemplo n.º 20
0
def autocomplete(request):
	"""Return up to seven autocomplete suggestions for the ``q`` GET parameter.

	The query is escaped and stripped; queries shorter than two characters
	yield an empty list. Each suggestion carries the matched text (tags
	removed, whitespace collapsed) and the object's absolute URL, or None
	when the object has no ``get_absolute_url``.
	"""
	query = escape(request.GET.get('q', '')).strip()
	suggestions = []
	if len(query) >= 2:
		lighter = Highlighter(query, max_length=64)
		for result in SearchQuerySet().autocomplete(auto=query)[:7]:
			plain = striptags(lighter.highlight(result.auto))
			# Trim surrounding dots, then collapse whitespace runs.
			name = ' '.join(plain.strip('.').split())
			if hasattr(result.object, 'get_absolute_url'):
				url = result.object.get_absolute_url()
			else:
				url = None
			suggestions.append({'name': name, 'url': url})
	return JSONResponse(request, suggestions)
Ejemplo n.º 21
0
def homeroom(request):
    """Render the homeroom page, adding course search results on POST.

    On a POST request the ``course-search`` field is used as an autocomplete
    query over ``Course`` (first 10 hits); each hit is put in the context as
    a dict with the matched object and its highlighted text. The user's
    sections (newest start date first) and an ``AddCourseForm`` are always
    added to the context.
    """
    user = request.user
    context = RequestContext(request)

    if request.method == 'POST':
        query = request.POST['course-search']
        matches = SearchQuerySet().autocomplete(text=query).models(Course)[:10]
        marker = Highlighter(query, html_tag='span', css_class='keyword')
        context['courses'] = [
            {'object': match.object, 'highlight': marker.highlight(match.text)}
            for match in matches
        ]
        # Spelling suggestions are currently disabled.
        context['suggestion'] = None

    assignments = SectionAssign.objects.filter(user=user)
    context['sections'] = [
        assignment.section
        for assignment in assignments.order_by('-section__start_date')
    ]
    context['form'] = AddCourseForm(request=request)
    return render_to_response('homeroom/index.html', context)
Ejemplo n.º 22
0
def get_response(project_uri, query_string, include_n3=True):
    """Search a project's texts and return the matching results as a dict.

    Only results that have an annotation link in the project graph, or that
    are top-level resources of the project, are included.

    Args:
        project_uri: URI of the project whose texts are searched.
        query_string: Raw search query.
        include_n3: When true, metadata triples of the included texts are
            collected and serialized under the ``n3`` key.

    Returns:
        A dict with ``results`` and ``spelling_suggestion`` keys, plus
        ``n3`` when ``include_n3`` is true.
    """
    response = {'results': []}

    project_graph = projects.get_project_graph(project_uri)
    graph = Graph()

    query_set = SearchQuerySet().models(Text).filter(
        content=AutoQuery(query_string), project__exact=project_uri)

    # Both highlighters share the same markup options.
    markup = {'html_tag': 'span', 'css_class': CSS_RESULT_MATCH_CLASS}
    highlighter = Highlighter(query_string, **markup)
    title_highlighter = TitleHighlighter(query_string, **markup)

    response['spelling_suggestion'] = query_set.spelling_suggestion()

    for result in query_set:
        text_uri = URIRef(result.get_stored_fields()['identifier'])

        belongs_to_project = (
            annotations.has_annotation_link(project_graph, text_uri)
            or projects.is_top_level_project_resource(project_uri, text_uri))
        if not belongs_to_project:
            continue

        response['results'].append(
            search_result_to_dict(result, project_uri, highlighter,
                                  title_highlighter))
        if include_n3:
            graph += utils.metadata_triples(project_graph, text_uri)

    if include_n3:
        response['n3'] = graph.serialize(format='n3')

    return response
Ejemplo n.º 23
0
def highlight(text_block, query, **kwargs):
	"""Highlight ``query`` in ``text_block`` and mark the result as safe.

	Extra keyword arguments are forwarded to ``Highlighter``.
	"""
	return mark_safe(Highlighter(query, **kwargs).highlight(text_block))
Ejemplo n.º 24
0
    def test_find_window(self):
        # The query doesn't matter for this method, so ignore it.
        highlighter = Highlighter('')
        highlighter.text_block = self.document_1

        # Each case pairs the word -> offsets mapping handed to find_window()
        # with the (start, end) window it must return.
        cases = [
            # No query.
            ({}, (0, 200)),

            # Nothing found.
            ({'highlight': [], 'tests': []}, (0, 200)),

            # Simple cases.
            ({'highlight': [0], 'tests': [100]}, (0, 200)),
            ({'highlight': [99], 'tests': [199]}, (99, 299)),
            ({'highlight': [0], 'tests': [201]}, (0, 200)),
            ({'highlight': [203], 'tests': [120]}, (120, 320)),
            ({'highlight': [], 'tests': [100]}, (100, 300)),
            ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),

            # Simple cases, with an outlier far outside the window.
            ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
            ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
            ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
            ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),

            # Density checks.
            ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]},
             (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220],
              'moof': [294, 299, 450]}, (100, 300)),
        ]
        for locations, expected_window in cases:
            self.assertEqual(highlighter.find_window(locations),
                             expected_window)
Ejemplo n.º 25
0
    def test_find_highlightable_words(self):
        # Each case pairs a query with the word -> offsets mapping expected
        # when scanning document_1.
        cases = (
            ('this test', {'this': [0, 53, 79], 'test': [10, 68]}),
            # We don't stem for now.
            ('highlight tests', {'highlight': [22], 'tests': []}),
            # Ignore negated bits.
            ('highlight -test', {'highlight': [22]}),
        )
        for query, expected in cases:
            highlighter = Highlighter(query)
            highlighter.text_block = self.document_1
            self.assertEqual(highlighter.find_highlightable_words(), expected)
Ejemplo n.º 26
0
    def test_highlight(self):
        # Every scenario is (query, Highlighter options, expected output for
        # document_1, document_2 and document_3 in that order).
        documents = (self.document_1, self.document_2, self.document_3)
        scenarios = (
            ('this test', {}, (
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            )),
            ('this test', {'html_tag': 'div', 'css_class': None}, (
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.',
                u'The content of words in no particular order causes nothing to occur.',
                u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            )),
            ('content detection', {}, (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            )),
            ('content detection', {'max_length': 100}, (
                u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...',
                u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...',
            )),
        )
        for query, options, expected_outputs in scenarios:
            highlighter = Highlighter(query, **options)
            for document, expected in zip(documents, expected_outputs):
                self.assertEqual(highlighter.highlight(document), expected)
Ejemplo n.º 27
0
    def test_render_html(self):
        def check(highlighter, text, locations, end, expected, start=0):
            # Point the highlighter at ``text`` and compare the rendered
            # window [start, end) against ``expected``.
            highlighter.text_block = text
            self.assertEqual(
                highlighter.render_html(locations, start, end), expected)

        this_test = Highlighter('this test')
        this_test_locations = {'this': [0, 53, 79], 'test': [10, 68]}
        check(
            this_test, self.document_1, this_test_locations, 200,
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        )
        check(
            this_test, self.document_2, this_test_locations, 200,
            'The content of words in no particular order causes nothing to occur.'
        )
        check(
            this_test, self.document_3, this_test_locations, 200,
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'
        )

        content_detection = Highlighter('content detection')
        content_locations = {'content': [151], 'detection': [42]}
        check(
            content_detection, self.document_3, content_locations, 242,
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            start=42
        )
        check(
            content_detection, self.document_3, content_locations, 200,
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
            start=42
        )

        # One term found within another term.
        check(
            Highlighter('this is'), self.document_1,
            {'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 200,
            '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        )

        # Regression for repetition in the regular expression.
        for term in ('i++', 'i**', 'i..', 'i??'):
            check(
                Highlighter(term), 'Foo is %s in most cases.' % term,
                {term: [7]}, 200,
                'Foo is <span class="highlighted">%s</span> in most cases.' % term
            )

        # Regression for highlighting already highlighted HTML terms.
        check(
            Highlighter('span'), 'A span in spam makes html in a can.',
            {'span': [2]}, 200,
            'A <span class="highlighted">span</span> in spam makes html in a can.'
        )
        check(
            Highlighter('highlight'),
            'A span in spam makes highlighted html in a can.',
            {'highlight': [21]}, 200,
            'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.'
        )
Ejemplo n.º 28
0
    def test_render_html(self):
        """Check ``Highlighter.render_html`` against hand-written expected markup.

        Each case sets ``text_block`` directly, passes precomputed match
        offsets plus a start/end window, and compares the rendered string.
        """
        span_highlighter = Highlighter('this test')
        documents_and_markup = (
            (
                self.document_1,
                '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
            ),
            (
                self.document_2,
                'The content of words in no particular order causes nothing to occur.',
            ),
            (
                self.document_3,
                '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        )
        for document, markup in documents_and_markup:
            span_highlighter.text_block = document
            rendered = span_highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
            self.assertEqual(rendered, markup)

        # Same document and offsets, rendered with a window starting at 42
        # and two different end offsets.
        windowed = Highlighter('content detection')
        windowed.text_block = self.document_3
        window_ends_and_markup = (
            (
                242,
                '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            ),
            (
                200,
                '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
            ),
        )
        for window_end, markup in window_ends_and_markup:
            rendered = windowed.render_html({'content': [151], 'detection': [42]}, 42, window_end)
            self.assertEqual(rendered, markup)

        # One term found within another term.
        nested = Highlighter('this is')
        nested.text_block = self.document_1
        nested_markup = '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        self.assertEqual(
            nested.render_html({'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200),
            nested_markup,
        )

        # Regression for repetition in the regular expression, followed by the
        # regression for highlighting already highlighted HTML terms.
        # Columns: query term, text block, offset of the term, expected markup.
        regression_cases = (
            ('i++', 'Foo is i++ in most cases.', 7, 'Foo is <span class="highlighted">i++</span> in most cases.'),
            ('i**', 'Foo is i** in most cases.', 7, 'Foo is <span class="highlighted">i**</span> in most cases.'),
            ('i..', 'Foo is i.. in most cases.', 7, 'Foo is <span class="highlighted">i..</span> in most cases.'),
            ('i??', 'Foo is i?? in most cases.', 7, 'Foo is <span class="highlighted">i??</span> in most cases.'),
            ('span', 'A span in spam makes html in a can.', 2, 'A <span class="highlighted">span</span> in spam makes html in a can.'),
            ('highlight', 'A span in spam makes highlighted html in a can.', 21, 'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.'),
        )
        for term, text, offset, markup in regression_cases:
            regression = Highlighter(term)
            regression.text_block = text
            self.assertEqual(regression.render_html({term: [offset]}, 0, 200), markup)
Ejemplo n.º 29
0
Archivo: utils.py Proyecto: xyzz/Misago
def highlight_result(text, query, length=500):
    """Return ``text`` with the terms of ``query`` highlighted.

    A ``Highlighter`` is built with ``html_tag='strong'`` and
    ``max_length=length``; the value returned is whatever its
    ``highlight`` method produces for ``text``.
    """
    return Highlighter(query, html_tag='strong', max_length=length).highlight(text)
Ejemplo n.º 30
0
def return_search_results_ajax(request):
    """
    Process queries issued from the haystack search form and
    validate the form. If the form is valid, highlight the queried terms and
    return the best matches as highlighted snippets in a JSON response.

    The JSON object is empty when the form is invalid. Otherwise it holds:

    * ``results_total`` -- number of hits left after filtering by source
    * ``results_shown`` -- the smaller of ``max_results`` and ``results_total``
    * ``webpage_urls`` -- stored ``url`` field of each returned hit
    * ``highlighted_snippets`` -- highlighted text of each returned hit,
      in the same order as ``webpage_urls``

    NOTE: Due to performance issues, at most ``max_results`` search results
    are returned, no matter how many search results have been found. This
    shortcoming can be improved by providing more efficient database
    queries and adding a sophisticated caching functionality.
    """
    haystack_search_form = HaystackSearchForm(request.GET)
    response = {}

    if haystack_search_form.is_valid():
        search_query = haystack_search_form.cleaned_data['search_query']
        search_source = haystack_search_form.cleaned_data['search_source']
        max_results = haystack_search_form.cleaned_data['max_results']

        search_source_to_model_name = {
            'venyoo_events': 'event',
            'crawled_webpages': 'crawledwebpage'}
        # None means "any source": no model-name filter is applied.
        wanted_model_name = search_source_to_model_name.get(search_source)

        highlighter = Highlighter(
            search_query,
            html_tag='strong',
            css_class='highlighted',
            max_length=250)

        search_results = SearchQuerySet().filter(content=AutoQuery(search_query))
        total_hits = search_results.count()
        batch_size = 1000

        # Walk the queryset in fixed-size slices. Stepping with range() avoids
        # computing a batch count by division, which truncates under Python 2
        # and would skip the final (or only) partial batch.
        results = []
        for start in range(0, total_hits, batch_size):
            for result in search_results[start:start + batch_size]:
                if not isinstance(result, SearchResult):
                    continue
                if (wanted_model_name is not None
                        and result.model_name != wanted_model_name):
                    continue
                results.append(result)

        results_total = len(results)
        # Only the hits that are actually returned need their URL looked up
        # and their text highlighted; deriving both lists from the same slice
        # also keeps them aligned with each other.
        shown_results = results[:max_results]

        response['results_total'] = results_total
        response['results_shown'] = max_results if max_results <= results_total else results_total
        response['webpage_urls'] = [
            result.get_stored_fields()['url'] for result in shown_results]
        response['highlighted_snippets'] = [
            highlighter.highlight(result.text) for result in shown_results]

    # content_type is accepted by old and new Django versions alike; the
    # mimetype alias was removed in Django 1.7.
    return HttpResponse(json.dumps(response), content_type='application/json')
Ejemplo n.º 31
0
 def test_highlight(self):
     """Check ``Highlighter.highlight`` on the three fixture documents.

     Four highlighter configurations are exercised in turn; each is run
     against ``document_1``, ``document_2`` and ``document_3`` and compared
     with the expected snippet for that document.
     """
     def check(highlighter, expected_snippets):
         # Expected snippets are listed in document_1..document_3 order.
         documents = (self.document_1, self.document_2, self.document_3)
         for document, expected in zip(documents, expected_snippets):
             self.assertEqual(highlighter.highlight(document), expected)

     # Default tag and CSS class.
     check(Highlighter('this test'), (
         u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
         u'The content of words in no particular order causes nothing to occur.',
         u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
     ))

     # Custom tag with the CSS class disabled.
     check(Highlighter('this test', html_tag='div', css_class=None), (
         u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.',
         u'The content of words in no particular order causes nothing to occur.',
         u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
     ))

     # Different query, default settings.
     check(Highlighter('content detection'), (
         u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.',
         u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
         u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
     ))

     # Same query with max_length=100.
     check(Highlighter('content detection', max_length=100), (
         u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...',
         u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
         u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...',
     ))
Ejemplo n.º 32
0
def highlight_result(text, query, length=500):
    """Return ``text`` with the terms of ``query`` highlighted.

    The ``Highlighter`` is configured with ``html_tag='strong'`` and
    ``max_length=length``; its ``highlight`` result is returned unchanged.
    """
    highlighter = Highlighter(query, html_tag='strong', max_length=length)
    return highlighter.highlight(text)
Ejemplo n.º 33
0
def slow_highlight(query, text):
    """Highlight ``query`` in ``text`` with a default-configured ``Highlighter``.

    Invoked only if the search backend does not support highlighting.
    """
    return Highlighter(query).highlight(text)
Ejemplo n.º 34
0
def highlighted_persref(text, query, **kwargs):
    """Highlight ``query`` in ``text``, then add person-reference links.

    The text is highlighted with ``<strong class="found">`` markup limited
    to ``max_length=120``, passed through ``add_persref_links`` and finally
    through ``format_html``. Extra keyword arguments are accepted but unused.
    """
    highlighted = Highlighter(
        query,
        html_tag='strong',
        css_class='found',
        max_length=120,
    ).highlight(text)
    return format_html(add_persref_links(highlighted))
Ejemplo n.º 35
0
    def test_render_html(self):
        """Check ``Highlighter.render_html`` against hand-written expected markup.

        ``text_block`` is assigned directly and the match offsets and window
        bounds are passed in by hand; only the rendered string is compared.
        """
        span_highlighter = Highlighter("this test")
        documents_and_markup = (
            (
                self.document_1,
                '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
            ),
            (
                self.document_2,
                "The content of words in no particular order causes nothing to occur.",
            ),
            (
                self.document_3,
                '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
            ),
        )
        for document, markup in documents_and_markup:
            span_highlighter.text_block = document
            rendered = span_highlighter.render_html(
                {"this": [0, 53, 79], "test": [10, 68]}, 0, 200
            )
            self.assertEqual(rendered, markup)

        # Same document and offsets, rendered with a window starting at 42
        # and two different end offsets.
        windowed = Highlighter("content detection")
        windowed.text_block = self.document_3
        window_ends_and_markup = (
            (
                242,
                '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
            ),
            (
                200,
                '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
            ),
        )
        for window_end, markup in window_ends_and_markup:
            rendered = windowed.render_html(
                {"content": [151], "detection": [42]}, 42, window_end
            )
            self.assertEqual(rendered, markup)
Ejemplo n.º 36
0
    def test_find_window(self):
        """Check the ``(start, end)`` window ``find_window`` picks for given offsets.

        Every case maps a dict of term -> match offsets to the window the
        highlighter is expected to return.
        """
        # The query doesn't matter for this method, so ignore it.
        highlighter = Highlighter('')
        highlighter.text_block = self.document_1

        expectations = (
            # No query.
            ({}, (0, 200)),
            # Nothing found.
            ({'highlight': [], 'tests': []}, (0, 200)),
            # Simple cases.
            ({'highlight': [0], 'tests': [100]}, (0, 200)),
            ({'highlight': [99], 'tests': [199]}, (99, 299)),
            ({'highlight': [0], 'tests': [201]}, (0, 200)),
            ({'highlight': [203], 'tests': [120]}, (120, 320)),
            ({'highlight': [], 'tests': [100]}, (100, 300)),
            ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),
            # Simple cases, with an outlier far outside the window.
            ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
            ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
            ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
            ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),
            # Density checks.
            ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}, (100, 300)),
        )
        for locations, window in expectations:
            self.assertEqual(highlighter.find_window(locations), window)
Ejemplo n.º 37
0
def slow_highlight(query, text):
    """Return ``text`` highlighted for ``query`` using a default ``Highlighter``.

    Invoked only if the search backend does not support highlighting.
    """
    highlighter = Highlighter(query)
    return highlighter.highlight(text)
Ejemplo n.º 38
0
 def custom_resumen(self, xfield):
     """Return ``xfield.resumen`` highlighted for the request's ``q`` parameter.

     The query is read from ``self.context['request'].query_params`` and the
     highlighter is created with ``max_length=150``.
     """
     search_terms = self.context['request'].query_params.get('q')
     # Only max_length is passed; html_tag and css_class are not overridden.
     return Highlighter(search_terms, max_length=150).highlight(xfield.resumen)