예제 #1
0
파일: __init__.py 프로젝트: varunarora/OC
    def render_html(self, highlight_locations=None, start_offset=None, end_offset=None):
        """Return a window of ``self.text_block`` with matched terms wrapped in HTML.

        Args:
            highlight_locations: Mapping of term -> iterable of offsets into
                ``self.text_block`` where that term is expected to start.
                Each term is compared against the lowercased text, so terms
                must already be lowercase to match. ``None`` means "no terms".
            start_offset: Index into ``self.text_block`` where the window
                begins. ``None`` means the start of the block.
            end_offset: Index where the window ends (exclusive). ``None``
                means the end of the block.

        Returns:
            The windowed text with every verified, non-overlapping match
            wrapped in ``self.html_tag`` (with ``self.css_class`` when set),
            prefixed with ``...`` when the window starts after the beginning
            of the block and suffixed with ``...`` when it stops before the end.
        """
        # The signature advertises None defaults, so normalise them here;
        # otherwise the arithmetic and comparisons below fail on None.
        if highlight_locations is None:
            highlight_locations = {}
        if start_offset is None:
            start_offset = 0
        if end_offset is None:
            end_offset = len(self.text_block)

        # Start by chopping the block down to the proper window.
        text = self.text_block[start_offset:end_offset]

        # Invert highlight_locations into a sorted list of
        # (window-relative location, term) pairs.
        loc_to_term = sorted(
            (loc - start_offset, term)
            for term, locations in highlight_locations.items()
            for loc in locations
        )

        # Prepare the highlight template.
        if self.css_class:
            hl_start = '<%s class="%s">' % (self.html_tag, self.css_class)
        else:
            hl_start = '<%s>' % (self.html_tag)

        hl_end = '</%s>' % self.html_tag

        # Collect the output as pieces and join once at the end.
        pieces = []
        # Index in `text` up to which content has already been emitted.
        matched_so_far = 0

        for cur, cur_str in loc_to_term:
            # Skip locations that fall before the window (negative after the
            # shift above; as a slice index they would wrap around to the end
            # of the text) and matches overlapping one already emitted. Both
            # cases would otherwise duplicate text in the output.
            if cur < matched_so_far:
                continue

            # This can be in a different case than cur_str.
            actual_term = text[cur:cur + len(cur_str)]

            # Handle incorrect highlight_locations by first checking that the
            # term really is at this position.
            if actual_term.lower() == cur_str:
                pieces.append(text[matched_so_far:cur])
                pieces.append(hl_start + actual_term + hl_end)
                matched_so_far = cur + len(actual_term)

        # Don't forget the chunk after the last term.
        pieces.append(text[matched_so_far:])
        highlighted_chunk = "".join(pieces)

        # Escaped Unicode sequences at the end of highlighted_chunk might get
        # split by the window, leaving a partial encoding behind (for example
        # "My heart is in the \u4") that breaks Javascript.
        # trim() is expected to remove such orphaned encodings; it lives in
        # the project's haystack package -- confirm its exact contract there.
        from haystack import trim
        highlighted_chunk = trim.trim(highlighted_chunk)

        if start_offset > 0:
            highlighted_chunk = '...%s' % highlighted_chunk

        if end_offset < len(self.text_block):
            highlighted_chunk = '%s...' % highlighted_chunk

        return highlighted_chunk
예제 #2
0
파일: tests.py 프로젝트: varunarora/OC
    def test_trim(self):
        """Check that the output of trim() always satisfies is_trimmed().

        Every input is text containing literal backslash-u escape sequences;
        several end in a truncated sequence or a lone backslash, and one is
        empty. The backslashes are written doubled so that each literal holds
        the escape *text* on every Python version: written with a single
        backslash, the truncated sequences are a syntax error on Python 3 and
        the complete ones would turn into real characters there.
        """
        cases = [
            "34\\u2345\\u3456",          # complete sequences after plain text
            "\\u1234\\u2345\\u3456",     # complete sequences only
            "\\u2345\\u3456\\",          # trailing lone backslash
            "34\\u2345\\u3456 \\u",      # trailing "\u" with no hex digits
            "34\\u2345\\u3456\\u3",      # one hex digit
            "34\\u2345\\u3456\\u34",     # two hex digits
            "34\\u2345\\u3456\\u345",    # three hex digits
            "",                          # empty input
        ]

        for case in cases:
            self.assertTrue(
                trim.is_trimmed(trim.trim(case)),
                "trim() left an untrimmed result for %r" % case,
            )