def test_render_html(self):
    """Compare ``render_html`` output against fixed expectations.

    Two queries are exercised: "this test" against documents 1-3 with a
    0-200 window, and "content detection" against document 3 with two
    different end offsets. Every call receives its own locations dict.
    """
    # Query "this test": same offsets and window, three different documents.
    this_test_cases = (
        (
            self.document_1,
            {"this": [0, 53, 79], "test": [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
        ),
        (
            self.document_2,
            {"this": [0, 53, 79], "test": [10, 68]},
            "The content of words in no particular order causes nothing to occur.",
        ),
        (
            self.document_3,
            {"this": [0, 53, 79], "test": [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
        ),
    )
    highlighter = Highlighter("this test")
    for text_block, locations, expected_html in this_test_cases:
        highlighter.text_block = text_block
        rendered = highlighter.render_html(locations, 0, 200)
        self.assertEqual(rendered, expected_html)

    # Query "content detection": one document, window starting at 42 with
    # two different end offsets.
    content_detection_cases = (
        (
            242,
            {"content": [151], "detection": [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
        ),
        (
            200,
            {"content": [151], "detection": [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
        ),
    )
    highlighter = Highlighter("content detection")
    highlighter.text_block = self.document_3
    for end_offset, locations, expected_html in content_detection_cases:
        rendered = highlighter.render_html(locations, 42, end_offset)
        self.assertEqual(rendered, expected_html)
def test_find_highlightable_words(self):
    """Check the word -> offsets mapping found in ``document_1`` per query."""
    cases = (
        # Both words occur several times.
        ('this test', {'this': [0, 53, 79], 'test': [10, 68]}),
        # We don't stem for now.
        ('highlight tests', {'highlight': [22], 'tests': []}),
        # Ignore negated bits.
        ('highlight -test', {'highlight': [22]}),
    )
    for query, expected_locations in cases:
        highlighter = Highlighter(query)
        highlighter.text_block = self.document_1
        self.assertEqual(highlighter.find_highlightable_words(), expected_locations)
def test_render_html(self):
    """Compare ``render_html`` output against fixed expectations.

    Covers the "this test" and "content detection" queries over the shared
    documents, then queries made of regex metacharacters (``i++``, ``i**``,
    ``i..``, ``i??``) rendered against short inline texts.
    """
    # Query 'this test': same offsets and window, three different documents.
    this_test_cases = (
        (
            self.document_1,
            {'this': [0, 53, 79], 'test': [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
        ),
        (
            self.document_2,
            {'this': [0, 53, 79], 'test': [10, 68]},
            'The content of words in no particular order causes nothing to occur.',
        ),
        (
            self.document_3,
            {'this': [0, 53, 79], 'test': [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
        ),
    )
    highlighter = Highlighter('this test')
    for text_block, locations, expected_html in this_test_cases:
        highlighter.text_block = text_block
        self.assertEqual(highlighter.render_html(locations, 0, 200), expected_html)

    # Query 'content detection': window starts at 42, two end offsets.
    content_detection_cases = (
        (
            242,
            {'content': [151], 'detection': [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
        ),
        (
            200,
            {'content': [151], 'detection': [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
        ),
    )
    highlighter = Highlighter('content detection')
    highlighter.text_block = self.document_3
    for end_offset, locations, expected_html in content_detection_cases:
        self.assertEqual(highlighter.render_html(locations, 42, end_offset), expected_html)

    # Regression for repetition in the regular expression.
    regex_cases = (
        ('i++', 'Foo is i++ in most cases.', {'i++': [7]}, 'Foo is <span class="highlighted">i++</span> in most cases.'),
        ('i**', 'Foo is i** in most cases.', {'i**': [7]}, 'Foo is <span class="highlighted">i**</span> in most cases.'),
        ('i..', 'Foo is i.. in most cases.', {'i..': [7]}, 'Foo is <span class="highlighted">i..</span> in most cases.'),
        ('i??', 'Foo is i?? in most cases.', {'i??': [7]}, 'Foo is <span class="highlighted">i??</span> in most cases.'),
    )
    for query, text_block, locations, expected_html in regex_cases:
        highlighter = Highlighter(query)
        highlighter.text_block = text_block
        self.assertEqual(highlighter.render_html(locations, 0, 200), expected_html)
def test_find_highlightable_words(self):
    """Check which query words are located in ``document_1`` and at what offsets."""
    text = self.document_1

    plain = Highlighter("this test")
    plain.text_block = text
    plain_hits = plain.find_highlightable_words()
    self.assertEqual(plain_hits, {"this": [0, 53, 79], "test": [10, 68]})

    # We don't stem for now.
    unstemmed = Highlighter("highlight tests")
    unstemmed.text_block = text
    unstemmed_hits = unstemmed.find_highlightable_words()
    self.assertEqual(unstemmed_hits, {"highlight": [22], "tests": []})

    # Ignore negated bits.
    negated = Highlighter("highlight -test")
    negated.text_block = text
    negated_hits = negated.find_highlightable_words()
    self.assertEqual(negated_hits, {"highlight": [22]})
def test_find_window(self):
    """Check the ``(start, end)`` window ``find_window`` returns per locations dict."""
    # The query doesn't matter for this method, so ignore it.
    highlighter = Highlighter('')
    highlighter.text_block = self.document_1

    cases = (
        # No query.
        ({}, (0, 200)),
        # Nothing found.
        ({'highlight': [], 'tests': []}, (0, 200)),
        # Simple cases.
        ({'highlight': [0], 'tests': [100]}, (0, 200)),
        ({'highlight': [99], 'tests': [199]}, (99, 299)),
        ({'highlight': [0], 'tests': [201]}, (0, 200)),
        ({'highlight': [203], 'tests': [120]}, (120, 320)),
        ({'highlight': [], 'tests': [100]}, (100, 300)),
        ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),
        # Simple cases, with an outlier far outside the window.
        ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
        ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
        ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
        ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),
        # Density checks.
        ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}, (40, 240)),
        ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}, (100, 300)),
    )
    for locations, expected_window in cases:
        self.assertEqual(highlighter.find_window(locations), expected_window)
def execute_highlighter(query, text_key, results):
    """Attach highlight offsets for ``query`` to every item in ``results``.

    For each result, the text stored under ``text_key`` in
    ``result.get_additional_fields()`` (an empty string when the key is
    missing) is scanned with a ``Highlighter`` built from ``query``.
    ``result.highlight_locations`` is then replaced with a list of
    ``[start, end]`` pairs, where ``end`` is ``start + len(word)``, one pair
    per located occurrence of each word.

    The results are modified in place; nothing is returned.
    """
    highlight = Highlighter(query)

    for result in results:
        highlight.text_block = result.get_additional_fields().get(text_key, "")
        highlight_locations = highlight.find_highlightable_words()
        result.highlight_locations = []

        # ``items()`` exists on both Python 2 and 3; the previous
        # ``iteritems()`` call raises AttributeError on Python 3.
        for word, locations in highlight_locations.items():
            result.highlight_locations.extend(
                [location, location + len(word)] for location in locations
            )
def test_find_highlightable_words(self):
    """Check the word -> offsets mapping found in ``document_1`` for three queries."""

    def words_found(query):
        # A fresh highlighter per query, always scanning document_1.
        highlighter = Highlighter(query)
        highlighter.text_block = self.document_1
        return highlighter.find_highlightable_words()

    self.assertEqual(words_found('this test'), {'this': [0, 53, 79], 'test': [10, 68]})

    # We don't stem for now.
    self.assertEqual(words_found('highlight tests'), {'highlight': [22], 'tests': []})

    # Ignore negated bits.
    self.assertEqual(words_found('highlight -test'), {'highlight': [22]})
def test_find_window(self):
    """Check the ``(start, end)`` window chosen by ``find_window``.

    Each check passes a hand-written word -> offsets dict and compares the
    returned tuple with the expected window.
    """
    # The query doesn't matter for this method, so ignore it.
    highlighter = Highlighter('')
    highlighter.text_block = self.document_1

    def expect_window(locations, window):
        # Single assertion shared by every scenario below.
        self.assertEqual(highlighter.find_window(locations), window)

    # No query.
    expect_window({}, (0, 200))

    # Nothing found.
    expect_window({'highlight': [], 'tests': []}, (0, 200))

    # Simple cases.
    expect_window({'highlight': [0], 'tests': [100]}, (0, 200))
    expect_window({'highlight': [99], 'tests': [199]}, (99, 299))
    expect_window({'highlight': [0], 'tests': [201]}, (0, 200))
    expect_window({'highlight': [203], 'tests': [120]}, (120, 320))
    expect_window({'highlight': [], 'tests': [100]}, (100, 300))
    expect_window({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200))

    # Simple cases, with an outlier far outside the window.
    expect_window({'highlight': [0], 'tests': [100, 450]}, (0, 200))
    expect_window({'highlight': [100], 'tests': [220, 450]}, (100, 300))
    expect_window({'highlight': [100], 'tests': [350, 450]}, (350, 550))
    expect_window({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300))

    # Density checks.
    expect_window({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200))
    expect_window({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240))
    expect_window({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}, (40, 240))
    expect_window({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}, (100, 300))
def test_render_html(self):
    """Compare ``render_html`` output against fixed expectations.

    Sections, in order: the "this test" query over documents 1-3, the
    "content detection" query with two end offsets, a term contained in
    another term, queries made of regex metacharacters, and queries that
    overlap the text of the highlight markup itself.
    """
    # Query 'this test': same offsets and window, three different documents.
    highlighter = Highlighter('this test')
    this_test_cases = (
        (
            self.document_1,
            {'this': [0, 53, 79], 'test': [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
        ),
        (
            self.document_2,
            {'this': [0, 53, 79], 'test': [10, 68]},
            'The content of words in no particular order causes nothing to occur.',
        ),
        (
            self.document_3,
            {'this': [0, 53, 79], 'test': [10, 68]},
            '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
        ),
    )
    for text_block, locations, expected_html in this_test_cases:
        highlighter.text_block = text_block
        self.assertEqual(highlighter.render_html(locations, 0, 200), expected_html)

    # Query 'content detection': window starts at 42, two end offsets.
    highlighter = Highlighter('content detection')
    highlighter.text_block = self.document_3
    content_detection_cases = (
        (
            242,
            {'content': [151], 'detection': [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
        ),
        (
            200,
            {'content': [151], 'detection': [42]},
            '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...',
        ),
    )
    for end_offset, locations, expected_html in content_detection_cases:
        self.assertEqual(highlighter.render_html(locations, 42, end_offset), expected_html)

    # One term found within another term.
    highlighter = Highlighter('this is')
    highlighter.text_block = self.document_1
    nested_html = highlighter.render_html({'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200)
    self.assertEqual(
        nested_html,
        '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
    )

    # Each remaining case builds its own highlighter over an inline text.
    standalone_cases = (
        # Regression for repetition in the regular expression.
        ('i++', 'Foo is i++ in most cases.', {'i++': [7]}, 'Foo is <span class="highlighted">i++</span> in most cases.'),
        ('i**', 'Foo is i** in most cases.', {'i**': [7]}, 'Foo is <span class="highlighted">i**</span> in most cases.'),
        ('i..', 'Foo is i.. in most cases.', {'i..': [7]}, 'Foo is <span class="highlighted">i..</span> in most cases.'),
        ('i??', 'Foo is i?? in most cases.', {'i??': [7]}, 'Foo is <span class="highlighted">i??</span> in most cases.'),
        # Regression for highlighting already highlighted HTML terms.
        (
            'span',
            'A span in spam makes html in a can.',
            {'span': [2]},
            'A <span class="highlighted">span</span> in spam makes html in a can.',
        ),
        (
            'highlight',
            'A span in spam makes highlighted html in a can.',
            {'highlight': [21]},
            'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.',
        ),
    )
    for query, text_block, locations, expected_html in standalone_cases:
        highlighter = Highlighter(query)
        highlighter.text_block = text_block
        self.assertEqual(highlighter.render_html(locations, 0, 200), expected_html)