def print_examples(out=None):
    """Print examples of the snippet creation to `out`.

    For every (description, doc, query, chars, sents) entry in DATA the
    snippet is built with highlight_doc() and a labelled report
    (description header, document, query, snippet) is written, followed by
    two empty lines.

    Args:
        out: Object with a ``write`` method that receives the report.  When
            omitted (or None), ``sys.stdout`` is looked up at call time so
            that a stdout redirected after import is honoured.
    """
    if out is None:
        out = sys.stdout
    for description, doc, query, chars, sents in DATA:
        snippet = highlight_doc(doc, query, chars, sents)
        report = (
            ('#### %s ####', description),
            ('DOCUMENT: """%s"""', doc),
            ('QUERY: """%s"""', query),
            ('SNIPPET: """%s"""', snippet),
        )
        # write() plus an explicit newline emits the same text as the
        # Python 2 ``print >> out`` statement and also runs on Python 3.
        for template, value in report:
            out.write(template % value + '\n')
        # Record separator: equivalent to printing '\n' followed by the
        # newline that print itself appends.
        out.write('\n\n')
def test(self):
    """Check the snippet when the only matching sentence is long.

    The document holds two four-character sentences and one longer sentence
    containing the query 'rat'.  With max_chars=10 and max_sents=1 the
    snippet is expected to be exactly one of the short sentences.
    """
    doc = 'Dog! Cat! The sentence with rat is too long.'
    query = 'rat'
    snippet = snippets.highlight_doc(doc, query, max_chars=10, max_sents=1)
    # The parentheses matter: written as `snippet in 'Dog!', 'Cat!'` the
    # statement is a substring test against 'Dog!' with 'Cat!' used as the
    # assertion message, so '' would pass and 'Cat!' would fail.
    assert snippet in ('Dog!', 'Cat!')
def test_two(self):
    """Check that a max_sents=2 snippet contains exactly two periods."""
    text = 'I love pepperoni. Pepperoni. Pepperoni.'
    result = snippets.highlight_doc(text, 'pepperoni', max_sents=2)
    period_total = result.count('.')
    assert period_total == 2
def test_with_tags(self):
    """Check the max_chars=35 snippet for 'pizza' is the highlighted 'Pizza!'."""
    text = 'I love pepperoni pizza. Pizza!'
    terms = 'pizza'
    expected = '[[HIGHLIGHT]]Pizza[[ENDHIGHLIGHT]]!'
    result = snippets.highlight_doc(text, terms, max_chars=35)
    assert result == expected
def test_no_tags(self):
    """Check the max_chars=10 snippet for 'pizza' is the untagged 'LOL!'."""
    result = snippets.highlight_doc(
        'I love pepperoni pizza. LOL!',
        'pizza',
        max_chars=10,
    )
    assert result == 'LOL!'
def test_mixed_both(self):
    """Check matching when document and query use different mixed casing.

    The expected snippet wraps the whole document in one highlight and
    keeps the document's own casing.
    """
    text = 'pEpPeRoNi PiZzA'
    terms = 'PePpErOnI pIzZa'
    expected = '[[HIGHLIGHT]]pEpPeRoNi PiZzA[[ENDHIGHLIGHT]]'
    result = snippets.highlight_doc(text, terms)
    assert result == expected
def test_mixed_query(self):
    """Check a mixed-case query against an all-lowercase document.

    The expected snippet highlights the full phrase 'pepperoni pizza' and
    the later standalone 'pizza'.
    """
    expected = (
        '[[HIGHLIGHT]]pepperoni pizza[[ENDHIGHLIGHT]] is good [[HIGHLIGHT]]pizza[[ENDHIGHLIGHT]].'
    )
    result = snippets.highlight_doc(
        'pepperoni pizza is good pizza.',
        'pEppeRonI pIzZa',
    )
    assert result == expected
def test_multiple_query_matches_2(self):
    """Check highlighting when query words recur around a full match.

    The expected snippet has three highlighted spans: the leading 'pizza',
    the full phrase 'pepperoni olive pizza', and the trailing 'olive pizza'.
    """
    text = 'pizza pepperoni olive pizza olive pizza'
    terms = 'pepperoni olive pizza'
    expected = (
        '[[HIGHLIGHT]]pizza[[ENDHIGHLIGHT]] [[HIGHLIGHT]]pepperoni olive pizza[[ENDHIGHLIGHT]] [[HIGHLIGHT]]olive pizza[[ENDHIGHLIGHT]]'
    )
    result = snippets.highlight_doc(text, terms)
    assert result == expected
def test_multiple_query_matches_1(self):
    """Check highlighting of a lone query word plus the full phrase.

    The expected snippet highlights the standalone 'pizza' and, separately,
    the complete phrase 'deep dish pizza'.
    """
    text = 'Their specialty pizza is deep dish pizza.'
    expected = (
        'Their specialty [[HIGHLIGHT]]pizza[[ENDHIGHLIGHT]] is [[HIGHLIGHT]]deep dish pizza[[ENDHIGHLIGHT]].'
    )
    result = snippets.highlight_doc(text, 'deep dish pizza')
    assert result == expected
def test_multi_word_query(self):
    """Check that a multi-word query is highlighted as one span."""
    terms = 'deep dish pizza'
    text = 'I really love deep dish pizza.'
    expected = 'I really love [[HIGHLIGHT]]deep dish pizza[[ENDHIGHLIGHT]].'
    result = snippets.highlight_doc(text, terms)
    assert result == expected
def test_(self):
    """Check that a document identical to the query is wrapped in one highlight."""
    # The same string serves as both the document and the query.
    phrase = 'pepperoni pizza'
    result = snippets.highlight_doc(phrase, phrase)
    expected = '[[HIGHLIGHT]]pepperoni pizza[[ENDHIGHLIGHT]]'
    assert result == expected