def test_whole_noterms():
    """Whole-field highlighting: when none of the query terms occur in the
    highlighted field, the default minscore hides the text, while
    ``minscore=0`` returns the full (unhighlighted) field."""
    schema = fields.Schema(text=fields.TEXT(stored=True), tag=fields.KEYWORD)
    index = RamStorage().create_index(schema)
    with index.writer() as writer:
        writer.add_document(text=u("alfa bravo charlie delta echo foxtrot golf"),
                            tag=u("foo"))

    with index.searcher() as searcher:
        hits = searcher.search(query.Term("text", u("delta")))
        assert len(hits) == 1
        hits.fragmenter = highlight.WholeFragmenter()
        hits.formatter = highlight.UppercaseFormatter()
        # The matched term is uppercased inside the whole stored field.
        assert hits[0].highlights("text") == u("alfa bravo charlie DELTA echo foxtrot golf")

        hits = searcher.search(query.Term("tag", u("foo")))
        assert len(hits) == 1
        hits.fragmenter = highlight.WholeFragmenter()
        hits.formatter = highlight.UppercaseFormatter()
        # Query matched "tag", so "text" contains no scoring terms.
        assert hits[0].highlights("text") == u("")
        assert hits[0].highlights("text", minscore=0) == u("alfa bravo charlie delta echo foxtrot golf")
def test_null_fragment():
    """WholeFragmenter keeps the full text and uppercases the matched terms."""
    wanted = frozenset(("bravo", "india"))
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.WholeFragmenter()
    formatter = highlight.UppercaseFormatter()
    out = highlight.highlight(_doc, wanted, analyzer, fragmenter, formatter)
    assert out == "alfa BRAVO charlie delta echo foxtrot golf hotel INDIA juliet kilo lima"
def highlight_all(result, field):
    """Return the whole stored value of ``field`` with query matches marked
    up, reusing the formatter of the results' configured highlighter.
    Falls back to the raw field text when highlighting produces nothing."""
    text = result[field]
    highlighter = highlight.Highlighter(
        fragmenter=highlight.WholeFragmenter(),
        formatter=result.results.highlighter.formatter)
    marked = Markup(highlighter.highlight_hit(result, field, text=text))
    # An empty Markup is falsy, so unmatched hits fall back to plain text.
    return marked or text
def test_workflow_easy():
    """High-level highlighting workflow: configure fragmenter/formatter on
    the Results object and call ``hit.highlights()``.

    Fixes: replaces nose-style ``assert_equal`` with plain ``assert`` for
    consistency with the other tests in this file, and manages the writer
    with a context manager so it is committed/closed even on error.
    """
    schema = fields.Schema(id=fields.ID(stored=True), title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=u("1"), title=u("The man who wasn't there"))
        w.add_document(id=u("2"), title=u("The dog who barked at midnight"))
        w.add_document(id=u("3"), title=u("The invisible man"))
        w.add_document(id=u("4"), title=u("The girl with the dragon tattoo"))
        w.add_document(id=u("5"), title=u("The woman who disappeared"))

    with ix.searcher() as s:
        # Parse the user query
        parser = qparser.QueryParser("title", schema=ix.schema)
        q = parser.parse(u("man"))
        # terms=True records which terms matched, enabling highlights().
        r = s.search(q, terms=True)
        assert len(r) == 2

        r.fragmenter = highlight.WholeFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        outputs = [hit.highlights("title") for hit in r]
        assert outputs == ["The invisible MAN", "The MAN who wasn't there"]
def test_html_escape():
    """HtmlFormatter must HTML-escape the surrounding text, not only wrap
    the matched terms."""
    query_terms = frozenset(["bravo"])
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.WholeFragmenter()
    formatter = highlight.HtmlFormatter()
    out = highlight.highlight(u('alfa <bravo "charlie"> delta'),
                              query_terms, analyzer, fragmenter, formatter)
    assert out == 'alfa &lt;<strong class="match term0">bravo</strong> &quot;charlie&quot;&gt; delta'
def find(query):
    """Search the transcript index for ``query`` and return a list of dicts
    (url, highlighted summary, formatted date, full transcript)."""
    with ix.searcher() as searcher:
        # Parse into a whoosh query object (distinct name avoids shadowing
        # the ``query`` parameter).
        parsed = QueryParser("transcript", ix.schema).parse(query)
        results = searcher.search(parsed)
        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = brf
        found = []
        for result in results:
            found.append(dict(
                url=result['url'],
                summary=result.highlights('transcript'),
                date=datetime.strftime(result['date'], '%b %d %Y'),
                transcript=result['transcript'],
            ))
        return found
def test_workflow_manual():
    """Manual highlighting workflow: extract the query terms yourself and
    call ``highlight.highlight()`` directly.

    Fixes: replaces nose-style ``assert_equal`` with plain ``assert`` for
    consistency with the other tests in this file, and manages the writer
    with a context manager so it is committed/closed even on error.
    """
    schema = fields.Schema(id=fields.ID(stored=True), title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=u("1"), title=u("The man who wasn't there"))
        w.add_document(id=u("2"), title=u("The dog who barked at midnight"))
        w.add_document(id=u("3"), title=u("The invisible man"))
        w.add_document(id=u("4"), title=u("The girl with the dragon tattoo"))
        w.add_document(id=u("5"), title=u("The woman who disappeared"))

    with ix.searcher() as s:
        # Parse the user query
        parser = qparser.QueryParser("title", schema=ix.schema)
        q = parser.parse(u("man"))

        # Extract the terms the user used in the field we're interested in
        terms = [text for fieldname, text in q.all_terms()
                 if fieldname == "title"]

        # Perform the search
        r = s.search(q)
        assert len(r) == 2

        # Use the same analyzer as the field uses. To be sure, you can
        # do schema[fieldname].analyzer. Be careful not to do this
        # on non-text field types such as DATETIME.
        analyzer = schema["title"].analyzer

        # Since we want to highlight the full title, not extract fragments,
        # we'll use WholeFragmenter.
        nf = highlight.WholeFragmenter()

        # In this example we'll simply uppercase the matched terms
        fmt = highlight.UppercaseFormatter()

        outputs = []
        for d in r:
            text = d["title"]
            outputs.append(highlight.highlight(text, terms, analyzer, nf, fmt))

        assert outputs == ["The invisible MAN", "The MAN who wasn't there"]
def test_paged_highlights():
    """Fragmenter/formatter set on ``page.results`` must apply to hits
    accessed through the page object."""
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    bodies = [
        u("alfa bravo charlie delta echo foxtrot"),
        u("bravo charlie delta echo foxtrot golf"),
        u("charlie delta echo foxtrot golf hotel"),
        u("delta echo foxtrot golf hotel india"),
        u("echo foxtrot golf hotel india juliet"),
        u("foxtrot golf hotel india juliet kilo"),
    ]
    with ix.writer() as writer:
        for body in bodies:
            writer.add_document(text=body)

    with ix.searcher() as searcher:
        term = query.Term("text", u("alfa"))
        page = searcher.search_page(term, 1, pagelen=3)
        page.results.fragmenter = highlight.WholeFragmenter()
        page.results.formatter = highlight.UppercaseFormatter()
        assert page[0].highlights("text") == u("ALFA bravo charlie delta echo foxtrot")
def quote(bot, message, author):
    """
    Returns a quote. No argument returns a random quote. A text argument
    will search through the quote DB and return a random result. An
    argument of the form `id:69` will attempt to get the quote with the
    id of `69`.
    """
    buf = MessageBuffer()

    # Guard clause: nothing indexed yet.
    if bot.index.doc_count() == 0:
        buf.add("No quotes have been added.")
        return buf

    with bot.index.searcher() as searcher:
        if message == "":
            # No argument: look up a random quote by its numeric id.
            rand_id = randint(1, bot.index.doc_count())
            parsed = QueryParser("id", bot.index.schema).parse(str(rand_id))
            found = searcher.search(parsed)
        else:
            # Full-text search over the quote bodies, unlimited results.
            parsed = QueryParser("quote", bot.index.schema).parse(message)
            found = searcher.search(parsed, limit=None)

        if len(found) > 0:
            found.formatter = BoldFormatter()
            found.fragmenter = highlight.WholeFragmenter()
            result = choice(found)
            # minscore=0 returns the whole quote even if no term matched.
            quote_text = bot.santise_quote(result.highlights("quote", minscore=0))
            buf.add(f"[{result['id']}] {quote_text}")
            if "submitter" in result.keys() and "submitted" in result.keys():
                buf.add("")
                buf.add(
                    f"*Submitted by {result['submitter']} on {result['submitted']}*."
                )
        else:
            buf.add("No quote found.")
    return buf
results.formatter = EscapeSeqFormatter() # first run to extract context from tex files results.fragmenter = highlight.ContextFragmenter(maxchars=50, surround=20) task_hl = [] solution_hl = [] wrapper = textwrap.TextWrapper(initial_indent='\t', subsequent_indent='\t') for i, res in enumerate(results, start=1): task_hl.append(wrapper.fill(hf(res, "task"))) solution_hl.append(wrapper.fill(hf(res, "solution"))) results.fragmenter = highlight.WholeFragmenter(charlimit=300) print('-' * 60) print( 'found \x1b[37;1m%u\x1b[37;0m matching entries for query:\x1b[92;2m %s\x1b[0m\n' % (len(results), args.q)) for i, res in enumerate(results, start=1): datestr = '' if res["lastupdate"] is not None: datestr = res["lastupdate"].strftime("%Y-%m-%d") print('\x1b[37;1m(%2u) \x1b[91;22m%s \x1b[0m%s:' % (i, hf(res, "folder_name"), hf(res, "maintainer"))) print('\t\x1b[36m%s\x1b[33m %s\x1b[0m' % (hf(res, "language"), datestr))