def test_highlight_ngrams():
    """Highlight a hit in an NGRAMWORDS field: matched n-gram terms should be
    uppercased by the UppercaseFormatter within the sentence fragment."""
    schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        writer.add_document(text=u("Multiplication and subtraction are good"))

    with ix.searcher() as searcher:
        parser = qparser.QueryParser("text", ix.schema)
        results = searcher.search(parser.parse(u("multiplication")))
        assert results.scored_length() == 1

        results.fragmenter = highlight.SentenceFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        # Only the n-grams that actually matched are uppercased, hence the
        # mixed-case output.
        assert (results[0].highlights("text")
                == "MULTIPLICATIon and subtracTION are good")
def test_workflow_manual():
    """Exercise the fully-manual highlighting workflow: extract the query
    terms yourself and call highlight.highlight() directly with an analyzer,
    fragmenter and formatter.

    Fixed: replaced nose-style ``assert_equal`` with plain ``assert`` for
    consistency with the other tests in this file.
    """
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=u("1"), title=u("The man who wasn't there"))
    w.add_document(id=u("2"), title=u("The dog who barked at midnight"))
    w.add_document(id=u("3"), title=u("The invisible man"))
    w.add_document(id=u("4"), title=u("The girl with the dragon tattoo"))
    w.add_document(id=u("5"), title=u("The woman who disappeared"))
    w.commit()

    with ix.searcher() as s:
        # Parse the user query
        parser = qparser.QueryParser("title", schema=ix.schema)
        q = parser.parse(u("man"))

        # Extract the terms the user used in the field we're interested in
        terms = [text for fieldname, text in q.all_terms()
                 if fieldname == "title"]

        # Perform the search
        r = s.search(q)
        assert len(r) == 2

        # Use the same analyzer as the field uses. To be sure, you can
        # do schema[fieldname].analyzer. Be careful not to do this
        # on non-text field types such as DATETIME.
        analyzer = schema["title"].analyzer

        # Since we want to highlight the full title, not extract fragments,
        # we'll use WholeFragmenter.
        nf = highlight.WholeFragmenter()

        # In this example we'll simply uppercase the matched terms
        fmt = highlight.UppercaseFormatter()

        outputs = []
        for d in r:
            text = d["title"]
            outputs.append(highlight.highlight(text, terms, analyzer, nf, fmt))

        assert outputs == ["The invisible MAN", "The MAN who wasn't there"]
def test_paged_highlights():
    """Highlighting must also work on hits obtained through search_page():
    the fragmenter/formatter set on page.results apply to page items."""
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    docs = (
        u("alfa bravo charlie delta echo foxtrot"),
        u("bravo charlie delta echo foxtrot golf"),
        u("charlie delta echo foxtrot golf hotel"),
        u("delta echo foxtrot golf hotel india"),
        u("echo foxtrot golf hotel india juliet"),
        u("foxtrot golf hotel india juliet kilo"),
    )
    with ix.writer() as writer:
        for text in docs:
            writer.add_document(text=text)

    with ix.searcher() as searcher:
        term = query.Term("text", u("alfa"))
        page = searcher.search_page(term, 1, pagelen=3)
        page.results.fragmenter = highlight.WholeFragmenter()
        page.results.formatter = highlight.UppercaseFormatter()
        assert (page[0].highlights("text")
                == u("ALFA bravo charlie delta echo foxtrot"))
def test_correct_correct():
    """correct_query() on a word that is already spelled correctly should
    return the word unchanged, and format_string() should not highlight
    anything (no corrections were made).

    Fixes:
    - Removed the redundant ``s = ix.searcher()`` that shadowed the
      context-managed searcher and leaked an unclosed searcher.
    - ``QueryParser.parse(text, normalize=True)`` does not take a schema as
      its second positional argument; the original passed ``ix.schema`` into
      the ``normalize`` slot, which only worked because the schema is truthy.
    """
    from whoosh import qparser

    schema = fields.Schema(a=fields.TEXT())
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(a=u'dworska')
            w.add_document(a=u'swojska')

        with ix.searcher() as s:
            qtext = u'dworska'
            qp = qparser.QueryParser('a', ix.schema)
            q = qp.parse(qtext)
            c = s.correct_query(q, qtext)
            assert c.string == "dworska"
            # No correction happened, so the formatter has nothing to mark up.
            string = c.format_string(highlight.UppercaseFormatter())
            assert string == "dworska"
def test_correct_correct():
    """Variant of the correct-query test using a spelling-enabled TEXT field:
    an already-correct word comes back unchanged and unformatted.

    Fixes:
    - ``QueryParser.parse(text, normalize=True)`` does not take a schema as
      its second positional argument; the original passed ``ix.schema`` into
      the ``normalize`` slot, which only worked because the schema is truthy.
    - The searcher is now opened with a ``with`` block so it is closed
      instead of being leaked.
    """
    from whoosh import qparser

    schema = fields.Schema(a=fields.TEXT(spelling=True))
    ix = RamStorage().create_index(schema)
    ix_writer = ix.writer()
    ix_writer.add_document(a=u('dworska'))
    ix_writer.add_document(a=u('swojska'))
    ix_writer.commit()

    with ix.searcher() as s:
        qtext = u('dworska')
        qp = qparser.QueryParser('a', ix.schema)
        q = qp.parse(qtext)
        c = s.correct_query(q, qtext)
        assert c.string == "dworska"
        # Nothing was corrected, so formatting leaves the string untouched.
        assert c.format_string(highlight.UppercaseFormatter()) == "dworska"
def test_snippets():
    """Search with terms=True and pull the top sentence fragment from each
    hit; stemmed matches ("key", "keying") are uppercased by the formatter."""
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=ana))
    ix = RamStorage().create_index(schema)

    documents = [
        "Lay out the rough animation by creating the important poses where they occur on the timeline.",
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key.",
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses.",
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters.",
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame.",
    ]
    writer = ix.writer()
    for doc in documents:
        writer.add_document(text=u(doc))
    writer.commit()

    expected = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results",
    ]

    with ix.searcher() as searcher:
        parser = qparser.QueryParser("text", ix.schema)
        results = searcher.search(parser.parse(u("key")), terms=True)
        results.fragmenter = highlight.SentenceFragmenter()
        results.formatter = highlight.UppercaseFormatter()

        snippets = [hit.highlights("text", top=1) for hit in results]
        # Hit order is score-dependent; compare as sorted lists.
        assert sorted(snippets) == sorted(expected)
def test_workflow_easy():
    """The convenient highlighting workflow: search with terms=True, set a
    fragmenter/formatter on the results, and call hit.highlights()."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    titles = {
        "1": "The man who wasn't there",
        "2": "The dog who barked at midnight",
        "3": "The invisible man",
        "4": "The girl with the dragon tattoo",
        "5": "The woman who disappeared",
    }
    writer = ix.writer()
    for doc_id, title in sorted(titles.items()):
        writer.add_document(id=u(doc_id), title=u(title))
    writer.commit()

    with ix.searcher() as searcher:
        # Parse the user query
        parser = qparser.QueryParser("title", schema=ix.schema)
        results = searcher.search(parser.parse(u("man")), terms=True)
        assert len(results) == 2

        # Keep the whole title; uppercase the matched terms.
        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()

        highlighted = [hit.highlights("title") for hit in results]
        assert highlighted == ["The invisible MAN", "The MAN who wasn't there"]
myquery = parser.parse( query.search_terms ) results = [] reponse = [] rr={} with self.queryIndex.searcher() as searcher: results = searcher.search( myquery ) for result in results: temp = result['content'] temp = temp.split("\t") sugg= temp[0] print result rr['content']= sugg results.fragmenter = highlight.ContextFragmenter(surround=40) results.formatter = highlight.UppercaseFormatter() response = parse_whoosh_trec('WhooshQueryEngine', query.search_terms, results) return response # ----- The Following are Whoosh errors ----- # There's a problem with the Whoosh query created from the users query except QueryError, e: raise SearchEngineError("Whoosh Query Suggest Engine", e, errorType="Whoosh", query=query) # Our Whoosh Index is empty except EmptyIndexError, e: raise SearchEngineError("Whoosh Query Suggest Engine", e, errorType="Whoosh") # Our Whoosh Index does not match our version of Whoosh