def __init__(self, location, mean, var, num_results, time_limit):
    """Open the Whoosh index at *location* and set up query parsers.

    Args:
        location: Directory path of the Whoosh index (also used as the name).
        mean: Score mean; NaN falls back to 0.0.
        var: Score variance; NaN falls back to 1.0.
        num_results: Maximum number of results returned per query.
        time_limit: Per-query time limit.
    """
    self._name = location
    self._index = index.open_dir(location)
    self._query_hash = 0
    self._query_terms = None
    self._document_cache = {}
    self._limit = num_results
    self._time = time_limit
    # TODO(jbg): This is a parameter that can be optimized
    og = qparser.OrGroup.factory(0.9)
    self._text_parser = qparser.QueryParser("content", self._index.schema,
                                            group=og)
    self._id_parser = qparser.QueryParser("id", self._index.schema, group=og)
    # Fall back to sane defaults when the caller passes NaN.
    self._mean = mean if not isnan(mean) else 0.0
    # BUG FIX: the original re-assigned self._var = var *after* the NaN
    # check, clobbering the 1.0 fallback; keep only the guarded value.
    self._var = var if not isnan(var) else 1.0
    self._misses = 0
    self._hits = 0
def mostrar_lista(event):
    """Handle the list-search event: read "<contenido> <operador> <titulo>"
    from the entry widget, combine per-field queries with AND ("Y"), OR, or
    NOT (the default), and print the results.

    BUG FIX: the entry text was read and split three separate times; it is
    now read and split once.
    """
    ix = open_dir(dirindex)
    with ix.searcher() as searcher:
        # One read/split instead of three; still raises IndexError on
        # malformed input exactly as the original triple-split did.
        partes = str(en.get()).split(" ")
        contenido = partes[0]
        operador = partes[1]
        titulo = partes[2]
        print(contenido)
        print(operador)
        print(titulo)

        if "Y" in operador:
            query = (qparser.QueryParser('contenido', ix.schema,
                                         group=qparser.OrGroup).parse(contenido)
                     & qparser.QueryParser('titulo', ix.schema,
                                           group=qparser.OrGroup).parse(titulo))
        elif "OR" in operador:
            query = (QueryParser('contenido', ix.schema).parse(contenido)
                     | QueryParser('titulo', ix.schema).parse(titulo))
        else:
            # Default: match contenido but exclude documents matching titulo.
            query = (QueryParser('contenido', ix.schema).parse(contenido)
                     - QueryParser('titulo', ix.schema).parse(titulo))

        results = searcher.search(query)
        imprimir_b_a(results)
def test_escaping(self):
    """Backslash-escaped characters in a query string must be taken
    literally: the parser should produce a single Term with the escapes
    resolved, instead of interpreting field/range/space syntax."""
    qp = qparser.QueryParser("text")

    # A lone backslash escapes the next character and is itself dropped.
    q = qp.parse(r'big\small')
    self.assertEqual(q.__class__, query.Term, q)
    self.assertEqual(q.text, "bigsmall")

    # A doubled backslash yields one literal backslash.
    q = qp.parse(r'big\\small')
    self.assertEqual(q.__class__, query.Term)
    self.assertEqual(q.text, r'big\small')

    # Escaping the colon prevents "http" from being read as a field name.
    q = qp.parse(r'http\:example')
    self.assertEqual(q.__class__, query.Term)
    self.assertEqual(q.fieldname, "text")
    self.assertEqual(q.text, "http:example")

    # An escaped space keeps both words inside a single term.
    q = qp.parse(r'hello\ there')
    self.assertEqual(q.__class__, query.Term)
    self.assertEqual(q.text, "hello there")

    # Escaped brackets/spaces suppress range-query syntax entirely.
    q = qp.parse(r'\[start\ TO\ end\]')
    self.assertEqual(q.__class__, query.Term)
    self.assertEqual(q.text, "[start TO end]")

    # NOTE(review): `schema` is built but never passed to the parser below;
    # presumably intentional for this parser-only test — confirm.
    schema = fields.Schema(text=fields.TEXT)
    qp = qparser.QueryParser("text")
    q = qp.parse(r"http\:\/\/www\.example\.com")
    self.assertEqual(q.__class__.__name__, "Term")
    self.assertEqual(q.text, "http://www.example.com")

    # Two literal backslashes in the query parse down to one.
    q = qp.parse(u"\\\\")
    self.assertEqual(q.__class__.__name__, "Term")
    self.assertEqual(q.text, "\\")
def test_pseudofield():
    """PseudoFieldPlugin maps a fake field name in the query string to a
    node-transforming callback, so "field:value" syntax can trigger
    arbitrary query rewriting."""
    schema = fields.Schema(a=fields.KEYWORD, b=fields.TEXT)

    def regex_maker(node):
        # Rewrite the word node as a regex query on the "content" field.
        if node.has_text:
            node = qparser.RegexPlugin.RegexNode(node.text)
            node.set_fieldname("content")
        return node

    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(qparser.PseudoFieldPlugin({"regex": regex_maker}))
    q = qp.parse(u("alfa regex:br.vo"))
    assert_equal(q.__unicode__(), '(a:alfa AND content:r"br.vo")')

    def rev_text(node):
        if node.has_text:
            # Create a word node for the reversed text
            revtext = node.text[::-1]  # Reverse the text
            rnode = qparser.WordNode(revtext)

            # Duplicate the original node's start and end char
            rnode.set_range(node.startchar, node.endchar)

            # Put the original node and the reversed node in an OrGroup
            group = qparser.OrGroup([node, rnode])

            # Need to set the fieldname here because the PseudoFieldPlugin
            # removes the field name syntax
            group.set_fieldname("reverse")

            # NOTE(review): indentation was lost in this copy; `return` is
            # placed inside the `if` (where `group` is defined) — non-text
            # nodes therefore return None implicitly. Confirm against the
            # original layout.
            return group

    qp = qparser.QueryParser("content", schema)
    qp.add_plugin(qparser.PseudoFieldPlugin({"reverse": rev_text}))
    q = qp.parse(u("alfa reverse:bravo"))
    assert_equal(q.__unicode__(),
                 '(content:alfa AND (reverse:bravo OR reverse:ovarb))')
def get_video_ids(query, levenshtein_distance=1):
    """Return video titles matching *query* via fuzzy full-text search.

    Every whitespace-separated query term is expanded into a fuzzy term
    allowing up to *levenshtein_distance* edits. The fuzzy query is parsed
    twice — once OR-grouped, once AND-grouped — and the two parses are
    combined with Or, so documents matching all terms and documents
    matching any term are both retrieved. Scoring is TF-IDF.

    Args:
        query: Whitespace-separated search terms.
        levenshtein_distance: Maximum edit distance per term. Previously a
            hard-coded constant (1), now a backward-compatible parameter.

    Returns:
        List of the stored "title" field of every matching document.
    """
    ix = open_dir(corpus_index_dir)
    fuzzy_terms = " ".join(
        "{0}~{1}".format(term, levenshtein_distance)
        for term in query.split(" "))

    or_parser = qparser.QueryParser("content", ix.schema,
                                    group=qparser.OrGroup)
    or_parser.add_plugin(qparser.FuzzyTermPlugin())
    parsed_or_query = or_parser.parse(fuzzy_terms)

    and_parser = qparser.QueryParser("content", ix.schema,
                                     group=qparser.AndGroup)
    and_parser.add_plugin(qparser.FuzzyTermPlugin())
    parsed_and_query = and_parser.parse(fuzzy_terms)

    # Renamed from the misleading "fuzzy_query_parser": this is a query,
    # not a parser.
    combined_query = Or([parsed_or_query, parsed_and_query])

    with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
        results = searcher.search(combined_query, limit=None)
        return [result.fields()["title"] for result in results]
def test_multi_language():
    """Index documents in two languages by pre-analyzing content with a
    per-language analyzer, then search by swapping the schema's analyzer
    to match the query language."""
    # Analyzer for English
    ana_eng = analysis.StemmingAnalyzer()

    # analyzer for Pig Latin
    def stem_piglatin(w):
        if w.endswith("ay"):
            w = w[:-2]
        return w
    ana_pig = analysis.StemmingAnalyzer(stoplist=["nday", "roay"],
                                        stemfn=stem_piglatin)

    # Dictionary mapping languages to analyzers
    analyzers = {"eng": ana_eng, "pig": ana_pig}

    # Fake documents
    corpus = [(u("eng"), u("Such stuff as dreams are made on")),
              (u("pig"), u("Otay ebay, roay otnay otay ebay"))]

    schema = fields.Schema(content=fields.TEXT(stored=True),
                           lang=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        for doclang, content in corpus:
            ana = analyzers[doclang]
            # "Pre-analyze" the field into token strings
            words = [token.text for token in ana(content)]
            # Note we store the original value but index the pre-analyzed words
            w.add_document(lang=doclang, content=words,
                           _stored_content=content)

    with ix.searcher() as s:
        schema = s.schema

        # Modify the schema to fake the correct analyzer for the language
        # we're searching in
        schema["content"].analyzer = analyzers["eng"]

        qp = qparser.QueryParser("content", schema)
        # "dreaming" stems (English) to match the indexed "dreams".
        q = qp.parse("dreaming")
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["content"] == "Such stuff as dreams are made on"

        schema["content"].analyzer = analyzers["pig"]
        qp = qparser.QueryParser("content", schema)
        q = qp.parse("otnay")
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["content"] == "Otay ebay, roay otnay otay ebay"
def test_workflow_easy():
    """End-to-end smoke test: index five titles, search for "man", and
    highlight the matched word in the hit titles."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    titles = [
        (u("1"), u("The man who wasn't there")),
        (u("2"), u("The dog who barked at midnight")),
        (u("3"), u("The invisible man")),
        (u("4"), u("The girl with the dragon tattoo")),
        (u("5"), u("The woman who disappeared")),
    ]
    writer = ix.writer()
    for doc_id, title in titles:
        writer.add_document(id=doc_id, title=title)
    writer.commit()

    with ix.searcher() as s:
        # Parse the user query and search with term tracking enabled so
        # highlighting knows which words matched.
        parsed = qparser.QueryParser("title", schema=ix.schema).parse(u("man"))
        results = s.search(parsed, terms=True)
        assert_equal(len(results), 2)

        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        highlighted = [hit.highlights("title") for hit in results]
        assert_equal(highlighted,
                     ["The invisible MAN", "The MAN who wasn't there"])
def queryparse():
    """Prompt for a query string, offer a spelling-corrected version, and
    return either the accepted correction or the original string."""
    while True:
        print("Enter string")
        raw_query = input()
        parser = qparser.QueryParser("content", ix.schema)
        parsed = parser.parse(raw_query)

        with ix.searcher() as searcher:
            corrected = searcher.correct_query(parsed, raw_query)

        # No correction available: echo and return the input as-is.
        if corrected.query == parsed:
            print(raw_query)
            return raw_query

        print("Did you mean:", corrected.string)
        print(corrected.string)
        print("Enter yes or no [Y/N]")
        answer = input()
        if answer == "Y":
            return corrected.string
        # Any other answer: loop and prompt for a fresh query string.
def base_search(searcher, field, schema, query):
    """Full-text search helper: parse *query* against *field*/*schema* and
    return the stored 'checksum' of every matching document."""
    parsed_query = qparser.QueryParser(field, schema).parse(query)
    return [hit['checksum'] for hit in searcher.search(parsed_query)]
def test_boolean():
    """BOOLEAN fields must answer true/yes/false/no queries correctly and
    parse "false" to an actual boolean Term value."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           done=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for doc_id, flag in [("a", True), ("b", False), ("c", True),
                         ("d", False), ("e", True)]:
        writer.add_document(id=u(doc_id), done=flag)
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("id", schema)

        # Both spellings of truth match exactly the done=True docs.
        for truthy_query in ("done:true", "done:yes"):
            hits = s.search(qp.parse(truthy_query))
            assert sorted([d["id"] for d in hits]) == ["a", "c", "e"]
            assert all(d["done"] for d in hits)

        # "false" parses to a Term whose value is the boolean False.
        q = qp.parse("done:false")
        assert q.__class__ == query.Term
        assert q.text is False
        assert schema["done"].to_bytes(False) == b("f")
        hits = s.search(q)
        assert sorted([d["id"] for d in hits]) == ["b", "d"]
        assert not any(d["done"] for d in hits)

        hits = s.search(qp.parse("done:no"))
        assert sorted([d["id"] for d in hits]) == ["b", "d"]
        assert not any(d["done"] for d in hits)
def test_decimal_ranges():
    """Range queries on a NUMERIC(int, decimal_places=2) field must honor
    inclusive ([ ]) and exclusive ({ }) endpoints with Decimal values."""
    from decimal import Decimal

    schema = fields.Schema(id=fields.STORED,
                           num=fields.NUMERIC(int, decimal_places=2))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    count = Decimal("0.0")
    inc = Decimal("0.2")
    # Index 500 documents: 0.0, 0.2, 0.4, ... (id is the string form).
    for _ in xrange(500):
        w.add_document(id=str(count), num=count)
        count += inc
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("num", schema)

        def check(qs, start, end):
            # Parse the range query and collect matching ids in doc order.
            q = qp.parse(qs)
            result = [s.stored_fields(d)["id"] for d in q.docs(s)]

            # Build the expected inclusive sequence start..end in 0.2 steps.
            target = []
            count = Decimal(start)
            limit = Decimal(end)
            while count <= limit:
                target.append(str(count))
                count += inc

            assert result == target

        # Exclusive endpoints shift the expected bound by one 0.2 step.
        check("[10.2 to 80.8]", "10.2", "80.8")
        check("{10.2 to 80.8]", "10.4", "80.8")
        check("[10.2 to 80.8}", "10.2", "80.6")
        check("{10.2 to 80.8}", "10.4", "80.6")
def test_decimal_numeric():
    """Decimal values round-trip through a NUMERIC(int, decimal_places=4)
    field and are findable by exact-value queries."""
    from decimal import Decimal

    deci_field = fields.NUMERIC(int, decimal_places=4)
    schema = fields.Schema(id=fields.ID(stored=True), deci=deci_field)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for doc_id, value in [("a", "123.56"), ("b", "0.536255"),
                          ("c", "2.5255"), ("d", "58")]:
        writer.add_document(id=u(doc_id), deci=Decimal(value))
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("deci", schema)

        hits = s.search(qp.parse(u("123.56")))
        assert len(hits) == 1
        assert hits[0]["id"] == "a"

        hits = s.search(qp.parse(u("0.536255")))
        assert len(hits) == 1
        assert hits[0]["id"] == "b"
def test_andmaybe_quality():
    """An ANDMAYBE query must rank optional-clause matches the same whether
    the result list is sliced after an unlimited search or limited up front."""
    schema = fields.Schema(id=fields.STORED, title=fields.TEXT(stored=True),
                           year=fields.NUMERIC)
    ix = RamStorage().create_index(schema)

    corpus = [(u('Alpha Bravo Charlie Delta'), 2000),
              (u('Echo Bravo Foxtrot'), 2000),
              (u('Bravo Golf Hotel'), 2002),
              (u('Bravo India'), 2002),
              (u('Juliet Kilo Bravo'), 2004),
              (u('Lima Bravo Mike'), 2004)]
    writer = ix.writer()
    for title, year in corpus:
        writer.add_document(title=title, year=year)
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("title", ix.schema)
        q = qp.parse(u("title:bravo ANDMAYBE year:2004"))

        # Slice of an unlimited search...
        titles = [hit["title"] for hit in s.search(q, limit=None)[:2]]
        print("titles1=", titles)
        assert "Juliet Kilo Bravo" in titles

        # ...must agree with an explicitly limited search.
        titles = [hit["title"] for hit in s.search(q, limit=2)]
        print("titles2=", titles)
        assert "Juliet Kilo Bravo" in titles
def test_not2():
    """NOT queries must exclude matching documents, both before and after a
    deletion shrinks the index."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    storage = RamStorage()
    ix = storage.create_index(schema)

    corpus = [("a", "alfa bravo charlie delta echo"),
              ("b", "bravo charlie delta echo foxtrot"),
              ("c", "charlie delta echo foxtrot golf"),
              ("d", "delta echo golf hotel india"),
              ("e", "echo golf hotel india juliet")]
    writer = ix.writer()
    for name, value in corpus:
        writer.add_document(name=u(name), value=u(value))
    writer.commit()

    parser = qparser.QueryParser("value", None)
    with ix.searcher() as s:
        hits = s.search(parser.parse("echo NOT golf"))
        assert_equal(sorted([d["name"] for d in hits]), ["a", "b"])
        hits = s.search(parser.parse("echo NOT bravo"))
        assert_equal(sorted([d["name"] for d in hits]), ["c", "d", "e"])

    # Deleting every doc containing "bravo" leaves c, d, e; the NOT query
    # must reflect the deletion.
    ix.delete_by_term("value", u("bravo"))
    with ix.searcher() as s:
        hits = s.search(parser.parse("echo NOT charlie"))
        assert_equal(sorted([d["name"] for d in hits]), ["d", "e"])
def test_gtlt():
    """GtLtPlugin turns field:>x / field:<=x prefixes into open-ended range
    queries; without the fields/range plugins the same syntax degrades to
    plain terms."""
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD, d=fields.NUMERIC(float),
                           e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(plugins.GtLtPlugin())
    qp.add_plugin(dateparse.DateParserPlugin())

    q = qp.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 4)
    assert_equal(q[0], query.Term("a", "hello"))
    # ">" excludes the start value; no end bound.
    assert_equal(q[1], query.NumericRange("b", 100, None, startexcl=True))
    # "<=" includes the end value; no start bound.
    assert_equal(q[2], query.TermRange("c", None, 'z'))
    assert_equal(q[3], query.Term("a", "there"))

    q = qp.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    assert_equal(q[1], query.DateRange("e", datetime(2001, 3, 29, 0, 0), None))
    assert_equal(q[2], query.Term("a", "there"))

    # A dangling operator with no attached value falls back to term parsing.
    q = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(q), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    # Without the fields/range plugins the >/<= syntax is inert.
    qp.remove_plugin_class(plugins.FieldsPlugin)
    qp.remove_plugin_class(plugins.RangePlugin)
    q = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(q), "(a:hello AND a:a: AND a:500 AND a:there)")
def test_custom_tokens():
    """OperatorsPlugin with custom token patterns ('&'=And, '\\|'=Or,
    '&!'=AndNot, '&~'=AndMaybe, '-'=Not); the default word operators such
    as NOT must then parse as ordinary terms."""
    qp = qparser.QueryParser("text", None)
    qp.remove_plugin_class(plugins.OperatorsPlugin)
    cp = plugins.OperatorsPlugin(And="&", Or="\\|", AndNot="&!",
                                 AndMaybe="&~", Not="-")
    qp.add_plugin(cp)

    q = qp.parse("this | that")
    assert_equal(q.__class__, query.Or)
    assert_equal(q[0].__class__, query.Term)
    assert_equal(q[0].text, "this")
    assert_equal(q[1].__class__, query.Term)
    assert_equal(q[1].text, "that")

    # The custom tokens bind even without surrounding whitespace.
    q = qp.parse("this&!that")
    assert_equal(q.__class__, query.AndNot)
    assert_equal(q.a.__class__, query.Term)
    assert_equal(q.a.text, "this")
    assert_equal(q.b.__class__, query.Term)
    assert_equal(q.b.text, "that")

    # "-" is now Not, while the word "NOT" is just another term (q[2]).
    q = qp.parse("alfa -bravo NOT charlie")
    assert_equal(len(q), 4)
    assert_equal(q[1].__class__, query.Not)
    assert_equal(q[1].query.text, "bravo")
    assert_equal(q[2].text, "NOT")
def test_combos():
    """Instantiate every parser plugin and parse a complex query under many
    plugin orderings, to shake out order-dependent parser interactions.

    BUG FIXES:
    - The init_args dict listed plugins.MultifieldPlugin twice; the second
      key silently overwrote the first. The boosted variant is kept.
    - The PseudoFieldPlugin entry was missing its trailing comma, so it was
      a bare dict instead of a 1-tuple and ``plugin(*args)`` unpacked the
      dict's KEYS as positional arguments.
    """
    qs = 'w:a "hi there"^4.2 AND x:b^2.3 OR c AND (y:d OR e) (apple ANDNOT bear)^2.3'
    init_args = {
        plugins.MultifieldPlugin: (["content", "title"],
                                   {"content": 1.0, "title": 1.2}),
        plugins.FieldAliasPlugin: ({"content": ("text", "body")},),
        plugins.CopyFieldPlugin: ({"name": "phone"},),
        plugins.PseudoFieldPlugin: ({"name": lambda x: x},),
    }

    pis = _plugin_classes(())
    for i, plugin in enumerate(pis):
        try:
            # Plugins without an entry are instantiated with no arguments.
            pis[i] = plugin(*init_args.get(plugin, ()))
        except TypeError:
            raise TypeError("Error instantiating %s" % plugin)

    count = 0
    for i, first in enumerate(pis):
        for j in xrange(len(pis)):
            if i == j:
                continue
            # Try "first" plugin last, after a prefix of the others.
            plist = [p for p in pis[:j] if p is not first] + [first]
            qp = qparser.QueryParser("text", None, plugins=plist)
            try:
                qp.parse(qs)
            except Exception:
                e = sys.exc_info()[1]
                raise Exception(str(e) + " combo: %s %r" % (count, plist))
            count += 1
def test_globfield_length_merge():
    """Field lengths of dynamic (glob) fields must survive segment merges:
    after two separate writer sessions, doc_field_length and searches on
    the globbed field still work."""
    # Issue 343
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           path=fields.ID(stored=True))
    # "*_text" matches any field name ending in "_text", e.g. content_text.
    schema.add("*_text", fields.TEXT, glob=True)

    with TempIndex(schema, "globlenmerge") as ix:
        # Two separate writer blocks create two segments, exercising the
        # merge path for the glob field's length data.
        with ix.writer() as w:
            w.add_document(
                title=u("First document"), path=u("/a"),
                content_text=u("This is the first document we've added!"))

        with ix.writer() as w:
            w.add_document(
                title=u("Second document"), path=u("/b"),
                content_text=u(
                    "The second document is even more interesting!"))

        with ix.searcher() as s:
            docnum = s.document_number(path="/a")
            assert s.doc_field_length(docnum, "content_text") is not None

            qp = qparser.QueryParser("content", schema)
            q = qp.parse("content_text:document")
            r = s.search(q)
            paths = sorted(hit["path"] for hit in r)
            assert paths == ["/a", "/b"]
def test_numeric():
    """NUMERIC int and float fields are searchable by exact value; a bare
    wildcard parses to Every and an impossible pattern to NullQuery."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           integer=fields.NUMERIC(int),
                           floating=fields.NUMERIC(float))
    ix = RamStorage().create_index(schema)

    rows = [("a", 5820, 1.2), ("b", 22, 2.3), ("c", 78, 3.4),
            ("d", 13, 4.5), ("e", 9, 5.6)]
    writer = ix.writer()
    for doc_id, int_value, float_value in rows:
        writer.add_document(id=u(doc_id), integer=int_value,
                            floating=float_value)
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("integer", schema)
        hits = s.search(qp.parse(u("5820")))
        assert len(hits) == 1
        assert hits[0]["id"] == "a"

    with ix.searcher() as s:
        hits = s.search(qp.parse("floating:4.5"))
        assert len(hits) == 1
        assert hits[0]["id"] == "d"

        # Bare "*" on a numeric field means "every document with a value".
        q = qp.parse("integer:*")
        assert q.__class__ == query.Every
        assert q.field() == "integer"

        # Wildcard characters inside a numeric value cannot match anything.
        q = qp.parse("integer:5?6")
        assert q == query.NullQuery
def test_finalweighting():
    """A weighting model's final() hook can replace the relevance score;
    here documents are ranked purely by their stored n_comments value."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True),
                           summary=fields.TEXT,
                           n_comments=fields.STORED)
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    w.add_document(id=u("1"), summary=u("alfa bravo"), n_comments=5)
    w.add_document(id=u("2"), summary=u("alfa"), n_comments=12)
    w.add_document(id=u("3"), summary=u("bravo"), n_comments=2)
    w.add_document(id=u("4"), summary=u("bravo bravo"), n_comments=7)
    w.commit()

    class CommentWeighting(Frequency):
        # Opt in to the final() post-processing hook.
        use_final = True

        def final(self, searcher, docnum, score):
            # Ignore the text score entirely; rank by stored comment count.
            ncomments = searcher.stored_fields(docnum).get("n_comments", 0)
            return ncomments

    with ix.searcher(weighting=CommentWeighting()) as s:
        r = s.search(qparser.QueryParser("summary", None).parse("alfa OR bravo"))
        ids = [fs["id"] for fs in r]
        # Expected order is descending n_comments: 12, 7, 5, 2.
        assert_equal(["2", "4", "1", "3"], ids)
def test_numeric_ranges():
    """Numeric range queries must honor inclusive ([ ]) and exclusive ({ })
    endpoints as well as open-ended bounds."""
    schema = fields.Schema(id=fields.STORED, num=fields.NUMERIC)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for n in xrange(400):
        writer.add_document(id=n, num=n)
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("num", schema)

        # (query string, expected half-open range over the 0..399 ids);
        # note that range() is always inclusive-exclusive.
        cases = [
            ("[10 to 390]", 10, 391),
            ("[100 to]", 100, 400),
            ("[to 350]", 0, 351),
            ("[16 to 255]", 16, 256),
            ("{10 to 390]", 11, 391),
            ("[10 to 390}", 10, 390),
            ("{10 to 390}", 11, 390),
            ("{16 to 255}", 17, 255),
        ]
        for query_string, lo, hi in cases:
            q = qp.parse(query_string)
            found = [s.stored_fields(d)["id"] for d in q.docs(s)]
            assert found == list(range(lo, hi))
def searcher(self):
    """Open the benchmark's Whoosh index and cache a searcher and a query
    parser for its main field on this object."""
    index_dir = os.path.join(self.options.dir,
                             "%s_whoosh" % self.options.indexname)
    whoosh_index = index.open_dir(index_dir)
    self.srch = whoosh_index.searcher()
    self.parser = qparser.QueryParser(self.bench.spec.main_field,
                                      schema=whoosh_index.schema)
def test_datetime():
    """DATETIME fields accept several query spellings (compact date, quoted
    year-month, bracketed range) and store real datetime objects."""
    dtf = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=dtf)
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    # NOTE(review): xrange(1, 12) / xrange(1, 28) index months Jan-Nov and
    # days 1-27 only; the "27 results for 2010-02" assertion below depends
    # on exactly these bounds, so they appear deliberate.
    for month in xrange(1, 12):
        for day in xrange(1, 28):
            w.add_document(id=u("%s-%s") % (month, day),
                           date=datetime(2010, month, day, 14, 0, 0))
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("id", schema)

        # Compact YYYYMMDD form matches a single document and round-trips
        # a real datetime from storage.
        r = s.search(qp.parse("date:20100523"))
        assert len(r) == 1
        assert r[0]["id"] == "5-23"
        assert r[0]["date"].__class__ is datetime
        assert r[0]["date"].month == 5
        assert r[0]["date"].day == 23

        # A quoted year-month matches every indexed day in that month.
        r = s.search(qp.parse("date:'2010 02'"))
        assert len(r) == 27

        # A month range expands to full-month boundaries and is stored as
        # a NumericRange over long timestamps.
        q = qp.parse(u("date:[2010-05 to 2010-08]"))
        startdt = datetime(2010, 5, 1, 0, 0, 0, 0)
        enddt = datetime(2010, 8, 31, 23, 59, 59, 999999)
        assert q.__class__ is query.NumericRange
        assert q.start == times.datetime_to_long(startdt)
        assert q.end == times.datetime_to_long(enddt)
def score_to_file():
    """Run every loaded query against the index with BM25F and write
    TREC-style result lines ("<query-id> Q0 <doc-id> <score>") to
    output_file.

    FIXES:
    - The output file is now opened with a context manager so it is closed
      even if searching raises (previously a bare open()/close() pair).
    - Removed the unused ``reader`` local and the dead commented-out
      TF-IDF / docnum-iteration variants.
    """
    # Open index
    ix = index.open_dir(index_dir)
    queries = load_queries()

    with open(output_file, "w") as outfile:
        with ix.searcher(weighting=scoring.BM25F()) as searcher:
            qp = qparser.QueryParser(field, schema=ix.schema)
            for query in queries:
                print("Processing query number", query['id'])
                # Retrieve documents using the vector space model.
                q = qp.parse(query['text'])  # we concatenate query terms
                res = searcher.search(q)
                for r in res:
                    outfile.write(query['id'] + " Q0 " + r['id'] + " " +
                                  str(r.score) + "\n")

    ix.close()
def test_boolean_strings():
    """String spellings of booleans ("true"/"True"/"false"/"False", bytes
    or unicode) must index as booleans and answer true/false/t/f queries."""
    schema = fields.Schema(i=fields.STORED, b=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)

    values = ["true", "True", "false", "False",
              u("true"), u("True"), u("false"), u("False")]
    with ix.writer() as w:
        for num, value in enumerate(values):
            w.add_document(i=num, b=value)

    with ix.searcher() as s:
        qp = qparser.QueryParser("b", ix.schema)
        truthy = [0, 1, 4, 5]
        falsy = [2, 3, 6, 7]

        # Every accepted spelling maps onto the same truthy/falsy docs.
        expectations = [("true", truthy), ("True", truthy),
                        ("false", falsy), ("False", falsy),
                        ("t", truthy), ("f", falsy)]
        for query_string, expected in expectations:
            hits = s.search(qp.parse(query_string), limit=None)
            assert [hit["i"] for hit in hits] == expected
def _make_query_parser(self):
    """Build a query parser over the 'path' field with greater/less-than
    (>, <=, ...) and natural-date query syntax enabled."""
    from whoosh.qparser.dateparse import DateParserPlugin

    parser = qparser.QueryParser('path', schema=self._make_schema())
    parser.add_plugin(qparser.GtLtPlugin())
    parser.add_plugin(DateParserPlugin())
    return parser
def test_boost(self):
    """Caret boosts attach to the right clause, and a trailing number that
    looks like a boost value parses as a plain term."""
    parsed = qparser.QueryParser("content").parse("this^3 fn:that^0.5 5.67")
    first, second, third = parsed.subqueries[0], parsed.subqueries[1], parsed.subqueries[2]
    self.assertEqual(first.boost, 3.0)
    self.assertEqual(second.boost, 0.5)
    self.assertEqual(second.fieldname, "fn")
    # "5.67" is a term, not a dangling boost.
    self.assertEqual(third.text, "5.67")
def n_gram_query(self, query_string):
    """Parse *query_string* against the n-gram field.

    Field and wildcard syntax are disabled, fuzzy-term syntax (~) is
    enabled, and terms are OR-combined with a 0.8 scaling factor.
    """
    parser = qparser.QueryParser(_N_GRAM_FIELD, self._schema,
                                 group=qparser.OrGroup.factory(0.8))
    for unwanted in (qparser.FieldsPlugin, qparser.WildcardPlugin):
        parser.remove_plugin_class(unwanted)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    return parser.parse(query_string)
def search(self, query, number):
    """Yield the text of up to MAX_TWEETS_NUMBER tweets relevant to *query*.

    Extracts the *number* most significant key terms from *query*, ORs them
    into a new query over the "text" field, and searches the index.
    Documents whose "retweet" field is True are masked out of the results.

    FIXES: removed the unused pprint import/instance, and stopped shadowing
    the *query* parameter with the parsed query object.

    Args:
        query: Free text to extract key terms from.
        number: How many key terms to extract.

    Yields:
        The "text" field of each matching tweet.
    """
    self.get_index()
    with self.index.searcher() as searcher:
        # Improve relevance: reformulate the query from its key terms.
        keywords = searcher.key_terms_from_text("text", query,
                                                numterms=number)
        keyword_query = " ".join(keyword for keyword, score in keywords)

        # If we don't find any keywords (for example, we're not actually
        # looking up context tweets), fall back to matching everything.
        # TODO find better way of doing this
        if not keyword_query:
            keyword_query = "*"
        print("keyword query: %s" % keyword_query)

        parser = qparser.QueryParser(
            "text", self.index.schema, group=qparser.OrGroup)
        parsed_query = parser.parse(keyword_query)

        # Mask (exclude) retweets from the result set.
        restrict_retweets = whoosh.query.Term("retweet", True)
        results = searcher.search(parsed_query, mask=restrict_retweets,
                                  limit=MAX_TWEETS_NUMBER)
        for result in results:
            yield result["text"]
def getQparser(index):
    """Build an OR-group query parser over the "body" field with field and
    wildcard syntax disabled."""
    parser = qparser.QueryParser("body", schema=index.schema,
                                 group=qparser.OrGroup)
    for unwanted in (qparser.FieldsPlugin, qparser.WildcardPlugin):
        parser.remove_plugin_class(unwanted)
    return parser