def test_query_facet():
    # Nine single-letter docs, committed one per writer with merge=False so
    # the index ends up with several segments.
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)
    for docnum, letter in enumerate(u("iacgbehdf")):
        writer = ix.writer()
        writer.add_document(id=docnum, v=letter)
        writer.commit(merge=False)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        # Sanity-check each range query on its own first.
        assert_equal([hit["id"] for hit in s.search(q1)], [1, 2, 4])
        assert_equal([hit["id"] for hit in s.search(q2)], [5, 7, 8])
        assert_equal([hit["id"] for hit in s.search(q3)], [0, 3, 6])

        facet = sorting.QueryFacet({"a-c": q1, "d-f": q2, "g-i": q3})
        r = s.search(query.Every(), groupedby=facet)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        assert_equal(r.groups("facet"),
                     {"a-c": [1, 2, 4],
                      "d-f": [5, 7, 8],
                      "g-i": [0, 3, 6]})
def test_query_facet2():
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for i, letter in enumerate(domain):
            # Pair each letter with the letter i positions from the end of
            # the domain, so most documents fall into two different ranges.
            w.add_document(v="%s %s" % (letter, domain[0 - i]))

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        facets = sorting.Facets()
        facets.add_query("myfacet", {"a-c": q1, "d-f": q2, "g-i": q3},
                         allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        assert_equal(r.groups("myfacet"),
                     {'a-c': [0, 1, 2, 7, 8],
                      'd-f': [4, 5],
                      'g-i': [3, 6]})
def test_closed_searcher():
    from whoosh.reading import ReaderClosed

    schema = fields.Schema(key=fields.KEYWORD(stored=True, sortable=True))
    with TempStorage() as st:
        ix = st.create_index(schema)
        with ix.writer() as w:
            for word in (u"alfa", u"bravo", u"charlie", u"delta", u"echo"):
                w.add_document(key=word)

        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        s.close()
        assert s.is_closed

        # Every access through the closed searcher must raise ReaderClosed.
        with pytest.raises(ReaderClosed):
            assert r[0]["key"] == "bravo"
        with pytest.raises(ReaderClosed):
            s.reader().column_reader("key")
        with pytest.raises(ReaderClosed):
            s.suggest("key", "brovo")

        # A fresh searcher over the same index works normally again.
        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        assert r[0]
        assert r[0]["key"] == "bravo"
        c = s.reader().column_reader("key")
        assert c[1] == "bravo"
        assert s.suggest("key", "brovo") == ["bravo"]
def query(self, parser):
    """Convert this range node into a query object, letting self-parsing
    fields (e.g. numeric/date fields) build their own range query first.
    """
    # An explicit fieldname on the node wins over the parser's default.
    fieldname = self.fieldname or parser.fieldname
    start, end = self.start, self.end

    if parser.schema and fieldname in parser.schema:
        field = parser.schema[fieldname]
        if field.self_parsing():
            try:
                rangeq = field.parse_range(fieldname, start, end,
                                           self.startexcl, self.endexcl,
                                           boost=self.boost)
                if rangeq is not None:
                    return attach(rangeq, self)
            except QueryParserError:
                # sys.exc_info() keeps this working on both Python 2 and 3
                e = sys.exc_info()[1]
                return attach(query.error_query(e), self)

        # Otherwise normalize the endpoints through the field's analyzer.
        if start:
            start = get_single_text(field, start, tokenize=False,
                                    removestops=False)
        if end:
            end = get_single_text(field, end, tokenize=False,
                                  removestops=False)

    rangeq = query.TermRange(fieldname, start, end, self.startexcl,
                             self.endexcl, boost=self.boost)
    return attach(rangeq, self)
def test_gtlt():
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD, d=fields.NUMERIC(float),
                           e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(plugins.GtLtPlugin())
    qp.add_plugin(dateparse.DateParserPlugin())

    q = qp.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 4)
    assert_equal(q[0], query.Term("a", "hello"))
    assert_equal(q[1], query.NumericRange("b", 100, None, startexcl=True))
    assert_equal(q[2], query.TermRange("c", None, 'z'))
    assert_equal(q[3], query.Term("a", "there"))

    q = qp.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    assert_equal(q[1], query.DateRange("e", datetime(2001, 3, 29, 0, 0),
                                       None))
    assert_equal(q[2], query.Term("a", "there"))

    # A gt/lt operator followed by a space is treated as plain terms.
    q = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(q), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    # Without the fields/range plugins the operators lose their meaning.
    qp.remove_plugin_class(plugins.FieldsPlugin)
    qp.remove_plugin_class(plugins.RangePlugin)
    q = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(q), "(a:hello AND a:a: AND a:500 AND a:there)")
def test_weighting():
    from whoosh.scoring import Weighting, BaseScorer

    schema = fields.Schema(id=fields.ID(stored=True),
                           n_comments=fields.STORED)
    st = RamStorage()
    ix = st.create_index(schema)
    w = ix.writer()
    w.add_document(id=u("1"), n_comments=5)
    w.add_document(id=u("2"), n_comments=12)
    w.add_document(id=u("3"), n_comments=2)
    w.add_document(id=u("4"), n_comments=7)
    w.commit()

    # Fake Weighting implementation: score each hit by its stored
    # n_comments value instead of term statistics.
    class CommentWeighting(Weighting):
        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

        class CommentScorer(BaseScorer):
            def __init__(self, stored_fields):
                self.stored_fields = stored_fields

            def score(self, matcher):
                sf = self.stored_fields(matcher.id())
                return sf.get("n_comments", 0)

    with ix.searcher(weighting=CommentWeighting()) as s:
        q = query.TermRange("id", u("1"), u("4"), constantscore=False)
        r = s.search(q)
        ids = [fs["id"] for fs in r]
        assert_equal(ids, ["2", "4", "1", "3"])
def test_searching():
    with make_index().searcher() as s:
        # Helper: run a query and compare the matched ids, in order.
        def _runq(q, result, **kwargs):
            hits = s.search(q, **kwargs)
            assert_equal([d["id"] for d in hits], result)

        _runq(query.Term("text", u("format")), ["format", "vector"])
        _runq(query.Term("text", u("the")),
              ["fieldtype", "format", "const", "vector", "stored"])
        _runq(query.Prefix("text", u("st")), ["format", "vector", "stored"])
        _runq(query.Wildcard("id", u("*st*")), ["stored", "const"])
        _runq(query.TermRange("id", u("c"), u("s")),
              ["fieldtype", "format", "const"])
        _runq(query.NumericRange("subs", 10, 100),
              ["fieldtype", "format", "vector", "scorable"])
        _runq(query.Phrase("text", ["this", "field"]),
              ["scorable", "unique", "stored"], limit=None)
        _runq(query.Every(),
              ["fieldtype", "format", "vector", "scorable", "stored",
               "unique", "const"])
        _runq(query.Every("subs"),
              ["fieldtype", "format", "vector", "scorable", "stored",
               "unique", "const"])
def test_nocachefield_segments():
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    # Three separate commits (the later two with merge=False) plus one
    # deletion, so sorting has to work across multiple segments.
    w = ix.writer()
    w.add_document(a=u("bravo"))
    w.add_document(a=u("echo"))
    w.add_document(a=u("juliet"))
    w.commit()

    w = ix.writer()
    w.add_document(a=u("kilo"))
    w.add_document(a=u("foxtrot"))
    w.add_document(a=u("charlie"))
    w.commit(merge=False)

    w = ix.writer()
    w.delete_by_term("a", u("echo"))
    w.add_document(a=u("alfa"))
    w.add_document(a=u("india"))
    w.add_document(a=u("delta"))
    w.commit(merge=False)

    with ix.searcher() as s:
        q = query.TermRange("a", u("bravo"), u("k"))
        facet = sorting.FieldFacet("a", reverse=True)

        r = s.search(q, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "delta", "charlie", "bravo"]

        mq = query.Or([query.Term("a", u("bravo")),
                       query.Term("a", u("delta"))])
        anq = query.AndNot(q, mq)
        r = s.search(anq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        mq = query.Or([query.Term("a", u("bravo")),
                       query.Term("a", u("delta"))])
        r = s.search(q, mask=mq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        fq = query.Or([query.Term("a", u("alfa")),
                       query.Term("a", u("charlie")),
                       query.Term("a", u("echo")),
                       query.Term("a", u("india"))])
        r = s.search(query.Every(), filter=fq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["india", "charlie", "alfa"]

        nq = query.Not(query.Or([query.Term("a", u("alfa")),
                                 query.Term("a", u("india"))]))
        r = s.search(query.Every(), filter=nq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["kilo", "juliet", "foxtrot",
                                           "delta", "charlie", "bravo"]
def make_range(self, fieldname, range):
    """Build a TermRange query from a pair of endpoint texts.

    :param fieldname: name of the field to search, or None to use the
        parser's default field.
    :param range: a ``(start, end)`` tuple of endpoint texts.
    :returns: a ``query.TermRange`` over the analyzed endpoints.
    """
    # NOTE: `range` shadows the builtin; the name is kept for interface
    # compatibility with existing callers.
    start, end = range
    fieldname = fieldname or self.default_field
    start = self._analyze(fieldname, start)
    end = self._analyze(fieldname, end)
    # Bug fix: the endpoints were previously passed as a single tuple in
    # the `start` slot of TermRange (leaving `end` unset); pass them as
    # separate arguments, matching TermRange(fieldname, start, end).
    return query.TermRange(fieldname, start, end)
def check(startexcl, endexcl, string):
    # Run a "b".."f" range with the given exclusivity flags and compare
    # the sorted, concatenated ids of the hits against `string`.
    # (Uses the enclosing test's searcher `s` via closure.)
    q = query.TermRange("id", "b", "f", startexcl, endexcl)
    found = "".join(sorted(doc["id"] for doc in s.search(q)))
    assert_equal(found, string)
class Range(Token):
    """Parser token for a range expression such as ``[alfa TO bravo]``
    (inclusive) or ``{alfa TO bravo}`` (exclusive). Either endpoint may be
    quoted or omitted.
    """

    expr = rcompile(r"""
    (?P<open>\{|\[)               # Open paren
    (                             # Begin optional "start"
      (                           # Begin choice between start1 and start2
        ('(?P<start2>[^']+)')     # Quoted start
        |                         # ...or...
        (?P<start1>[^ ]+)         # ...regular start
      )                           # End choice
    [ ]+)?                        # Space at end of optional "start"
    [Tt][Oo]                      # "to" between start and end
    ([ ]+                         # Space at start of optional "end"
      (                           # Begin choice between end1 and end2
        ('(?P<end2>[^']+)')       # Quoted end
        |                         # ...or...
        (?P<end1>[^\]\}]*)        # ...normal end
      )                           # End choice
    )?                            # End of optional "end
    (?P<close>\}|\])              # Close paren
    """, re.VERBOSE)

    def __init__(self, start, end, startexcl, endexcl, fieldname=None,
                 boost=1.0):
        self.fieldname = fieldname
        self.start = start
        self.end = end
        self.startexcl = startexcl
        self.endexcl = endexcl
        self.boost = boost

    def set_boost(self, b):
        # Tokens are immutable: return a copy with the new boost.
        return self.__class__(self.start, self.end, self.startexcl,
                              self.endexcl, fieldname=self.fieldname,
                              boost=b)

    def set_fieldname(self, name):
        # Tokens are immutable: return a copy bound to the given field.
        return self.__class__(self.start, self.end, self.startexcl,
                              self.endexcl, fieldname=name,
                              boost=self.boost)

    def __repr__(self):
        r = "%s:(%r, %r, %s, %s)" % (self.fieldname, self.start, self.end,
                                     self.startexcl, self.endexcl)
        if self.boost != 1.0:
            r += "^%s" % self.boost
        return r

    @classmethod
    def create(cls, parser, match):
        # Quoted endpoints (start2/end2) win over unquoted (start1/end1);
        # curly braces mark the corresponding endpoint as exclusive.
        start = match.group("start2") or match.group("start1")
        end = match.group("end2") or match.group("end1")
        return cls(start, end, startexcl=match.group("open") == "{",
                   endexcl=match.group("close") == "}")

    def query(self, parser):
        """Convert this token into a query object, letting self-parsing
        fields build their own range query first.
        """
        fieldname = self.fieldname or parser.fieldname
        start, end = self.start, self.end
        if parser.schema and fieldname in parser.schema:
            field = parser.schema[fieldname]

            if field.self_parsing():
                try:
                    rangeq = field.parse_range(fieldname, start, end,
                                               self.startexcl, self.endexcl,
                                               boost=self.boost)
                    if rangeq is not None:
                        return rangeq
                # Bug fix: was the Python-2-only form
                # ``except QueryParserError, e:`` (a SyntaxError on
                # Python 3); the bound exception was never used.
                except QueryParserError:
                    return query.NullQuery

            if start:
                start = parser.get_single_text(field, start, tokenize=False,
                                               removestops=False)
            if end:
                end = parser.get_single_text(field, end, tokenize=False,
                                             removestops=False)

        # Open-ended ranges: empty string is the lowest possible term and
        # u'\uFFFF' sorts above any indexed term.
        if start is None:
            start = u''
        if end is None:
            end = u'\uFFFF'

        return query.TermRange(fieldname, start, end, self.startexcl,
                               self.endexcl, boost=self.boost)