def test_nocachefield_segments():
    # Build a three-segment index (merge=False keeps segments separate) and
    # verify sorting by an uncached field works across segments with
    # deletions, AndNot, mask, filter, and Not-filter queries.
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(a=u("bravo"))
    w.add_document(a=u("echo"))
    w.add_document(a=u("juliet"))
    w.commit()
    w = ix.writer()
    w.add_document(a=u("kilo"))
    w.add_document(a=u("foxtrot"))
    w.add_document(a=u("charlie"))
    w.commit(merge=False)
    w = ix.writer()
    w.delete_by_term("a", u("echo"))
    w.add_document(a=u("alfa"))
    w.add_document(a=u("india"))
    w.add_document(a=u("delta"))
    w.commit(merge=False)

    with ix.searcher() as s:
        q = query.TermRange("a", u("bravo"), u("k"))
        facet = sorting.FieldFacet("a", reverse=True)

        r = s.search(q, sortedby=facet)
        assert [hit["a"] for hit in r] == [
            "juliet", "india", "foxtrot", "delta", "charlie", "bravo"
        ]

        mq = query.Or(
            [query.Term("a", u("bravo")), query.Term("a", u("delta"))])
        anq = query.AndNot(q, mq)
        r = s.search(anq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        mq = query.Or(
            [query.Term("a", u("bravo")), query.Term("a", u("delta"))])
        r = s.search(q, mask=mq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        fq = query.Or([
            query.Term("a", u("alfa")),
            query.Term("a", u("charlie")),
            query.Term("a", u("echo")),
            query.Term("a", u("india")),
        ])
        r = s.search(query.Every(), filter=fq, sortedby=facet)
        # "echo" was deleted above, so only three of the four filter terms
        # still match documents
        assert [hit["a"] for hit in r] == ["india", "charlie", "alfa"]

        nq = query.Not(
            query.Or([query.Term("a", u("alfa")), query.Term("a", u("india"))]))
        r = s.search(query.Every(), filter=nq, sortedby=facet)
        assert [hit["a"] for hit in r] == [
            "kilo", "juliet", "foxtrot", "delta", "charlie", "bravo"
        ]
def test_page_sorted():
    """Paging over sorted results: a limited search, a full first page,
    an out-of-range slice, and an empty result page."""
    schema = fields.Schema(key=fields.ID(stored=True))
    with TempIndex(schema, "pagesorted") as ix:
        letters = list(u("abcdefghijklmnopqrstuvwxyz"))
        random.shuffle(letters)
        writer = ix.writer()
        for letter in letters:
            writer.add_document(key=letter)
        writer.commit()

        with ix.searcher() as s:
            results = s.search(query.Every(), sortedby="key", limit=5)
            assert_equal(results.scored_length(), 5)
            assert_equal(len(results), s.doc_count_all())

            page = s.search_page(query.Every(), 1, pagelen=5, sortedby="key")
            assert_equal("".join([hit["key"] for hit in page]), "abcde")
            assert_equal(page[10:], [])

            page = s.search_page(query.Term("key", "glonk"), 1, pagelen=5,
                                 sortedby="key")
            assert_equal(len(page), 0)
            assert page.is_last_page()
def test(ix):
    # Exercise sorting on the given index: ascending title, descending
    # title, then a multi-level sort (num ascending, title descending
    # within equal nums).  Relies on the surrounding module's
    # ``sorted_titles`` fixture.
    with ix.searcher() as s:
        # Sort by title
        r = s.search(query.Every(), sortedby="title")
        assert [hit["title"] for hit in r] == sorted_titles

        # Sort by reverse title
        facet = sorting.FieldFacet("title", reverse=True)
        r = s.search(query.Every(), sortedby=facet)
        assert [hit["title"] for hit in r] == list(reversed(sorted_titles))

        # Sort by num (-10 to 10) first, and within that, by reverse title
        facet = sorting.MultiFacet()
        facet.add_field("num")
        facet.add_field("title", reverse=True)
        r = s.search(query.Every(), sortedby=facet)
        target = ["Visual and Statistical Thinking",
                  "Cognitive Style of Powerpoint",
                  "Beautiful Evidence",
                  "Visual Explanations",
                  "Visual Display of Quantitative Information, The",
                  "Envisioning Information",
                  ]
        assert [hit["title"] for hit in r] == target
def test_sort_filter():
    # Multi-key sorting combined with a filter query, checked both on a
    # multi-segment index and again after optimizing down to one segment.
    schema = fields.Schema(group=fields.ID(stored=True),
                           key=fields.ID(stored=True))
    groups = u("alfa bravo charlie").split()
    keys = u("abcdefghijklmnopqrstuvwxyz")
    source = []
    for i in xrange(100):
        key = keys[i % len(keys)]
        group = groups[i % len(groups)]
        source.append({"key": key, "group": group})
    source.sort(key=lambda x: (x["key"], x["group"]))

    sample = list(source)
    random.shuffle(sample)

    with TempIndex(schema, "sortfilter") as ix:
        w = ix.writer()
        for i, fs in enumerate(sample):
            w.add_document(**fs)
            i += 1
            # Commit every 26 documents without merging to force multiple
            # segments
            if not i % 26:
                w.commit(merge=False)
                w = ix.writer()
        w.commit()

        fq = query.Term("group", u("bravo"))

        with ix.searcher() as s:
            r = s.search(query.Every(), sortedby=("key", "group"), filter=fq,
                         limit=20)
            assert_equal([h.fields() for h in r],
                         [d for d in source if d["group"] == "bravo"][:20])

            fq = query.Term("group", u("bravo"))
            r = s.search(query.Every(), sortedby=("key", "group"), filter=fq,
                         limit=None)
            assert_equal([h.fields() for h in r],
                         [d for d in source if d["group"] == "bravo"])

        ix.optimize()

        with ix.searcher() as s:
            r = s.search(query.Every(), sortedby=("key", "group"), filter=fq,
                         limit=20)
            assert_equal([h.fields() for h in r],
                         [d for d in source if d["group"] == "bravo"][:20])

            fq = query.Term("group", u("bravo"))
            r = s.search(query.Every(), sortedby=("key", "group"), filter=fq,
                         limit=None)
            assert_equal([h.fields() for h in r],
                         [d for d in source if d["group"] == "bravo"])
def test_read_with_type_facet(self):
    """Querying with a "type" facet still returns at least one hit."""
    everything = self.whoosh_backend.query(query.Every())
    self.print_result(everything)

    faceted = self.whoosh_backend.query(query.Every(), facets=["type"])
    self.print_result(faceted)
    self.assertLessEqual(1, faceted.hits)
def test_read_all(self):
    """A plain Every() query returns at least one hit, and repeating the
    same query works a second time."""
    first = self.whoosh_backend.query(query.Every())
    self.print_result(first)

    second = self.whoosh_backend.query(query.Every())
    self.print_result(second)
    self.assertLessEqual(1, second.hits)
def test_overlapping_lists():
    """Grouping a KEYWORD field with allow_overlap puts each document into
    every group it has a term for, both via an explicit FieldFacet and via
    a Facets collection."""
    schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, tags=u("alfa bravo charlie"))
        w.add_document(id=1, tags=u("bravo charlie delta"))
        w.add_document(id=2, tags=u("charlie delta echo"))
        w.add_document(id=3, tags=u("delta echo alfa"))
        w.add_document(id=4, tags=u("echo alfa bravo"))

    # Both grouping styles below must produce exactly these groups
    expected = {'alfa': [0, 3, 4],
                'bravo': [0, 1, 4],
                'charlie': [0, 1, 2],
                'delta': [1, 2, 3],
                'echo': [2, 3, 4]}

    with ix.searcher() as s:
        of = sorting.FieldFacet("tags", allow_overlap=True)
        cat = of.categorizer(s)
        # The categorizer is expected not to fall back to term vectors here
        assert not cat._use_vectors
        r = s.search(query.Every(), groupedby={"tags": of})
        assert r.groups("tags") == expected

        fcts = sorting.Facets()
        fcts.add_field("tags", allow_overlap=True)
        r = s.search(query.Every(), groupedby=fcts)
        assert r.groups("tags") == expected
def test_searching():
    # Run a battery of query types (Term, Prefix, Wildcard, TermRange,
    # NumericRange, Phrase, Every) against the fixture index from
    # make_index() and check the matched document ids.
    with make_index().searcher() as s:
        def _runq(q, result, **kwargs):
            # Helper: search and compare matched ids to the expected list
            r = s.search(q, **kwargs)
            assert_equal([d["id"] for d in r], result)

        _runq(query.Term("text", u("format")), ["format", "vector"])
        _runq(query.Term("text", u("the")),
              ["fieldtype", "format", "const", "vector", "stored"])
        _runq(query.Prefix("text", u("st")), ["format", "vector", "stored"])
        _runq(query.Wildcard("id", u("*st*")), ["stored", "const"])
        _runq(query.TermRange("id", u("c"), u("s")),
              ["fieldtype", "format", "const"])
        _runq(query.NumericRange("subs", 10, 100),
              ["fieldtype", "format", "vector", "scorable"])
        _runq(query.Phrase("text", ["this", "field"]),
              ["scorable", "unique", "stored"], limit=None)
        _runq(query.Every(), [
            "fieldtype", "format", "vector", "scorable", "stored", "unique",
            "const"
        ])
        _runq(query.Every("subs"), [
            "fieldtype", "format", "vector", "scorable", "stored", "unique",
            "const"
        ])
def test_can_select_fields(self):
    """When a field list is given, only those fields appear in the docs."""
    self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
    result = self.whoosh_backend.query(query.Every(), fields=("id", "type"))
    self.print_result(result)
    self.assertEqual({'id': '1', 'type': 'ticket'}, result.docs[0])
def test_groupedby_empty_field(self):
    # Documents with no value for a grouping field should be counted under
    # a None key rather than being dropped from the facet counts.
    schema = Schema(
        unique_id=ID(stored=True, unique=True),
        id=ID(stored=True),
        type=ID(stored=True),
        status=KEYWORD(stored=True),
        content=TEXT(stored=True),
    )
    ix = index.create_in(self.index_dir, schema=schema)
    with ix.writer() as w:
        # Only the second document has a status value
        w.add_document(unique_id=u"1", type=u"type1")
        w.add_document(unique_id=u"2", type=u"type2", status=u"New")
    facet_fields = (u"type", u"status")
    groupedby = facet_fields
    with ix.searcher() as s:
        r = s.search(
            query.Every(),
            groupedby=groupedby,
            maptype=sorting.Count,
        )
        facets = self._load_facets(r)
        self.assertEquals(
            {
                'status': {
                    None: 1,
                    'New': 1
                },
                'type': {
                    'type1': 1,
                    'type2': 1
                }
            }, facets)
def test_can_search_time_with_utc_tzinfo(self):
    """A stored datetime carrying an explicit UTC tzinfo round-trips
    intact through the backend."""
    moment = datetime(2012, 12, 13, 11, 8, 34, 711957,
                      tzinfo=FixedOffset(0, 'UTC'))
    self.whoosh_backend.add_doc(dict(id="1", type="ticket", time=moment))
    result = self.whoosh_backend.query(query.Every())
    self.print_result(result)
    self.assertEqual(moment, result.docs[0]["time"])
def test_can_apply_multiple_sort_conditions_asc(self):
    """Results are ordered by type ascending, then by id ascending."""
    for doc_id, doc_type in (("2", "ticket2"), ("3", "ticket1"),
                             ("4", "ticket3"), ("1", "ticket1")):
        self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

    result = self.whoosh_backend.query(
        query.Every(),
        sort=[SortInstruction("type", ASC), SortInstruction("id", ASC)],
        fields=("id", "type"),
    )
    self.print_result(result)

    expected = [
        {'type': 'ticket1', 'id': '1'},
        {'type': 'ticket1', 'id': '3'},
        {'type': 'ticket2', 'id': '2'},
        {'type': 'ticket3', 'id': '4'},
    ]
    self.assertEqual(expected, result.docs)
def test_query_facet():
    # Group results by arbitrary sub-queries via QueryFacet; each document
    # gets its own segment (merge=False) to exercise the multi-segment
    # path.
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)
    for i, ltr in enumerate(u("iacgbehdf")):
        w = ix.writer()
        w.add_document(id=i, v=ltr)
        w.commit(merge=False)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        # Sanity-check each range query on its own first
        assert_equal([hit["id"] for hit in s.search(q1)], [1, 2, 4])
        assert_equal([hit["id"] for hit in s.search(q2)], [5, 7, 8])
        assert_equal([hit["id"] for hit in s.search(q3)], [0, 3, 6])

        facet = sorting.QueryFacet({"a-c": q1, "d-f": q2, "g-i": q3})
        r = s.search(query.Every(), groupedby=facet)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        assert_equal(r.groups("facet"), {
            "a-c": [1, 2, 4],
            "d-f": [5, 7, 8],
            "g-i": [0, 3, 6]
        })
def parse_query(self, fieldname, qstring, boost=1.0):
    """Convert a query string for this boolean field into a query object.

    A bare "*" matches every document that has the field; any other string
    is converted to a boolean value and matched as a term.
    """
    from whoosh import query

    if qstring != "*":
        return query.Term(fieldname, self._obj_to_bool(qstring), boost=boost)
    return query.Every(fieldname, boost=boost)
def test_delete_doc():
    """Deleted documents no longer appear in an Every() search."""
    ix = make_index()
    for docnum in (2, 5):
        ix.delete_document(docnum)

    with ix.searcher() as s:
        remaining = [d["id"] for d in s.search(query.Every())]
        assert remaining == ["fieldtype", "format", "scorable", "stored",
                             "const"]
def query_pre_process(self, query_parameters, context=None):
    """Attach per-user security filters to the query parameters.

    For each permission rule of the requesting user, build an And query
    describing the documents the rule covers and file it under the allowed
    or denied bucket, then hand both buckets to the security filter.
    """
    if not self.enabled:
        return

    permissions = self.get_user_permissions(context.req.authname)
    allowed_docs, denied_docs = [], []
    for product, doc_type, doc_id, perm, denied in permissions:
        clauses = []
        if product:
            clauses.append(query.Term(IndexFields.PRODUCT, product))
        else:
            # A rule without a product applies to documents that have no
            # product field at all
            clauses.append(query.Not(query.Every(IndexFields.PRODUCT)))
        if doc_type != '*':
            clauses.append(query.Term(IndexFields.TYPE, doc_type))
        if doc_id != '*':
            clauses.append(query.Term(IndexFields.ID, doc_id))
        clauses.append(query.Term(IndexFields.REQUIRED_PERMISSION, perm))

        bucket = denied_docs if denied else allowed_docs
        bucket.append(query.And(clauses))

    self.update_security_filter(query_parameters, allowed_docs, denied_docs)
def test_relative_daterange():
    # DateRangeFacet with a relativedelta gap: documents added every
    # 14 days 16 hours through 2001 should be bucketed by calendar month.
    from whoosh.support.relativedelta import relativedelta
    dt = datetime
    schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
    ix = RamStorage().create_index(schema)
    basedate = datetime(2001, 1, 1)
    count = 0
    with ix.writer() as w:
        while basedate < datetime(2001, 12, 1):
            w.add_document(id=count, date=basedate)
            basedate += timedelta(days=14, hours=16)
            count += 1

    with ix.searcher() as s:
        gap = relativedelta(months=1)
        rf = sorting.DateRangeFacet("date", dt(2001, 1, 1),
                                    dt(2001, 12, 31), gap)
        r = s.search(query.Every(), groupedby={"date": rf})
        assert r.groups("date") == {
            (dt(2001, 1, 1), dt(2001, 2, 1)): [0, 1, 2],
            (dt(2001, 2, 1), dt(2001, 3, 1)): [3, 4],
            (dt(2001, 3, 1), dt(2001, 4, 1)): [5, 6],
            (dt(2001, 4, 1), dt(2001, 5, 1)): [7, 8],
            (dt(2001, 5, 1), dt(2001, 6, 1)): [9, 10],
            (dt(2001, 6, 1), dt(2001, 7, 1)): [11, 12],
            (dt(2001, 7, 1), dt(2001, 8, 1)): [13, 14],
            (dt(2001, 8, 1), dt(2001, 9, 1)): [15, 16],
            (dt(2001, 9, 1), dt(2001, 10, 1)): [17, 18],
            (dt(2001, 10, 1), dt(2001, 11, 1)): [19, 20],
            (dt(2001, 11, 1), dt(2001, 12, 1)): [21, 22],
        }
def test_multifacet():
    """A MultiFacet over two fields groups documents by the tuple of the
    two field values."""
    schema = fields.Schema(tag=fields.ID(stored=True),
                           size=fields.ID(stored=True))
    with TempIndex(schema, "multifacet") as ix:
        docs = [("alfa", "small"), ("bravo", "medium"), ("alfa", "large"),
                ("bravo", "small"), ("alfa", "medium"), ("bravo", "medium")]
        w = ix.writer()
        for tag, size in docs:
            w.add_document(tag=u(tag), size=u(size))
        w.commit()

        correct = {(u('bravo'), u('medium')): [1, 5],
                   (u('alfa'), u('large')): [2],
                   (u('alfa'), u('medium')): [4],
                   (u('alfa'), u('small')): [0],
                   (u('bravo'), u('small')): [3]}

        with ix.searcher() as s:
            facet = sorting.MultiFacet(["tag", "size"])
            r = s.search(query.Every(), groupedby={"tag/size": facet})
            assert_equal(r.groups("tag/size"), correct)
def test_query_facet2():
    # Query facets with allow_overlap=True on a multi-valued KEYWORD
    # field: a document can fall into more than one range bucket.
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for i, ltr in enumerate(domain):
            # Pair each letter with the letter at the mirrored position
            # (domain[-i]), so each document holds two keywords
            v = "%s %s" % (ltr, domain[0 - i])
            w.add_document(v=v)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        facets = sorting.Facets()
        facets.add_query("myfacet", {
            "a-c": q1,
            "d-f": q2,
            "g-i": q3
        }, allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        assert_equal(r.groups("myfacet"), {
            'a-c': [0, 1, 2, 7, 8],
            'd-f': [4, 5],
            'g-i': [3, 6]
        })
def test_sorting():
    """Sorting a RamIndex by an ID field, by a NUMERIC field, and by a
    reversed FieldFacet."""
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED, name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)
    with ix.writer() as w:
        for docid, name, size in ((0, "bravo", 10), (1, "alfa", 9),
                                  (2, "delta", 8), (3, "charlie", 7)):
            w.add_document(id=docid, name=u(name), size=size)

    with ix.searcher() as s:
        everything = query.Every()

        r = s.search(everything, sortedby="name")
        assert_equal([hit["id"] for hit in r], [1, 0, 3, 2])

        r = s.search(everything, sortedby="size")
        assert_equal([hit["id"] for hit in r], [3, 2, 1, 0])

        reverse_size = sorting.FieldFacet("size", reverse=True)
        r = s.search(everything, sortedby=reverse_size)
        assert_equal([hit["id"] for hit in r], [0, 1, 2, 3])
def search(self, content, page=1, pagelen=50):
    '''Search the index for ``content`` and return one page of hits.

    A bare "*" matches every document; any other string is parsed against
    the "content" field.  Returns a dict with paging info and the stored
    fields of each hit, or None when the search fails (the error is
    logged rather than raised, preserving the original best-effort
    behavior).
    '''
    try:
        # Use the searcher as a context manager so it is always closed;
        # the original code leaked the searcher on every call.
        with self._index.searcher() as searcher:
            if content == '*':
                q = query.Every()
            else:
                q = QueryParser(
                    'content', self._index.schema).parse(content)
            logging.debug('-- YLSearcher query %s', q)
            results = searcher.search_page(q, page, pagelen)
            hits = [hit.fields() for hit in results]
            return {
                'page_total': results.pagecount,
                'page_num': results.pagenum,
                'page_len': results.pagelen,
                'count': results.total,
                'hits': hits
            }
    except Exception as e:
        # "as e" works on both Python 2.6+ and Python 3, unlike the
        # original "except Exception, e" form
        logging.error('-- YLSearcher query %s', e)
def run(self):
    """Open the index and verify that sorting by "key" yields every key in
    ascending order."""
    ix = self.storage.open_index(self.indexname)
    with ix.searcher() as s:
        hits = s.search(query.Every(), sortedby="key", limit=None)
        joined = "".join([hit["key"] for hit in hits])
    assert_equal(joined,
                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz")
def check(method):
    """Apply ``method`` to a fresh temp index, then verify that grouping
    by the "tag" field produces the expected doc-number groups."""
    with TempIndex(get_schema()) as ix:
        method(ix)
        with ix.searcher() as s:
            groups = s.search(query.Every(), groupedby="tag").groups()
            expected = [(u('one'), [0, 6]),
                        (u('three'), [1, 3, 7, 8]),
                        (u('two'), [2, 4, 5])]
            assert sorted(groups.items()) == expected
def test_set_should_not_be_empty_fields(self):
    """Fields left empty on insert come back as the NULL_MARKER value."""
    self.insert_ticket("test x")
    result = self.whoosh_backend.query(query.Every())
    self.print_result(result)

    doc = result.docs[0]
    null_marker = WhooshEmptyFacetErrorWorkaround.NULL_MARKER
    for field in ("component", "status", "milestone"):
        self.assertEqual(null_marker, doc[field])
def allowed_documents():
    """Yield an And query for each (product, permission) pair the user
    holds."""
    # todo: add special case handling for trac_admin and product_owner
    for product, perm in self._get_all_user_permissions(context):
        # No product means the document must have no product field at all
        if product:
            prod_term = query.Term(IndexFields.PRODUCT, product)
        else:
            prod_term = query.Not(query.Every(IndexFields.PRODUCT))
        perm_term = query.Term(IndexFields.REQUIRED_PERMISSION, perm)
        yield query.And([prod_term, perm_term])
def test_can_return_all_fields(self):
    """Without a field list, all stored fields plus the score come back."""
    self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
    result = self.whoosh_backend.query(query.Every())
    self.print_result(result)

    expected = {'id': u'1',
                'type': u'ticket',
                'unique_id': u'empty:ticket:1',
                "score": 1.0}
    self.assertEqual(expected, result.docs[0])
def test_can_return_empty_result(self):
    """Sorting, field selection and facets all tolerate an empty index."""
    result = self.whoosh_backend.query(
        query.Every(),
        sort=[SortInstruction("type", ASC), SortInstruction("id", DESC)],
        fields=("id", "type"),
        facets=("type", "product"))
    self.print_result(result)
    self.assertEqual(0, result.hits)
def parse(self, query_string, context=None):
    """Parse a user query string into a whoosh query object.

    An empty string, "*" or "*:*" means "match everything".  The original
    string is attached to the parsed query for later reference.
    """
    parser = self._create_parser(context)
    query_string = query_string.strip()
    if query_string in ("", "*", "*:*"):
        return query.Every()
    query_string = unicode(query_string)
    parsed_query = parser.parse(query_string)
    parsed_query.original_query_string = query_string
    return parsed_query
def test_can_apply_filter_and_facet(self):
    """A filter query restricts results while a facet is also requested."""
    for doc_id, doc_type in (("1", "ticket"), ("2", "wiki")):
        self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

    result = self.whoosh_backend.query(
        query.Every(),
        filter=query.Term("type", "ticket"),
        facets=["type"])
    self.print_result(result)

    self.assertEqual(1, result.hits)
    self.assertEqual("ticket", result.docs[0]["type"])
def query(self, parser):
    """Build the whoosh query for this date node.

    With neither a start nor an end time the whole field matches (Every);
    otherwise a DateRange over the (possibly open-ended) interval is
    returned, carrying this node's boost.
    """
    from whoosh import query

    fieldname = self.fieldname or parser.fieldname
    start, end = self.starttime, self.endtime
    if start is None and end is None:
        return query.Every(fieldname)
    return query.DateRange(fieldname, start, end, boost=self.boost)