def test_nocachefield_segments():
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(a=u("bravo"))
    w.add_document(a=u("echo"))
    w.add_document(a=u("juliet"))
    w.commit()
    w = ix.writer()
    w.add_document(a=u("kilo"))
    w.add_document(a=u("foxtrot"))
    w.add_document(a=u("charlie"))
    w.commit(merge=False)
    w = ix.writer()
    w.delete_by_term("a", u("echo"))
    w.add_document(a=u("alfa"))
    w.add_document(a=u("india"))
    w.add_document(a=u("delta"))
    w.commit(merge=False)

    with ix.searcher() as s:
        q = query.TermRange("a", u("bravo"), u("k"))
        facet = sorting.FieldFacet("a", reverse=True)

        r = s.search(q, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "delta", "charlie", "bravo"]

        mq = query.Or([query.Term("a", u("bravo")),
                       query.Term("a", u("delta"))])
        anq = query.AndNot(q, mq)
        r = s.search(anq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        mq = query.Or([query.Term("a", u("bravo")),
                       query.Term("a", u("delta"))])
        r = s.search(q, mask=mq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["juliet", "india", "foxtrot",
                                           "charlie"]

        fq = query.Or([query.Term("a", u("alfa")),
                       query.Term("a", u("charlie")),
                       query.Term("a", u("echo")),
                       query.Term("a", u("india"))])
        r = s.search(query.Every(), filter=fq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["india", "charlie", "alfa"]

        nq = query.Not(query.Or([query.Term("a", u("alfa")),
                                 query.Term("a", u("india"))]))
        r = s.search(query.Every(), filter=nq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["kilo", "juliet", "foxtrot",
                                           "delta", "charlie", "bravo"]
def test_or():
    _run_query(query.Or([query.Term("value", u("red")),
                         query.Term("name", u("yellow"))]),
               [u("A"), u("D"), u("E")])
    # Missing: neither term occurs in the index
    _run_query(query.Or([query.Term("value", u("ochre")),
                         query.Term("name", u("glonk"))]),
               [])
    _run_query(query.Or([]), [])
def _Toplevel(self, node, fieldname):
    queries = [self._eval(s, fieldname) for s in node]
    reqds = [q[0] for q in queries if isinstance(q, tuple)]
    if reqds:
        nots = [q for q in queries if isinstance(q, query.Not)]
        opts = [q for q in queries
                if not isinstance(q, query.Not) and not isinstance(q, tuple)]
        return query.AndMaybe([query.And(reqds + nots), query.Or(opts)])
    else:
        return query.Or(queries)
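# Hedged illustration (not from the original source): when required terms
# are present, _Toplevel above returns a tree of the shape
# AndMaybe([And(required + nots), Or(optional)]) -- Not clauses are
# enforced alongside the required terms, while optional terms only
# contribute to scoring. For a clause like "+alfa bravo -charlie" on a
# hypothetical "text" field, the equivalent tree spelled with the
# current binary AndMaybe(a, b) API (the snippet above uses an older
# list-taking form of the same combinator) would be:
from whoosh import query

equivalent = query.AndMaybe(
    query.And([query.Term(u"text", u"alfa"),
               query.Not(query.Term(u"text", u"charlie"))]),
    query.Or([query.Term(u"text", u"bravo")]))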
def finalize(self):
    self._subqueries = []
    if self.allowed:
        self.a = query.Or(self.allowed)
    else:
        self.a = query.NullQuery
    if self.denied:
        self.b = query.Or(self.denied)
    else:
        self.b = query.NullQuery
    self._subqueries = (self.a, self.b)
def parse(self, input):
    """Parses the input string and returns a Query object/tree.

    This method may return None if the input string does not result in
    any valid queries. It may also raise a variety of exceptions if the
    input string is malformed.

    :param input: the unicode string to parse.
    """
    required = []
    optional = []
    gramsize = max(self.minchars, min(self.maxchars, len(input)))
    if gramsize > len(input):
        return None

    discardspaces = self.discardspaces
    for t in self.analyzerclass(gramsize)(input):
        gram = t.text
        if " " in gram:
            if not discardspaces:
                optional.append(gram)
        else:
            required.append(gram)

    if required:
        fieldname = self.fieldname
        andquery = query.And([query.Term(fieldname, g) for g in required])
        if optional:
            orquery = query.Or([query.Term(fieldname, g) for g in optional])
            return query.AndMaybe([andquery, orquery])
        else:
            return andquery
    else:
        return None
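# Hedged usage note (the owning class and its constructor are not shown
# in the source, so the setup below is assumed and the parser name is
# hypothetical): with discardspaces set, grams that span a space are
# dropped outright; otherwise they become optional, score-only terms.
# That is why a multi-word input typically parses to
# AndMaybe([And(required), Or(optional)]) while a single word parses to
# a bare And, and an empty string parses to None.
#
#   parser = NgramSelfParser("content", minchars=3, maxchars=4)  # hypothetical
#   q = parser.parse(u"hello there")  # AndMaybe(...) or a bare And(...)
#   q = parser.parse(u"")             # None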
def test_lengths():
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=1, text=u("alfa bravo charlie delta echo"))
    w.add_document(id=2, text=u("bravo charlie delta echo foxtrot"))
    w.add_document(id=3, text=u("charlie needle echo foxtrot golf"))
    w.add_document(id=4, text=u("delta echo foxtrot golf hotel"))
    w.add_document(id=5, text=u("echo needle needle hotel india"))
    w.add_document(id=6, text=u("foxtrot golf hotel india juliet"))
    w.add_document(id=7, text=u("golf needle india juliet kilo"))
    w.add_document(id=8, text=u("hotel india juliet needle lima"))
    w.commit()

    with ix.searcher() as s:
        q = query.Or([query.Term("text", u("needle")),
                      query.Term("text", u("charlie"))])
        r = s.search(q, limit=2)
        assert not r.has_exact_length()
        assert r.estimated_length() == 7
        assert r.estimated_min_length() == 3
        assert r.scored_length() == 2
        assert len(r) == 6
def test_no_parents():
    schema = fields.Schema(id=fields.STORED, kind=fields.ID,
                           name=fields.ID(stored=True))
    k = u("alfa")
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, kind=k, name=u("one"))
        w.add_document(id=1, kind=k, name=u("two"))
        w.add_document(id=2, kind=k, name=u("three"))
        w.add_document(id=3, kind=k, name=u("four"))
        w.add_document(id=4, kind=k, name=u("one"))
        w.add_document(id=5, kind=k, name=u("two"))
        w.add_document(id=6, kind=k, name=u("three"))
        w.add_document(id=7, kind=k, name=u("four"))
        w.add_document(id=8, kind=k, name=u("one"))
        w.add_document(id=9, kind=k, name=u("two"))
        w.add_document(id=10, kind=k, name=u("three"))
        w.add_document(id=11, kind=k, name=u("four"))

    with ix.searcher() as s:
        pq = query.Term("kind", "bravo")
        cq = query.Or([query.Term("name", "two"),
                       query.Term("name", "four")])
        q = query.NestedParent(pq, cq)
        r = s.search(q)
        assert r.is_empty()
def test_everything_is_a_parent():
    schema = fields.Schema(id=fields.STORED, kind=fields.ID,
                           name=fields.ID(stored=True))
    k = u("alfa")
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, kind=k, name=u("one"))
        w.add_document(id=1, kind=k, name=u("two"))
        w.add_document(id=2, kind=k, name=u("three"))
        w.add_document(id=3, kind=k, name=u("four"))
        w.add_document(id=4, kind=k, name=u("one"))
        w.add_document(id=5, kind=k, name=u("two"))
        w.add_document(id=6, kind=k, name=u("three"))
        w.add_document(id=7, kind=k, name=u("four"))
        w.add_document(id=8, kind=k, name=u("one"))
        w.add_document(id=9, kind=k, name=u("two"))
        w.add_document(id=10, kind=k, name=u("three"))
        w.add_document(id=11, kind=k, name=u("four"))

    with ix.searcher() as s:
        pq = query.Term("kind", k)
        cq = query.Or([query.Term("name", "two"),
                       query.Term("name", "four")])
        q = query.NestedParent(pq, cq)
        r = s.search(q)
        assert [hit["id"] for hit in r] == [1, 3, 5, 7, 9, 11]
def update_document(self, **fields):
    """Adds or replaces a document. At least one of the fields for which
    you supply values must be marked as 'unique' in the index's schema.

    The keyword arguments map field names to the values to index/store.

    For fields that are both indexed and stored, you can specify an
    alternate value to store using a keyword argument in the form
    "_stored_<fieldname>". For example, if you have a field named "title"
    and you want to index the text "a b c" but store the text "e f g",
    use keyword arguments like this::

        update_document(title=u"a b c", _stored_title=u"e f g")
    """
    # Check which of the supplied fields are unique
    unique_fields = [name for name, field in self.index.schema.fields()
                     if name in fields and field.unique]
    if not unique_fields:
        raise IndexingError("None of the fields in %r are unique"
                            % fields.keys())

    # Delete documents in which the supplied unique fields match
    from whoosh import query
    delquery = query.Or([query.Term(name, fields[name])
                         for name in unique_fields])
    delquery = delquery.normalize()
    self.delete_by_query(delquery)

    # Add the given fields
    self.add_document(**fields)
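# A minimal usage sketch for update_document(), assuming a standard
# Whoosh setup (the schema and field names here are illustrative):
from whoosh import fields
from whoosh.filedb.filestore import RamStorage

schema = fields.Schema(path=fields.ID(unique=True, stored=True),
                       title=fields.TEXT(stored=True))
ix = RamStorage().create_index(schema)

w = ix.writer()
w.add_document(path=u"/a", title=u"first draft")
w.commit()

# Because "path" is marked unique, this replaces the existing /a
# document instead of adding a second one.
w = ix.writer()
w.update_document(path=u"/a", title=u"final version")
w.commit()

with ix.searcher() as s:
    assert s.doc_count() == 1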
def suggest(self, text, number=3, usescores=False):
    """Returns a list of suggested alternative spellings of 'text'. You
    must add words to the dictionary (using add_field, add_words, and/or
    add_scored_words) before you can use this.

    :param text: The word to check.
    :param number: The maximum number of suggestions to return.
    :param usescores: Use the per-word score to influence the
        suggestions.
    :rtype: list
    """
    grams = defaultdict(list)
    for size in xrange(self.mingram, self.maxgram + 1):
        key = "gram%s" % size
        nga = analysis.NgramAnalyzer(size)
        for t in nga(text):
            grams[key].append(t.text)

    queries = []
    for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
        key = "gram%s" % size
        gramlist = grams[key]
        queries.append(query.Term("start%s" % size, gramlist[0],
                                  boost=self.booststart))
        queries.append(query.Term("end%s" % size, gramlist[-1],
                                  boost=self.boostend))
        for gram in gramlist:
            queries.append(query.Term(key, gram))

    q = query.Or(queries)
    ix = self.index()
    s = searching.Searcher(ix)
    try:
        results = s.search(q)

        length = len(results)
        if len(results) > number * 2:
            length = len(results) // 2
        fieldlist = results[:length]

        suggestions = [(fs["word"], fs["score"]) for fs in fieldlist
                       if fs["word"] != text]

        if usescores:
            def keyfn(a):
                return 0 - (1 / distance(text, a[0])) * a[1]
        else:
            def keyfn(a):
                return distance(text, a[0])

        suggestions.sort(key=keyfn)
    finally:
        s.close()

    return [word for word, _ in suggestions[:number]]
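# A hedged usage sketch for suggest(), assuming the legacy
# whoosh.spelling.SpellChecker API this method belongs to (the class was
# removed in later Whoosh releases, so treat the names below as
# historical rather than current API):
#
#   from whoosh.filedb.filestore import RamStorage
#   from whoosh.spelling import SpellChecker
#
#   sc = SpellChecker(RamStorage())
#   sc.add_words([u"render", u"rendering", u"renderer"])
#   sc.suggest(u"rendr", number=2)   # e.g. [u"render", u"renderer"]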
def test_filter():
    schema = fields.Schema(id=fields.STORED, path=fields.ID,
                           text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=1, path=u("/a/1"), text=u("alfa bravo charlie"))
    w.add_document(id=2, path=u("/b/1"), text=u("bravo charlie delta"))
    w.add_document(id=3, path=u("/c/1"), text=u("charlie delta echo"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=4, path=u("/a/2"), text=u("delta echo alfa"))
    w.add_document(id=5, path=u("/b/2"), text=u("echo alfa bravo"))
    w.add_document(id=6, path=u("/c/2"), text=u("alfa bravo charlie"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=7, path=u("/a/3"), text=u("bravo charlie delta"))
    w.add_document(id=8, path=u("/b/3"), text=u("charlie delta echo"))
    w.add_document(id=9, path=u("/c/3"), text=u("delta echo alfa"))
    w.commit(merge=False)

    with ix.searcher() as s:
        fq = query.Or([query.Prefix("path", "/a"),
                       query.Prefix("path", "/b")])
        r = s.search(query.Term("text", "alfa"), filter=fq)
        assert_equal([d["id"] for d in r], [1, 4, 5])

        r = s.search(query.Term("text", "bravo"), filter=fq)
        assert_equal([d["id"] for d in r], [1, 2, 5, 7])
def test_fieldboost():
    schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=0, a=u("alfa bravo charlie"), b=u("echo foxtrot india"))
    w.add_document(id=1, a=u("delta bravo charlie"), b=u("alfa alfa alfa"))
    w.add_document(id=2, a=u("alfa alfa alfa"), b=u("echo foxtrot india"))
    w.add_document(id=3, a=u("alfa sierra romeo"), b=u("alfa tango echo"))
    w.add_document(id=4, a=u("bravo charlie delta"), b=u("alfa foxtrot india"))
    w.add_document(id=5, a=u("alfa alfa echo"), b=u("tango tango tango"))
    w.add_document(id=6, a=u("alfa bravo echo"), b=u("alfa alfa tango"))
    w.commit()

    def field_booster(fieldname, factor=2.0):
        "Returns a function which will boost the given field in a query tree"
        def booster_fn(obj):
            if obj.is_leaf() and obj.field() == fieldname:
                obj = copy.deepcopy(obj)
                obj.boost *= factor
                return obj
            else:
                return obj
        return booster_fn

    with ix.searcher() as s:
        q = query.Or([query.Term("a", u("alfa")),
                      query.Term("b", u("alfa"))])
        q = q.accept(field_booster("a", 100.0))
        assert_equal(text_type(q), text_type("(a:alfa^100.0 OR b:alfa)"))
        r = s.search(q)
        assert_equal([hit["id"] for hit in r], [2, 5, 6, 3, 0, 1, 4])
def test_boost_phrase():
    schema = fields.Schema(title=fields.TEXT(field_boost=5.0, stored=True),
                           text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        t = u(" ").join(ls)
        w.add_document(title=t, text=t)
    w.commit()

    q = query.Or([query.Term("title", u("alfa")),
                  query.Term("title", u("bravo")),
                  query.Phrase("text", [u("bravo"), u("charlie"),
                                        u("delta")])])

    def boost_phrases(q):
        if isinstance(q, query.Phrase):
            q.boost *= 1000.0
            return q
        else:
            return q.apply(boost_phrases)
    q = boost_phrases(q)

    with ix.searcher() as s:
        r = s.search(q, limit=None)
        for hit in r:
            if "bravo charlie delta" in hit["title"]:
                assert hit.score > 100.0
def test_deleteall():
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(query.Or([query.Term("text", u("alfa")),
                                   query.Term("text", u("bravo"))]))
            assert len(r) == 0

        ix.optimize()
        assert ix.doc_count_all() == 0

        with ix.reader() as r:
            assert list(r) == []
def parse(self, input):
    required = []
    optional = []
    gramsize = max(self.minchars, min(self.maxchars, len(input)))
    if gramsize > len(input):
        return None

    discardspaces = self.discardspaces
    for t in self.analyzerclass(gramsize)(input):
        gram = t.text
        if " " in gram:
            if not discardspaces:
                optional.append(gram)
        else:
            required.append(gram)

    if required:
        fieldname = self.fieldname
        andquery = query.And([query.Term(fieldname, g) for g in required])
        if optional:
            orquery = query.Or([query.Term(fieldname, g) for g in optional])
            return query.AndMaybe([andquery, orquery])
        else:
            return andquery
    else:
        return None
def suggestions_and_scores(self, text, weighting=None):
    if weighting is None:
        weighting = scoring.TF_IDF()

    grams = defaultdict(list)
    for size in xrange(self.mingram, self.maxgram + 1):
        key = "gram%s" % size
        nga = analysis.NgramAnalyzer(size)
        for t in nga(text):
            grams[key].append(t.text)

    queries = []
    for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
        key = "gram%s" % size
        gramlist = grams[key]
        queries.append(query.Term("start%s" % size, gramlist[0],
                                  boost=self.booststart))
        queries.append(query.Term("end%s" % size, gramlist[-1],
                                  boost=self.boostend))
        for gram in gramlist:
            queries.append(query.Term(key, gram))

    q = query.Or(queries)
    ix = self.index()
    s = ix.searcher(weighting=weighting)
    try:
        result = s.search(q, limit=None)
        return [(fs["word"], fs["score"], result.score(i))
                for i, fs in enumerate(result) if fs["word"] != text]
    finally:
        s.close()
def test_can_parse_keyword_resolved(self):
    parsed_query = self.parser.parse("$resolved")
    self.assertEqual(parsed_query,
                     query.Or([query.Term('status', 'resolved'),
                               query.Term('status', 'closed')]))
def GET(self):
    search_term = self.request.get_param("s")
    all_tags = r.table(rm.Recipe.table)\
        .concat_map(lambda doc: doc["tags"])\
        .distinct()\
        .coerce_to('array').run()

    self.view.data = {"tags": all_tags, "recipes": None}

    if search_term:
        if "recipe:" in search_term:
            parts = search_term.split(" ")
            for part in parts:
                if "recipe:" in part:
                    recipe = rm.Recipe.find(part[7:])
                    if recipe is not None:
                        return Redirect("/recipes/{}".format(part[7:]))

        search_term = search_term.replace("tag:", "tags:")
        searcher = RecipeSearcher()

        if self.session.id:
            allow = q.Or([q.And([q.Term("user", self.session.id),
                                 q.Term("deleted", False),
                                 q.Term("reported", False)]),
                          q.And([q.Term("public", True),
                                 q.Term("deleted", False),
                                 q.Term("reported", False)])])
        else:
            allow = q.And([q.Term("public", True),
                           q.Term("deleted", False),
                           q.Term("reported", False)])

        ids = searcher.search(search_term, collection=True, allow=allow)
        if ids is not None:
            ids.fetch()
            page = Paginate(ids, self.request, "title",
                            sort_direction_default="desc")
            self.view.data = {"recipes": page}

        self.view.template = "public/recipes/search/results"

    return self.view
def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
    parsed_query = self.parser.parse("$unresolved")
    self.assertEqual(parsed_query,
                     query.Not(query.Or([query.Term('status', 'resolved'),
                                         query.Term('status', 'closed')])))
def related(self, kitab, vrr, nodeIdNum):
    dn, kt = self.keyterms(kitab, vrr, nodeIdNum)
    if not dn:
        return None
    for t, r in kt:
        print "term=", t, " @ rank=", r
    q = query.Or([query.Term("content", t) for (t, r) in kt])
    results = self.indexer.searcher().search(q, limit=10)
    for i, fields in enumerate(results):
        if results.docnum(i) != dn:
            print fields['kitab'], "\t\t", str(fields['nodeIdNum']), \
                "\t\t", fields['title']
def parse(filt):
    if filt.query_type == Filter.Q_APPROX:
        mp = qparser.MultifieldParser(filt.get_fields(), schema=schema)
        return mp.parse(unicode(filt.query_string))
    elif filt.query_type == Filter.Q_EXACT:
        s = cls.get_index().searcher()
        qs = filt.query_string
        f = lambda d: qs in [d.get(field) for field in filt.get_fields()]
        ids = [unicode(d['id']) for d in filter(f, s.documents())]
        return query.Or([query.Term('id', iden) for iden in ids])
def suggestions_and_scores(self, text, weighting=None):
    """Returns a list of possible alternative spellings of 'text', as
    ('word', score, weight) triples, where 'word' is the suggested word,
    'score' is the score that was assigned to the word using
    :meth:`SpellChecker.add_field` or :meth:`SpellChecker.add_scored_words`,
    and 'weight' is the score the word received in the search for the
    original word's ngrams.

    You must add words to the dictionary (using add_field, add_words,
    and/or add_scored_words) before you can use this.

    This is a lower-level method, in case an expert user needs access to
    the raw scores, for example to implement a custom suggestion ranking
    algorithm. Most people will want to call :meth:`~SpellChecker.suggest`
    instead, which simply returns the top N valued words.

    :param text: The word to check.
    :rtype: list
    """
    if weighting is None:
        weighting = TF_IDF()

    grams = defaultdict(list)
    for size in xrange(self.mingram, self.maxgram + 1):
        key = "gram%s" % size
        nga = analysis.NgramAnalyzer(size)
        for t in nga(text):
            grams[key].append(t.text)

    queries = []
    for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
        key = "gram%s" % size
        gramlist = grams[key]
        queries.append(query.Term("start%s" % size, gramlist[0],
                                  boost=self.booststart))
        queries.append(query.Term("end%s" % size, gramlist[-1],
                                  boost=self.boostend))
        for gram in gramlist:
            queries.append(query.Term(key, gram))

    q = query.Or(queries)
    ix = self.index()
    s = ix.searcher(weighting=weighting)
    try:
        result = s.search(q)
        return [(fs["word"], fs["score"], result.score(i))
                for i, fs in enumerate(result) if fs["word"] != text]
    finally:
        s.close()
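# The docstring above notes the triples exist so expert users can build
# their own ranking. A minimal hedged sketch of such a custom ranking
# (the way the two signals are combined here is illustrative, not from
# the source): weight the stored per-word score by the ngram retrieval
# weight and keep the strongest candidates.
def rank_suggestions(triples, limit=5):
    # Each triple is (word, stored_score, retrieval_weight); sort by
    # their product, highest combined evidence first.
    ranked = sorted(triples, key=lambda t: t[1] * t[2], reverse=True)
    return [word for word, score, weight in ranked[:limit]]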
def get_filter(self, querydict):
    """
    Generates a Whoosh query filter reflecting which facets are
    currently selected.

    Takes `querydict` - a MultiDict with current HTTP GET params.
    """
    terms = []
    for field in self.get_fields():
        # user-provided values concerning a given field
        values = querydict.getlist('filter_' + field)
        if values:
            subterms = [query.Term(field, val) for val in values]
            terms.append(query.Or(subterms))
    return query.And(terms)
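# A hedged usage sketch for get_filter(); the method only requires an
# object with .getlist(), so a Werkzeug MultiDict is assumed here and
# the field names are illustrative:
from werkzeug.datastructures import MultiDict

querydict = MultiDict([("filter_tag", u"python"),
                       ("filter_tag", u"search"),
                       ("filter_year", u"2014")])
# With get_fields() returning ("tag", "year"), get_filter(querydict)
# would produce OR within a facet and AND across facets:
#   And([Or([Term("tag", "python"), Term("tag", "search")]),
#        Or([Term("year", "2014")])])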
def test_can_parse_complex_query(self):
    parsed_query = self.parser.parse("content:test $ticket $unresolved")
    self.assertEqual(
        parsed_query,
        query.And([query.Term('content', 'test'),
                   query.Term('type', 'ticket'),
                   query.Not(query.Or([query.Term('status', 'resolved'),
                                       query.Term('status', 'closed')]))]))
def test_or_nots2():
    # Issue #286
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(b=u("bravo"))

    with ix.searcher() as s:
        q = query.Or([query.Term("a", "alfa"),
                      query.Not(query.Term("b", "alfa"))])
        r = s.search(q)
        assert len(r) == 1
def search(self, backend, start, stop, score_field=None):
    # TODO: Handle MatchAll nested inside other search query classes.
    if isinstance(self.query, MatchAll):
        return self.queryset[start:stop]

    config = backend.get_config()
    queryset = self.queryset
    models = get_descendant_models(queryset.model)
    search_kwargs = {
        'filter': wquery.Or([wquery.Term(DJANGO_CT, get_model_ct(m))
                             for m in models]),
        'limit': None,
    }

    searcher = backend.index.searcher()
    results = searcher.search(
        backend.parser.parse(self.build_whoosh_query(config=config)),
        **search_kwargs)
    # Results come back in order of relevance; an OrderedDict keeps
    # track of that order.
    score_map = OrderedDict([(r['django_id'], r.score) for r in results])
    searcher.close()

    django_id_ls = score_map.keys()
    if not django_id_ls:
        return queryset.none()

    # Retrieve the results from the db, but preserve the order by score
    preserved_order = Case(
        *[When(pk=pk, then=pos) for pos, pk in enumerate(django_id_ls)])
    queryset = queryset.filter(pk__in=django_id_ls).order_by(preserved_order)

    # Support search on specific fields
    if self.fields:
        q = self.build_database_filter()
        queryset = queryset.filter(q)

    queryset = queryset.distinct()[start:stop]

    # Add score annotations if required
    if score_field:
        for obj in queryset:
            setattr(obj, score_field, score_map.get(str(obj.pk)))

    return queryset
def _query(self):
    more_like_doc_id = int(self.query_params['more_like_id'])
    content = Document.objects.get(id=more_like_doc_id).content

    docnum = self.searcher.document_number(id=more_like_doc_id)
    kts = self.searcher.key_terms_from_text(
        'content', content, numterms=20,
        model=classify.Bo1Model, normalize=False)
    q = query.Or([query.Term('content', word, boost=weight)
                  for word, weight in kts])
    mask = {docnum}

    return q, mask
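# Hedged usage note: the (q, mask) pair built above is intended for
# Whoosh's Searcher.search(), where the mask set excludes the source
# document from its own "more like this" results (the limit value here
# is illustrative):
#
#   q, mask = self._query()
#   results = self.searcher.search(q, mask=mask, limit=10)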
def query_page(ix, page, querystring, more_like_doc_id,
               more_like_doc_content):
    searcher = ix.searcher()
    try:
        if querystring:
            qp = MultifieldParser(
                ["content", "title", "correspondent", "tag", "type"],
                ix.schema)
            qp.add_plugin(DateParserPlugin())
            str_q = qp.parse(querystring)
            corrected = searcher.correct_query(str_q, querystring)
        else:
            str_q = None
            corrected = None

        if more_like_doc_id:
            docnum = searcher.document_number(id=more_like_doc_id)
            kts = searcher.key_terms_from_text(
                'content', more_like_doc_content, numterms=20,
                model=classify.Bo1Model, normalize=False)
            more_like_q = query.Or([query.Term('content', word, boost=weight)
                                    for word, weight in kts])
            result_page = searcher.search_page(more_like_q, page,
                                               filter=str_q, mask={docnum})
        elif str_q:
            result_page = searcher.search_page(str_q, page)
        else:
            raise ValueError(
                "Either querystring or more_like_doc_id is required.")

        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()

        if corrected and corrected.query != str_q:
            corrected_query = corrected.string
        else:
            corrected_query = None

        yield result_page, corrected_query
    finally:
        searcher.close()
def test_contains():
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u("alfa sierra tango"))
    w.add_document(text=u("bravo charlie delta"))
    w.add_document(text=u("charlie delta echo"))
    w.add_document(text=u("delta echo foxtrot"))
    w.commit()

    q = query.Or([query.Term("text", "bravo"),
                  query.Term("text", "charlie")])
    r = ix.searcher().search(q, terms=True)
    for hit in r:
        assert not hit.contains_term("text", "alfa")
        assert (hit.contains_term("text", "bravo")
                or hit.contains_term("text", "charlie"))
        assert not hit.contains_term("text", "foxtrot")
def test_reverse_collapse():
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT,
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD,
                           order=fields.NUMERIC(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(title=u"First document",
                       content=u"This is my document!",
                       path=u"/a", tags=u"first", order=20.0)
        w.add_document(title=u"Second document",
                       content=u"This is the second example.",
                       path=u"/b", tags=u"second", order=12.0)
        w.add_document(title=u"Third document",
                       content=u"Examples are many.",
                       path=u"/c", tags=u"third", order=15.0)
        w.add_document(title=u"Thirdish document",
                       content=u"Examples are too many.",
                       path=u"/d", tags=u"third", order=25.0)

    with ix.searcher() as s:
        q = query.Every('content')
        r = s.search(q)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/c", "/d"]

        q = query.Or([query.Term("title", "document"),
                      query.Term("content", "document"),
                      query.Term("tags", "document")])
        cf = sorting.FieldFacet("tags")
        of = sorting.FieldFacet("order", reverse=True)
        r = s.search(q, collapse=cf, collapse_order=of, terms=True)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/d"]