def test_wrapper():
    """Check ids and scores seen through a WrappingMatcher with boost=2.0."""
    doc_ids = [1, 2, 5, 9, 10]

    # Walk the wrapped matcher by hand, recording (id, score) at each step.
    wrapped = matching.WrappingMatcher(matching.ListMatcher(list(doc_ids)),
                                       boost=2.0)
    seen = []
    while wrapped.is_active():
        seen.append((wrapped.id(), wrapped.score()))
        wrapped.next()
    assert seen == [(doc_id, 2.0) for doc_id in doc_ids]

    # A fresh wrapper must yield the same ids through all_ids().
    wrapped = matching.WrappingMatcher(matching.ListMatcher(doc_ids), boost=2.0)
    assert list(wrapped.all_ids()) == doc_ids
def matcher(self, searcher, context=None):
    """Build the matcher for this single-term query.

    Returns ``matching.NullMatcher()`` when the field is not in the
    searcher's schema, when the field's ``to_bytes`` raises ``ValueError``
    for the term text, or when the (field, term) pair is not in the reader.
    Otherwise returns the searcher's postings matcher, wrapped in a
    ``matching.WrappingMatcher`` if ``self.boost`` is not 1.0.

    :param searcher: object providing ``schema``, ``reader()``,
        ``postings()`` and ``weighting``.
    :param context: optional object whose ``weighting`` is used instead of
        the searcher's when given.
    """
    name = self.fieldname
    raw_text = self.text
    if name not in searcher.schema:
        return matching.NullMatcher()

    field_type = searcher.schema[name]
    try:
        encoded = field_type.to_bytes(raw_text)
    except ValueError:
        return matching.NullMatcher()

    if (self.fieldname, encoded) not in searcher.reader():
        return matching.NullMatcher()

    # Prefer the weighting carried by the context when one is supplied.
    weighting = searcher.weighting if context is None else context.weighting
    result = searcher.postings(self.fieldname, encoded, weighting=weighting)
    if self.minquality:
        result.set_min_quality(self.minquality)
    if self.boost != 1.0:
        result = matching.WrappingMatcher(result, boost=self.boost)
    return result
def matcher(self, searcher, context=None):
    """Build the matcher for this phrase query.

    Each word is encoded with the field's ``to_bytes`` and turned into a
    ``Term`` query; the terms are combined into an ordered ``SpanNear2``
    query whose matcher is returned, wrapped in a
    ``matching.WrappingMatcher`` if ``self.boost`` is not 1.0.

    Returns ``matching.NullMatcher()`` when the field is not in the
    searcher's schema, when a word cannot be encoded by the field
    (``to_bytes`` raises ``ValueError``), or when any word is not present
    in the reader.

    :param searcher: object providing ``schema`` and ``reader()``.
    :param context: passed through to the span query's ``matcher``.
    :raises qcore.QueryError: if the field has no format or its format does
        not support positions.
    """
    from whoosh.query import Term, SpanNear2

    fieldname = self.fieldname
    if fieldname not in searcher.schema:
        return matching.NullMatcher()

    field = searcher.schema[fieldname]
    if not field.format or not field.format.supports("positions"):
        raise qcore.QueryError("Phrase search: %r field has no positions"
                               % self.fieldname)

    # Build a list of Term queries from the words in the phrase
    terms = []
    reader = searcher.reader()
    for word in self.words:
        try:
            word = field.to_bytes(word)
        except ValueError:
            # A word the field cannot encode is treated as a word that
            # matches nothing instead of letting the error escape.
            return matching.NullMatcher()

        if (fieldname, word) not in reader:
            # Shortcut the query if one of the words doesn't exist.
            return matching.NullMatcher()
        terms.append(Term(fieldname, word))

    # Create the equivalent SpanNear2 query from the terms
    q = SpanNear2(terms, slop=self.slop, ordered=True, mindist=1)
    # Get the matcher
    m = q.matcher(searcher, context)

    if self.boost != 1.0:
        m = matching.WrappingMatcher(m, boost=self.boost)
    return m
def test_replacements():
    """Check what ``replace()`` returns for several kinds of matcher."""
    shared_scorer = scoring.WeightScorer(0.25)
    left = matching.ListMatcher([1, 2, 3], [0.25] * 3, scorer=shared_scorer)
    right = matching.ListMatcher([1, 2, 3], [0.25] * 3, scorer=shared_scorer)
    union = matching.UnionMatcher(left, right)

    # A single list matcher replaced at 0.5 becomes the null matcher.
    replaced_left = left.replace(0.5)
    assert_equal(replaced_left.__class__, matching.NullMatcherClass)

    # The union becomes an intersection at 0.5 and the null matcher at 0.6.
    replaced_union = union.replace(0.5)
    assert_equal(replaced_union.__class__, matching.IntersectionMatcher)
    replaced_union = union.replace(0.6)
    assert_equal(replaced_union.__class__, matching.NullMatcherClass)

    # A wrapper keeps its class and boost; only its child is replaced.
    wrapper = matching.WrappingMatcher(union, boost=2.0)
    wrapper = wrapper.replace(0.5)
    assert_equal(wrapper.__class__, matching.WrappingMatcher)
    assert_equal(wrapper.boost, 2.0)
    assert_equal(wrapper.child.__class__, matching.IntersectionMatcher)

    def weighted_list(weight):
        # List matcher over ids 1-3 whose weights and scorer all use `weight`.
        return matching.ListMatcher([1, 2, 3], [weight] * 3,
                                    scorer=scoring.WeightScorer(weight))

    low = weighted_list(0.1)
    mid = weighted_list(0.2)
    high = weighted_list(0.3)

    multi = matching.MultiMatcher([low, mid, high], [0, 4, 8])
    multi = multi.replace(0.25)
    assert_equal(multi.current, 2)

    dismax = matching.DisjunctionMaxMatcher(low, mid)
    dismax = dismax.replace(0.15)
    assert dismax is mid
def matcher(self, searcher, context=None):
    """Build the matcher for this phrase query via ``SpanNear.phrase``.

    Returns ``matching.NullMatcher()`` when the field is not in the
    searcher's schema or when any encoded word is not present in the
    reader. Otherwise returns the span query's matcher, wrapped in a
    ``matching.WrappingMatcher`` if ``self.boost`` is not 1.0.

    :param searcher: object providing ``schema`` and ``reader()``.
    :param context: passed through to the span query's ``matcher``.
    :raises qcore.QueryError: if every word exists but the field has no
        format or its format does not support positions.
    """
    name = self.fieldname
    index_reader = searcher.reader()
    if name not in searcher.schema:
        return matching.NullMatcher()

    field_type = searcher.schema[name]
    encoded_words = []
    for raw_word in self.words:
        encoded_words.append(field_type.to_bytes(raw_word))

    # Shortcut the query if one of the words doesn't exist.
    if any((name, encoded) not in index_reader for encoded in encoded_words):
        return matching.NullMatcher()

    if not (field_type.format and field_type.format.supports("positions")):
        raise qcore.QueryError("Phrase search: %r field has no positions"
                               % self.fieldname)

    # Construct a tree of SpanNear queries representing the words in the
    # phrase and return its matcher
    from whoosh.query.spans import SpanNear

    span_query = SpanNear.phrase(name, encoded_words, slop=self.slop)
    result = span_query.matcher(searcher, context)
    if self.boost == 1.0:
        return result
    return matching.WrappingMatcher(result, boost=self.boost)
def matcher(self, searcher, weighting=None):
    """Build the matcher for this single-term query.

    Returns ``matching.NullMatcher()`` when the field is not in the
    searcher's schema or the (field, term) pair is not in the reader.
    Otherwise returns the searcher's postings matcher, wrapped in a
    ``matching.WrappingMatcher`` if ``self.boost`` is not 1.0.

    :param searcher: object providing ``schema``, ``reader()`` and
        ``postings()``.
    :param weighting: passed through to ``searcher.postings``.
    """
    term = self.text
    if self.fieldname not in searcher.schema:
        return matching.NullMatcher()

    # If someone created a query object with a non-text term, e.g.
    # query.Term("printed", True), be nice and convert it to text
    if not isinstance(term, (bytes_type, text_type)):
        term = searcher.schema[self.fieldname].to_text(term)

    if (self.fieldname, term) not in searcher.reader():
        return matching.NullMatcher()

    result = searcher.postings(self.fieldname, term, weighting=weighting)
    if self.boost == 1.0:
        return result
    return matching.WrappingMatcher(result, boost=self.boost)
def _matcher(self, matchercls, q_weight_fn, searcher, weighting=None, **kwargs):
    """Combine the subqueries' matchers into one matcher tree.

    :param matchercls: matcher class used to join the subquery matchers.
    :param q_weight_fn: function called on each subquery to get a "weight"
        used to build a huffman-like matcher tree; if None, an
        order-preserving binary tree is built instead.
    :param searcher: passed to each subquery's ``matcher``.
    :param weighting: passed to the matchers of the non-negated subqueries.
    :param kwargs: accepted but not used in this body.
    :returns: ``matching.NullMatcher()`` if there are no non-negated
        subqueries; otherwise the combined matcher, with negated subqueries
        excluded through ``matching.AndNotMatcher`` and ``self.boost``
        applied through ``matching.WrappingMatcher`` when it is not 1.0.
    """
    # Pull any queries inside a Not() out into their own list
    positives, negatives = self._split_queries()
    if not positives:
        return matching.NullMatcher()

    # Create a matcher from the list of subqueries
    positive_ms = [q.matcher(searcher, weighting=weighting) for q in positives]
    if len(positive_ms) == 1:
        combined = positive_ms[0]
    elif q_weight_fn is None:
        combined = make_binary_tree(matchercls, positive_ms)
    else:
        weighted = [(q_weight_fn(query), sub_m)
                    for query, sub_m in zip(positives, positive_ms)]
        combined = make_weighted_tree(matchercls, weighted)

    # If there were queries inside Not(), make a matcher for them and
    # wrap the matchers in an AndNotMatcher
    if negatives:
        if len(negatives) == 1:
            excluded = negatives[0].matcher(searcher)
        else:
            reader = searcher.reader()
            sized = [(query.estimate_size(reader), query.matcher(searcher))
                     for query in negatives]
            excluded = make_weighted_tree(matching.UnionMatcher, sized)

        # An inactive exclusion matcher excludes nothing, so skip wrapping.
        if excluded.is_active():
            combined = matching.AndNotMatcher(combined, excluded)

    # If this query had a boost, add a wrapping matcher to apply the boost
    if self.boost == 1.0:
        return combined
    return matching.WrappingMatcher(combined, self.boost)
def _tree_matcher(self, subs, mcls, searcher, context, q_weight_fn, **kwargs):
    """Combine the matchers of ``subs`` into one matcher tree.

    :param subs: subqueries whose matchers are combined.
    :param mcls: matcher class used to join the subquery matchers.
    :param searcher: passed to each subquery's ``matcher``.
    :param context: passed to each subquery's ``matcher``.
    :param q_weight_fn: function called on each subquery to get a "weight"
        used to build a huffman-like matcher tree; if None, an
        order-preserving binary tree is built instead.
    :param kwargs: forwarded to the tree-building helper.
    :returns: the combined matcher, wrapped in ``matching.WrappingMatcher``
        when ``self.boost`` is not 1.0.
    """
    # Create a matcher from the list of subqueries
    sub_matchers = [query.matcher(searcher, context) for query in subs]

    if len(sub_matchers) == 1:
        # A lone subquery needs no tree at all.
        tree = sub_matchers[0]
    elif q_weight_fn is None:
        tree = make_binary_tree(mcls, sub_matchers, **kwargs)
    else:
        weighted = [(q_weight_fn(query), sub_m)
                    for query, sub_m in zip(subs, sub_matchers)]
        tree = make_weighted_tree(mcls, weighted, **kwargs)

    # If this query had a boost, add a wrapping matcher to apply the boost
    if self.boost == 1.0:
        return tree
    return matching.WrappingMatcher(tree, self.boost)