def test_weighting():
    """Check that a user-supplied ``Weighting`` decides the order of results.

    Four documents are indexed, each with a stored ``n_comments`` value.
    The fake weighting scores a match with that stored value, and the test
    expects the ids back as 2, 4, 1, 3, i.e. in order of decreasing
    ``n_comments`` (12, 7, 5, 2).
    """
    from whoosh.scoring import Weighting, BaseScorer

    schema = fields.Schema(id=fields.ID(stored=True), n_comments=fields.STORED)
    index = RamStorage().create_index(schema)

    # (document id, number of comments)
    documents = (
        (u("1"), 5),
        (u("2"), 12),
        (u("3"), 2),
        (u("4"), 7),
    )
    writer = index.writer()
    for doc_id, comment_count in documents:
        writer.add_document(id=doc_id, n_comments=comment_count)
    writer.commit()

    # Fake Weighting implementation: the score of a match is simply the
    # stored "n_comments" value of the matched document.
    class CommentWeighting(Weighting):
        class CommentScorer(BaseScorer):
            def __init__(self, stored_fields):
                # Callable that maps a document number to its stored fields.
                self.stored_fields = stored_fields

            def score(self, matcher):
                stored = self.stored_fields(matcher.id())
                return stored.get("n_comments", 0)

        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

    with index.searcher(weighting=CommentWeighting()) as searcher:
        # constantscore=False so that the custom scorer is actually consulted.
        range_query = TermRange("id", u("1"), u("4"), constantscore=False)
        hits = searcher.search(range_query)
        found_ids = [hit["id"] for hit in hits]
        assert_equal(found_ids, ["2", "4", "1", "3"])
def test_query_copy_hash():
    """Check copy, equality and hash behaviour across the query classes.

    For every pair ``(query, other)`` below the test asserts that

    * a ``copy.deepcopy`` of ``query`` compares equal to ``query``,
    * the copy has the same hash as ``query``, and
    * ``query`` does not compare equal to ``other``.
    """

    def assert_copy_semantics(query, other):
        clone = copy.deepcopy(query)
        assert_equal(query, clone)
        assert_equal(hash(query), hash(clone))
        assert_not_equal(query, other)

    # Small factories for the two terms used over and over below.  Each call
    # builds a fresh object, exactly as spelling the constructor out would.
    def ab(**kwargs):
        return Term("a", u("b"), **kwargs)

    def cd():
        return Term("c", u("d"))

    # --- Term and compound queries ---------------------------------------
    assert_copy_semantics(ab(boost=1.1), ab(boost=1.5))
    assert_copy_semantics(
        And([ab(), cd()], boost=1.1),
        And([ab(), cd()], boost=1.5),
    )
    assert_copy_semantics(
        Or([ab(boost=1.1), cd()]),
        Or([ab(boost=1.8), cd()], boost=1.5),
    )
    assert_copy_semantics(
        DisjunctionMax([ab(boost=1.8), cd()]),
        DisjunctionMax([ab(boost=1.1), cd()], boost=1.5),
    )
    assert_copy_semantics(Not(ab(boost=1.1)), Not(ab(boost=1.5)))

    # --- Multi-term queries ----------------------------------------------
    assert_copy_semantics(
        Prefix("a", u("b"), boost=1.1),
        Prefix("a", u("b"), boost=1.5),
    )
    assert_copy_semantics(
        Wildcard("a", u("b*x?"), boost=1.1),
        Wildcard("a", u("b*x?"), boost=1.5),
    )
    assert_copy_semantics(
        FuzzyTerm("a", u("b"), constantscore=True),
        FuzzyTerm("a", u("b"), constantscore=False),
    )
    assert_copy_semantics(
        FuzzyTerm("a", u("b"), boost=1.1),
        FuzzyTerm("a", u("b"), boost=1.5),
    )

    # --- Ranges -----------------------------------------------------------
    assert_copy_semantics(
        TermRange("a", u("b"), u("c")),
        TermRange("a", u("b"), u("d")),
    )
    assert_copy_semantics(
        TermRange("a", None, u("c")),
        TermRange("a", None, None),
    )
    assert_copy_semantics(
        TermRange("a", u("b"), u("c"), boost=1.1),
        TermRange("a", u("b"), u("c"), boost=1.5),
    )
    assert_copy_semantics(
        TermRange("a", u("b"), u("c"), constantscore=True),
        TermRange("a", u("b"), u("c"), constantscore=False),
    )
    assert_copy_semantics(NumericRange("a", 1, 5), NumericRange("a", 1, 6))
    assert_copy_semantics(
        NumericRange("a", None, 5),
        NumericRange("a", None, None),
    )
    assert_copy_semantics(
        NumericRange("a", 3, 6, boost=1.1),
        NumericRange("a", 3, 6, boost=1.5),
    )
    assert_copy_semantics(
        NumericRange("a", 3, 6, constantscore=True),
        NumericRange("a", 3, 6, constantscore=False),
    )
    # TODO: DateRange is not covered yet.

    # --- Variations and phrases ------------------------------------------
    assert_copy_semantics(
        Variations("a", u("render")),
        Variations("a", u("renders")),
    )
    assert_copy_semantics(
        Variations("a", u("render"), boost=1.1),
        Variations("a", u("renders"), boost=1.5),
    )
    assert_copy_semantics(
        Phrase("a", [u("b"), u("c"), u("d")]),
        Phrase("a", [u("b"), u("c"), u("e")]),
    )
    assert_copy_semantics(
        Phrase("a", [u("b"), u("c"), u("d")], boost=1.1),
        Phrase("a", [u("b"), u("c"), u("d")], boost=1.5),
    )
    assert_copy_semantics(
        Phrase("a", [u("b"), u("c"), u("d")], slop=1),
        Phrase("a", [u("b"), u("c"), u("d")], slop=2),
    )
    # TODO: Ordered is not covered yet.

    # --- Every / NullQuery / wrappers ------------------------------------
    assert_copy_semantics(Every(), Every("a"))
    assert_copy_semantics(Every("a"), Every("b"))
    assert_copy_semantics(Every("a", boost=1.1), Every("a", boost=1.5))
    assert_copy_semantics(NullQuery, ab())
    assert_copy_semantics(
        ConstantScoreQuery(ab()),
        ConstantScoreQuery(Term("a", u("c"))),
    )
    assert_copy_semantics(
        ConstantScoreQuery(ab(), score=2.0),
        ConstantScoreQuery(Term("a", u("c")), score=2.1),
    )
    assert_copy_semantics(
        Require(ab(), cd()),
        Require(ab(boost=1.1), cd()),
    )
    # TODO: AndMaybe, AndNot and Otherwise are not covered yet.

    # --- Span queries -----------------------------------------------------
    assert_copy_semantics(
        SpanFirst(ab(), limit=1),
        SpanFirst(ab(), limit=2),
    )
    assert_copy_semantics(
        SpanNear(ab(), cd()),
        SpanNear(ab(), Term("c", u("e"))),
    )
    assert_copy_semantics(
        SpanNear(ab(), cd(), slop=1),
        SpanNear(ab(), cd(), slop=2),
    )
    assert_copy_semantics(
        SpanNear(ab(), cd(), mindist=1),
        SpanNear(ab(), cd(), mindist=2),
    )
    assert_copy_semantics(
        SpanNear(ab(), cd(), ordered=True),
        SpanNear(ab(), cd(), ordered=False),
    )
    assert_copy_semantics(
        SpanNot(ab(), Term("a", u("c"))),
        SpanNot(ab(), Term("a", u("d"))),
    )
    assert_copy_semantics(
        SpanOr([ab(), Term("a", u("c")), Term("a", u("d"))]),
        SpanOr([ab(), Term("a", u("c")), Term("a", u("e"))]),
    )
    assert_copy_semantics(
        SpanContains(ab(), Term("a", u("c"))),
        SpanContains(ab(), Term("a", u("d"))),
    )
def test_merge_ranges():
    """Check what ``normalize()`` returns for compound queries over one field.

    Each entry in ``cases`` pairs a compound query with the query its
    ``normalize()`` result must compare equal to.
    """
    # This conjunction is expected to come back from normalize() as a query
    # equal to itself, so it serves as both the input and the expectation.
    unchanged = And([
        NumericRange("f1", None, u("aaaaa")),
        NumericRange("f1", u("zzzzz"), None),
    ])

    cases = [
        # Open-ended ranges combined into a single bounded range.
        (
            And([TermRange("f1", u("a"), None), TermRange("f1", None, u("z"))]),
            TermRange("f1", u("a"), u("z")),
        ),
        (unchanged, unchanged),
        (
            And([TermRange("f1", u("a"), u("z")), TermRange("f1", "b", "x")]),
            TermRange("f1", u("a"), u("z")),
        ),
        # Overlapping ranges: And yields f..m, Or yields a..q.
        (
            And([TermRange("f1", u("a"), u("m")),
                 TermRange("f1", u("f"), u("q"))]),
            TermRange("f1", u("f"), u("m")),
        ),
        (
            Or([TermRange("f1", u("a"), u("m")),
                TermRange("f1", u("f"), u("q"))]),
            TermRange("f1", u("a"), u("q")),
        ),
        # Combinations expected to collapse to Every("f1").
        (
            Or([TermRange("f1", u("m"), None), TermRange("f1", None, u("n"))]),
            Every("f1"),
        ),
        (
            And([Every("f1"), Term("f1", "a"), Variations("f1", "b")]),
            Every("f1"),
        ),
        (
            Or([Term("f1", u("q")),
                TermRange("f1", u("m"), None),
                TermRange("f1", None, u("n"))]),
            Every("f1"),
        ),
        (
            And([Or([Term("f1", u("a")), Term("f1", u("b"))]), Every("f1")]),
            Every("f1"),
        ),
        (
            And([Term("f1", u("a")), And([Or([Every("f1")])])]),
            Every("f1"),
        ),
    ]

    for query, expected in cases:
        assert_equal(query.normalize(), expected)
def in_site_search(request):
    """
    In-site search.

    Reads ``keyword`` and ``scope`` (default ``'all'``) from ``request.POST``,
    runs the keyword through ``split_cn_words`` and queries the Whoosh index
    that matches the scope:

    * ``'feed'`` / ``'all'``: the ``site`` index; ``rel_sites`` becomes the
      active ``Site`` rows for the hits, ordered by ``-star``.
    * ``'article'``: the ``article`` index; ``rel_articles`` becomes an
      iterator over the recent, active ``Article`` rows for the hits.

    Returns ``HttpResponseForbidden`` for an unknown scope,
    ``HttpResponseNotFound`` when the processed keyword is empty, otherwise
    the rendered template for the scope.
    """
    user = get_login_user(request)
    keyword = request.POST.get('keyword', '').strip()
    scope = request.POST.get('scope', 'all')

    logger.warning(f"搜索关键字:`{keyword}")
    keyword = split_cn_words(keyword, join=True)
    logger.info(f"转换后的关键字:`{keyword}")

    # Template rendered for each accepted scope; doubles as the whitelist.
    templates = {
        'all': 'search/search.html',
        'feed': 'search/search_feeds.html',
        'article': 'search/search_articles.html',
    }

    if scope not in ('all', 'feed', 'article'):
        return HttpResponseForbidden('Param Error')

    if not keyword:
        return HttpResponseNotFound("Empty Keyword")

    storage = FileStorage(settings.WHOOSH_IDX_DIR)
    rel_sites = rel_articles = None

    if scope == 'article':
        # Look up related articles.
        idx = storage.open_index(indexname="article", schema=whoosh_article_schema)
        parser = MultifieldParser(['title', 'author', 'content'], schema=whoosh_article_schema)
        query = parser.parse(keyword)

        with idx.searcher() as searcher:
            # Documents matching the mask are left out of the results: here,
            # every uindex up to (current_ts() - 7 * 86400 * 1000).
            # NOTE(review): presumably current_ts() is in milliseconds, which
            # would make this a 7-day cutoff - confirm.
            old_mask = TermRange("uindex", None, str(current_ts() - 7 * 86400 * 1000))
            article_ids = [
                hit['uindex']
                for hit in searcher.search(query, mask=old_mask, limit=50)
            ]

        rel_articles = Article.objects.filter(
            is_recent=True, status='active', uindex__in=article_ids).iterator()
    else:
        # Look up related feeds (scope is 'feed' or 'all' at this point).
        # NOTE(review): with scope 'all' only the site index is searched and
        # rel_articles stays None - confirm this is intended.
        idx = storage.open_index(indexname="site", schema=whoosh_site_schema)
        parser = MultifieldParser(['cname', 'author', 'brief'], schema=whoosh_site_schema)
        query = parser.parse(keyword)

        with idx.searcher() as searcher:
            site_ids = [hit['id'] for hit in searcher.search(query, limit=50)]

        rel_sites = Site.objects.filter(status='active', pk__in=site_ids).order_by('-star')

    # Feeds the current user subscribes to (empty when not logged in).
    user_sub_feeds = []
    if user:
        user_sub_feeds = get_user_subscribe_feeds(user.oauth_id, user_level=user.level)

    context = {
        'user': user,
        'user_sub_feeds': user_sub_feeds,
        'rel_sites': rel_sites,
        'rel_articles': rel_articles,
        'keyword': keyword,
    }

    return render(request, templates[scope], context=context)
def check(startexcl, endexcl, string):
    """Assert which ids a ``TermRange`` from "b" to "f" on field "id" matches.

    ``startexcl`` and ``endexcl`` are passed straight through to
    ``TermRange``.  The ``id`` values of all hits are sorted, concatenated and
    compared with ``string``.

    Relies on ``s`` from the enclosing scope, which must offer a ``search``
    method (presumably an open searcher - confirm against the caller).
    """
    range_query = TermRange("id", "b", "f", startexcl, endexcl)
    matched_ids = [hit['id'] for hit in s.search(range_query)]
    matched_ids.sort()
    assert_equal("".join(matched_ids), string)