Ejemplo n.º 1
0
def test_weighting():
    """Check that a custom ``Weighting`` drives the order of search results.

    Four documents are indexed with a stored ``n_comments`` value. A fake
    weighting scores each match with that stored value, and the test asserts
    the hits come back as ids 2, 4, 1, 3 (n_comments 12, 7, 5, 2).
    """
    from whoosh.scoring import BaseScorer, Weighting

    doc_schema = fields.Schema(id=fields.ID(stored=True),
                               n_comments=fields.STORED)
    index = RamStorage().create_index(doc_schema)

    # (document id, number of comments) fixtures, added in this order.
    fixtures = (("1", 5), ("2", 12), ("3", 2), ("4", 7))
    writer = index.writer()
    for doc_id, comment_count in fixtures:
        writer.add_document(id=u(doc_id), n_comments=comment_count)
    writer.commit()

    class CommentWeighting(Weighting):
        """Fake weighting whose score is the stored ``n_comments`` value."""

        class CommentScorer(BaseScorer):
            """Scorer that looks up ``n_comments`` for the current match."""

            def __init__(self, stored_fields):
                # Callable taking a document number and returning the
                # stored-field mapping for that document.
                self.stored_fields = stored_fields

            def score(self, matcher):
                doc_fields = self.stored_fields(matcher.id())
                # Documents without the field score 0.
                return doc_fields.get("n_comments", 0)

        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

    with index.searcher(weighting=CommentWeighting()) as searcher:
        # constantscore=False so the custom scorer is actually consulted.
        id_range = TermRange("id", u("1"), u("4"), constantscore=False)
        hits = searcher.search(id_range)
        ranked_ids = [hit["id"] for hit in hits]
        assert_equal(ranked_ids, ["2", "4", "1", "3"])
Ejemplo n.º 2
0
def test_query_copy_hash():
    """Exercise deep-copy, equality and hashing across the query classes.

    For every pair below, the first query must compare equal to (and hash the
    same as) its ``copy.deepcopy`` clone, and must compare unequal to the
    second query, which differs from it in at least one constructor argument.
    """
    def check_pair(query, other):
        # A deep copy must be indistinguishable by == and hash() ...
        clone = copy.deepcopy(query)
        assert_equal(query, clone)
        assert_equal(hash(query), hash(clone))
        # ... while a differently-configured query must not be equal.
        assert_not_equal(query, other)

    # --- Term and compound queries ---------------------------------------
    check_pair(Term("a", u("b"), boost=1.1),
               Term("a", u("b"), boost=1.5))
    check_pair(And([Term("a", u("b")), Term("c", u("d"))], boost=1.1),
               And([Term("a", u("b")), Term("c", u("d"))], boost=1.5))
    check_pair(Or([Term("a", u("b"), boost=1.1), Term("c", u("d"))]),
               Or([Term("a", u("b"), boost=1.8), Term("c", u("d"))],
                  boost=1.5))
    check_pair(DisjunctionMax([Term("a", u("b"), boost=1.8),
                               Term("c", u("d"))]),
               DisjunctionMax([Term("a", u("b"), boost=1.1),
                               Term("c", u("d"))], boost=1.5))
    check_pair(Not(Term("a", u("b"), boost=1.1)),
               Not(Term("a", u("b"), boost=1.5)))

    # --- Multi-term queries ----------------------------------------------
    check_pair(Prefix("a", u("b"), boost=1.1),
               Prefix("a", u("b"), boost=1.5))
    check_pair(Wildcard("a", u("b*x?"), boost=1.1),
               Wildcard("a", u("b*x?"), boost=1.5))
    check_pair(FuzzyTerm("a", u("b"), constantscore=True),
               FuzzyTerm("a", u("b"), constantscore=False))
    check_pair(FuzzyTerm("a", u("b"), boost=1.1),
               FuzzyTerm("a", u("b"), boost=1.5))

    # --- Range queries ---------------------------------------------------
    check_pair(TermRange("a", u("b"), u("c")),
               TermRange("a", u("b"), u("d")))
    check_pair(TermRange("a", None, u("c")),
               TermRange("a", None, None))
    check_pair(TermRange("a", u("b"), u("c"), boost=1.1),
               TermRange("a", u("b"), u("c"), boost=1.5))
    check_pair(TermRange("a", u("b"), u("c"), constantscore=True),
               TermRange("a", u("b"), u("c"), constantscore=False))
    check_pair(NumericRange("a", 1, 5),
               NumericRange("a", 1, 6))
    check_pair(NumericRange("a", None, 5),
               NumericRange("a", None, None))
    check_pair(NumericRange("a", 3, 6, boost=1.1),
               NumericRange("a", 3, 6, boost=1.5))
    check_pair(NumericRange("a", 3, 6, constantscore=True),
               NumericRange("a", 3, 6, constantscore=False))
    # Not covered here: DateRange.

    # --- Variations and phrases ------------------------------------------
    check_pair(Variations("a", u("render")),
               Variations("a", u("renders")))
    check_pair(Variations("a", u("render"), boost=1.1),
               Variations("a", u("renders"), boost=1.5))
    check_pair(Phrase("a", [u("b"), u("c"), u("d")]),
               Phrase("a", [u("b"), u("c"), u("e")]))
    check_pair(Phrase("a", [u("b"), u("c"), u("d")], boost=1.1),
               Phrase("a", [u("b"), u("c"), u("d")], boost=1.5))
    check_pair(Phrase("a", [u("b"), u("c"), u("d")], slop=1),
               Phrase("a", [u("b"), u("c"), u("d")], slop=2))
    # Not covered here: Ordered.

    # --- Every / null / wrapper queries ----------------------------------
    check_pair(Every(), Every("a"))
    check_pair(Every("a"), Every("b"))
    check_pair(Every("a", boost=1.1), Every("a", boost=1.5))
    check_pair(NullQuery, Term("a", u("b")))
    check_pair(ConstantScoreQuery(Term("a", u("b"))),
               ConstantScoreQuery(Term("a", u("c"))))
    check_pair(ConstantScoreQuery(Term("a", u("b")), score=2.0),
               ConstantScoreQuery(Term("a", u("c")), score=2.1))
    check_pair(Require(Term("a", u("b")), Term("c", u("d"))),
               Require(Term("a", u("b"), boost=1.1), Term("c", u("d"))))
    # Not covered here: AndMaybe, AndNot, Otherwise.

    # --- Span queries ----------------------------------------------------
    check_pair(SpanFirst(Term("a", u("b")), limit=1),
               SpanFirst(Term("a", u("b")), limit=2))
    check_pair(SpanNear(Term("a", u("b")), Term("c", u("d"))),
               SpanNear(Term("a", u("b")), Term("c", u("e"))))
    check_pair(SpanNear(Term("a", u("b")), Term("c", u("d")), slop=1),
               SpanNear(Term("a", u("b")), Term("c", u("d")), slop=2))
    check_pair(SpanNear(Term("a", u("b")), Term("c", u("d")), mindist=1),
               SpanNear(Term("a", u("b")), Term("c", u("d")), mindist=2))
    check_pair(SpanNear(Term("a", u("b")), Term("c", u("d")), ordered=True),
               SpanNear(Term("a", u("b")), Term("c", u("d")), ordered=False))
    check_pair(SpanNot(Term("a", u("b")), Term("a", u("c"))),
               SpanNot(Term("a", u("b")), Term("a", u("d"))))
    check_pair(SpanOr([Term("a", u("b")), Term("a", u("c")),
                       Term("a", u("d"))]),
               SpanOr([Term("a", u("b")), Term("a", u("c")),
                       Term("a", u("e"))]))
    check_pair(SpanContains(Term("a", u("b")), Term("a", u("c"))),
               SpanContains(Term("a", u("b")), Term("a", u("d"))))
Ejemplo n.º 3
0
def test_merge_ranges():
    """Check how ``normalize()`` merges range and ``Every`` subqueries.

    Each case builds a compound query, normalizes it, and compares the result
    with the single query (or the unchanged original) it is expected to
    collapse to.
    """
    # AND of two half-open ranges -> one closed range.
    query = And([TermRange("f1", u("a"), None),
                 TermRange("f1", None, u("z"))])
    expected = TermRange("f1", u("a"), u("z"))
    assert_equal(query.normalize(), expected)

    # These two ranges are expected to be left as they are.
    query = And([NumericRange("f1", None, u("aaaaa")),
                 NumericRange("f1", u("zzzzz"), None)])
    assert_equal(query.normalize(), query)

    # AND of an outer range and a range nested inside it: the test expects
    # the outer a..z range back.
    query = And([TermRange("f1", u("a"), u("z")),
                 TermRange("f1", "b", "x")])
    expected = TermRange("f1", u("a"), u("z"))
    assert_equal(query.normalize(), expected)

    # AND of overlapping ranges -> their intersection.
    query = And([TermRange("f1", u("a"), u("m")),
                 TermRange("f1", u("f"), u("q"))])
    expected = TermRange("f1", u("f"), u("m"))
    assert_equal(query.normalize(), expected)

    # OR of overlapping ranges -> their union.
    query = Or([TermRange("f1", u("a"), u("m")),
                TermRange("f1", u("f"), u("q"))])
    expected = TermRange("f1", u("a"), u("q"))
    assert_equal(query.normalize(), expected)

    # OR of two open ranges that together cover everything -> Every.
    query = Or([TermRange("f1", u("m"), None),
                TermRange("f1", None, u("n"))])
    expected = Every("f1")
    assert_equal(query.normalize(), expected)

    # The remaining cases are all expected to collapse to Every("f1").
    query = And([Every("f1"), Term("f1", "a"), Variations("f1", "b")])
    expected = Every("f1")
    assert_equal(query.normalize(), expected)

    query = Or([Term("f1", u("q")),
                TermRange("f1", u("m"), None),
                TermRange("f1", None, u("n"))])
    expected = Every("f1")
    assert_equal(query.normalize(), expected)

    query = And([Or([Term("f1", u("a")), Term("f1", u("b"))]),
                 Every("f1")])
    expected = Every("f1")
    assert_equal(query.normalize(), expected)

    query = And([Term("f1", u("a")), And([Or([Every("f1")])])])
    expected = Every("f1")
    assert_equal(query.normalize(), expected)
Ejemplo n.º 4
0
def in_site_search(request):
    """
    In-site search view.

    Reads ``keyword`` and ``scope`` from the POST data, segments the keyword
    with ``split_cn_words``, queries the matching Whoosh index and renders the
    template that belongs to the scope.

    Returns ``HttpResponseForbidden`` when ``scope`` is not one of ``all``,
    ``feed`` or ``article``, and ``HttpResponseNotFound`` when the processed
    keyword is empty. Only the ``article`` scope searches the article index;
    ``all`` and ``feed`` both search the site index.
    """
    user = get_login_user(request)
    keyword = request.POST.get('keyword', '').strip()
    scope = request.POST.get('scope', 'all')

    logger.warning(f"搜索关键字:`{keyword}")
    keyword = split_cn_words(keyword, join=True)
    logger.info(f"转换后的关键字:`{keyword}")

    # Validate the parameters before touching the index storage.
    if scope not in ('all', 'feed', 'article'):
        return HttpResponseForbidden('Param Error')
    if not keyword:
        return HttpResponseNotFound("Empty Keyword")

    storage = FileStorage(settings.WHOOSH_IDX_DIR)
    rel_sites = rel_articles = None

    if scope == 'article':
        # Look up related articles.
        index = storage.open_index(indexname="article",
                                   schema=whoosh_article_schema)
        parser = MultifieldParser(['title', 'author', 'content'],
                                  schema=whoosh_article_schema)
        parsed_query = parser.parse(keyword)

        with index.searcher() as searcher:
            # Range over uindex values up to "now minus 7 * 86400 * 1000",
            # passed as the search mask.
            # NOTE(review): presumably current_ts() is in milliseconds, making
            # this a 7-day cutoff - confirm.
            cutoff = str(current_ts() - 7 * 86400 * 1000)
            old_mask = TermRange("uindex", None, cutoff)
            hits = searcher.search(parsed_query, mask=old_mask, limit=50)
            # Collect the ids while the searcher is still open.
            article_ids = [hit['uindex'] for hit in hits]

        rel_articles = Article.objects.filter(
            is_recent=True, status='active',
            uindex__in=article_ids).iterator()
    else:
        # scope is 'feed' or 'all': look up related feeds (sites).
        index = storage.open_index(indexname="site",
                                   schema=whoosh_site_schema)
        parser = MultifieldParser(['cname', 'author', 'brief'],
                                  schema=whoosh_site_schema)
        parsed_query = parser.parse(keyword)

        with index.searcher() as searcher:
            hits = searcher.search(parsed_query, limit=50)
            # Collect the ids while the searcher is still open.
            site_ids = [hit['id'] for hit in hits]

        rel_sites = Site.objects.filter(
            status='active', pk__in=site_ids).order_by('-star')

    # Feeds the logged-in user subscribes to (empty for anonymous visitors).
    if user:
        user_sub_feeds = get_user_subscribe_feeds(user.oauth_id,
                                                  user_level=user.level)
    else:
        user_sub_feeds = []

    context = {
        'user': user,
        'user_sub_feeds': user_sub_feeds,
        'rel_sites': rel_sites,
        'rel_articles': rel_articles,
        'keyword': keyword,
    }

    # scope was validated above, so the lookup always succeeds.
    template_by_scope = {
        'all': 'search/search.html',
        'feed': 'search/search_feeds.html',
        'article': 'search/search_articles.html',
    }
    return render(request, template_by_scope[scope], context=context)
Ejemplo n.º 5
0
 def check(startexcl, endexcl, string):
     """Assert which ids the "b".."f" range matches for the given flags.

     Runs a ``TermRange`` over the ``id`` field from "b" to "f" with the
     given start/end exclusivity flags against the enclosing scope's
     searcher ``s``, then compares the sorted, concatenated ids of the hits
     with ``string``.
     """
     range_query = TermRange("id", "b", "f", startexcl, endexcl)
     matched_ids = sorted(hit['id'] for hit in s.search(range_query))
     assert_equal("".join(matched_ids), string)