Example #1
0
def test_query_facet():
    """Group all documents into three letter ranges with a ``QueryFacet``.

    Nine single-letter documents are indexed, each through its own writer
    committed with ``merge=False``.  Every range query is first checked on
    its own, then the same queries are used as the facet's buckets and the
    grouped result of an ``Every`` search is compared against the same ids.
    """
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)
    for docid, letter in enumerate(u("iacgbehdf")):
        writer = ix.writer()
        writer.add_document(id=docid, v=letter)
        writer.commit(merge=False)

    with ix.searcher() as s:
        # (group name, range query, stored ids expected inside that range)
        buckets = [
            ("a-c", query.TermRange("v", "a", "c"), [1, 2, 4]),
            ("d-f", query.TermRange("v", "d", "f"), [5, 7, 8]),
            ("g-i", query.TermRange("v", "g", "i"), [0, 3, 6]),
        ]

        for _, rangeq, ids in buckets:
            assert_equal([hit["id"] for hit in s.search(rangeq)], ids)

        facet = sorting.QueryFacet(
            dict((name, rangeq) for name, rangeq, _ in buckets))
        results = s.search(query.Every(), groupedby=facet)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        assert_equal(results.groups("facet"),
                     dict((name, ids) for name, _, ids in buckets))
Example #2
0
def test_query_facet2():
    """Group two-term KEYWORD documents with a named query facet.

    Each document holds a letter of ``domain`` plus the letter found at the
    negated position (``domain[-i]``; index ``-0`` is ``0``, so the first
    document is ``"a a"``).  The three range queries are registered under
    the facet name ``"myfacet"`` with ``allow_overlap=True``.
    """
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for pos, letter in enumerate(domain):
            partner = domain[-pos]
            w.add_document(v="%s %s" % (letter, partner))

    with ix.searcher() as s:
        ranges = {
            "a-c": query.TermRange("v", "a", "c"),
            "d-f": query.TermRange("v", "d", "f"),
            "g-i": query.TermRange("v", "g", "i"),
        }

        facets = sorting.Facets()
        facets.add_query("myfacet", ranges, allow_overlap=True)
        results = s.search(query.Every(), groupedby=facets)

        # NOTE(review): the expectation below puts every document in exactly
        # one group, although several documents contain terms from two
        # ranges (e.g. "d g", "b i") and allow_overlap=True is passed --
        # confirm this matches the library version under test.
        expected = {
            'a-c': [0, 1, 2, 7, 8],
            'd-f': [4, 5],
            'g-i': [3, 6],
        }
        assert_equal(results.groups("myfacet"), expected)
def test_closed_searcher():
    """Check searcher behaviour before and after ``close()``.

    First half: after the searcher is closed, reading a hit's stored
    fields, asking the reader for a column reader and requesting
    suggestions must each raise ``ReaderClosed``.

    Second half: the same operations succeed on an open searcher.  That
    searcher is opened as a context manager so it is closed again before
    the enclosing ``TempStorage`` context exits.
    """
    from whoosh.reading import ReaderClosed

    schema = fields.Schema(key=fields.KEYWORD(stored=True, sortable=True))

    with TempStorage() as st:
        ix = st.create_index(schema)
        with ix.writer() as w:
            w.add_document(key=u"alfa")
            w.add_document(key=u"bravo")
            w.add_document(key=u"charlie")
            w.add_document(key=u"delta")
            w.add_document(key=u"echo")

        # --- closed searcher: every access must fail ---------------------
        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        s.close()
        assert s.is_closed
        with pytest.raises(ReaderClosed):
            # The lookup itself is expected to raise; the comparison is
            # never reached.
            assert r[0]["key"] == "bravo"
        with pytest.raises(ReaderClosed):
            s.reader().column_reader("key")
        with pytest.raises(ReaderClosed):
            s.suggest("key", "brovo")

        # --- open searcher: the same accesses succeed --------------------
        # Context manager ensures this searcher is released even when an
        # assertion fails, instead of being left open.
        with ix.searcher() as s:
            r = s.search(query.TermRange("key", "b", "d"))
            assert r[0]
            assert r[0]["key"] == "bravo"
            c = s.reader().column_reader("key")
            assert c[1] == "bravo"
            assert s.suggest("key", "brovo") == ["bravo"]
Example #4
0
    def query(self, parser):
        """Build the query object for this range node.

        The field name falls back to ``parser.fieldname`` when the node has
        none of its own.  When the parser's schema contains the field and
        the field reports ``self_parsing()``, the field's ``parse_range``
        gets the first chance to build the query; a ``QueryParserError``
        raised there is converted with ``query.error_query``.  Otherwise, or
        when ``parse_range`` returns ``None``, each truthy endpoint is run
        through ``get_single_text`` and a ``query.TermRange`` is built.

        :param parser: object providing ``fieldname`` and ``schema``.
        :returns: the result of ``attach(<built query>, self)``.
        """
        fieldname = self.fieldname or parser.fieldname
        lower, upper = self.start, self.end

        schema = parser.schema
        if schema and fieldname in schema:
            field = schema[fieldname]
            if field.self_parsing():
                try:
                    parsed = field.parse_range(fieldname, lower, upper,
                                               self.startexcl, self.endexcl,
                                               boost=self.boost)
                    if parsed is not None:
                        return attach(parsed, self)
                except QueryParserError:
                    # The exception is fetched through sys.exc_info() rather
                    # than bound in the except clause (presumably for
                    # compatibility with older interpreters).
                    error = sys.exc_info()[1]
                    return attach(query.error_query(error), self)

            # Empty or missing endpoints are passed on untouched.
            if lower:
                lower = get_single_text(field, lower, tokenize=False,
                                        removestops=False)
            if upper:
                upper = get_single_text(field, upper, tokenize=False,
                                        removestops=False)

        rangeq = query.TermRange(fieldname, lower, upper, self.startexcl,
                                 self.endexcl, boost=self.boost)
        return attach(rangeq, self)
Example #5
0
def test_gtlt():
    """Exercise the ``>``, ``>=``, ``<``, ``<=`` field syntax of GtLtPlugin.

    Covers a numeric and a keyword field, a date field (together with the
    date parser plugin), operators followed by a space, and parsing after
    the fields and range plugins have been removed.
    """
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD,
                           d=fields.NUMERIC(float), e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    for plugin in (plugins.GtLtPlugin(), dateparse.DateParserPlugin()):
        qp.add_plugin(plugin)

    # Keyword and numeric fields.
    parsed = qp.parse(u("a:hello b:>100 c:<=z there"))
    expected = [
        query.Term("a", "hello"),
        query.NumericRange("b", 100, None, startexcl=True),
        query.TermRange("c", None, 'z'),
        query.Term("a", "there"),
    ]
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 4)
    for pos, subq in enumerate(expected):
        assert_equal(parsed[pos], subq)

    # Date field.
    parsed = qp.parse(u("hello e:>'29 mar 2001' there"))
    expected = [
        query.Term("a", "hello"),
        # As of this writing, date ranges don't support startexcl/endexcl
        query.DateRange("e", datetime(2001, 3, 29, 0, 0), None),
        query.Term("a", "there"),
    ]
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 3)
    for pos, subq in enumerate(expected):
        assert_equal(parsed[pos], subq)

    # A space after the operator: the expected string shows plain terms.
    parsed = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(parsed), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    # Same expectation style once the fields and range plugins are removed.
    for plugin_class in (plugins.FieldsPlugin, plugins.RangePlugin):
        qp.remove_plugin_class(plugin_class)
    parsed = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(parsed),
                 "(a:hello AND a:a: AND a:500 AND a:there)")
Example #6
0
def test_weighting():
    """Search with a custom ``Weighting`` that scores by a stored field.

    The scorer returns each matching document's stored ``n_comments``
    value, and the test expects the hits ordered 12, 7, 5, 2.
    """
    from whoosh.scoring import Weighting, BaseScorer

    schema = fields.Schema(id=fields.ID(stored=True),
                           n_comments=fields.STORED)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for docid, count in (("1", 5), ("2", 12), ("3", 2), ("4", 7)):
        writer.add_document(id=u(docid), n_comments=count)
    writer.commit()

    # Fake Weighting implementation
    class CommentWeighting(Weighting):
        class CommentScorer(BaseScorer):
            def __init__(self, stored_fields):
                # Callable used to look up stored fields by matcher id.
                self.stored_fields = stored_fields

            def score(self, matcher):
                stored = self.stored_fields(matcher.id())
                return stored.get("n_comments", 0)

        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

    with ix.searcher(weighting=CommentWeighting()) as s:
        # constantscore=False is passed so the range query is scored.
        rangeq = query.TermRange("id", u("1"), u("4"), constantscore=False)

        results = s.search(rangeq)
        assert_equal([hit["id"] for hit in results], ["2", "4", "1", "3"])
Example #7
0
def test_searching():
    """Run several query types against ``make_index()`` and compare ids.

    Each case is a ``(query, expected ids, search keyword arguments)``
    triple; the cases are executed in the order listed.
    """
    with make_index().searcher() as s:
        everything = [
            "fieldtype", "format", "vector", "scorable", "stored", "unique",
            "const"
        ]
        cases = [
            (query.Term("text", u("format")), ["format", "vector"], {}),
            (query.Term("text", u("the")),
             ["fieldtype", "format", "const", "vector", "stored"], {}),
            (query.Prefix("text", u("st")),
             ["format", "vector", "stored"], {}),
            (query.Wildcard("id", u("*st*")), ["stored", "const"], {}),
            (query.TermRange("id", u("c"), u("s")),
             ["fieldtype", "format", "const"], {}),
            (query.NumericRange("subs", 10, 100),
             ["fieldtype", "format", "vector", "scorable"], {}),
            (query.Phrase("text", ["this", "field"]),
             ["scorable", "unique", "stored"], {"limit": None}),
            (query.Every(), list(everything), {}),
            (query.Every("subs"), list(everything), {}),
        ]

        for q, expected_ids, options in cases:
            hits = s.search(q, **options)
            assert_equal([d["id"] for d in hits], expected_ids)
Example #8
0
def test_nocachefield_segments():
    """Sort by a reversed ``FieldFacet`` across three separate commits.

    The index is written in three batches (the last two committed with
    ``merge=False``; the third also deletes "echo").  The test then checks
    reverse-sorted results for a plain range query, the same query combined
    with ``AndNot``, with ``mask=``, and ``Every`` with two ``filter=``
    queries.
    """
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    # Batch 1: default commit.
    w = ix.writer()
    for word in ("bravo", "echo", "juliet"):
        w.add_document(a=u(word))
    w.commit()

    # Batch 2: committed without merging.
    w = ix.writer()
    for word in ("kilo", "foxtrot", "charlie"):
        w.add_document(a=u(word))
    w.commit(merge=False)

    # Batch 3: delete "echo", add three more, again without merging.
    w = ix.writer()
    w.delete_by_term("a", u("echo"))
    for word in ("alfa", "india", "delta"):
        w.add_document(a=u(word))
    w.commit(merge=False)

    def stored_values(results):
        # Stored "a" value of every hit, in result order.
        return [hit["a"] for hit in results]

    def bravo_or_delta():
        # A fresh query object for each use, matching "bravo" or "delta".
        return query.Or(
            [query.Term("a", u("bravo")),
             query.Term("a", u("delta"))])

    with ix.searcher() as s:
        rangeq = query.TermRange("a", u("bravo"), u("k"))
        facet = sorting.FieldFacet("a", reverse=True)

        r = s.search(rangeq, sortedby=facet)
        assert stored_values(r) == [
            "juliet", "india", "foxtrot", "delta", "charlie", "bravo"
        ]

        # Exclusion expressed inside the query...
        r = s.search(query.AndNot(rangeq, bravo_or_delta()), sortedby=facet)
        assert stored_values(r) == ["juliet", "india", "foxtrot", "charlie"]

        # ...and through the mask= argument.
        r = s.search(rangeq, mask=bravo_or_delta(), sortedby=facet)
        assert stored_values(r) == ["juliet", "india", "foxtrot", "charlie"]

        # Filter listing four terms, one of which ("echo") was deleted.
        allowed = query.Or([
            query.Term("a", u(word))
            for word in ("alfa", "charlie", "echo", "india")
        ])
        r = s.search(query.Every(), filter=allowed, sortedby=facet)
        assert stored_values(r) == ["india", "charlie", "alfa"]

        # Negated filter.
        not_alfa_india = query.Not(
            query.Or([query.Term("a", u("alfa")),
                      query.Term("a", u("india"))]))
        r = s.search(query.Every(), filter=not_alfa_india, sortedby=facet)
        assert stored_values(r) == [
            "kilo", "juliet", "foxtrot", "delta", "charlie", "bravo"
        ]
Example #9
0
 def make_range(self, fieldname, range):
     """Build a ``query.TermRange`` from a ``(start, end)`` pair.

     ``fieldname`` falls back to ``self.default_field`` when falsy, and
     both endpoints are passed through ``self._analyze`` for that field.

     NOTE(review): the analyzed endpoints are handed to ``TermRange`` as a
     single tuple argument -- confirm this matches the ``TermRange``
     signature of the library version in use.
     """
     low, high = range
     fieldname = fieldname or self.default_field
     low = self._analyze(fieldname, low)
     high = self._analyze(fieldname, high)
     bounds = (low, high)
     return query.TermRange(fieldname or self.default_field, bounds)
Example #10
0
 def check(startexcl, endexcl, string):
     """Assert which ids a "b".."f" range matches for the given exclusivity.

     Runs the range query on the enclosing scope's searcher ``s``, sorts
     the ``id`` values of the hits, concatenates them and compares the
     result with ``string``.
     """
     rangeq = query.TermRange("id", "b", "f", startexcl, endexcl)
     found = sorted(hit['id'] for hit in s.search(rangeq))
     assert_equal("".join(found), string)
Example #11
0
 class Range(Token):
     """Token for range syntax such as ``[start TO end]`` / ``{start TO end}``.

     ``expr`` matches an opening ``[`` or ``{``, an optional start value, the
     word "to" in any letter case, an optional end value and a closing ``]``
     or ``}``.  Either value may be wrapped in single quotes.  A curly
     bracket on a side marks that side as exclusive (see ``create``).
     """

     expr = rcompile(r"""
     (?P<open>\{|\[)               # Open paren

     (                             # Begin optional "start"
       (                           # Begin choice between start1 and start2
         ('(?P<start2>[^']+)')     # Quoted start
         | (?P<start1>[^ ]+)       # ...or regular start
       )                           # End choice
     [ ]+)?                        # Space at end of optional "start"

     [Tt][Oo]                      # "to" between start and end

     ([ ]+                         # Space at start of optional "end"
       (                           # Begin choice between end1 and end2
         ('(?P<end2>[^']+)')       # Quoted end
         | (?P<end1>[^\]\}]*)      # ...or normal end
       )                           # End choice
     )?                            # End of optional "end

     (?P<close>\}|\])              # Close paren
     """, re.VERBOSE)

     def __init__(self, start, end, startexcl, endexcl, fieldname=None, boost=1.0):
         """Store the range endpoints, exclusivity flags, field and boost.

         :param start: start value, or None when absent.
         :param end: end value, or None when absent.
         :param startexcl: True if the start of the range is exclusive.
         :param endexcl: True if the end of the range is exclusive.
         :param fieldname: field the range applies to; None means the
             parser's field is used when the query is built.
         :param boost: boost passed on to the generated query.
         """
         self.fieldname = fieldname
         self.start = start
         self.end = end
         self.startexcl = startexcl
         self.endexcl = endexcl
         self.boost = boost

     def set_boost(self, b):
         """Return a copy of this token with its boost replaced by ``b``."""
         return self.__class__(self.start, self.end, self.startexcl,
                               self.endexcl, fieldname=self.fieldname,
                               boost=b)

     def set_fieldname(self, name):
         """Return a copy of this token with its field name set to ``name``."""
         return self.__class__(self.start, self.end, self.startexcl,
                               self.endexcl, fieldname=name,
                               boost=self.boost)

     def __repr__(self):
         r = "%s:(%r, %r, %s, %s)" % (self.fieldname, self.start, self.end,
                                      self.startexcl, self.endexcl)
         # The boost is only shown when it differs from the default.
         if self.boost != 1.0:
             r += "^%s" % self.boost
         return r

     @classmethod
     def create(cls, parser, match):
         """Build a token from a successful match of ``expr``.

         The quoted group of each endpoint wins over the unquoted one.  An
         opening ``{`` makes the start exclusive and a closing ``}`` makes
         the end exclusive.  ``parser`` is not used here.
         """
         start = match.group("start2") or match.group("start1")
         end = match.group("end2") or match.group("end1")
         return cls(start, end, startexcl=match.group("open") == "{",
                    endexcl=match.group("close") == "}")

     def query(self, parser):
         """Return the query object for this range.

         When the parser's schema contains the field and the field reports
         ``self_parsing()``, the field's ``parse_range`` is tried first: a
         non-None result is returned as is, and a ``QueryParserError``
         yields ``query.NullQuery``.  Otherwise each truthy endpoint is
         passed through ``parser.get_single_text`` and a ``query.TermRange``
         is returned, with a missing start replaced by the empty string and
         a missing end replaced by ``u'\\uFFFF'``.
         """
         fieldname = self.fieldname or parser.fieldname
         start, end = self.start, self.end
         if parser.schema and fieldname in parser.schema:
             field = parser.schema[fieldname]

             if field.self_parsing():
                 try:
                     rangeq = field.parse_range(fieldname, start, end,
                                                self.startexcl, self.endexcl,
                                                boost=self.boost)
                     if rangeq is not None:
                         return rangeq
                 except QueryParserError:
                     # The exception object is not needed, so it is not
                     # bound; this spelling is valid on Python 2 and 3
                     # (the old "except X, e" form is Python-2-only).
                     return query.NullQuery

             if start:
                 start = parser.get_single_text(field, start,
                                                tokenize=False,
                                                removestops=False)
             if end:
                 end = parser.get_single_text(field, end, tokenize=False,
                                              removestops=False)

         # Open-ended ranges get explicit lowest/highest placeholder terms.
         if start is None:
             start = u''
         if end is None:
             end = u'\uFFFF'

         return query.TermRange(fieldname, start, end, self.startexcl,
                                self.endexcl, boost=self.boost)