Beispiel #1
0
def test_nocachefield_segments():
    """Sort, mask and filter on an ID field written over three commits.

    Three batches of documents are committed (the last two with
    ``merge=False``); the third batch also deletes the "echo" term.
    Every search is sorted on field "a" in reverse order.
    """
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for word in ("bravo", "echo", "juliet"):
        writer.add_document(a=u(word))
    writer.commit()

    writer = ix.writer()
    for word in ("kilo", "foxtrot", "charlie"):
        writer.add_document(a=u(word))
    writer.commit(merge=False)

    writer = ix.writer()
    writer.delete_by_term("a", u("echo"))
    for word in ("alfa", "india", "delta"):
        writer.add_document(a=u(word))
    writer.commit(merge=False)

    def values(results):
        # Stored "a" value of every hit, in result order.
        return [hit["a"] for hit in results]

    def bravo_or_delta():
        # A fresh query object is built for each use.
        return query.Or([query.Term("a", u("bravo")),
                         query.Term("a", u("delta"))])

    with ix.searcher() as searcher:
        in_range = query.TermRange("a", u("bravo"), u("k"))
        descending = sorting.FieldFacet("a", reverse=True)

        # Plain range query.
        found = searcher.search(in_range, sortedby=descending)
        assert values(found) == [
            "juliet", "india", "foxtrot", "delta", "charlie", "bravo"
        ]

        # Exclusion expressed as an AndNot query.
        without = query.AndNot(in_range, bravo_or_delta())
        found = searcher.search(without, sortedby=descending)
        assert values(found) == ["juliet", "india", "foxtrot", "charlie"]

        # The same exclusion expressed through the ``mask`` argument.
        found = searcher.search(in_range, mask=bravo_or_delta(),
                                sortedby=descending)
        assert values(found) == ["juliet", "india", "foxtrot", "charlie"]

        # Positive filter; "echo" is listed although it was deleted.
        only = query.Or([
            query.Term("a", u(word))
            for word in ("alfa", "charlie", "echo", "india")
        ])
        found = searcher.search(query.Every(), filter=only,
                                sortedby=descending)
        assert values(found) == ["india", "charlie", "alfa"]

        # Negated filter.
        excluded = query.Not(
            query.Or([query.Term("a", u("alfa")),
                      query.Term("a", u("india"))]))
        found = searcher.search(query.Every(), filter=excluded,
                                sortedby=descending)
        assert values(found) == [
            "kilo", "juliet", "foxtrot", "delta", "charlie", "bravo"
        ]
Beispiel #2
0
def test_page_sorted():
    """Index the lowercase alphabet in shuffled order and page it sorted."""
    schema = fields.Schema(key=fields.ID(stored=True))
    with TempIndex(schema, "pagesorted") as ix:
        letters = list(u("abcdefghijklmnopqrstuvwxyz"))
        random.shuffle(letters)

        writer = ix.writer()
        for letter in letters:
            writer.add_document(key=letter)
        writer.commit()

        with ix.searcher() as searcher:
            # A limited sorted search scores only ``limit`` hits while its
            # length still equals the total document count.
            limited = searcher.search(query.Every(), sortedby="key", limit=5)
            assert_equal(limited.scored_length(), 5)
            assert_equal(len(limited), searcher.doc_count_all())

            # First page of five, sorted by key.
            page = searcher.search_page(query.Every(), 1, pagelen=5,
                                        sortedby="key")
            first_keys = "".join([hit["key"] for hit in page])
            assert_equal(first_keys, "abcde")
            assert_equal(page[10:], [])

            # A term that matches nothing gives an empty, final page.
            nothing = query.Term("key", "glonk")
            page = searcher.search_page(nothing, 1, pagelen=5,
                                        sortedby="key")
            assert_equal(len(page), 0)
            assert page.is_last_page()
Beispiel #3
0
    def test(ix):
        """Check title, reverse-title and (num, reverse title) orderings.

        Relies on ``sorted_titles`` from the enclosing scope.
        """
        def titles(results):
            return [hit["title"] for hit in results]

        with ix.searcher() as searcher:
            # Ascending by title.
            found = searcher.search(query.Every(), sortedby="title")
            assert titles(found) == sorted_titles

            # Descending by title.
            backwards = sorting.FieldFacet("title", reverse=True)
            found = searcher.search(query.Every(), sortedby=backwards)
            assert titles(found) == list(reversed(sorted_titles))

            # By num (-10 to 10) first, then by reverse title within that.
            combined = sorting.MultiFacet()
            combined.add_field("num")
            combined.add_field("title", reverse=True)

            expected = [
                "Visual and Statistical Thinking",
                "Cognitive Style of Powerpoint",
                "Beautiful Evidence",
                "Visual Explanations",
                "Visual Display of Quantitative Information, The",
                "Envisioning Information",
            ]
            found = searcher.search(query.Every(), sortedby=combined)
            assert titles(found) == expected
Beispiel #4
0
def test_sort_filter():
    """Sorted, filtered searches must match before and after optimize().

    100 documents are written in shuffled order, with an unmerged commit
    after every 26th document, then searched sorted by (key, group) and
    filtered to group "bravo".
    """
    schema = fields.Schema(group=fields.ID(stored=True),
                           key=fields.ID(stored=True))
    groups = u("alfa bravo charlie").split()
    keys = u("abcdefghijklmnopqrstuvwxyz")
    source = [
        {"key": keys[n % len(keys)], "group": groups[n % len(groups)]}
        for n in xrange(100)
    ]
    source.sort(key=lambda doc: (doc["key"], doc["group"]))

    sample = list(source)
    random.shuffle(sample)

    # Expected hits: the sorted source restricted to the filtered group.
    expected = [d for d in source if d["group"] == "bravo"]

    def check(searcher):
        # Run the limited and the unlimited variant of the same search.
        bravo_only = query.Term("group", u("bravo"))
        found = searcher.search(query.Every(),
                                sortedby=("key", "group"),
                                filter=bravo_only,
                                limit=20)
        assert_equal([hit.fields() for hit in found], expected[:20])

        bravo_only = query.Term("group", u("bravo"))
        found = searcher.search(query.Every(),
                                sortedby=("key", "group"),
                                filter=bravo_only,
                                limit=None)
        assert_equal([hit.fields() for hit in found], expected)

    with TempIndex(schema, "sortfilter") as ix:
        writer = ix.writer()
        for count, doc in enumerate(sample, 1):
            writer.add_document(**doc)
            if count % 26 == 0:
                # Start a new segment after every 26 documents.
                writer.commit(merge=False)
                writer = ix.writer()
        writer.commit()

        with ix.searcher() as searcher:
            check(searcher)

        ix.optimize()

        with ix.searcher() as searcher:
            check(searcher)
    def test_read_with_type_facet(self):
        """Query everything, plain and with a "type" facet; expect a hit."""
        plain = self.whoosh_backend.query(query.Every())
        self.print_result(plain)

        faceted = self.whoosh_backend.query(query.Every(), facets=["type"])
        self.print_result(faceted)
        self.assertLessEqual(1, faceted.hits)
    def test_read_all(self):
        """Run the match-all query twice; the second run must have a hit."""
        first = self.whoosh_backend.query(query.Every())
        self.print_result(first)

        second = self.whoosh_backend.query(query.Every())
        self.print_result(second)
        self.assertLessEqual(1, second.hits)
Beispiel #7
0
def test_overlapping_lists():
    """Group on a KEYWORD field with ``allow_overlap=True``.

    The grouping is requested once through a FieldFacet and once through
    a Facets collection; both must give the same groups.
    """
    schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
    ix = RamStorage().create_index(schema)

    tag_lists = [
        "alfa bravo charlie",
        "bravo charlie delta",
        "charlie delta echo",
        "delta echo alfa",
        "echo alfa bravo",
    ]
    with ix.writer() as writer:
        for docid, tags in enumerate(tag_lists):
            writer.add_document(id=docid, tags=u(tags))

    expected = {
        'alfa': [0, 3, 4],
        'bravo': [0, 1, 4],
        'charlie': [0, 1, 2],
        'delta': [1, 2, 3],
        'echo': [2, 3, 4],
    }

    with ix.searcher() as searcher:
        overlap_facet = sorting.FieldFacet("tags", allow_overlap=True)
        categorizer = overlap_facet.categorizer(searcher)
        assert not categorizer._use_vectors

        grouped = searcher.search(query.Every(),
                                  groupedby={"tags": overlap_facet})
        assert grouped.groups("tags") == expected

        facets = sorting.Facets()
        facets.add_field("tags", allow_overlap=True)
        grouped = searcher.search(query.Every(), groupedby=facets)
        assert grouped.groups("tags") == expected
Beispiel #8
0
def test_searching():
    """Run a table of queries and compare the "id" of every hit."""
    all_ids = [
        "fieldtype", "format", "vector", "scorable", "stored", "unique",
        "const"
    ]
    with make_index().searcher() as searcher:
        # (query, expected ids in result order, extra search() keywords)
        cases = [
            (query.Term("text", u("format")), ["format", "vector"], {}),
            (query.Term("text", u("the")),
             ["fieldtype", "format", "const", "vector", "stored"], {}),
            (query.Prefix("text", u("st")),
             ["format", "vector", "stored"], {}),
            (query.Wildcard("id", u("*st*")), ["stored", "const"], {}),
            (query.TermRange("id", u("c"), u("s")),
             ["fieldtype", "format", "const"], {}),
            (query.NumericRange("subs", 10, 100),
             ["fieldtype", "format", "vector", "scorable"], {}),
            (query.Phrase("text", ["this", "field"]),
             ["scorable", "unique", "stored"], {"limit": None}),
            (query.Every(), list(all_ids), {}),
            (query.Every("subs"), list(all_ids), {}),
        ]
        for q, expected_ids, options in cases:
            hits = searcher.search(q, **options)
            assert_equal([doc["id"] for doc in hits], expected_ids)
Beispiel #9
0
 def test_can_select_fields(self):
     """Restricting ``fields`` returns only the requested keys."""
     self.whoosh_backend.add_doc({"id": "1", "type": "ticket"})
     found = self.whoosh_backend.query(query.Every(),
                                       fields=("id", "type"))
     self.print_result(found)
     first_doc = found.docs[0]
     self.assertEqual({'id': '1', 'type': 'ticket'}, first_doc)
Beispiel #10
0
    def test_groupedby_empty_field(self):
        """Count facet values when one document lacks the faceted field.

        Two documents are indexed; only the second has a ``status``. The
        search is grouped by ``type`` and ``status`` with
        ``sorting.Count``, and the facets loaded via ``_load_facets`` are
        expected to count the missing status under the ``None`` key.
        """
        schema = Schema(
            unique_id=ID(stored=True, unique=True),
            id=ID(stored=True),
            type=ID(stored=True),
            status=KEYWORD(stored=True),
            content=TEXT(stored=True),
        )

        ix = index.create_in(self.index_dir, schema=schema)
        with ix.writer() as w:
            w.add_document(unique_id=u"1", type=u"type1")
            w.add_document(unique_id=u"2", type=u"type2", status=u"New")

        facet_fields = (u"type", u"status")
        with ix.searcher() as s:
            r = s.search(
                query.Every(),
                groupedby=facet_fields,
                maptype=sorting.Count,
            )
            # Load the facets while the searcher is still open.
            facets = self._load_facets(r)

        expected = {
            'status': {
                None: 1,
                'New': 1,
            },
            'type': {
                'type1': 1,
                'type2': 1,
            },
        }
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected, facets)
Beispiel #11
0
 def test_can_search_time_with_utc_tzinfo(self):
     """A tz-aware datetime stored in ``time`` comes back equal."""
     utc = FixedOffset(0, 'UTC')
     timestamp = datetime(2012, 12, 13, 11, 8, 34, 711957, tzinfo=utc)
     document = dict(id="1", type="ticket", time=timestamp)
     self.whoosh_backend.add_doc(document)
     found = self.whoosh_backend.query(query.Every())
     self.print_result(found)
     self.assertEqual(timestamp, found.docs[0]["time"])
Beispiel #12
0
 def test_can_apply_multiple_sort_conditions_asc(self):
     """Sort ascending by ``type`` and then ascending by ``id``."""
     unsorted_docs = (
         ("2", "ticket2"),
         ("3", "ticket1"),
         ("4", "ticket3"),
         ("1", "ticket1"),
     )
     for doc_id, doc_type in unsorted_docs:
         self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

     ordering = [SortInstruction("type", ASC), SortInstruction("id", ASC)]
     found = self.whoosh_backend.query(
         query.Every(),
         sort=ordering,
         fields=("id", "type"),
     )
     self.print_result(found)

     expected = [
         {'type': 'ticket1', 'id': '1'},
         {'type': 'ticket1', 'id': '3'},
         {'type': 'ticket2', 'id': '2'},
         {'type': 'ticket3', 'id': '4'},
     ]
     self.assertEqual(expected, found.docs)
Beispiel #13
0
def test_query_facet():
    """Group documents with a QueryFacet built from three term ranges.

    Each letter is written through its own writer and committed with
    ``merge=False``.
    """
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)
    for docid, letter in enumerate(u("iacgbehdf")):
        writer = ix.writer()
        writer.add_document(id=docid, v=letter)
        writer.commit(merge=False)

    def ids(results):
        return [hit["id"] for hit in results]

    with ix.searcher() as searcher:
        a_to_c = query.TermRange("v", "a", "c")
        d_to_f = query.TermRange("v", "d", "f")
        g_to_i = query.TermRange("v", "g", "i")

        # Each range on its own.
        assert_equal(ids(searcher.search(a_to_c)), [1, 2, 4])
        assert_equal(ids(searcher.search(d_to_f)), [5, 7, 8])
        assert_equal(ids(searcher.search(g_to_i)), [0, 3, 6])

        ranges = sorting.QueryFacet({
            "a-c": a_to_c,
            "d-f": d_to_f,
            "g-i": g_to_i,
        })
        grouped = searcher.search(query.Every(), groupedby=ranges)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        expected = {
            "a-c": [1, 2, 4],
            "d-f": [5, 7, 8],
            "g-i": [0, 3, 6],
        }
        assert_equal(grouped.groups("facet"), expected)
Beispiel #14
0
    def parse_query(self, fieldname, qstring, boost=1.0):
        """Build a query for ``qstring`` on ``fieldname``.

        "*" yields an ``Every`` query on the field; any other string is
        converted with ``self._obj_to_bool`` and wrapped in a ``Term``.
        ``boost`` is passed through to the resulting query.
        """
        from whoosh import query

        if qstring != "*":
            value = self._obj_to_bool(qstring)
            return query.Term(fieldname, value, boost=boost)

        return query.Every(fieldname, boost=boost)
Beispiel #15
0
def test_delete_doc():
    """Deleting documents 2 and 5 removes them from a match-all search."""
    ix = make_index()
    for docnum in (2, 5):
        ix.delete_document(docnum)

    with ix.searcher() as searcher:
        remaining = [doc["id"] for doc in searcher.search(query.Every())]
        assert remaining == [
            "fieldtype", "format", "scorable", "stored", "const"
        ]
Beispiel #16
0
    def query_pre_process(self, query_parameters, context=None):
        """Turn the user's permissions into allow/deny security queries.

        Does nothing when ``self.enabled`` is false. Otherwise each
        permission tuple becomes an ``And`` of term queries, collected as
        allowed or denied, and both lists are handed to
        ``self.update_security_filter`` together with ``query_parameters``.
        """
        if not self.enabled:
            return

        # NOTE(review): ``context`` defaults to None but is dereferenced
        # here -- confirm callers always pass a context.
        permissions = self.get_user_permissions(context.req.authname)
        allowed_docs = []
        denied_docs = []
        for product, doc_type, doc_id, perm, denied in permissions:
            if product:
                clauses = [query.Term(IndexFields.PRODUCT, product)]
            else:
                # No product: match documents without a product field.
                clauses = [query.Not(query.Every(IndexFields.PRODUCT))]

            # '*' is a wildcard and adds no restriction.
            if doc_type != '*':
                clauses.append(query.Term(IndexFields.TYPE, doc_type))
            if doc_id != '*':
                clauses.append(query.Term(IndexFields.ID, doc_id))
            clauses.append(
                query.Term(IndexFields.REQUIRED_PERMISSION, perm))

            bucket = denied_docs if denied else allowed_docs
            bucket.append(query.And(clauses))

        self.update_security_filter(query_parameters, allowed_docs,
                                    denied_docs)
Beispiel #17
0
def test_relative_daterange():
    """Group dated documents into one-month ranges with DateRangeFacet.

    Documents are spaced 14 days 16 hours apart, starting 2001-01-01 and
    stopping before 2001-12-01.
    """
    from whoosh.support.relativedelta import relativedelta

    schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
    ix = RamStorage().create_index(schema)

    step = timedelta(days=14, hours=16)
    stop = datetime(2001, 12, 1)
    when = datetime(2001, 1, 1)
    docid = 0
    with ix.writer() as writer:
        while when < stop:
            writer.add_document(id=docid, date=when)
            when += step
            docid += 1

    # Expected document ids per month, January through November.
    ids_by_month = [
        [0, 1, 2],
        [3, 4],
        [5, 6],
        [7, 8],
        [9, 10],
        [11, 12],
        [13, 14],
        [15, 16],
        [17, 18],
        [19, 20],
        [21, 22],
    ]
    expected = {}
    for month, ids in enumerate(ids_by_month, 1):
        bounds = (datetime(2001, month, 1), datetime(2001, month + 1, 1))
        expected[bounds] = ids

    with ix.searcher() as searcher:
        one_month = relativedelta(months=1)
        by_month = sorting.DateRangeFacet("date", datetime(2001, 1, 1),
                                          datetime(2001, 12, 31), one_month)
        grouped = searcher.search(query.Every(), groupedby={"date": by_month})
        assert grouped.groups("date") == expected
Beispiel #18
0
def test_multifacet():
    """Group by the (tag, size) pair using a MultiFacet."""
    schema = fields.Schema(tag=fields.ID(stored=True),
                           size=fields.ID(stored=True))
    rows = [
        ("alfa", "small"),
        ("bravo", "medium"),
        ("alfa", "large"),
        ("bravo", "small"),
        ("alfa", "medium"),
        ("bravo", "medium"),
    ]
    with TempIndex(schema, "multifacet") as ix:
        writer = ix.writer()
        for tag, size in rows:
            writer.add_document(tag=u(tag), size=u(size))
        writer.commit()

        # Expected document numbers for each (tag, size) combination.
        expected = {
            (u('alfa'), u('small')): [0],
            (u('alfa'), u('medium')): [4],
            (u('alfa'), u('large')): [2],
            (u('bravo'), u('small')): [3],
            (u('bravo'), u('medium')): [1, 5],
        }

        with ix.searcher() as searcher:
            pair_facet = sorting.MultiFacet(["tag", "size"])
            grouped = searcher.search(query.Every(),
                                      groupedby={"tag/size": pair_facet})
            assert_equal(grouped.groups("tag/size"), expected)
Beispiel #19
0
def test_query_facet2():
    """Group two-keyword documents by query ranges with overlap allowed.

    Document ``i`` holds letter ``i`` of the domain plus the letter at
    index ``-i``.
    """
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for pos, letter in enumerate(domain):
            # Index -pos counts from the end; for pos == 0 it is index 0.
            writer.add_document(v="%s %s" % (letter, domain[-pos]))

    with ix.searcher() as searcher:
        ranges = {
            "a-c": query.TermRange("v", "a", "c"),
            "d-f": query.TermRange("v", "d", "f"),
            "g-i": query.TermRange("v", "g", "i"),
        }

        facets = sorting.Facets()
        facets.add_query("myfacet", ranges, allow_overlap=True)
        grouped = searcher.search(query.Every(), groupedby=facets)

        expected = {
            'a-c': [0, 1, 2, 7, 8],
            'd-f': [4, 5],
            'g-i': [3, 6],
        }
        assert_equal(grouped.groups("myfacet"), expected)
Beispiel #20
0
def test_sorting():
    """Sort a RamIndex by name, by size, and by size in reverse."""
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED,
                           name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)

    rows = [
        (u("bravo"), 10),
        (u("alfa"), 9),
        (u("delta"), 8),
        (u("charlie"), 7),
    ]
    with ix.writer() as writer:
        for docid, (name, size) in enumerate(rows):
            writer.add_document(id=docid, name=name, size=size)

    def ids(results):
        return [hit["id"] for hit in results]

    with ix.searcher() as searcher:
        everything = query.Every()

        assert_equal(ids(searcher.search(everything, sortedby="name")),
                     [1, 0, 3, 2])

        assert_equal(ids(searcher.search(everything, sortedby="size")),
                     [3, 2, 1, 0])

        largest_first = sorting.FieldFacet("size", reverse=True)
        assert_equal(ids(searcher.search(everything, sortedby=largest_first)),
                     [0, 1, 2, 3])
Beispiel #21
0
    def search(self, content, page=1, pagelen=50):
        """Run a paged search against ``self._index``.

        Args:
            content: Query text. "*" matches every document; anything
                else is parsed against the ``content`` field using the
                index schema.
            page: Page number passed to ``search_page``.
            pagelen: Page length passed to ``search_page``.

        Returns:
            A dict with ``page_total``, ``page_num``, ``page_len``,
            ``count`` and ``hits`` (the stored fields of each hit on the
            page), or ``None`` if any exception was raised; the error is
            logged instead of propagated.
        """
        try:
            # The ``with`` block closes the searcher even when the query
            # fails; previously it was never closed.
            with self._index.searcher() as searcher:
                if content == '*':
                    q = query.Every()
                else:
                    q = QueryParser(
                        'content', self._index.schema).parse(content)
                logging.debug('-- YLSearcher query %s', q)
                results = searcher.search_page(q, page, pagelen)

                # Copy everything out of the results before the searcher
                # is closed.
                hits = [hit.fields() for hit in results]
                return {
                    'page_total': results.pagecount,
                    'page_num': results.pagenum,
                    'page_len': results.pagelen,
                    'count': results.total,
                    'hits': hits
                }

        except Exception as e:
            # Best-effort search: log the failure and fall through to an
            # implicit ``None`` return, as before.
            logging.error('-- YLSearcher query %s', e)
Beispiel #22
0
 def run(self):
     """Open the index and check that all keys come back in sorted order."""
     ix = self.storage.open_index(self.indexname)
     with ix.searcher() as searcher:
         hits = searcher.search(query.Every(), sortedby="key", limit=None)
         keys = "".join([hit["key"] for hit in hits])
         # Uppercase letters are expected before lowercase ones.
         expected = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
                     "abcdefghijklmnopqrstuvwxyz")
         assert_equal(keys, expected)
Beispiel #23
0
 def check(method):
     """Apply ``method`` to a fresh TempIndex, then verify the tag groups."""
     expected = [
         (u('one'), [0, 6]),
         (u('three'), [1, 3, 7, 8]),
         (u('two'), [2, 4, 5]),
     ]
     with TempIndex(get_schema()) as ix:
         method(ix)
         with ix.searcher() as searcher:
             found = searcher.search(query.Every(), groupedby="tag")
             by_tag = found.groups()
             assert sorted(by_tag.items()) == expected
Beispiel #24
0
 def test_set_should_not_be_empty_fields(self):
     """Component, status and milestone must hold the null marker."""
     self.insert_ticket("test x")
     found = self.whoosh_backend.query(query.Every())
     self.print_result(found)
     ticket_doc = found.docs[0]
     null_marker = WhooshEmptyFacetErrorWorkaround.NULL_MARKER
     for field_name in ("component", "status", "milestone"):
         self.assertEqual(null_marker, ticket_doc[field_name])
Beispiel #25
0
 def allowed_documents():
     """Yield one ``And`` query per (product, permission) of the user.

     Uses ``self`` and ``context`` from the enclosing scope.
     """
     # TODO: add special case handling for trac_admin and product_owner
     for product, perm in self._get_all_user_permissions(context):
         # No product: match documents without a product field.
         prod_term = (query.Term(IndexFields.PRODUCT, product)
                      if product
                      else query.Not(query.Every(IndexFields.PRODUCT)))
         perm_term = query.Term(IndexFields.REQUIRED_PERMISSION, perm)
         yield query.And([prod_term, perm_term])
Beispiel #26
0
 def test_can_return_all_fields(self):
     """Without a ``fields`` argument the whole document comes back."""
     self.whoosh_backend.add_doc({"id": "1", "type": "ticket"})
     found = self.whoosh_backend.query(query.Every())
     self.print_result(found)
     expected = {
         'id': u'1',
         'type': u'ticket',
         'unique_id': u'empty:ticket:1',
         "score": 1.0,
     }
     self.assertEqual(expected, found.docs[0])
Beispiel #27
0
 def test_can_return_empty_result(self):
     """With nothing indexed, a sorted, faceted query reports zero hits."""
     ordering = [SortInstruction("type", ASC), SortInstruction("id", DESC)]
     found = self.whoosh_backend.query(
         query.Every(),
         sort=ordering,
         fields=("id", "type"),
         facets=("type", "product"))
     self.print_result(found)
     self.assertEqual(0, found.hits)
Beispiel #28
0
 def parse(self, query_string, context=None):
     """Parse ``query_string`` into a query object.

     After stripping, an empty string, "*" or "*:*" yields a match-all
     ``Every`` query. Anything else is converted to unicode, parsed by
     the parser from ``self._create_parser(context)``, and the result is
     tagged with the string in ``original_query_string``.
     """
     parser = self._create_parser(context)
     stripped = query_string.strip()
     if stripped in ("", "*", "*:*"):
         return query.Every()

     text = unicode(stripped)
     result = parser.parse(text)
     result.original_query_string = text
     return result
Beispiel #29
0
 def test_can_apply_filter_and_facet(self):
     """A type filter combined with a type facet keeps only the ticket."""
     for doc_id, doc_type in (("1", "ticket"), ("2", "wiki")):
         self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

     tickets_only = query.Term("type", "ticket")
     found = self.whoosh_backend.query(query.Every(),
                                       filter=tickets_only,
                                       facets=["type"])
     self.print_result(found)
     self.assertEqual(1, found.hits)
     self.assertEqual("ticket", found.docs[0]["type"])
Beispiel #30
0
 def query(self, parser):
     """Build the query for this node's time span.

     The field name is ``self.fieldname``, or the parser's when that is
     falsy. With neither bound set the result is an ``Every`` query on
     the field; otherwise it is a ``DateRange`` carrying ``self.boost``.
     """
     from whoosh import query

     fieldname = self.fieldname or parser.fieldname
     lower = self.starttime
     upper = self.endtime

     if lower is not None or upper is not None:
         return query.DateRange(fieldname, lower, upper, boost=self.boost)
     return query.Every(fieldname)