Exemple #1
0
def test_all_parents_deleted():
    """NestedParent should find nothing once every parent doc is deleted.

    Indexes four groups, each a "class" document followed by its "method"
    documents, deletes all four class documents by name, then checks that
    a parent query over children matching "add" comes back empty.
    """
    # (class name, method names) for each document group, in index order.
    hierarchy = [
        ("Index", ["add document", "add reader", "close"]),
        ("Accumulator", ["add", "get result"]),
        ("Calculator", ["add", "add all", "add some", "multiply", "close"]),
        ("Deleter", ["add", "delete"]),
    ]

    schema = fields.Schema(kind=fields.ID,
                           name=fields.KEYWORD(scorable=True, stored=True))
    ix = RamStorage().create_index(schema)

    with ix.writer() as writer:
        for class_name, method_names in hierarchy:
            # Each class and its methods are written as one group.
            with writer.group():
                writer.add_document(kind=u("class"), name=u(class_name))
                for method_name in method_names:
                    writer.add_document(kind=u("method"), name=u(method_name))

    with ix.writer() as writer:
        for class_name, _ in hierarchy:
            writer.delete_by_term("name", class_name)

    with ix.searcher() as searcher:
        parents = query.Term("kind", "class")
        children = query.Term("name", "add")
        results = searcher.search(query.NestedParent(parents, children))
        assert results.is_empty()
    def setup(self):
        """
        Lazily prepare the storage, schema, query parser and index.

        Raises ``IOError`` when file storage is enabled and ``self.path``
        is not writable. Sets ``self.setup_complete`` once done.
        """
        from haystack import connections

        created_directory = False

        if self.use_file_storage:
            # Create the index directory on first use.
            if not os.path.exists(self.path):
                os.makedirs(self.path)
                created_directory = True

            if not os.access(self.path, os.W_OK):
                raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

            self.storage = FileStorage(self.path)
        else:
            global LOCALS

            # Reuse the RAM store kept on LOCALS, creating it if it is unset.
            if getattr(LOCALS, 'RAM_STORE', None) is None:
                LOCALS.RAM_STORE = RamStorage()

            self.storage = LOCALS.RAM_STORE

        unified_index = connections[self.connection_alias].get_unified_index()
        self.content_field_name, self.schema = self.build_schema(unified_index.all_searchfields())
        self.parser = QueryParser(self.content_field_name, schema=self.schema)

        if created_directory:
            # A directory we just made cannot contain an index yet.
            self.index = self.storage.create_index(self.schema)
        else:
            try:
                self.index = self.storage.open_index(schema=self.schema)
            except index.EmptyIndexError:
                self.index = self.storage.create_index(self.schema)

        self.setup_complete = True
Exemple #3
0
def test_page_counts():
    """Check ``len``, ``pagecount`` and ``pagenum`` of paged results."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True))
    storage = RamStorage()
    ix = storage.create_index(schema)

    writer = ix.writer()
    for number in xrange(10):
        writer.add_document(id=text_type(number))
    writer.commit()

    # (pagenum, pagelen, expected pagecount, expected pagenum or None to skip)
    page_cases = [
        (1, 5, 2, None),
        (1, 5, 2, None),
        (2, 5, 2, 2),
        (1, 10, 1, 1),
    ]

    with ix.searcher(weighting=Frequency) as searcher:
        everything = query.Every("id")

        found = searcher.search(everything)
        assert_equal(len(found), 10)

        # Requesting page 0 is expected to raise ValueError.
        assert_raises(ValueError, searcher.search_page, everything, 0)

        for pagenum, pagelen, want_count, want_pagenum in page_cases:
            page = searcher.search_page(everything, pagenum, pagelen)
            # len() reports all matches, not just the ones on this page.
            assert_equal(len(page), 10)
            assert_equal(page.pagecount, want_count)
            if want_pagenum is not None:
                assert_equal(page.pagenum, want_pagenum)
Exemple #4
0
def test_resultspage():
    """Exercise ``search_page`` slicing, page counts and edge pages."""
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    words = ("alfa", "bravo", "bravo", "charlie", "delta")
    writer = ix.writer()
    for docid, combo in enumerate(permutations(words, 3)):
        writer.add_document(id=text_type(docid), content=u(" ").join(combo))
    writer.commit()

    with ix.searcher() as searcher:
        bravo = query.Term("content", u("bravo"))
        top_results = searcher.search(bravo, limit=10)
        top_ten = list(top_results)

        # Pages of five must line up with slices of the top-ten list.
        first = searcher.search_page(bravo, 1, pagelen=5)
        assert first.scored_length() == 5
        assert list(first) == top_ten[:5]
        assert first[10:] == []

        second = searcher.search_page(bravo, 2, pagelen=5)
        assert list(second) == top_ten[5:10]

        wide = searcher.search_page(bravo, 1, pagelen=10)
        assert len(wide) == 54
        assert wide.pagecount == 6
        final = searcher.search_page(bravo, 6, pagelen=10)
        assert len(list(final)) == 4
        assert final.is_last_page()

        with pytest.raises(ValueError):
            searcher.search_page(bravo, 0)
        # Page 10 does not exist; the result is expected to report page 6.
        assert searcher.search_page(bravo, 10).pagenum == 6

        # A term that matches nothing still yields a (last) page.
        nothing = searcher.search_page(query.Term("content", "glonk"), 1)
        assert len(nothing) == 0
        assert nothing.is_last_page()
Exemple #5
0
def test_snippets():
    """Highlight the best sentence per hit for a stemmed query on "key"."""
    analyzer = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=analyzer))
    ix = RamStorage().create_index(schema)

    paragraphs = [
        "Lay out the rough animation by creating the important poses where they occur on the timeline.",
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key.",
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses.",
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters.",
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame.",
    ]
    writer = ix.writer()
    for paragraph in paragraphs:
        writer.add_document(text=u(paragraph))
    writer.commit()

    # One uppercased sentence fragment is expected per matching document.
    expected = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results"
    ]

    with ix.searcher() as searcher:
        parser = qparser.QueryParser("text", ix.schema)
        parsed = parser.parse(u("key"))
        results = searcher.search(parsed, terms=True)
        results.fragmenter = highlight.SentenceFragmenter()
        results.formatter = highlight.UppercaseFormatter()

        snippets = [hit.highlights("text", top=1) for hit in results]
        # Compare order-insensitively.
        assert_equal(sorted(snippets), sorted(expected))
def test_compound_sort():
    """Sort on three facets at once: a ascending, b reversed, c ascending."""
    keyword = fields.KEYWORD(stored=True, sortable=True)
    schema = fields.Schema(a=keyword, b=keyword, c=keyword)
    ix = RamStorage().create_index(schema)

    a_values = u("alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo").split()
    b_values = u("alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa").split()
    c_values = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet").split()
    columns = (a_values, b_values, c_values)
    assert all(len(column) == 10 for column in columns)

    with ix.writer() as writer:
        # The columns are equally long, so zip walks all ten rows.
        for a, b, c in zip(*columns):
            writer.add_document(a=a, b=b, c=c)

    with ix.searcher() as searcher:
        facets = [sorting.FieldFacet("a"),
                  sorting.FieldFacet("b", reverse=True),
                  sorting.FieldFacet("c")]

        results = searcher.search(query.Every(), sortedby=facets)
        rows = [" ".join((hit["a"], hit["b"], hit["c"])) for hit in results]

        assert rows == [
            "alfa charlie charlie",
            "alfa charlie india",
            "alfa bravo echo",
            "alfa alfa alfa",
            "alfa alfa golf",
            "bravo charlie foxtrot",
            "bravo bravo bravo",
            "bravo bravo hotel",
            "bravo alfa delta",
            "bravo alfa juliet",
        ]
def test_translate():
    """Check ``TranslateFacet`` sorting against keys computed in plain Python.

    Declared as a function rather than a class so the body runs when the
    test runner calls it, not as a side effect of importing the module.
    """
    domain = [("alfa", 100, 50), ("bravo", 20, 80), ("charlie", 10, 10),
              ("delta", 82, 39), ("echo", 20, 73), ("foxtrot", 81, 59),
              ("golf", 39, 93), ("hotel", 57, 48), ("india", 84, 75),
              ]

    schema = fields.Schema(name=fields.TEXT(sortable=True),
                           a=fields.NUMERIC(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for name, a, b in domain:
            w.add_document(name=u(name), a=a, b=b)

    with ix.searcher() as s:
        q = query.Every()

        # Baseline: just sort by a field
        r = s.search(q, sortedby="a")
        assert " ".join([hit["name"] for hit in r]) == "charlie bravo echo golf hotel foxtrot delta india alfa"

        # Sort by reversed name
        target = [x[0] for x in sorted(domain, key=lambda x: x[0][::-1])]
        tf = sorting.TranslateFacet(lambda name: name[::-1], sorting.FieldFacet("name"))
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target

        # Sort by average of a and b
        def avg(a, b):
            return (a + b) / 2

        # The expected order uses the same expression as avg().
        target = [x[0] for x in sorted(domain, key=lambda x: (x[1] + x[2]) / 2)]
        af = sorting.FieldFacet("a")
        bf = sorting.FieldFacet("b")
        tf = sorting.TranslateFacet(avg, af, bf)
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target
def test_terms():
    """Check ``matched_terms()`` on the results and on each individual hit.

    The searcher is opened in a ``with`` block so it is closed even when an
    assertion fails.
    """
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u("alfa sierra tango"))
        w.add_document(text=u("bravo charlie delta"))
        w.add_document(text=u("charlie delta echo"))
        w.add_document(text=u("delta echo foxtrot"))

    qp = qparser.QueryParser("text", ix.schema)
    q = qp.parse(u("(bravo AND charlie) OR foxtrot OR missing"))

    fieldobj = schema["text"]

    def txts(tset):
        # Each entry's second item is the term bytes; decode and sort them.
        return sorted(fieldobj.from_bytes(t[1]) for t in tset)

    with ix.searcher() as s:
        r = s.search(q, terms=True)

        # "missing" is in no document, so it is not reported as matched.
        assert txts(r.matched_terms()) == ["bravo", "charlie", "foxtrot"]
        for hit in r:
            value = hit["text"]
            for txt in txts(hit.matched_terms()):
                assert txt in value
def test_extend_empty():
    """Extending an empty results copy with a non-empty one adopts its hits."""
    schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD)
    ix = RamStorage().create_index(schema)

    word_lists = [
        u("alfa bravo charlie"),
        u("bravo charlie delta"),
        u("charlie delta echo"),
        u("delta echo foxtrot"),
        u("echo foxtrot golf"),
    ]
    writer = ix.writer()
    # Document ids run from 1 to 5.
    for docid, words in enumerate(word_lists, 1):
        writer.add_document(id=docid, words=words)
    writer.commit()

    with ix.searcher() as searcher:
        # "hotel" is in no document, so these results are empty.
        empty = searcher.search(query.Term("words", u("hotel")))
        empty_copy = empty.copy()
        # "delta" occurs in documents 2, 3 and 4.
        found = searcher.search(query.Term("words", u("delta")))
        found_copy = found.copy()

        empty_copy.extend(found_copy)
        assert [hit["id"] for hit in empty_copy] == [2, 3, 4]
        assert empty_copy.scored_length() == 3
Exemple #10
0
 def setup(self):
     """
     Defers loading until needed.

     Prepares ``self.storage``, ``self.schema``, ``self.parser`` and
     ``self.index``, then sets ``self.setup_complete``. Raises ``IOError``
     when file storage is enabled and ``settings.HAYSTACK_WHOOSH_PATH`` is
     not writable.
     """
     new_index = False
     
     # Make sure the index is there.
     if self.use_file_storage and not os.path.exists(settings.HAYSTACK_WHOOSH_PATH):
         os.makedirs(settings.HAYSTACK_WHOOSH_PATH)
         new_index = True
     
     if self.use_file_storage and not os.access(settings.HAYSTACK_WHOOSH_PATH, os.W_OK):
         raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % settings.HAYSTACK_WHOOSH_PATH)
     
     if self.use_file_storage:
         self.storage = FileStorage(settings.HAYSTACK_WHOOSH_PATH)
     else:
         global LOCALS
         
         # getattr treats a LOCALS object with no RAM_STORE attribute as
         # "no store yet" instead of raising AttributeError.
         # NOTE(review): presumably LOCALS is thread-local, so the attribute
         # can be missing in a new thread -- confirm.
         if getattr(LOCALS, 'RAM_STORE', None) is None:
             LOCALS.RAM_STORE = RamStorage()
         
         self.storage = LOCALS.RAM_STORE
     
     self.content_field_name, self.schema = self.build_schema(self.site.all_searchfields())
     self.parser = QueryParser(self.content_field_name, schema=self.schema)
     
     if new_index is True:
         self.index = self.storage.create_index(self.schema)
     else:
         try:
             self.index = self.storage.open_index(schema=self.schema)
         except index.EmptyIndexError:
             # The storage exists but holds no index yet; create one.
             self.index = self.storage.create_index(self.schema)
     
     self.setup_complete = True
Exemple #11
0
def test_highlight_daterange():
    """Highlight a term-query hit, then a date-range hit with no terms.

    The writer and searcher are used as context managers so the writer is
    committed and the searcher closed even when an assertion fails.
    """
    from datetime import datetime

    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           title=fields.TEXT(stored=True),
                           content=fields.TEXT(stored=True),
                           released=fields.DATETIME(stored=True))
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        w.update_document(
            id=u('1'),
            title=u('Life Aquatic'),
            content=u('A nautic film crew sets out to kill a gigantic shark.'),
            released=datetime(2004, 12, 25)
        )
        w.update_document(
            id=u('2'),
            title=u('Darjeeling Limited'),
            content=u('Three brothers meet in India for a life changing train ' +
                      'journey.'),
            released=datetime(2007, 10, 27)
        )

    with ix.searcher() as s:
        r = s.search(Term('content', u('train')), terms=True)
        assert_equal(len(r), 1)
        assert_equal(r[0]["id"], "2")
        assert_equal(r[0].highlights("content"),
                     'for a life changing ' +
                     '<b class="match term0">train</b> journey')

        # The date-range query matches no "content" terms, so the
        # highlight for that field is expected to be empty.
        r = s.search(DateRange('released', datetime(2007, 1, 1), None))
        assert_equal(len(r), 1)
        assert_equal(r[0].highlights("content"), '')
Exemple #12
0
    def make_index(self):
        """Build and return an in-memory index holding five sample documents."""
        schema = fields.Schema(key=fields.ID(stored=True),
                               name=fields.TEXT,
                               value=fields.TEXT)
        ix = RamStorage().create_index(schema)

        # (key, name, value) for each document, in insertion order.
        rows = [
            (u"A", u"Yellow brown", u"Blue red green render purple?"),
            (u"B", u"Alpha beta", u"Gamma delta epsilon omega."),
            (u"C", u"One two", u"Three rendered four five."),
            (u"D", u"Quick went", u"Every red town."),
            (u"E", u"Yellow uptown", u"Interest rendering outer photo!"),
        ]

        writer = ix.writer()
        for key, name, value in rows:
            writer.add_document(key=key, name=name, value=value)
        writer.commit()

        return ix
Exemple #13
0
def test_persistent_cache():
    """A field cache built by one reader should be available to a later one."""
    schema = fields.Schema(id=fields.ID(stored=True))
    storage = RamStorage()
    ix = storage.create_index(schema)
    with ix.writer() as writer:
        for word in u("charlie alfa echo bravo delta").split():
            writer.add_document(id=word)

    # Build the cache through one reader, then drop our reference to it.
    ix = storage.open_index()
    with ix.reader() as reader:
        cache = reader.fieldcache("id")
        del cache
    gc.collect()

    # A reader on a re-opened index should see the cache as available but
    # not loaded until it is asked for.
    ix = storage.open_index()
    with ix.reader() as reader:
        assert reader.fieldcache_available("id")
        assert not reader.fieldcache_loaded("id")
        cache = reader.fieldcache("id")
        assert reader.fieldcache_loaded("id")
        assert_equal(list(cache.order), [3, 1, 5, 2, 4])
        expected_texts = [u('\uffff'), 'alfa', 'bravo', 'charlie', 'delta', 'echo']
        assert_equal(list(cache.texts), expected_texts)
Exemple #14
0
def test_pages_with_filter():
    """``search_page`` must apply the ``filter`` query before paging."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True),
                           type=fields.TEXT(),
                           c=fields.TEXT)
    ix = RamStorage().create_index(schema)

    # (id, type, content); each document has one fewer "alfa" than the last.
    rows = [
        (u("1"), u("odd"), u("alfa alfa alfa alfa alfa alfa")),
        (u("2"), u("even"), u("alfa alfa alfa alfa alfa")),
        (u("3"), u("odd"), u("alfa alfa alfa alfa")),
        (u("4"), u("even"), u("alfa alfa alfa")),
        (u("5"), u("odd"), u("alfa alfa")),
        (u("6"), u("even"), u("alfa")),
    ]
    writer = ix.writer()
    for docid, doctype, content in rows:
        writer.add_document(id=docid, type=doctype, c=content)
    writer.commit()

    with ix.searcher(weighting=Frequency) as searcher:
        alfa = query.Term("c", u("alfa"))
        only_even = query.Term("type", u("even"))

        results = searcher.search(alfa, filter=only_even)
        assert [hit["id"] for hit in results] == ["2", "4", "6"]

        # Three filtered hits at two per page: page 2 holds just the last.
        page = searcher.search_page(alfa, 2, pagelen=2, filter=only_even)
        assert [hit["id"] for hit in page] == ["6"]
Exemple #15
0
    def test_missing_field_scoring(self):
        """Searching still works when a document omits one of the fields."""
        schema = fields.Schema(name=fields.TEXT(stored=True),
                               hobbies=fields.TEXT(stored=True))
        ix = RamStorage().create_index(schema)

        w = ix.writer()
        w.add_document(name=u'Frank', hobbies=u'baseball, basketball')
        w.commit()
        first_segment = ix.segments[0]
        self.assertEqual(first_segment.field_length(0), 2)  # hobbies
        self.assertEqual(first_segment.field_length(1), 1)  # name

        # The second document has no "hobbies" value.
        w = ix.writer()
        w.add_document(name=u'Jonny')
        w.commit()
        self.assertEqual(len(ix.segments), 1)
        merged_segment = ix.segments[0]
        self.assertEqual(merged_segment.field_length(0), 2)  # hobbies
        self.assertEqual(merged_segment.field_length(1), 2)  # name

        searcher = Searcher(ix.reader())
        multi_parser = qparser.MultifieldParser(['name', 'hobbies'],
                                                schema=schema)
        parsed = multi_parser.parse(u"baseball")
        hits = searcher.search(parsed)
        self.assertEqual(len(hits), 1)
Exemple #16
0
    def test_phrase_score(self):
        """A document repeating the phrase should outscore one with it once.

        Uses ``assertTrue`` instead of the deprecated ``assert_`` alias,
        which Python 3.12 removed from ``unittest.TestCase``.
        """
        schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
        storage = RamStorage()
        ix = storage.create_index(schema)
        writer = ix.writer()
        writer.add_document(name=u"A",
                            value=u"Little Miss Muffet sat on a tuffet")
        writer.add_document(
            name=u"D",
            value=u"Gibberish blonk falunk miss muffet sat tuffet garbonzo")
        writer.add_document(name=u"E", value=u"Blah blah blah pancakes")
        writer.add_document(name=u"F",
                            value=u"Little miss muffet little miss muffet")
        writer.commit()

        searcher = ix.searcher()
        q = query.Phrase("value", [u"little", u"miss", u"muffet"])
        sc = q.scorer(searcher)
        # The first match is expected at document 0 ("A").
        self.assertEqual(sc.id, 0)
        score1 = sc.score()
        self.assertTrue(score1 > 0)
        sc.next()
        # The next match is expected at document 3 ("F"), which contains
        # the phrase twice.
        self.assertEqual(sc.id, 3)
        self.assertTrue(sc.score() > score1)
Exemple #17
0
def test_lengths_ram():
    """Check per-document, total and maximum field lengths in a RAM index.

    The writer and reader are used as context managers so the reader is
    closed even when an assertion fails.
    """
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(s)
    with ix.writer() as w:
        w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
        w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
        w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))

    with ix.reader() as dr:
        assert_equal(dr.stored_fields(0)["f1"], "A B C D E")
        # The per-document length is the number of tokens in that field.
        assert_equal(dr.doc_field_length(0, "f1"), 5)
        assert_equal(dr.doc_field_length(1, "f1"), 8)
        assert_equal(dr.doc_field_length(2, "f1"), 3)
        assert_equal(dr.doc_field_length(0, "f2"), 3)
        assert_equal(dr.doc_field_length(1, "f2"), 4)
        assert_equal(dr.doc_field_length(2, "f2"), 7)

        # Totals: 5 + 8 + 3 for f1 and 3 + 4 + 7 for f2.
        assert_equal(dr.field_length("f1"), 16)
        assert_equal(dr.field_length("f2"), 14)
        assert_equal(dr.max_field_length("f1"), 8)
        assert_equal(dr.max_field_length("f2"), 7)
Exemple #18
0
def test_outofdate():
    """A searcher goes stale only once a later writer commits."""
    schema = fields.Schema(id=fields.ID(stored=True))
    storage = RamStorage()
    ix = storage.create_index(schema)

    writer = ix.writer()
    for docid in ("1", "2"):
        writer.add_document(id=u(docid))
    writer.commit()

    searcher = ix.searcher()
    assert searcher.up_to_date()

    writer = ix.writer()
    for docid in ("3", "4"):
        writer.add_document(id=u(docid))

    # Uncommitted additions must not make the searcher stale...
    assert searcher.up_to_date()
    writer.commit()
    # ...but the commit does.
    assert not searcher.up_to_date()

    searcher = searcher.refresh()
    assert searcher.up_to_date()
    searcher.close()
def test_reverse_collapse():
    """Collapse hits on ``tags``, choosing survivors by reversed ``order``.

    Two documents share the tag "third"; the test expects "/d"
    (order 25.0) to survive over "/c" (order 15.0).
    """
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT,
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD,
                           order=fields.NUMERIC(stored=True))

    documents = [
        dict(title=u"First document", content=u"This is my document!",
             path=u"/a", tags=u"first", order=20.0),
        dict(title=u"Second document", content=u"This is the second example.",
             path=u"/b", tags=u"second", order=12.0),
        dict(title=u"Third document", content=u"Examples are many.",
             path=u"/c", tags=u"third", order=15.0),
        dict(title=u"Thirdish document", content=u"Examples are too many.",
             path=u"/d", tags=u"third", order=25.0),
    ]

    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for document in documents:
            writer.add_document(**document)

    with ix.searcher() as searcher:
        everything = searcher.search(query.Every('content'))
        assert [hit["path"] for hit in everything] == ["/a", "/b", "/c", "/d"]

        any_document = query.Or([query.Term("title", "document"),
                                 query.Term("content", "document"),
                                 query.Term("tags", "document")])
        by_tag = sorting.FieldFacet("tags")
        by_order_desc = sorting.FieldFacet("order", reverse=True)
        collapsed = searcher.search(any_document, collapse=by_tag,
                                    collapse_order=by_order_desc, terms=True)
        assert [hit["path"] for hit in collapsed] == ["/a", "/b", "/d"]
Exemple #20
0
def test_lengths_ram():
    """Check per-document, total and maximum field lengths in a RAM index.

    The writer and reader are used as context managers so the reader is
    closed even when an assertion fails.
    """
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(s)
    with ix.writer() as w:
        w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
        w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
        w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))

    with ix.reader() as dr:
        assert dr.stored_fields(0)["f1"] == "A B C D E"
        # The per-document length is the number of tokens in that field.
        assert dr.doc_field_length(0, "f1") == 5
        assert dr.doc_field_length(1, "f1") == 8
        assert dr.doc_field_length(2, "f1") == 3
        assert dr.doc_field_length(0, "f2") == 3
        assert dr.doc_field_length(1, "f2") == 4
        assert dr.doc_field_length(2, "f2") == 7

        # Totals: 5 + 8 + 3 for f1 and 3 + 4 + 7 for f2.
        assert dr.field_length("f1") == 16
        assert dr.field_length("f2") == 14
        assert dr.max_field_length("f1") == 8
        assert dr.max_field_length("f2") == 7
Exemple #21
0
def test_daterange_facet():
    """Group documents into five-day date buckets with ``DateRangeFacet``.

    The document without a date is expected under the ``None`` key.
    """
    schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
    ix = RamStorage().create_index(schema)

    # Day of January 2001 for each document id; None means "no date field".
    january_days = [15, 10, None, 3, 8, 6]
    writer = ix.writer()
    for docid, day in enumerate(january_days):
        if day is None:
            writer.add_document(id=docid)
        else:
            writer.add_document(id=docid, date=datetime(2001, 1, day))
    writer.commit()

    with ix.searcher() as searcher:
        facet = sorting.DateRangeFacet("date", datetime(2001, 1, 1),
                                       datetime(2001, 1, 20),
                                       timedelta(days=5))
        results = searcher.search(query.Every(), groupedby={"date": facet})

        def jan(day):
            return datetime(2001, 1, day, 0, 0)

        expected = {
            (jan(1), jan(6)): [3],
            (jan(6), jan(11)): [1, 4, 5],
            (jan(11), jan(16)): [0],
            None: [2]
        }
        assert_equal(results.groups("date"), expected)
Exemple #22
0
def test_decimal_numeric():
    """Index ``Decimal`` values in a NUMERIC field and find them by text."""
    from decimal import Decimal

    deci_field = fields.NUMERIC(int, decimal_places=4)
    schema = fields.Schema(id=fields.ID(stored=True), deci=deci_field)
    ix = RamStorage().create_index(schema)

    # Text encoding must round-trip a Decimal unchanged.
    sample = Decimal("123.56")
    assert_equal(deci_field.from_text(deci_field.to_text(sample)), sample)

    rows = [
        (u("a"), Decimal("123.56")),
        (u("b"), Decimal("0.536255")),
        (u("c"), Decimal("2.5255")),
        (u("d"), Decimal("58")),
    ]
    writer = ix.writer()
    for docid, value in rows:
        writer.add_document(id=docid, deci=value)
    writer.commit()

    with ix.searcher() as searcher:
        parser = qparser.QueryParser("deci", schema)

        for text, expected_id in (("123.56", "a"), ("0.536255", "b")):
            results = searcher.search(parser.parse(text))
            assert_equal(results[0]["id"], expected_id)
Exemple #23
0
def test_creation():
    """Create an index over several field types and commit two documents."""
    schema = fields.Schema(content=fields.TEXT(phrase=True),
                           title=fields.TEXT(stored=True),
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD(stored=True),
                           quick=fields.NGRAM,
                           note=fields.STORED)
    storage = RamStorage()
    ix = storage.create_index(schema)

    documents = [
        dict(title=u("First"),
             content=u("This is the first document"),
             path=u("/a"),
             tags=u("first second third"),
             quick=u("First document"),
             note=u("This is the first document")),
        dict(content=u("Let's try this again"),
             title=u("Second"),
             path=u("/b"),
             tags=u("Uno Dos Tres"),
             quick=u("Second document"),
             note=u("This is the second document")),
    ]

    writer = ix.writer()
    for document in documents:
        writer.add_document(**document)
    writer.commit()
Exemple #24
0
def test_phrase_score():
    """A document repeating the phrase should weigh more than one with it once."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    # (name, value) per document; document numbers follow list order.
    rows = [
        (u("A"), u("Little Miss Muffet sat on a tuffet")),
        (u("D"), u("Gibberish blonk falunk miss muffet sat " +
                   "tuffet garbonzo")),
        (u("E"), u("Blah blah blah pancakes")),
        (u("F"), u("Little miss muffet little miss muffet")),
    ]
    w = ix.writer()
    for name, value in rows:
        w.add_document(name=name, value=value)
    w.commit()

    with ix.searcher() as searcher:
        phrase = query.Phrase("value", [u("little"), u("miss"), u("muffet")])
        matcher = phrase.matcher(searcher)

        # The first match is expected at document 0 ("A").
        assert_equal(matcher.id(), 0)
        first_weight = matcher.weight()
        assert first_weight > 0

        # The next match is expected at document 3 ("F"), which contains
        # the phrase twice.
        matcher.next()
        assert_equal(matcher.id(), 3)
        assert matcher.weight() > first_weight
Exemple #25
0
def test_all():
    """Smoke-test each weighting class by searching a random index with it."""
    vocabulary = [u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"), u("foxtrot")]
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for _ in xrange(100):
        # Each document is 10 to 20 words drawn at random.
        length = randint(10, 20)
        words = [choice(vocabulary) for _ in xrange(length)]
        writer.add_document(text=u(" ").join(words))
    writer.commit()

    # List ABCs that should not be tested
    abcs = ()
    # Constructor arguments for the weighting classes that need them.
    init_args = {"MultiWeighting": ([scoring.BM25F()], {"text": scoring.Frequency()}),
                 "ReverseWeighting": ([scoring.BM25F()], {})}
    no_args = ((), {})

    for wclass in _weighting_classes(abcs):
        args, kwargs = init_args.get(wclass.__name__, no_args)
        try:
            weighting = wclass(*args, **kwargs)
        except TypeError:
            # Re-raise with the offending class named in the message.
            e = sys.exc_info()[1]
            raise TypeError("Error instantiating %r: %s" % (wclass, e))

        with ix.searcher(weighting=weighting) as searcher:
            try:
                for word in vocabulary:
                    searcher.search(query.Term("text", word))
            except Exception:
                # Tag the error with the weighting class, then re-raise it.
                e = sys.exc_info()[1]
                e.msg = "Error searching with %r: %s" % (wclass, e)
                raise
Exemple #26
0
def test_deleted_wildcard():
    """``Every`` must not return documents deleted by term."""
    schema = fields.Schema(id=fields.ID(stored=True))
    storage = RamStorage()
    ix = storage.create_index(schema)

    writer = ix.writer()
    for word in ("alfa", "bravo", "charlie", "delta", "echo", "foxtrot"):
        writer.add_document(id=u(word))
    writer.commit()

    # Delete half of the documents in a second commit.
    writer = ix.writer()
    for word in ("bravo", "delta", "echo"):
        writer.delete_by_term("id", word)
    writer.commit()

    with ix.searcher() as searcher:
        results = searcher.search(query.Every("id"))
        remaining = sorted([hit['id'] for hit in results])
        assert_equal(remaining, ["alfa", "charlie", "foxtrot"])
Exemple #27
0
def test_workflow_easy():
    """Index, search and highlight: the basic end-to-end workflow."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    titles = [
        (u("1"), u("The man who wasn't there")),
        (u("2"), u("The dog who barked at midnight")),
        (u("3"), u("The invisible man")),
        (u("4"), u("The girl with the dragon tattoo")),
        (u("5"), u("The woman who disappeared")),
    ]
    writer = ix.writer()
    for docid, title in titles:
        writer.add_document(id=docid, title=title)
    writer.commit()

    with ix.searcher() as searcher:
        # Parse the user query
        title_parser = qparser.QueryParser("title", schema=ix.schema)
        user_query = title_parser.parse(u("man"))
        results = searcher.search(user_query, terms=True)
        assert len(results) == 2

        # Highlight the whole title with matched terms uppercased.
        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        highlighted = [hit.highlights("title") for hit in results]
        assert highlighted == ["The invisible MAN", "The MAN who wasn't there"]
Exemple #28
0
def test_ordered():
    """``Ordered`` hits must contain the three terms in the given order."""
    vocabulary = u("alfa bravo charlie delta echo foxtrot").split(" ")

    schema = fields.Schema(f=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    # One document per permutation of the vocabulary.
    for arrangement in permutations(vocabulary):
        w.add_document(f=u(" ").join(arrangement))
    w.commit()

    required = ("alfa", "charlie", "echo")

    with ix.searcher() as searcher:
        ordered = query.Ordered([query.Term("f", u("alfa")),
                                 query.Term("f", u("charlie")),
                                 query.Term("f", u("echo"))])
        for hit in searcher.search(ordered):
            words = hit["f"].split()
            for word in required:
                assert word in words
            a, c, e = [words.index(word) for word in required]
            assert a < c < e, repr(words)
Exemple #29
0
def test_query_facet():
    """Group documents by which of three term-range queries they match."""
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)

    # One commit per document, with merging turned off.
    for docid, letter in enumerate(u("iacgbehdf")):
        writer = ix.writer()
        writer.add_document(id=docid, v=letter)
        writer.commit(merge=False)

    with ix.searcher() as searcher:
        # (group label, range query, ids expected in that range)
        buckets = [
            ("a-c", query.TermRange("v", "a", "c"), [1, 2, 4]),
            ("d-f", query.TermRange("v", "d", "f"), [5, 7, 8]),
            ("g-i", query.TermRange("v", "g", "i"), [0, 3, 6]),
        ]

        for _, range_query, expected_ids in buckets:
            found = [hit["id"] for hit in searcher.search(range_query)]
            assert found == expected_ids

        facet = sorting.QueryFacet(dict((label, range_query)
                                        for label, range_query, _ in buckets))
        results = searcher.search(query.Every(), groupedby=facet)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        expected_groups = dict((label, expected_ids)
                               for label, _, expected_ids in buckets)
        assert results.groups("facet") == expected_groups
Exemple #30
0
def test_stored_fields():
    """Only stored fields come back from ``stored_fields`` and ``document``.

    Field ``c`` is a plain KEYWORD field and is expected to be absent from
    every returned dictionary.
    """
    s = fields.Schema(a=fields.ID(stored=True),
                      b=fields.STORED,
                      c=fields.KEYWORD,
                      d=fields.TEXT(stored=True))
    st = RamStorage()
    ix = st.create_index(s)

    # (a, b, c, d) for each document, in document-number order.
    rows = [
        (u("1"), "a", u("zulu"), u("Alfa")),
        (u("2"), "b", u("yankee"), u("Bravo")),
        (u("3"), "c", u("xray"), u("Charlie")),
    ]

    writer = ix.writer()
    for a, b, c, d in rows:
        writer.add_document(a=a, b=b, c=c, d=d)
    writer.commit()

    def stored(row):
        # The dictionary expected back for a row: everything except "c".
        a, b, _, d = row
        return {"a": a, "b": b, "d": d}

    with ix.searcher() as sr:
        # Lookup by document number.
        assert_equal(sr.stored_fields(0), stored(rows[0]))
        assert_equal(sr.stored_fields(2), stored(rows[2]))

        # Lookup by field value.
        assert_equal(sr.document(a=u("1")), stored(rows[0]))
        assert_equal(sr.document(a=u("2")), stored(rows[1]))