def test_sorting():
    # Verify ascending sorts on an ID field and a NUMERIC field, plus a
    # reversed sort expressed through an explicit FieldFacet, all against
    # the same four-document in-memory index.
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED,
                           name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)
    with ix.writer() as w:
        w.add_document(id=0, name=u("bravo"), size=10)
        w.add_document(id=1, name=u("alfa"), size=9)
        w.add_document(id=2, name=u("delta"), size=8)
        w.add_document(id=3, name=u("charlie"), size=7)

    with ix.searcher() as s:
        everything = query.Every()

        # Ascending by name: alfa, bravo, charlie, delta.
        results = s.search(everything, sortedby="name")
        assert_equal([hit["id"] for hit in results], [1, 0, 3, 2])

        # Ascending by size: 7, 8, 9, 10.
        results = s.search(everything, sortedby="size")
        assert_equal([hit["id"] for hit in results], [3, 2, 1, 0])

        # Descending by size via a facet with reverse=True.
        facet = sorting.FieldFacet("size", reverse=True)
        results = s.search(everything, sortedby=facet)
        assert_equal([hit["id"] for hit in results], [0, 1, 2, 3])
def test_missing_term_docfreq():
    # doc_frequency must raise TermNotFound for a field the schema does not
    # define, but return 0 for a defined field queried with an unseen term.
    schema = fields.Schema(id=fields.ID)
    ix = RamIndex(schema)
    for value in (u("alfa bravo charlie"),
                  u("charlie delta echo"),
                  u("sierra tango xray")):
        ix.add_document(id=value)

    assert_raises(TermNotFound, ix.doc_frequency, "content", "foo")
    assert_equal(ix.doc_frequency("id", "foo"), 0)
def test_update():
    # Two passes of update_document keyed on the unique "id" field (the
    # first letter of each word): the second pass replaces every document
    # from the first, so four deletions remain alongside four live docs.
    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           text=fields.ID(stored=True))
    ix = RamIndex(schema)

    first_batch = u("alfa bravo charlie delta").split()
    second_batch = u("apple burrito cat dollhouse").split()
    for batch in (first_batch, second_batch):
        for word in batch:
            ix.update_document(id=word[0], text=word)

    assert ix.has_deletions()
    assert_equal(ix.deleted, set([0, 1, 2, 3]))
    assert_equal(ix.doc_count(), 4)
    stored = [(d["id"], d["text"]) for d in ix.all_stored_fields()]
    assert_equal(stored, [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                          ("d", "dollhouse")])
def test_update():
    # NOTE(review): this redefines test_update and shadows the earlier,
    # near-identical definition above, so only this copy is collected and
    # run by the test framework. One of the two should be renamed or
    # removed.
    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           text=fields.ID(stored=True))
    ix = RamIndex(schema)

    # Second pass replaces every document from the first via the unique
    # "id" key (the first letter of each word).
    for word in u("alfa bravo charlie delta").split():
        ix.update_document(id=word[0], text=word)
    for word in u("apple burrito cat dollhouse").split():
        ix.update_document(id=word[0], text=word)

    assert ix.has_deletions()
    assert_equal(ix.deleted, set([0, 1, 2, 3]))
    assert_equal(ix.doc_count(), 4)
    # Consistency fix: the original ended with a bare `assert x == y`;
    # use assert_equal like every other comparison in this file so a
    # failure reports both values.
    assert_equal([(d["id"], d["text"]) for d in ix.all_stored_fields()],
                 [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                  ("d", "dollhouse")])
def test_threaded():
    # One thread repeatedly updates the same unique key while another
    # thread searches for it; the index should always report exactly one
    # live document for that key, during and after the writes.
    from threading import Thread

    class WriterThread(Thread):
        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix

        def run(self):
            ix = self.ix
            for i in xrange(1000):
                ix.update_document(id=text_type(i), key=u("a"))

    class ReaderThread(Thread):
        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix
            self.go = True

        def run(self):
            s = self.ix.searcher()
            while self.go:
                r = s.search(query.Term("key", u("a")))
                assert_equal(len(r), 1)

    schema = fields.Schema(id=fields.ID(stored=True),
                           key=fields.ID(unique=True, stored=True))
    ix = RamIndex(schema)
    writer = WriterThread(ix)
    reader = ReaderThread(ix)
    writer.start()
    reader.start()
    writer.join()
    reader.go = False
    reader.join()

    assert_equal(ix.doc_count(), 1)
    with ix.searcher() as s:
        assert_equal(len(list(s.documents(key="a"))), 1)
def test_block_info():
    # Check the block-quality statistics exposed by a postings reader for
    # the term "bravo", which appears in documents of length 3, 2, and 8.
    schema = fields.Schema(key=fields.KEYWORD)
    ix = RamIndex(schema)
    documents = (u("alfa bravo charlie"),
                 u("bravo delta"),
                 u("charlie delta echo foxtrot"),
                 u("delta echo foxtrot golf hotel india"),
                 u("echo foxtrot golf hotel india juliet alfa bravo"))
    for keys in documents:
        ix.add_document(key=keys)

    s = ix.searcher()
    p = s.postings("key", "bravo")
    assert p.supports_block_quality()
    # Shortest/longest matching document lengths, and max weight/length.
    assert_equal(p.block_min_length(), 2)
    assert_equal(p.block_max_length(), 8)
    assert_equal(p.block_max_wol(), 0.5)
def test_missing_postings():
    # postings() raises TermNotFound both for a field the schema does not
    # define ("content") and for an unseen term in a defined field ("id").
    schema = fields.Schema(id=fields.ID)
    ix = RamIndex(schema)
    ix.add_document(id=u("one"))
    for fieldname in ("content", "id"):
        assert_raises(TermNotFound, ix.postings, fieldname, "foo")
def test_empty_field():
    # Smoke test: documents with missing, None, or empty-string values for
    # either field are all accepted without raising.
    schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
    ix = RamIndex(schema)
    cases = [dict(id=u("alfa"), text=u("bravo")),
             dict(id=u("charlie")),
             dict(text=u("delta")),
             dict(id=u("echo"), text=None),
             dict(id=None, text=u("foxtrot")),
             dict(id=u("golf"), text=u("")),
             dict(id=u(""), text=u("hotel")),
             dict(id=u(""), text=u(""))]
    for kwargs in cases:
        ix.add_document(**kwargs)
def make_index():
    # Build a small in-memory index of field-documentation sentences:
    # stored "id", analyzed "text" with term-frequency vectors, and a
    # stored numeric "subs" column. Returns the populated RamIndex.
    ana = analysis.StandardAnalyzer(stoplist=None)
    sc = fields.Schema(id=fields.ID(stored=True),
                       text=fields.TEXT(analyzer=ana,
                                        vector=formats.Frequency()),
                       subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(sc)

    rows = [
        (u("fieldtype"),
         u("The FieldType object supports the following attributes"), 56),
        (u("format"),
         u("the storage format for the field contents"), 100),
        (u("vector"),
         u("the storage format for the field vectors (forward index)"), 23),
        (u("scorable"),
         u("whether searches against this field may be scored."), 34),
        (u("stored"),
         u("whether the content of this field is stored for each document."),
         575),
        (u("unique"),
         u("whether this field value is unique to each document."), 2),
        (u("const"),
         u("The constructor for the base field type simply"), 58204),
    ]
    for docid, text, subs in rows:
        ix.add_document(id=docid, text=text, subs=subs)
    return ix
def make_index():
    # NOTE(review): duplicate definition — an identical make_index appears
    # earlier in this file; this later copy is the one that takes effect.
    # Consider removing one of the two.
    #
    # Builds a small in-memory index of field-documentation sentences with
    # term vectors on "text" and a stored numeric "subs" value.
    analyzer = analysis.StandardAnalyzer(stoplist=None)
    schema = fields.Schema(id=fields.ID(stored=True),
                           text=fields.TEXT(analyzer=analyzer,
                                            vector=formats.Frequency()),
                           subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(schema)
    ix.add_document(id=u("fieldtype"),
                    text=u("The FieldType object supports the following attributes"),
                    subs=56)
    ix.add_document(id=u("format"),
                    text=u("the storage format for the field contents"),
                    subs=100)
    ix.add_document(id=u("vector"),
                    text=u("the storage format for the field vectors (forward index)"),
                    subs=23)
    ix.add_document(id=u("scorable"),
                    text=u("whether searches against this field may be scored."),
                    subs=34)
    ix.add_document(id=u("stored"),
                    text=u("whether the content of this field is stored for each document."),
                    subs=575)
    ix.add_document(id=u("unique"),
                    text=u("whether this field value is unique to each document."),
                    subs=2)
    ix.add_document(id=u("const"),
                    text=u("The constructor for the base field type simply"),
                    subs=58204)
    return ix