def test_sorting():
    # Verify ascending sorts on an ID field and a NUMERIC field, plus a
    # reversed sort expressed through an explicit FieldFacet, all against
    # the same four-document in-memory index.
    from whoosh import sorting

    schema = fields.Schema(id=fields.STORED,
                           name=fields.ID(stored=True),
                           size=fields.NUMERIC)
    ix = RamIndex(schema)
    with ix.writer() as w:
        w.add_document(id=0, name=u("bravo"), size=10)
        w.add_document(id=1, name=u("alfa"), size=9)
        w.add_document(id=2, name=u("delta"), size=8)
        w.add_document(id=3, name=u("charlie"), size=7)

    with ix.searcher() as s:
        everything = query.Every()

        # Ascending by name: alfa, bravo, charlie, delta.
        results = s.search(everything, sortedby="name")
        assert_equal([hit["id"] for hit in results], [1, 0, 3, 2])

        # Ascending by size: 7, 8, 9, 10.
        results = s.search(everything, sortedby="size")
        assert_equal([hit["id"] for hit in results], [3, 2, 1, 0])

        # Descending by size via a facet with reverse=True.
        facet = sorting.FieldFacet("size", reverse=True)
        results = s.search(everything, sortedby=facet)
        assert_equal([hit["id"] for hit in results], [0, 1, 2, 3])
def test_missing_term_docfreq():
    # doc_frequency must raise TermNotFound for a field the schema does not
    # define, but return 0 for a defined field queried with an unseen term.
    schema = fields.Schema(id=fields.ID)
    ix = RamIndex(schema)
    for value in (u("alfa bravo charlie"),
                  u("charlie delta echo"),
                  u("sierra tango xray")):
        ix.add_document(id=value)

    assert_raises(TermNotFound, ix.doc_frequency, "content", "foo")
    assert_equal(ix.doc_frequency("id", "foo"), 0)
def test_update():
    # Two passes of update_document keyed on the unique "id" field (the
    # first letter of each word): the second pass replaces every document
    # from the first, so four deletions remain alongside four live docs.
    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           text=fields.ID(stored=True))
    ix = RamIndex(schema)

    first_batch = u("alfa bravo charlie delta").split()
    second_batch = u("apple burrito cat dollhouse").split()
    for batch in (first_batch, second_batch):
        for word in batch:
            ix.update_document(id=word[0], text=word)

    assert ix.has_deletions()
    assert_equal(ix.deleted, set([0, 1, 2, 3]))
    assert_equal(ix.doc_count(), 4)
    stored = [(d["id"], d["text"]) for d in ix.all_stored_fields()]
    assert_equal(stored, [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                          ("d", "dollhouse")])
def test_update():
    # NOTE(review): this redefines test_update and shadows the earlier,
    # near-identical definition above, so only this copy is collected and
    # run by the test framework. One of the two should be renamed or
    # removed.
    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           text=fields.ID(stored=True))
    ix = RamIndex(schema)

    # Second pass replaces every document from the first via the unique
    # "id" key (the first letter of each word).
    for word in u("alfa bravo charlie delta").split():
        ix.update_document(id=word[0], text=word)
    for word in u("apple burrito cat dollhouse").split():
        ix.update_document(id=word[0], text=word)

    assert ix.has_deletions()
    assert_equal(ix.deleted, set([0, 1, 2, 3]))
    assert_equal(ix.doc_count(), 4)
    # Consistency fix: the original ended with a bare `assert x == y`;
    # use assert_equal like every other comparison in this file so a
    # failure reports both values.
    assert_equal([(d["id"], d["text"]) for d in ix.all_stored_fields()],
                 [("a", "apple"), ("b", "burrito"), ("c", "cat"),
                  ("d", "dollhouse")])
def test_threaded():
    # One thread repeatedly updates the same unique key while another
    # thread searches for it; the index should always report exactly one
    # live document for that key, during and after the writes.
    from threading import Thread

    class WriterThread(Thread):
        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix

        def run(self):
            ix = self.ix
            for i in xrange(1000):
                ix.update_document(id=text_type(i), key=u("a"))

    class ReaderThread(Thread):
        def __init__(self, ix):
            Thread.__init__(self)
            self.ix = ix
            self.go = True

        def run(self):
            s = self.ix.searcher()
            while self.go:
                r = s.search(query.Term("key", u("a")))
                assert_equal(len(r), 1)

    schema = fields.Schema(id=fields.ID(stored=True),
                           key=fields.ID(unique=True, stored=True))
    ix = RamIndex(schema)
    writer = WriterThread(ix)
    reader = ReaderThread(ix)
    writer.start()
    reader.start()
    writer.join()
    reader.go = False
    reader.join()

    assert_equal(ix.doc_count(), 1)
    with ix.searcher() as s:
        assert_equal(len(list(s.documents(key="a"))), 1)
def test_block_info():
    # Check the block-quality statistics exposed by a postings reader for
    # the term "bravo", which appears in documents of length 3, 2, and 8.
    schema = fields.Schema(key=fields.KEYWORD)
    ix = RamIndex(schema)
    documents = (u("alfa bravo charlie"),
                 u("bravo delta"),
                 u("charlie delta echo foxtrot"),
                 u("delta echo foxtrot golf hotel india"),
                 u("echo foxtrot golf hotel india juliet alfa bravo"))
    for keys in documents:
        ix.add_document(key=keys)

    s = ix.searcher()
    p = s.postings("key", "bravo")
    assert p.supports_block_quality()
    # Shortest/longest matching document lengths, and max weight/length.
    assert_equal(p.block_min_length(), 2)
    assert_equal(p.block_max_length(), 8)
    assert_equal(p.block_max_wol(), 0.5)
def test_missing_postings():
    # postings() raises TermNotFound both for a field the schema does not
    # define ("content") and for an unseen term in a defined field ("id").
    schema = fields.Schema(id=fields.ID)
    ix = RamIndex(schema)
    ix.add_document(id=u("one"))
    for fieldname in ("content", "id"):
        assert_raises(TermNotFound, ix.postings, fieldname, "foo")
def test_empty_field():
    # Smoke test: documents with missing, None, or empty-string values for
    # either field are all accepted without raising.
    schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
    ix = RamIndex(schema)
    cases = [dict(id=u("alfa"), text=u("bravo")),
             dict(id=u("charlie")),
             dict(text=u("delta")),
             dict(id=u("echo"), text=None),
             dict(id=None, text=u("foxtrot")),
             dict(id=u("golf"), text=u("")),
             dict(id=u(""), text=u("hotel")),
             dict(id=u(""), text=u(""))]
    for kwargs in cases:
        ix.add_document(**kwargs)
def make_index():
    # Build a small in-memory index of field-documentation sentences:
    # stored "id", analyzed "text" with term-frequency vectors, and a
    # stored numeric "subs" column. Returns the populated RamIndex.
    ana = analysis.StandardAnalyzer(stoplist=None)
    sc = fields.Schema(id=fields.ID(stored=True),
                       text=fields.TEXT(analyzer=ana,
                                        vector=formats.Frequency()),
                       subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(sc)

    rows = [
        (u("fieldtype"),
         u("The FieldType object supports the following attributes"), 56),
        (u("format"),
         u("the storage format for the field contents"), 100),
        (u("vector"),
         u("the storage format for the field vectors (forward index)"), 23),
        (u("scorable"),
         u("whether searches against this field may be scored."), 34),
        (u("stored"),
         u("whether the content of this field is stored for each document."),
         575),
        (u("unique"),
         u("whether this field value is unique to each document."), 2),
        (u("const"),
         u("The constructor for the base field type simply"), 58204),
    ]
    for docid, text, subs in rows:
        ix.add_document(id=docid, text=text, subs=subs)
    return ix
def make_index():
    # NOTE(review): duplicate definition — an identical make_index appears
    # earlier in this file; this later copy is the one that takes effect.
    # Consider removing one of the two.
    #
    # Builds a small in-memory index of field-documentation sentences with
    # term vectors on "text" and a stored numeric "subs" value.
    analyzer = analysis.StandardAnalyzer(stoplist=None)
    schema = fields.Schema(id=fields.ID(stored=True),
                           text=fields.TEXT(analyzer=analyzer,
                                            vector=formats.Frequency()),
                           subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(schema)
    ix.add_document(id=u("fieldtype"),
                    text=u("The FieldType object supports the following attributes"),
                    subs=56)
    ix.add_document(id=u("format"),
                    text=u("the storage format for the field contents"),
                    subs=100)
    ix.add_document(id=u("vector"),
                    text=u("the storage format for the field vectors (forward index)"),
                    subs=23)
    ix.add_document(id=u("scorable"),
                    text=u("whether searches against this field may be scored."),
                    subs=34)
    ix.add_document(id=u("stored"),
                    text=u("whether the content of this field is stored for each document."),
                    subs=575)
    ix.add_document(id=u("unique"),
                    text=u("whether this field value is unique to each document."),
                    subs=2)
    ix.add_document(id=u("const"),
                    text=u("The constructor for the base field type simply"),
                    subs=58204)
    return ix