def test_value_stats():
    """Simple test of being able to get value statistics.

    Creates (or overwrites) a chert database at ``db_test_value_stats``, adds
    ten documents which each carry one sortable-serialised number in value
    slot 1, and then checks the value frequency and lower/upper bounds
    reported for slots 0, 1 and 2.  Slots 0 and 2 are never written, so they
    are expected to report a frequency of 0 and empty bounds.

    The database is closed and its directory removed when the test finishes,
    whether or not the checks passed.

    """
    dbpath = "db_test_value_stats"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
        # Iterate the values directly rather than indexing through a loop
        # variable called `id`, which would shadow the builtin.
        for val in vals:
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        # Slot 0 is unused.
        expect(db.get_value_freq(0), 0)
        expect(db.get_value_lower_bound(0), "")
        expect(db.get_value_upper_bound(0), "")

        # Slot 1 holds one value per document; 0 and 9 are the extremes of
        # `vals`.
        expect(db.get_value_freq(1), 10)
        expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
        expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))

        # Slot 2 is unused.
        expect(db.get_value_freq(2), 0)
        expect(db.get_value_lower_bound(2), "")
        expect(db.get_value_upper_bound(2), "")
    finally:
        # Always release the database and delete its directory, so that a
        # failing check does not leave an open handle or stray files behind.
        db.close()
        shutil.rmtree(dbpath)
def test_value_stats():
    """Simple test of being able to get value statistics.

    Creates (or overwrites) a chert database at ``db_test_value_stats``, adds
    ten documents which each carry one sortable-serialised number in value
    slot 1, and then checks the value frequency and lower/upper bounds
    reported for slots 0, 1 and 2.  Slots 0 and 2 are never written, so they
    are expected to report a frequency of 0 and empty bounds.

    The database is closed and its directory removed when the test finishes,
    whether or not the checks passed.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        vals = (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0)
        # Iterate the values directly rather than indexing through a loop
        # variable called `id`, which would shadow the builtin.
        for val in vals:
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        # Slot 0 is unused.
        expect(db.get_value_freq(0), 0)
        expect(db.get_value_lower_bound(0), "")
        expect(db.get_value_upper_bound(0), "")

        # Slot 1 holds one value per document; 0 and 9 are the extremes of
        # `vals`.
        expect(db.get_value_freq(1), 10)
        expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
        expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))

        # Slot 2 is unused.
        expect(db.get_value_freq(2), 0)
        expect(db.get_value_lower_bound(2), "")
        expect(db.get_value_upper_bound(2), "")
    finally:
        # Always release the database and delete its directory, so that a
        # failing check does not leave an open handle or stray files behind.
        db.close()
        shutil.rmtree(dbpath)
def __init__(self, indexpath, dbtype=None):
    """Open a writable connection to the index at `indexpath`.

    Only one indexer connection may be open on a given database at a time,
    so if another connection already holds the database this raises a
    xapian.DatabaseLockError.

    A database which doesn't exist yet is created.

    `dbtype` names the backend used when creating the database ('flint',
    'chert' or 'brass'); it is ignored when the database already exists.
    When it is None, 'chert' is used.  Any other name raises a
    xapian.InvalidArgumentError.

    """
    if dbtype is None:
        dbtype = 'chert'

    # Pairs of (backend name, name of the xapian factory function).  The
    # factory is looked up by name only once a backend has matched, so a
    # factory missing from this xapian build is only touched on request.
    backend_openers = (
        ('flint', 'flint_open'),
        ('chert', 'chert_open'),
        ('brass', 'brass_open'),
    )
    try:
        for backend_name, opener_name in backend_openers:
            if dbtype == backend_name:
                open_backend = getattr(xapian, opener_name)
                self._index = open_backend(indexpath,
                                           xapian.DB_CREATE_OR_OPEN)
                break
        else:
            raise xapian.InvalidArgumentError(
                "Database type '%s' not known" % dbtype)
    except xapian.DatabaseOpeningError:
        # The backend-specific open failed; retry as a plain open of the
        # database that is already at this path.
        self._index = xapian.WritableDatabase(indexpath, xapian.DB_OPEN)

    absolute_path = os.path.abspath(indexpath)
    self._indexpath = os.path.realpath(absolute_path)

    # Start without a cache manager.
    self.cache_manager = None

    # Empty in-memory state, filled in from the stored configuration below.
    self._field_actions = ActionSet()
    self._field_mappings = fieldmappings.FieldMappings()
    self._facet_hierarchy = {}
    self._facet_query_table = {}
    self._next_docid = 0
    self._imgterms_cache = {}
    self._config_modified = False

    try:
        self._load_config()
    except:
        # Loading failed: let go of the database before re-raising, so the
        # half-built connection doesn't keep it open.
        if hasattr(self._index, 'close'):
            self._index.close()
        self._index = None
        raise

    # Track how much memory is buffered.
    # This can go away once Xapian manages this itself.
    self._mem_buffered = 0
    self.set_max_mem_use()
def test_all():
    """Exercise a broad cross-section of the xapian bindings in one test.

    The checks run strictly in sequence and share state: objects such as
    `stem`, `db`, `enq` and `qp` which are created by earlier sections are
    reused by later ones, so the order of the sections matters.

    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Only reaches the print if xapian.cvar unexpectedly exists.
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError,
                     "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    # This in-memory database is shared by most of the checks below.
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2,
                 "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [s.encode('utf-8') for s in subqs]),
                 "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    # Every term still to come from the iterator must sort at or after b'n'.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # This is the second document added to db; the checks below expect it to
    # be returned as docid 2.
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accepts only documents whose value slot 0 is b"yes".
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Rejects any candidate term which starts with b'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This parser (database, default op and stemmer as configured here) is
    # reused by the unicode and stopper checks further down.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treats exactly the term b"b" as a stopword.
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()],
           [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    # Note that `a` is passed as str while `b` is encoded to bytes first.
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        # Maps any field content to the query "spam", except the literal
        # 'spam' itself, for which it raises.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
                 "5 * foo")
def test_all():
    """Exercise a broad cross-section of the xapian bindings in one test.

    The checks run strictly in sequence and share state: objects such as
    `stem`, `db`, `enq` and `qp` which are created by earlier sections are
    reused by later ones, so the order of the sections matters.

    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Only reaches the print if xapian.cvar unexpectedly exists.
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    # This in-memory database is shared by most of the checks below.
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
        "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(
        query2,
        "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]),
        "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'chert_open'") != -1,
        lambda: xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'chert_open'") != -1,
        lambda: xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb",
            xapian.DB_OPEN | xapian.DB_BACKEND_STUB))

    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(
        xapian.DatabaseCreateError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb",
            xapian.DB_CREATE | xapian.DB_BACKEND_GLASS))

    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(
        xapian.DatabaseCreateError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb",
            xapian.DB_CREATE | xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    # Every term still to come from the iterator must sort at or after b'n'.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # This is the second document added to db; the checks below expect it to
    # be returned as docid 2.
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accepts only documents whose value slot 0 is b"yes".
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Rejects any candidate term which starts with b'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(
        qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This parser (database, default op and stemmer as configured here) is
    # reused by the unicode and stopper checks further down.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR o) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR outside) OR Zoutsid@2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(
        qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treats exactly the term b"b" as a stopword.
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()],
           [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    # Note that `a` is passed as str while `b` is encoded to bytes first.
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        # Maps any field content to the query "spam", except the literal
        # 'spam' itself, for which it raises.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(
        xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
        "5 * foo")
def test_value_mods():
    """Test handling of modifications to values.

    Creates (or overwrites) a chert database at ``db_test_value_mods``, then
    adds, replaces and clears the value in slot 1 of its documents, calling
    `check_vals` against the expected mapping of docid to value both before
    and after each commit.  Finally it checks that using the database after
    `close()` raises xapian.DatabaseError.

    The random module is seeded with a fixed value, so the sequence of
    modifications is the same on every run.

    The database is closed and its directory removed when the test finishes,
    whether or not the checks passed.

    """
    dbpath = "db_test_value_mods"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        random.seed(42)
        doccount = 1000
        vals = {}

        # Add a value to all the documents.  (`range` and `dict.items` are
        # used below, not `xrange` and `dict.iteritems`, so that this also
        # runs under Python 3.)
        for num in range(1, doccount):
            doc = xapian.Document()
            val = "val%d" % num
            doc.add_value(1, val)
            db.add_document(doc)
            vals[num] = val
        db.commit()
        check_vals(db, vals)

        # Modify one of the values (this is a regression test which failed
        # with the initial implementation of streaming values).
        doc = xapian.Document()
        val = "newval0"
        doc.add_value(1, val)
        db.replace_document(2, doc)
        vals[2] = val
        db.commit()
        check_vals(db, vals)

        # Do some random modifications.
        for count in range(1, doccount * 2):
            docid = random.randint(1, doccount)
            doc = xapian.Document()

            # Every fifth replacement stores a document with no value.
            if count % 5 == 0:
                val = ""
            else:
                val = "newval%d" % count
                doc.add_value(1, val)
            db.replace_document(docid, doc)
            vals[docid] = val

        # Check the values before and after modification.
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        # Delete all the values which are non-empty, in a random order.
        keys = [key for key, val in vals.items() if val != ""]
        random.shuffle(keys)
        for key in keys:
            doc = xapian.Document()
            db.replace_document(key, doc)
            vals[key] = ""
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        db.close()
        expect_exception(xapian.DatabaseError, "Database has been closed", check_vals, db, vals)
    finally:
        # Always release the database and delete its directory, so that a
        # failing check does not leave an open handle or stray files behind.
        # On the success path the database is already closed by now; this
        # relies on a repeated close() being a no-op.
        db.close()
        shutil.rmtree(dbpath)
def test_value_mods():
    """Test handling of modifications to values.

    Creates (or overwrites) a chert database at ``db_test_value_mods``, then
    adds, replaces and clears the value in slot 1 of its documents, calling
    `check_vals` against the expected mapping of docid to value both before
    and after each commit.  Finally it checks that using the database after
    `close()` raises xapian.DatabaseError.

    The random module is seeded with a fixed value, so the sequence of
    modifications is the same on every run.

    The database is closed and its directory removed when the test finishes,
    whether or not the checks passed.

    """
    dbpath = 'db_test_value_mods'
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    try:
        random.seed(42)
        doccount = 1000
        vals = {}

        # Add a value to all the documents
        for num in range(1, doccount):
            doc = xapian.Document()
            val = 'val%d' % num
            doc.add_value(1, val)
            db.add_document(doc)
            vals[num] = val
        db.commit()
        check_vals(db, vals)

        # Modify one of the values (this is a regression test which failed
        # with the initial implementation of streaming values).
        doc = xapian.Document()
        val = 'newval0'
        doc.add_value(1, val)
        db.replace_document(2, doc)
        vals[2] = val
        db.commit()
        check_vals(db, vals)

        # Do some random modifications.
        for count in range(1, doccount * 2):
            docid = random.randint(1, doccount)
            doc = xapian.Document()

            # Every fifth replacement stores a document with no value.
            if count % 5 == 0:
                val = ''
            else:
                val = 'newval%d' % count
                doc.add_value(1, val)
            db.replace_document(docid, doc)
            vals[docid] = val

        # Check the values before and after modification.
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        # Delete all the values which are non-empty, in a random order.
        keys = [key for key, val in vals.items() if val != '']
        random.shuffle(keys)
        for key in keys:
            doc = xapian.Document()
            db.replace_document(key, doc)
            vals[key] = ''
        check_vals(db, vals)
        db.commit()
        check_vals(db, vals)

        db.close()
        expect_exception(xapian.DatabaseError, "Database has been closed", check_vals, db, vals)
    finally:
        # Always release the database and delete its directory, so that a
        # failing check does not leave an open handle or stray files behind.
        # On the success path the database is already closed by now; this
        # relies on a repeated close() being a no-op.
        db.close()
        shutil.rmtree(dbpath)